Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 111 aaronmk
7 818 aaronmk
import dates
8 300 aaronmk
import exc
9 917 aaronmk
import maps
10 1234 aaronmk
import strings
11 827 aaronmk
import term
12 1047 aaronmk
import util
13 86 aaronmk
import xml_dom
14 1321 aaronmk
import xpath
15 86 aaronmk
16 995 aaronmk
##### Exceptions
17
18 962 aaronmk
class SyntaxException(Exception):
    '''Signals that an XML function was invoked with invalid syntax.

    The message is the fixed prefix 'Invalid XML function syntax: ' followed by
    the causing exception as rendered by exc.str_().
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax: ' + exc.str_(cause)
        Exception.__init__(self, msg)
22 278 aaronmk
23 843 aaronmk
class FormatException(SyntaxException):
    '''A SyntaxException raised by _date() when the computed date can't be
    formatted for output'''
24
25 995 aaronmk
##### Functions
26
27
# Registry of XML functions, keyed by tag name. process() looks up each node's
# tag name here and calls the matching function with the node's entries.
funcs = {}
28
29
def process(node, on_error=exc.raise_):
    '''Evaluates the XML functions in the subtree rooted at node, in place.

    Child elements are processed before node itself. If node's tag name starts
    with '_' and is registered in funcs, the function is called with the node's
    text entries and the node is replaced by the text of the result.

    If the function (or the replacement) raises, the node is instead replaced
    by an XML comment containing the node's source, the exception is annotated
    with its traceback and the offending function, and it is handed to on_error.

    @param node The element to process
    @param on_error Called with each annotated exception; raises it by default
    '''
    # Children go first, so their results are in place when node is evaluated
    for child in xml_dom.NodeElemIter(node): process(child, on_error)

    name = node.tagName
    if not (name.startswith('_') and name in funcs): return # not a function

    try:
        value = funcs[name](xml_dom.NodeTextEntryIter(node))
        xml_dom.replace_with_text(node, value)
    except Exception as e: # also catch XML func internal errors
        # Save in case another exception raised, overwriting sys.exc_info()
        exc.add_traceback(e)
        str_ = str(node)
        exc.add_msg(e, 'function:\n'+str_)
        # Comments can't contain '--'. Collapse every *run* of hyphens, because
        # a plain replace('--', '-') turns '---' into '--', which is still
        # illegal.
        comment_text = re.sub(r'-{2,}', '-', term.emph_multiline(str_))
        xml_dom.replace(node,
            node.ownerDocument.createComment('\n'+comment_text))
        on_error(e)
45
46 86 aaronmk
def map_items(func, items):
    '''Applies func to the value of every (name, value) pair in items.

    @return A new list of (name, func(value)) pairs, in the original order
    '''
    mapped = []
    for name, value in items:
        mapped.append((name, func(value)))
    return mapped
48
49 1234 aaronmk
def cast(type_, val):
    '''Converts val by calling type_ on it.

    Throws SyntaxException if type_ rejects val with a ValueError.
    '''
    try:
        converted = type_(val)
    except ValueError as e:
        raise SyntaxException(e)
    return converted
53
54 278 aaronmk
def conv_items(type_, items):
    '''Casts the value of every entry in items to type_.

    The entries are first passed through xml_dom.TextEntryOnlyIter()
    (presumably so that only text values are converted -- see xml_dom).

    @return A list of (name, converted value) pairs
    Throws SyntaxException if a value can't be cast.
    '''
    def conv(val): return cast(type_, val)

    return map_items(conv, xml_dom.TextEntryOnlyIter(items))
57 278 aaronmk
58 995 aaronmk
#### XML functions
59
60
# Function names must start with _ to avoid collisions with real tags
61
# Functions take arguments (items)
62
63
def _ignore(items):
64 994 aaronmk
    '''Used to "comment out" an XML subtree'''
65
    return None
66 995 aaronmk
funcs['_ignore'] = _ignore
67 994 aaronmk
68 1234 aaronmk
def _eq(items):
69
    items = dict(items)
70
    try:
71
        left = items['left']
72
        right = items['right']
73
    except KeyError: return '' # a value was None
74
    return util.bool2str(left == right)
75
funcs['_eq'] = _eq
76
77
def _if(items):
    '''Selects between the 'then' and 'else' arguments based on 'cond'.

    cond counts as true when its string form is non-empty. 'else' is optional
    and defaults to None.

    Throws SyntaxException if 'cond' or 'then' is missing.
    '''
    args = dict(items)
    try:
        cond, then = args['cond'], args['then']
    except KeyError as e:
        raise SyntaxException(e)
    else_ = args.get('else')

    if bool(cast(str, cond)):
        return then
    return else_
funcs['_if'] = _if
88
89 995 aaronmk
def _alt(items):
90 113 aaronmk
    items = list(items)
91
    items.sort()
92 1186 aaronmk
    try: return items[0][1] # value of lowest-numbered item
93 1187 aaronmk
    except IndexError: return None # input got removed by e.g. SyntaxException
94 995 aaronmk
funcs['_alt'] = _alt
95 113 aaronmk
96 995 aaronmk
def _merge(items):
    '''Merges the argument values into one result via maps.merge_values().

    The entries are converted with strings.ustr, sorted, and then their values
    are passed to maps.merge_values() in that order.

    Throws SyntaxException if a value can't be converted.
    '''
    # Read the iterator *once*; the conversion also checks the types
    pairs = sorted(conv_items(strings.ustr, items))
    values = [value for name, value in pairs]
    return maps.merge_values(*values)
funcs['_merge'] = _merge
102 917 aaronmk
103 995 aaronmk
def _label(items):
    '''Prefixes the 'value' argument with the 'label' argument and ': '.

    Throws SyntaxException if either argument is missing or can't be converted
    with strings.ustr.
    '''
    # Read the iterator *once*; the conversion also checks the types
    args = dict(conv_items(strings.ustr, items))
    try:
        label, value = args['label'], args['value']
    except KeyError as e:
        raise SyntaxException(e)
    return label + ': ' + value
funcs['_label'] = _label
112 917 aaronmk
113 1047 aaronmk
def _nullIf(items):
    '''Turns the 'value' argument into None when it matches 'null'.

    Arguments:
    null: The value that stands for "no value"
    value: The value to test
    type (optional): 'float' to interpret null as a float; any other setting
        leaves it a string

    The comparison itself is done by util.none_if(value, <typed null>).

    Throws SyntaxException if 'null' or 'value' is missing, or if null can't be
    converted to the requested type.
    '''
    args = dict(conv_items(str, items))
    try:
        null = args['null']
        value = args['value']
    except KeyError as e: raise SyntaxException(e)

    type_ = str
    if args.get('type', None) == 'float': type_ = float
    # Convert with cast() so that a malformed null (e.g. a non-numeric string
    # with type=float) is reported as a SyntaxException rather than escaping
    # as a raw ValueError
    return util.none_if(value, cast(type_, null))
funcs['_nullIf'] = _nullIf
124
125 1219 aaronmk
def _map(items):
    '''Translates a value through a lookup table given by the other arguments.

    The last entry's value is the value to translate; all earlier entries form
    the table (name -> replacement). If the table has a '_closed' entry with a
    non-empty value, a value that is not in the table is an error; otherwise
    such a value is returned unchanged.

    Throws SyntaxException if there are no entries, or if the table is closed
    and the value is not in it.
    '''
    entries = conv_items(str, items) # read *once* from iter and check types
    try:
        last_name, value = entries.pop() # value is last entry's value
    except IndexError as e:
        raise SyntaxException(e)

    table = dict(entries)
    is_closed = bool(table.pop('_closed', False))
    try:
        return table[value]
    except KeyError as e:
        if not is_closed: return value
        raise SyntaxException(e)
funcs['_map'] = _map
136
137
def _replace(items):
    '''Applies a series of regular expression substitutions to a value.

    The last entry's value is the text to edit. Every earlier entry is a
    (pattern, replacement) pair, applied in order with re.sub(). A pattern made
    up only of word characters is matched as a whole word.

    @return The edited text, passed through util.none_if(..., u'')
    Throws SyntaxException if there are no entries or a pattern is invalid.
    '''
    entries = conv_items(str, items) # read *once* from iter and check types
    try:
        value = entries.pop()[1] # last entry contains value
    except IndexError as e:
        raise SyntaxException(e)

    try:
        for pattern, replacement in entries:
            is_word = re.match(r'^\w+$', pattern)
            if is_word: # match whole word
                pattern = r'(?<![^\W_])' + pattern + r'(?![^\W_])'
            value = re.sub(pattern, replacement, value)
    except sre_constants.error as e:
        raise SyntaxException(e)

    return util.none_if(value, u'') # empty strings always mean None
funcs['_replace'] = _replace
149
150 1225 aaronmk
def _units(items):
    '''Returns the 'value' argument as is.

    The 'units' argument is required but is not appended to the value yet,
    because unit conversion isn't ready.

    Throws SyntaxException if 'units' or 'value' is missing.
    '''
    args = dict(conv_items(str, items))
    try:
        args['units'] # required, even though it is not used yet
        value = args['value']
    except KeyError as e:
        raise SyntaxException(e)
    return value
funcs['_units'] = _units
158
159 995 aaronmk
def _range(items):
    '''Computes the size of a numeric range as the string form of to - from.

    @return None if 'from' or 'to' is missing
    Throws SyntaxException if a value can't be converted to a float.
    '''
    bounds = dict(conv_items(float, items))
    start = bounds.get('from')
    end = bounds.get('to')
    if start is None or end is None:
        return None
    return str(end - start)
funcs['_range'] = _range
166 86 aaronmk
167 1399 aaronmk
def parse_range(str_, range_sep='-'):
    '''Splits a string of the form "start<range_sep>end" at the first separator.

    @return (start, end), each stripped of surrounding whitespace. If str_ is
        not a range -- it has no separator, or it starts with a '-' separator
        and is thus a negative number -- returns (str_, None) with str_
        untouched.
    '''
    start, sep, end = str_.partition(range_sep)
    not_a_range = sep == ''
    negative_number = start == '' and range_sep == '-'
    if not_a_range or negative_number:
        return (str_, None)
    return (start.strip(), end.strip())
173
174
def _rangeStart(items):
    '''Returns the start of the range in the 'value' argument, as split by
    parse_range(); None if there is no 'value'.'''
    args = dict(conv_items(str, items))
    if 'value' not in args:
        return None # input is empty
    start, end = parse_range(args['value'])
    return start
funcs['_rangeStart'] = _rangeStart
180
181
def _rangeEnd(items):
    '''Returns the end of the range in the 'value' argument, as split by
    parse_range(); None if there is no 'value' or the value is not a range.'''
    args = dict(conv_items(str, items))
    if 'value' not in args:
        return None # input is empty
    start, end = parse_range(args['value'])
    return end
funcs['_rangeEnd'] = _rangeEnd
187
188 995 aaronmk
def _avg(items):
    '''Computes the arithmetic mean of the argument values.

    @return The mean as a string, or None if there are no values
    Throws SyntaxException if a value can't be converted to a float.
    '''
    count = 0
    sum_ = 0.
    for name, value in conv_items(float, items):
        count += 1
        sum_ += value
    # Empty input has no mean. Return None, like the other functions do for
    # empty input, instead of failing with a ZeroDivisionError.
    if count == 0: return None
    return str(sum_/count)
funcs['_avg'] = _avg
196 86 aaronmk
197 968 aaronmk
class CvException(Exception):
    '''Raised (wrapped in a SyntaxException) by _noCV() when a value is a
    coefficient of variation'''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only allowed for ratio'
            ' scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
202
203 995 aaronmk
def _noCV(items):
204 968 aaronmk
    try: name, value = items.next()
205
    except StopIteration: return None
206
    if re.match('^(?i)CV *\d+$', value): raise SyntaxException(CvException())
207
    return value
208 995 aaronmk
funcs['_noCV'] = _noCV
209 968 aaronmk
210 995 aaronmk
def _date(items):
    '''Normalizes a date to the form YYYY-MM-DD.

    The date is given in one of two ways:
    - A 'date' argument holding either a number, taken as a year whose
      fractional part selects the day of the year, or any other string, which
      is parsed by dates.strtotime()
    - Integer 'year', 'month', and 'day' arguments. year is required; month and
      day default to 1.

    @return The formatted date; None if there are no arguments at all; or the
        'date' string unchanged if dates.strtotime() raises ImportError
        (presumably because an optional date parser is unavailable -- see
        dates)
    Throws SyntaxException if the arguments don't form a valid date.
    Throws FormatException if the date can't be formatted.
    '''
    items = conv_items(str, items) # get *once* from iter and check types
    try: str_ = dict(items)['date']
    except KeyError: # no date string, so build the date from its parts
        items = dict(conv_items(int, items))
        try: items['year'] # year is required
        except KeyError as e:
            if items == {}: return None # entire date is empty
            else: raise SyntaxException(e)
        items.setdefault('month', 1)
        items.setdefault('day', 1)
        try: date = datetime.date(**items)
        # TypeError: unknown part name; OverflowError: part too big for an int
        except (TypeError, ValueError, OverflowError) as e:
            raise SyntaxException(e)
    else:
        try: year = float(str_)
        except ValueError: # not a number, so parse it as a date string
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise SyntaxException(e)
        else: # a (possibly fractional) year
            # Years that are out of range (e.g. 0), nan, and inf must also be
            # reported as syntax errors rather than escaping unwrapped
            try:
                date = (datetime.date(int(year), 1, 1) +
                    datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e: raise SyntaxException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
funcs['_date'] = _date
234 86 aaronmk
235 1366 aaronmk
def _dateRangeStart(items):
    '''Returns the first element of dates.parse_date_range() applied to the
    'value' argument; None if there is no 'value'.'''
    args = dict(conv_items(str, items))
    if 'value' not in args:
        return None # input is empty
    return dates.parse_date_range(args['value'])[0]
funcs['_dateRangeStart'] = _dateRangeStart
241 1311 aaronmk
242 1366 aaronmk
def _dateRangeEnd(items):
    '''Returns the second element of dates.parse_date_range() applied to the
    'value' argument; None if there is no 'value'.'''
    args = dict(conv_items(str, items))
    if 'value' not in args:
        return None # input is empty
    return dates.parse_date_range(args['value'])[1]
funcs['_dateRangeEnd'] = _dateRangeEnd
248 1311 aaronmk
249 328 aaronmk
# Each part of a name, paired with the slice that picks that part's words out of
# the list of words in a full name. Listed in the order the parts are output.
_name_parts_slices_items = [
    ('first', slice(None, 1)), # the first word
    ('middle', slice(1, -1)), # everything in between
    ('last', slice(-1, None)), # the last word
]
# Lookup of part name -> slice, and the part names on their own (in order)
name_parts_slices = {}
name_parts = []
for _part_name, _part_slice in _name_parts_slices_items:
    name_parts_slices[_part_name] = _part_slice
    name_parts.append(_part_name)
del _part_name, _part_slice
256
257 995 aaronmk
def _name(items):
    '''Joins the name parts that are present with spaces, in the order given by
    name_parts. Arguments that are not name parts are ignored.'''
    given = dict(items)
    present = [given[part] for part in name_parts if part in given]
    return ' '.join(present)
funcs['_name'] = _name
264 102 aaronmk
265 995 aaronmk
def _namePart(items):
    '''Extracts parts of full names.

    Each argument is named after the part to extract and holds a full name.
    The full name is split on single spaces, the words belonging to that part
    are kept, and the results are assembled by _name().

    Throws SyntaxException if an argument's name is not a known name part.
    '''
    extracted = []
    for part, full_name in items:
        try:
            part_slice = name_parts_slices[part]
        except KeyError as e:
            raise SyntaxException(e)
        words = full_name.split(' ')
        extracted.append((part, ' '.join(words[part_slice])))
    return _name(extracted)
funcs['_namePart'] = _namePart
273 1321 aaronmk
274
def _simplifyPath(items):
    '''Removes the empty elements from a chain of nodes.

    Arguments:
    next: XPath leading from one node of the chain to the following one
    require: XPath that a node must contain in order to be kept
    path: The first node of the chain

    Starting at path, every node for which `require` finds nothing is replaced
    by the node that `next` leads to. The walk ends when `next` finds nothing.

    @return The first node of the resulting chain (None if every node was
        removed)
    Throws SyntaxException if an argument is missing.
    '''
    args = dict(items)
    try:
        next_path = cast(str, args['next'])
        require_path = cast(str, args['require'])
        root = args['path']
    except KeyError as e:
        raise SyntaxException(e)

    node = root
    while node is not None:
        # Look up the successor before node is possibly taken out of the tree
        next_node = xpath.get_1(node, next_path, allow_rooted=False)
        required = xpath.get_1(node, require_path, allow_rooted=False)
        if required is None: # empty elem
            xml_dom.replace(node, next_node) # remove current elem
            if node is root: root = next_node # also update root
        node = next_node
    return root
funcs['_simplifyPath'] = _simplifyPath