Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 111 aaronmk
7 1607 aaronmk
import angles
8 818 aaronmk
import dates
9 300 aaronmk
import exc
10 1580 aaronmk
import format
11 917 aaronmk
import maps
12 1234 aaronmk
import strings
13 827 aaronmk
import term
14 1468 aaronmk
import units
15 1047 aaronmk
import util
16 86 aaronmk
import xml_dom
17 1321 aaronmk
import xpath
18 86 aaronmk
19 995 aaronmk
##### Exceptions
20
21 1612 aaronmk
class SyntaxError(exc.ExceptionWithCause):
    '''An XML function was invoked incorrectly (e.g. a required param is
    missing). Note that inside this module this name shadows the builtin
    SyntaxError.
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax'
        exc.ExceptionWithCause.__init__(self, msg, cause)
25 278 aaronmk
26 1613 aaronmk
class FormatException(exc.ExceptionWithCause):
    '''An input value could not be converted or interpreted
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        msg = 'Invalid input value'
        exc.ExceptionWithCause.__init__(self, msg, cause)
29 843 aaronmk
30 1992 aaronmk
##### Helper functions
31 995 aaronmk
32 1992 aaronmk
def map_items(func, items):
    '''Applies func to the value of every (name, value) pair
    @param func Callable taking one value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in input order
    '''
    mapped = []
    for name, value in items:
        mapped.append((name, func(value)))
    return mapped
34
35
def cast(type_, val):
    '''Converts val by calling type_ on it
    @param type_ Callable that performs the conversion, e.g. float
    @param val The value to convert
    @return type_(val)
    @throws FormatException if type_ raises a ValueError (can't cast)
    '''
    try: return type_(val)
    except ValueError as e: raise FormatException(e)
39
40
def conv_items(type_, items):
    '''Casts the value of every item to type_
    @param type_ Callable that performs the conversion (see cast())
    @param items Iterable of (name, value) pairs; passed through
        xml_dom.TextEntryOnlyIter first (presumably restricts the entries to
        text-only ones -- confirm in xml_dom)
    @return list of (name, converted value) pairs
    @throws FormatException if a value can't be cast
    '''
    def to_type(val): return cast(type_, val)
    text_items = xml_dom.TextEntryOnlyIter(items)
    return map_items(to_type, text_items)
43
44
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value
    @param items Mutable list of (name, value) pairs. The last entry is always
        removed, even when its name does not match.
    @param name Name of value param, or None to accept any name
    @return The last entry's value, or None if items is empty or the last
        entry's name is not `name`
    '''
    try: last = items.pop() # last entry contains value
    except IndexError: return None # input is empty and no actions
    if name is not None and last[0] != name: return None # input is empty
    return last[1]
50
51 995 aaronmk
# Maps each XML function's tag name (e.g. '_ignore') to its implementation
funcs = {}
52
53 1992 aaronmk
##### Public functions
54
55 995 aaronmk
def process(node, on_error=exc.raise_):
    '''Evaluates the XML functions in a tree, children before parents, and
    replaces each function node with the text value its function returns
    @param node Root element of the subtree to process
    @param on_error Called with the exception when an XML function fails. By
        then the failed node has been replaced with a comment containing the
        node's string form.
    '''
    # Children first, so a function sees its already-evaluated arguments
    for child in xml_dom.NodeElemIter(node): process(child, on_error)
    name = node.tagName
    if name.startswith('_') and name in funcs:
        try:
            value = funcs[name](xml_dom.NodeTextEntryIter(node))
            xml_dom.replace_with_text(node, value)
        except Exception as e: # also catch non-wrapped exceptions (XML func bugs)
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
                '\n'+term.emph_multiline(str_)))

            on_error(e)
71
72 1992 aaronmk
def strip(node):
    '''Removes the XML functions from a tree without evaluating them: each
    function node is replaced with its last parameter (which is usually its
    value), except for _ignore, which is removed completely
    @param node Root element of the subtree to strip
    '''
    for child in xml_dom.NodeElemIter(node): strip(child)
    tag = node.tagName
    if not (tag.startswith('_') and tag in funcs): return # not an XML function
    if tag == '_ignore': replacement = None
    else:
        entries = list(xml_dom.NodeTextEntryIter(node))
        replacement = pop_value(entries, None) # last entry, whatever its name
    xml_dom.replace_with_text(node, replacement)
81 86 aaronmk
82 1469 aaronmk
##### XML functions
83 995 aaronmk
84
# Function names must start with _ to avoid collisions with real tags
85
# Functions take arguments (items)
86
87 1469 aaronmk
#### General
88
89 995 aaronmk
def _ignore(items):
    '''Discards its contents, so that wrapping an XML subtree in it "comments
    out" that subtree
    @param items Unused
    @return None, always
    '''
    return
92 995 aaronmk
funcs['_ignore'] = _ignore
93 994 aaronmk
94 1469 aaronmk
#### Conditionals
95
96 1234 aaronmk
def _eq(items):
    '''Tests whether two values are equal
    @param items
        left First value
        right Second value
    @return util.bool2str() of the comparison, or '' if either param is absent
    '''
    params = dict(items)
    if 'left' not in params or 'right' not in params:
        return '' # a value was None
    return util.bool2str(params['left'] == params['right'])
103
funcs['_eq'] = _eq
104
105
def _if(items):
    '''Selects one of two values based on a condition
    @param items
        cond The condition; tested via bool(strings.ustr(cond)), so presumably
            true iff its string form is non-empty
        then Returned when cond is true
        else (optional) Returned when cond is false; defaults to None
    @throws SyntaxError if cond or then is missing
    '''
    items = dict(items)
    try:
        cond = items['cond']
        then = items['then']
    except KeyError as e: raise SyntaxError(e)
    else_ = items.get('else', None)
    cond = bool(cast(strings.ustr, cond))
    if cond: return then
    else: return else_
115
funcs['_if'] = _if
116
117 1469 aaronmk
#### Combining values
118
119 995 aaronmk
def _alt(items):
    '''Picks the first available alternative
    @param items (name, value) pairs; they are sorted, and the value of the
        first pair in sorted order is used
    @return That value, or None if there are no items
    '''
    ordered = sorted(items)
    try:
        lowest = ordered[0]
        return lowest[1] # value of lowest-numbered item
    except IndexError: return None # input got removed by e.g. FormatException
124 995 aaronmk
funcs['_alt'] = _alt
125 113 aaronmk
126 995 aaronmk
def _merge(items):
    '''Merges the values of all items, taken in sorted item order, using
    maps.merge_values()
    @param items (name, value) pairs; values are converted with strings.ustr
    @throws FormatException if a value can't be cast
    '''
    # get *once* from iter, check types
    ordered = sorted(conv_items(strings.ustr, items))
    values = [value for _key, value in ordered]
    return maps.merge_values(*values)
131 995 aaronmk
funcs['_merge'] = _merge
132 917 aaronmk
133 995 aaronmk
def _label(items):
    '''Prefixes a value with a label
    @param items
        label The label text
        value The value to label
    @return 'label: value', or None if there is no value
    @throws SyntaxError if there is a value but no label
    @throws FormatException if a param can't be cast
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    value = items.get('value', None)
    if value is None: return None # input is empty
    try: label = items['label']
    except KeyError as e: raise SyntaxError(e)
    return label+': '+value
141 995 aaronmk
funcs['_label'] = _label
142 917 aaronmk
143 1469 aaronmk
#### Transforming values
144
145 2012 aaronmk
def _collapse(items):
    '''Collapses a subtree if the node that `require` points to inside it is
    missing or empty
    @param items
        require Path, evaluated against value with xpath.get_1() (rooted paths
            not allowed), of the node that must be non-empty
        value The subtree to test
    @return value, or None if the required node is missing or empty
    @throws SyntaxError if require is missing
    '''
    items = dict(items)
    try: require = cast(strings.ustr, items['require'])
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)

    required_node = xpath.get_1(value, require, allow_rooted=False)
    if required_node is None or xml_dom.is_empty(required_node): return None
    else: return value
155
funcs['_collapse'] = _collapse
156
157 1478 aaronmk
# Converters that _nullIf selects by its optional `type` param; the None key is
# used when no type is given
types_by_name = {None: strings.ustr, 'str': strings.ustr, 'float': float}
158 1477 aaronmk
159 1047 aaronmk
def _nullIf(items):
    '''Turns a value into None if it equals a given "null" value
    @param items
        null The value that means NULL
        value (optional) The value to test
        type (optional) Key of types_by_name naming the converter applied to
            null before comparing
    @return The result of util.none_if(value, null); value itself if that
        raises a ValueError
    @throws SyntaxError if null is missing or type is not in types_by_name
    @throws FormatException if a param can't be cast
    '''
    items = dict(conv_items(strings.ustr, items))
    try: null = items['null']
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)
    type_str = items.get('type', None)

    try: type_ = types_by_name[type_str]
    except KeyError as e: raise SyntaxError(e)
    null = type_(null)

    try: return util.none_if(value, null)
    except ValueError: return value # value not convertible, so can't equal null
172 1047 aaronmk
funcs['_nullIf'] = _nullIf
173
174 1602 aaronmk
def repl(repls, value):
    '''Looks up the replacement for a value.
    Raises error if value not in map and no special '*' entry
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @param value The input value to replace
    @return The replacement; value itself if the replacement is "*"
    @throws FormatException if value is not in repls and there is no "*" entry
    '''
    try: new_value = repls[value]
    except KeyError as e:
        # Save traceback right away in case another exception raised
        fe = FormatException(e)
        try: new_value = repls['*']
        except KeyError: raise fe
    if new_value == '*': new_value = value # '*' means keep input value the same
    return new_value
189 1602 aaronmk
190
def _map(items):
    '''Replaces the input value using a table of mappings. See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value; None if the input is empty or maps to ''
    @throws FormatException if the value has no mapping (see repl())
    '''
    entries = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(entries)
    if value is None: return None # input is empty
    mappings = dict(entries) # what remains after popping the value
    new_value = repl(mappings, value)
    return util.none_if(new_value, u'') # empty value means None
200 1219 aaronmk
funcs['_map'] = _map
201
202
def _replace(items):
    '''Applies regular-expression replacements to the input value
    @param items
        <last_entry> Value (must be named "value")
        <other_entries> pattern=replacement, applied in order with re.sub(). A
            pattern consisting only of word characters matches whole words
            only.
    @return The resulting value with surrounding whitespace stripped; None if
        the input is empty or the result is ''
    @throws SyntaxError if a pattern is not a valid regular expression
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    try:
        # Named `pattern` so the module-level repl() is not shadowed
        for pattern, with_ in items:
            if re.match(r'^\w+$', pattern):
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])' # match whole word
            value = re.sub(pattern, with_, value)
    except re.error as e: raise SyntaxError(e)
    return util.none_if(value.strip(), u'') # empty strings always mean None
213 1219 aaronmk
funcs['_replace'] = _replace
214
215 1469 aaronmk
#### Quantities
216
217 1225 aaronmk
def _units(items):
    '''Applies unit actions to a quantity
    @param items
        <last_entry> Value (must be named "value"), parsed with
            units.str2quantity()
        <other_entries> action=units, applied in order. Actions:
            default Passed to units.set_default_units()
            to Converts the quantity with units.convert()
            Units of '' are passed on as None.
    @return The quantity as a string (units.quantity2str()), or None if the
        input is empty
    @throws SyntaxError if an action is not one of the above
    @throws FormatException if the conversion raises a ValueError or units are
        missing (units.MissingUnitsException)
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty

    quantity = units.str2quantity(value)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default': units.set_default_units(quantity, units_)
            elif action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise FormatException(e)
            else: raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise FormatException(e)
    return units.quantity2str(quantity)
233 1225 aaronmk
funcs['_units'] = _units
234
235 1399 aaronmk
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first occurrence of the separator
    @param str_ The string to split
    @param range_sep The separator between the start and the end
    @return tuple (start, end), each stripped of surrounding whitespace.
        (str_, None), unstripped, if str_ contains no separator, or if the
        separator is '-' and str_ begins with it (a negative number).
    '''
    start, sep, end = str_.partition(range_sep)
    is_range = sep != ''
    is_negative_num = start == '' and range_sep == '-'
    if not is_range or is_negative_num: return (str_, None)
    return (start.strip(), end.strip())
241
242
def _rangeStart(items):
    '''Extracts the start of a range. See parse_range()
    @param items
        value The range string
    @return The start of the range, or None if there is no value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, _end = parse_range(params['value'])
    return start
247
funcs['_rangeStart'] = _rangeStart
248
249
def _rangeEnd(items):
    '''Extracts the end of a range. See parse_range()
    @param items
        value The range string
    @return The end of the range (None if the value is not a range), or None
        if there is no value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    _start, end = parse_range(params['value'])
    return end
254
funcs['_rangeEnd'] = _rangeEnd
255
256 1472 aaronmk
def _range(items):
    '''Computes the size of a numeric range
    @param items
        from Start of the range
        to End of the range
    @return str(to - from), or None if either bound is absent
    @throws FormatException if a bound can't be cast to float
    '''
    bounds = dict(conv_items(float, items))
    from_, to = bounds.get('from'), bounds.get('to')
    if from_ is None or to is None: return None
    return str(to - from_)
262
funcs['_range'] = _range
263
264 995 aaronmk
def _avg(items):
    '''Computes the arithmetic mean of the values of all items
    @param items (name, value) pairs; names are not used
    @return The mean as a string, or None if there are no items
    @throws FormatException if a value can't be cast to float
    '''
    values = [value for _key, value in conv_items(float, items)]
    if not values: return None # input is empty
    return str(sum(values, 0.)/len(values))
272 995 aaronmk
funcs['_avg'] = _avg
273 86 aaronmk
274 968 aaronmk
class CvException(Exception):
    '''A CV (coefficient of variation) value was found where it is not
    allowed. Takes no arguments; the message is fixed.'''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only allowed for ratio'
            ' scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
279
280 995 aaronmk
def _noCV(items):
    '''Passes the input value through, but rejects CV values
    @param items Iterator of (name, value) pairs; only the first is consumed
    @return The first item's value, or None if there are no items
    @throws FormatException (caused by a CvException) if the value is "CV",
        optional spaces, then digits (case-insensitive)
    '''
    try: name, value = next(items)
    except StopIteration: return None
    # The inline flag must lead the pattern: Python >= 3.11 rejects global flags
    # that are not at the start of the expression
    if re.match(r'(?i)^CV *\d+$', value): raise FormatException(CvException())
    return value
285 995 aaronmk
funcs['_noCV'] = _noCV
286 968 aaronmk
287 1469 aaronmk
#### Dates
288
289 995 aaronmk
def _date(items):
    '''Normalizes a date to %Y-%m-%d form
    @param items Either
        date A date string, or a number treated as a (fractional) year
        or the parts of a date:
        year Required
        month (optional) Number or name; defaults to 1
        day (optional) Defaults to 1
    @return The formatted date; None if there are no params at all; the
        unchanged date string if parsing it raises an ImportError
    @throws FormatException if the year is missing or the date is invalid
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try: str_ = items['date']
    except KeyError:
        # Year is required
        try: items['year']
        except KeyError as e:
            if items == {}: return None # entire date is empty
            else: raise FormatException(e)

        # Convert month name to number
        try: month = items['month']
        except KeyError: pass
        else:
            if not month.isdigit(): # month is name
                try: items['month'] = str(dates.strtotime(month).month)
                except ValueError as e: raise FormatException(e)

        # iter() so that conv_items() still receives an iterator, as before
        items = dict(conv_items(format.str2int, iter(items.items())))
        items.setdefault('month', 1)
        items.setdefault('day', 1)

        for try_num in range(2):
            try:
                date = datetime.date(**items)
                break
            except ValueError as e:
                if try_num > 0: raise FormatException(e)
                    # exception still raised after retry
                msg = strings.ustr(e)
                if msg == 'month must be in 1..12': # try swapping month and day
                    items['month'], items['day'] = items['day'], items['month']
                else: raise FormatException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise FormatException(e)
        else: date = (datetime.date(int(year), 1, 1) + # fraction -> day of year
            datetime.timedelta(round((year % 1.)*365)))
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
333 995 aaronmk
funcs['_date'] = _date
334 86 aaronmk
335 1366 aaronmk
def _dateRangeStart(items):
    '''Extracts the start of a date range
    @param items
        value The date range string
    @return Element 0 of dates.parse_date_range(value), or None if there is no
        value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[0]
340
funcs['_dateRangeStart'] = _dateRangeStart
341 1311 aaronmk
342 1366 aaronmk
def _dateRangeEnd(items):
    '''Extracts the end of a date range
    @param items
        value The date range string
    @return Element 1 of dates.parse_date_range(value), or None if there is no
        value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[1]
347
funcs['_dateRangeEnd'] = _dateRangeEnd
348 1311 aaronmk
349 1469 aaronmk
#### Names
350
351 328 aaronmk
# The parts of a name, in the order they are joined by _name(). Each is paired
# with the slice that _namePart() applies to the space-split words of a name to
# select that part.
_name_parts_slices_items = [
    ('first', slice(None, 1)), # first word
    ('middle', slice(1, -1)), # everything in between
    ('last', slice(-1, None)), # last word
]
# Part name -> slice
name_parts_slices = dict(_name_parts_slices_items)
# Part names only, in order
name_parts = [name for name, slice_ in _name_parts_slices_items]
358
359 995 aaronmk
def _name(items):
    '''Joins name parts into a full name
    @param items (part, value) pairs; only the parts listed in name_parts are
        used
    @return The values of the parts that are present, in name_parts order,
        separated by spaces
    '''
    by_part = dict(items)
    present = [by_part[part] for part in name_parts if part in by_part]
    return ' '.join(present)
365 995 aaronmk
funcs['_name'] = _name
366 102 aaronmk
367 995 aaronmk
def _namePart(items):
    '''Extracts parts of names
    @param items (part, name) pairs, where part is a key of name_parts_slices.
        From each name, the space-separated words belonging to that part are
        kept.
    @return The kept parts, joined by _name()
    @throws SyntaxError if a part is not a key of name_parts_slices
    '''
    out_items = []
    for part, value in items:
        try: slice_ = name_parts_slices[part]
        except KeyError as e: raise SyntaxError(e)
        out_items.append((part, ' '.join(value.split(' ')[slice_])))
    return _name(out_items)
374
funcs['_namePart'] = _namePart
375 1321 aaronmk
376 1607 aaronmk
#### Angles
377
378
def _compass(items):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @param items
        value The compass direction
    @return The heading from angles.compass2heading(), cast to str by
        util.cast(); the unchanged value if it is not all-uppercase; None if
        there is no value
    @throws FormatException if compass2heading() raises a KeyError
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty

    if not value.isupper(): return value # pass through other coordinate formats
    try: return util.cast(str, angles.compass2heading(value)) # ignore None
    except KeyError as e: raise FormatException(e)
387
funcs['_compass'] = _compass
388
389 1469 aaronmk
#### Paths
390
391 1321 aaronmk
def _simplifyPath(items):
    '''Walks a chain of nodes, removing each node whose required node is
    missing or empty
    @param items
        next Path from a node to the next node in the chain
        require Path from a node to the node that must be non-empty
        path The first node of the chain
        Both paths are evaluated with xpath.get_1() (rooted paths not allowed).
    @return The first node of the resulting chain (may be None)
    @throws SyntaxError if a param is missing
    '''
    items = dict(items)
    try:
        # Named next_path so the builtin next() is not shadowed
        next_path = cast(strings.ustr, items['next'])
        require = cast(strings.ustr, items['require'])
        root = items['path']
    except KeyError as e: raise SyntaxError(e)

    node = root
    while node is not None:
        # Look up the successor before the current node is possibly replaced
        new_node = xpath.get_1(node, next_path, allow_rooted=False)
        required_node = xpath.get_1(node, require, allow_rooted=False)
        if required_node is None or xml_dom.is_empty(required_node):# empty elem
            xml_dom.replace(node, new_node) # remove current elem
            if node is root: root = new_node # also update root
        node = new_node
    return root
408
funcs['_simplifyPath'] = _simplifyPath