Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 2017 aaronmk
import warnings
7 111 aaronmk
8 1607 aaronmk
import angles
9 818 aaronmk
import dates
10 300 aaronmk
import exc
11 1580 aaronmk
import format
12 917 aaronmk
import maps
13 1234 aaronmk
import strings
14 827 aaronmk
import term
15 1468 aaronmk
import units
16 1047 aaronmk
import util
17 86 aaronmk
import xml_dom
18 1321 aaronmk
import xpath
19 86 aaronmk
20 995 aaronmk
##### Exceptions
21
22 1612 aaronmk
class SyntaxError(exc.ExceptionWithCause):
    '''Signals that an XML function was given malformed parameters.
    Shadows the builtin SyntaxError within this module.
    @param cause The exception that revealed the problem
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax'
        exc.ExceptionWithCause.__init__(self, msg, cause)
26 278 aaronmk
27 1613 aaronmk
class FormatException(exc.ExceptionWithCause):
    '''Signals that an input value could not be used by an XML function.
    @param cause The exception that revealed the problem
    '''
    def __init__(self, cause):
        msg = 'Invalid input value'
        exc.ExceptionWithCause.__init__(self, msg, cause)
30 843 aaronmk
31 1992 aaronmk
##### Helper functions
32 995 aaronmk
33 1992 aaronmk
def map_items(func, items):
    '''Applies func to the value of each (name, value) pair
    @param func Callable taking one value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) tuples, in input order
    '''
    mapped = []
    for name, value in items:
        mapped.append((name, func(value)))
    return mapped
35
36
def cast(type_, val):
    '''Converts val by calling type_ on it
    @param type_ Callable performing the conversion (e.g. float)
    @param val Value to convert
    @return The converted value
    @throws FormatException if type_ rejects val with a ValueError
    '''
    try:
        return type_(val)
    except ValueError as e: # "as" form is valid on Python 2.6+ and 3
        raise FormatException(e)
40
41
def conv_items(type_, items):
    '''Casts the value of each entry produced by
    xml_dom.TextEntryOnlyIter(items) to type_
    @return list of (name, converted_value) tuples
    @throws FormatException if a value can't be cast (see cast())
    '''
    def convert(val): return cast(type_, val)
    
    return map_items(convert, xml_dom.TextEntryOnlyIter(items))
44
45
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value.
    The entry is removed even when its name turns out not to match.
    @param items Mutable list of (name, value) pairs; modified in place
    @param name Name of value param, or None to accept any name
    @return The last entry's value, or None if items is empty or the last
        entry is not named name
    '''
    try: entry = items.pop() # last entry contains value
    except IndexError: return None # input is empty and no actions
    name_matches = name is None or entry[0] == name
    if not name_matches: return None # input is empty
    return entry[1]
51
52 995 aaronmk
# Registry of XML functions: maps each function's tag name (e.g. '_ignore') to
# the callable implementing it. process() and strip() look tags up here.
funcs = {}
53
54 1992 aaronmk
##### Public functions
55
56 995 aaronmk
def process(node, on_error=exc.raise_):
    '''Evaluates every XML function in the subtree rooted at node.
    Children are processed before their parent. An element whose tag starts
    with "_" and is registered in funcs is replaced with the text of the
    function's result. If anything in that step raises, the element is instead
    replaced with a comment containing its string form, and the exception
    (annotated with that string) is passed to on_error.
    @param node Root element of the subtree
    @param on_error Callable receiving each exception; defaults to exc.raise_
    '''
    # Children first, so a function sees already-evaluated parameters
    for child in xml_dom.NodeElemIter(node): process(child, on_error)
    
    name = node.tagName
    if not (name.startswith('_') and name in funcs): return # not an XML func
    
    try:
        value = funcs[name](xml_dom.NodeTextEntryIter(node), node)
        xml_dom.replace_with_text(node, value)
    except Exception as e: # also catch non-wrapped exceptions (XML func bugs)
        # Save in case another exception raised, overwriting sys.exc_info()
        exc.add_traceback(e)
        str_ = strings.ustr(node)
        exc.add_msg(e, 'function:\n'+str_)
        xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
            '\n'+term.emph_multiline(str_)))
        
        on_error(e)
72
73 1992 aaronmk
def strip(node):
    '''Replaces every XML function with its last parameter (which is usually its
    value), except for _ignore, which is removed completely.
    Works bottom-up: children are stripped before their parent.
    @param node Root element of the subtree
    '''
    for child in xml_dom.NodeElemIter(node): strip(child)
    
    name = node.tagName
    if not name.startswith('_') or name not in funcs: return # not an XML func
    
    if name == '_ignore':
        value = None
    else:
        entries = list(xml_dom.NodeTextEntryIter(node))
        value = pop_value(entries, None) # any name is accepted
    xml_dom.replace_with_text(node, value)
82 86 aaronmk
83 1469 aaronmk
##### XML functions
84 995 aaronmk
85
# Function names must start with _ to avoid collisions with real tags
86
# Functions take arguments (items, node)
87
88 1469 aaronmk
#### General
89
90 2017 aaronmk
def _ignore(items, node):
91 994 aaronmk
    '''Used to "comment out" an XML subtree'''
92
    return None
93 995 aaronmk
funcs['_ignore'] = _ignore
94 994 aaronmk
95 2017 aaronmk
def _ref(items, node):
    '''Used to retrieve a value from another XML node
    @param items
        addr=<path> XPath to value, relative to the XML func's parent node
    @param node The XML func's own element
    @return Whatever xpath.get_value() finds at addr. If that is None, a
        UserWarning is issued and None is returned.
    @throws SyntaxError if the addr param is missing
    '''
    params = dict(items)
    try: addr = params['addr']
    except KeyError as e: raise SyntaxError(e)
    
    value = xpath.get_value(node.parentNode, addr)
    if value is None:
        warnings.warn(UserWarning('_ref: XPath reference target missing: '
            +str(addr)))
    return value
109
funcs['_ref'] = _ref
110
111 1469 aaronmk
#### Conditionals
112
113 2016 aaronmk
def _eq(items, node):
114 1234 aaronmk
    items = dict(items)
115
    try:
116
        left = items['left']
117
        right = items['right']
118
    except KeyError: return '' # a value was None
119
    return util.bool2str(left == right)
120
funcs['_eq'] = _eq
121
122 2016 aaronmk
def _if(items, node):
    '''Selects one of two values depending on a condition
    @param items
        cond Condition; judged by the truthiness of strings.ustr(cond)
        then Value returned when the condition holds
        else (optional) Value returned otherwise; defaults to None
    @throws SyntaxError if cond or then is missing
    @throws FormatException if cond can't be cast (see cast())
    '''
    params = dict(items)
    try:
        cond = params['cond']
        then = params['then']
    except KeyError as e: raise SyntaxError(e)
    else_ = params.get('else', None)
    
    if bool(cast(strings.ustr, cond)): return then
    return else_
132
funcs['_if'] = _if
133
134 1469 aaronmk
#### Combining values
135
136 2016 aaronmk
def _alt(items, node):
137 113 aaronmk
    items = list(items)
138
    items.sort()
139 1186 aaronmk
    try: return items[0][1] # value of lowest-numbered item
140 1609 aaronmk
    except IndexError: return None # input got removed by e.g. FormatException
141 995 aaronmk
funcs['_alt'] = _alt
142 113 aaronmk
143 2016 aaronmk
def _merge(items, node):
    '''Combines the params' values with maps.merge_values().
    Values are cast to strings and passed in sorted (name, value) order.
    @throws FormatException if a value can't be cast (see cast())
    '''
    # get *once* from iter, check types
    pairs = sorted(conv_items(strings.ustr, items))
    values = [pair[1] for pair in pairs]
    return maps.merge_values(*values)
148 995 aaronmk
funcs['_merge'] = _merge
149 917 aaronmk
150 2016 aaronmk
def _label(items, node):
    '''Prefixes a value with a label
    @param items
        label=<str> Text placed before the value
        value=<str> Input
    @return "<label>: <value>", or None if the value param is missing
    @throws SyntaxError if value is present but label is missing
    '''
    params = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    value = params.get('value', None)
    if value is None: return None # input is empty
    try: label = params['label']
    except KeyError as e: raise SyntaxError(e)
    return label+': '+value
158 995 aaronmk
funcs['_label'] = _label
159 917 aaronmk
160 1469 aaronmk
#### Transforming values
161
162 2016 aaronmk
def _collapse(items, node):
    '''Collapses a subtree if the "value" element in it is NULL
    @param items
        require=<path> XPath, evaluated against value with allow_rooted=False,
            of the node that must exist and be non-empty
        value (optional) The subtree
    @return value, or None if the required node is missing or
        xml_dom.is_empty() reports it as empty
    @throws SyntaxError if the require param is missing
    '''
    params = dict(items)
    try: require = cast(strings.ustr, params['require'])
    except KeyError as e: raise SyntaxError(e)
    value = params.get('value', None)
    
    required_node = xpath.get_1(value, require, allow_rooted=False)
    if required_node is None or xml_dom.is_empty(required_node): return None
    return value
172
funcs['_collapse'] = _collapse
173
174 1478 aaronmk
# Converters selectable through _nullIf's "type" param, keyed by type name.
# The None key is used when the param is omitted.
types_by_name = {None: strings.ustr, 'str': strings.ustr, 'float': float}
175 1477 aaronmk
176 2016 aaronmk
def _nullIf(items, node):
    '''Passes value and a designated null value to util.none_if()
    @param items
        null=<str> The null value; converted with the selected type first
        value (optional) Input
        type (optional) Key of types_by_name; omitted means string
    @return Result of util.none_if(value, null), or value itself if that call
        raises ValueError
    @throws SyntaxError if null is missing or type is not in types_by_name
    '''
    params = dict(conv_items(strings.ustr, items))
    try: null = params['null']
    except KeyError as e: raise SyntaxError(e)
    value = params.get('value', None)
    type_str = params.get('type', None)
    
    try: type_ = types_by_name[type_str]
    except KeyError as e: raise SyntaxError(e)
    null = type_(null)
    
    try: return util.none_if(value, null)
    except ValueError: return value # value not convertible, so can't equal null
189 1047 aaronmk
funcs['_nullIf'] = _nullIf
190
191 1602 aaronmk
def repl(repls, value):
    '''Looks up the replacement for value
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value (this function returns it as-is)
    @param value Input value
    @return The replacement, or value itself if the replacement is "*"
    @throws FormatException if value not in map and no special '*' entry
    '''
    try: new_value = repls[value]
    except KeyError as e:
        # Save traceback right away in case another exception raised
        fe = FormatException(e)
        try: new_value = repls['*']
        except KeyError: raise fe
    if new_value == '*': new_value = value # '*' means keep input value the same
    return new_value
206 1602 aaronmk
207 2016 aaronmk
def _map(items, node):
    '''Replaces a value according to a mapping. See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value passed through util.none_if(..., u''), or None
        if the last entry is not named "value"
    '''
    # get *once* from iter, check types
    entries = conv_items(strings.ustr, items)
    value = pop_value(entries)
    if value is None: return None # input is empty
    mapped = repl(dict(entries), value)
    return util.none_if(mapped, u'') # empty value means None
217 1219 aaronmk
funcs['_map'] = _map
218
219 2016 aaronmk
def _replace(items, node):
    '''Applies regexp replacements to a value
    @param items
        <last_entry> value=<str> Input
        <other_entries> <regexp>=<replacement>, applied in order with
            re.sub(). A regexp made up only of word characters is matched as
            a whole word.
    @return The result with surrounding whitespace stripped and passed
        through util.none_if(..., u''), or None if the last entry is not
        named "value"
    @throws SyntaxError if a regexp or replacement is rejected by re
    '''
    # get *once* from iter, check types
    entries = conv_items(strings.ustr, items)
    value = pop_value(entries)
    if value is None: return None # input is empty
    try:
        # Named "pattern" so the module-level repl() is not shadowed
        for pattern, with_ in entries:
            if re.match(r'^\w+$', pattern):
                # match whole word
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])'
            value = re.sub(pattern, with_, value)
    except re.error as e: raise SyntaxError(e) # same class as sre_constants.error
    return util.none_if(value.strip(), u'') # empty strings always mean None
230 1219 aaronmk
funcs['_replace'] = _replace
231
232 1469 aaronmk
#### Quantities
233
234 2016 aaronmk
def _units(items, node):
    '''Applies unit actions to a quantity
    @param items
        <last_entry> value=<str> Quantity, parsed with units.str2quantity()
        <other_entries> Actions, applied in order:
            default=<units> Calls units.set_default_units()
            to=<units> Calls units.convert()
            Each units string is first passed through util.none_if(..., u'').
    @return units.quantity2str() of the resulting quantity, or None if the
        last entry is not named "value"
    @throws SyntaxError on any other action name
    @throws FormatException if units.convert() raises ValueError, or if
        units.MissingUnitsException is raised while applying the actions
    '''
    # get *once* from iter, check types
    entries = conv_items(strings.ustr, items)
    value = pop_value(entries)
    if value is None: return None # input is empty
    
    quantity = units.str2quantity(value)
    try:
        for action, units_ in entries:
            units_ = util.none_if(units_, u'')
            if action == 'default': units.set_default_units(quantity, units_)
            elif action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise FormatException(e)
            else: raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise FormatException(e)
    return units.quantity2str(quantity)
250 1225 aaronmk
funcs['_units'] = _units
251
252 1399 aaronmk
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first occurrence of range_sep
    @param str_ The string, e.g. "1 - 5"
    @param range_sep Separator between the start and end of the range
    @return (start, end) with surrounding whitespace stripped, or
        (str_, None), with str_ unchanged, if str_ is not a range. With the
        default "-" separator, a string with nothing but whitespace before
        the first "-" is taken to be a negative number, not a range.
    '''
    default = (str_, None)
    start, sep, end = str_.partition(range_sep)
    if sep == '': return default # not a range
    # Strip before testing, so " -5" is recognized as negative just like "-5"
    if start.strip() == '' and range_sep == '-': return default # negative number
    return (start.strip(), end.strip())
258
259 2016 aaronmk
def _rangeStart(items, node):
    '''Extracts the start of a range. See parse_range()
    @param items value=<str> The range string
    @return First element of parse_range(value), or None if there is no
        value param
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    bounds = parse_range(params['value'])
    return bounds[0]
264
funcs['_rangeStart'] = _rangeStart
265
266 2016 aaronmk
def _rangeEnd(items, node):
    '''Extracts the end of a range. See parse_range()
    @param items value=<str> The range string
    @return Second element of parse_range(value), or None if there is no
        value param
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    bounds = parse_range(params['value'])
    return bounds[1]
271
funcs['_rangeEnd'] = _rangeEnd
272
273 2016 aaronmk
def _range(items, node):
    '''Computes the size of a numeric range
    @param items
        from=<float> Lower bound
        to=<float> Upper bound
    @return str(to - from), or None if either bound is missing
    @throws FormatException if a value can't be cast to float
    '''
    bounds = dict(conv_items(float, items))
    from_ = bounds.get('from')
    to = bounds.get('to')
    if from_ is None or to is None: return None
    size = to - from_
    return str(size)
279
funcs['_range'] = _range
280
281 2016 aaronmk
def _avg(items, node):
    '''Computes the arithmetic mean of the params' values
    @return str() of the mean, or None if there are no values
    @throws FormatException if a value can't be cast to float
    '''
    numbers = [pair[1] for pair in conv_items(float, items)]
    if not numbers: return None # input is empty
    total = 0.
    for number in numbers: total += number # plain left-to-right float sum
    return str(total/len(numbers))
289 995 aaronmk
funcs['_avg'] = _avg
290 86 aaronmk
291 968 aaronmk
class CvException(Exception):
    '''Raised for a CV (coefficient of variation) value where none is
    allowed. Takes no arguments; the message is fixed.
    '''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
296
297 2016 aaronmk
def _noCV(items, node):
298 968 aaronmk
    try: name, value = items.next()
299
    except StopIteration: return None
300 1609 aaronmk
    if re.match('^(?i)CV *\d+$', value): raise FormatException(CvException())
301 968 aaronmk
    return value
302 995 aaronmk
funcs['_noCV'] = _noCV
303 968 aaronmk
304 1469 aaronmk
#### Dates
305
306 2016 aaronmk
def _date(items, node):
    '''Formats a date as YYYY-MM-DD
    @param items Either
        date=<str> A number, taken as a (possibly fractional) year, or any
            other string, parsed with dates.strtotime()
        or separate parts, used only when date is absent:
        year=<int> Required
        month=<int or name> (optional) Defaults to 1. A non-digit month is
            converted to a number with dates.strtotime().
        day=<int> (optional) Defaults to 1
        If the parts fail with "month must be in 1..12", month and day are
        swapped and the date is tried once more.
    @return The formatted date; None if there are no params at all; the
        unchanged date string if dates.strtotime() raises ImportError
    @throws FormatException if year is missing, or a part or the date string
        can't be parsed, or the resulting date is invalid
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try: str_ = items['date']
    except KeyError:
        # Year is required
        try: items['year']
        except KeyError as e:
            if items == {}: return None # entire date is empty
            else: raise FormatException(e)
        
        # Convert month name to number
        try: month = items['month']
        except KeyError: pass
        else:
            if not month.isdigit(): # month is name
                try: items['month'] = str(dates.strtotime(month).month)
                except ValueError as e: raise FormatException(e)
        
        # iter(...items()) yields the pairs as an iterator on Python 2 and 3
        items = dict(conv_items(format.str2int, iter(items.items())))
        items.setdefault('month', 1)
        items.setdefault('day', 1)
        
        for try_num in range(2):
            try:
                date = datetime.date(**items)
                break
            except ValueError as e:
                if try_num > 0: raise FormatException(e)
                    # exception still raised after retry
                msg = strings.ustr(e)
                if msg == 'month must be in 1..12': # try swapping month and day
                    items['month'], items['day'] = items['day'], items['month']
                else: raise FormatException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise FormatException(e)
        else:
            # Fractional part of the year becomes an offset in days from Jan 1
            date = (datetime.date(int(year), 1, 1) +
                datetime.timedelta(round((year % 1.)*365)))
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
350 995 aaronmk
funcs['_date'] = _date
351 86 aaronmk
352 2016 aaronmk
def _dateRangeStart(items, node):
    '''Extracts the start of a date range
    @param items value=<str> The date range string
    @return First element of dates.parse_date_range(value), or None if there
        is no value param
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[0]
357
funcs['_dateRangeStart'] = _dateRangeStart
358 1311 aaronmk
359 2016 aaronmk
def _dateRangeEnd(items, node):
    '''Extracts the end of a date range
    @param items value=<str> The date range string
    @return Second element of dates.parse_date_range(value), or None if there
        is no value param
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[1]
364
funcs['_dateRangeEnd'] = _dateRangeEnd
365 1311 aaronmk
366 1469 aaronmk
#### Names
367
368 328 aaronmk
_name_parts_slices_items = [
369
    ('first', slice(None, 1)),
370
    ('middle', slice(1, -1)),
371
    ('last', slice(-1, None)),
372
]
373
name_parts_slices = dict(_name_parts_slices_items)
374
name_parts = [name for name, slice_ in _name_parts_slices_items]
375
376 2016 aaronmk
def _name(items, node):
    '''Assembles a full name from its parts
    @param items <part>=<str> pairs; only parts listed in name_parts are used
    @return The given parts, in name_parts order, joined with single spaces
    '''
    given = dict(items)
    present = [given[part] for part in name_parts if part in given]
    return ' '.join(present)
382 995 aaronmk
funcs['_name'] = _name
383 102 aaronmk
384 2016 aaronmk
def _namePart(items, node):
    '''Extracts parts of full names
    @param items <part>=<full name> pairs, where part is a key of
        name_parts_slices. The full name is split on single spaces and the
        words selected by the part's slice are kept.
    @return The extracted parts, combined by _name()
    @throws SyntaxError if a part is not in name_parts_slices
    '''
    out_items = []
    for part, value in items:
        try: slice_ = name_parts_slices[part]
        except KeyError as e: raise SyntaxError(e)
        words = value.split(' ')
        out_items.append((part, ' '.join(words[slice_])))
    return _name(out_items, node)
391 995 aaronmk
funcs['_namePart'] = _namePart
392 1321 aaronmk
393 1607 aaronmk
#### Angles
394
395 2016 aaronmk
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @param items value=<str> The direction
    @return util.cast(str, ...) of angles.compass2heading(value); the value
        unchanged if it is not all-uppercase; None if there is no value param
    @throws FormatException if angles.compass2heading() raises KeyError
    '''
    params = dict(conv_items(strings.ustr, items))
    try: value = params['value']
    except KeyError: return None # input is empty
    
    if not value.isupper(): return value # pass through other coordinate formats
    try: return util.cast(str, angles.compass2heading(value)) # ignore None
    except KeyError as e: raise FormatException(e)
404
funcs['_compass'] = _compass
405
406 1469 aaronmk
#### Paths
407
408 2016 aaronmk
def _simplifyPath(items, node):
    '''Removes empty elements from a chain of path elements.
    Starting at path, each element is followed to the next one via the next
    XPath. An element whose require node is missing or empty is replaced by
    its successor.
    @param items
        next=<path> XPath from one path element to the following one
        require=<path> XPath of the node an element needs in order to be kept
        path Root element of the path
    @param node The XML func's own element (not used)
    @return The root of the path after simplification; this is the first
        kept element, or None if no element was kept
    @throws SyntaxError if next, require, or path is missing
    '''
    params = dict(items)
    try:
        next_ = cast(strings.ustr, params['next']) # don't shadow builtin next
        require = cast(strings.ustr, params['require'])
        root = params['path']
    except KeyError as e: raise SyntaxError(e)
    
    curr = root
    while curr is not None:
        new_node = xpath.get_1(curr, next_, allow_rooted=False)
        required_node = xpath.get_1(curr, require, allow_rooted=False)
        if required_node is None or xml_dom.is_empty(required_node):# empty elem
            xml_dom.replace(curr, new_node) # remove current elem
            if curr is root: root = new_node # also update root
        curr = new_node
    return root
425
funcs['_simplifyPath'] = _simplifyPath