Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 2017 aaronmk
import warnings
7 111 aaronmk
8 1607 aaronmk
import angles
9 818 aaronmk
import dates
10 300 aaronmk
import exc
11 1580 aaronmk
import format
12 917 aaronmk
import maps
13 3077 aaronmk
import sql_io
14 1234 aaronmk
import strings
15 827 aaronmk
import term
16 1468 aaronmk
import units
17 1047 aaronmk
import util
18 86 aaronmk
import xml_dom
19 1321 aaronmk
import xpath
20 86 aaronmk
21 995 aaronmk
##### Exceptions
22
23 1612 aaronmk
class SyntaxError(exc.ExceptionWithCause):
    '''Raised when an XML function is invoked with invalid params.
    NOTE: inside this module, this name shadows the builtin SyntaxError.
    @param cause The underlying exception (e.g. the KeyError for a missing
        param)
    '''
    def __init__(self, cause):
        exc.ExceptionWithCause.__init__(self, 'Invalid XML function syntax',
            cause)
27 278 aaronmk
28 1613 aaronmk
class FormatException(exc.ExceptionWithCause):
    '''Raised when an input value can't be converted or interpreted.
    @param cause The underlying exception (e.g. the ValueError of a failed
        cast)
    '''
    def __init__(self, cause):
        exc.ExceptionWithCause.__init__(self, 'Invalid input value', cause)
31 843 aaronmk
32 1992 aaronmk
##### Helper functions
33 995 aaronmk
34 1992 aaronmk
def map_items(func, items):
    '''Applies func to the value of each entry, keeping the names.
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in input order
    '''
    return [(name, func(value)) for name, value in items]
36
37
def cast(type_, val):
    '''Converts val with type_.
    @param type_ Callable that performs the conversion (e.g. float)
    @return type_(val)
    @throws FormatException if type_ raises ValueError (can't cast)
    '''
    try: return type_(val)
    except ValueError as e: raise FormatException(e)
41
42
def conv_items(type_, items):
    '''Casts the value of each entry to type_.
    The entries are first passed through xml_dom.TextEntryOnlyIter().
    @return list of (name, converted value) pairs
    @throws FormatException if a value can't be cast (see cast())
    '''
    def convert(val): return cast(type_, val)
    return map_items(convert, xml_dom.TextEntryOnlyIter(items))
45
46
def pop_value(items, name='value'):
    '''Removes the last entry from items and returns its value.
    @param items Mutable list of (name, value) entries; modified in place
    @param name Name of value param, or None to accept any name
    @return The last entry's value, or None if items is empty or the last
        entry is not named name (that entry is still removed)
    '''
    try: last = items.pop() # last entry contains value
    except IndexError: return None # input is empty and no actions
    if name is not None and last[0] != name: return None # input is empty
    return last[1]
52
53 3335 aaronmk
def merge_tagged(root):
    '''Merges siblings in root that are marked as mergeable.
    Used to recombine pieces of nodes that were split apart in the mappings.
    Children matching '*[@merge=1]' are merged by tag name, once per distinct
    name; then the same is done for every child element, recursively.
    '''
    mergeable_names = set(c.tagName for c in xpath.get(root, '*[@merge=1]'))
    for name in mergeable_names: xml_dom.merge_by_name(root, name)

    # Recurse
    for child in xml_dom.NodeElemIter(root): merge_tagged(child)
62
63 995 aaronmk
funcs = {}
64
65 2557 aaronmk
structural_funcs = set()
66
67 1992 aaronmk
##### Public functions
68
69 2112 aaronmk
def is_func_name(name):
    '''Whether name has the form of a function name: a leading underscore
    followed by at least one more character
    '''
    return name.startswith('_') and name != '_' # '_' is default root node name
71
72
def is_func(node):
    '''Whether the node's tag name has the form of a function name'''
    return is_func_name(node.tagName)
73
74
def is_xml_func_name(name):
    '''Whether name is a function name that is registered in funcs'''
    if not is_func_name(name): return False
    return name in funcs
75
76
def is_xml_func(node):
    '''Whether the node's tag name is a function registered in funcs'''
    return is_xml_func_name(node.tagName)
77
78 2602 aaronmk
def process(node, on_error=exc.raise_, rel_funcs=None, db=None):
    '''Evaluates the XML functions in an XML tree.
    Child nodes are processed first, then mergeable siblings are recombined
    (see merge_tagged()), then node itself is evaluated if it is a function.
    A function node is replaced in the tree by its value as text. If the
    function raises, the node is instead replaced by a comment containing the
    node's text and on_error is called with the exception.
    @param on_error Called with the exception raised by an XML function
    @param rel_funcs None|set(str...) Relational functions
        * container can be any iterable type
        * If != None: Non-relational functions are removed, or relational
          functions are treated specially, depending on the db param (below).
    @param db
        * If None: Non-relational functions other than structural functions are
          replaced with their last parameter (usually the value), not evaluated.
          This is used in column-based mode to remove XML-only functions.
        * If != None: Relational functions are evaluated directly. This is used
          in row-based mode to combine relational and XML functions.
    @return node if its tag is not a function name; otherwise None
    '''
    has_rel_funcs = rel_funcs is not None
    assert db is None or has_rel_funcs # rel_funcs required if db set

    for child in xml_dom.NodeElemIter(node):
        process(child, on_error, rel_funcs, db)
    merge_tagged(node)

    name = node.tagName
    if not is_func_name(name): return node # not any kind of function

    # Change rel_funcs *after* processing child nodes, which needs orig value
    if has_rel_funcs: rel_funcs = set(rel_funcs)
    else: rel_funcs = set()

    row_mode = has_rel_funcs and db is not None
    column_mode = has_rel_funcs and db is None
    items = list(xml_dom.NodeTextEntryIter(node))

    # Parse function
    if len(items) == 1 and items[0][0].isdigit(): # has single numeric param
        # pass-through optimization for aggregating functions with one arg
        value = items[0][1] # pass through first arg
    elif row_mode and name in rel_funcs: # row-based mode: evaluate using DB
        value = sql_io.put(db, name, dict(items))
    elif column_mode and name not in structural_funcs: # column-based mode
        if name in rel_funcs: return # preserve relational functions
        # otherwise XML-only, so just replace with last param
        value = pop_value(items, None)
    else: # local XML function
        try: value = funcs[name](items, node)
        except Exception as e: # also catch non-wrapped exceptions (XML func bugs)
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
                '\n'+term.emph_multiline(str_)))

            on_error(e)
            return # in case on_error() returns

    xml_dom.replace_with_text(node, value)
133 995 aaronmk
134 1469 aaronmk
##### XML functions
135 995 aaronmk
136
# Function names must start with _ to avoid collisions with real tags
137
# Functions take arguments (items)
138
139 2557 aaronmk
#### Structural
140 1469 aaronmk
141 2017 aaronmk
def _ignore(items, node):
142 994 aaronmk
    '''Used to "comment out" an XML subtree'''
143
    return None
144 995 aaronmk
funcs['_ignore'] = _ignore
145 2557 aaronmk
structural_funcs.add('_ignore')
146 994 aaronmk
147 2017 aaronmk
def _ref(items, node):
    '''Used to retrieve a value from another XML node
    @param items
        addr=<path> XPath to value, relative to the XML func's parent node
    @return The referenced value, or None (after a UserWarning) if the XPath
        has no value
    @throws SyntaxError if addr is missing
    '''
    items = dict(items)
    try: addr = items['addr']
    except KeyError as e: raise SyntaxError(e)

    value = xpath.get_value(node.parentNode, addr)
    if value is None:
        warnings.warn(UserWarning('_ref: XPath reference target missing: '
            +str(addr)))
    return value
161
funcs['_ref'] = _ref
162 2557 aaronmk
structural_funcs.add('_ref')
163 2017 aaronmk
164 1469 aaronmk
#### Conditionals
165
166 2016 aaronmk
def _eq(items, node):
    '''Compares two values for equality.
    @param items
        left=<value>
        right=<value>
    @return util.bool2str() of whether left equals right, or '' if either
        param is missing
    '''
    items = dict(items)
    try:
        left = items['left']
        right = items['right']
    except KeyError: return '' # a value was None
    return util.bool2str(left == right)
173
funcs['_eq'] = _eq
174
175 2016 aaronmk
def _if(items, node):
    '''Selects between two values based on a condition.
    @param items
        cond=<value> True iff strings.ustr(cond) is truthy
        then=<value> Result if cond is true
        else=<value> Result if cond is false (optional; defaults to None)
    @throws SyntaxError if cond or then is missing
    '''
    items = dict(items)
    try:
        cond = items['cond']
        then = items['then']
    except KeyError as e: raise SyntaxError(e)
    else_ = items.get('else', None)
    cond = bool(cast(strings.ustr, cond))
    if cond: return then
    else: return else_
185
funcs['_if'] = _if
186
187 1469 aaronmk
#### Combining values
188
189 2016 aaronmk
def _alt(items, node):
190 113 aaronmk
    items = list(items)
191
    items.sort()
192 1186 aaronmk
    try: return items[0][1] # value of lowest-numbered item
193 1609 aaronmk
    except IndexError: return None # input got removed by e.g. FormatException
194 995 aaronmk
funcs['_alt'] = _alt
195 113 aaronmk
196 2016 aaronmk
def _merge(items, node):
    '''Combines the param values, in param-name order, with
    maps.merge_values().
    All-digit names are ordered by numeric value, so '2' comes before '10'
    (a plain string sort would put '10' first). Other names come after the
    numeric ones, in string order. Entries with equal names keep their input
    order.
    @throws FormatException if a value can't be converted to a string
    '''
    def order(entry):
        name = entry[0]
        if name.isdigit():
            try: return (0, int(name), name)
            except ValueError: pass # digit chars that int() can't parse
        return (1, 0, name)

    items = sorted(conv_items(strings.ustr, items), key=order)
        # get *once* from iter, check types
    return maps.merge_values(*[value for name, value in items])
201 995 aaronmk
funcs['_merge'] = _merge
202 917 aaronmk
203 2016 aaronmk
def _label(items, node):
    '''Prefixes a value with a label.
    @param items
        label=<text>
        value=<text>
    @return '<label>: <value>', or None if value is missing
    @throws SyntaxError if value is present but label is missing
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    value = items.get('value', None)
    if value is None: return None # input is empty
    try: label = items['label']
    except KeyError as e: raise SyntaxError(e)
    return label+': '+value
211 995 aaronmk
funcs['_label'] = _label
212 917 aaronmk
213 1469 aaronmk
#### Transforming values
214
215 2016 aaronmk
def _collapse(items, node):
    '''Collapses a subtree if the "value" element in it is NULL
    @param items
        require=<path> XPath, evaluated against value, that must have a value
        value=<subtree> The subtree to keep or collapse
    @return value, or None if require has no value in it
    @throws SyntaxError if require is missing
    '''
    items = dict(items)
    try: require = cast(strings.ustr, items['require'])
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)
    # NOTE(review): value is None when the param is absent; presumably
    # xpath.get_value() tolerates a None context node -- confirm

    if xpath.get_value(value, require, allow_rooted=False) is None: return None
    else: return value
224
funcs['_collapse'] = _collapse
225
226 1478 aaronmk
# Maps the _nullIf "type" param (None when omitted) to its conversion function
types_by_name = {None: strings.ustr, 'str': strings.ustr, 'float': float}
227 1477 aaronmk
228 2016 aaronmk
def _nullIf(items, node):
    '''Turns a value into None if it equals a designated null value.
    @param items
        null=<text> The null value
        value=<text> The value to check (optional)
        type=<name> Key of types_by_name to convert null with (optional)
    @return The result of util.none_if(value, null); or value unchanged if
        that raises ValueError
    @throws SyntaxError if null is missing or type is not in types_by_name
    '''
    items = dict(conv_items(strings.ustr, items))
    try: null = items['null']
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)
    type_str = items.get('type', None)

    try: type_ = types_by_name[type_str]
    except KeyError as e: raise SyntaxError(e)
    null = type_(null)

    try: return util.none_if(value, null)
    except ValueError: return value # value not convertible, so can't equal null
241 1047 aaronmk
funcs['_nullIf'] = _nullIf
242
243 1602 aaronmk
def repl(repls, value):
    '''Raises error if value not in map and no special '*' entry
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @return The replacement for value
    @throws FormatException if value has no entry and there is no '*' entry
    '''
    try: new_value = repls[value]
    except KeyError as e:
        # Membership test instead of a second lookup, so that no other
        # exception is raised while this one is being handled
        if '*' not in repls: raise FormatException(e)
        new_value = repls['*']
    if new_value == '*': new_value = value # '*' means keep input value the same
    return new_value
258 1602 aaronmk
259 2016 aaronmk
def _map(items, node):
    '''See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value; None if the input is empty or the mapped value
        is the empty string
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    mapped = repl(dict(items), value)
    return util.none_if(mapped, u'') # empty value means None
269 1219 aaronmk
funcs['_map'] = _map
270
271 2016 aaronmk
def _replace(items, node):
    '''Applies regular-expression replacements to a value, in param order.
    @param items
        <last_entry> value=<text> The value to transform
        <other_entries> pattern=replacement. A pattern made up only of word
            characters matches whole words only.
    @return The transformed value, stripped; None if the input is empty or
        the result is the empty string
    @throws SyntaxError if a pattern or replacement is not valid for re.sub()
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    try:
        for pattern, with_ in items:
            if re.match(r'^\w+$', pattern):
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])' # match whole word
            value = re.sub(pattern, with_, value)
    except re.error as e: raise SyntaxError(e) # same class as sre_constants.error
    return util.none_if(value.strip(), u'') # empty strings always mean None
282 1219 aaronmk
funcs['_replace'] = _replace
283
284 1469 aaronmk
#### Quantities
285
286 2016 aaronmk
def _units(items, node):
    '''Applies unit actions to a quantity, in param order.
    @param items
        <last_entry> value=<text> Parsed with units.str2quantity()
        <other_entries> action=units, where action is one of:
            default: units.set_default_units() is called with the units
            to: the quantity is converted with units.convert()
            An empty units string is passed on as None.
    @return units.quantity2str() of the result, or None if the input is empty
    @throws SyntaxError if an action is not recognized
    @throws FormatException if a conversion raises ValueError or units are
        missing
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty

    quantity = units.str2quantity(value)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default': units.set_default_units(quantity, units_)
            elif action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise FormatException(e)
            else: raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise FormatException(e)
    return units.quantity2str(quantity)
302 1225 aaronmk
funcs['_units'] = _units
303
304 1399 aaronmk
def parse_range(str_, range_sep='-'):
    '''Splits a "start<sep>end" string at the first separator.
    @param range_sep Separator between the two endpoints
    @return (start, end), both stripped; or (str_, None), with str_ unchanged,
        if str_ is not a range. With the '-' separator, a string that has
        nothing but whitespace before its first '-' is a negative number, not
        a range.
    '''
    not_a_range = (str_, None)
    start, sep, end = str_.partition(range_sep)
    if sep == '': return not_a_range # separator not found
    start = start.strip()
    if start == '' and range_sep == '-': return not_a_range # negative number
    return (start, end.strip())
310
311 2016 aaronmk
def _rangeStart(items, node):
    '''Returns the start of a range (see parse_range()).
    @param items
        value=<text> The range
    @return The first element of parse_range(value), or None if value is
        missing
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    start, end = parse_range(value)
    return start
316
funcs['_rangeStart'] = _rangeStart
317
318 2016 aaronmk
def _rangeEnd(items, node):
    '''Returns the end of a range (see parse_range()).
    @param items
        value=<text> The range
    @return The second element of parse_range(value), which is None if value
        is not a range; or None if value is missing
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    start, end = parse_range(value)
    return end
323
funcs['_rangeEnd'] = _rangeEnd
324
325 2016 aaronmk
def _range(items, node):
    '''Computes the size of a numeric range.
    @param items
        from=<number>
        to=<number>
    @return str(to - from), or None if either param is missing
    @throws FormatException if a value is not a number
    '''
    items = dict(conv_items(float, items))
    from_ = items.get('from', None)
    to = items.get('to', None)
    if from_ is None or to is None: return None
    return str(to - from_)
331
funcs['_range'] = _range
332
333 2016 aaronmk
def _avg(items, node):
    '''Computes the arithmetic mean of the param values.
    @return str() of the mean, or None if there are no values
    @throws FormatException if a value is not a number
    '''
    count = 0
    sum_ = 0.
    for name, value in conv_items(float, items):
        count += 1
        sum_ += value
    if count == 0: return None # input is empty
    return str(sum_/count)
341 995 aaronmk
funcs['_avg'] = _avg
342 86 aaronmk
343 968 aaronmk
class CvException(Exception):
    '''Indicates that a value is a CV (coefficient of variation), which is
    rejected by _noCV
    '''
    def __init__(self):
        Exception.__init__(self, 'CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
348
349 2016 aaronmk
def _noCV(items, node):
350 3046 aaronmk
    try: name, value = items.pop() # last entry contains value
351
    except IndexError: return None # input is empty
352 1609 aaronmk
    if re.match('^(?i)CV *\d+$', value): raise FormatException(CvException())
353 968 aaronmk
    return value
354 995 aaronmk
funcs['_noCV'] = _noCV
355 968 aaronmk
356 1469 aaronmk
#### Dates
357
358 2016 aaronmk
def _date(items, node):
    '''Normalizes a date to YYYY-MM-DD.
    @param items Either
        date=<text> A number, taken as a year with an optional fractional
            part (the position within the year); or a date string, parsed
            with dates.strtotime()
        or
        year=<int> Required
        month=<int|name> Optional; defaults to 1. A name is converted with
            dates.strtotime().
        day=<int> Optional; defaults to 1
        If the month is out of range, month and day are swapped and the date
        is tried once more.
    @return The formatted date; None if there are no params; or the date
        string unchanged if dates.strtotime() raises ImportError
    @throws FormatException if the date is incomplete or invalid
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try: str_ = items['date']
    except KeyError:
        # Year is required
        try: items['year']
        except KeyError as e:
            if items == {}: return None # entire date is empty
            else: raise FormatException(e)

        # Convert month name to number
        try: month = items['month']
        except KeyError: pass
        else:
            if not month.isdigit(): # month is name
                try: items['month'] = str(dates.strtotime(month).month)
                except ValueError as e: raise FormatException(e)

        items = dict(conv_items(format.str2int, items.items()))
        items.setdefault('month', 1)
        items.setdefault('day', 1)

        for try_num in range(2):
            try:
                date = datetime.date(**items)
                break
            except ValueError as e:
                if try_num > 0: raise FormatException(e)
                    # exception still raised after retry
                msg = strings.ustr(e)
                # Prefix match, so that the check doesn't depend on any detail
                # that follows the range in the message
                if msg.startswith('month must be in 1..12'):
                    # try swapping month and day
                    items['month'], items['day'] = items['day'], items['month']
                else: raise FormatException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise FormatException(e)
        else:
            # Whole part is the year, fractional part the position within it.
            # nan, inf, and out-of-range years are invalid input values.
            try:
                date = (datetime.date(int(year), 1, 1) +
                    datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e: raise FormatException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
402 995 aaronmk
funcs['_date'] = _date
403 86 aaronmk
404 2016 aaronmk
def _dateRangeStart(items, node):
    '''Returns the start of a date range.
    @param items
        value=<text> The date range
    @return The first element of dates.parse_date_range(value), or None if
        value is missing
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    return dates.parse_date_range(value)[0]
409
funcs['_dateRangeStart'] = _dateRangeStart
410 1311 aaronmk
411 2016 aaronmk
def _dateRangeEnd(items, node):
    '''Returns the end of a date range.
    @param items
        value=<text> The date range
    @return The second element of dates.parse_date_range(value), or None if
        value is missing
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    return dates.parse_date_range(value)[1]
416
funcs['_dateRangeEnd'] = _dateRangeEnd
417 1311 aaronmk
418 1469 aaronmk
#### Names
419
420 328 aaronmk
# (part name, slice of the name's space-separated words) pairs, in the order
# in which the parts appear in a full name
_name_parts_slices_items = [
    ('first', slice(None, 1)),
    ('middle', slice(1, -1)),
    ('last', slice(-1, None)),
]
# Part name -> slice
name_parts_slices = dict(_name_parts_slices_items)
# Part names, in full-name order
name_parts = [name for name, slice_ in _name_parts_slices_items]
427
428 2016 aaronmk
def _name(items, node):
    '''Joins name parts into a full name.
    @param items Entries named after name_parts; other entries are ignored
    @return The parts that are present, in name_parts order, joined by
        spaces
    '''
    items = dict(items)
    parts = [items[part] for part in name_parts if part in items]
    return ' '.join(parts)
434 995 aaronmk
funcs['_name'] = _name
435 102 aaronmk
436 2016 aaronmk
def _namePart(items, node):
    '''Extracts parts from full names and joins them with _name().
    @param items <part>=<full name> entries, where part is a key of
        name_parts_slices. The full name is split on single spaces and the
        words selected by the part's slice are kept.
    @throws SyntaxError if an entry's name is not a known part
    '''
    out_items = []
    for part, value in items:
        try: slice_ = name_parts_slices[part]
        except KeyError as e: raise SyntaxError(e)
        words = value.split(' ')
        out_items.append((part, ' '.join(words[slice_])))
    return _name(out_items, node)
443 995 aaronmk
funcs['_namePart'] = _namePart
444 1321 aaronmk
445 1607 aaronmk
#### Angles
446
447 2016 aaronmk
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @param items
        value=<text> The direction
    @return The heading from angles.compass2heading(), cast with
        util.cast(str, ...); value unchanged if it is not all-uppercase; or
        None if value is missing
    @throws FormatException if the conversion raises KeyError
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty

    if not value.isupper(): return value # pass through other coordinate formats
    try: return util.cast(str, angles.compass2heading(value)) # ignore None
    except KeyError as e: raise FormatException(e)
456
funcs['_compass'] = _compass
457
458 1469 aaronmk
#### Paths
459
460 2016 aaronmk
def _simplifyPath(items, node):
    '''Removes the elements of a path that lack a required value.
    Starting at "path", repeatedly follows "next" to the following element
    until there is none. Each element in which "require" has no value is
    replaced by the element that follows it.
    @param items
        next=<path> XPath from an element to the following element
        require=<path> XPath to the value an element needs in order to be kept
        path=<node> The first element
    @return The first element, or whatever it was replaced with
    @throws SyntaxError if a param is missing
    '''
    items = dict(items)
    try:
        next_path = cast(strings.ustr, items['next'])
        require = cast(strings.ustr, items['require'])
        root = items['path']
    except KeyError as e: raise SyntaxError(e)

    cur = root
    while cur is not None:
        # Look up the following element *before* cur may be replaced
        following = xpath.get_1(cur, next_path, allow_rooted=False)
        if xpath.get_value(cur, require, allow_rooted=False) is None: # empty
            xml_dom.replace(cur, following) # remove current elem
            if cur is root: root = following # also update root
        cur = following
    return root
476
funcs['_simplifyPath'] = _simplifyPath