Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 2017 aaronmk
import warnings
7 111 aaronmk
8 1607 aaronmk
import angles
9 818 aaronmk
import dates
10 300 aaronmk
import exc
11 1580 aaronmk
import format
12 917 aaronmk
import maps
13 3077 aaronmk
import sql_io
14 1234 aaronmk
import strings
15 827 aaronmk
import term
16 1468 aaronmk
import units
17 1047 aaronmk
import util
18 86 aaronmk
import xml_dom
19 1321 aaronmk
import xpath
20 86 aaronmk
21 995 aaronmk
##### Exceptions
22
23 1612 aaronmk
class SyntaxError(exc.ExceptionWithCause):
    '''Raised when an XML function is invoked with invalid syntax, e.g. a
    required param is missing.
    NOTE: shadows the builtin SyntaxError inside this module.
    '''
    def __init__(self, cause):
        '''@param cause The exception that revealed the syntax problem'''
        msg = 'Invalid XML function syntax'
        exc.ExceptionWithCause.__init__(self, msg, cause)
27 278 aaronmk
28 1613 aaronmk
class FormatException(exc.ExceptionWithCause):
    '''Raised when an input value can't be processed by an XML function'''
    def __init__(self, cause):
        '''@param cause The exception that revealed the invalid value'''
        msg = 'Invalid input value'
        exc.ExceptionWithCause.__init__(self, msg, cause)
31 843 aaronmk
32 1992 aaronmk
##### Helper functions
33 995 aaronmk
34 1992 aaronmk
def map_items(func, items):
    '''Applies func to the value of each (name, value) pair
    @param func f(value) Transformation to apply to every value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in the input order
    '''
    mapped = []
    for name, value in items:
        mapped.append((name, func(value)))
    return mapped
36
37
def cast(type_, val):
    '''Converts val using type_, reporting failures as invalid input
    @param type_ f(val) Type or conversion function to apply
    @param val Value to convert
    @return type_(val)
    @throws FormatException if type_ raises ValueError (can't cast)
    '''
    try: return type_(val)
    # "as" form (valid since Python 2.6) rather than the Python 2-only comma
    except ValueError as e: raise FormatException(e)
41
42
def conv_items(type_, items):
    '''Casts the value of every entry in items to type_
    @param type_ f(val) Type or conversion function, applied using cast()
    @param items Iterable of (name, value) pairs; it is wrapped in
        xml_dom.TextEntryOnlyIter before the values are cast
    @return list of (name, converted value) pairs
    @throws FormatException if a value can't be cast (see cast())
    '''
    def to_type(val): return cast(type_, val)
    text_items = xml_dom.TextEntryOnlyIter(items)
    return map_items(to_type, text_items)
45
46
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value
    @param items list of (name, value) pairs. Modified in place: the last
        entry is always removed, even when its name does not match.
    @param name Name of value param, or None to accept any name
    @return The last entry's value, or None if items is empty or the last
        entry is not named name (i.e. the input is empty)
    '''
    try: last = items.pop() # last entry contains value
    except IndexError: return None # input is empty and no actions
    if name is not None and last[0] != name: return None # input is empty
    return last[1]
52
53 3335 aaronmk
def merge_tagged(root):
    '''Merges siblings in root that are marked as mergeable (merge=1).
    Used to recombine pieces of nodes that were split apart in the mappings.
    Applied to root's children first, then recursively to each child element.
    '''
    mergeable = xpath.get(root, '*[@merge=1]')
    tag_names = set(elem.tagName for elem in mergeable)
    for tag_name in tag_names:
        xml_dom.merge_by_name(root, tag_name)

    # Descend into the child elements
    for child in xml_dom.NodeElemIter(root):
        merge_tagged(child)
62
63 995 aaronmk
# Registry of XML functions: function (tag) name -> f(items, node).
# Each function below adds itself right after its definition.
funcs = {}
64
65 2557 aaronmk
# Names of functions that process() still evaluates locally in column-based
# mode instead of replacing them with their last param
structural_funcs = set()
66
67 1992 aaronmk
##### Public functions
68
69 2112 aaronmk
def is_func_name(name):
    '''Tests whether name looks like a function name: it starts with "_" but
    is not just "_"
    '''
    if name == '_': return False # '_' is default root node name
    return name.startswith('_')
71
72
def is_func(node):
    '''Tests whether node's tag name is a function name (see is_func_name())'''
    tag_name = node.tagName
    return is_func_name(tag_name)
73
74
def is_xml_func_name(name):
    '''Tests whether name is a function name that is registered in funcs'''
    if not is_func_name(name): return False
    return name in funcs
75
76
def is_xml_func(node):
    '''Tests whether node's tag name is a registered XML function name'''
    tag_name = node.tagName
    return is_xml_func_name(tag_name)
77
78 3424 aaronmk
def process(node, on_error=exc.raise_, is_rel_func=None, db=None):
    '''Evaluates the XML functions in an XML tree.
    Children are processed first (depth-first) and mergeable siblings are
    merged, then node itself is evaluated if its tag name is a function name.
    @param node Root element of the tree to process
    @param on_error f(e) Called with the exception when a local XML function
        fails. The failed node is replaced with a comment before this is called.
    @param is_rel_func None|f(str) Tests if a name is a relational function.
        * If != None: Non-relational functions are removed, or relational
          functions are treated specially, depending on the db param (below).
    @param db
        * If None: Non-relational functions other than structural functions are
          replaced with their last parameter (usually the value), not evaluated.
          This is used in column-based mode to remove XML-only functions.
        * If != None: Relational functions are evaluated directly. This is used
          in row-based mode to combine relational and XML functions.
    @return node if node is not a function; None otherwise
    '''
    has_rel_funcs = is_rel_func is not None
    assert db is None or has_rel_funcs # rel_funcs required if db set

    for child in xml_dom.NodeElemIter(node):
        process(child, on_error, is_rel_func, db)
    merge_tagged(node)

    name = node.tagName
    if not is_func_name(name): return node # not any kind of function

    row_mode = has_rel_funcs and db is not None
    column_mode = has_rel_funcs and db is None
    func = funcs.get(name, None)
    items = list(xml_dom.NodeTextEntryIter(node))

    # Parse function
    if len(items) == 1 and items[0][0].isdigit(): # has single numeric param
        # pass-through optimization for aggregating functions with one arg
        value = items[0][1] # pass through first arg
    elif row_mode and (is_rel_func(name) or func is None): # row-based mode
        value = sql_io.put(db, name, dict(items)) # evaluate using DB
    elif column_mode and name not in structural_funcs: # column-based mode
        if is_rel_func(name): return # preserve relational functions
        # otherwise XML-only, so just replace with last param
        value = pop_value(items, None)
    else: # local XML function
        # NOTE: an unregistered name reaches here with func None; the resulting
        # TypeError is reported through on_error like any other failure
        try: value = func(items, node)
        except Exception as e: # also catch non-wrapped exceptions (XML func bugs)
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
                '\n'+term.emph_multiline(str_)))

            on_error(e)
            return # in case on_error() returns

    xml_dom.replace_with_text(node, value)
129 995 aaronmk
130 1469 aaronmk
##### XML functions
131 995 aaronmk
132
# Function names must start with _ to avoid collisions with real tags
133
# Functions take arguments (items, node)
134
135 2557 aaronmk
#### Structural
136 1469 aaronmk
137 2017 aaronmk
def _ignore(items, node):
138 994 aaronmk
    '''Used to "comment out" an XML subtree'''
139
    return None
140 995 aaronmk
funcs['_ignore'] = _ignore
141 2557 aaronmk
structural_funcs.add('_ignore')
142 994 aaronmk
143 2017 aaronmk
def _ref(items, node):
    '''Used to retrieve a value from another XML node
    @param items
        addr=<path> XPath to value, relative to the XML func's parent node
    @param node The XML func's node
    @return The referenced value, or None if the target is missing (a
        UserWarning is issued in that case)
    @throws SyntaxError if the addr param is missing
    '''
    items = dict(items)
    try: addr = items['addr']
    except KeyError as e: raise SyntaxError(e)

    value = xpath.get_value(node.parentNode, addr)
    if value is None:
        warnings.warn(UserWarning('_ref: XPath reference target missing: '
            +str(addr)))
    return value
157
funcs['_ref'] = _ref
158 2557 aaronmk
structural_funcs.add('_ref')
159 2017 aaronmk
160 1469 aaronmk
#### Conditionals
161
162 2016 aaronmk
def _eq(items, node):
163 1234 aaronmk
    items = dict(items)
164
    try:
165
        left = items['left']
166
        right = items['right']
167
    except KeyError: return '' # a value was None
168
    return util.bool2str(left == right)
169
funcs['_eq'] = _eq
170
171 2016 aaronmk
def _if(items, node):
    '''Selects between two values based on a condition
    @param items
        cond=<value> Condition; true iff its string form is non-empty
        then=<value> Returned when cond is true
        else=<value> Returned when cond is false (optional, defaults to None)
    @throws SyntaxError if the cond or then param is missing
    @throws FormatException if cond can't be cast to a string
    '''
    items = dict(items)
    try:
        cond = items['cond']
        then = items['then']
    except KeyError as e: raise SyntaxError(e)
    else_ = items.get('else', None)
    cond = bool(cast(strings.ustr, cond))
    if cond: return then
    else: return else_
181
funcs['_if'] = _if
182
183 1469 aaronmk
#### Combining values
184
185 2016 aaronmk
def _alt(items, node):
186 113 aaronmk
    items = list(items)
187
    items.sort()
188 1186 aaronmk
    try: return items[0][1] # value of lowest-numbered item
189 1609 aaronmk
    except IndexError: return None # input got removed by e.g. FormatException
190 995 aaronmk
funcs['_alt'] = _alt
191 113 aaronmk
192 2016 aaronmk
def _merge(items, node):
    '''Merges the values of all params using maps.merge_values()
    @param items (name, value) pairs. Values are merged in name order: numeric
        names are ordered as numbers (so "10" comes after "2") and sort before
        non-numeric names, which are ordered as strings. Entries with the same
        name are ordered by value.
    @throws FormatException if a value can't be cast to a string
    '''
    def order(item):
        name, value = item
        if name.isdigit():
            try: return (0, int(name), name, value)
            except ValueError: pass # digit chars that int() can't parse
        return (1, 0, name, value)

    items = list(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    items.sort(key=order)
    return maps.merge_values(*[v for k, v in items])
197 995 aaronmk
funcs['_merge'] = _merge
198 917 aaronmk
199 2016 aaronmk
def _label(items, node):
    '''Prefixes a value with a label
    @param items
        label=<text> Text to put before the value
        value=<value>
    @return "<label>: <value>", or None if the value param is missing
    @throws SyntaxError if value is present but label is missing
    @throws FormatException if a param can't be cast to a string
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    value = items.get('value', None)
    if value is None: return None # input is empty
    try: label = items['label']
    except KeyError as e: raise SyntaxError(e)
    return label+': '+value
207 995 aaronmk
funcs['_label'] = _label
208 917 aaronmk
209 1469 aaronmk
#### Transforming values
210
211 2016 aaronmk
def _collapse(items, node):
    '''Collapses a subtree if the "value" element in it is NULL
    @param items
        require=<path> XPath, relative to value, that must have a value
        value=<subtree>
    @return value, or None if value is missing or require has no value in it
    @throws SyntaxError if the require param is missing
    '''
    items = dict(items)
    try: require = cast(strings.ustr, items['require'])
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)

    # Without a subtree there is nothing to look require up in
    if value is None: return None # input is empty
    if xpath.get_value(value, require, allow_rooted=False) is None: return None
    else: return value
220
funcs['_collapse'] = _collapse
221
222 1478 aaronmk
# Maps the "type" param of _nullIf to the type that its null value is converted
# to. The None key is used when no type param is given.
types_by_name = {None: strings.ustr, 'str': strings.ustr, 'float': float}
223 1477 aaronmk
224 2016 aaronmk
def _nullIf(items, node):
    '''Turns a specific value into NULL
    @param items
        null=<value> The value that stands for NULL
        value=<value>
        type=<name> Key of types_by_name used to convert null (optional)
    @return The result of util.none_if(value, null) (presumably None when
        value equals null -- confirm in util), or value unchanged if that
        comparison raises ValueError
    @throws SyntaxError if the null param is missing or type is unknown
    '''
    items = dict(conv_items(strings.ustr, items))
    try: null = items['null']
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)
    type_str = items.get('type', None)

    try: type_ = types_by_name[type_str]
    except KeyError as e: raise SyntaxError(e)
    null = type_(null)

    try: return util.none_if(value, null)
    except ValueError: return value # value not convertible, so can't equal null
237 1047 aaronmk
funcs['_nullIf'] = _nullIf
238
239 1602 aaronmk
def repl(repls, value):
    '''Raises error if value not in map and no special '*' entry
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @param value The input value to look up
    @return The replacement for value
    @throws FormatException if value has no entry and there is no "*" entry
    '''
    try: new_value = repls[value]
    except KeyError as e:
        # Save traceback right away in case another exception raised
        fe = FormatException(e)
        try: new_value = repls['*']
        except KeyError: raise fe
    if new_value == '*': new_value = value # '*' means keep input value the same
    return new_value
254 1602 aaronmk
255 2016 aaronmk
def _map(items, node):
    '''See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value (an empty result is passed through
        util.none_if(), i.e. means None), or None if the input is empty
    @throws FormatException if the value has no mapping (see repl())
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    return util.none_if(repl(dict(items), value), u'') # empty value means None
265 1219 aaronmk
funcs['_map'] = _map
266
267 2016 aaronmk
def _replace(items, node):
    '''Applies regular expression replacements to a value
    @param items
        <last_entry> Value
        <other_entries> pattern=replacement Applied in order using re.sub(). A
            pattern made up of only word characters matches whole words only.
    @return The stripped result (an empty result means None), or None if the
        input is empty
    @throws SyntaxError if a pattern or replacement is not a valid regexp
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    try:
        # Named pattern, not repl, so the module-level repl() is not shadowed
        for pattern, with_ in items:
            if re.match(r'^\w+$', pattern):
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])' # match whole word
            value = re.sub(pattern, with_, value)
    # re.error is the same class as sre_constants.error, without depending on
    # the deprecated sre_constants module
    except re.error as e: raise SyntaxError(e)
    return util.none_if(value.strip(), u'') # empty strings always mean None
278 1219 aaronmk
funcs['_replace'] = _replace
279
280 1469 aaronmk
#### Quantities
281
282 2016 aaronmk
def _units(items, node):
    '''Applies unit actions to a quantity
    @param items
        <last_entry> Value, parsed using units.str2quantity()
        <other_entries> action=units Applied in order. Actions:
            default: Sets the units to use if the quantity has none
            to: Converts the quantity to the given units
            Empty units are passed on as util.none_if(units, u'').
    @return The resulting quantity as a string, or None if the input is empty
    @throws SyntaxError if an action is invalid
    @throws FormatException if the quantity can't be converted or has no units
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty

    quantity = units.str2quantity(value)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default': units.set_default_units(quantity, units_)
            elif action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise FormatException(e)
            else: raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise FormatException(e)
    return units.quantity2str(quantity)
298 1225 aaronmk
funcs['_units'] = _units
299
300 1399 aaronmk
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first range separator
    @param str_ The string to split
    @param range_sep The separator between the start and the end
    @return tuple (start, end), both stripped. If str_ is not a range, or
        starts with a "-" separator (negative number), (str_, None) instead.
    '''
    start, sep, end = str_.partition(range_sep)
    not_a_range = sep == ''
    negative_number = start == '' and range_sep == '-'
    if not_a_range or negative_number: return (str_, None)
    return (start.strip(), end.strip())
306
307 2016 aaronmk
def _rangeStart(items, node):
    '''Gets the start of a range (see parse_range())
    @param items
        value=<range>
    @return The start of the range, or None if the value param is missing
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, end = parse_range(params['value'])
    return start
312
funcs['_rangeStart'] = _rangeStart
313
314 2016 aaronmk
def _rangeEnd(items, node):
    '''Gets the end of a range (see parse_range())
    @param items
        value=<range>
    @return The end of the range (None if the value is not a range), or None
        if the value param is missing
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, end = parse_range(params['value'])
    return end
319
funcs['_rangeEnd'] = _rangeEnd
320
321 2016 aaronmk
def _range(items, node):
    '''Calculates the size of a numeric range
    @param items
        from=<number>
        to=<number>
    @return str(to - from), or None if either param is missing
    @throws FormatException if a param can't be cast to a float
    '''
    items = dict(conv_items(float, items))
    from_ = items.get('from', None)
    to = items.get('to', None)
    if from_ is None or to is None: return None
    return str(to - from_)
327
funcs['_range'] = _range
328
329 2016 aaronmk
def _avg(items, node):
    '''Averages the values of all params
    @param items (name, number) pairs; the names are ignored
    @return The mean as a string, or None if there are no params
    @throws FormatException if a value can't be cast to a float
    '''
    values = [value for name, value in conv_items(float, items)]
    if not values: return None # input is empty
    total = sum(values, 0.)
    return str(total/len(values))
337 995 aaronmk
funcs['_avg'] = _avg
338 86 aaronmk
339 968 aaronmk
class CvException(Exception):
    '''Raised for CV (coefficient of variation) values, which are only allowed
    for ratio scale data
    '''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
344
345 2016 aaronmk
def _noCV(items, node):
    '''Rejects CV (coefficient of variation) values
    @param items The last entry contains the value
    @return The value unchanged, or None if the input is empty
    @throws FormatException (caused by CvException) if the value is "CV"
        followed by a number, in any letter case
    '''
    items = list(conv_items(strings.ustr, items))
    try: name, value = items.pop() # last entry contains value
    except IndexError: return None # input is empty
    # The inline flag must be at the start of the pattern: a flag after "^" is
    # deprecated since Python 3.6 and an error since 3.11
    if re.match(r'(?i)^CV *\d+$', value): raise FormatException(CvException())
    return value
351 995 aaronmk
funcs['_noCV'] = _noCV
352 968 aaronmk
353 1469 aaronmk
#### Dates
354
355 2016 aaronmk
def _date(items, node):
    '''Normalizes a date to YYYY-MM-DD
    @param items Either:
        date=<string> A date string, or a (fractional) year number
        or the separate parts:
        year=<int> Required
        month=<int|name> Optional, defaults to 1
        day=<int> Optional, defaults to 1
    @return The formatted date; None if there are no params; or the date
        string unchanged if dates.strtotime() raises ImportError
    @throws FormatException if the date is invalid or the year is missing
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try: str_ = items['date']
    except KeyError:
        # Year is required
        try: items['year']
        except KeyError as e:
            if items == {}: return None # entire date is empty
            else: raise FormatException(e)

        # Convert month name to number
        try: month = items['month']
        except KeyError: pass
        else:
            if not month.isdigit(): # month is name
                try: items['month'] = str(dates.strtotime(month).month)
                except ValueError as e: raise FormatException(e)

        # items() instead of iteritems() so this also runs on Python 3
        items = dict(conv_items(format.str2int, items.items()))
        items.setdefault('month', 1)
        items.setdefault('day', 1)

        for try_num in range(2):
            try:
                date = datetime.date(**items)
                break
            except ValueError as e:
                if try_num > 0: raise FormatException(e)
                    # exception still raised after retry
                msg = strings.ustr(e)
                if msg == 'month must be in 1..12': # try swapping month and day
                    items['month'], items['day'] = items['day'], items['month']
                else: raise FormatException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise FormatException(e)
        else:
            # Fractional year: whole year plus the fraction of 365 days.
            # nan/inf and out-of-range years are invalid input, not bugs.
            try:
                date = (datetime.date(int(year), 1, 1) +
                    datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e: raise FormatException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
399 995 aaronmk
funcs['_date'] = _date
400 86 aaronmk
401 2016 aaronmk
def _dateRangeStart(items, node):
    '''Gets the first element of dates.parse_date_range() for a value
    @param items
        value=<date range>
    @return The start of the date range, or None if the value param is missing
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[0]
406
funcs['_dateRangeStart'] = _dateRangeStart
407 1311 aaronmk
408 2016 aaronmk
def _dateRangeEnd(items, node):
    '''Gets the second element of dates.parse_date_range() for a value
    @param items
        value=<date range>
    @return The end of the date range, or None if the value param is missing
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[1]
413
funcs['_dateRangeEnd'] = _dateRangeEnd
414 1311 aaronmk
415 1469 aaronmk
#### Names
416
417 328 aaronmk
# Name part -> slice of the space-separated words of a name that it covers,
# listed in the order in which the parts are joined
_name_parts_slices_items = [
    ('first', slice(None, 1)),
    ('middle', slice(1, -1)),
    ('last', slice(-1, None)),
]
name_parts_slices = dict(_name_parts_slices_items)
name_parts = [entry[0] for entry in _name_parts_slices_items]
424
425 2016 aaronmk
def _name(items, node):
    '''Joins name parts into a full name
    @param items <part>=<text> for the parts in name_parts; other params are
        ignored
    @return The parts that are present, in name_parts order, separated by
        spaces
    '''
    params = dict(list(conv_items(strings.ustr, items)))
    present = [params[part] for part in name_parts if part in params]
    return ' '.join(present)
431 995 aaronmk
funcs['_name'] = _name
432 102 aaronmk
433 2016 aaronmk
def _namePart(items, node):
    '''Extracts parts of a full name
    @param items <part>=<full name> where part is a key of name_parts_slices.
        The words (split on single spaces) that belong to that part are
        extracted from the full name.
    @return The extracted parts, joined using _name()
    @throws SyntaxError if a param name is not a valid name part
    '''
    items = list(conv_items(strings.ustr, items))
    out_items = []
    for part, value in items:
        try: slice_ = name_parts_slices[part]
        except KeyError as e: raise SyntaxError(e)
        out_items.append((part, ' '.join(value.split(' ')[slice_])))
    return _name(out_items, node)
441 995 aaronmk
funcs['_namePart'] = _namePart
442 1321 aaronmk
443 1607 aaronmk
#### Angles
444
445 2016 aaronmk
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @param items
        value=<direction>
    @return The heading as a string; the value unchanged if it is not all
        uppercase; or None if the value param is missing
    @throws FormatException if angles.compass2heading() raises KeyError
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty

    if not value.isupper(): return value # pass through other coordinate formats
    try: return util.cast(str, angles.compass2heading(value)) # ignore None
    except KeyError as e: raise FormatException(e)
454
funcs['_compass'] = _compass
455
456 1469 aaronmk
#### Paths
457
458 2016 aaronmk
def _simplifyPath(items, node):
    '''Removes the empty elements from a chain of nodes
    @param items
        next=<path> XPath from an element of the chain to the next element
        require=<path> XPath that must have a value for an element to be kept
        path=<subtree> First element of the chain
    @param node The XML func's node (not used)
    @return The first remaining element of the chain, or None if none remain
    @throws SyntaxError if a param is missing
    '''
    items = dict(items)
    try:
        next_ = cast(strings.ustr, items['next']) # don't shadow builtin next
        require = cast(strings.ustr, items['require'])
        root = items['path']
    except KeyError as e: raise SyntaxError(e)

    curr = root
    while curr is not None:
        # Look up the successor first, before curr is possibly replaced
        new_node = xpath.get_1(curr, next_, allow_rooted=False)
        if xpath.get_value(curr, require, allow_rooted=False) is None: # empty
            xml_dom.replace(curr, new_node) # remove current elem
            if curr is root: root = new_node # also update root
        curr = new_node
    return root
474
funcs['_simplifyPath'] = _simplifyPath