Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 2017 aaronmk
import warnings
7 111 aaronmk
8 1607 aaronmk
import angles
9 818 aaronmk
import dates
10 300 aaronmk
import exc
11 1580 aaronmk
import format
12 917 aaronmk
import maps
13 3077 aaronmk
import sql_io
14 1234 aaronmk
import strings
15 827 aaronmk
import term
16 1468 aaronmk
import units
17 1047 aaronmk
import util
18 86 aaronmk
import xml_dom
19 1321 aaronmk
import xpath
20 86 aaronmk
21 995 aaronmk
##### Exceptions
22
23 1612 aaronmk
class SyntaxError(exc.ExceptionWithCause):
    '''Raised when an XML function is used with invalid or missing params.
    Note that within this module, this name shadows the builtin SyntaxError.
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax'
        exc.ExceptionWithCause.__init__(self, msg, cause)
27 278 aaronmk
28 1613 aaronmk
class FormatException(exc.ExceptionWithCause):
    '''Raised when an input value can't be parsed or converted.
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        msg = 'Invalid input value'
        exc.ExceptionWithCause.__init__(self, msg, cause)
31 843 aaronmk
32 1992 aaronmk
##### Helper functions
33 995 aaronmk
34 1992 aaronmk
def map_items(func, items):
    '''Applies func to the value of every (name, value) pair.
    @param func f(value) Transformation to apply to each value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in the input order
    '''
    mapped = []
    for name, value in items:
        mapped.append((name, func(value)))
    return mapped
36
37
def cast(type_, val):
    '''Converts val by calling type_ on it.
    @param type_ Callable that performs the conversion (e.g. float)
    @return type_(val)
    @throws FormatException if type_ raises a ValueError
    '''
    try:
        converted = type_(val)
    except ValueError as e:
        raise FormatException(e)
    return converted
41
42
def conv_items(type_, items):
    '''Casts the value of every item to type_.
    The items are first passed through xml_dom.TextEntryOnlyIter().
    @param type_ Callable that performs the conversion (see cast())
    @return list of (name, converted_value) pairs
    @throws FormatException if a value can't be cast
    '''
    def convert(val): return cast(type_, val)
    
    entries = xml_dom.TextEntryOnlyIter(items)
    return map_items(convert, entries)
45
46
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value.
    @param items list of (name, value) pairs. Modified in place: the last entry
        is removed even if its name turns out not to match.
    @param name Name of value param, or None to accept any name
    @return The value, or None if items is empty or the last entry has a
        different name
    '''
    try:
        entry = items.pop() # the value is always the last entry
    except IndexError:
        return None # input is empty and no actions
    
    if name is not None and entry[0] != name:
        return None # input is empty
    return entry[1]
52
53 3335 aaronmk
def merge_tagged(root):
    '''Merges siblings in root that are marked as mergeable (merge=1).
    Used to recombine pieces of nodes that were split apart in the mappings.
    Applied to root's children first, then recursively to each child element.
    '''
    # Collect all the names before merging anything
    mergeable_names = set()
    for tagged in xpath.get(root, '*[@merge=1]'):
        mergeable_names.add(tagged.tagName)
    for name in mergeable_names:
        xml_dom.merge_by_name(root, name)
    
    # Recurse
    for child in xml_dom.NodeElemIter(root):
        merge_tagged(child)
62
63 995 aaronmk
# Registry of the XML functions: maps each function's name (e.g. '_alt') to
# its implementation f(items, node)
funcs = dict()

# Names of the functions that process() still evaluates in column-based mode
structural_funcs = set()
66
67 1992 aaronmk
##### Public functions
68
69 2112 aaronmk
def is_func_name(name):
    '''Tests whether a tag name denotes a function, i.e. starts with "_".
    @return bool
    '''
    if name == '_': return False # '_' is default root node name
    return name.startswith('_')
71
72
def is_func(node):
    '''Tests whether node's tag name is a function name (see is_func_name())'''
    tag_name = node.tagName
    return is_func_name(tag_name)
73
74
def is_xml_func_name(name):
    '''Tests whether name is a function name that is registered in funcs'''
    if not is_func_name(name): return False
    return name in funcs
75
76
def is_xml_func(node):
    '''Tests whether node's tag name is a registered XML function
    (see is_xml_func_name())
    '''
    tag_name = node.tagName
    return is_xml_func_name(tag_name)
77
78 3424 aaronmk
def process(node, on_error=exc.raise_, is_rel_func=None, db=None):
    '''Evaluates the XML functions in an XML tree.
    The children of node are processed before node itself, and each evaluated
    function node is replaced with the text of its value.
    @param node The root of the tree to process
    @param on_error f(e) Called with the exception if a local XML function
        fails. The failed function node is replaced with a comment containing
        its XML before on_error is called.
    @param is_rel_func None|f(str) Tests if a name is a relational function.
        * If != None: Non-relational functions are removed, or relational
          functions are treated specially, depending on the db param (below).
    @param db
        * If None: Non-relational functions other than structural functions are
          replaced with their last parameter (usually the value), not evaluated.
          This is used in column-based mode to remove XML-only functions.
        * If != None: Relational functions are evaluated directly. This is used
          in row-based mode to combine relational and XML functions.
    @return node if it isn't a function node, otherwise None
    '''
    # Identity tests, so that the comparison never depends on how the db object
    # (or the is_rel_func callable) implements ==
    has_rel_funcs = is_rel_func is not None
    assert db is None or has_rel_funcs # rel_funcs required if db set
    
    for child in xml_dom.NodeElemIter(node):
        process(child, on_error, is_rel_func, db)
    merge_tagged(node)
    
    name = node.tagName
    if not is_func_name(name): return node # not any kind of function
    
    row_mode = has_rel_funcs and db is not None
    column_mode = has_rel_funcs and db is None
    items = list(xml_dom.NodeTextEntryIter(node))
    
    # Parse function
    if len(items) == 1 and items[0][0].isdigit(): # has single numeric param
        # pass-through optimization for aggregating functions with one arg
        value = items[0][1] # pass through first arg
    elif row_mode and is_rel_func(name): # row-based mode: evaluate using DB
        value = sql_io.put(db, name, dict(items))
    elif column_mode and name not in structural_funcs: # column-based mode
        if is_rel_func(name): return # preserve relational functions
        # otherwise XML-only, so just replace with last param
        value = pop_value(items, None)
    else: # local XML function
        try:
            value = funcs[name](items, node)
        except Exception as e:
            # also catch non-wrapped exceptions (XML func bugs)
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            comment = xml_dom.mk_comment(node.ownerDocument,
                '\n'+term.emph_multiline(str_))
            xml_dom.replace(node, comment)
            
            on_error(e)
            return # in case on_error() returns
    
    xml_dom.replace_with_text(node, value)
128 995 aaronmk
129 1469 aaronmk
##### XML functions
130 995 aaronmk
131
# Function names must start with _ to avoid collisions with real tags
132
# Functions take arguments (items)
133
134 2557 aaronmk
#### Structural
135 1469 aaronmk
136 2017 aaronmk
def _ignore(items, node):
137 994 aaronmk
    '''Used to "comment out" an XML subtree'''
138
    return None
139 995 aaronmk
funcs['_ignore'] = _ignore
140 2557 aaronmk
structural_funcs.add('_ignore')
141 994 aaronmk
142 2017 aaronmk
def _ref(items, node):
    '''Used to retrieve a value from another XML node
    @param items
        addr=<path> XPath to value, relative to the XML func's parent node
    @return The referenced value, or None (with a UserWarning) if the XPath
        doesn't lead to a value
    @throws SyntaxError if the addr param is missing
    '''
    params = dict(items)
    try:
        addr = params['addr']
    except KeyError as e:
        raise SyntaxError(e)
    
    value = xpath.get_value(node.parentNode, addr)
    # Identity test, so that the result's own == is never consulted
    if value is None:
        msg = '_ref: XPath reference target missing: '+str(addr)
        warnings.warn(UserWarning(msg))
    return value
156
funcs['_ref'] = _ref
157 2557 aaronmk
structural_funcs.add('_ref')
158 2017 aaronmk
159 1469 aaronmk
#### Conditionals
160
161 2016 aaronmk
def _eq(items, node):
    '''Tests whether the "left" and "right" params are equal.
    @return util.bool2str() of the comparison, or '' if either param is
        missing
    '''
    params = dict(items)
    if 'left' not in params or 'right' not in params:
        return '' # a value was None
    return util.bool2str(params['left'] == params['right'])
168
funcs['_eq'] = _eq
169
170 2016 aaronmk
def _if(items, node):
    '''Selects between two values based on a condition.
    @param items
        cond The condition. It is cast to a string, so it counts as true
            whenever that string is non-empty.
        then Returned if cond is true
        else Returned if cond is false. Optional; defaults to None.
    @throws SyntaxError if cond or then is missing
    '''
    params = dict(items)
    try:
        cond = params['cond']
        then = params['then']
    except KeyError as e:
        raise SyntaxError(e)
    else_ = params.get('else')
    
    if bool(cast(strings.ustr, cond)):
        return then
    return else_
180
funcs['_if'] = _if
181
182 1469 aaronmk
#### Combining values
183
184 2016 aaronmk
def _alt(items, node):
185 113 aaronmk
    items = list(items)
186
    items.sort()
187 1186 aaronmk
    try: return items[0][1] # value of lowest-numbered item
188 1609 aaronmk
    except IndexError: return None # input got removed by e.g. FormatException
189 995 aaronmk
funcs['_alt'] = _alt
190 113 aaronmk
191 2016 aaronmk
def _merge(items, node):
    '''Combines the values of all the params using maps.merge_values().
    The values are passed in the sort order of the (name, value) pairs.
    @throws FormatException if a value can't be cast to a string
    '''
    # get *once* from iter, check types
    pairs = sorted(conv_items(strings.ustr, items))
    values = [pair[1] for pair in pairs]
    return maps.merge_values(*values)
196 995 aaronmk
funcs['_merge'] = _merge
197 917 aaronmk
198 2016 aaronmk
def _label(items, node):
    '''Prefixes a value with a label, as "<label>: <value>".
    @param items
        value The value to label
        label The label to prepend
    @return The labeled value, or None if there is no value
    @throws SyntaxError if there is a value but no label
    '''
    # get *once* from iter, check types
    params = dict(conv_items(strings.ustr, items))
    
    value = params.get('value')
    if value is None:
        return None # input is empty
    try:
        label = params['label']
    except KeyError as e:
        raise SyntaxError(e)
    return label+': '+value
206 995 aaronmk
funcs['_label'] = _label
207 917 aaronmk
208 1469 aaronmk
#### Transforming values
209
210 2016 aaronmk
def _collapse(items, node):
    '''Collapses a subtree if the "value" element in it is NULL
    @param items
        require=<path> XPath, relative to value, that must lead to a value
        value The subtree to test
    @return value if the require path leads to a value, otherwise None
    @throws SyntaxError if the require param is missing
    '''
    params = dict(items)
    try:
        require = cast(strings.ustr, params['require'])
    except KeyError as e:
        raise SyntaxError(e)
    value = params.get('value')
    
    required = xpath.get_value(value, require, allow_rooted=False)
    # Identity test, so that the result's own == is never consulted
    if required is None:
        return None
    return value
219
funcs['_collapse'] = _collapse
220
221 1478 aaronmk
# Types that the "type" param of _nullIf can name. The None entry is used when
# no type param is given.
types_by_name = {None: strings.ustr, 'str': strings.ustr, 'float': float}

def _nullIf(items, node):
    '''Compares a value to a designated null value using util.none_if()
    (presumably so that a value equal to the null value becomes None).
    @param items
        null The null value
        value The input value. Optional.
        type Name of the type to convert the null value to: a key of
            types_by_name. Optional.
    @return The result of util.none_if(), or the value unchanged if that raises
        a ValueError
    @throws SyntaxError if the null param is missing or the type is unknown
    '''
    params = dict(conv_items(strings.ustr, items))
    try:
        null = params['null']
    except KeyError as e:
        raise SyntaxError(e)
    value = params.get('value')
    type_name = params.get('type')
    
    try:
        type_ = types_by_name[type_name]
    except KeyError as e:
        raise SyntaxError(e)
    null = type_(null)
    
    try:
        return util.none_if(value, null)
    except ValueError:
        return value # value not convertible, so can't equal null
236 1047 aaronmk
funcs['_nullIf'] = _nullIf
237
238 1602 aaronmk
def repl(repls, value):
    '''Raises error if value not in map and no special '*' entry
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @return The replacement for value
    @throws FormatException if value has no entry and there is no "*" entry
    '''
    try:
        replacement = repls[value]
    except KeyError as e:
        # Wrap the error right away, in case another exception is raised
        not_found = FormatException(e)
        try:
            replacement = repls['*']
        except KeyError:
            raise not_found
    
    if replacement == '*':
        return value # '*' means keep input value the same
    return replacement
253 1602 aaronmk
254 2016 aaronmk
def _map(items, node):
    '''See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value, passed through util.none_if() with u'' (an empty
        value means None); or None if the input is empty
    @throws FormatException if the value has no mapping
    '''
    # get *once* from iter, check types
    params = conv_items(strings.ustr, items)
    value = pop_value(params)
    if value is None:
        return None # input is empty
    
    mapped = repl(dict(params), value)
    return util.none_if(mapped, u'') # empty value means None
264 1219 aaronmk
funcs['_map'] = _map
265
266 2016 aaronmk
def _replace(items, node):
    '''Applies regular expression replacements to a value.
    @param items
        <last_entry> value=<str> The input value
        <other_entries> <regex>=<replacement> Applied in order using re.sub().
            A regex that consists only of word characters is only matched as a
            whole word, i.e. not directly next to a letter or digit.
    @return The stripped result, passed through util.none_if() with u'' (empty
        strings always mean None); or None if the input is empty
    @throws SyntaxError if a regex is invalid
    '''
    # get *once* from iter, check types
    params = conv_items(strings.ustr, items)
    value = pop_value(params)
    if value is None:
        return None # input is empty
    
    try:
        for pattern, with_ in params:
            if re.match(r'^\w+$', pattern):
                # match whole word
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])'
            value = re.sub(pattern, with_, value)
    except re.error as e: # the same class as sre_constants.error
        raise SyntaxError(e)
    return util.none_if(value.strip(), u'') # empty strings always mean None
277 1219 aaronmk
funcs['_replace'] = _replace
278
279 1469 aaronmk
#### Quantities
280
281 2016 aaronmk
def _units(items, node):
    '''Applies unit actions to a quantity.
    @param items
        <last_entry> value=<str> The quantity, parsed by units.str2quantity()
        <other_entries> <action>=<units> Applied in order. Actions:
            default: Passed to units.set_default_units()
            to: Converts the quantity using units.convert()
            Empty units are passed as None.
    @return The resulting quantity formatted by units.quantity2str(), or None
        if the input is empty
    @throws FormatException if the conversion fails or units are missing
    @throws SyntaxError if an action is invalid
    '''
    # get *once* from iter, check types
    actions = conv_items(strings.ustr, items)
    value = pop_value(actions)
    if value is None:
        return None # input is empty
    
    quantity = units.str2quantity(value)
    try:
        for action, units_ in actions:
            units_ = util.none_if(units_, u'')
            if action == 'default':
                units.set_default_units(quantity, units_)
            elif action == 'to':
                try:
                    quantity = units.convert(quantity, units_)
                except ValueError as e:
                    raise FormatException(e)
            else:
                raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e:
        raise FormatException(e)
    return units.quantity2str(quantity)
297 1225 aaronmk
funcs['_units'] = _units
298
299 1399 aaronmk
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first separator.
    @param range_sep Separator between the start and the end of the range
    @return tuple (start, end), each stripped of whitespace. If str_ isn't a
        range, (str_, None) with str_ unchanged. A string that starts with the
        "-" separator is a negative number, not a range.
    '''
    start, sep, end = str_.partition(range_sep)
    is_range = sep != ''
    is_negative_number = start == '' and range_sep == '-'
    if not is_range or is_negative_number:
        return (str_, None)
    return (start.strip(), end.strip())
305
306 2016 aaronmk
def _rangeStart(items, node):
    '''Returns the start of the range in the value param (see parse_range()).
    @return The start, or None if there is no value param
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params:
        return None # input is empty
    start, end = parse_range(params['value'])
    return start
311
funcs['_rangeStart'] = _rangeStart
312
313 2016 aaronmk
def _rangeEnd(items, node):
    '''Returns the end of the range in the value param (see parse_range()).
    @return The end, or None if there is no value param or the value isn't a
        range
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params:
        return None # input is empty
    start, end = parse_range(params['value'])
    return end
318
funcs['_rangeEnd'] = _rangeEnd
319
320 2016 aaronmk
def _range(items, node):
    '''Calculates the size of a numeric range.
    @param items
        from The lower bound
        to The upper bound
    @return str(to - from), or None if either bound is missing
    @throws FormatException if a param isn't a number
    '''
    bounds = dict(conv_items(float, items))
    from_ = bounds.get('from')
    to = bounds.get('to')
    if from_ is None or to is None:
        return None
    return str(to - from_)
326
funcs['_range'] = _range
327
328 2016 aaronmk
def _avg(items, node):
    '''Calculates the mean of the values of all the params.
    @return The mean as a string, or None if there are no params
    @throws FormatException if a value isn't a number
    '''
    values = [value for name, value in conv_items(float, items)]
    if not values:
        return None # input is empty
    return str(sum(values, 0.)/len(values))
336 995 aaronmk
funcs['_avg'] = _avg
337 86 aaronmk
338 968 aaronmk
class CvException(Exception):
    '''Explains why a CV (coefficient of variation) value was rejected'''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only allowed for '
            'ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
343
344 2016 aaronmk
def _noCV(items, node):
345 3046 aaronmk
    try: name, value = items.pop() # last entry contains value
346
    except IndexError: return None # input is empty
347 1609 aaronmk
    if re.match('^(?i)CV *\d+$', value): raise FormatException(CvException())
348 968 aaronmk
    return value
349 995 aaronmk
funcs['_noCV'] = _noCV
350 968 aaronmk
351 1469 aaronmk
#### Dates
352
353 2016 aaronmk
def _date(items, node):
    '''Parses a date and formats it as YYYY-MM-DD.
    @param items Either
        date=<str> A date string, or a year number. The fractional part of a
            year number is converted to a number of days into the year.
        or the parts of the date
        year=<int> Required
        month=<int|str> Number or name of the month. Defaults to 1.
        day=<int> Defaults to 1
        If the month is out of range, the month and day are swapped and the
        date is tried again.
    @return The formatted date; None if there are no params; or the date string
        unchanged if dates.strtotime() raises an ImportError
    @throws FormatException if the date can't be parsed or is invalid
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try: str_ = items['date']
    except KeyError:
        # Year is required
        try: items['year']
        except KeyError as e:
            if items == {}: return None # entire date is empty
            else: raise FormatException(e)
        
        # Convert month name to number
        try: month = items['month']
        except KeyError: pass
        else:
            if not month.isdigit(): # month is name
                try: items['month'] = str(dates.strtotime(month).month)
                except ValueError as e: raise FormatException(e)
        
        items = dict(conv_items(format.str2int, items.items()))
        items.setdefault('month', 1)
        items.setdefault('day', 1)
        
        for try_num in range(2):
            try:
                date = datetime.date(**items)
                break
            except ValueError as e:
                if try_num > 0: raise FormatException(e)
                    # exception still raised after retry
                msg = strings.ustr(e)
                if msg == 'month must be in 1..12': # try swapping month and day
                    items['month'], items['day'] = items['day'], items['month']
                else: raise FormatException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise FormatException(e)
        else:
            # A year that datetime can't represent (e.g. 0, NaN or infinity) is
            # invalid input, so it is reported the same way as every other
            # invalid date
            try:
                date = (datetime.date(int(year), 1, 1) +
                    datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e:
                raise FormatException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
397 995 aaronmk
funcs['_date'] = _date
398 86 aaronmk
399 2016 aaronmk
def _dateRangeStart(items, node):
    '''Returns the first element of dates.parse_date_range() of the value param.
    @return That element, or None if there is no value param
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params:
        return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[0]
404
funcs['_dateRangeStart'] = _dateRangeStart
405 1311 aaronmk
406 2016 aaronmk
def _dateRangeEnd(items, node):
    '''Returns the second element of dates.parse_date_range() of the value
    param.
    @return That element, or None if there is no value param
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params:
        return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[1]
411
funcs['_dateRangeEnd'] = _dateRangeEnd
412 1311 aaronmk
413 1469 aaronmk
#### Names
414
415 328 aaronmk
# Which of the space-separated words of a full name make up each name part,
# listed in the order that the parts appear in a full name
_name_parts_slices_items = [
    ('first', slice(1)),
    ('middle', slice(1, -1)),
    ('last', slice(-1, None)),
]

# name part -> slice of the words of a full name
name_parts_slices = dict(_name_parts_slices_items)

# The names of the parts, in order
name_parts = [entry[0] for entry in _name_parts_slices_items]
422
423 2016 aaronmk
def _name(items, node):
    '''Joins the name parts that are present into a full name.
    @param items <part>=<str> for any of the parts in name_parts
    @return The parts, in name_parts order, separated by spaces
    '''
    given = dict(items)
    present = [given[part] for part in name_parts if part in given]
    return ' '.join(present)
429 995 aaronmk
funcs['_name'] = _name
430 102 aaronmk
431 2016 aaronmk
def _namePart(items, node):
    '''Extracts parts of full names.
    @param items <part>=<full_name> For each entry, the words of the full name
        that belong to that part (see name_parts_slices) are kept
    @return The extracted parts, combined by _name()
    @throws SyntaxError if a part name is invalid
    '''
    extracted = []
    for part, full_name in items:
        try:
            slice_ = name_parts_slices[part]
        except KeyError as e:
            raise SyntaxError(e)
        words = full_name.split(' ')
        extracted.append((part, ' '.join(words[slice_])))
    return _name(extracted, node)
438 995 aaronmk
funcs['_namePart'] = _namePart
439 1321 aaronmk
440 1607 aaronmk
#### Angles
441
442 2016 aaronmk
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @return The heading, cast to str with util.cast(); the value unchanged if
        it isn't all-uppercase; or None if there is no value param
    @throws FormatException if the lookup of the direction raises a KeyError
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params:
        return None # input is empty
    value = params['value']
    
    if not value.isupper():
        return value # pass through other coordinate formats
    try:
        heading = angles.compass2heading(value)
        return util.cast(str, heading) # ignore None
    except KeyError as e:
        raise FormatException(e)
451
funcs['_compass'] = _compass
452
453 1469 aaronmk
#### Paths
454
455 2016 aaronmk
def _simplifyPath(items, node):
    '''Removes the empty elements from a chain of elements.
    @param items
        next=<path> XPath from an element of the chain to the next element
        require=<path> XPath, relative to an element, that must lead to a value
            for the element to be kept
        path The first element of the chain
    @return The first element that remains, or None if none remains
    @throws SyntaxError if a param is missing
    '''
    params = dict(items)
    try:
        next_path = cast(strings.ustr, params['next'])
        require = cast(strings.ustr, params['require'])
        root = params['path']
    except KeyError as e:
        raise SyntaxError(e)
    
    # Identity tests, so that the comparison never depends on how the nodes
    # implement ==
    cur = root
    while cur is not None:
        # Look up the next element before the current one is (possibly) removed
        following = xpath.get_1(cur, next_path, allow_rooted=False)
        if xpath.get_value(cur, require, allow_rooted=False) is None: # empty
            xml_dom.replace(cur, following) # remove current elem
            if cur is root: root = following # also update root
        cur = following
    return root
471
funcs['_simplifyPath'] = _simplifyPath