Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 2017 aaronmk
import warnings
7 111 aaronmk
8 1607 aaronmk
import angles
9 818 aaronmk
import dates
10 300 aaronmk
import exc
11 1580 aaronmk
import format
12 917 aaronmk
import maps
13 3077 aaronmk
import sql_io
14 1234 aaronmk
import strings
15 827 aaronmk
import term
16 1468 aaronmk
import units
17 1047 aaronmk
import util
18 86 aaronmk
import xml_dom
19 1321 aaronmk
import xpath
20 86 aaronmk
21 995 aaronmk
##### Exceptions
22
23 1612 aaronmk
class SyntaxError(exc.ExceptionWithCause):
    '''Signals that an XML function was invoked with malformed arguments.
    NOTE: shadows the builtin SyntaxError inside this module.
    @param cause The underlying exception that describes what was malformed
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax'
        exc.ExceptionWithCause.__init__(self, msg, cause)
27 278 aaronmk
28 1613 aaronmk
class FormatException(exc.ExceptionWithCause):
    '''Signals that an input value could not be parsed or converted.
    @param cause The underlying exception raised by the failed conversion
    '''
    def __init__(self, cause):
        msg = 'Invalid input value'
        exc.ExceptionWithCause.__init__(self, msg, cause)
31 843 aaronmk
32 1992 aaronmk
##### Helper functions
33 995 aaronmk
34 1992 aaronmk
def map_items(func, items):
    '''Applies func to the value of every (name, value) pair.
    @param func Callable taking a single value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in input order
    '''
    mapped = []
    for name, value in items:
        mapped.append((name, func(value)))
    return mapped
36
37
def cast(type_, val):
    '''Converts val using type_.
    @param type_ Callable that performs the conversion (e.g. float)
    @param val The value to convert
    @return type_(val)
    Throws FormatException if can't cast (i.e. type_ raises ValueError)
    '''
    try:
        return type_(val)
    except ValueError as e: # "as" form is valid on Python 2.6+ and 3
        raise FormatException(e)
41
42
def conv_items(type_, items):
    '''Casts the value of each entry yielded by xml_dom.TextEntryOnlyIter.
    @param type_ Callable used to convert each value (see cast())
    @param items The entries to convert
    @return list of (name, converted value) pairs
    Throws FormatException if a value can't be cast
    '''
    def convert(val):
        return cast(type_, val)
    text_entries = xml_dom.TextEntryOnlyIter(items)
    return map_items(convert, text_entries)
45
46
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value.
    The last entry is removed even when its name doesn't match.
    @param items Mutable list of (name, value) entries
    @param name Name of value param, or None to accept any name
    @return The value, or None if items is empty or the last entry has a
        different name
    '''
    try:
        entry = items.pop() # last entry contains value
    except IndexError:
        return None # input is empty and no actions
    accept_any_name = name is None
    if accept_any_name or entry[0] == name:
        return entry[1]
    return None # input is empty
52
53 995 aaronmk
# Registry of XML functions: maps function name -> callable(items, node)
funcs = {}
54
55 2557 aaronmk
# Names of functions that process() still evaluates in column-based mode
structural_funcs = set()
56
57 1992 aaronmk
##### Public functions
58
59 2112 aaronmk
def is_func_name(name):
    '''Whether name follows the function naming convention (leading "_").
    @param name Tag name to test
    @return bool
    '''
    if name == '_': return False # '_' is default root node name
    return name.startswith('_')

def is_func(node):
    '''Whether node's tagName looks like a function name'''
    tag = node.tagName
    return is_func_name(tag)

def is_xml_func_name(name):
    '''Whether name looks like a function name *and* is registered in funcs'''
    if not is_func_name(name): return False
    return name in funcs

def is_xml_func(node):
    '''Whether node's tagName is a registered XML function'''
    tag = node.tagName
    return is_xml_func_name(tag)
67
68 2602 aaronmk
def process(node, on_error=exc.raise_, rel_funcs=None, db=None):
    '''Evaluates the XML functions in an XML tree.
    Children are processed first (recursively), then node itself.
    @param node The root of the (sub)tree to process
    @param on_error Called with the exception if a local XML function fails.
        The failed node is replaced with a comment containing its text before
        on_error() is called.
    @param rel_funcs None|set(str...) Relational functions
        * container can be any iterable type
        * If != None: Non-relational functions are removed, or relational
          functions are treated specially, depending on the db param (below).
    @param db
        * If None: Non-relational functions other than structural functions are
          replaced with their last parameter (usually the value), not evaluated.
          This is used in column-based mode to remove XML-only functions.
        * If != None: Relational functions are evaluated directly. This is used
          in row-based mode to combine relational and XML functions.
    @return The new node. node itself if it isn't a function; None if a
        relational function was preserved (column-based mode) or if the
        function raised and on_error() returned; otherwise the result of
        xml_dom.replace_with_text().
    '''
    # Identity checks: db and DOM nodes are arbitrary objects, so don't invoke
    # their __eq__/__ne__ just to test for None
    has_rel_funcs = rel_funcs is not None
    assert db is None or has_rel_funcs # rel_funcs required if db set
    
    changed = []
    for child in list(xml_dom.NodeElemIter(node)):
        new = process(child, on_error, rel_funcs, db)
        if new is not None and new is not child and xml_dom.is_elem(new):
            changed.append(new)
    # Do after iterating over the children to avoid invalidating the iterator
    for child in changed: xml_dom.merge_same_name(child)
    
    name = node.tagName
    if not is_func_name(name): return node # not any kind of function
    
    # Change rel_funcs *after* processing child nodes, which needs orig value
    if not has_rel_funcs: rel_funcs = set()
    rel_funcs = set(rel_funcs)
    
    row_mode = has_rel_funcs and db is not None
    column_mode = has_rel_funcs and db is None
    items = list(xml_dom.NodeTextEntryIter(node))
    
    # Parse function
    if len(items) == 1 and items[0][0].isdigit(): # has single numeric param
        # pass-through optimization for aggregating functions with one arg
        value = items[0][1] # pass through first arg
    elif row_mode and name in rel_funcs: # row-based mode: evaluate using DB
        value = sql_io.put(db, name, dict(items))
    elif column_mode and not name in structural_funcs: # column-based mode
        if name in rel_funcs: return # preserve relational functions
        # otherwise XML-only, so just replace with last param
        value = pop_value(items, None)
    else: # local XML function
        try: value = funcs[name](items, node)
        except Exception as e: # also catch non-wrapped exceptions (XML func bugs)
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
                '\n'+term.emph_multiline(str_)))
            
            on_error(e)
            return # in case on_error() returns
    
    return xml_dom.replace_with_text(node, value)
128 995 aaronmk
129 1469 aaronmk
##### XML functions
130 995 aaronmk
131
# Function names must start with _ to avoid collisions with real tags
132
# Functions take arguments (items)
133
134 2557 aaronmk
#### Structural
135 1469 aaronmk
136 2017 aaronmk
def _ignore(items, node):
    '''Used to "comment out" an XML subtree: the arguments are discarded and
    the function always evaluates to None.
    '''
    return
funcs['_ignore'] = _ignore
structural_funcs.add('_ignore')
141 994 aaronmk
142 2017 aaronmk
def _ref(items, node):
    '''Used to retrieve a value from another XML node
    @param items
        addr=<path> XPath to value, relative to the XML func's parent node
    @return The value found by xpath.get_value(), or None (after emitting a
        UserWarning) if the target is missing
    Throws SyntaxError if the addr param is missing
    '''
    items = dict(items)
    try: addr = items['addr']
    except KeyError as e: raise SyntaxError(e)
    
    value = xpath.get_value(node.parentNode, addr)
    if value is None:
        warnings.warn(UserWarning('_ref: XPath reference target missing: '
            +str(addr)))
    return value
funcs['_ref'] = _ref
structural_funcs.add('_ref')
158 2017 aaronmk
159 1469 aaronmk
#### Conditionals
160
161 2016 aaronmk
def _eq(items, node):
162 1234 aaronmk
    items = dict(items)
163
    try:
164
        left = items['left']
165
        right = items['right']
166
    except KeyError: return '' # a value was None
167
    return util.bool2str(left == right)
168
# Register under its XML tag name so process() can look it up in funcs
funcs['_eq'] = _eq
169
170 2016 aaronmk
def _if(items, node):
    '''Selects between two values based on a condition.
    @param items
        cond=<value> Condition, tested as bool(cast(strings.ustr, cond))
        then=<value> Result when cond is true
        else=<value> Result when cond is false (optional, defaults to None)
    @return then or else
    Throws SyntaxError if cond or then is missing
    '''
    items = dict(items)
    try:
        cond = items['cond']
        then = items['then']
    except KeyError as e: raise SyntaxError(e)
    else_ = items.get('else', None)
    # NOTE(review): presumably only the empty string is false here, so a
    # literal like 'False' would count as true -- confirm against callers
    cond = bool(cast(strings.ustr, cond))
    if cond: return then
    else: return else_
funcs['_if'] = _if
181
182 1469 aaronmk
#### Combining values
183
184 2016 aaronmk
def _alt(items, node):
185 113 aaronmk
    items = list(items)
186
    items.sort()
187 1186 aaronmk
    try: return items[0][1] # value of lowest-numbered item
188 1609 aaronmk
    except IndexError: return None # input got removed by e.g. FormatException
189 995 aaronmk
# Register under its XML tag name so process() can look it up in funcs
funcs['_alt'] = _alt
190 113 aaronmk
191 2016 aaronmk
def _merge(items, node):
    '''Merges the argument values using maps.merge_values().
    @param items (name, value) entries; values are cast to strings.ustr
    @return maps.merge_values() of the values, passed in name order. Numeric
        names are ordered as integers (so "2" comes before "10"); any other
        names come after them, in string order. Equal names are ordered by
        value.
    Throws FormatException if a value can't be cast
    '''
    def order(item):
        name, value = item
        if name.isdigit():
            try: return (0, int(name), name, value)
            except ValueError: pass # digit-like char that int() can't parse
        return (1, 0, name, value)
    
    items = list(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    items.sort(key=order)
    return maps.merge_values(*[v for k, v in items])
funcs['_merge'] = _merge
197 917 aaronmk
198 2016 aaronmk
def _label(items, node):
    '''Prefixes a value with a label.
    @param items
        label=<str> The label text
        value=<str> The value to label
    @return '<label>: <value>', or None if there is no value
    Throws SyntaxError if value is present but label is missing
    Throws FormatException if a param can't be cast
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    value = items.get('value', None)
    if value is None: return None # input is empty
    try: label = items['label']
    except KeyError as e: raise SyntaxError(e)
    return label+': '+value
funcs['_label'] = _label
207 917 aaronmk
208 1469 aaronmk
#### Transforming values
209
210 2016 aaronmk
def _collapse(items, node):
    '''Collapses a subtree if the "value" element in it is NULL
    @param items
        require=<path> XPath, looked up within value, that must have a value
        value=<subtree> The subtree to keep or collapse
    @return value if the required path has a value, otherwise None
    Throws SyntaxError if the require param is missing
    '''
    items = dict(items)
    try: require = cast(strings.ustr, items['require'])
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)
    
    if xpath.get_value(value, require, allow_rooted=False) is None: return None
    else: return value
funcs['_collapse'] = _collapse
220
221 1478 aaronmk
# Converters selectable via _nullIf's "type" param; no type (None) means string
types_by_name = {None: strings.ustr, 'str': strings.ustr, 'float': float}
222 1477 aaronmk
223 2016 aaronmk
def _nullIf(items, node):
    '''Turns a value into None if it equals a designated null value.
    @param items
        null=<str> The value that stands for NULL
        value=<str> The input value (optional)
        type=<name> Key of types_by_name used to convert null (optional)
    @return The result of util.none_if(value, null); value unchanged if that
        raises ValueError
    Throws SyntaxError if null is missing or type is not in types_by_name
    Throws FormatException if a param can't be cast to a string
    '''
    items = dict(conv_items(strings.ustr, items))
    try: null = items['null']
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)
    type_str = items.get('type', None)
    
    try: type_ = types_by_name[type_str]
    except KeyError as e: raise SyntaxError(e)
    null = type_(null)
    
    try: return util.none_if(value, null)
    except ValueError: return value # value not convertible, so can't equal null
funcs['_nullIf'] = _nullIf
237
238 1602 aaronmk
def repl(repls, value):
    '''Raises error if value not in map and no special '*' entry
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @param value The input value to look up
    @return The replacement for value ('' is returned as-is; the caller
        decides what it means)
    Throws FormatException if value has no entry and there is no "*" entry
    '''
    try: new_value = repls[value]
    except KeyError as e:
        # Save traceback right away in case another exception raised
        fe = FormatException(e)
        try: new_value = repls['*']
        except KeyError: raise fe
    if new_value == '*': new_value = value # '*' means keep input value the same
    return new_value
253 1602 aaronmk
254 2016 aaronmk
def _map(items, node):
    '''Replaces the input value according to a lookup table. See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value, or None if the input is empty or maps to ''
    '''
    # get *once* from iter, check types
    entries = conv_items(strings.ustr, items)
    value = pop_value(entries)
    if value is None: return None # input is empty
    mappings = dict(entries)
    mapped = repl(mappings, value)
    return util.none_if(mapped, u'') # empty value means None
funcs['_map'] = _map
265
266 2016 aaronmk
def _replace(items, node):
    '''Applies regular-expression replacements to the input value, in order.
    @param items
        <last_entry> Value
        <other_entries> pattern=replacement. A pattern made up only of word
            characters is matched as a whole word.
    @return The stripped result, or None if the input or the result is empty
    Throws SyntaxError if a pattern or replacement is not valid for re.sub()
    Throws FormatException if a param can't be cast to a string
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    try:
        # Named "pattern" so the module-level repl() isn't shadowed
        for pattern, with_ in items:
            if re.match(r'^\w+$', pattern):
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])' # match whole word
            value = re.sub(pattern, with_, value)
    # re.error is the same class as sre_constants.error
    except re.error as e: raise SyntaxError(e)
    return util.none_if(value.strip(), u'') # empty strings always mean None
funcs['_replace'] = _replace
278
279 1469 aaronmk
#### Quantities
280
281 2016 aaronmk
def _units(items, node):
    '''Applies unit actions to a quantity.
    @param items
        <last_entry> Value, parsed with units.str2quantity()
        <other_entries> action=units, applied in order:
            default=<units> Passed to units.set_default_units()
            to=<units> Passed to units.convert()
            Empty units are passed as None.
    @return The resulting quantity as a string, or None if the input is empty
    Throws SyntaxError on an unknown action
    Throws FormatException if units are missing or the conversion fails
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    
    quantity = units.str2quantity(value)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default': units.set_default_units(quantity, units_)
            elif action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise FormatException(e)
            else: raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise FormatException(e)
    return units.quantity2str(quantity)
funcs['_units'] = _units
298
299 1399 aaronmk
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first occurrence of range_sep.
    @param str_ The string to split
    @param range_sep The separator between the start and the end
    @return (start, end) with surrounding whitespace stripped, or
        (str_, None) if str_ is not a range. With the default "-" separator,
        a string that starts with "-" is taken to be a negative number.
    '''
    start, sep, end = str_.partition(range_sep)
    is_range = sep != ''
    is_negative_number = start == '' and range_sep == '-'
    if not is_range or is_negative_number:
        return (str_, None)
    return (start.strip(), end.strip())
305
306 2016 aaronmk
def _rangeStart(items, node):
    '''Extracts the start of a range. See parse_range()
    @param items
        value=<str> The range string
    @return The start of the range, or None if there is no value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    range_ = parse_range(params['value'])
    return range_[0]
funcs['_rangeStart'] = _rangeStart
312
313 2016 aaronmk
def _rangeEnd(items, node):
    '''Extracts the end of a range. See parse_range()
    @param items
        value=<str> The range string
    @return The end of the range, or None if there is no value or the value
        is not a range
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    range_ = parse_range(params['value'])
    return range_[1]
funcs['_rangeEnd'] = _rangeEnd
319
320 2016 aaronmk
def _range(items, node):
    '''Computes the size of a numeric range.
    @param items
        from=<number>
        to=<number>
    @return str(to - from), or None if either bound is absent
    Throws FormatException if a bound can't be cast to float
    '''
    bounds = dict(conv_items(float, items))
    from_, to = bounds.get('from'), bounds.get('to')
    if from_ is None or to is None: return None
    size = to - from_
    return str(size)
funcs['_range'] = _range
327
328 2016 aaronmk
def _avg(items, node):
    '''Averages the argument values.
    @param items (name, value) entries; values are cast to float
    @return The arithmetic mean as a string, or None if there are no values
    Throws FormatException if a value can't be cast to float
    '''
    values = [value for name, value in conv_items(float, items)]
    if not values: return None # input is empty
    total = 0.
    for value in values: total += value
    return str(total/len(values))
funcs['_avg'] = _avg
337 86 aaronmk
338 968 aaronmk
class CvException(Exception):
    '''Explains why a CV (coefficient of variation) value was rejected'''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
343
344 2016 aaronmk
def _noCV(items, node):
345 3046 aaronmk
    try: name, value = items.pop() # last entry contains value
346
    except IndexError: return None # input is empty
347 1609 aaronmk
    if re.match('^(?i)CV *\d+$', value): raise FormatException(CvException())
348 968 aaronmk
    return value
349 995 aaronmk
# Register under its XML tag name so process() can look it up in funcs
funcs['_noCV'] = _noCV
350 968 aaronmk
351 1469 aaronmk
#### Dates
352
353 2016 aaronmk
def _date(items, node):
    '''Normalizes a date to YYYY-MM-DD.
    @param items Either
        date=<str> A (possibly fractional) year number, or any string that
            dates.strtotime() can parse
        or the date's parts:
        year=<int> Required
        month=<int|name> Optional, defaults to 1. A name is converted using
            dates.strtotime().
        day=<int> Optional, defaults to 1
        If the month is out of range, month and day are swapped and the date
        is retried once.
    @return The formatted date; None if there are no params at all; the date
        string unchanged if dates.strtotime() raises ImportError
    Throws FormatException if the date is invalid or can't be parsed
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try: str_ = items['date']
    except KeyError:
        # Year is required
        try: items['year']
        except KeyError as e:
            if items == {}: return None # entire date is empty
            else: raise FormatException(e)
        
        # Convert month name to number
        try: month = items['month']
        except KeyError: pass
        else:
            if not month.isdigit(): # month is name
                try: items['month'] = str(dates.strtotime(month).month)
                except ValueError as e: raise FormatException(e)
        
        # items()/range() instead of iteritems()/xrange(): same result here,
        # and not tied to Python 2
        items = dict(conv_items(format.str2int, items.items()))
        items.setdefault('month', 1)
        items.setdefault('day', 1)
        
        for try_num in range(2):
            try:
                date = datetime.date(**items)
                break
            except ValueError as e:
                if try_num > 0: raise FormatException(e)
                    # exception still raised after retry
                msg = strings.ustr(e)
                # Prefix match, in case the message has details appended
                if msg.startswith('month must be in 1..12'):
                    # try swapping month and day
                    items['month'], items['day'] = items['day'], items['month']
                else: raise FormatException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise FormatException(e)
        else:
            # Fractional year: the fraction is scaled to days of a 365-day year
            # An unusable year (e.g. nan, inf, 0) is an invalid input value
            # like any other, so report it the same way
            try:
                date = (datetime.date(int(year), 1, 1) +
                    datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e: raise FormatException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
funcs['_date'] = _date
398 86 aaronmk
399 2016 aaronmk
def _dateRangeStart(items, node):
    '''Extracts the start of a date range. See dates.parse_date_range()
    @param items
        value=<str> The date range string
    @return The first element of the parsed range, or None if there is no
        value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    range_ = dates.parse_date_range(params['value'])
    return range_[0]
funcs['_dateRangeStart'] = _dateRangeStart
405 1311 aaronmk
406 2016 aaronmk
def _dateRangeEnd(items, node):
    '''Extracts the end of a date range. See dates.parse_date_range()
    @param items
        value=<str> The date range string
    @return The second element of the parsed range, or None if there is no
        value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    range_ = dates.parse_date_range(params['value'])
    return range_[1]
funcs['_dateRangeEnd'] = _dateRangeEnd
412 1311 aaronmk
413 1469 aaronmk
#### Names
414
415 328 aaronmk
_name_parts_slices_items = [
416
    ('first', slice(None, 1)),
417
    ('middle', slice(1, -1)),
418
    ('last', slice(-1, None)),
419
]
420
name_parts_slices = dict(_name_parts_slices_items)
421
name_parts = [name for name, slice_ in _name_parts_slices_items]
422
423 2016 aaronmk
def _name(items, node):
    '''Assembles a full name from its parts.
    @param items (part, value) entries; only parts listed in name_parts are
        used
    @return The given parts joined with spaces, in name_parts order
    '''
    given = dict(items)
    ordered = [given[part] for part in name_parts if part in given]
    return ' '.join(ordered)
funcs['_name'] = _name
430 102 aaronmk
431 2016 aaronmk
def _namePart(items, node):
    '''Extracts parts of a full name.
    @param items (part, full name) entries, where part is a key of
        name_parts_slices. The full name is split on spaces and the words
        selected by the part's slice are kept.
    @return The extracted parts, combined by _name()
    Throws SyntaxError if a part is not in name_parts_slices
    '''
    out_items = []
    for part, value in items:
        try: slice_ = name_parts_slices[part]
        except KeyError as e: raise SyntaxError(e)
        out_items.append((part, ' '.join(value.split(' ')[slice_])))
    return _name(out_items, node)
funcs['_namePart'] = _namePart
439 1321 aaronmk
440 1607 aaronmk
#### Angles
441
442 2016 aaronmk
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @param items
        value=<str> The compass direction
    @return The result of angles.compass2heading(), passed through
        util.cast(str, ...); the value unchanged if it is not all-uppercase;
        None if there is no value
    Throws FormatException if angles.compass2heading() raises KeyError
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    
    if not value.isupper(): return value # pass through other coordinate formats
    try: return util.cast(str, angles.compass2heading(value)) # ignore None
    except KeyError as e: raise FormatException(e)
funcs['_compass'] = _compass
452
453 1469 aaronmk
#### Paths
454
455 2016 aaronmk
def _simplifyPath(items, node):
    '''Removes the empty elements along a path of nested elements.
    @param items
        next=<path> XPath from one element of the path to the next
        require=<path> XPath that must have a value for an element to be kept
        path=<subtree> The first element of the path
    @return The first remaining element, or None if every element was removed
    Throws SyntaxError if a param is missing
    '''
    items = dict(items)
    try:
        # Named next_path so the builtin next() isn't shadowed
        next_path = cast(strings.ustr, items['next'])
        require = cast(strings.ustr, items['require'])
        root = items['path']
    except KeyError as e: raise SyntaxError(e)
    
    # Separate cursor, so the node param isn't overwritten
    cur = root
    while cur is not None:
        # Find the next elem first, before cur is possibly replaced
        new_node = xpath.get_1(cur, next_path, allow_rooted=False)
        if xpath.get_value(cur, require, allow_rooted=False) is None: # empty
            xml_dom.replace(cur, new_node) # remove current elem
            if cur is root: root = new_node # also update root
        cur = new_node
    return root
funcs['_simplifyPath'] = _simplifyPath