Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 2017 aaronmk
import warnings
7 111 aaronmk
8 1607 aaronmk
import angles
9 818 aaronmk
import dates
10 300 aaronmk
import exc
11 1580 aaronmk
import format
12 917 aaronmk
import maps
13 2105 aaronmk
import sql
14 1234 aaronmk
import strings
15 827 aaronmk
import term
16 1468 aaronmk
import units
17 1047 aaronmk
import util
18 86 aaronmk
import xml_dom
19 1321 aaronmk
import xpath
20 86 aaronmk
21 995 aaronmk
##### Exceptions
22
23 1612 aaronmk
class SyntaxError(exc.ExceptionWithCause):
    '''Raised when an XML function is used with invalid syntax, e.g. when a
    required param is missing. Shadows the builtin SyntaxError in this module.
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax'
        exc.ExceptionWithCause.__init__(self, msg, cause)
27 278 aaronmk
28 1613 aaronmk
class FormatException(exc.ExceptionWithCause):
    '''Raised when an input value can't be converted or interpreted
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        msg = 'Invalid input value'
        exc.ExceptionWithCause.__init__(self, msg, cause)
31 843 aaronmk
32 1992 aaronmk
##### Helper functions
33 995 aaronmk
34 1992 aaronmk
def map_items(func, items):
    '''Applies func to the value of every (name, value) entry
    @param func Callable taking one value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in input order
    '''
    mapped = []
    for name, value in items:
        mapped.append((name, func(value)))
    return mapped
36
37
def cast(type_, val):
    '''Converts val by calling type_(val)
    @return The converted value
    @throws FormatException if type_ raises a ValueError
    '''
    try:
        converted = type_(val)
    except ValueError as e:
        raise FormatException(e)
    return converted
41
42
def conv_items(type_, items):
    '''Casts the value of each entry yielded by xml_dom.TextEntryOnlyIter(items)
    @param type_ Callable used to convert each value (see cast())
    @return list of (name, converted value) pairs
    @throws FormatException if a value can't be cast
    '''
    def to_type(val):
        return cast(type_, val)
    entries = xml_dom.TextEntryOnlyIter(items)
    return map_items(to_type, entries)
45
46
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value
    @param items list of (name, value) entries. The last entry is always
        removed, even if its name doesn't match.
    @param name Name of value param, or None to accept any name
    @return The last entry's value, or None if items is empty or the last
        entry's name is not `name`
    '''
    try:
        last = items.pop() # last entry contains value
    except IndexError:
        return None # input is empty and no actions
    if name is not None and last[0] != name:
        return None # input is empty
    return last[1]
52
53 995 aaronmk
# Registry of XML functions: function name (starting with _) -> handler
funcs = dict()

# Names of the XML functions that process() still evaluates when strip=True
structural_funcs = set()
56
57 1992 aaronmk
##### Public functions
58
59 2112 aaronmk
def is_func_name(name):
    '''Whether a tag name has the form of a function name, i.e. it starts with
    _ but is not just "_"
    '''
    if name == '_':
        return False # '_' is default root node name
    return name.startswith('_')
61
62
def is_func(node):
    '''Whether node's tag name has the form of a function name'''
    tag = node.tagName
    return is_func_name(tag)
63
64
def is_xml_func_name(name):
    '''Whether name has the form of a function name and is registered in funcs
    '''
    if not is_func_name(name):
        return False
    return name in funcs
65
66
def is_xml_func(node):
    '''Whether node's tag name is the name of a registered XML function'''
    tag = node.tagName
    return is_xml_func_name(tag)
67
68 2597 aaronmk
def process(node, on_error=exc.raise_, db=None, preserve=set(), strip=False):
    '''Evaluates the XML functions in an XML tree.
    Children are processed before the node itself. A node that is an XML
    function is replaced by the result of evaluating it.
    @param node Root element of the subtree to process
    @param on_error Called with the exception when evaluating a function fails,
        after the failing node has been replaced by a comment containing it
    @param db DB connection for relational functions, or None
    @param preserve set(str...) XML functions not to remove.
        * container can be any iterable type
    @param strip Whether to instead replace most XML functions with their last
        parameter (usually the value) and evaluate only structural functions
    '''
    # Copy into a set: accepts any iterable. The default set() is never mutated.
    preserve = set(preserve)
    
    for child in xml_dom.NodeElemIter(node):
        process(child, on_error, db, preserve, strip)
    
    name = node.tagName
    if not is_xml_func_name(name) or name in preserve:
        return # not an XML function, or one to leave in place
    
    if strip and name not in structural_funcs: # just replace with last param
        value = pop_value(list(xml_dom.NodeTextEntryIter(node)), None)
        xml_dom.replace_with_text(node, value)
        return
    
    try:
        items = xml_dom.NodeTextEntryIter(node)
        try:
            func = funcs[name]
        except KeyError:
            # NOTE(review): is_xml_func_name() above already requires
            # name in funcs, so this fallback looks unreachable -- confirm
            if db is not None: # DB with relational functions available
                value = sql.put(db, name, dict(items))
            else:
                value = pop_value(list(items)) # pass value through
        else:
            value = func(items, node) # local XML function
        
        xml_dom.replace_with_text(node, value)
    except Exception as e: # also catch non-wrapped exceptions (XML func bugs)
        # Save in case another exception raised, overwriting sys.exc_info()
        exc.add_traceback(e)
        
        str_ = strings.ustr(node)
        exc.add_msg(e, 'function:\n'+str_)
        # Leave the failed function in the tree as a comment
        xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
            '\n'+term.emph_multiline(str_)))
        
        on_error(e)
104
105 1469 aaronmk
##### XML functions
106 995 aaronmk
107
# Function names must start with _ to avoid collisions with real tags
108
# Functions take arguments (items, node)
109
110 2557 aaronmk
#### Structural
111 1469 aaronmk
112 2017 aaronmk
def _ignore(items, node):
    '''Used to "comment out" an XML subtree: the params are not read and the
    result is always None
    '''
    return None
115 995 aaronmk
funcs['_ignore'] = _ignore
116 2557 aaronmk
structural_funcs.add('_ignore')
117 994 aaronmk
118 2017 aaronmk
def _ref(items, node):
    '''Used to retrieve a value from another XML node
    @param items
        addr=<path> XPath to value, relative to the XML func's parent node
    @return The value at addr. If it is None, a UserWarning is issued and
        None is returned.
    @throws SyntaxError if the addr param is missing
    '''
    params = dict(items)
    try:
        addr = params['addr']
    except KeyError as e:
        raise SyntaxError(e)
    
    target = xpath.get_value(node.parentNode, addr)
    if target == None:
        msg = '_ref: XPath reference target missing: '+str(addr)
        warnings.warn(UserWarning(msg))
    return target
132
funcs['_ref'] = _ref
133 2557 aaronmk
structural_funcs.add('_ref')
134 2017 aaronmk
135 1469 aaronmk
#### Conditionals
136
137 2016 aaronmk
def _eq(items, node):
    '''Compares the left and right params
    @return util.bool2str() of whether they are equal, or '' if either param
        is missing
    '''
    params = dict(items)
    if 'left' not in params or 'right' not in params:
        return '' # a value was None
    is_equal = params['left'] == params['right']
    return util.bool2str(is_equal)
144
funcs['_eq'] = _eq
145
146 2016 aaronmk
def _if(items, node):
    '''Selects between the then and else params
    @param items
        cond Condition; true iff its string form is non-empty
        then Result if cond is true
        else (optional) Result if cond is false; defaults to None
    @throws SyntaxError if cond or then is missing
    '''
    params = dict(items)
    try:
        cond = params['cond']
        then = params['then']
    except KeyError as e:
        raise SyntaxError(e)
    
    if cast(strings.ustr, cond):
        return then
    return params.get('else')
156
funcs['_if'] = _if
157
158 1469 aaronmk
#### Combining values
159
160 2016 aaronmk
def _alt(items, node):
    '''Picks the value of the entry that sorts first
    @return That value, or None if there are no entries
    '''
    ranked = sorted(items)
    try:
        return ranked[0][1] # value of lowest-numbered item
    except IndexError:
        return None # input got removed by e.g. FormatException
165 995 aaronmk
funcs['_alt'] = _alt
166 113 aaronmk
167 2016 aaronmk
def _merge(items, node):
    '''Merges the values of all entries, taken in sorted entry order, using
    maps.merge_values()
    '''
    # get *once* from iter, check types
    entries = sorted(conv_items(strings.ustr, items))
    values = [value for _, value in entries]
    return maps.merge_values(*values)
172 995 aaronmk
funcs['_merge'] = _merge
173 917 aaronmk
174 2016 aaronmk
def _label(items, node):
    '''Prefixes the value with a label
    @param items
        label Text to put before the value
        value Value to label
    @return '<label>: <value>', or None if there is no value
    @throws SyntaxError if there is a value but no label
    '''
    # get *once* from iter, check types
    params = dict(conv_items(strings.ustr, items))
    value = params.get('value')
    if value is None:
        return None # input is empty
    try:
        label = params['label']
    except KeyError as e:
        raise SyntaxError(e)
    return label+': '+value
182 995 aaronmk
funcs['_label'] = _label
183 917 aaronmk
184 1469 aaronmk
#### Transforming values
185
186 2016 aaronmk
def _collapse(items, node):
    '''Collapses a subtree if a required element in it is NULL
    @param items
        require XPath, looked up within the value param, that must have a value
        value Subtree to keep or collapse
    @return The value param, or None if the require XPath has no value in it
    @throws SyntaxError if the require param is missing
    '''
    params = dict(items)
    try:
        require = cast(strings.ustr, params['require'])
    except KeyError as e:
        raise SyntaxError(e)
    subtree = params.get('value')
    
    found = xpath.get_value(subtree, require, allow_rooted=False)
    if found == None:
        return None
    return subtree
195
funcs['_collapse'] = _collapse
196
197 1478 aaronmk
# Converters selectable by _nullIf's type param; None is used when the param
# is absent
types_by_name = {
    None: strings.ustr,
    'str': strings.ustr,
    'float': float,
}
198 1477 aaronmk
199 2016 aaronmk
def _nullIf(items, node):
    '''Turns the value into None if it equals a given null value
    @param items
        null Value that means NULL
        value (optional) Value to check
        type (optional) Key of types_by_name to convert null with
    @return util.none_if(value, null), or the value unchanged if that
        comparison raises a ValueError
    @throws SyntaxError if null is missing or type is not in types_by_name
    '''
    params = dict(conv_items(strings.ustr, items))
    try:
        null_str = params['null']
    except KeyError as e:
        raise SyntaxError(e)
    value = params.get('value')
    type_name = params.get('type')
    
    try:
        to_type = types_by_name[type_name]
    except KeyError as e:
        raise SyntaxError(e)
    null = to_type(null_str)
    
    try:
        return util.none_if(value, null)
    except ValueError:
        return value # value not convertible, so can't equal null
212 1047 aaronmk
funcs['_nullIf'] = _nullIf
213
214 1602 aaronmk
def repl(repls, value):
    '''Looks up the replacement for value
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @return The replacement, or value itself if the replacement is "*"
    @throws FormatException if value not in map and no special '*' entry
    '''
    try:
        replacement = repls[value]
    except KeyError as e:
        # Wrap right away, before the fallback lookup can raise another
        # exception
        not_found = FormatException(e)
        try:
            replacement = repls['*']
        except KeyError:
            raise not_found
    if replacement == '*':
        return value # '*' means keep input value the same
    return replacement
229 1602 aaronmk
230 2016 aaronmk
def _map(items, node):
    '''See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value, or None if the input is empty or the mapped
        value is the empty string
    '''
    # get *once* from iter, check types
    entries = conv_items(strings.ustr, items)
    value = pop_value(entries)
    if value is None:
        return None # input is empty
    mapped = repl(dict(entries), value)
    return util.none_if(mapped, u'') # empty value means None
240 1219 aaronmk
funcs['_map'] = _map
241
242 2016 aaronmk
def _replace(items, node):
    '''Applies regexp replacements to the value, in entry order
    @param items
        <last_entry> Value
        <other_entries> regexp=replacement. A regexp made up only of word
            characters is matched as a whole word, i.e. not adjacent to a
            letter or digit.
    @return The replaced value with surrounding whitespace stripped, or None
        if the input is empty or the result is the empty string
    @throws SyntaxError if a regexp is invalid
    '''
    # get *once* from iter, check types
    entries = conv_items(strings.ustr, items)
    value = pop_value(entries)
    if value is None:
        return None # input is empty
    try:
        for pattern, with_ in entries:
            if re.match(r'^\w+$', pattern):
                # match whole word
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])'
            value = re.sub(pattern, with_, value)
    except re.error as e: # same class as sre_constants.error
        raise SyntaxError(e)
    return util.none_if(value.strip(), u'') # empty strings always mean None
253 1219 aaronmk
funcs['_replace'] = _replace
254
255 1469 aaronmk
#### Quantities
256
257 2016 aaronmk
def _units(items, node):
    '''Applies unit actions to a quantity
    @param items
        <last_entry> Value, parsed with units.str2quantity()
        <other_entries> action=units, applied in order:
            default Sets the quantity's default units
            to Converts the quantity to the given units
            An empty units string is passed on as None.
    @return The resulting quantity as a string, or None if the input is empty
    @throws SyntaxError if an action is not recognized
    @throws FormatException if conversion fails or units are missing
    '''
    # get *once* from iter, check types
    entries = conv_items(strings.ustr, items)
    value = pop_value(entries)
    if value is None:
        return None # input is empty
    
    quantity = units.str2quantity(value)
    try:
        for action, units_ in entries:
            units_ = util.none_if(units_, u'')
            if action == 'to':
                try:
                    quantity = units.convert(quantity, units_)
                except ValueError as e:
                    raise FormatException(e)
            elif action == 'default':
                units.set_default_units(quantity, units_)
            else:
                raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e:
        raise FormatException(e)
    return units.quantity2str(quantity)
273 1225 aaronmk
funcs['_units'] = _units
274
275 1399 aaronmk
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first occurrence of range_sep
    @return (start, end) with whitespace stripped, or (str_, None) if str_ is
        not a range. A leading "-" separator is taken as a negative sign.
    '''
    start, sep, end = str_.partition(range_sep)
    is_range = sep != ''
    if is_range and range_sep == '-' and start == '':
        is_range = False # negative number
    if not is_range:
        return (str_, None)
    return (start.strip(), end.strip())
281
282 2016 aaronmk
def _rangeStart(items, node):
    '''Gets the start of the range in the value param (see parse_range())
    @return The start, or None if there is no value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params:
        return None # input is empty
    start, end = parse_range(params['value'])
    return start
287
funcs['_rangeStart'] = _rangeStart
288
289 2016 aaronmk
def _rangeEnd(items, node):
    '''Gets the end of the range in the value param (see parse_range())
    @return The end, or None if there is no value or it is not a range
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params:
        return None # input is empty
    start, end = parse_range(params['value'])
    return end
294
funcs['_rangeEnd'] = _rangeEnd
295
296 2016 aaronmk
def _range(items, node):
    '''Computes the size of a numeric range
    @param items
        from Lower bound
        to Upper bound
    @return str(to - from), or None if either bound is missing
    '''
    bounds = dict(conv_items(float, items))
    from_ = bounds.get('from')
    to = bounds.get('to')
    if from_ is None or to is None:
        return None
    return str(to - from_)
302
funcs['_range'] = _range
303
304 2016 aaronmk
def _avg(items, node):
    '''Averages the values of all entries, as floats
    @return The mean as a string, or None if there are no entries
    '''
    values = [value for _, value in conv_items(float, items)]
    if len(values) == 0:
        return None # input is empty
    total = 0.
    for value in values:
        total += value
    return str(total/len(values))
312 995 aaronmk
funcs['_avg'] = _avg
313 86 aaronmk
314 968 aaronmk
class CvException(Exception):
    '''Raised for a CV (coefficient of variation) value where one is not
    allowed
    '''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only allowed for '
            'ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
319
320 2016 aaronmk
def _noCV(items, node):
    '''Passes through the value of the first entry unless it is a CV
    (coefficient of variation) value
    @param items Iterator of (name, value) entries; only the first is read
    @return The first entry's value, or None if there are no entries
    @throws FormatException (wrapping a CvException) if the value is "CV"
        followed by a number, in any letter case
    '''
    try:
        name, value = next(items)
    except StopIteration:
        return None
    # The global (?i) flag must come first: newer Python versions reject it
    # anywhere else in the pattern
    if re.match(r'(?i)^CV *\d+$', value):
        raise FormatException(CvException())
    return value
325 995 aaronmk
funcs['_noCV'] = _noCV
326 968 aaronmk
327 1469 aaronmk
#### Dates
328
329 2016 aaronmk
def _date(items, node):
    '''Formats a date as %Y-%m-%d
    @param items Either
        date A date string or a (possibly fractional) year number
        or
        year Required
        month (optional) Number or month name; defaults to 1
        day (optional) Defaults to 1
    @return The formatted date; None if there are no params; or the date param
        unchanged if parsing it raises an ImportError
    @throws FormatException if the date is invalid or year is missing
    '''
    # get *once* from iter, check types
    parts = dict(conv_items(strings.ustr, items))
    
    if 'date' in parts: # whole date given as one string
        str_ = parts['date']
        try:
            year = float(str_)
        except ValueError: # not a plain number, so parse as a date string
            try:
                date = dates.strtotime(str_)
            except ImportError:
                return str_
            except ValueError as e:
                raise FormatException(e)
        else: # fractional part of the year becomes a day offset
            day_offset = datetime.timedelta(round((year % 1.)*365))
            date = datetime.date(int(year), 1, 1) + day_offset
    else: # date given as separate parts
        # Year is required
        try:
            parts['year']
        except KeyError as e:
            if parts == {}:
                return None # entire date is empty
            raise FormatException(e)
        
        # Convert month name to number
        month = parts.get('month')
        if month is not None and not month.isdigit(): # month is name
            try:
                parts['month'] = str(dates.strtotime(month).month)
            except ValueError as e:
                raise FormatException(e)
        
        parts = dict(conv_items(format.str2int, parts.iteritems()))
        parts.setdefault('month', 1)
        parts.setdefault('day', 1)
        
        try:
            date = datetime.date(**parts)
        except ValueError as e:
            if strings.ustr(e) != 'month must be in 1..12':
                raise FormatException(e)
            # try swapping month and day, once
            parts['month'], parts['day'] = parts['day'], parts['month']
            try:
                date = datetime.date(**parts)
            except ValueError as retry_e: # exception still raised after retry
                raise FormatException(retry_e)
    
    try:
        return dates.strftime('%Y-%m-%d', date)
    except ValueError as e:
        raise FormatException(e)
373 995 aaronmk
funcs['_date'] = _date
374 86 aaronmk
375 2016 aaronmk
def _dateRangeStart(items, node):
    '''Gets the first element of dates.parse_date_range() of the value param
    @return That element, or None if there is no value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params:
        return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[0]
380
funcs['_dateRangeStart'] = _dateRangeStart
381 1311 aaronmk
382 2016 aaronmk
def _dateRangeEnd(items, node):
    '''Gets the second element of dates.parse_date_range() of the value param
    @return That element, or None if there is no value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params:
        return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[1]
387
funcs['_dateRangeEnd'] = _dateRangeEnd
388 1311 aaronmk
389 1469 aaronmk
#### Names
390
391 328 aaronmk
# (part name, slice selecting that part from a name's list of words), in the
# order the parts appear in a full name
_name_parts_slices_items = [
    ('first', slice(None, 1)),
    ('middle', slice(1, -1)),
    ('last', slice(-1, None)),
]

# Part name -> slice
name_parts_slices = dict(_name_parts_slices_items)

# Part names, in order
name_parts = [item[0] for item in _name_parts_slices_items]
398
399 2016 aaronmk
def _name(items, node):
    '''Joins the name parts that are present (see name_parts) with spaces, in
    name_parts order
    @param items (part name, value) entries; other entries are ignored
    '''
    given = dict(items)
    present = [given[part] for part in name_parts if part in given]
    return ' '.join(present)
405 995 aaronmk
funcs['_name'] = _name
406 102 aaronmk
407 2016 aaronmk
def _namePart(items, node):
    '''Extracts parts of full names and joins them with _name()
    @param items <part>=<full name> entries, where part is a key of
        name_parts_slices. Each full name is split on spaces and only the
        words belonging to that part are kept.
    @throws SyntaxError if a part name is not recognized
    '''
    extracted = []
    for part, full_name in items:
        try:
            slice_ = name_parts_slices[part]
        except KeyError as e:
            raise SyntaxError(e)
        words = full_name.split(' ')
        extracted.append((part, ' '.join(words[slice_])))
    return _name(extracted, node)
414 995 aaronmk
funcs['_namePart'] = _namePart
415 1321 aaronmk
416 1607 aaronmk
#### Angles
417
418 2016 aaronmk
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @return The heading, cast with util.cast(str, ...); the value unchanged if
        it is not all uppercase; or None if there is no value
    @throws FormatException if the lookup raises a KeyError
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params:
        return None # input is empty
    value = params['value']
    
    if not value.isupper():
        return value # pass through other coordinate formats
    try:
        heading = angles.compass2heading(value)
        return util.cast(str, heading) # ignore None
    except KeyError as e:
        raise FormatException(e)
427
funcs['_compass'] = _compass
428
429 1469 aaronmk
#### Paths
430
431 2016 aaronmk
def _simplifyPath(items, node):
    '''Removes the empty elements from a chain of elements
    @param items
        next XPath from one element of the chain to the next one
        require XPath that must have a value for an element to be kept
        path First element of the chain
    @return The first element of the simplified chain (None if none is kept)
    @throws SyntaxError if a param is missing
    '''
    params = dict(items)
    try:
        next_path = cast(strings.ustr, params['next'])
        require = cast(strings.ustr, params['require'])
        root = params['path']
    except KeyError as e:
        raise SyntaxError(e)
    
    cur = root
    while cur != None:
        # Look up the successor first, while cur is still in the tree
        successor = xpath.get_1(cur, next_path, allow_rooted=False)
        is_empty = xpath.get_value(cur, require, allow_rooted=False) == None
        if is_empty:
            xml_dom.replace(cur, successor) # remove current elem
            if cur is root:
                root = successor # also update root
        cur = successor
    return root
447
funcs['_simplifyPath'] = _simplifyPath