Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 2017 aaronmk
import warnings
7 111 aaronmk
8 1607 aaronmk
import angles
9 818 aaronmk
import dates
10 300 aaronmk
import exc
11 1580 aaronmk
import format
12 917 aaronmk
import maps
13 2105 aaronmk
import sql
14 1234 aaronmk
import strings
15 827 aaronmk
import term
16 1468 aaronmk
import units
17 1047 aaronmk
import util
18 86 aaronmk
import xml_dom
19 1321 aaronmk
import xpath
20 86 aaronmk
21 995 aaronmk
##### Exceptions
22
23 1612 aaronmk
class SyntaxError(exc.ExceptionWithCause):
    '''Raised when an XML function is invoked incorrectly (in this module, e.g.
    when a required param is missing or a regexp can't be compiled).
    Note that this shadows the builtin SyntaxError within this module.
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax'
        exc.ExceptionWithCause.__init__(self, msg, cause)
27 278 aaronmk
28 1613 aaronmk
class FormatException(exc.ExceptionWithCause):
    '''Raised when an XML function's input value can't be used (in this module,
    e.g. when it can't be cast or parsed).
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        msg = 'Invalid input value'
        exc.ExceptionWithCause.__init__(self, msg, cause)
31 843 aaronmk
32 1992 aaronmk
##### Helper functions
33 995 aaronmk
34 1992 aaronmk
def map_items(func, items):
    '''Applies func to the value of every (name, value) pair
    @param func Function taking a single value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in input order
    '''
    mapped = []
    for name, value in items:
        mapped.append((name, func(value)))
    return mapped
36
37
def cast(type_, val):
    '''Converts val by calling type_ on it
    @param type_ Callable that performs the conversion (e.g. float)
    @param val Value to convert
    @return type_(val)
    @throws FormatException if can't cast (i.e. type_ raised ValueError)
    '''
    try: return type_(val)
    except ValueError as e: raise FormatException(e)
41
42
def conv_items(type_, items):
    '''Casts the value of each entry yielded by xml_dom.TextEntryOnlyIter(items)
    @param type_ Callable to cast each value with (see cast())
    @param items Iterable of (name, value) entries
    @return list of (name, converted value) pairs
    @throws FormatException if a value can't be cast
    '''
    def conv(val): return cast(type_, val)
    return map_items(conv, xml_dom.TextEntryOnlyIter(items))
45
46
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value
    @param items Mutable list of (name, value) entries. The last entry is always
        removed when there is one, even if its name doesn't match.
    @param name Name of value param, or None to accept any name
    @return The last entry's value, or None if items is empty or the last
        entry's name doesn't match
    '''
    try: entry = items.pop() # the value, when present, is the last entry
    except IndexError: return None # input is empty and no actions
    matches = name is None or entry[0] == name
    if matches: return entry[1]
    return None # input is empty (last entry is something else)
52
53 995 aaronmk
funcs = {}
54
55 1992 aaronmk
##### Public functions
56
57 2112 aaronmk
def is_func_name(name):
    '''Tells whether a tag name denotes a function (local or not)
    @param name Tag name
    @return True if name starts with "_" but is not exactly "_"
    '''
    if name == '_': return False # '_' is default root node name
    return name.startswith('_')
59
60
def is_func(node):
    '''Tells whether an element's tag name denotes a function
    @param node Element with a tagName attribute
    @return See is_func_name()
    '''
    tag = node.tagName
    return is_func_name(tag)
61
62
def is_xml_func_name(name):
    '''Tells whether a tag name denotes a function registered in funcs
    @param name Tag name
    @return True if name is a function name and is a key of funcs
    '''
    if not is_func_name(name): return False
    return name in funcs
63
64
def is_xml_func(node):
    '''Tells whether an element's tag name denotes a function registered in funcs
    @param node Element with a tagName attribute
    @return See is_xml_func_name()
    '''
    tag = node.tagName
    return is_xml_func_name(tag)
65
66 2105 aaronmk
def process(node, on_error=exc.raise_, db=None):
    '''Evaluates the functions in a subtree, children first, replacing each
    function node with the text of its value
    @param node Root element of the subtree
    @param on_error Called with the exception when evaluating a function fails.
        Before that, the failing node is replaced with a comment containing
        its source.
    @param db If not None, functions that aren't in funcs are evaluated with
        sql.put(). Otherwise, such functions pass their "value" param through.
    '''
    # db must be forwarded too, or functions nested below the root could never
    # be evaluated by the DB
    for child in xml_dom.NodeElemIter(node): process(child, on_error, db)
    name = node.tagName
    if not is_func_name(name): return # ordinary element
    
    try:
        items = xml_dom.NodeTextEntryIter(node)
        try: func = funcs[name]
        except KeyError:
            if db is not None: # DB with relational functions available
                value = sql.put(db, name, dict(items))
            else: value = pop_value(list(items)) # pass value through
        else: value = func(items, node) # local XML function
        
        xml_dom.replace_with_text(node, value)
    except Exception as e: # also catch non-wrapped exceptions (XML func bugs)
        # Save in case another exception raised, overwriting sys.exc_info()
        exc.add_traceback(e)
        str_ = strings.ustr(node)
        exc.add_msg(e, 'function:\n'+str_)
        xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
            '\n'+term.emph_multiline(str_)))
        
        on_error(e)
89
90 1992 aaronmk
def strip(node):
    '''Replaces every XML function with its last parameter (which is usually its
    value), except for _ignore, which is removed completely.
    Only functions registered in funcs are affected; children are stripped
    before their parent.
    @param node Root element of the subtree
    '''
    for child in xml_dom.NodeElemIter(node): strip(child)
    name = node.tagName
    if not is_xml_func_name(name): return # leave other elements alone
    
    if name == '_ignore': value = None
    else:
        entries = list(xml_dom.NodeTextEntryIter(node))
        value = pop_value(entries, None) # last entry, whatever its name
    xml_dom.replace_with_text(node, value)
99 86 aaronmk
100 1469 aaronmk
##### XML functions
101 995 aaronmk
102
# Function names must start with _ to avoid collisions with real tags
103
# Functions take arguments (items)
104
105 1469 aaronmk
#### General
106
107 2017 aaronmk
def _ignore(items, node):
    '''Used to "comment out" an XML subtree
    @param items Unused
    @param node Unused
    @return None, always
    '''
    return # an ignored subtree never has a value
110 995 aaronmk
funcs['_ignore'] = _ignore
111 994 aaronmk
112 2017 aaronmk
def _ref(items, node):
    '''Used to retrieve a value from another XML node
    @param items
        addr=<path> XPath to value, relative to the XML func's parent node
    @param node The XML func's node
    @return The referenced value. If xpath.get_value() returns None, a
        UserWarning is issued and None is returned.
    @throws SyntaxError if the addr param is missing
    '''
    items = dict(items)
    try: addr = items['addr']
    except KeyError as e: raise SyntaxError(e)
    
    value = xpath.get_value(node.parentNode, addr)
    if value is None:
        warnings.warn(UserWarning('_ref: XPath reference target missing: '
            +str(addr)))
    return value
126
funcs['_ref'] = _ref
127
128 1469 aaronmk
#### Conditionals
129
130 2016 aaronmk
def _eq(items, node):
    '''Compares two params for equality
    @param items
        left=<value>
        right=<value>
    @param node Unused
    @return util.bool2str() of whether left equals right, or '' if either param
        is missing
    '''
    params = dict(items)
    if 'left' not in params or 'right' not in params:
        return '' # a value was None
    return util.bool2str(params['left'] == params['right'])
137
funcs['_eq'] = _eq
138
139 2016 aaronmk
def _if(items, node):
    '''Selects one of two values based on a condition
    @param items
        cond=<value> Condition. True iff bool() of its strings.ustr() form is
            true.
        then=<value> Result if cond is true
        else=<value> Result if cond is false (optional; defaults to None)
    @param node Unused
    @return The then or else value
    @throws SyntaxError if the cond or then param is missing
    @throws FormatException if cond can't be cast
    '''
    items = dict(items)
    try:
        cond = items['cond']
        then = items['then']
    except KeyError as e: raise SyntaxError(e)
    else_ = items.get('else', None)
    cond = bool(cast(strings.ustr, cond))
    if cond: return then
    else: return else_
149
funcs['_if'] = _if
150
151 1469 aaronmk
#### Combining values
152
153 2016 aaronmk
def _alt(items, node):
    '''Picks the value of the entry that sorts first
    @param items Iterable of (name, value) entries
    @param node Unused
    @return The value of the first entry after sorting the entries, or None if
        there are no entries
    '''
    ordered = sorted(items)
    try: return ordered[0][1] # value of lowest-numbered item
    except IndexError: return None # input got removed by e.g. FormatException
158 995 aaronmk
funcs['_alt'] = _alt
159 113 aaronmk
160 2016 aaronmk
def _merge(items, node):
    '''Merges the values of all entries with maps.merge_values()
    @param items Iterable of (name, value) entries; values are cast to strings
    @param node Unused
    @return maps.merge_values() of the values, passed in sorted-entry order
    @throws FormatException if a value can't be cast
    '''
    # conv_items() gets the entries *once* from the iter and checks types
    entries = sorted(conv_items(strings.ustr, items))
    values = [value for name, value in entries]
    return maps.merge_values(*values)
165 995 aaronmk
funcs['_merge'] = _merge
166 917 aaronmk
167 2016 aaronmk
def _label(items, node):
    '''Prefixes a value with a label
    @param items
        label=<text> Label to prepend
        value=<text> Value to label
    @param node Unused
    @return "<label>: <value>", or None if there is no value
    @throws SyntaxError if there is a value but the label param is missing
    @throws FormatException if a param can't be cast
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    value = items.get('value', None)
    if value is None: return None # input is empty
    try: label = items['label']
    except KeyError as e: raise SyntaxError(e)
    return label+': '+value
175 995 aaronmk
funcs['_label'] = _label
176 917 aaronmk
177 1469 aaronmk
#### Transforming values
178
179 2016 aaronmk
def _collapse(items, node):
    '''Collapses a subtree if the "value" element in it is NULL
    @param items
        require=<path> XPath, looked up from the value with xpath.get_1(), of
            the node that must be present and non-empty
        value=<subtree> Subtree to keep or collapse
    @param node Unused
    @return The value, or None if the required node is missing or empty
    @throws SyntaxError if the require param is missing
    @throws FormatException if require can't be cast
    '''
    items = dict(items)
    try: require = cast(strings.ustr, items['require'])
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)
    
    required_node = xpath.get_1(value, require, allow_rooted=False)
    if required_node is None or xml_dom.is_empty(required_node): return None
    else: return value
189
funcs['_collapse'] = _collapse
190
191 1478 aaronmk
types_by_name = {None: strings.ustr, 'str': strings.ustr, 'float': float}
192 1477 aaronmk
193 2016 aaronmk
def _nullIf(items, node):
    '''Turns a value into None if it equals a designated null value
    @param items
        null=<text> Value that stands for NULL
        value=<text> Input value (optional)
        type=<name> Key of types_by_name to convert null with (optional;
            a missing type selects the None entry)
    @param node Unused
    @return util.none_if(value, null), or the value unchanged if that raises
        ValueError
    @throws SyntaxError if the null param is missing or the type is unknown
    @throws FormatException if a param can't be cast
    '''
    items = dict(conv_items(strings.ustr, items))
    try: null = items['null']
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)
    type_str = items.get('type', None)
    
    try: type_ = types_by_name[type_str]
    except KeyError as e: raise SyntaxError(e)
    null = type_(null)
    
    try: return util.none_if(value, null)
    except ValueError: return value # value not convertible, so can't equal null
206 1047 aaronmk
funcs['_nullIf'] = _nullIf
207
208 1602 aaronmk
def repl(repls, value):
    '''Raises error if value not in map and no special '*' entry
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @param value Input value to look up
    @return The replacement for value (value itself if the replacement is "*")
    @throws FormatException if value is not in repls and there is no "*" entry
    '''
    try: new_value = repls[value]
    except KeyError as e:
        # Save traceback right away in case another exception raised
        fe = FormatException(e)
        try: new_value = repls['*']
        except KeyError: raise fe
    if new_value == '*': new_value = value # '*' means keep input value the same
    return new_value
223 1602 aaronmk
224 2016 aaronmk
def _map(items, node):
    '''See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @param node Unused
    @return The mapped value, or None if the input is empty or the mapped value
        is the empty string
    @throws FormatException if the value has no mapping (see repl()) or a param
        can't be cast
    '''
    # conv_items() gets the entries *once* from the iter and checks types
    entries = conv_items(strings.ustr, items)
    value = pop_value(entries)
    if value is None: return None # input is empty
    mappings = dict(entries) # everything but the value
    return util.none_if(repl(mappings, value), u'') # empty value means None
234 1219 aaronmk
funcs['_map'] = _map
235
236 2016 aaronmk
def _replace(items, node):
    '''Applies regexp replacements to a value, in entry order
    @param items
        <last_entry> Value
        <other_entries> regexp=replacement. A regexp consisting only of word
            characters is matched as a whole word.
    @param node Unused
    @return The resulting value with surrounding whitespace stripped, or None if
        the input is empty or the result is the empty string
    @throws SyntaxError if a regexp or replacement is invalid
    @throws FormatException if a param can't be cast
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    try:
        # Loop var is not named repl, to avoid shadowing the repl() function
        for pattern, with_ in items:
            if re.match(r'^\w+$', pattern):
                # match whole word
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])'
            value = re.sub(pattern, with_, value)
    except re.error as e: raise SyntaxError(e) # re.error is sre_constants.error
    return util.none_if(value.strip(), u'') # empty strings always mean None
247 1219 aaronmk
funcs['_replace'] = _replace
248
249 1469 aaronmk
#### Quantities
250
251 2016 aaronmk
def _units(items, node):
    '''Applies unit actions to a quantity, in entry order
    @param items
        <last_entry> Value, parsed with units.str2quantity()
        <other_entries> action=units, where action is one of:
            default: passed to units.set_default_units()
            to: quantity is converted with units.convert()
            Empty units are passed as None.
    @param node Unused
    @return units.quantity2str() of the resulting quantity, or None if the
        input is empty
    @throws SyntaxError if an action is not "default" or "to"
    @throws FormatException if the conversion fails, units are missing, or a
        param can't be cast
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    
    quantity = units.str2quantity(value)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default': units.set_default_units(quantity, units_)
            elif action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise FormatException(e)
            else: raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise FormatException(e)
    return units.quantity2str(quantity)
267 1225 aaronmk
funcs['_units'] = _units
268
269 1399 aaronmk
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first occurrence of the separator
    @param str_ String such as "1-5"
    @param range_sep Separator between the start and end of the range
    @return tuple (start, end) with whitespace stripped from both. If str_ does
        not contain the separator, or it starts with a "-" separator (negative
        number), the result is (str_, None) with str_ left unstripped.
    '''
    start, sep, end = str_.partition(range_sep)
    is_range = sep != ''
    is_negative = start == '' and range_sep == '-' # e.g. "-5"
    if not is_range or is_negative: return (str_, None)
    return (start.strip(), end.strip())
275
276 2016 aaronmk
def _rangeStart(items, node):
    '''Extracts the start of a range (see parse_range())
    @param items
        value=<text> Range string
    @param node Unused
    @return The first element of parse_range(value), or None if there is no
        value param
    @throws FormatException if a param can't be cast
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, end = parse_range(params['value'])
    return start
281
funcs['_rangeStart'] = _rangeStart
282
283 2016 aaronmk
def _rangeEnd(items, node):
    '''Extracts the end of a range (see parse_range())
    @param items
        value=<text> Range string
    @param node Unused
    @return The second element of parse_range(value) (None if the value is not
        a range), or None if there is no value param
    @throws FormatException if a param can't be cast
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, end = parse_range(params['value'])
    return end
288
funcs['_rangeEnd'] = _rangeEnd
289
290 2016 aaronmk
def _range(items, node):
    '''Computes the size of a numeric range
    @param items
        from=<number>
        to=<number>
    @param node Unused
    @return str() of to minus from, or None if either param is missing
    @throws FormatException if a param can't be cast to float
    '''
    bounds = dict(conv_items(float, items))
    from_ = bounds.get('from', None)
    to = bounds.get('to', None)
    if from_ is not None and to is not None: return str(to - from_)
    return None # at least one bound is missing
296
funcs['_range'] = _range
297
298 2016 aaronmk
def _avg(items, node):
    '''Computes the arithmetic mean of all values
    @param items Iterable of (name, value) entries; values are cast to float
    @param node Unused
    @return str() of the mean, or None if there are no values
    @throws FormatException if a value can't be cast to float
    '''
    total = 0.
    num = 0
    # Accumulate explicitly, in entry order, rather than using sum()
    for name, value in conv_items(float, items):
        total += value
        num += 1
    if num: return str(total/num)
    return None # input is empty
306 995 aaronmk
funcs['_avg'] = _avg
307 86 aaronmk
308 968 aaronmk
class CvException(Exception):
    '''Indicates that a value was given as a CV (coefficient of variation).
    Takes no arguments; the message is fixed.
    '''
    def __init__(self):
        msg = ''.join([
            'CV (coefficient of variation) values are only',
            ' allowed for ratio scale data ',
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)',
        ])
        Exception.__init__(self, msg)
313
314 2016 aaronmk
def _noCV(items, node):
    '''Rejects values given as a CV (coefficient of variation)
    @param items Iterable of (name, value) entries. Only the first entry is
        used.
    @param node Unused
    @return The first entry's value, or None if there are no entries
    @throws FormatException (wrapping CvException) if the value consists of
        "CV", optional spaces and digits (case-insensitive)
    '''
    try: name, value = next(iter(items))
    except StopIteration: return None
    # The flag is passed separately: an inline (?i) that is not at the very
    # start of the pattern is rejected by newer versions of re
    if re.match(r'^CV *\d+$', value, re.IGNORECASE):
        raise FormatException(CvException())
    return value
319 995 aaronmk
funcs['_noCV'] = _noCV
320 968 aaronmk
321 1469 aaronmk
#### Dates
322
323 2016 aaronmk
def _date(items, node):
    '''Formats a date as YYYY-MM-DD
    @param items Either
        date=<text> A (possibly fractional) year number, or else a date string
            parsed with dates.strtotime()
        or the separate parts
        year=<number> Required
        month=<number or name> Defaults to 1. Names are converted with
            dates.strtotime().
        day=<number> Defaults to 1
    @param node Unused
    @return The formatted date; None if there are no params at all; or the
        date string unchanged if dates.strtotime() raises ImportError
    @throws FormatException if the year is missing or the date is invalid
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try: str_ = items['date']
    except KeyError:
        # Year is required
        try: items['year']
        except KeyError as e:
            if items == {}: return None # entire date is empty
            else: raise FormatException(e)
        
        # Convert month name to number
        try: month = items['month']
        except KeyError: pass
        else:
            if not month.isdigit(): # month is name
                try: items['month'] = str(dates.strtotime(month).month)
                except ValueError as e: raise FormatException(e)
        
        # iter(...items()) provides an iterator of pairs on both Python 2 and 3
        items = dict(conv_items(format.str2int, iter(items.items())))
        items.setdefault('month', 1)
        items.setdefault('day', 1)
        
        for try_num in range(2):
            try:
                date = datetime.date(**items)
                break
            except ValueError as e:
                if try_num > 0: raise FormatException(e)
                    # exception still raised after retry
                msg = strings.ustr(e)
                if msg == 'month must be in 1..12': # try swapping month and day
                    items['month'], items['day'] = items['day'], items['month']
                else: raise FormatException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise FormatException(e)
        else:
            # Fractional part of the year becomes an offset in days from Jan 1
            date = (datetime.date(int(year), 1, 1) +
                datetime.timedelta(round((year % 1.)*365)))
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
367 995 aaronmk
funcs['_date'] = _date
368 86 aaronmk
369 2016 aaronmk
def _dateRangeStart(items, node):
    '''Extracts the start of a date range
    @param items
        value=<text> Date range string
    @param node Unused
    @return The first element of dates.parse_date_range(value), or None if
        there is no value param
    @throws FormatException if a param can't be cast
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[0]
374
funcs['_dateRangeStart'] = _dateRangeStart
375 1311 aaronmk
376 2016 aaronmk
def _dateRangeEnd(items, node):
    '''Extracts the end of a date range
    @param items
        value=<text> Date range string
    @param node Unused
    @return The second element of dates.parse_date_range(value), or None if
        there is no value param
    @throws FormatException if a param can't be cast
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[1]
381
funcs['_dateRangeEnd'] = _dateRangeEnd
382 1311 aaronmk
383 1469 aaronmk
#### Names
384
385 328 aaronmk
# Ordered (part name, slice) pairs. Each slice selects that part from the list
# of words of a full name.
_name_parts_slices_items = [
    ('first', slice(None, 1)), # the first word
    ('middle', slice(1, -1)), # everything between the first and last words
    ('last', slice(-1, None)), # the last word
]
# Part name -> slice
name_parts_slices = dict(_name_parts_slices_items)
# Part names, in the order they appear in a full name
name_parts = [item[0] for item in _name_parts_slices_items]
392
393 2016 aaronmk
def _name(items, node):
    '''Joins name parts into a full name
    @param items Iterable of (part, value) entries. Only the parts listed in
        name_parts are used.
    @param node Unused
    @return The given parts in name_parts order, separated by single spaces
    '''
    given = dict(items)
    parts = [given[part] for part in name_parts if part in given]
    return ' '.join(parts)
399 995 aaronmk
funcs['_name'] = _name
400 102 aaronmk
401 2016 aaronmk
def _namePart(items, node):
    '''Extracts parts of full names
    @param items Iterable of (part, full name) entries, where part is a key of
        name_parts_slices. The matching slice of the name's space-separated
        words is taken for each entry.
    @param node Passed on to _name()
    @return The extracted parts, combined with _name()
    @throws SyntaxError if a part name is unknown
    '''
    out_items = []
    for part, value in items:
        try: slice_ = name_parts_slices[part]
        except KeyError as e: raise SyntaxError(e)
        out_items.append((part, ' '.join(value.split(' ')[slice_])))
    return _name(out_items, node)
408 995 aaronmk
funcs['_namePart'] = _namePart
409 1321 aaronmk
410 1607 aaronmk
#### Angles
411
412 2016 aaronmk
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @param items
        value=<text> Compass direction
    @param node Unused
    @return The heading from angles.compass2heading(), converted with
        util.cast(str, ...); the value unchanged if it is not all-uppercase;
        or None if there is no value param
    @throws FormatException if angles.compass2heading() raises KeyError, or a
        param can't be cast
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    
    if not value.isupper(): return value # pass through other coordinate formats
    try: return util.cast(str, angles.compass2heading(value)) # ignore None
    except KeyError as e: raise FormatException(e)
421
funcs['_compass'] = _compass
422
423 1469 aaronmk
#### Paths
424
425 2016 aaronmk
def _simplifyPath(items, node):
    '''Removes the elements of a path whose required node is missing or empty
    @param items
        next=<path> XPath, looked up with xpath.get_1(), from an element of the
            path to the following one
        require=<path> XPath, looked up the same way, from an element to the
            node it must contain
        path=<subtree> First element of the path
    @param node Unused
    @return The first remaining element of the path (None if none remain)
    @throws SyntaxError if a param is missing
    @throws FormatException if next or require can't be cast
    '''
    items = dict(items)
    try:
        next_path = cast(strings.ustr, items['next'])
        require = cast(strings.ustr, items['require'])
        root = items['path']
    except KeyError as e: raise SyntaxError(e)
    
    cur = root
    while cur is not None:
        # Look up the following elem before cur is possibly replaced
        new_node = xpath.get_1(cur, next_path, allow_rooted=False)
        required_node = xpath.get_1(cur, require, allow_rooted=False)
        if required_node is None or xml_dom.is_empty(required_node):# empty elem
            xml_dom.replace(cur, new_node) # remove current elem
            if cur is root: root = new_node # also update root
        cur = new_node
    return root
442
funcs['_simplifyPath'] = _simplifyPath