Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 2017 aaronmk
import warnings
7 111 aaronmk
8 1607 aaronmk
import angles
9 818 aaronmk
import dates
10 300 aaronmk
import exc
11 1580 aaronmk
import format
12 917 aaronmk
import maps
13 2105 aaronmk
import sql
14 1234 aaronmk
import strings
15 827 aaronmk
import term
16 1468 aaronmk
import units
17 1047 aaronmk
import util
18 86 aaronmk
import xml_dom
19 1321 aaronmk
import xpath
20 86 aaronmk
21 995 aaronmk
##### Exceptions
22
23 1612 aaronmk
class SyntaxError(exc.ExceptionWithCause):
    '''Signals that an XML function was invoked incorrectly (e.g. a required
    param is missing). Note that within this module, this name shadows the
    builtin SyntaxError.
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax'
        exc.ExceptionWithCause.__init__(self, msg, cause)
27 278 aaronmk
28 1613 aaronmk
class FormatException(exc.ExceptionWithCause):
    '''Signals that an input value could not be used by an XML function
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        msg = 'Invalid input value'
        exc.ExceptionWithCause.__init__(self, msg, cause)
31 843 aaronmk
32 1992 aaronmk
##### Helper functions
33 995 aaronmk
34 1992 aaronmk
def map_items(func, items):
    '''Applies func to the value of every (name, value) entry
    @param func Callable taking one value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in the input order
    '''
    mapped = []
    for name, value in items: mapped.append((name, func(value)))
    return mapped
36
37
def cast(type_, val):
    '''Converts val using type_
    @param type_ Callable that performs the conversion
    @return The converted value
    @throws FormatException if the conversion raises ValueError
    '''
    try: converted = type_(val)
    except ValueError as e: raise FormatException(e)
    else: return converted
41
42
def conv_items(type_, items):
    '''Casts the value of every entry to type_ (see cast()). The entries are
    first passed through xml_dom.TextEntryOnlyIter.
    @return list of (name, converted value) pairs
    '''
    def to_type(val): return cast(type_, val)
    
    return map_items(to_type, xml_dom.TextEntryOnlyIter(items))
45
46
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value.
    The last entry is removed even if its name doesn't match.
    @param items list of (name, value) entries; modified in place
    @param name Name of value param, or None to accept any name
    @return The value, or None if items is empty or the name doesn't match
    '''
    if not items: return None # input is empty and no actions
    last = items.pop() # last entry contains value
    if name is None or last[0] == name: return last[1]
    return None # input is empty
52
53 995 aaronmk
funcs = {}
54
55 1992 aaronmk
##### Public functions
56
57 2105 aaronmk
def process(node, on_error=exc.raise_, db=None):
    '''Evaluates the XML functions in the tree rooted at node, in place.
    Children are processed first, so nested functions are evaluated before the
    function that contains them. Each function node is replaced with the text
    of its value. If evaluating a function raises an exception, the node is
    instead replaced with a comment containing the node's string form, and the
    exception is passed to on_error.
    @param node The element to process
    @param on_error Callable that receives any exception raised while
        evaluating a function
    @param db If not None, functions that are not in funcs are evaluated using
        sql.put(db, name, params). Otherwise, such functions are replaced with
        their value param.
    '''
    # db must be passed along, or functions below the root could never be
    # evaluated by the DB
    for child in xml_dom.NodeElemIter(node): process(child, on_error, db)
    
    name = node.tagName
    if not name.startswith('_') or name == '_': return
        # not an XML function ('_' is default root node name)
    
    try:
        items = xml_dom.NodeTextEntryIter(node)
        try: func = funcs[name]
        except KeyError:
            if db is not None: # DB with relational functions available
                value = sql.put(db, name, dict(items))
            else: value = pop_value(list(items)) # pass value through
        else: value = func(items, node) # local XML function
        
        xml_dom.replace_with_text(node, value)
    except Exception as e: # also catch non-wrapped exceptions (XML func bugs)
        # Save in case another exception raised, overwriting sys.exc_info()
        exc.add_traceback(e)
        
        str_ = strings.ustr(node)
        exc.add_msg(e, 'function:\n'+str_)
        # Leave the failed function in the document as a comment
        comment = xml_dom.mk_comment(node.ownerDocument,
            '\n'+term.emph_multiline(str_))
        xml_dom.replace(node, comment)
        
        on_error(e)
80
81 1992 aaronmk
def strip(node):
    '''Replaces every XML function that is in funcs with the value of its last
    param (whatever that param's name), except for _ignore, which is replaced
    with None. Works bottom-up and modifies the tree in place.
    '''
    for child in xml_dom.NodeElemIter(node): strip(child)
    
    name = node.tagName
    if not (name.startswith('_') and name in funcs): return # not an XML func
    
    if name == '_ignore': value = None
    else:
        entries = list(xml_dom.NodeTextEntryIter(node))
        value = pop_value(entries, None) # None: accept any param name
    xml_dom.replace_with_text(node, value)
90 86 aaronmk
91 1469 aaronmk
##### XML functions
92 995 aaronmk
93
# Function names must start with _ to avoid collisions with real tags
94
# Functions take arguments (items)
95
96 1469 aaronmk
#### General
97
98 2017 aaronmk
def _ignore(items, node):
99 994 aaronmk
    '''Used to "comment out" an XML subtree'''
100
    return None
101 995 aaronmk
funcs['_ignore'] = _ignore
102 994 aaronmk
103 2017 aaronmk
def _ref(items, node):
    '''Used to retrieve a value from another XML node.
    Issues a UserWarning if nothing is found at the address.
    @param items
        addr=<path> XPath to value, relative to the XML func's parent node
    @return The value found, or None
    @throws SyntaxError if addr is missing
    '''
    params = dict(items)
    try: addr = params['addr']
    except KeyError as e: raise SyntaxError(e)
    
    value = xpath.get_value(node.parentNode, addr)
    if value == None:
        msg = '_ref: XPath reference target missing: '+str(addr)
        warnings.warn(UserWarning(msg))
    return value
117
funcs['_ref'] = _ref
118
119 1469 aaronmk
#### Conditionals
120
121 2016 aaronmk
def _eq(items, node):
    '''Compares two values for equality
    @param items
        left=<value>
        right=<value>
    @return util.bool2str() of whether left equals right, or '' if either
        param is missing
    '''
    params = dict(items)
    try: left, right = params['left'], params['right']
    except KeyError: return '' # a value was None
    return util.bool2str(left == right)
128
funcs['_eq'] = _eq
129
130 2016 aaronmk
def _if(items, node):
    '''Chooses between two values based on a condition
    @param items
        cond=<value> Tested by the truthiness of its strings.ustr() form
        then=<value> Returned if cond is true
        else=<value> Returned if cond is false. Optional; defaults to None.
    @throws SyntaxError if cond or then is missing
    '''
    params = dict(items)
    try: cond, then = params['cond'], params['then']
    except KeyError as e: raise SyntaxError(e)
    
    if cast(strings.ustr, cond): return then
    return params.get('else')
140
funcs['_if'] = _if
141
142 1469 aaronmk
#### Combining values
143
144 2016 aaronmk
def _alt(items, node):
145 113 aaronmk
    items = list(items)
146
    items.sort()
147 1186 aaronmk
    try: return items[0][1] # value of lowest-numbered item
148 1609 aaronmk
    except IndexError: return None # input got removed by e.g. FormatException
149 995 aaronmk
funcs['_alt'] = _alt
150 113 aaronmk
151 2016 aaronmk
def _merge(items, node):
    '''Combines all the values using maps.merge_values(), passing them in the
    sorted order of their entries
    '''
    # get *once* from iter, check types
    ordered = sorted(conv_items(strings.ustr, items))
    values = [value for name, value in ordered]
    return maps.merge_values(*values)
156 995 aaronmk
funcs['_merge'] = _merge
157 917 aaronmk
158 2016 aaronmk
def _label(items, node):
    '''Prefixes a value with a label
    @param items
        label=<str> Required whenever value is present
        value=<str>
    @return "<label>: <value>", or None if value is missing
    @throws SyntaxError if value is present but label is missing
    '''
    # get *once* from iter, check types
    params = dict(conv_items(strings.ustr, items))
    
    value = params.get('value')
    if value is None: return None # input is empty
    
    try: label = params['label']
    except KeyError as e: raise SyntaxError(e)
    return label+': '+value
166 995 aaronmk
funcs['_label'] = _label
167 917 aaronmk
168 1469 aaronmk
#### Transforming values
169
170 2016 aaronmk
def _collapse(items, node):
    '''Collapses a subtree if a required node inside it is missing or empty
    @param items
        require=<path> XPath to the required node, looked up within value
        value=<subtree>
    @return value, or None if the required node is missing or
        xml_dom.is_empty()
    @throws SyntaxError if require is missing
    '''
    params = dict(items)
    try: require = cast(strings.ustr, params['require'])
    except KeyError as e: raise SyntaxError(e)
    value = params.get('value')
    
    required_node = xpath.get_1(value, require, allow_rooted=False)
    is_present = (required_node != None
        and not xml_dom.is_empty(required_node))
    if is_present: return value
    return None
180
funcs['_collapse'] = _collapse
181
182 1478 aaronmk
# Converters selectable by _nullIf's "type" param (None = param not given)
types_by_name = {None: strings.ustr, 'str': strings.ustr, 'float': float}

def _nullIf(items, node):
    '''Turns a value into None if it matches a designated null value
    @param items
        null=<str> The null value. Converted using the type before comparing.
        value=<str>
        type=<name> Optional key of types_by_name
    @return util.none_if(value, null), or value if that raises ValueError
    @throws SyntaxError if null is missing or type is not in types_by_name
    '''
    params = dict(conv_items(strings.ustr, items))
    try:
        null_str = params['null']
        convert = types_by_name[params.get('type')]
    except KeyError as e: raise SyntaxError(e)
    null = convert(null_str)
    value = params.get('value')
    
    try: return util.none_if(value, null)
    except ValueError: return value # value not convertible, so can't equal null
197 1047 aaronmk
funcs['_nullIf'] = _nullIf
198
199 1602 aaronmk
def repl(repls, value):
    '''Looks up the replacement for a value
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @return The replacement (the input value itself if the replacement is "*")
    @throws FormatException if value not in map and no special '*' entry
    '''
    try: replacement = repls[value]
    except KeyError as e:
        # Wrap right away, before anything else can raise
        fe = FormatException(e)
        if '*' not in repls: raise fe
        replacement = repls['*']
    
    if replacement == '*': return value # '*' means keep input value the same
    return replacement
214 1602 aaronmk
215 2016 aaronmk
def _map(items, node):
    '''See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value, or None if the input is empty or the mapped
        value is the empty string
    '''
    # get *once* from iter, check types
    entries = conv_items(strings.ustr, items)
    value = pop_value(entries) # the remaining entries are the mappings
    if value is None: return None # input is empty
    
    mapped = repl(dict(entries), value)
    return util.none_if(mapped, u'') # empty value means None
225 1219 aaronmk
funcs['_map'] = _map
226
227 2016 aaronmk
def _replace(items, node):
    '''Applies regexp replacements to a value, in the order given
    @param items
        <last_entry> Value
        <other_entries> regexp=replacement. A regexp consisting only of word
            characters is matched as a whole word.
    @return The stripped result, or None if the input or the result is empty
    @throws SyntaxError if a regexp is invalid
    '''
    # get *once* from iter, check types
    entries = conv_items(strings.ustr, items)
    value = pop_value(entries)
    if value is None: return None # input is empty
    
    try:
        for pattern, with_ in entries:
            if re.match(r'^\w+$', pattern): # match whole word
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])'
            value = re.sub(pattern, with_, value)
    except sre_constants.error as e: raise SyntaxError(e)
    
    return util.none_if(value.strip(), u'') # empty strings always mean None
238 1219 aaronmk
funcs['_replace'] = _replace
239
240 1469 aaronmk
#### Quantities
241
242 2016 aaronmk
def _units(items, node):
    '''Applies units actions to a quantity, in the order given
    @param items
        <last_entry> Value, parsed with units.str2quantity()
        <other_entries> action=units, where action is one of:
            default: units.set_default_units() is applied
            to: The quantity is converted using units.convert()
            Empty units are passed as None.
    @return units.quantity2str() of the result, or None if the input is empty
    @throws SyntaxError if an action is not recognized
    @throws FormatException if the conversion raises ValueError or units are
        missing
    '''
    # get *once* from iter, check types
    entries = conv_items(strings.ustr, items)
    value = pop_value(entries)
    if value is None: return None # input is empty
    
    quantity = units.str2quantity(value)
    try:
        for action, units_ in entries:
            units_ = util.none_if(units_, u'')
            if action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise FormatException(e)
            elif action == 'default':
                units.set_default_units(quantity, units_)
            else: raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise FormatException(e)
    return units.quantity2str(quantity)
258 1225 aaronmk
funcs['_units'] = _units
259
260 1399 aaronmk
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first range separator
    @param range_sep The string that separates start and end
    @return tuple (start, end), each stripped of surrounding whitespace.
        If str_ is not a range, or starts with a "-" separator (a negative
        number), returns (str_, None) with str_ unchanged.
    '''
    start, sep, end = str_.partition(range_sep)
    is_negative_num = start == '' and range_sep == '-'
    if sep == '' or is_negative_num: return (str_, None) # not a range
    return (start.strip(), end.strip())
266
267 2016 aaronmk
def _rangeStart(items, node):
    '''Extracts the start of a range (see parse_range())
    @param items
        value=<str> The range
    @return The start of the range, or None if value is missing
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, end = parse_range(params['value'])
    return start
272
funcs['_rangeStart'] = _rangeStart
273
274 2016 aaronmk
def _rangeEnd(items, node):
    '''Extracts the end of a range (see parse_range())
    @param items
        value=<str> The range
    @return The end of the range (None if the value is not a range), or None
        if value is missing
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, end = parse_range(params['value'])
    return end
279
funcs['_rangeEnd'] = _rangeEnd
280
281 2016 aaronmk
def _range(items, node):
    '''Calculates the size of a numeric range
    @param items
        from=<float>
        to=<float>
    @return str(to - from), or None if either param is missing
    '''
    params = dict(conv_items(float, items))
    from_, to = params.get('from'), params.get('to')
    if from_ is None or to is None: return None
    return str(to - from_)
287
funcs['_range'] = _range
288
289 2016 aaronmk
def _avg(items, node):
    '''Calculates the mean of all the values, which are cast to float
    @return str of the mean, or None if there are no values
    '''
    values = [value for name, value in conv_items(float, items)]
    if not values: return None # input is empty
    
    total = 0.
    for value in values: total += value
    return str(total/len(values))
297 995 aaronmk
funcs['_avg'] = _avg
298 86 aaronmk
299 968 aaronmk
class CvException(Exception):
    '''Signals that a CV (coefficient of variation) value was found where one
    is not allowed. The message is fixed.
    '''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
304
305 2016 aaronmk
def _noCV(items, node):
306 968 aaronmk
    try: name, value = items.next()
307
    except StopIteration: return None
308 1609 aaronmk
    if re.match('^(?i)CV *\d+$', value): raise FormatException(CvException())
309 968 aaronmk
    return value
310 995 aaronmk
funcs['_noCV'] = _noCV
311 968 aaronmk
312 1469 aaronmk
#### Dates
313
314 2016 aaronmk
def _date(items, node):
    '''Formats a date as %Y-%m-%d
    @param items Either
        date=<str> A number, which is taken as a (possibly fractional) year,
            or else a date string parsed by dates.strtotime()
        or the parts
        year=<int> Required
        month=<int or name> Defaults to 1
        day=<int> Defaults to 1
        If the month is out of range, month and day are swapped and the date
        is tried once more.
    @return The formatted date; None if there are no params; or the date
        string unchanged if parsing it raises ImportError
    @throws FormatException if the year is missing or the date is invalid
    '''
    # get *once* from iter, check types
    parts = dict(conv_items(strings.ustr, items))
    
    if 'date' in parts: # date given as one string
        str_ = parts['date']
        try: year = float(str_)
        except ValueError: # not a number, so parse as a date string
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise FormatException(e)
        else: # the fractional part of the year selects the day of the year
            jan_1 = datetime.date(int(year), 1, 1)
            date = jan_1 + datetime.timedelta(round((year % 1.)*365))
    else: # date given as separate parts
        # Year is required
        try: parts['year']
        except KeyError as e:
            if not parts: return None # entire date is empty
            raise FormatException(e)
        
        # Convert month name to number
        month = parts.get('month')
        if 'month' in parts and not month.isdigit(): # month is name
            try: parts['month'] = str(dates.strtotime(month).month)
            except ValueError as e: raise FormatException(e)
        
        parts = dict(conv_items(format.str2int, parts.items()))
        parts.setdefault('month', 1)
        parts.setdefault('day', 1)
        
        for try_num in range(2):
            try: date = datetime.date(**parts)
            except ValueError as e:
                # Give up if the exception is still raised after the retry, or
                # if it isn't about the month
                if (try_num > 0
                    or strings.ustr(e) != 'month must be in 1..12'):
                    raise FormatException(e)
                # try swapping month and day
                parts['month'], parts['day'] = parts['day'], parts['month']
            else: break
    
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
358 995 aaronmk
funcs['_date'] = _date
359 86 aaronmk
360 2016 aaronmk
def _dateRangeStart(items, node):
    '''Extracts the start of a date range (see dates.parse_date_range())
    @param items
        value=<str> The date range
    @return The first element of the parsed range, or None if value is missing
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[0]
365
funcs['_dateRangeStart'] = _dateRangeStart
366 1311 aaronmk
367 2016 aaronmk
def _dateRangeEnd(items, node):
    '''Extracts the end of a date range (see dates.parse_date_range())
    @param items
        value=<str> The date range
    @return The second element of the parsed range, or None if value is
        missing
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[1]
372
funcs['_dateRangeEnd'] = _dateRangeEnd
373 1311 aaronmk
374 1469 aaronmk
#### Names
375
376 328 aaronmk
_name_parts_slices_items = [
377
    ('first', slice(None, 1)),
378
    ('middle', slice(1, -1)),
379
    ('last', slice(-1, None)),
380
]
381
name_parts_slices = dict(_name_parts_slices_items)
382
name_parts = [name for name, slice_ in _name_parts_slices_items]
383
384 2016 aaronmk
def _name(items, node):
    '''Joins the parts of a name with spaces, in the order of name_parts
    @param items <part>=<str> for any of the parts in name_parts. Entries with
        other names are ignored.
    '''
    given = dict(items)
    return ' '.join([given[part] for part in name_parts if part in given])
390 995 aaronmk
funcs['_name'] = _name
391 102 aaronmk
392 2016 aaronmk
def _namePart(items, node):
    '''Extracts parts of full names and joins them using _name()
    @param items <part>=<full name>, where part is a key of name_parts_slices.
        The full name is split on spaces and the part's slice of the words is
        kept.
    @throws SyntaxError if a part is not recognized
    '''
    extracted = []
    for part, full_name in items:
        try: slice_ = name_parts_slices[part]
        except KeyError as e: raise SyntaxError(e)
        words = full_name.split(' ')
        extracted.append((part, ' '.join(words[slice_])))
    return _name(extracted, node)
399 995 aaronmk
funcs['_namePart'] = _namePart
400 1321 aaronmk
401 1607 aaronmk
#### Angles
402
403 2016 aaronmk
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @param items
        value=<str> The direction. Only all-uppercase values are converted.
    @return The heading converted with util.cast(str, ...); the value
        unchanged if it is not uppercase; or None if value is missing
    @throws FormatException if the conversion raises KeyError
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    value = params['value']
    
    if not value.isupper(): return value # pass through other coordinate formats
    try:
        heading = angles.compass2heading(value)
        return util.cast(str, heading) # ignore None
    except KeyError as e: raise FormatException(e)
412
funcs['_compass'] = _compass
413
414 1469 aaronmk
#### Paths
415
416 2016 aaronmk
def _simplifyPath(items, node):
    '''Removes the elements of a path whose required node is missing or empty.
    Starting at the path's root, each element is replaced with its next
    element if its required node is missing or xml_dom.is_empty().
    @param items
        next=<path> XPath from an element to the next element in the path
        require=<path> XPath from an element to its required node
        path=<subtree> The first element of the path
    @param node Not used
    @return The first remaining element of the path, or None
    @throws SyntaxError if a param is missing
    '''
    params = dict(items)
    try:
        next_path = cast(strings.ustr, params['next'])
        require_path = cast(strings.ustr, params['require'])
        root = params['path']
    except KeyError as e: raise SyntaxError(e)
    
    cur = root
    while cur != None:
        successor = xpath.get_1(cur, next_path, allow_rooted=False)
        required_node = xpath.get_1(cur, require_path, allow_rooted=False)
        if required_node == None or xml_dom.is_empty(required_node):# empty elem
            xml_dom.replace(cur, successor) # remove current elem
            if cur is root: root = successor # also update root
        cur = successor
    return root
433
funcs['_simplifyPath'] = _simplifyPath