Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 2017 aaronmk
import warnings
7 111 aaronmk
8 1607 aaronmk
import angles
9 818 aaronmk
import dates
10 300 aaronmk
import exc
11 1580 aaronmk
import format
12 917 aaronmk
import maps
13 2105 aaronmk
import sql
14 1234 aaronmk
import strings
15 827 aaronmk
import term
16 1468 aaronmk
import units
17 1047 aaronmk
import util
18 86 aaronmk
import xml_dom
19 1321 aaronmk
import xpath
20 86 aaronmk
21 995 aaronmk
##### Exceptions
22
23 1612 aaronmk
class SyntaxError(exc.ExceptionWithCause):
    '''Wraps the underlying error when an XML function's parameters are
    malformed (e.g. a required parameter is missing).
    Note: inside this module this name shadows the builtin SyntaxError.
    '''
    def __init__(self, cause):
        message = 'Invalid XML function syntax'
        exc.ExceptionWithCause.__init__(self, message, cause)
27 278 aaronmk
28 1613 aaronmk
class FormatException(exc.ExceptionWithCause):
    '''Wraps the underlying error when an input value can't be converted'''
    def __init__(self, cause):
        message = 'Invalid input value'
        exc.ExceptionWithCause.__init__(self, message, cause)
31 843 aaronmk
32 1992 aaronmk
##### Helper functions
33 995 aaronmk
34 1992 aaronmk
def map_items(func, items):
    '''Applies func to the value of each (name, value) pair, keeping the names
    @param items iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in input order
    '''
    mapped = []
    for name, value in items:
        mapped.append((name, func(value)))
    return mapped
36
37
def cast(type_, val):
    '''Converts val with type_, reporting unconvertible input uniformly
    @param type_ callable that converts a single value
    @throws FormatException if type_ raises ValueError
    '''
    try:
        converted = type_(val)
    except ValueError as e:
        raise FormatException(e)
    return converted
41
42
def conv_items(type_, items):
    '''Casts the value of each entry produced by xml_dom.TextEntryOnlyIter(items)
    @return list of (name, converted value) pairs
    @throws FormatException if a value can't be cast to type_
    '''
    def convert(val): return cast(type_, val)
    return map_items(convert, xml_dom.TextEntryOnlyIter(items))
45
46
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value
    @param items list of (name, value) pairs; modified in place
    @param name Name of value param, or None to accept any name
    @return the last entry's value, or None if items is empty or the last
        entry has a different name (that entry is still removed)
    '''
    try:
        last = items.pop() # last entry contains value
    except IndexError:
        return None # input is empty and no actions
    name_accepted = name is None or last[0] == name
    if not name_accepted: return None # input is empty
    return last[1]
52
53 995 aaronmk
# Local XML function implementations, keyed by function (tag) name
funcs = {}

# Names of the functions that strip() evaluates with process() rather than
# replacing with their last parameter
structural_funcs = set()
56
57 1992 aaronmk
##### Public functions
58
59 2112 aaronmk
def is_func_name(name):
    '''Tells whether a tag name is in the XML function namespace, i.e. starts
    with "_" without being just "_"
    '''
    if name == '_': return False # '_' is default root node name
    return name.startswith('_')
61
62
def is_func(node):
    '''Tells whether node's tag name is in the XML function namespace'''
    tag = node.tagName
    return is_func_name(tag)
63
64
def is_xml_func_name(name):
    '''Tells whether name is a function name with an implementation in funcs'''
    if not is_func_name(name): return False
    return name in funcs
65
66
def is_xml_func(node):
    '''Tells whether node's tag names a function implemented in funcs'''
    tag = node.tagName
    return is_xml_func_name(tag)
67
68 2105 aaronmk
def process(node, on_error=exc.raise_, db=None):
    '''Evaluates the XML functions in a tree in place, children before parents.
    A function node is replaced, via xml_dom.replace_with_text(), with the
    function's result. If evaluating it raises, the node is instead replaced
    with a comment containing the function's XML, and the exception is passed
    to on_error.
    @param on_error Called with each exception, after the traceback and the
        function's XML have been added to it
    @param db If set, functions with no entry in funcs are run with sql.put().
        Otherwise, such functions are replaced with their value param.
    '''
    # db must be passed down too, or functions without a local implementation
    # that are nested below the root would never reach the DB
    for child in xml_dom.NodeElemIter(node): process(child, on_error, db)
    name = node.tagName
    if not is_func_name(name): return

    try:
        items = xml_dom.NodeTextEntryIter(node)
        try: func = funcs[name]
        except KeyError:
            if db != None: # DB with relational functions available
                value = sql.put(db, name, dict(items))
            else: value = pop_value(list(items)) # pass value through
        else: value = func(items, node) # local XML function

        xml_dom.replace_with_text(node, value)
    except Exception as e: # also catch non-wrapped exceptions (XML func bugs)
        # Save in case another exception raised, overwriting sys.exc_info()
        exc.add_traceback(e)
        str_ = strings.ustr(node)
        exc.add_msg(e, 'function:\n'+str_)
        xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
            '\n'+term.emph_multiline(str_)))

        on_error(e)
91
92 2433 aaronmk
def strip(node, preserve=set()):
    '''Removes the XML functions from a tree by substituting each one with its
    last parameter (which is usually its value). Structural functions are the
    exception: they are evaluated by process() instead.
    @param preserve set(str...) XML functions not to remove.
        * container can be any iterable type
    '''
    preserve = set(preserve)

    name = node.tagName
    removable = is_xml_func_name(name) and name not in preserve
    if removable and name in structural_funcs:
        process(node)
        return

    for child in xml_dom.NodeElemIter(node): strip(child, preserve)
    if removable:
        entries = list(xml_dom.NodeTextEntryIter(node))
        xml_dom.replace_with_text(node, pop_value(entries, None))
108 86 aaronmk
109 1469 aaronmk
##### XML functions
110 995 aaronmk
111
# Function names must start with _ to avoid collisions with real tags
112
# Functions take arguments (items)
113
114 2557 aaronmk
#### Structural
115 1469 aaronmk
116 2017 aaronmk
def _ignore(items, node):
117 994 aaronmk
    '''Used to "comment out" an XML subtree'''
118
    return None
119 995 aaronmk
funcs['_ignore'] = _ignore
120 2557 aaronmk
structural_funcs.add('_ignore')
121 994 aaronmk
122 2017 aaronmk
def _ref(items, node):
    '''Used to retrieve a value from another XML node
    @param items
        addr=<path> XPath to value, relative to the XML func's parent node
    @return the value found by xpath.get_value(). If that is None, a
        UserWarning is issued and None is returned.
    @throws SyntaxError if addr is missing
    '''
    try:
        addr = dict(items)['addr']
    except KeyError as e:
        raise SyntaxError(e)

    target = xpath.get_value(node.parentNode, addr)
    if target == None:
        msg = '_ref: XPath reference target missing: '+str(addr)
        warnings.warn(UserWarning(msg))
    return target
136
funcs['_ref'] = _ref
137 2557 aaronmk
structural_funcs.add('_ref')
138 2017 aaronmk
139 1469 aaronmk
#### Conditionals
140
141 2016 aaronmk
def _eq(items, node):
142 1234 aaronmk
    items = dict(items)
143
    try:
144
        left = items['left']
145
        right = items['right']
146
    except KeyError: return '' # a value was None
147
    return util.bool2str(left == right)
148
funcs['_eq'] = _eq
149
150 2016 aaronmk
def _if(items, node):
    '''Chooses between two values
    @param items
        cond Condition; it holds if bool(strings.ustr(cond)) is true
        then Result if the condition holds
        else (optional) Result otherwise; defaults to None
    @throws SyntaxError if cond or then is missing
    '''
    params = dict(items)
    try:
        cond, then = params['cond'], params['then']
    except KeyError as e:
        raise SyntaxError(e)
    holds = bool(cast(strings.ustr, cond))
    if holds: return then
    return params.get('else', None)
160
funcs['_if'] = _if
161
162 1469 aaronmk
#### Combining values
163
164 2016 aaronmk
def _alt(items, node):
165 113 aaronmk
    items = list(items)
166
    items.sort()
167 1186 aaronmk
    try: return items[0][1] # value of lowest-numbered item
168 1609 aaronmk
    except IndexError: return None # input got removed by e.g. FormatException
169 995 aaronmk
funcs['_alt'] = _alt
170 113 aaronmk
171 2016 aaronmk
def _merge(items, node):
    '''Passes the values of the entries, ordered by sorting the (name, value)
    pairs, to maps.merge_values()
    '''
    # get *once* from iter, check types
    entries = sorted(conv_items(strings.ustr, items))
    values = [value for name, value in entries]
    return maps.merge_values(*values)
176 995 aaronmk
funcs['_merge'] = _merge
177 917 aaronmk
178 2016 aaronmk
def _label(items, node):
    '''Prefixes a value with a label, as "<label>: <value>"
    @param items
        label The label; only required if value is present
        value (optional) The input value
    @return the labeled value, or None if there is no value
    @throws SyntaxError if value is present but label is missing
    '''
    # get *once* from iter, check types
    params = dict(conv_items(strings.ustr, items))
    value = params.get('value', None)
    if value == None: return None # input is empty
    try:
        label = params['label']
    except KeyError as e:
        raise SyntaxError(e)
    return label+': '+value
186 995 aaronmk
funcs['_label'] = _label
187 917 aaronmk
188 1469 aaronmk
#### Transforming values
189
190 2016 aaronmk
def _collapse(items, node):
    '''Collapses a subtree if the "value" element in it is NULL
    @param items
        require XPath, evaluated against value with xpath.get_value(), that
            must yield a value for the subtree to be kept
        value (optional) The subtree
    @return value, or None if the required value is missing
    @throws SyntaxError if require is missing
    '''
    params = dict(items)
    try:
        require = cast(strings.ustr, params['require'])
    except KeyError as e:
        raise SyntaxError(e)
    value = params.get('value', None)

    required = xpath.get_value(value, require, allow_rooted=False)
    if required == None: return None
    return value
199
funcs['_collapse'] = _collapse
200
201 1478 aaronmk
# Converters selectable with _nullIf's "type" param (None: param not given)
types_by_name = {
    None: strings.ustr,
    'str': strings.ustr,
    'float': float,
}
202 1477 aaronmk
203 2016 aaronmk
def _nullIf(items, node):
    '''Applies util.none_if() to the value and the "null" param
    @param items
        null The value that stands for NULL
        value (optional) The input value
        type (optional) Key of types_by_name; the converter that null is
            passed through before the comparison
    @throws SyntaxError if null is missing or type is not in types_by_name
    '''
    params = dict(conv_items(strings.ustr, items))
    try:
        null = params['null']
    except KeyError as e:
        raise SyntaxError(e)
    value = params.get('value', None)

    try:
        type_ = types_by_name[params.get('type', None)]
    except KeyError as e:
        raise SyntaxError(e)
    null = type_(null)

    try:
        return util.none_if(value, null)
    except ValueError:
        return value # value not convertible, so can't equal null
216 1047 aaronmk
funcs['_nullIf'] = _nullIf
217
218 1602 aaronmk
def repl(repls, value):
    '''Looks up the replacement for value.
    Raises error if value not in map and no special '*' entry
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @throws FormatException if value has no entry and there is no "*" entry
    '''
    try:
        new_value = repls[value]
    except KeyError as e:
        # Wrap right away, before the fallback lookup can raise another error
        fe = FormatException(e)
        try:
            new_value = repls['*']
        except KeyError:
            raise fe
    if new_value == '*': return value # '*' means keep input value the same
    return new_value
233 1602 aaronmk
234 2016 aaronmk
def _map(items, node):
    '''See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return the replacement passed through util.none_if(..., u''), or None if
        there is no value
    '''
    # get *once* from iter, check types
    entries = conv_items(strings.ustr, items)
    value = pop_value(entries)
    if value == None: return None # input is empty
    replacement = repl(dict(entries), value)
    return util.none_if(replacement, u'') # empty value means None
244 1219 aaronmk
funcs['_map'] = _map
245
246 2016 aaronmk
def _replace(items, node):
    '''Applies regular expression replacements to a value
    @param items
        <last_entry> Value
        <other_entries> pattern=replacement, applied in order with re.sub().
            A pattern consisting only of word characters is wrapped so that
            it does not match directly next to a letter or digit.
    @return the stripped result passed through util.none_if(..., u''), or None
        if there is no value
    @throws SyntaxError if re raises an error for a pattern or replacement
    '''
    # get *once* from iter, check types
    entries = conv_items(strings.ustr, items)
    value = pop_value(entries)
    if value == None: return None # input is empty
    try:
        for pattern, with_ in entries:
            if re.match(r'^\w+$', pattern):
                # match whole word
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])'
            value = re.sub(pattern, with_, value)
    except sre_constants.error as e:
        raise SyntaxError(e)
    return util.none_if(value.strip(), u'') # empty strings always mean None
257 1219 aaronmk
funcs['_replace'] = _replace
258
259 1469 aaronmk
#### Quantities
260
261 2016 aaronmk
def _units(items, node):
    '''Applies unit actions to a quantity
    @param items
        <last_entry> Value, parsed with units.str2quantity()
        <other_entries> action=units, applied in order:
            default=<units> calls units.set_default_units()
            to=<units> calls units.convert()
            Empty units are passed through util.none_if(..., u'') first.
    @return units.quantity2str() of the result, or None if there is no value
    @throws SyntaxError if an action is neither "default" nor "to"
    @throws FormatException if units are missing or the conversion fails
    '''
    # get *once* from iter, check types
    entries = conv_items(strings.ustr, items)
    value = pop_value(entries)
    if value == None: return None # input is empty

    quantity = units.str2quantity(value)
    try:
        for action, units_ in entries:
            units_ = util.none_if(units_, u'')
            if action == 'default':
                units.set_default_units(quantity, units_)
            elif action == 'to':
                try:
                    quantity = units.convert(quantity, units_)
                except ValueError as e:
                    raise FormatException(e)
            else:
                raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e:
        raise FormatException(e)
    return units.quantity2str(quantity)
277 1225 aaronmk
funcs['_units'] = _units
278
279 1399 aaronmk
def parse_range(str_, range_sep='-'):
    '''Splits "<start><range_sep><end>" at the first separator
    @return (start, end), each stripped of surrounding whitespace, or
        (str_, None) with str_ unchanged if str_ is not a range
    '''
    start, sep, end = str_.partition(range_sep)
    lacks_sep = sep == '' # not a range
    is_negative = start == '' and range_sep == '-' # negative number
    if lacks_sep or is_negative: return (str_, None)
    return (start.strip(), end.strip())
285
286 2016 aaronmk
def _rangeStart(items, node):
    '''Returns the start of the range in the "value" param: see parse_range()
    @return the start, or None if there is no value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, end = parse_range(params['value'])
    return start
291
funcs['_rangeStart'] = _rangeStart
292
293 2016 aaronmk
def _rangeEnd(items, node):
    '''Returns the end of the range in the "value" param: see parse_range()
    @return the end, or None if there is no value or it is not a range
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, end = parse_range(params['value'])
    return end
298
funcs['_rangeEnd'] = _rangeEnd
299
300 2016 aaronmk
def _range(items, node):
    '''Computes the size of a numeric range
    @param items from=, to= The range's ends; each is cast to float
    @return str(to - from), or None if either end is missing
    '''
    params = dict(conv_items(float, items))
    from_, to = params.get('from', None), params.get('to', None)
    if from_ == None or to == None: return None
    return str(to - from_)
306
funcs['_range'] = _range
307
308 2016 aaronmk
def _avg(items, node):
    '''Computes the arithmetic mean of the values, each cast to float
    @return str() of the mean, or None if there are no values
    '''
    total = 0.
    count = 0
    for name, value in conv_items(float, items):
        total += value
        count += 1
    if count == 0: return None # input is empty
    return str(total/count)
316 995 aaronmk
funcs['_avg'] = _avg
317 86 aaronmk
318 968 aaronmk
class CvException(Exception):
    '''Signals a CV (coefficient of variation) value where one isn't allowed'''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
323
324 2016 aaronmk
def _noCV(items, node):
325 968 aaronmk
    try: name, value = items.next()
326
    except StopIteration: return None
327 1609 aaronmk
    if re.match('^(?i)CV *\d+$', value): raise FormatException(CvException())
328 968 aaronmk
    return value
329 995 aaronmk
funcs['_noCV'] = _noCV
330 968 aaronmk
331 1469 aaronmk
#### Dates
332
333 2016 aaronmk
def _date(items, node):
    '''Formats a date as YYYY-MM-DD
    @param items Either
        date=<str> A date string, or a year number whose fractional part is
            taken as a fraction of 365 days
        or the date's parts:
        year= Required
        month= (optional) Number or name; defaults to 1
        day= (optional) Defaults to 1
    @return the formatted date; None if there are no params; the date string
        as-is if parsing it raises ImportError
    @throws FormatException if the date can't be parsed or is out of range
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try: str_ = items['date']
    except KeyError:
        # Year is required
        try: items['year']
        except KeyError as e:
            if items == {}: return None # entire date is empty
            else: raise FormatException(e)

        # Convert month name to number
        try: month = items['month']
        except KeyError: pass
        else:
            if not month.isdigit(): # month is name
                try: items['month'] = str(dates.strtotime(month).month)
                except ValueError as e: raise FormatException(e)

        items = dict(conv_items(format.str2int, items.iteritems()))
        items.setdefault('month', 1)
        items.setdefault('day', 1)

        for try_num in xrange(2):
            try:
                date = datetime.date(**items)
                break
            except ValueError as e:
                if try_num > 0: raise FormatException(e)
                    # exception still raised after retry
                msg = strings.ustr(e)
                if msg == 'month must be in 1..12': # try swapping month and day
                    items['month'], items['day'] = items['day'], items['month']
                else: raise FormatException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise FormatException(e)
        else:
            # A numeric year can still be invalid (e.g. 0, nan, inf, or out of
            # datetime's range), so report it like every other invalid date
            try:
                date = (datetime.date(int(year), 1, 1) +
                    datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e: raise FormatException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
377 995 aaronmk
funcs['_date'] = _date
378 86 aaronmk
379 2016 aaronmk
def _dateRangeStart(items, node):
    '''Returns element 0 of dates.parse_date_range() for the "value" param
    @return that element, or None if there is no value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[0]
384
funcs['_dateRangeStart'] = _dateRangeStart
385 1311 aaronmk
386 2016 aaronmk
def _dateRangeEnd(items, node):
    '''Returns element 1 of dates.parse_date_range() for the "value" param
    @return that element, or None if there is no value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[1]
391
funcs['_dateRangeEnd'] = _dateRangeEnd
392 1311 aaronmk
393 1469 aaronmk
#### Names
394
395 328 aaronmk
_name_parts_slices_items = [
396
    ('first', slice(None, 1)),
397
    ('middle', slice(1, -1)),
398
    ('last', slice(-1, None)),
399
]
400
name_parts_slices = dict(_name_parts_slices_items)
401
name_parts = [name for name, slice_ in _name_parts_slices_items]
402
403 2016 aaronmk
def _name(items, node):
    '''Joins the name parts that are present with spaces, in name_parts order
    @param items <part>=<value> entries; parts not in name_parts are ignored
    '''
    params = dict(items)
    present = [params[part] for part in name_parts if part in params]
    return ' '.join(present)
409 995 aaronmk
funcs['_name'] = _name
410 102 aaronmk
411 2016 aaronmk
def _namePart(items, node):
    '''Extracts parts of full names and combines them with _name()
    @param items <part>=<full name> entries, where <part> is a key of
        name_parts_slices. The full name is split on spaces, and the words
        covered by the part's slice are kept.
    @throws SyntaxError if a part is not in name_parts_slices
    '''
    out_items = []
    for part, value in items:
        try:
            slice_ = name_parts_slices[part]
        except KeyError as e:
            raise SyntaxError(e)
        words = value.split(' ')
        out_items.append((part, ' '.join(words[slice_])))
    return _name(out_items, node)
418 995 aaronmk
funcs['_namePart'] = _namePart
419 1321 aaronmk
420 1607 aaronmk
#### Angles
421
422 2016 aaronmk
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @return the heading passed through util.cast(str, ...); the value as-is
        if it is not all uppercase; None if there is no value
    @throws FormatException if angles.compass2heading() raises KeyError
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    value = params['value']

    if not value.isupper(): return value # pass through other coordinate formats
    try:
        heading = angles.compass2heading(value)
        return util.cast(str, heading) # ignore None
    except KeyError as e:
        raise FormatException(e)
431
funcs['_compass'] = _compass
432
433 1469 aaronmk
#### Paths
434
435 2016 aaronmk
def _simplifyPath(items, node):
    '''Removes the elements along a path that lack a required value
    @param items
        next XPath from an element of the path to the one after it
        require XPath that must yield a value for an element to be kept
        path The first element of the path
    @return the first element that was kept; None if none was
    @throws SyntaxError if a param is missing
    '''
    params = dict(items)
    try:
        next_path = cast(strings.ustr, params['next'])
        require = cast(strings.ustr, params['require'])
        root = params['path']
    except KeyError as e:
        raise SyntaxError(e)

    curr = root
    while curr != None:
        # Look up the following element before curr is possibly replaced
        following = xpath.get_1(curr, next_path, allow_rooted=False)
        if xpath.get_value(curr, require, allow_rooted=False) == None: # empty
            xml_dom.replace(curr, following) # remove current elem
            if curr is root: root = following # also update root
        curr = following
    return root
451
funcs['_simplifyPath'] = _simplifyPath