Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 2017 aaronmk
import warnings
7 111 aaronmk
8 1607 aaronmk
import angles
9 818 aaronmk
import dates
10 300 aaronmk
import exc
11 1580 aaronmk
import format
12 917 aaronmk
import maps
13 2105 aaronmk
import sql
14 1234 aaronmk
import strings
15 827 aaronmk
import term
16 1468 aaronmk
import units
17 1047 aaronmk
import util
18 86 aaronmk
import xml_dom
19 1321 aaronmk
import xpath
20 86 aaronmk
21 995 aaronmk
##### Exceptions
22
23 1612 aaronmk
class SyntaxError(exc.ExceptionWithCause):
    '''Signals that an XML function was invoked incorrectly (e.g. a required
    param is missing).
    * Shadows the builtin SyntaxError inside this module
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax'
        exc.ExceptionWithCause.__init__(self, msg, cause)
27 278 aaronmk
28 1613 aaronmk
class FormatException(exc.ExceptionWithCause):
    '''Signals that an input value could not be converted or parsed.
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        msg = 'Invalid input value'
        exc.ExceptionWithCause.__init__(self, msg, cause)
31 843 aaronmk
32 1992 aaronmk
##### Helper functions
33 995 aaronmk
34 1992 aaronmk
def map_items(func, items):
    '''Applies func to the value of every (name, value) pair, keeping the names
    @return list of (name, func(value)) tuples, in the input order
    '''
    mapped = []
    for name, value in items: mapped.append((name, func(value)))
    return mapped
36
37
def cast(type_, val):
    '''Converts val with type_
    @param type_ Callable (e.g. a type) applied to val
    @return type_(val)
    @throws FormatException if type_ raises ValueError (can't cast)
    '''
    try: return type_(val)
    except ValueError as e: raise FormatException(e)
41
42
def conv_items(type_, items):
    '''Casts the value of every item to type_
    @param items (name, value) entries; passed through
        xml_dom.TextEntryOnlyIter before conversion
    @return list of (name, converted value) tuples
    @throws FormatException if a value can't be cast (see cast())
    '''
    def to_type(val): return cast(type_, val)
    return map_items(to_type, xml_dom.TextEntryOnlyIter(items))
45
46
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value
    * items is modified in place: the last entry is popped even when its name
      does not match
    @param items list of (name, value) entries
    @param name Name of value param, or None to accept any name
    @return The last entry's value, or None if items is empty or the last
        entry is not named `name`
    '''
    try: last = items.pop() # last entry contains value
    except IndexError: return None # input is empty and no actions
    if name is not None and last[0] != name: return None # input is empty
    return last[1]
52
53 995 aaronmk
# Maps each XML function's tag name to the Python function implementing it
funcs = {}
54
55 1992 aaronmk
##### Public functions
56
57 2112 aaronmk
def is_func_name(name):
    '''@return Whether name starts with '_' but is not just '_' '''
    if name == '_': return False # '_' is default root node name
    return name.startswith('_')
59
60
def is_func(node):
    '''@return Whether the element's tag name is a function name'''
    tag = node.tagName
    return is_func_name(tag)
61
62
def is_xml_func_name(name):
    '''@return Whether name is a function name that is registered in funcs'''
    if not is_func_name(name): return False
    return name in funcs
63
64
def is_xml_func(node):
    '''@return Whether the element's tag name is a registered XML function'''
    tag = node.tagName
    return is_xml_func_name(tag)
65
66 2105 aaronmk
def process(node, on_error=exc.raise_, db=None):
    '''Evaluates the functions in an XML subtree, children first, replacing
    each function node with the text of its result
    @param node Root element of the subtree
    @param on_error Called with the exception when evaluating a function fails.
        Before that, the failing node is replaced with a comment containing
        its string form.
    @param db If not None, functions that are not in funcs are evaluated with
        sql.put(db, name, params). Otherwise such a function's "value" param
        is passed through.
    '''
    # db must be forwarded too, or nested functions never see the DB
    for child in xml_dom.NodeElemIter(node): process(child, on_error, db)
    name = node.tagName
    if is_func_name(name):
        try:
            items = xml_dom.NodeTextEntryIter(node)
            try: func = funcs[name]
            except KeyError:
                if db is not None: # DB with relational functions available
                    value = sql.put(db, name, dict(items))
                else: value = pop_value(list(items)) # pass value through
            else: value = func(items, node) # local XML function
            
            xml_dom.replace_with_text(node, value)
        except Exception as e: # also catch non-wrapped exceptions (XML func bugs)
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
                '\n'+term.emph_multiline(str_)))
            
            on_error(e)
89
90 2433 aaronmk
def strip(node, preserve=set()):
    '''Removes the registered XML functions from a subtree without evaluating
    them. Each one is replaced with its last parameter (which is usually its
    value), except for _ignore, which is removed completely.
    @param preserve set(str...) XML functions not to remove.
        * container can be any iterable type
    '''
    preserve = set(preserve) # accept any iterable
    
    for child in xml_dom.NodeElemIter(node): strip(child, preserve)
    tag = node.tagName
    if not is_xml_func_name(tag) or tag in preserve: return # keep as is
    
    if tag == '_ignore': value = None
    else: value = pop_value(list(xml_dom.NodeTextEntryIter(node)), None)
    xml_dom.replace_with_text(node, value)
104 86 aaronmk
105 1469 aaronmk
##### XML functions
106 995 aaronmk
107
# Function names must start with _ to avoid collisions with real tags
108
# Functions take arguments (items, node)
109
110 1469 aaronmk
#### General
111
112 2017 aaronmk
def _ignore(items, node):
    '''Used to "comment out" an XML subtree: ignores its params and always
    returns None
    '''
    return
115 995 aaronmk
# Register under the tag name used in XML documents
funcs['_ignore'] = _ignore
116 994 aaronmk
117 2017 aaronmk
def _ref(items, node):
    '''Used to retrieve a value from another XML node
    @param items
        addr=<path> XPath to value, relative to the XML func's parent node
    @return The referenced value. If it is None, a UserWarning is issued and
        None is returned.
    @throws SyntaxError if addr is missing
    '''
    items = dict(items)
    try: addr = items['addr']
    except KeyError as e: raise SyntaxError(e)
    
    value = xpath.get_value(node.parentNode, addr)
    if value is None:
        warnings.warn(UserWarning('_ref: XPath reference target missing: '
            +str(addr)))
    return value
funcs['_ref'] = _ref
132
133 1469 aaronmk
#### Conditionals
134
135 2016 aaronmk
def _eq(items, node):
    '''Compares the "left" and "right" params
    @return util.bool2str() of whether they are equal, or '' if either param
        is absent
    '''
    params = dict(items)
    if 'left' not in params or 'right' not in params:
        return '' # a value was None
    return util.bool2str(params['left'] == params['right'])
funcs['_eq'] = _eq
143
144 2016 aaronmk
def _if(items, node):
    '''Selects between two values
    @param items
        cond=<value> Condition; true iff its string form is non-empty
        then=<value> Result when cond is true
        else=<value> Result when cond is false (optional, defaults to None)
    @throws SyntaxError if cond or then is missing
    '''
    items = dict(items)
    try:
        cond = items['cond']
        then = items['then']
    except KeyError as e: raise SyntaxError(e)
    else_ = items.get('else', None)
    cond = bool(cast(strings.ustr, cond))
    if cond: return then
    else: return else_
funcs['_if'] = _if
155
156 1469 aaronmk
#### Combining values
157
158 2016 aaronmk
def _alt(items, node):
    '''Picks the first available alternative
    @return The value of the entry that sorts lowest, or None if there are no
        entries
    '''
    ranked = sorted(items)
    try: return ranked[0][1] # value of lowest-numbered item
    except IndexError: return None # input got removed by e.g. FormatException
163 995 aaronmk
# Register under the tag name used in XML documents
funcs['_alt'] = _alt
164 113 aaronmk
165 2016 aaronmk
def _merge(items, node):
    '''Merges the values of all params with maps.merge_values(), passing them
    in the sorted order of their entries
    '''
    # conv_items() gets *once* from iter, checks types
    ordered = sorted(conv_items(strings.ustr, items))
    values = [value for name, value in ordered]
    return maps.merge_values(*values)
funcs['_merge'] = _merge
171 917 aaronmk
172 2016 aaronmk
def _label(items, node):
    '''Prefixes a value with a label
    @param items
        label=<str> Text to put before the value
        value=<str> The value
    @return '<label>: <value>', or None if there is no value
    @throws SyntaxError if there is a value but no label
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    value = items.get('value', None)
    if value is None: return None # input is empty
    try: label = items['label']
    except KeyError as e: raise SyntaxError(e)
    return label+': '+value
funcs['_label'] = _label
181 917 aaronmk
182 1469 aaronmk
#### Transforming values
183
184 2016 aaronmk
def _collapse(items, node):
    '''Collapses a subtree if the "value" element in it is NULL
    @param items
        require=<path> XPath, evaluated against value, of the node that must
            be non-empty
        value=<subtree> The subtree to keep or collapse
    @return value, or None if the required node is missing or empty
    @throws SyntaxError if require is missing
    '''
    items = dict(items)
    try: require = cast(strings.ustr, items['require'])
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)
    
    required_node = xpath.get_1(value, require, allow_rooted=False)
    if required_node is None or xml_dom.is_empty(required_node): return None
    else: return value
funcs['_collapse'] = _collapse
195
196 1478 aaronmk
# Maps the "type" param of _nullIf to the converter applied to "null"
types_by_name = {None: strings.ustr, 'str': strings.ustr, 'float': float}

def _nullIf(items, node):
    '''Turns a value into None if it equals a designated null value
    @param items
        null=<str> The value that means NULL
        value=<str> The input value (optional)
        type=<name> Key of types_by_name selecting how null is converted
            before comparing (optional)
    @return util.none_if(value, null); value unchanged if that raises
        ValueError
    @throws SyntaxError if null is missing or type is not in types_by_name
    '''
    items = dict(conv_items(strings.ustr, items))
    try: null = items['null']
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)
    type_str = items.get('type', None)
    
    try: type_ = types_by_name[type_str]
    except KeyError as e: raise SyntaxError(e)
    null = type_(null)
    
    try: return util.none_if(value, null)
    except ValueError: return value # value not convertible, so can't equal null
funcs['_nullIf'] = _nullIf
212
213 1602 aaronmk
def repl(repls, value):
    '''Looks up the replacement for value
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @return The replacement, or value itself if the replacement is "*"
    @throws FormatException if value not in map and no special '*' entry
    '''
    try: new_value = repls[value]
    except KeyError as e:
        # Wrap while the original KeyError is still the active exception
        if '*' not in repls: raise FormatException(e)
        new_value = repls['*']
    if new_value == '*': new_value = value # '*' means keep input value the same
    return new_value
228 1602 aaronmk
229 2016 aaronmk
def _map(items, node):
    '''See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value, or None if the input is empty or maps to ''
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    return util.none_if(repl(dict(items), value), u'') # empty value means None
funcs['_map'] = _map
240
241 2016 aaronmk
def _replace(items, node):
    '''Applies regular expression replacements to a value, in order
    @param items
        <last_entry> Value
        <other_entries> pattern=replacement, applied with re.sub(). A pattern
            made only of word characters matches whole words only.
    @return The stripped result, or None if the input or the result is empty
    @throws SyntaxError if a pattern or replacement is not valid for re
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    try:
        for pattern, with_ in items:
            if re.match(r'^\w+$', pattern):
                # match whole word: no letter or digit on either side
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])'
            value = re.sub(pattern, with_, value)
    except re.error as e: raise SyntaxError(e)
    return util.none_if(value.strip(), u'') # empty strings always mean None
funcs['_replace'] = _replace
253
254 1469 aaronmk
#### Quantities
255
256 2016 aaronmk
def _units(items, node):
    '''Applies unit actions to a quantity, in order
    @param items
        <last_entry> Value, parsed with units.str2quantity()
        <other_entries> action=units, where action is one of:
            default Passes units to units.set_default_units()
            to Converts the quantity with units.convert()
            An empty units string is treated as None.
    @return units.quantity2str() of the result, or None if the input is empty
    @throws FormatException if the conversion fails or units are missing
    @throws SyntaxError if an action is not recognized
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    
    quantity = units.str2quantity(value)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default': units.set_default_units(quantity, units_)
            elif action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise FormatException(e)
            else: raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise FormatException(e)
    return units.quantity2str(quantity)
funcs['_units'] = _units
273
274 1399 aaronmk
def parse_range(str_, range_sep='-'):
    '''Splits a "<start><range_sep><end>" string at the first separator
    @return (start, end), each stripped of surrounding whitespace. If str_ is
        not a range, or is a negative number (starts with the '-' separator),
        returns (str_, None) with str_ unchanged.
    '''
    head, found, tail = str_.partition(range_sep)
    is_negative_num = head == '' and range_sep == '-'
    if found == '' or is_negative_num: return (str_, None) # not a range
    return (head.strip(), tail.strip())
280
281 2016 aaronmk
def _rangeStart(items, node):
    '''@return The start of the range in the "value" param (see parse_range()),
        or None if there is no value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, end = parse_range(params['value'])
    return start
funcs['_rangeStart'] = _rangeStart
287
288 2016 aaronmk
def _rangeEnd(items, node):
    '''@return The end of the range in the "value" param (see parse_range()),
        or None if there is no value or it is not a range
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, end = parse_range(params['value'])
    return end
funcs['_rangeEnd'] = _rangeEnd
294
295 2016 aaronmk
def _range(items, node):
    '''Computes the size of a numeric range
    @param items
        from=<number> Start of the range
        to=<number> End of the range
    @return str(to - from), or None if either param is absent
    @throws FormatException if a param is not a number (see conv_items())
    '''
    items = dict(conv_items(float, items))
    from_ = items.get('from', None)
    to = items.get('to', None)
    if from_ is None or to is None: return None
    return str(to - from_)
funcs['_range'] = _range
302
303 2016 aaronmk
def _avg(items, node):
    '''@return str() of the arithmetic mean of all param values, or None if
        there are no params
    '''
    values = [value for name, value in conv_items(float, items)]
    if not values: return None # input is empty
    return str(sum(values, 0.)/len(values))
funcs['_avg'] = _avg
312 86 aaronmk
313 968 aaronmk
class CvException(Exception):
    '''Signals that a CV (coefficient of variation) value was found where one
    is not allowed
    '''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
318
319 2016 aaronmk
def _noCV(items, node):
    '''Passes the first param's value through unless it is a CV value
    @param items Iterable whose first (name, value) entry holds the input
    @return The value, or None if there are no params
    @throws FormatException (wrapping CvException) if the value is "CV"
        followed by digits, in any letter case
    '''
    try: name, value = next(iter(items))
    except StopIteration: return None
    # Flag passed as an argument: an inline (?i) after ^ is rejected by newer re
    if re.match(r'^CV *\d+$', value, re.IGNORECASE):
        raise FormatException(CvException())
    return value
324 995 aaronmk
# Register under the tag name used in XML documents
funcs['_noCV'] = _noCV
325 968 aaronmk
326 1469 aaronmk
#### Dates
327
328 2016 aaronmk
def _date(items, node):
    '''Formats a date as YYYY-MM-DD
    @param items Either
        date=<str> A whole date: a (possibly fractional) year number, or a
            string parsed by dates.strtotime()
        or the separate parts
        year=<int> Required
        month=<int|name> Defaults to 1. Names are parsed by dates.strtotime().
        day=<int> Defaults to 1
        If month is out of range, month and day are swapped and tried again.
    @return The formatted date. None if there are no params. The date string
        unchanged if dates.strtotime() raises ImportError.
    @throws FormatException if the date can't be parsed or is out of range
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try: str_ = items['date']
    except KeyError:
        # Year is required
        try: items['year']
        except KeyError as e:
            if items == {}: return None # entire date is empty
            else: raise FormatException(e)
        
        # Convert month name to number
        try: month = items['month']
        except KeyError: pass
        else:
            if not month.isdigit(): # month is name
                try: items['month'] = str(dates.strtotime(month).month)
                except ValueError as e: raise FormatException(e)
        
        items = dict(conv_items(format.str2int, items.items()))
        items.setdefault('month', 1)
        items.setdefault('day', 1)
        
        for try_num in range(2):
            try:
                date = datetime.date(**items)
                break
            except ValueError as e:
                if try_num > 0: raise FormatException(e)
                    # exception still raised after retry
                msg = strings.ustr(e)
                # Prefix match, so detail appended to the message does not
                # defeat the check
                if msg.startswith('month must be in 1..12'):
                    # try swapping month and day
                    items['month'], items['day'] = items['day'], items['month']
                else: raise FormatException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise FormatException(e)
        else:
            # Fractional year: offset from Jan 1 by that fraction of 365 days.
            # An unrepresentable year (e.g. 0, nan, inf) is an invalid input.
            try: date = (datetime.date(int(year), 1, 1) +
                datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e: raise FormatException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
funcs['_date'] = _date
373 86 aaronmk
374 2016 aaronmk
def _dateRangeStart(items, node):
    '''@return Element 0 of dates.parse_date_range() for the "value" param, or
        None if there is no value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[0]
funcs['_dateRangeStart'] = _dateRangeStart
380 1311 aaronmk
381 2016 aaronmk
def _dateRangeEnd(items, node):
    '''@return Element 1 of dates.parse_date_range() for the "value" param, or
        None if there is no value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[1]
funcs['_dateRangeEnd'] = _dateRangeEnd
387 1311 aaronmk
388 1469 aaronmk
#### Names
389
390 328 aaronmk
# Ordered (part name, slice over the words of a full name) pairs
_name_parts_slices_items = [('first', slice(None, 1)), ('middle', slice(1, -1)),
    ('last', slice(-1, None))]
name_parts_slices = dict(_name_parts_slices_items) # part name -> slice
name_parts = [name for name, slice_ in _name_parts_slices_items] # in order
397
398 2016 aaronmk
def _name(items, node):
    '''Joins the name parts that are present, in name_parts order, with spaces
    * Params that are not in name_parts are ignored
    '''
    given = dict(items)
    present = [given[part] for part in name_parts if part in given]
    return ' '.join(present)
funcs['_name'] = _name
405 102 aaronmk
406 2016 aaronmk
def _namePart(items, node):
    '''Extracts parts of full names
    @param items <part>=<full name> entries, where part is a key of
        name_parts_slices. The slice for that part is applied to the
        space-separated words of the name.
    @return The extracted parts, combined by _name()
    @throws SyntaxError if a part is not in name_parts_slices
    '''
    out_items = []
    for part, value in items:
        try: slice_ = name_parts_slices[part]
        except KeyError as e: raise SyntaxError(e)
        out_items.append((part, ' '.join(value.split(' ')[slice_])))
    return _name(out_items, node)
funcs['_namePart'] = _namePart
414 1321 aaronmk
415 1607 aaronmk
#### Angles
416
417 2016 aaronmk
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @param items
        value=<str> The direction
    @return The heading from angles.compass2heading(), cast with
        util.cast(str, ...). None if there is no value. The value unchanged
        if it is not all uppercase.
    @throws FormatException if angles.compass2heading() raises KeyError
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    
    if not value.isupper(): return value # pass through other coordinate formats
    try: return util.cast(str, angles.compass2heading(value)) # ignore None
    except KeyError as e: raise FormatException(e)
funcs['_compass'] = _compass
427
428 1469 aaronmk
#### Paths
429
430 2016 aaronmk
def _simplifyPath(items, node):
    '''Removes the empty elements along a path of nested elements
    @param items
        next=<path> XPath from an element to the next element on the path
        require=<path> XPath from an element to the node that must be
            non-empty for the element to be kept
        path=<subtree> First element of the path
    @return The first element that was kept, or None if none was
    @throws SyntaxError if a param is missing
    '''
    items = dict(items)
    try:
        next_path = cast(strings.ustr, items['next'])
        require = cast(strings.ustr, items['require'])
        root = items['path']
    except KeyError as e: raise SyntaxError(e)
    
    curr = root
    while curr is not None:
        # Look up the successor before curr is possibly replaced
        next_node = xpath.get_1(curr, next_path, allow_rooted=False)
        required_node = xpath.get_1(curr, require, allow_rooted=False)
        if required_node is None or xml_dom.is_empty(required_node):# empty elem
            xml_dom.replace(curr, next_node) # remove current elem
            if curr is root: root = next_node # also update root
        curr = next_node
    return root
funcs['_simplifyPath'] = _simplifyPath