Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 5190 aaronmk
import operator
5 6398 aaronmk
import os
6 968 aaronmk
import re
7 1219 aaronmk
import sre_constants
8 2017 aaronmk
import warnings
9 111 aaronmk
10 1607 aaronmk
import angles
11 818 aaronmk
import dates
12 300 aaronmk
import exc
13 1580 aaronmk
import format
14 917 aaronmk
import maps
15 3688 aaronmk
import sql
16 3077 aaronmk
import sql_io
17 1234 aaronmk
import strings
18 827 aaronmk
import term
19 1468 aaronmk
import units
20 1047 aaronmk
import util
21 86 aaronmk
import xml_dom
22 1321 aaronmk
import xpath
23 86 aaronmk
24 995 aaronmk
##### Exceptions
25
26 1612 aaronmk
class SyntaxError(exc.ExceptionWithCause):
    '''Signals that an XML function was invoked with invalid syntax.
    Note: this name shadows the builtin SyntaxError inside this module.
    @param cause The underlying exception that describes the problem
    '''
    def __init__(self, cause):
        exc.ExceptionWithCause.__init__(self, 'Invalid XML function syntax',
            cause)
30 278 aaronmk
31 1613 aaronmk
class FormatException(exc.ExceptionWithCause):
    '''Signals that an input value could not be parsed or converted.
    @param cause The underlying exception that describes the problem
    '''
    def __init__(self, cause):
        exc.ExceptionWithCause.__init__(self, 'Invalid input value', cause)
34 843 aaronmk
35 1992 aaronmk
##### Helper functions
36 995 aaronmk
37 4334 aaronmk
def map_names(func, params):
    '''Applies func to the name of each (name, value) pair.
    @param func f(name) Conversion to apply to each name
    @param params Iterable of (name, value) pairs
    @return list of (func(name), value) pairs, in the original order
    '''
    return [(func(name), value) for name, value in params]
39
40
def variadic_args(node):
    '''Gets the values of node's entries, ordered by numeric param name.
    Each entry name is converted with float() before sorting.
    @return list of the entry values, sorted by name
    '''
    args = map_names(float, xml_dom.NodeEntryIter(node))
    # Sort on the name only, so that entries with equal names keep their
    # original order instead of falling back to comparing the values
    args.sort(key=lambda entry: entry[0])
    return [value for name, value in args]
44
45 1992 aaronmk
def map_items(func, items):
    '''Applies func to the value of each (name, value) pair.
    @param func f(value) Conversion to apply to each value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in the original order
    '''
    return [(name, func(value)) for name, value in items]
47
48
def cast(type_, val):
    '''Converts val using type_.
    Throws FormatException if can't cast (i.e. type_ raised ValueError).
    @return type_(val)
    '''
    try: return type_(val)
    # `as` form is accepted by both Python 2.6+ and Python 3
    except ValueError as e: raise FormatException(e)
52
53
def conv_items(type_, items):
    '''Casts the value of every entry in items to type_.
    The entries are first passed through xml_dom.TextEntryOnlyIter().
    Throws FormatException if a value can't be cast.
    @return list of (name, type_(value)) pairs
    '''
    return map_items(lambda val: cast(type_, val),
        xml_dom.TextEntryOnlyIter(items))
56
57
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value.
    Note that the last entry is removed from items even when its name does not
    match.
    @param items Mutable list of (name, value) pairs
    @param name Name of value param, or None to accept any name
    @return The value, or None if items is empty or the last entry is not named
        `name`
    '''
    try: last = items.pop() # last entry contains value
    except IndexError: return None # input is empty and no actions
    if name is not None and last[0] != name: return None # input is empty
    return last[1]
63
64 3335 aaronmk
def merge_tagged(root):
    '''Merges siblings in root that are marked as mergeable.
    Used to recombine pieces of nodes that were split apart in the mappings.
    Children are selected with the XPath `*[@merge=1]`; each distinct tag name
    found is merged once, and then every child element is processed the same
    way.
    '''
    for name in set((c.tagName for c in xpath.get(root, '*[@merge=1]'))):
        xml_dom.merge_by_name(root, name)

    # Recurse
    for child in xml_dom.NodeElemIter(root): merge_tagged(child)
73
74 995 aaronmk
# Registry of XML functions: name -> f(items, node)
funcs = {}
# Registry of simplifying functions: name -> f(node)
simplifying_funcs = {}
76 995 aaronmk
77 1992 aaronmk
##### Public functions
78
79 4239 aaronmk
# Prefix that marks a string as a variable name
var_name_prefix = '$'


def is_var_name(str_):
    '''Tests whether str_ begins with var_name_prefix'''
    return str_.startswith(var_name_prefix)
82
83
def is_var(node):
    '''Tests whether node is a text node whose value is a variable name'''
    return xml_dom.is_text_node(node) and is_var_name(xml_dom.value(node))
85
86 2112 aaronmk
def is_func_name(name):
    '''Tests whether name follows the function naming convention.
    Function names start with `_`, but `_` by itself is not a function name.
    '''
    return name.startswith('_') and name != '_' # '_' is default root node name
88
89
def is_func(node):
    '''Tests whether node's tag name is a function name'''
    tag_name = node.tagName
    return is_func_name(tag_name)
90
91
def is_xml_func_name(name):
    '''Tests whether name is a function name that is registered in funcs'''
    if not is_func_name(name): return False
    return name in funcs
92
93
def is_xml_func(node):
    '''Tests whether node's tag name is a registered XML function name'''
    tag_name = node.tagName
    return is_xml_func_name(tag_name)
94
95 4300 aaronmk
def passthru(node):
    '''Passes through single child node. First prunes the node.
    If the pruned node does not have exactly one entry, it is left in place.
    '''
    xml_dom.prune(node)
    children = list(xml_dom.NodeEntryIter(node))
    # Each entry is a (name, value) pair; the node is replaced by the value
    if len(children) == 1: xml_dom.replace(node, children[0][1])
100
101 4041 aaronmk
def simplify(node):
    '''Simplifies an XML tree.
    * Merges nodes tagged as mergeable
    * Runs simplifying functions
    Child elements are simplified before the node itself.
    '''
    for child in xml_dom.NodeElemIter(node): simplify(child)
    merge_tagged(node)

    name = node.tagName

    # Pass-through optimizations
    if is_func_name(name):
        try: func = simplifying_funcs[name]
        # no simplifying function registered under this name
        except KeyError: xml_dom.prune_empty(node)
        else: func(node)
    # Pruning optimizations
    else: # these should not run on functions because they would remove args
        xml_dom.prune_children(node)
119 4041 aaronmk
120 3660 aaronmk
def process(node, on_error=exc.reraise, is_rel_func=None, db=None):
    '''Evaluates the XML functions in an XML tree.
    Child elements are processed before the node itself. A function node that
    gets evaluated is replaced with the text of its result; a local XML
    function that throws is replaced with a comment containing the function,
    and the exception is passed to on_error.
    @param node The root of the tree to process
    @param on_error f(e) Called with any exception thrown by a local XML
        function
    @param is_rel_func None|f(str) Tests if a name is a relational function.
        * If != None: Non-relational functions are removed, or relational
          functions are treated specially, depending on the db param (below).
    @param db
        * If None: Non-relational functions other than structural functions are
          replaced with their last parameter (usually the value), not evaluated.
          This is used in column-based mode to remove XML-only functions.
        * If != None: Relational functions are evaluated directly. This is used
          in row-based mode to combine relational and XML functions.
    @return node if it is not a function node; otherwise None
    '''
    has_rel_funcs = is_rel_func is not None
    assert db is None or has_rel_funcs # rel_funcs required if db set

    for child in xml_dom.NodeElemIter(node):
        process(child, on_error, is_rel_func, db)
    merge_tagged(node)

    name = node.tagName
    if not is_func_name(name): return node # not any kind of function

    row_mode = has_rel_funcs and db is not None
    column_mode = has_rel_funcs and db is None
    func = funcs.get(name, None)
    items = list(xml_dom.NodeTextEntryIter(node))

    # Parse function
    if len(items) == 1 and items[0][0].isdigit(): # has single numeric param
        # pass-through optimization for aggregating functions with one arg
        value = items[0][1] # pass through first arg
    elif row_mode and (is_rel_func(name) or func is None): # row-based mode
        # any() also accepts a function without params, for which reduce()
        # without an initial value would throw TypeError
        if any(xml_dom.is_node(v) for n, v in items):
            return # preserve complex funcs
        # Evaluate using DB
        try: value = sql_io.put(db, name, dict(items), on_error=on_error)
        except sql.DoesNotExistException: return # preserve unknown funcs
            # possibly a built-in function of db_xml.put()
    elif column_mode or func is None:
        # local XML function can't be used or does not exist
        if column_mode and is_rel_func(name): return # preserve relational funcs
        # otherwise XML-only in column mode, or DB-only in XML output mode
        value = pop_value(items, None) # just replace with last param
    else: # local XML function
        try: value = func(items, node)
        except Exception as e: # also catch non-wrapped exceptions (XML func bugs)
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
                '\n'+term.emph_multiline(str_)))

            on_error(e)
            return # in case on_error() returns

    xml_dom.replace_with_text(node, value)
177 995 aaronmk
178 4236 aaronmk
##### Simplifying functions
179
180
# Function names must start with _ to avoid collisions with real tags
181
# Functions take params (node) and have no return value
182
183 4237 aaronmk
#### Logic
184
185 6431 aaronmk
# _and and _or are simplified by passthru()
simplifying_funcs['_and'] = passthru
simplifying_funcs['_or'] = passthru
187
188 4237 aaronmk
def _exists(node):
    '''Returns whether its node is non-empty.
    The node is replaced with the text of the result.
    '''
    xml_dom.replace_with_text(node, not xml_dom.is_empty(node))
simplifying_funcs['_exists'] = _exists
192
193 4240 aaronmk
def _if(node):
    '''
    *Must* be run to remove conditions that functions._if() can't handle.
    Note: Can add `@name` attr to distinguish separate _if statements.
    Reads the `cond`, `then` and `else` entries of node and replaces node when
    the outcome does not depend on evaluating the condition. Otherwise node is
    left unchanged.
    '''
    params = dict(xml_dom.NodeEntryIter(node))
    then = params.get('then', None)
    cond = params.get('cond', None)
    else_ = params.get('else', None)

    if cond is None: xml_dom.replace(node, else_) # always False
    elif then == else_: xml_dom.replace(node, then) # always same value
    elif is_var(cond): pass # can't simplify variable conditions
    elif xml_dom.is_text_node(cond) and bool(xml_dom.value(cond)): # always True
        xml_dom.replace(node, then)
simplifying_funcs['_if'] = _if
209
210 6357 aaronmk
def _nullIf(node):
    '''
    *Must* be run to remove conditions that functions._nullIf() can't handle.
    Reads the `null` and `value` entries of node. Node is left unchanged when
    both are present.
    '''
    params = dict(xml_dom.NodeEntryIter(node))
    null = params.get('null', None)
    value = params.get('value', None)

    if value is None: xml_dom.prune_parent(node) # empty
    elif null is None: xml_dom.replace(node, value) # nothing to null out
simplifying_funcs['_nullIf'] = _nullIf
221
222 4303 aaronmk
#### Merging
223
224
# All of these functions are simplified by passthru()
simplifying_funcs['_alt'] = passthru
simplifying_funcs['_join'] = passthru
simplifying_funcs['_join_words'] = passthru
simplifying_funcs['_merge'] = passthru
simplifying_funcs['_min'] = passthru
simplifying_funcs['_max'] = passthru
230 4303 aaronmk
231 4335 aaronmk
def _first(node):
    '''Chooses the first param (after sorting by numeric param name).
    The node is replaced with that param, or with None if there are no params.
    '''
    args = variadic_args(node)
    try: first = args[0]
    except IndexError: first = None # no params
    xml_dom.replace(node, first)
simplifying_funcs['_first'] = _first
238
239 6398 aaronmk
#### Environment access
240
241
def _env(node):
    '''Replaces the node with the value of an environment variable.
    The variable's name is taken from the node's `name` param.
    Throws SyntaxError if the `name` param is missing.
    '''
    params = dict(xml_dom.NodeTextEntryIter(node))
    try: name = params['name']
    except KeyError as e: raise SyntaxError(e)

    # NOTE(review): an unset variable propagates KeyError from os.environ
    xml_dom.replace_with_text(node, os.environ[name])
simplifying_funcs['_env'] = _env
248
249 1469 aaronmk
##### XML functions
250 995 aaronmk
251
# Function names must start with _ to avoid collisions with real tags
252 4144 aaronmk
# Functions take arguments (items, node)
253 995 aaronmk
254 1469 aaronmk
#### Transforming values
255
256 2016 aaronmk
def _replace(items, node):
    '''Applies regular expression replacements to the value.
    The last param must be named `value`. Every other param is a
    (pattern, replacement) pair, applied in order. A pattern consisting only of
    word characters is matched as a whole word.
    Throws SyntaxError if a pattern is not a valid regular expression.
    @return The stripped result, or None if the input or the result is empty
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    try:
        for repl, with_ in items:
            if re.match(r'^\w+$', repl):
                repl = r'(?<![^\W_])'+repl+r'(?![^\W_])' # match whole word
            value = re.sub(repl, with_, value)
    # re.error is the same class as sre_constants.error
    except re.error as e: raise SyntaxError(e)
    return util.none_if(value.strip(), u'') # empty strings always mean None
funcs['_replace'] = _replace
268
269 1469 aaronmk
#### Quantities
270
271 2016 aaronmk
def _units(items, node):
    '''Applies units actions to a quantity.
    The last param must be named `value`. Every other param is an
    (action, units) pair, applied in order:
    * `default`: sets the units to use if the quantity has none
    * `to`: converts the quantity to the given units
    Throws SyntaxError for any other action.
    Throws FormatException if the conversion fails or units are missing.
    @return The resulting quantity as a string, or None if the input is empty
    '''
    value = pop_value(items)
    if value is None: return None # input is empty

    quantity = units.str2quantity(value)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default': units.set_default_units(quantity, units_)
            elif action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise FormatException(e)
            else: raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise FormatException(e)
    return units.quantity2str(quantity)
funcs['_units'] = _units
287
288 1399 aaronmk
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first occurrence of range_sep.
    @param str_ The string to parse
    @param range_sep The separator between the start and the end of the range
    @return tuple (start, end), both stripped. If str_ is not a range (no
        separator, or a negative number when range_sep is '-'), returns
        (str_, None) with str_ unchanged.
    '''
    default = (str_, None)
    start, sep, end = str_.partition(range_sep)
    if sep == '': return default # not a range
    # Also covers a negative number that is preceded by whitespace
    if start.strip() == '' and range_sep == '-': return default # negative number
    return tuple(d.strip() for d in (start, end))
294
295 2016 aaronmk
def _rangeStart(items, node):
    '''Gets the start of the range in the `value` param.
    @return The first element of parse_range(value), or None if there is no
        `value` param
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    return parse_range(value)[0]
funcs['_rangeStart'] = _rangeStart
301
302 2016 aaronmk
def _rangeEnd(items, node):
    '''Gets the end of the range in the `value` param.
    @return The second element of parse_range(value) (None if the value is not
        a range), or None if there is no `value` param
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    return parse_range(value)[1]
funcs['_rangeEnd'] = _rangeEnd
308
309 2016 aaronmk
def _range(items, node):
    '''Computes the difference between the `to` and `from` params.
    Both params are cast to float.
    @return str(to - from), or None if either param is missing
    '''
    items = dict(conv_items(float, items))
    from_ = items.get('from', None)
    to = items.get('to', None)
    if from_ is None or to is None: return None
    return str(to - from_)
funcs['_range'] = _range
316
317 2016 aaronmk
def _avg(items, node):
    '''Computes the arithmetic mean of the params' values.
    Every value is cast to float; the param names are ignored.
    @return The mean as a string, or None if there are no params
    '''
    count = 0
    sum_ = 0.
    for name, value in conv_items(float, items):
        count += 1
        sum_ += value
    if count == 0: return None # input is empty
    else: return str(sum_/count)
funcs['_avg'] = _avg
326 86 aaronmk
327 968 aaronmk
class CvException(Exception):
    '''Signals that a CV (coefficient of variation) value was provided where
    it is not allowed. The message is fixed.
    '''
    def __init__(self):
        Exception.__init__(self, 'CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
332
333 2016 aaronmk
def _noCV(items, node):
    '''Rejects values that are a CV (coefficient of variation).
    The value is taken from the last param, whatever its name. A value of the
    form `CV <digits>` (case-insensitive) is rejected.
    Throws FormatException (caused by a CvException) for a CV value.
    @return The value unchanged, or None if there are no params
    '''
    items = list(conv_items(strings.ustr, items))
    try: name, value = items.pop() # last entry contains value
    except IndexError: return None # input is empty
    # The flag is passed separately because an inline (?i) is only valid at the
    # very start of a pattern
    if re.match(r'^CV *\d+$', value, re.I): raise FormatException(CvException())
    return value
funcs['_noCV'] = _noCV
340 968 aaronmk
341 1607 aaronmk
#### Angles
342
343 2016 aaronmk
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading.
    The direction is taken from the `value` param. Values that are not
    uppercase are returned unchanged.
    Throws FormatException if angles.compass2heading() raises KeyError.
    @return The heading as a string, or None if there is no `value` param
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty

    if not value.isupper(): return value # pass through other coordinate formats
    try: return util.cast(str, angles.compass2heading(value)) # ignore None
    except KeyError as e: raise FormatException(e)
funcs['_compass'] = _compass