Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 5190 aaronmk
import operator
5 6398 aaronmk
import os
6 968 aaronmk
import re
7 1219 aaronmk
import sre_constants
8 2017 aaronmk
import warnings
9 111 aaronmk
10 1607 aaronmk
import angles
11 818 aaronmk
import dates
12 300 aaronmk
import exc
13 1580 aaronmk
import format
14 917 aaronmk
import maps
15 3688 aaronmk
import sql
16 3077 aaronmk
import sql_io
17 1234 aaronmk
import strings
18 827 aaronmk
import term
19 1468 aaronmk
import units
20 1047 aaronmk
import util
21 86 aaronmk
import xml_dom
22 1321 aaronmk
import xpath
23 86 aaronmk
24 995 aaronmk
##### Exceptions
25
26 1612 aaronmk
class SyntaxError(exc.ExceptionWithCause):
    '''Signals that an XML function was invoked with invalid syntax.

    Note: shadows the builtin SyntaxError within this module.
    '''
    def __init__(self, cause):
        '''@param cause The underlying exception that triggered this error'''
        msg = 'Invalid XML function syntax'
        exc.ExceptionWithCause.__init__(self, msg, cause)
30 278 aaronmk
31 1613 aaronmk
class FormatException(exc.ExceptionWithCause):
    '''Signals that an input value could not be interpreted.'''
    def __init__(self, cause):
        '''@param cause The underlying exception that triggered this error'''
        msg = 'Invalid input value'
        exc.ExceptionWithCause.__init__(self, msg, cause)
34 843 aaronmk
35 1992 aaronmk
##### Helper functions
36 995 aaronmk
37 4334 aaronmk
def map_names(func, params):
    '''Applies func to the name of every (name, value) pair.
    @param func f(name) Transformation applied to each name
    @param params Iterable of (name, value) pairs
    @return list of (func(name), value) tuples, in input order
    '''
    renamed = []
    for name, value in params:
        renamed.append((func(name), value))
    return renamed
39
40
def variadic_args(node):
    '''Collects the values of a node's params, ordered by numeric param name.
    Each param name is parsed with float() before sorting.
    @return list of the param values only (names are discarded)
    '''
    ordered = sorted(map_names(float, xml_dom.NodeEntryIter(node)))
    return [pair[1] for pair in ordered]
44
45 1992 aaronmk
def map_items(func, items):
    '''Applies func to the value of every (name, value) pair.
    @param func f(value) Transformation applied to each value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) tuples, in input order
    '''
    converted = []
    for name, value in items:
        converted.append((name, func(value)))
    return converted
47
48
def cast(type_, val):
    '''Converts val by calling type_(val).
    @throws FormatException if type_ raises ValueError (i.e. can't cast)
    '''
    try:
        converted = type_(val)
    except ValueError as e:
        raise FormatException(e)
    return converted
52
53
def conv_items(type_, items):
    '''Casts every item value to type_.
    The items are first passed through xml_dom.TextEntryOnlyIter.
    @return list of (name, cast value) tuples
    @throws FormatException if a value can't be cast
    '''
    def convert(val): return cast(type_, val)
    
    return map_items(convert, xml_dom.TextEntryOnlyIter(items))
56
57
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value.
    @param items Mutable list of (name, value) entries; the last entry is
        always popped if the list is non-empty
    @param name Name of value param, or None to accept any name
    @return The popped entry's value, or None if items was empty or the popped
        entry's name does not match
    '''
    try: entry = items.pop() # last entry contains value
    except IndexError: return None # input is empty and no actions
    
    if name is None or entry[0] == name: return entry[1]
    return None # input is empty
63
64 3335 aaronmk
def merge_tagged(root):
    '''Merges siblings in root that are marked as mergeable (@merge=1).
    Used to recombine pieces of nodes that were split apart in the mappings.
    Applies to root's children first, then recursively to each child element.
    '''
    # Collect the distinct tag names up front, before any merging happens
    names = set()
    for tagged in xpath.get(root, '*[@merge=1]'): names.add(tagged.tagName)
    
    for name in names: xml_dom.merge_by_name(root, name)
    
    # Recurse
    for child in xml_dom.NodeElemIter(root):
        merge_tagged(child)
73
74 995 aaronmk
# Registry of XML functions, keyed by function name; called as f(items, node)
funcs = dict()
# Registry of simplifying functions, keyed by function name; called as f(node)
simplifying_funcs = dict()
76 995 aaronmk
77 1992 aaronmk
##### Public functions
78
79 4239 aaronmk
# Strings starting with this prefix are treated as variable names
var_name_prefix = '$'

def is_var_name(str_):
    '''Returns whether str_ starts with var_name_prefix'''
    return str_.startswith(var_name_prefix)
82
83
def is_var(node):
    '''Returns whether node is a text node whose value is a variable name'''
    is_text = xml_dom.is_text_node(node)
    return is_text and is_var_name(xml_dom.value(node))
85
86 2112 aaronmk
def is_func_name(name):
    '''Returns whether name looks like a function name, i.e. starts with `_`
    but is not exactly `_`.
    '''
    if name == '_': return False # '_' is default root node name
    return name.startswith('_')
88
89
def is_func(node):
    '''Returns whether the node's tag name is a function name'''
    return is_func_name(node.tagName)
90
91
def is_xml_func_name(name):
    '''Returns whether name is a function name that is registered in funcs'''
    return is_func_name(name) and name in funcs
92
93
def is_xml_func(node):
    '''Returns whether the node's tag name is a registered XML function'''
    return is_xml_func_name(node.tagName)
94
95 4300 aaronmk
def passthru(node):
    '''Passes through single child node. First prunes the node.
    If the pruned node has exactly one entry, the node is replaced by that
    entry's value; otherwise the node is left in place.
    '''
    xml_dom.prune(node)
    entries = list(xml_dom.NodeEntryIter(node))
    if len(entries) != 1: return # nothing unambiguous to pass through
    xml_dom.replace(node, entries[0][1])
100
101 4041 aaronmk
def simplify(node):
    '''Simplifies an XML tree, children first.
    * Merges nodes tagged as mergeable
    * Runs simplifying functions
    '''
    for child in xml_dom.NodeElemIter(node):
        simplify(child)
    merge_tagged(node)
    
    name = node.tagName
    
    if not is_func_name(name):
        # Pruning optimizations
        # these should not run on functions because they would remove args
        xml_dom.prune_children(node)
        return
    
    # Pass-through optimizations
    if name in simplifying_funcs: simplifying_funcs[name](node)
    else: xml_dom.prune_empty(node) # no simplifying function registered
119 4041 aaronmk
120 3660 aaronmk
def process(node, on_error=exc.reraise, is_rel_func=None, db=None):
    '''Evaluates the XML functions in an XML tree.
    Child elements are processed first, then mergeable siblings are merged, and
    finally the node itself is evaluated if its tag name is a function name.
    @param node The root of the (sub)tree to process
    @param on_error f(e) Called with the exception raised by a local XML
        function, after the function node has been replaced with a comment
    @param is_rel_func None|f(str) Tests if a name is a relational function.
        * If != None: Non-relational functions are removed, or relational
          functions are treated specially, depending on the db param (below).
    @param db
        * If None: Non-relational functions other than structural functions are
          replaced with their last parameter (usually the value), not evaluated.
          This is used in column-based mode to remove XML-only functions.
        * If != None: Relational functions are evaluated directly. This is used
          in row-based mode to combine relational and XML functions.
    @return node itself if it is not a function node; otherwise None
    '''
    has_rel_funcs = is_rel_func != None
    assert db == None or has_rel_funcs # rel_funcs required if db set
    
    for child in xml_dom.NodeElemIter(node):
        process(child, on_error, is_rel_func, db)
    merge_tagged(node)
    
    name = node.tagName
    if not is_func_name(name): return node # not any kind of function
    
    row_mode = has_rel_funcs and db != None
    column_mode = has_rel_funcs and db == None
    func = funcs.get(name, None)
    items = list(xml_dom.NodeTextEntryIter(node))
    
    # Parse function
    if len(items) == 1 and items[0][0].isdigit(): # has single numeric param
        # pass-through optimization for aggregating functions with one arg
        value = items[0][1] # pass through first arg
    elif row_mode and (is_rel_func(name) or func == None): # row-based mode
        # any() instead of reduce(operator.or_, ...): reduce() without an
        # initial value raises TypeError when the function has no params
        if any(xml_dom.is_node(v) for n, v in items):
            return # preserve complex funcs
        # Evaluate using DB
        try: value = sql_io.put(db, name, dict(items), on_error=on_error)
        except sql.DoesNotExistException: return # preserve unknown funcs
            # possibly a built-in function of db_xml.put()
    elif column_mode or func == None:
        # local XML function can't be used or does not exist
        if column_mode and is_rel_func(name): return # preserve relational funcs
        # otherwise XML-only in column mode, or DB-only in XML output mode
        value = pop_value(items, None) # just replace with last param
    else: # local XML function
        try: value = func(items, node)
        except Exception as e: # also catch non-wrapped exceptions (XML func bugs)
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            # Leave a comment containing the failed function in the node's place
            xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
                '\n'+term.emph_multiline(str_)))
            
            on_error(e)
            return # in case on_error() returns
    
    xml_dom.replace_with_text(node, value)
177 995 aaronmk
178 4236 aaronmk
##### Simplifying functions
179
180
# Function names must start with _ to avoid collisions with real tags
181
# Functions take params (node) and have no return value
182
183 4237 aaronmk
#### Logic
184
185
def _exists(node):
    '''Replaces the node with text stating whether the node is non-empty'''
    is_nonempty = not xml_dom.is_empty(node)
    xml_dom.replace_with_text(node, is_nonempty)
simplifying_funcs['_exists'] = _exists
189
190 4240 aaronmk
def _if(node):
    '''Simplifies an _if node whose outcome is already known.
    *Must* be run to remove conditions that functions._if() can't handle.
    Note: Can add `@name` attr to distinguish separate _if statements.
    Reads the `cond`, `then` and `else` params; a missing param is None.
    '''
    params = dict(xml_dom.NodeEntryIter(node))
    cond = params.get('cond')
    then = params.get('then')
    else_ = params.get('else')
    
    if cond == None: # always False
        xml_dom.replace(node, else_)
        return
    if then == else_: # always same value
        xml_dom.replace(node, then)
        return
    if is_var(cond): return # can't simplify variable conditions
    if xml_dom.is_text_node(cond) and xml_dom.value(cond): # always True
        xml_dom.replace(node, then)
simplifying_funcs['_if'] = _if
206
207 6357 aaronmk
def _nullIf(node):
    '''Simplifies a _nullIf node that has no value or nothing to null out.
    *Must* be run to remove conditions that functions._nullIf() can't handle.
    Reads the `null` and `value` params; a missing param is None.
    '''
    params = dict(xml_dom.NodeEntryIter(node))
    value = params.get('value')
    null = params.get('null')
    
    if value == None: # empty
        xml_dom.prune_parent(node)
        return
    if null == None: # nothing to null out
        xml_dom.replace(node, value)
simplifying_funcs['_nullIf'] = _nullIf
218
219 4303 aaronmk
#### Merging
220
221
# Merging functions that are simplified by passing through a single child
simplifying_funcs.update(dict.fromkeys(
    ['_alt', '_join', '_join_words', '_merge', '_min', '_max'], passthru))
227 4303 aaronmk
228 4335 aaronmk
def _first(node):
    '''Chooses the first param (after sorting by numeric param name).
    Replaces the node with that param's value, or with None if there are no
    params.
    '''
    args = variadic_args(node)
    first = args[0] if args else None
    xml_dom.replace(node, first)
simplifying_funcs['_first'] = _first
235
236 6398 aaronmk
#### Environment access
237
238
def _env(node):
    '''Replaces the node with the value of an environment variable.
    The variable's name is taken from the `name` param.
    @throws SyntaxError if the `name` param is missing
    '''
    params = dict(xml_dom.NodeTextEntryIter(node))
    try: var_name = params['name']
    except KeyError as e: raise SyntaxError(e)
    
    # os.environ raises KeyError for an unset variable; this is not caught here
    env_value = os.environ[var_name]
    xml_dom.replace_with_text(node, env_value)
simplifying_funcs['_env'] = _env
245
246 1469 aaronmk
##### XML functions
247 995 aaronmk
248
# Function names must start with _ to avoid collisions with real tags
249 4144 aaronmk
# Functions take arguments (items, node)
250 995 aaronmk
251 1469 aaronmk
#### Transforming values
252
253 2016 aaronmk
def _replace(items, node):
    '''Applies regexp replacements to the value.
    Every param before the value is a (pattern, replacement) pair, applied in
    order with re.sub(). A pattern consisting only of word characters is
    matched as a whole word.
    @return The stripped result, or None if it (or the input) is empty
    @throws SyntaxError if a pattern is not a valid regexp
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    
    try:
        for pattern, replacement in items:
            if re.match(r'^\w+$', pattern): # match whole word
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])'
            value = re.sub(pattern, replacement, value)
    except sre_constants.error as e:
        raise SyntaxError(e)
    
    stripped = value.strip()
    return util.none_if(stripped, u'') # empty strings always mean None
funcs['_replace'] = _replace
265
266 1469 aaronmk
#### Quantities
267
268 2016 aaronmk
def _units(items, node):
    '''Applies unit actions to a quantity.
    Every param before the value is an (action, units) pair:
    * `default`: sets the default units of the quantity
    * `to`: converts the quantity to the given units
    @return The resulting quantity as a string, or None if the input is empty
    @throws SyntaxError if an action is not recognized
    @throws FormatException if the conversion fails or units are missing
    '''
    value = pop_value(items)
    if value == None: return None # input is empty
    
    quantity = units.str2quantity(value)
    try:
        for action, target_units in items:
            target_units = util.none_if(target_units, u'')
            if action == 'default':
                units.set_default_units(quantity, target_units)
            elif action == 'to':
                try:
                    quantity = units.convert(quantity, target_units)
                except ValueError as e:
                    raise FormatException(e)
            else:
                raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e:
        raise FormatException(e)
    return units.quantity2str(quantity)
funcs['_units'] = _units
284
285 1399 aaronmk
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first occurrence of range_sep.
    @param str_ The string to parse
    @param range_sep The separator between the start and end of the range
    @return tuple (start, end), each stripped of surrounding whitespace.
        If str_ is not a range, or is a negative number (it begins with the
        `-` separator), returns (str_, None) with str_ unmodified.
    '''
    start, sep, end = str_.partition(range_sep)
    is_range = sep != ''
    is_negative_num = start == '' and range_sep == '-'
    if not is_range or is_negative_num: return (str_, None)
    return (start.strip(), end.strip())
291
292 2016 aaronmk
def _rangeStart(items, node):
    '''Returns the start of the range in the `value` param, or None if there is
    no `value` param.
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, end = parse_range(params['value'])
    return start
funcs['_rangeStart'] = _rangeStart
298
299 2016 aaronmk
def _rangeEnd(items, node):
    '''Returns the end of the range in the `value` param, or None if there is
    no `value` param. The end is also None if the value is not a range.
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, end = parse_range(params['value'])
    return end
funcs['_rangeEnd'] = _rangeEnd
305
306 2016 aaronmk
def _range(items, node):
    '''Returns the difference `to` - `from` as a string.
    Both params are cast to float. Returns None if either one is missing.
    @throws FormatException if a param can't be cast to float
    '''
    bounds = dict(conv_items(float, items))
    lower = bounds.get('from')
    upper = bounds.get('to')
    if lower is None or upper is None: return None
    difference = upper - lower
    return str(difference)
funcs['_range'] = _range
313
314 2016 aaronmk
def _avg(items, node):
    '''Returns the mean of all the param values as a string.
    Every value is cast to float. Returns None if there are no params.
    @throws FormatException if a value can't be cast to float
    '''
    values = [value for name, value in conv_items(float, items)]
    if not values: return None # input is empty
    total = sum(values, 0.)
    return str(total/len(values))
funcs['_avg'] = _avg
323 86 aaronmk
324 968 aaronmk
class CvException(Exception):
    '''Signals a CV (coefficient of variation) value where it is not allowed.'''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only allowed for '
            'ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
329
330 2016 aaronmk
def _noCV(items, node):
    '''Passes through the value unless it is a CV (coefficient of variation).
    The value is taken from the last param, whatever its name.
    @return The value as a string, or None if there are no params
    @throws FormatException (wrapping CvException) if the value consists of
        `CV` (in any case), optional spaces, and digits
    '''
    items = list(conv_items(strings.ustr, items))
    try: name, value = items.pop() # last entry contains value
    except IndexError: return None # input is empty
    # The global (?i) flag must come first in the pattern: flags placed later
    # are deprecated since Python 3.6 and rejected by Python 3.11+
    if re.match(r'(?i)^CV *\d+$', value): raise FormatException(CvException())
    return value
funcs['_noCV'] = _noCV
337 968 aaronmk
338 1607 aaronmk
#### Angles
339
340 2016 aaronmk
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading.
    Only all-uppercase values are converted; anything else is returned
    unchanged. Returns None if there is no `value` param.
    @throws FormatException if the lookup raises KeyError
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    direction = params['value']
    
    # pass through other coordinate formats
    if not direction.isupper(): return direction
    try:
        return util.cast(str, angles.compass2heading(direction)) # ignore None
    except KeyError as e:
        raise FormatException(e)
funcs['_compass'] = _compass