Project

General

Profile

1
# XML "function" nodes that transform their contents
2

    
3
import datetime
4
import operator
5
import os
6
import re
7
import sre_constants
8
import warnings
9

    
10
import angles
11
import dates
12
import exc
13
import format
14
import lists
15
import maps
16
import scalar
17
import sql
18
import sql_io
19
import strings
20
import term
21
import units
22
import util
23
import xml_dom
24
import xpath
25

    
26
##### Exceptions
27

    
28
class SyntaxError(exc.ExceptionWithCause):
    '''Raised when an XML function node is malformed (e.g. a required param is
    missing, an action name is unknown, or a regex does not compile).
    Note: within this module, this name shadows the builtin SyntaxError.
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        exc.ExceptionWithCause.__init__(self, 'Invalid XML function syntax',
            cause)
32

    
33
class FormatException(exc.ExceptionWithCause):
    '''Raised when an input value can't be converted or interpreted.
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        exc.ExceptionWithCause.__init__(self, 'Invalid input value', cause)
36

    
37
##### Helper functions
38

    
39
def map_names(func, params):
    '''Applies func to the name of each (name, value) pair.
    @param func f(name) Transforms a name
    @param params Iterable of (name, value) pairs
    @return list of (func(name), value) pairs, in input order
    '''
    return [(func(name), value) for name, value in params]
41

    
42
def variadic_args(node):
    '''Gets the values of a node's numerically-named params, ordered by name.
    Each param name is converted with float(), so a non-numeric name raises
    ValueError.
    @return list of the param values, sorted by numeric param name
    '''
    args = map_names(float, xml_dom.NodeEntryIter(node))
    # Sort on the name only: comparing whole (name, value) tuples would fall
    # back to comparing the values themselves whenever two names are equal.
    # The sort is stable, so equal names keep their original order.
    args.sort(key=operator.itemgetter(0))
    return [value for name, value in args]
46

    
47
def map_items(func, items):
    '''Applies func to the value of each (name, value) pair.
    @param func f(value) Transforms a value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in input order
    '''
    return [(name, func(value)) for name, value in items]
49

    
50
def cast(type_, val):
    '''Converts val by calling type_(val).
    Throws FormatException if can't cast (i.e. type_ raised ValueError).
    @param type_ f(val) The type or conversion function to apply
    @return The converted value
    '''
    try: return type_(val)
    except ValueError as e: raise FormatException(e)
54

    
55
def conv_items(type_, items):
    '''Casts the value of every entry in xml_dom.TextEntryOnlyIter(items).
    @param type_ The type or conversion function passed to cast()
    @return list of (name, converted value) pairs
    Throws FormatException (via cast()) if a value can't be converted.
    '''
    return [(name, cast(type_, value))
        for name, value in xml_dom.TextEntryOnlyIter(items)]
58

    
59
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value.
    Note that the last entry is removed from items even if its name doesn't
    match.
    @param items Mutable list of (name, value) pairs; modified in place
    @param name Name of value param, or None to accept any name
    @return The value of the last entry, or None if items is empty or the last
        entry is not named `name`
    '''
    try: last = items.pop() # last entry contains value
    except IndexError: return None # input is empty and no actions
    if name is not None and last[0] != name: return None # input is empty
    return last[1]
65

    
66
def merge_tagged(root):
    '''Merges siblings in root that are marked as mergeable.
    Used to recombine pieces of nodes that were split apart in the mappings.
    A child is treated as mergeable if it matches the XPath `*[@merge=1]`.
    Children are merged by tag name, then the same is done recursively for
    each child element.
    '''
    mergeable_names = set(c.tagName for c in xpath.get(root, '*[@merge=1]'))
    for name in mergeable_names: xml_dom.merge_by_name(root, name)
    
    # Recurse
    for child in xml_dom.NodeElemIter(root): merge_tagged(child)
75

    
76
# Registry of XML functions: name -> f(items, node), which returns the value
# that replaces the function node
funcs = {}
# Registry of simplifying functions: name -> f(node), which edits the tree in
# place and has no return value
simplifying_funcs = {}
78

    
79
##### Public functions
80

    
81
# Strings starting with this prefix are treated as variable names
var_name_prefix = '$'

def is_var_name(str_):
    '''Tests if a string starts with var_name_prefix'''
    return str_.startswith(var_name_prefix)
84

    
85
def is_var(node):
    '''Tests if a node is a text node whose value is a variable name'''
    if not xml_dom.is_text_node(node): return False
    return is_var_name(xml_dom.value(node))
87

    
88
def is_func_name(name):
    '''Tests if a tag name denotes a function: it starts with `_` but is not
    just `_`, which is the default root node name.'''
    return name.startswith('_') and name != '_' # '_' is default root node name
90

    
91
def is_func(node):
    '''Tests if a node's tag name is a function name (see is_func_name())'''
    tag_name = node.tagName
    return is_func_name(tag_name)
92

    
93
def is_xml_func_name(name):
    '''Tests if a name is a function name that is registered in funcs'''
    if not is_func_name(name): return False
    return name in funcs
94

    
95
def is_xml_func(node):
    '''Tests if a node's tag name is a function name registered in funcs'''
    tag_name = node.tagName
    return is_xml_func_name(tag_name)
96

    
97
def is_scalar(value):
    '''Tests if a value is a scalar constant.
    A string that is a variable name (see is_var_name()) does not count,
    even though scalar.is_scalar() accepts it.
    '''
    is_var_ref = util.is_str(value) and is_var_name(value)
    return scalar.is_scalar(value) and not is_var_ref
100

    
101
def passthru(node):
    '''Passes through single child node. First prunes the node.
    If the node has exactly one entry after pruning, the node is replaced by
    that entry's value; otherwise it is left in place.
    '''
    xml_dom.prune(node)
    children = list(xml_dom.NodeEntryIter(node))
    if len(children) == 1:
        only_name, only_value = children[0]
        xml_dom.replace(node, only_value)
106

    
107
def simplify(node):
    '''Simplifies an XML tree.
    * Merges nodes tagged as mergeable
    * Runs simplifying functions
    Children are simplified before their parent. Modifies the tree in place
    and has no return value.
    '''
    for child in xml_dom.NodeElemIter(node): simplify(child)
    merge_tagged(node)
    
    name = node.tagName
    
    # Pass-through optimizations
    if is_func_name(name):
        try: func = simplifying_funcs[name]
        except KeyError: xml_dom.prune_empty(node) # no simplifier registered
        else: func(node)
    # Pruning optimizations
    else: # these should not run on functions because they would remove args
        xml_dom.prune_children(node)
125

    
126
def process(node, on_error=exc.reraise, is_rel_func=None, db=None):
    '''Evaluates the XML functions in an XML tree.
    Children are processed before their parent. A function node that gets
    evaluated is replaced in place with the text of its value.
    @param on_error f(e) Called with the exception if a local XML function
        raises. If it returns, the function node will already have been
        replaced by a comment containing the node's text.
    @param is_rel_func None|f(str) Tests if a name is a relational function.
        * If != None: Non-relational functions are removed, or relational
          functions are treated specially, depending on the db param (below).
    @param db
        * If None: Non-relational functions other than structural functions are
          replaced with their last parameter (usually the value), not evaluated.
          This is used in column-based mode to remove XML-only functions.
        * If != None: Relational functions are evaluated directly. This is used
          in row-based mode to combine relational and XML functions.
    @return node if it is not a function node, otherwise None
    '''
    has_rel_funcs = is_rel_func is not None
    assert db is None or has_rel_funcs # rel_funcs required if db set
    
    for child in xml_dom.NodeElemIter(node):
        process(child, on_error, is_rel_func, db)
    merge_tagged(node)
    
    name = node.tagName
    if not is_func_name(name): return node # not any kind of function
    
    row_mode = has_rel_funcs and db is not None
    column_mode = has_rel_funcs and db is None
    func = funcs.get(name, None)
    items = list(xml_dom.NodeTextEntryIter(node))
    
    # Parse function
    if len(items) == 1 and items[0][0].isdigit(): # has single numeric param
        # pass-through optimization for aggregating functions with one arg
        value = items[0][1] # pass through first arg
    elif row_mode and (is_rel_func(name) or func is None): # row-based mode
        # any() is False for empty items, so no separate emptiness check
        if any(xml_dom.is_node(v) for n, v in items):
            return # preserve complex funcs
        # Evaluate using DB
        try: value = sql_io.put(db, name, dict(items), on_error=on_error)
        except sql.DoesNotExistException: return # preserve unknown funcs
            # possibly a built-in function of db_xml.put()
    elif column_mode or func is None:
        # local XML function can't be used or does not exist
        if column_mode and is_rel_func(name): return # preserve relational funcs
        # otherwise XML-only in column mode, or DB-only in XML output mode
        value = pop_value(items, None) # just replace with last param
    else: # local XML function
        try: value = func(items, node)
        except Exception as e: # also catch non-wrapped exceptions (XML func bugs)
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            # Leave a comment in the tree showing the function that failed
            xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
                '\n'+term.emph_multiline(str_)))
            
            on_error(e)
            return # in case on_error() returns
    
    xml_dom.replace_with_text(node, value)
183

    
184
##### Simplifying functions
185

    
186
# Function names must start with _ to avoid collisions with real tags
187
# Functions take params (node) and have no return value
188

    
189
#### Logic
190

    
191
def _and(node):
    '''If all params are scalar constants, replaces the node with the result
    of lists.and_() over them. Otherwise applies passthru().'''
    values = [v for k, v in xml_dom.NodeTextEntryIter(node)]
    
    # Build a real list so the result doesn't depend on map()'s return type
    if lists.and_([is_scalar(v) for v in values]): # all constants
        xml_dom.replace_with_text(node, lists.and_(values))
    else: passthru(node)
simplifying_funcs['_and'] = _and
198

    
199
def _or(node):
    '''If all params are scalar constants, replaces the node with the result
    of lists.or_() over them. Otherwise applies passthru().'''
    values = [v for k, v in xml_dom.NodeTextEntryIter(node)]
    
    # Build a real list so the result doesn't depend on map()'s return type
    if lists.and_([is_scalar(v) for v in values]): # all constants
        xml_dom.replace_with_text(node, lists.or_(values))
    else: passthru(node)
simplifying_funcs['_or'] = _or
206

    
207
def _exists(node):
    '''Returns whether its node is non-empty.
    The node is replaced with the text of that boolean.'''
    is_nonempty = not xml_dom.is_empty(node)
    xml_dom.replace_with_text(node, is_nonempty)
simplifying_funcs['_exists'] = _exists
211

    
212
def _if(node):
    '''
    *Must* be run to remove conditions that functions._if() can't handle.
    Note: Can add `@name` attr to distinguish separate _if statements.
    
    Reads the `cond`, `then` and `else` params and replaces the node when the
    outcome is already known:
    * no `cond`: replaced with `else`
    * `then` equals `else`: replaced with `then`
    * `cond` is a non-variable text node with a truthy value: replaced with
      `then`
    Otherwise the node is left unchanged.
    '''
    params = dict(xml_dom.NodeEntryIter(node))
    then = params.get('then', None)
    cond = params.get('cond', None)
    else_ = params.get('else', None)
    
    if cond is None: xml_dom.replace(node, else_) # always False
    elif then == else_: xml_dom.replace(node, then) # always same value
    elif is_var(cond): pass # can't simplify variable conditions
    elif xml_dom.is_text_node(cond) and bool(xml_dom.value(cond)): # always True
        xml_dom.replace(node, then)
simplifying_funcs['_if'] = _if
228

    
229
def _nullIf(node):
    '''
    *Must* be run to remove conditions that functions._nullIf() can't handle.
    
    * no `value` param: calls xml_dom.prune_parent() on the node
    * no `null` param: replaced with `value`
    Otherwise the node is left unchanged.
    '''
    params = dict(xml_dom.NodeEntryIter(node))
    null = params.get('null', None)
    value = params.get('value', None)
    
    if value is None: xml_dom.prune_parent(node) # empty
    elif null is None: xml_dom.replace(node, value) # nothing to null out
simplifying_funcs['_nullIf'] = _nullIf
240

    
241
#### Comparison
242

    
243
def _eq(node):
    '''Simplifies an equality test of the `left` and `right` params.
    * both are scalar constants: replaced with the text of `left == right`
    * otherwise, if they compare equal: replaced with the text of True
    Otherwise the node is left unchanged.
    '''
    params = dict(xml_dom.NodeTextEntryIter(node))
    left = params.get('left', None)
    right = params.get('right', None)
    
    if is_scalar(left) and is_scalar(right): # constant
        xml_dom.replace_with_text(node, left == right)
    elif left == right: xml_dom.replace_with_text(node, True) # always True
simplifying_funcs['_eq'] = _eq
252

    
253
#### Merging
254

    
255
# These functions are simplified with passthru(): when only one entry is left
# after pruning, the function node is replaced by that entry's value
simplifying_funcs['_alt'] = passthru
simplifying_funcs['_join'] = passthru
simplifying_funcs['_join_words'] = passthru
simplifying_funcs['_merge_prefix'] = passthru
simplifying_funcs['_merge'] = passthru
simplifying_funcs['_min'] = passthru
simplifying_funcs['_max'] = passthru
262

    
263
def _first(node):
    '''Chooses the first non-empty param (sorting by numeric param name).
    The node's children are pruned first, then the node is replaced with the
    first remaining param, or with None if there is none.
    '''
    xml_dom.prune_children(node)
    args = variadic_args(node)
    try: first = args[0]
    except IndexError: first = None # no params left
    xml_dom.replace(node, first)
simplifying_funcs['_first'] = _first
271

    
272
#### Environment access
273

    
274
def _env(node):
    '''Replaces the node with the value of the environment variable named by
    its `name` param.
    Throws SyntaxError (this module's) if there is no `name` param.
    Throws KeyError if the environment variable is not set.
    '''
    params = dict(xml_dom.NodeTextEntryIter(node))
    try: name = params['name']
    except KeyError as e: raise SyntaxError(e)
    
    xml_dom.replace_with_text(node, os.environ[name])
simplifying_funcs['_env'] = _env
281

    
282
##### XML functions
283

    
284
# Function names must start with _ to avoid collisions with real tags
285
# Functions take arguments (items, node)
286

    
287
#### Transforming values
288

    
289
def _replace(items, node):
    '''Applies regex replacements to the `value` param.
    Every other param is a (pattern, replacement) pair, applied in order with
    re.sub(). A pattern consisting only of word characters is matched as a
    whole word.
    @return The resulting string with surrounding whitespace stripped, or None
        if the input is empty or the result is the empty string
    Throws SyntaxError (this module's) if a pattern is not a valid regex.
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    try:
        for repl, with_ in items:
            if re.match(r'^\w+$', repl):
                repl = r'(?<![^\W_])'+repl+r'(?![^\W_])' # match whole word
            value = re.sub(repl, with_, value)
    # re.error is the same class as sre_constants.error
    except re.error as e: raise SyntaxError(e)
    return util.none_if(value.strip(), u'') # empty strings always mean None
funcs['_replace'] = _replace
301

    
302
#### Quantities
303

    
304
def _units(items, node):
    '''Parses the `value` param as a quantity and applies unit actions to it.
    Every other param is an (action, units) pair, applied in order:
    * `default`: units.set_default_units(quantity, units)
    * `to`: units.convert(quantity, units)
    An empty units string is passed on as None.
    @return The quantity as a string, or None if the input is empty
    Throws SyntaxError (this module's) on any other action name.
    Throws FormatException if conversion raises ValueError, or if
        units.MissingUnitsException is raised.
    '''
    value = pop_value(items)
    if value is None: return None # input is empty
    
    quantity = units.str2quantity(value)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default': units.set_default_units(quantity, units_)
            elif action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise FormatException(e)
            else: raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise FormatException(e)
    return units.quantity2str(quantity)
funcs['_units'] = _units
320

    
321
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first occurrence of range_sep.
    @param str_ The string to split
    @param range_sep The separator between the start and the end
    @return tuple (start, end), each stripped of surrounding whitespace.
        If str_ does not contain range_sep, or range_sep is '-' and str_
        starts with it (a negative number), returns (str_, None) with str_
        unchanged.
    '''
    default = (str_, None)
    start, sep, end = str_.partition(range_sep)
    if sep == '': return default # not a range
    if start == '' and range_sep == '-': return default # negative number
    return (start.strip(), end.strip())
327

    
328
def _rangeStart(items, node):
    '''Gets the start of the range in the `value` param (see parse_range()).
    @return The range start, or the whole value if it is not a range, or None
        if there is no `value` param
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    start, end = parse_range(value)
    return start
funcs['_rangeStart'] = _rangeStart
334

    
335
def _rangeEnd(items, node):
    '''Gets the end of the range in the `value` param (see parse_range()).
    @return The range end, or None if the value is not a range or there is no
        `value` param
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    start, end = parse_range(value)
    return end
funcs['_rangeEnd'] = _rangeEnd
341

    
342
def _range(items, node):
    '''Computes the difference between the `to` and `from` params.
    All param values are converted to float first.
    @return str(to - from), or None if either param is missing
    '''
    items = dict(conv_items(float, items))
    from_ = items.get('from', None)
    to = items.get('to', None)
    if from_ is None or to is None: return None
    return str(to - from_)
funcs['_range'] = _range
349

    
350
def _avg(items, node):
    '''Computes the arithmetic mean of all param values.
    All param values are converted to float first.
    @return The mean as a string, or None if there are no params
    '''
    count = 0
    sum_ = 0.
    for name, value in conv_items(float, items):
        count += 1
        sum_ += value
    if count == 0: return None # input is empty
    return str(sum_/count)
funcs['_avg'] = _avg
359

    
360
class CvException(Exception):
    '''Indicates that a value is a CV (coefficient of variation) value.
    Takes no arguments; the message is fixed.
    '''
    def __init__(self):
        Exception.__init__(self, 'CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
365

    
366
def _noCV(items, node):
    '''Rejects CV (coefficient of variation) values.
    The value is taken from the last param, whatever its name. A value is
    considered a CV value if it consists of `CV` (case-insensitive), any
    number of spaces, and one or more digits.
    @return The value unchanged, or None if there are no params
    Throws FormatException wrapping a CvException for a CV value.
    '''
    items = list(conv_items(strings.ustr, items))
    try: name, value = items.pop() # last entry contains value
    except IndexError: return None # input is empty
    # Pass the flag as an argument: an inline (?i) that is not at the very
    # start of the pattern is rejected by newer versions of re
    if re.match(r'^CV *\d+$', value, re.IGNORECASE):
        raise FormatException(CvException())
    return value
funcs['_noCV'] = _noCV
373

    
374
#### Angles
375

    
376
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading.
    Only values for which str.isupper() is true are converted; anything else
    is returned unchanged.
    @return The heading from angles.compass2heading(), cast with
        util.cast(str, ...), or None if there is no `value` param
    Throws FormatException if the conversion raises KeyError.
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    
    if not value.isupper(): return value # pass through other coordinate formats
    try: return util.cast(str, angles.compass2heading(value)) # ignore None
    except KeyError as e: raise FormatException(e)
funcs['_compass'] = _compass
(44-44/47)