Project

General

Profile

1
# XML "function" nodes that transform their contents
2

    
3
import datetime
4
import operator
5
import os
6
import re
7
import sre_constants
8
import warnings
9

    
10
import angles
11
import dates
12
import exc
13
import format
14
import lists
15
import maps
16
import scalar
17
import sql
18
import sql_io
19
import strings
20
import term
21
import units
22
import util
23
import xml_dom
24
import xpath
25

    
26
##### Exceptions
27

    
28
class SyntaxError(exc.ExceptionWithCause):
    '''Signals that an XML function was invoked with invalid syntax.
    Note: inside this module the name shadows the builtin SyntaxError.
    @param cause The underlying exception that triggered this one
    '''
    def __init__(self, cause):
        message = 'Invalid XML function syntax'
        exc.ExceptionWithCause.__init__(self, message, cause)
32

    
33
class FormatException(exc.ExceptionWithCause):
    '''Signals that an input value could not be interpreted.
    @param cause The underlying exception that triggered this one
    '''
    def __init__(self, cause):
        message = 'Invalid input value'
        exc.ExceptionWithCause.__init__(self, message, cause)
36

    
37
##### Helper functions
38

    
39
def map_names(func, params):
    '''Applies func to the name of every (name, value) pair in params.
    @param func f(name) Transformation for each name
    @param params Iterable of (name, value) pairs
    @return list of (func(name), value) tuples, in iteration order
    '''
    renamed = []
    for name, value in params:
        renamed.append((func(name), value))
    return renamed
41

    
42
def variadic_args(node):
    '''Collects the values of node's entries, ordered by param name.
    Each name is converted with float() before sorting, so names are expected
    to be numeric.
    @return list of the values only
    '''
    numbered = sorted(map_names(float, xml_dom.NodeEntryIter(node)))
    return [value for position, value in numbered]
46

    
47
def map_items(func, items):
    '''Applies func to the value of every (name, value) pair in items.
    @param func f(value) Transformation for each value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) tuples, in iteration order
    '''
    converted = []
    for name, value in items:
        converted.append((name, func(value)))
    return converted
49

    
50
def cast(type_, val):
    '''Converts val using type_.
    @param type_ f(val) Type or conversion function to apply
    @param val The value to convert
    @return type_(val)
    Throws FormatException if can't cast
    '''
    try: return type_(val)
    # TypeError covers inputs of the wrong kind (e.g. float(None)), which are
    # just as un-castable as a malformed string
    except (TypeError, ValueError) as e: raise FormatException(e)
54

    
55
def conv_items(type_, items):
    '''Casts the value of each entry produced by xml_dom.TextEntryOnlyIter(items)
    to type_.
    @return list of (name, converted value) tuples
    Throws FormatException (via cast()) if a value can't be converted
    '''
    def convert(val): return cast(type_, val)
    
    return map_items(convert, xml_dom.TextEntryOnlyIter(items))
58

    
59
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value.
    Note that the entry is removed even when its name does not match.
    @param items list of (name, value) entries; modified in place
    @param name Name of value param, or None to accept any name
    @return The value, or None if items is empty or the last entry is not the
        value param
    '''
    try: entry = items.pop() # the value is expected to be the last entry
    except IndexError: return None # no entries at all
    if name == None or entry[0] == name: return entry[1]
    return None # last entry is something other than the value
65

    
66
def merge_tagged(root):
    '''Merges siblings in root that are marked as mergeable (@merge=1), then
    does the same for every element child of root.
    Used to recombine pieces of nodes that were split apart in the mappings.
    '''
    # Collect the distinct tag names first, so merging happens once per name
    mergeable = set()
    for tagged in xpath.get(root, '*[@merge=1]'): mergeable.add(tagged.tagName)
    for name in mergeable: xml_dom.merge_by_name(root, name)
    
    # Descend into the (possibly merged) children
    for child in xml_dom.NodeElemIter(root): merge_tagged(child)
75

    
76
funcs = {}
77
simplifying_funcs = {}
78

    
79
##### Public functions
80

    
81
var_name_prefix = '$'
82

    
83
def is_var_name(str_):
    '''Returns whether str_ begins with var_name_prefix'''
    return str_.startswith(var_name_prefix)
84

    
85
def is_var(node):
    '''Returns whether node is a text node whose value is a variable name'''
    is_text = xml_dom.is_text_node(node)
    return is_text and is_var_name(xml_dom.value(node))
87

    
88
def is_func_name(name):
    '''Returns whether name looks like a function name: it starts with "_" but
    is not just "_"'''
    if name == '_': return False # '_' is default root node name
    return name.startswith('_')
90

    
91
def is_func(node):
    '''Returns whether node's tag name is a function name'''
    tag = node.tagName
    return is_func_name(tag)
92

    
93
def is_xml_func_name(name):
    '''Returns whether name is a function name that is registered in funcs'''
    if not is_func_name(name): return False
    return name in funcs
94

    
95
def is_xml_func(node):
    '''Returns whether node's tag name is a function registered in funcs'''
    tag = node.tagName
    return is_xml_func_name(tag)
96

    
97
def is_scalar(value):
    '''Returns whether value is a scalar (per scalar.is_scalar()) that is not
    a variable name. Strings that are variable names are excluded.'''
    return scalar.is_scalar(value) and (not util.is_str(value)
        or not is_var_name(value))
100

    
101
def passthru(node):
    '''Passes through single child node. First prunes the node.
    If the pruned node has exactly one entry, the node is replaced by that
    entry's value; otherwise the node is left in place.
    '''
    xml_dom.prune(node)
    entries = list(xml_dom.NodeEntryIter(node))
    if len(entries) != 1: return # nothing to pass through, or ambiguous
    xml_dom.replace(node, entries[0][1])
106

    
107
def simplify(node):
    '''Simplifies an XML tree, children first.
    * Merges nodes tagged as mergeable
    * Runs simplifying functions
    * Prunes the children of non-function nodes
    '''
    for child in xml_dom.NodeElemIter(node): simplify(child)
    merge_tagged(node)
    
    name = node.tagName
    
    # Pruning optimizations
    if not is_func_name(name):
        # these should not run on functions because they would remove args
        xml_dom.prune_children(node)
        return
    
    # Pass-through optimizations
    if name in simplifying_funcs: simplifying_funcs[name](node)
    else: xml_dom.prune_empty(node) # function without a simplifier
125

    
126
def process(node, on_error=exc.reraise, is_rel_func=None, db=None):
    '''Evaluates the XML functions in an XML tree.
    Children are processed first, then mergeable siblings are merged, then
    node itself is evaluated if its tag name is a function name. An evaluated
    function node is replaced with the text of its result.
    @param on_error f(e) Called with any exception raised by a local XML
        function. By then the failing node has already been replaced with a
        comment containing its original contents.
    @param is_rel_func None|f(str) Tests if a name is a relational function.
        * If != None: Non-relational functions are removed, or relational
          functions are treated specially, depending on the db param (below).
    @param db
        * If None: Non-relational functions other than structural functions are
          replaced with their last parameter (usually the value), not evaluated.
          This is used in column-based mode to remove XML-only functions.
        * If != None: Relational functions are evaluated directly. This is used
          in row-based mode to combine relational and XML functions.
    @return node if it is not a function; otherwise None
    '''
    has_rel_funcs = is_rel_func != None
    assert db == None or has_rel_funcs # rel_funcs required if db set
    
    for child in xml_dom.NodeElemIter(node):
        process(child, on_error, is_rel_func, db)
    merge_tagged(node)
    
    name = node.tagName
    if not is_func_name(name): return node # not any kind of function
    
    row_mode = has_rel_funcs and db != None
    column_mode = has_rel_funcs and db == None
    func = funcs.get(name, None)
    items = list(xml_dom.NodeTextEntryIter(node))
    
    # Parse function
    if len(items) == 1 and items[0][0].isdigit(): # has single numeric param
        # pass-through optimization for aggregating functions with one arg
        value = items[0][1] # pass through first arg
    elif row_mode and (is_rel_func(name) or func == None): # row-based mode
        # any() is False for no items and stops at the first node-valued param
        if any(xml_dom.is_node(v) for n, v in items):
            return # preserve complex funcs
        # Evaluate using DB
        try: value = sql_io.put(db, name, dict(items), on_error=on_error)
        except sql.DoesNotExistException: return # preserve unknown funcs
            # possibly a built-in function of db_xml.put()
    elif column_mode or func == None:
        # local XML function can't be used or does not exist
        if column_mode and is_rel_func(name): return # preserve relational funcs
        # otherwise XML-only in column mode, or DB-only in XML output mode
        value = pop_value(items, None) # just replace with last param
    else: # local XML function
        try: value = func(items, node)
        except Exception as e:
            # also catch non-wrapped exceptions (XML func bugs)
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            # Leave the failed function in the tree as a comment
            xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
                '\n'+term.emph_multiline(str_)))
            
            on_error(e)
            return # in case on_error() returns
    
    xml_dom.replace_with_text(node, value)
183

    
184
##### Simplifying functions
185

    
186
# Function names must start with _ to avoid collisions with real tags
187
# Functions take params (node) and have no return value
188

    
189
#### Logic
190

    
191
def _and(node):
    '''Simplifies an _and node: if every param is a constant, replaces the node
    with the text of lists.and_() of the params; otherwise applies passthru().
    '''
    values = [value for name, value in xml_dom.NodeTextEntryIter(node)]
    
    all_constants = lists.and_([is_scalar(value) for value in values])
    if not all_constants: passthru(node)
    else: xml_dom.replace_with_text(node, lists.and_(values))
197
simplifying_funcs['_and'] = _and
198

    
199
def _or(node):
    '''Simplifies an _or node: if every param is a constant, replaces the node
    with the text of lists.or_() of the params; otherwise applies passthru().
    '''
    values = [value for name, value in xml_dom.NodeTextEntryIter(node)]
    
    all_constants = lists.and_([is_scalar(value) for value in values])
    if not all_constants: passthru(node)
    else: xml_dom.replace_with_text(node, lists.or_(values))
205
simplifying_funcs['_or'] = _or
206

    
207
def _exists(node):
    '''Replaces the node with the text of whether the node is non-empty'''
    is_empty = xml_dom.is_empty(node)
    xml_dom.replace_with_text(node, not is_empty)
210
simplifying_funcs['_exists'] = _exists
211

    
212
def _if(node):
    '''
    *Must* be run to remove conditions that functions._if() can't handle.
    Note: Can add `@name` attr to distinguish separate _if statements.
    
    Reads the params `cond`, `then` and `else`. The node is replaced when the
    outcome is already known; otherwise it is left unchanged.
    '''
    params = dict(xml_dom.NodeEntryIter(node))
    cond = params.get('cond', None)
    then = params.get('then', None)
    else_ = params.get('else', None)
    
    if cond == None: # always False
        xml_dom.replace(node, else_)
        return
    if then == else_: # always same value
        xml_dom.replace(node, then)
        return
    if is_var(cond): return # can't simplify variable conditions
    if xml_dom.is_text_node(cond) and bool(xml_dom.value(cond)): # always True
        xml_dom.replace(node, then)
227
simplifying_funcs['_if'] = _if
228

    
229
def _nullIf(node):
    '''
    *Must* be run to remove conditions that functions._nullIf() can't handle.
    
    Reads the params `null` and `value`. The node is left unchanged when both
    are present.
    '''
    params = dict(xml_dom.NodeEntryIter(node))
    value = params.get('value', None)
    null = params.get('null', None)
    
    if value == None: # empty
        xml_dom.prune_parent(node)
        return
    if null == None: xml_dom.replace(node, value) # nothing to null out
239
simplifying_funcs['_nullIf'] = _nullIf
240

    
241
#### Comparison
242

    
243
def _eq(node):
    '''Simplifies an _eq node whose result is already known.
    Reads the params `left` and `right`. If both are constants, the node is
    replaced with the text of their comparison. If they are not both
    constants but compare equal, it is replaced with the text of True.
    Otherwise the node is left unchanged.
    '''
    params = dict(xml_dom.NodeTextEntryIter(node))
    left = params.get('left', None)
    right = params.get('right', None)
    
    both_constant = is_scalar(left) and is_scalar(right)
    if both_constant:
        xml_dom.replace_with_text(node, left == right)
        return
    if left == right: xml_dom.replace_with_text(node, True) # always True
251
simplifying_funcs['_eq'] = _eq
252

    
253
#### Merging
254

    
255
simplifying_funcs['_alt'] = passthru
256
simplifying_funcs['_join'] = passthru
257
simplifying_funcs['_join_words'] = passthru
258
simplifying_funcs['_merge_prefix'] = passthru
259
simplifying_funcs['_merge'] = passthru
260
simplifying_funcs['_min'] = passthru
261
simplifying_funcs['_max'] = passthru
262
simplifying_funcs['_avg'] = passthru
263

    
264
def _first(node):
    '''Chooses the first non-empty param (sorting by numeric param name).
    The node is replaced with that param, or with None if no params remain
    after pruning.
    '''
    xml_dom.prune_children(node) # drop the empty params first
    args = variadic_args(node)
    first = args[0] if args else None
    xml_dom.replace(node, first)
271
simplifying_funcs['_first'] = _first
272

    
273
#### Environment access
274

    
275
def _env(node):
    '''Replaces the node with the text of the environment variable named by
    its `name` param.
    Throws SyntaxError if the `name` param is missing. A KeyError from the
    os.environ lookup (variable not set) is not caught here.
    '''
    params = dict(xml_dom.NodeTextEntryIter(node))
    try: env_var = params['name']
    except KeyError as e: raise SyntaxError(e)
    
    env_value = os.environ[env_var]
    xml_dom.replace_with_text(node, env_value)
281
simplifying_funcs['_env'] = _env
282

    
283
##### XML functions
284

    
285
# Function names must start with _ to avoid collisions with real tags
286
# Functions take arguments (items, node)
287

    
288
#### Transforming values
289

    
290
def _replace(items, node):
    '''Applies regexp replacements to the value.
    Every param other than the trailing `value` param is a (pattern,
    replacement) pair. The pairs are applied to the value in order.
    @param items (name, value) entries of the function node
    @param node The function node (unused)
    @return The stripped result, or None if the input or the result is empty
    Throws SyntaxError if a pattern or replacement is not a valid regexp
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value == None: return None # input is empty
    try:
        for repl, with_ in items:
            if re.match(r'^\w+$', repl):
                repl = r'(?<![^\W_])'+repl+r'(?![^\W_])' # match whole word
            value = re.sub(repl, with_, value)
    # re.error is the public name of the exception the re functions raise
    except re.error as e: raise SyntaxError(e)
    return util.none_if(value.strip(), u'') # empty strings always mean None
301
funcs['_replace'] = _replace
302

    
303
#### Quantities
304

    
305
def _units(items, node):
    '''Applies unit actions to the quantity in the `value` param.
    Every other param is an (action, units) pair, applied in order:
    * `default`: passes the units to units.set_default_units()
    * `to`: converts the quantity with units.convert()
    @param items list of (name, value) entries; the last one is popped
    @param node The function node (unused)
    @return The resulting quantity as a string, or None if the input is empty
    Throws SyntaxError for any other action. Throws FormatException if
    conversion raises ValueError or if units are missing.
    '''
    value = pop_value(items)
    if value == None: return None # input is empty
    
    quantity = units.str2quantity(value)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise FormatException(e)
            elif action == 'default':
                units.set_default_units(quantity, units_)
            else: raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise FormatException(e)
    return units.quantity2str(quantity)
320
funcs['_units'] = _units
321

    
322
def _rangeStart(items, node):
    '''Returns element [0] of units.parse_range() applied to the `value`
    param, or None if there is no `value` param.
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    return units.parse_range(params['value'])[0]
327
funcs['_rangeStart'] = _rangeStart
328

    
329
def _rangeEnd(items, node):
    '''Returns element [1] of units.parse_range() applied to the `value`
    param, or None if there is no `value` param.
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    return units.parse_range(params['value'])[1]
334
funcs['_rangeEnd'] = _rangeEnd
335

    
336
def _range(items, node):
    '''Returns the difference `to` - `from` as a string, or None if either
    param is missing. Both params are cast to float.
    '''
    bounds = dict(conv_items(float, items))
    from_ = bounds.get('from', None)
    to = bounds.get('to', None)
    if from_ is None or to is None: return None # need both ends
    return str(to - from_)
342
funcs['_range'] = _range
343

    
344
def _avg(items, node):
    '''Returns the mean of all the params as a string, or None if there are no
    params. Every param value is cast to float; param names are ignored.
    '''
    total = 0.
    num_values = 0
    for name, value in conv_items(float, items):
        total += value
        num_values += 1
    if num_values == 0: return None # input is empty
    return str(total/num_values)
352
funcs['_avg'] = _avg
353

    
354
class CvException(Exception):
    '''Signals a CV (coefficient of variation) value where one is not
    allowed. Takes no arguments; the message is fixed.
    '''
    def __init__(self):
        message = ('CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, message)
359

    
360
def _noCV(items, node):
    '''Passes the last param through unless it is a CV value.
    A CV value is "CV" (any case), optional spaces, then digits.
    @param items (name, value) entries of the function node
    @param node The function node (unused)
    @return The last param's value, or None if there are no params
    Throws FormatException (caused by CvException) if the value is a CV value
    '''
    items = list(conv_items(strings.ustr, items))
    try: name, value = items.pop() # last entry contains value
    except IndexError: return None # input is empty
    # Case-insensitivity is passed as a flag: an inline (?i) is only valid at
    # the very start of a pattern
    if re.match(r'^CV *\d+$', value, re.IGNORECASE):
        raise FormatException(CvException())
    return value
366
funcs['_noCV'] = _noCV
367

    
368
#### Angles
369

    
370
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading.
    Values that are not all-uppercase are returned unchanged.
    @return The heading cast to str by util.cast(), the unchanged value, or
        None if there is no `value` param
    Throws FormatException if angles.compass2heading() raises KeyError
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    value = params['value']
    
    if not value.isupper(): return value # pass through other coordinate formats
    try: heading = angles.compass2heading(value)
    except KeyError as e: raise FormatException(e)
    # util.cast() is deliberately outside the try, so only the lookup's
    # KeyError is wrapped
    return util.cast(str, heading) # ignore None
379
funcs['_compass'] = _compass
(44-44/47)