Project

General

Profile

1
# XML "function" nodes that transform their contents
2

    
3
import datetime
4
import operator
5
import os
6
import re
7
import sre_constants
8
import warnings
9

    
10
import angles
11
import dates
12
import exc
13
import format
14
import lists
15
import maps
16
import scalar
17
import sql
18
import sql_io
19
import strings
20
import term
21
import units
22
import util
23
import xml_dom
24
import xpath
25

    
26
##### Exceptions
27

    
28
class SyntaxError(exc.ExceptionWithCause):
    '''Signals that an XML function was invoked with invalid syntax.
    Note: inside this module this name shadows the builtin SyntaxError.
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax'
        exc.ExceptionWithCause.__init__(self, msg, cause)
32

    
33
class FormatException(exc.ExceptionWithCause):
    '''Signals that an input value could not be parsed or converted.
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        msg = 'Invalid input value'
        exc.ExceptionWithCause.__init__(self, msg, cause)
36

    
37
##### Helper functions
38

    
39
def map_names(func, params):
    '''Applies func to the name of every (name, value) pair.
    @param params Iterable of (name, value) pairs
    @return list of (func(name), value) pairs, in iteration order
    '''
    renamed = []
    for key, val in params:
        renamed.append((func(key), val))
    return renamed
41

    
42
def variadic_args(node):
    '''Gets a node's param values, ordered by numeric param name.
    Each param name is converted with float() before sorting.
    @return list of the values only
    '''
    ordered = sorted(map_names(float, xml_dom.NodeEntryIter(node)))
    return [entry[1] for entry in ordered]
46

    
47
def map_items(func, items):
    '''Applies func to the value of every (name, value) pair.
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in iteration order
    '''
    converted = []
    for key, val in items:
        converted.append((key, func(val)))
    return converted
49

    
50
def cast(type_, val):
    '''Converts val by calling type_(val).
    @param type_ Callable that performs the conversion (e.g. float)
    @return The converted value
    @throws FormatException if type_(val) raises ValueError. Other exception
        types (e.g. TypeError) propagate unchanged.
    '''
    try: return type_(val)
    # `except X as e` works on Python 2.6+ and 3, unlike the old `except X, e`
    except ValueError as e: raise FormatException(e)
54

    
55
def conv_items(type_, items):
    '''Casts the value of every text entry in items to type_.
    Entries are read through xml_dom.TextEntryOnlyIter().
    @return list of (name, converted value) pairs
    @throws FormatException if a value can't be cast (see cast())
    '''
    def convert(val): return cast(type_, val)
    
    text_items = xml_dom.TextEntryOnlyIter(items)
    return map_items(convert, text_items)
58

    
59
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value.
    Note that the last entry is removed even if its name doesn't match.
    @param items Mutable list of (name, value) entries
    @param name Name of value param, or None to accept any name
    @return The value, or None if items is empty or the last entry has a
        different name
    '''
    try: entry = items.pop() # the value is expected in the last entry
    except IndexError: return None # input is empty and no actions
    
    wrong_name = name is not None and entry[0] != name
    return None if wrong_name else entry[1] # wrong name: input is empty
65

    
66
def merge_tagged(root):
    '''Merges siblings in root that are marked as mergeable (@merge=1).
    Used to recombine pieces of nodes that were split apart in the mappings.
    Applies itself recursively to every child element of root.
    '''
    # Collect the distinct tag names first, before anything is merged
    tag_names = set()
    for elem in xpath.get(root, '*[@merge=1]'):
        tag_names.add(elem.tagName)
    for tag_name in tag_names:
        xml_dom.merge_by_name(root, tag_name)
    
    # Recurse
    for child in xml_dom.NodeElemIter(root):
        merge_tagged(child)
75

    
76
# Registry of XML functions, looked up by name in process().
# Entries are called as func(items, node).
funcs = dict()
# Registry of simplifying functions, looked up by name in simplify().
# Entries are called as func(node).
simplifying_funcs = dict()
78

    
79
##### Public functions
80

    
81
var_name_prefix = '$' # a string starting with this is a variable name

def is_var_name(str_):
    '''Returns whether str_ starts with var_name_prefix'''
    has_prefix = str_.startswith(var_name_prefix)
    return has_prefix
84

    
85
def is_var(node):
    '''Returns whether node is a text node whose value is a variable name'''
    is_text = xml_dom.is_text_node(node)
    if not is_text: return is_text
    return is_var_name(xml_dom.value(node))
87

    
88
def is_func_name(name):
    '''Returns whether name looks like a function name: it starts with `_`
    but is not just `_`.
    '''
    if name == '_': return False # '_' is default root node name
    return name.startswith('_')
90

    
91
def is_func(node):
    '''Returns whether node's tag name is a function name'''
    tag_name = node.tagName
    return is_func_name(tag_name)
92

    
93
def is_xml_func_name(name):
    '''Returns whether name is a function name that is registered in funcs'''
    if not is_func_name(name): return False
    return name in funcs
94

    
95
def is_xml_func(node):
    '''Returns whether node's tag name is a registered XML function name'''
    tag_name = node.tagName
    return is_xml_func_name(tag_name)
96

    
97
def is_scalar(value):
    '''Returns whether value is a scalar (per scalar.is_scalar()) that is not
    a variable name string.
    '''
    scalar_ = scalar.is_scalar(value)
    if not scalar_: return scalar_
    is_var_ref = util.is_str(value) and is_var_name(value)
    return not is_var_ref
100

    
101
def passthru(node):
    '''Passes through single child node. First prunes the node.
    The node is replaced by its child only if exactly one entry is left after
    pruning; otherwise it is kept as is.
    '''
    xml_dom.prune(node)
    entries = list(xml_dom.NodeEntryIter(node))
    if len(entries) != 1: return # nothing to pass through
    xml_dom.replace(node, entries[0][1])
106

    
107
def simplify(node):
    '''Simplifies an XML tree.
    * Merges nodes tagged as mergeable
    * Runs simplifying functions
    Children are simplified before the node itself.
    '''
    for child in xml_dom.NodeElemIter(node):
        simplify(child)
    merge_tagged(node)
    
    name = node.tagName
    
    # Pruning optimizations
    # these should not run on functions because they would remove args
    if not is_func_name(name):
        xml_dom.prune_children(node)
        return
    
    # Pass-through optimizations
    if name in simplifying_funcs: simplifying_funcs[name](node)
    else: xml_dom.prune_empty(node) # no simplifying function registered
125

    
126
def process(node, on_error=exc.reraise, is_rel_func=None, db=None):
    '''Evaluates the XML functions in an XML tree.
    Children are processed first, then mergeable siblings are merged, then the
    node itself is evaluated if its tag name is a function name.
    @param node The root of the tree to process
    @param on_error f(e) Called with the exception raised by a local XML
        function. If it returns, the node is left replaced by a comment.
    @param is_rel_func None|f(str) Tests if a name is a relational function.
        * If != None: Non-relational functions are removed, or relational
          functions are treated specially, depending on the db param (below).
    @param db
        * If None: Non-relational functions other than structural functions are
          replaced with their last parameter (usually the value), not evaluated.
          This is used in column-based mode to remove XML-only functions.
        * If != None: Relational functions are evaluated directly. This is used
          in row-based mode to combine relational and XML functions.
    @return node if it is not a function node; otherwise None
    '''
    has_rel_funcs = is_rel_func != None
    assert db == None or has_rel_funcs # rel_funcs required if db set
    
    for child in xml_dom.NodeElemIter(node):
        process(child, on_error, is_rel_func, db)
    merge_tagged(node)
    
    name = node.tagName
    if not is_func_name(name): return node # not any kind of function
    
    row_mode = has_rel_funcs and db != None
    column_mode = has_rel_funcs and db == None
    func = funcs.get(name, None)
    items = list(xml_dom.NodeTextEntryIter(node))
    
    # Parse function
    if len(items) == 1 and items[0][0].isdigit(): # has single numeric param
        # pass-through optimization for aggregating functions with one arg
        value = items[0][1] # pass through first arg
    elif row_mode and (is_rel_func(name) or func == None): # row-based mode
        # any() is False for empty items, so no separate emptiness check needed
        if any(xml_dom.is_node(v) for n, v in items):
            return # preserve complex funcs
        # Evaluate using DB
        try: value = sql_io.put(db, name, dict(items), on_error=on_error)
        except sql.DoesNotExistException: return # preserve unknown funcs
            # possibly a built-in function of db_xml.put()
    elif column_mode or func == None:
        # local XML function can't be used or does not exist
        if column_mode and is_rel_func(name): return # preserve relational funcs
        # otherwise XML-only in column mode, or DB-only in XML output mode
        value = pop_value(items, None) # just replace with last param
    else: # local XML function
        try: value = func(items, node)
        except Exception as e: # also catch non-wrapped exceptions (XML func bugs)
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            # Leave a comment containing the failed function in the node's place
            xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
                '\n'+term.emph_multiline(str_)))
            
            on_error(e)
            return # in case on_error() returns
    
    xml_dom.replace_with_text(node, value)
183

    
184
##### Simplifying functions
185

    
186
# Function names must start with _ to avoid collisions with real tags
187
# Functions take params (node) and have no return value
188

    
189
#### Logic
190

    
191
def _and(node):
    '''If every param is a constant, replaces the node with lists.and_() of
    the params. Otherwise applies passthru().
    '''
    operands = [operand for _name, operand in xml_dom.NodeTextEntryIter(node)]
    
    all_const = lists.and_([is_scalar(operand) for operand in operands])
    if not all_const:
        passthru(node)
        return
    xml_dom.replace_with_text(node, lists.and_(operands))
simplifying_funcs['_and'] = _and
198

    
199
def _or(node):
    '''If every param is a constant, replaces the node with lists.or_() of
    the params. Otherwise applies passthru().
    '''
    operands = [operand for _name, operand in xml_dom.NodeTextEntryIter(node)]
    
    all_const = lists.and_([is_scalar(operand) for operand in operands])
    if not all_const:
        passthru(node)
        return
    xml_dom.replace_with_text(node, lists.or_(operands))
simplifying_funcs['_or'] = _or
206

    
207
def _exists(node):
    '''Replaces the node with whether it is non-empty (a boolean)'''
    non_empty = not xml_dom.is_empty(node)
    xml_dom.replace_with_text(node, non_empty)
simplifying_funcs['_exists'] = _exists
211

    
212
def _if(node):
    '''
    *Must* be run to remove conditions that functions._if() can't handle.
    Note: Can add `@name` attr to distinguish separate _if statements.
    
    Reads the `cond`, `then` and `else` params and replaces the node when the
    outcome is already known; otherwise leaves the node unchanged.
    '''
    params = dict(xml_dom.NodeEntryIter(node))
    cond = params.get('cond')
    then = params.get('then')
    else_ = params.get('else')
    
    if cond == None: # always False
        xml_dom.replace(node, else_)
        return
    if then == else_: # always same value
        xml_dom.replace(node, then)
        return
    if is_var(cond): return # can't simplify variable conditions
    if xml_dom.is_text_node(cond) and xml_dom.value(cond): # always True
        xml_dom.replace(node, then)
simplifying_funcs['_if'] = _if
228

    
229
def _nullIf(node):
    '''
    *Must* be run to remove conditions that functions._nullIf() can't handle.
    
    With no `value` param, xml_dom.prune_parent() is applied to the node; with
    a `value` but no `null` param, the node is replaced by the value.
    '''
    params = dict(xml_dom.NodeEntryIter(node))
    value = params.get('value')
    null = params.get('null')
    
    if value == None: # empty
        xml_dom.prune_parent(node)
        return
    if null == None: xml_dom.replace(node, value) # nothing to null out
simplifying_funcs['_nullIf'] = _nullIf
240

    
241
#### Comparison
242

    
243
def _eq(node):
    '''Replaces the node with the comparison of its `left` and `right` params
    when the outcome is already known: both are constants, or both are equal.
    Otherwise leaves the node unchanged.
    '''
    params = dict(xml_dom.NodeTextEntryIter(node))
    left = params.get('left')
    right = params.get('right')
    
    both_const = is_scalar(left) and is_scalar(right)
    if both_const: result = (left == right) # constant
    elif left == right: result = True # always True
    else: return # can't be decided yet
    xml_dom.replace_with_text(node, result)
simplifying_funcs['_eq'] = _eq
252

    
253
#### Merging
254

    
255
# These functions are all simplified by passing through a single child
simplifying_funcs.update(dict.fromkeys(
    ('_alt', '_join', '_join_words', '_merge', '_min', '_max'), passthru))
261

    
262
def _first(node):
    '''Chooses the first non-empty param (sorting by numeric param name).
    The children are pruned first; if no param remains, the node is replaced
    with None.
    '''
    xml_dom.prune_children(node)
    remaining = variadic_args(node)
    chosen = remaining[0] if remaining else None
    xml_dom.replace(node, chosen)
simplifying_funcs['_first'] = _first
270

    
271
#### Environment access
272

    
273
def _env(node):
    '''Replaces the node with the value of an environment variable.
    The variable's name is read from the `name` param.
    @throws SyntaxError (this module's) if there is no `name` param
    @throws KeyError if the environment variable is not set
    '''
    params = dict(xml_dom.NodeTextEntryIter(node))
    try: name = params['name']
    # `except X as e` works on Python 2.6+ and 3, unlike the old `except X, e`
    except KeyError as e: raise SyntaxError(e)
    
    xml_dom.replace_with_text(node, os.environ[name])
simplifying_funcs['_env'] = _env
280

    
281
##### XML functions
282

    
283
# Function names must start with _ to avoid collisions with real tags
284
# Functions take arguments (items, node)
285

    
286
#### Transforming values
287

    
288
def _replace(items, node):
    '''Applies regular expression replacements to a value.
    @param items (pattern, replacement) entries followed by the `value` entry.
        A pattern made up only of word characters matches whole words only.
    @param node Unused
    @return The stripped result, or None if the input or the result is empty
    @throws SyntaxError (this module's) if a pattern is not a valid regexp
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    try:
        for repl, with_ in items:
            if re.match(r'^\w+$', repl):
                repl = r'(?<![^\W_])'+repl+r'(?![^\W_])' # match whole word
            value = re.sub(repl, with_, value)
    # re.error is the public name of sre_constants.error (the same class)
    except re.error as e: raise SyntaxError(e)
    return util.none_if(value.strip(), u'') # empty strings always mean None
funcs['_replace'] = _replace
300

    
301
#### Quantities
302

    
303
def _units(items, node):
    '''Parses a quantity and applies unit actions to it.
    @param items (action, units) entries followed by the `value` entry.
        Actions: `default` calls units.set_default_units(), `to` calls
        units.convert(). Empty-string units are passed as None.
    @param node Unused
    @return units.quantity2str() of the resulting quantity, or None if the
        input is empty
    @throws SyntaxError (this module's) on any other action
    @throws FormatException if units.convert() raises ValueError, or if
        units.MissingUnitsException is raised
    '''
    value = pop_value(items)
    if value == None: return None # input is empty
    
    quantity = units.str2quantity(value)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default': units.set_default_units(quantity, units_)
            elif action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise FormatException(e)
            else: raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise FormatException(e)
    return units.quantity2str(quantity)
funcs['_units'] = _units
319

    
320
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first occurrence of range_sep.
    @return tuple (start, end), each stripped of surrounding whitespace.
        If str_ is not a range, or is a negative number (leading `-` with the
        default separator), returns (str_, None) with str_ unchanged.
    '''
    start, sep, end = str_.partition(range_sep)
    not_a_range = sep == ''
    negative_number = start == '' and range_sep == '-'
    if not_a_range or negative_number: return (str_, None)
    return (start.strip(), end.strip())
326

    
327
def _rangeStart(items, node):
    '''Returns the start of the range in the `value` param (see parse_range()),
    or None if there is no `value` param.
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, end = parse_range(params['value'])
    return start
funcs['_rangeStart'] = _rangeStart
333

    
334
def _rangeEnd(items, node):
    '''Returns the end of the range in the `value` param (see parse_range()),
    or None if there is no `value` param or the value is not a range.
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, end = parse_range(params['value'])
    return end
funcs['_rangeEnd'] = _rangeEnd
340

    
341
def _range(items, node):
    '''Returns the difference `to` - `from` as a string.
    Both params are converted to float. Returns None if either is missing.
    '''
    bounds = dict(conv_items(float, items))
    lower = bounds.get('from')
    upper = bounds.get('to')
    if lower is None or upper is None: return None
    width = upper - lower
    return str(width)
funcs['_range'] = _range
348

    
349
def _avg(items, node):
    '''Returns the arithmetic mean of the params, as a string.
    All params are converted to float. Returns None if there are no params.
    '''
    values = [value for _name, value in conv_items(float, items)]
    if not values: return None # input is empty
    
    total = 0.
    for value in values: total += value
    return str(total/len(values))
funcs['_avg'] = _avg
358

    
359
class CvException(Exception):
    '''Raised for CV (coefficient of variation) values, which are rejected
    by _noCV().
    '''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only allowed for '
            'ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
364

    
365
def _noCV(items, node):
    '''Passes through the last param's value, rejecting CV values.
    A CV value is `CV`, optional spaces, then digits (case-insensitive).
    @param node Unused
    @return The value of the last param (whatever its name), or None if
        there are no params
    @throws FormatException wrapping CvException if the value is a CV value
    '''
    items = list(conv_items(strings.ustr, items))
    try: name, value = items.pop() # last entry contains value
    except IndexError: return None # input is empty
    # The flag is passed as an argument because a global inline flag such as
    # (?i) is only allowed at the very start of a pattern in newer Pythons
    if re.match(r'^CV *\d+$', value, re.I): raise FormatException(CvException())
    return value
funcs['_noCV'] = _noCV
372

    
373
#### Angles
374

    
375
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading.
    @param items Must contain the `value` param to convert
    @param node Unused
    @return The heading as a string; the value unchanged if it is not
        all-uppercase; or None if there is no `value` param
    @throws FormatException if angles.compass2heading() raises KeyError
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    
    if not value.isupper(): return value # pass through other coordinate formats
    try: return util.cast(str, angles.compass2heading(value)) # ignore None
    # `except X as e` works on Python 2.6+ and 3, unlike the old `except X, e`
    except KeyError as e: raise FormatException(e)
funcs['_compass'] = _compass
(44-44/47)