Project

General

Profile

1
# XML "function" nodes that transform their contents
2

    
3
import datetime
4
import operator
5
import os
6
import re
7
import sre_constants
8
import warnings
9

    
10
import angles
11
import dates
12
import exc
13
import format
14
import lists
15
import maps
16
import scalar
17
import sql
18
import sql_io
19
import strings
20
import term
21
import units
22
import util
23
import xml_dom
24
import xpath
25

    
26
##### Exceptions
27

    
28
class SyntaxError(exc.ExceptionWithCause):
    '''Raised when an XML function is invoked with invalid syntax.
    Note: within this module this name shadows the builtin SyntaxError.
    '''
    def __init__(self, cause):
        '''@param cause The underlying exception describing the problem'''
        exc.ExceptionWithCause.__init__(self, 'Invalid XML function syntax',
            cause)
32

    
33
class FormatException(exc.ExceptionWithCause):
    '''Raised when an input value can't be parsed or converted'''
    def __init__(self, cause):
        '''@param cause The underlying exception describing the problem'''
        exc.ExceptionWithCause.__init__(self, 'Invalid input value', cause)
36

    
37
##### Helper functions
38

    
39
def map_names(func, params):
    '''Applies func to the name of each (name, value) pair.
    @param func f(name) Transformation applied to every name
    @param params Iterable of (name, value) pairs
    @return list of (func(name), value) pairs, in iteration order
    '''
    return [(func(name), value) for name, value in params]
41

    
42
def variadic_args(node):
    '''Gets the values of a node's numerically-named params, in numeric order.
    @param node Node whose entries are named with numbers
    @return list of the entries' values, ordered by float(name)
    '''
    args = map_names(float, xml_dom.NodeEntryIter(node))
    # Sort on the name only, so that entries with equal names keep their
    # original order instead of falling back to comparing the values
    args.sort(key=operator.itemgetter(0))
    return [value for name, value in args]
46

    
47
def map_items(func, items):
    '''Applies func to the value of each (name, value) pair.
    @param func f(value) Transformation applied to every value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in iteration order
    '''
    return [(name, func(value)) for name, value in items]
49

    
50
def cast(type_, val):
    '''Converts val using type_.
    @param type_ Callable performing the conversion (e.g. int, float)
    @param val The value to convert
    @return type_(val)
    Throws FormatException if can't cast (i.e. type_ raised ValueError)
    '''
    try: return type_(val)
    except ValueError as e: raise FormatException(e)
54

    
55
def conv_items(type_, items):
    '''Casts the value of every text-only entry in items to type_.
    @param type_ Callable performing the conversion
    @param items Entries, filtered through xml_dom.TextEntryOnlyIter()
    @return list of (name, type_(value)) pairs
    Throws FormatException if a value can't be cast
    '''
    def conv(val): return cast(type_, val)
    return map_items(conv, xml_dom.TextEntryOnlyIter(items))
58

    
59
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value.
    Note that the last entry is removed from items even when its name does
    not match.
    @param items Mutable list of (name, value) entries
    @param name Name of value param, or None to accept any name
    @return The last entry's value, or None if items is empty or the last
        entry is not named `name`
    '''
    try: last = items.pop() # last entry contains value
    except IndexError: return None # input is empty and no actions
    if name is not None and last[0] != name: return None # input is empty
    return last[1]
65

    
66
def merge_tagged(root):
    '''Merges siblings in root that are marked as mergeable.
    Used to recombine pieces of nodes that were split apart in the mappings.
    @param root Node whose children matching `*[@merge=1]` are merged by tag
        name; the same is then done for every child element, recursively
    '''
    # Collect the distinct tag names first, so each name is merged only once
    mergeable = set(c.tagName for c in xpath.get(root, '*[@merge=1]'))
    for name in mergeable: xml_dom.merge_by_name(root, name)
    
    # Recurse
    for child in xml_dom.NodeElemIter(root): merge_tagged(child)
75

    
76
# Registry of XML functions, keyed by function name.
# Entries are called as f(items, node) by process().
funcs = {}

# Registry of simplifying functions, keyed by function name.
# Entries are called as f(node) by simplify().
simplifying_funcs = {}
78

    
79
##### Public functions
80

    
81
# Strings starting with this prefix are treated as variable names
var_name_prefix = '$'


def is_var_name(str_):
    '''Tests whether a string starts with var_name_prefix'''
    return str_.startswith(var_name_prefix)
84

    
85
def is_var(node):
    '''Tests whether node is a text node whose value is a variable name'''
    if not xml_dom.is_text_node(node): return False
    return is_var_name(xml_dom.value(node))
87

    
88
def is_func_name(name):
    '''Tests whether a tag name denotes a function.
    Function names start with '_', but '_' by itself is not a function.
    '''
    return name.startswith('_') and name != '_' # '_' is default root node name
90

    
91
def is_func(node):
    '''Tests whether an element's tag name is a function name'''
    tag = node.tagName
    return is_func_name(tag)
92

    
93
def is_xml_func_name(name):
    '''Tests whether name is a function name that is registered in funcs'''
    if not is_func_name(name): return False
    return name in funcs
94

    
95
def is_xml_func(node):
    '''Tests whether an element's tag name is a registered XML function'''
    tag = node.tagName
    return is_xml_func_name(tag)
96

    
97
def is_scalar(value):
    '''Tests whether value is a constant scalar.
    Strings that are variable names (see is_var_name()) do not count, even
    though scalar.is_scalar() accepts them.
    '''
    is_var_ref = util.is_str(value) and is_var_name(value)
    return scalar.is_scalar(value) and not is_var_ref
100

    
101
def passthru(node):
    '''Passes through single child node. First prunes the node.
    If exactly one entry is left after pruning, node is replaced with that
    entry's value; otherwise node is left in place.
    '''
    xml_dom.prune(node)
    children = list(xml_dom.NodeEntryIter(node))
    if len(children) == 1:
        name, only_child = children[0]
        xml_dom.replace(node, only_child)
106

    
107
def simplify(node):
    '''Simplifies an XML tree.
    * Merges nodes tagged as mergable
    * Runs simplifying functions
    Children are simplified before their parent.
    @param node Root of the (sub)tree to simplify in place
    '''
    for child in xml_dom.NodeElemIter(node): simplify(child)
    merge_tagged(node)
    
    name = node.tagName
    
    # Pass-through optimizations
    if is_func_name(name):
        try: func = simplifying_funcs[name]
        except KeyError: xml_dom.prune_empty(node) # no simplifier registered
        else: func(node) # outside the try so its own KeyErrors propagate
    # Pruning optimizations
    else: # these should not run on functions because they would remove args
        xml_dom.prune_children(node)
125

    
126
def process(node, on_error=exc.reraise, is_rel_func=None, db=None):
    '''Evaluates the XML functions in an XML tree.
    Children are processed before their parent, so a function sees its params
    already evaluated.
    @param node Root of the (sub)tree to evaluate in place
    @param on_error f(Exception) Called when a local XML function raises. Also
        passed on to sql_io.put() in row-based mode.
    @param is_rel_func None|f(str) Tests if a name is a relational function.
        * If != None: Non-relational functions are removed, or relational
          functions are treated specially, depending on the db param (below).
    @param db
        * If None: Non-relational functions other than structural functions are
          replaced with their last parameter (usually the value), not evaluated.
          This is used in column-based mode to remove XML-only functions.
        * If != None: Relational functions are evaluated directly. This is used
          in row-based mode to combine relational and XML functions.
    @return node if it is not a function node, otherwise None
    '''
    has_rel_funcs = is_rel_func is not None
    assert db is None or has_rel_funcs # rel_funcs required if db set
    
    for child in xml_dom.NodeElemIter(node):
        process(child, on_error, is_rel_func, db)
    merge_tagged(node)
    
    name = node.tagName
    if not is_func_name(name): return node # not any kind of function
    
    row_mode = has_rel_funcs and db is not None
    column_mode = has_rel_funcs and db is None
    func = funcs.get(name, None)
    items = list(xml_dom.NodeTextEntryIter(node))
    
    # Parse function
    if len(items) == 1 and items[0][0].isdigit(): # has single numeric param
        # pass-through optimization for aggregating functions with one arg
        value = items[0][1] # pass through first arg
    elif row_mode and (is_rel_func(name) or func is None): # row-based mode
        if any(xml_dom.is_node(v) for n, v in items):
            return # preserve complex funcs
        # Evaluate using DB
        try: value = sql_io.put(db, name, dict(items), on_error=on_error)
        except sql.DoesNotExistException: return # preserve unknown funcs
            # possibly a built-in function of db_xml.put()
    elif column_mode or func is None:
        # local XML function can't be used or does not exist
        if column_mode and is_rel_func(name): return # preserve relational funcs
        # otherwise XML-only in column mode, or DB-only in XML output mode
        value = pop_value(items, None) # just replace with last param
    else: # local XML function
        try: value = func(items, node)
        except Exception as e: # also catch non-wrapped exceptions (XML func
            # bugs)
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            # Leave a comment containing the failed function in its place
            xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
                '\n'+term.emph_multiline(str_)))
            
            on_error(e)
            return # in case on_error() returns
    
    xml_dom.replace_with_text(node, value)
183

    
184
##### Simplifying functions
185

    
186
# Function names must start with _ to avoid collisions with real tags
187
# Functions take params (node) and have no return value
188

    
189
#### Logic
190

    
191
def _and(node):
    '''Replaces node with the AND of its params if they are all constants.
    Otherwise falls back to passthru().
    '''
    values = [v for k, v in xml_dom.NodeTextEntryIter(node)]
    
    if lists.and_([is_scalar(v) for v in values]): # all constants
        xml_dom.replace_with_text(node, lists.and_(values))
    else: passthru(node)
simplifying_funcs['_and'] = _and
198

    
199
def _or(node):
    '''Replaces node with the OR of its params if they are all constants.
    Otherwise falls back to passthru().
    '''
    values = [v for k, v in xml_dom.NodeTextEntryIter(node)]
    
    if lists.and_([is_scalar(v) for v in values]): # all constants
        xml_dom.replace_with_text(node, lists.or_(values))
    else: passthru(node)
simplifying_funcs['_or'] = _or
206

    
207
def _exists(node):
    '''Returns whether its node is non-empty.
    The node is replaced with the text form of that boolean.
    '''
    non_empty = not xml_dom.is_empty(node)
    xml_dom.replace_with_text(node, non_empty)
simplifying_funcs['_exists'] = _exists
211

    
212
def _if(node):
    '''
    *Must* be run to remove conditions that functions._if() can't handle.
    Note: Can add `@name` attr to distinguish separate _if statements.
    
    Reads the `cond`, `then` and `else` params and simplifies as follows:
    * no cond: replaced with `else`
    * `then` equals `else`: replaced with `then`
    * cond is a variable: left unchanged
    * cond is a text node with a truthy value: replaced with `then`
    * anything else: left unchanged
    '''
    params = dict(xml_dom.NodeEntryIter(node))
    then = params.get('then', None)
    cond = params.get('cond', None)
    else_ = params.get('else', None)
    
    if cond is None: xml_dom.replace(node, else_) # always False
    elif then == else_: xml_dom.replace(node, then) # always same value
    elif is_var(cond): pass # can't simplify variable conditions
    elif xml_dom.is_text_node(cond) and bool(xml_dom.value(cond)): # always True
        xml_dom.replace(node, then)
simplifying_funcs['_if'] = _if
228

    
229
def _nullIf(node):
    '''
    *Must* be run to remove conditions that functions._nullIf() can't handle.
    
    Reads the `null` and `value` params:
    * no value: xml_dom.prune_parent() is applied to the node
    * no null: replaced with `value`
    * otherwise: left unchanged
    '''
    params = dict(xml_dom.NodeEntryIter(node))
    null = params.get('null', None)
    value = params.get('value', None)
    
    if value is None: xml_dom.prune_parent(node) # empty
    elif null is None: xml_dom.replace(node, value) # nothing to null out
simplifying_funcs['_nullIf'] = _nullIf
240

    
241
#### Comparison
242

    
243
def _eq(node):
    '''Evaluates an equality test when its result is already known.
    Reads the `left` and `right` params:
    * both constants: replaced with the result of the comparison
    * both equal (but not both constants): replaced with True
    * otherwise: left unchanged
    '''
    params = dict(xml_dom.NodeTextEntryIter(node))
    left = params.get('left', None)
    right = params.get('right', None)
    
    if is_scalar(left) and is_scalar(right): # constant
        xml_dom.replace_with_text(node, left == right)
    elif left == right: xml_dom.replace_with_text(node, True) # always True
simplifying_funcs['_eq'] = _eq
252

    
253
#### Merging
254

    
255
# These functions are only simplified by passing through a single child
simplifying_funcs.update(dict.fromkeys((
    '_alt',
    '_join',
    '_join_words',
    '_merge_prefix',
    '_merge',
    '_min',
    '_max',
    '_avg',
), passthru))
263

    
264
def _first(node):
    '''Chooses the first non-empty param (sorting by numeric param name).
    The node is replaced with that param, or with None if no params are left
    after pruning.
    '''
    xml_dom.prune_children(node)
    args = variadic_args(node)
    if args: first = args[0]
    else: first = None
    xml_dom.replace(node, first)
simplifying_funcs['_first'] = _first
272

    
273
#### Environment access
274

    
275
def _env(node):
    '''Replaces node with the value of an environment variable.
    The variable's name is read from the `name` param.
    Throws SyntaxError if the `name` param is missing.
    Note: an unset environment variable raises a plain KeyError from
    os.environ.
    '''
    params = dict(xml_dom.NodeTextEntryIter(node))
    try: name = params['name']
    except KeyError as e: raise SyntaxError(e)
    
    xml_dom.replace_with_text(node, os.environ[name])
simplifying_funcs['_env'] = _env
282

    
283
##### XML functions
284

    
285
# Function names must start with _ to avoid collisions with real tags
286
# Functions take arguments (items, node)
287

    
288
#### Transforming values
289

    
290
def _replace(items, node):
    '''Applies regexp replacements to a value.
    Every param before the final `value` param is a (pattern, replacement)
    pair, applied in order. A pattern consisting only of word characters
    matches whole words only.
    @return The stripped result, or None if the input or the result is empty
    Throws SyntaxError if a pattern or replacement is not a valid regexp
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    try:
        for repl, with_ in items:
            if re.match(r'^\w+$', repl):
                repl = r'(?<![^\W_])'+repl+r'(?![^\W_])' # match whole word
            value = re.sub(repl, with_, value)
    # re.error is the same class as sre_constants.error
    except re.error as e: raise SyntaxError(e)
    return util.none_if(value.strip(), u'') # empty strings always mean None
funcs['_replace'] = _replace
302

    
303
#### Quantities
304

    
305
def _units(items, node):
    '''Applies unit actions to a quantity.
    Every param before the final `value` param is an (action, units) pair,
    applied in order:
    * `default`: calls units.set_default_units() on the quantity
    * `to`: converts the quantity with units.convert()
    Empty units strings are treated as None.
    @return The resulting quantity as a string, or None if the input is empty
    Throws SyntaxError on an unknown action
    Throws FormatException if the conversion fails or units are missing
    '''
    value = pop_value(items)
    if value is None: return None # input is empty
    
    quantity = units.str2quantity(value)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default': units.set_default_units(quantity, units_)
            elif action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise FormatException(e)
            else: raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise FormatException(e)
    return units.quantity2str(quantity)
funcs['_units'] = _units
321

    
322
def _rangeStart(items, node):
    '''Gets the first element of units.parse_range() for the `value` param.
    @return That element, or None if there is no `value` param
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items: return None # input is empty
    return units.parse_range(items['value'])[0]
funcs['_rangeStart'] = _rangeStart
328

    
329
def _rangeEnd(items, node):
    '''Gets the second element of units.parse_range() for the `value` param.
    @return That element, or None if there is no `value` param
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items: return None # input is empty
    return units.parse_range(items['value'])[1]
funcs['_rangeEnd'] = _rangeEnd
335

    
336
class CvException(Exception):
    '''Raised by _noCV() when a value is a CV (coefficient of variation)'''
    def __init__(self):
        Exception.__init__(self, 'CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
341

    
342
def _noCV(items, node):
    '''Passes through the last param's value unless it is a CV value.
    A CV value is "CV" (in any case), optional spaces, then digits.
    @return The value, or None if the input is empty
    Throws FormatException (wrapping CvException) for CV values
    '''
    items = list(conv_items(strings.ustr, items))
    try: name, value = items.pop() # last entry contains value
    except IndexError: return None # input is empty
    # Pass the flag as an argument: newer versions of re reject an inline
    # (?i) that is not at the very start of the pattern
    if re.match(r'^CV *\d+$', value, re.IGNORECASE):
        raise FormatException(CvException())
    return value
funcs['_noCV'] = _noCV
349

    
350
#### Angles
351

    
352
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading.
    Values that are not all-uppercase are returned unchanged.
    @return The heading as a str, the unchanged value, or None if there is no
        `value` param
    Throws FormatException if angles.compass2heading() raises KeyError
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    
    if not value.isupper(): return value # pass through other coordinate formats
    try: return util.cast(str, angles.compass2heading(value)) # ignore None
    except KeyError as e: raise FormatException(e)
funcs['_compass'] = _compass
(46-46/49)