Project

General

Profile

1
# XML "function" nodes that transform their contents
2

    
3
import datetime
4
import re
5
import sre_constants
6
import warnings
7

    
8
import angles
9
import dates
10
import exc
11
import format
12
import maps
13
import sql_io
14
import strings
15
import term
16
import units
17
import util
18
import xml_dom
19
import xpath
20

    
21
##### Exceptions
22

    
23
class SyntaxError(exc.ExceptionWithCause):
    '''Signals that an XML function was invoked with invalid syntax.
    NOTE: this name shadows the builtin SyntaxError within this module.
    @param cause The underlying exception that triggered this error
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax'
        exc.ExceptionWithCause.__init__(self, msg, cause)
27

    
28
class FormatException(exc.ExceptionWithCause):
    '''Signals that an input value could not be interpreted.
    @param cause The underlying exception that triggered this error
    '''
    def __init__(self, cause):
        msg = 'Invalid input value'
        exc.ExceptionWithCause.__init__(self, msg, cause)
31

    
32
##### Helper functions
33

    
34
def map_items(func, items):
    '''Applies func to the value of each (name, value) pair.
    @param func f(value) Transformation to apply to each value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in input order
    '''
    mapped = []
    for name, value in items:
        mapped.append((name, func(value)))
    return mapped
36

    
37
def cast(type_, val):
    '''Converts val using type_.
    @param type_ f(val) Type or conversion function to apply
    @param val The value to convert
    @return The converted value
    @throws FormatException if type_ raises ValueError (can't cast)
    '''
    try: return type_(val)
    # `except ... as` is accepted by both Python 2.6+ and Python 3
    except ValueError as e: raise FormatException(e)
41

    
42
def conv_items(type_, items):
    '''Casts the value of every entry yielded by xml_dom.TextEntryOnlyIter.
    @param type_ f(val) Type or conversion function, applied via cast()
    @param items The (name, value) entries to convert
    @return list of (name, converted value) pairs
    @throws FormatException if a value can't be cast (see cast())
    '''
    def conv(val): return cast(type_, val)
    
    return map_items(conv, xml_dom.TextEntryOnlyIter(items))
45

    
46
def pop_value(items, name='value'):
    '''Removes the last entry from items and returns its value.
    The last entry is removed even if its name does not match.
    @param items Mutable list of (name, value) entries
    @param name Name of value param, or None to accept any name
    @return The last entry's value, or None if items is empty or the last
        entry's name does not match
    '''
    try: last = items.pop() # last entry contains value
    except IndexError: return None # input is empty and no actions
    if name is not None and last[0] != name: return None # input is empty
    return last[1]
52

    
53
def merge_tagged(root):
    '''Merges the children of root that carry a merge=1 attribute, then does
    the same for every descendant.
    Used to recombine pieces of nodes that were split apart in the mappings.
    @param root The element whose subtree is processed
    '''
    # Collect each mergeable tag name once, before anything is merged
    mergeable_names = set()
    for elem in xpath.get(root, '*[@merge=1]'):
        mergeable_names.add(elem.tagName)
    for tag_name in mergeable_names:
        xml_dom.merge_by_name(root, tag_name)
    
    # Recurse
    for child in xml_dom.NodeElemIter(root):
        merge_tagged(child)
62

    
63
# Registry of XML functions: maps a function name (e.g. '_map') to its
# implementation, which process() calls as func(items, node)
funcs = {}
64

    
65
# Names of structural XML functions. In column-based mode, process() still
# evaluates these locally instead of replacing them with their last param.
structural_funcs = set()
66

    
67
##### Public functions
68

    
69
def is_func_name(name):
    '''Tests whether name is a function name.
    Function names start with '_', but are not the bare '_'.
    '''
    if name == '_': return False # '_' is default root node name
    return name.startswith('_')
71

    
72
def is_func(node):
    '''Tests whether node's tag name is a function name (see is_func_name())'''
    tag_name = node.tagName
    return is_func_name(tag_name)
73

    
74
def is_xml_func_name(name):
    '''Tests whether name is a function name that is registered in funcs'''
    if not is_func_name(name): return False
    return name in funcs
75

    
76
def is_xml_func(node):
    '''Tests whether node's tag name is a registered XML function name
    (see is_xml_func_name())
    '''
    tag_name = node.tagName
    return is_xml_func_name(tag_name)
77

    
78
def process(node, on_error=exc.reraise, is_rel_func=None, db=None):
    '''Evaluates the XML functions in an XML tree.
    Children are processed, and mergeable siblings merged, before node itself
    is evaluated.
    @param node The root of the subtree to process
    @param on_error f(e) Called with any exception raised by a local XML
        function, after the function node has been replaced by a comment
        containing the node's string form
    @param is_rel_func None|f(str) Tests if a name is a relational function.
        * If != None: Non-relational functions are removed, or relational
          functions are treated specially, depending on the db param (below).
    @param db
        * If None: Non-relational functions other than structural functions are
          replaced with their last parameter (usually the value), not evaluated.
          This is used in column-based mode to remove XML-only functions.
        * If != None: Relational functions are evaluated directly. This is used
          in row-based mode to combine relational and XML functions.
    @return node if node is not a function node, otherwise None
    '''
    has_rel_funcs = is_rel_func is not None
    assert db is None or has_rel_funcs # rel_funcs required if db set
    
    for child in xml_dom.NodeElemIter(node):
        process(child, on_error, is_rel_func, db)
    merge_tagged(node)
    
    name = node.tagName
    if not is_func_name(name): return node # not any kind of function
    
    row_mode = has_rel_funcs and db is not None
    column_mode = has_rel_funcs and db is None
    func = funcs.get(name, None)
    items = list(xml_dom.NodeTextEntryIter(node))
    
    # Parse function
    if len(items) == 1 and items[0][0].isdigit(): # has single numeric param
        # pass-through optimization for aggregating functions with one arg
        value = items[0][1] # pass through first arg
    elif row_mode and (is_rel_func(name) or func is None): # row-based mode
        value = sql_io.put(db, name, dict(items)) # evaluate using DB
    elif (column_mode and name not in structural_funcs) or func is None:
        # local XML function can't be used or does not exist
        if column_mode and is_rel_func(name): return # preserve relational funcs
        # otherwise XML-only in column mode, or DB-only in XML output mode
        value = pop_value(items, None) # just replace with last param
    else: # local XML function
        try: value = func(items, node)
        # also catch non-wrapped exceptions (XML func bugs)
        except Exception as e:
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
                '\n'+term.emph_multiline(str_)))
            
            on_error(e)
            return # in case on_error() returns
    
    xml_dom.replace_with_text(node, value)
130

    
131
##### XML functions
132

    
133
# Function names must start with _ to avoid collisions with real tags
134
# Functions take arguments (items)
135

    
136
#### Structural
137

    
138
def _ignore(items, node):
    '''Used to "comment out" an XML subtree.
    @return None, regardless of the params
    '''
    return None
structural_funcs.add('_ignore')
funcs['_ignore'] = _ignore
143

    
144
def _ref(items, node):
    '''Used to retrieve a value from another XML node
    @param items
        addr=<path> XPath to value, relative to the XML func's parent node
    @return The referenced value, or None (after issuing a UserWarning) if the
        XPath has no value
    @throws SyntaxError if the addr param is missing
    '''
    items = dict(items)
    try: addr = items['addr']
    except KeyError as e: raise SyntaxError(e)
    
    value = xpath.get_value(node.parentNode, addr)
    if value is None:
        warnings.warn(UserWarning('_ref: XPath reference target missing: '
            +str(addr)))
    return value
funcs['_ref'] = _ref
structural_funcs.add('_ref')
160

    
161
#### Conditionals
162

    
163
def _eq(items, node):
    '''Tests whether the "left" and "right" params are equal.
    @param items left=<value>, right=<value>
    @return util.bool2str() of the comparison, or '' if either param is missing
    '''
    params = dict(items)
    if 'left' not in params or 'right' not in params:
        return '' # a value was None
    is_equal = params['left'] == params['right']
    return util.bool2str(is_equal)
funcs['_eq'] = _eq
171

    
172
def _if(items, node):
    '''Selects between two params based on a condition.
    @param items
        cond=<value> The condition; true iff its string form is non-empty
        then=<value> Returned if cond is true
        else=<value> Optional. Returned if cond is false.
    @return The "then" or "else" param (None if "else" is needed but missing)
    @throws SyntaxError if the cond or then param is missing
    '''
    items = dict(items)
    try:
        cond = items['cond']
        then = items['then']
    except KeyError as e: raise SyntaxError(e)
    else_ = items.get('else', None)
    cond = bool(cast(strings.ustr, cond))
    if cond: return then
    else: return else_
funcs['_if'] = _if
183

    
184
#### Transforming values
185

    
186
def _collapse(items, node):
    '''Collapses a subtree if a required part of it is NULL.
    @param items
        require=<path> XPath, evaluated against the "value" param, that must
            have a value for the subtree to be kept
        value=<subtree> The subtree to (possibly) collapse
    @return The "value" param, or None if it is missing or the require XPath
        has no value in it
    @throws SyntaxError if the require param is missing
    '''
    items = dict(items)
    try: require = cast(strings.ustr, items['require'])
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)
    # Input is empty: the result could only be None, so don't search in None
    if value is None: return None
    
    if xpath.get_value(value, require, allow_rooted=False) is None: return None
    else: return value
funcs['_collapse'] = _collapse
196

    
197
def repl(repls, value):
    '''Raises error if value not in map and no special '*' entry
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @param value The input value to look up
    @return The replacement for value
    @throws FormatException if value is not in repls and there is no '*' entry
    '''
    try: new_value = repls[value]
    except KeyError as e:
        # Save traceback right away in case another exception raised
        fe = FormatException(e)
        try: new_value = repls['*']
        except KeyError: raise fe
    if new_value == '*': new_value = value # '*' means keep input value the same
    return new_value
212

    
213
def _map(items, node):
    '''See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value, or None if the input is empty or the value maps
        to the empty string
    @throws FormatException if the value has no mapping (see repl())
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    return util.none_if(repl(dict(items), value), u'') # empty value means None
funcs['_map'] = _map
224

    
225
def _replace(items, node):
    '''Applies regular expression replacements to a value, in order.
    @param items
        <last_entry> value=<text> The text to transform
        <other_entries> pattern=replacement Passed to re.sub(). A pattern made
            up only of word characters matches whole words only.
    @return The transformed, stripped value, or None if the input is empty or
        the result is the empty string
    @throws SyntaxError if a pattern or replacement is not valid for re
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    try:
        # Named `pattern` so the module-level repl() is not shadowed
        for pattern, with_ in items:
            if re.match(r'^\w+$', pattern):
                # match whole word
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])'
            value = re.sub(pattern, with_, value)
    # re.error is the same class as the deprecated sre_constants.error
    except re.error as e: raise SyntaxError(e)
    return util.none_if(value.strip(), u'') # empty strings always mean None
funcs['_replace'] = _replace
237

    
238
#### Quantities
239

    
240
def _units(items, node):
    '''Applies unit actions to a quantity, in order.
    @param items
        <last_entry> value=<quantity> Parsed with units.str2quantity()
        <other_entries> action=units, where action is one of:
            default Sets the units to use if the quantity has none
                (units.set_default_units())
            to Converts the quantity to the units (units.convert())
            Empty units are passed as None.
    @return The resulting quantity as a string, or None if the input is empty
    @throws SyntaxError if an action is not recognized
    @throws FormatException if converting raises ValueError or units are
        missing (units.MissingUnitsException)
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    
    quantity = units.str2quantity(value)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default': units.set_default_units(quantity, units_)
            elif action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise FormatException(e)
            else: raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise FormatException(e)
    return units.quantity2str(quantity)
funcs['_units'] = _units
257

    
258
def parse_range(str_, range_sep='-'):
    '''Splits a range string (e.g. "1-5") at the first separator.
    @param str_ The string to parse
    @param range_sep The separator between the start and the end
    @return tuple (start, end), each stripped of surrounding whitespace.
        If str_ is not a range (no separator, or a negative number when
        range_sep is '-'), returns (str_, None) with str_ unchanged.
    '''
    default = (str_, None)
    start, sep, end = str_.partition(range_sep)
    if sep == '': return default # not a range
    # A '-' preceded only by whitespace is a minus sign, not a separator
    if start.strip() == '' and range_sep == '-': return default # negative number
    return tuple(d.strip() for d in (start, end))
264

    
265
def _rangeStart(items, node):
    '''Extracts the start of a range; see parse_range().
    @param items value=<range>
    @return The first element of parse_range()'s result, or None if the value
        param is missing
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, end = parse_range(params['value'])
    return start
funcs['_rangeStart'] = _rangeStart
271

    
272
def _rangeEnd(items, node):
    '''Extracts the end of a range; see parse_range().
    @param items value=<range>
    @return The second element of parse_range()'s result, or None if the value
        param is missing
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, end = parse_range(params['value'])
    return end
funcs['_rangeEnd'] = _rangeEnd
278

    
279
def _range(items, node):
    '''Computes the size of a numeric range.
    @param items from=<number>, to=<number>
    @return str(to - from), or None if either param is missing
    @throws FormatException if a value can't be cast to float (see cast())
    '''
    items = dict(conv_items(float, items))
    from_ = items.get('from', None)
    to = items.get('to', None)
    if from_ is None or to is None: return None
    return str(to - from_)
funcs['_range'] = _range
286

    
287
def _avg(items, node):
    '''Computes the mean of the values of all the params.
    @return str(mean), or None if there are no params
    @throws FormatException if a value can't be cast to float (see cast())
    '''
    values = [value for name, value in conv_items(float, items)]
    if not values: return None # input is empty
    
    total = 0.
    for value in values: total += value
    return str(total/len(values))
funcs['_avg'] = _avg
296

    
297
class CvException(Exception):
    '''Signals that a CV (coefficient of variation) value was found where it
    is not allowed. Takes no arguments; the message is fixed.
    '''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
302

    
303
def _noCV(items, node):
    '''Passes a value through unchanged, but rejects CV values.
    @param items <last_entry> The value (under any name)
    @return The value, or None if the input is empty
    @throws FormatException (wrapping a CvException) if the value is "CV"
        followed by optional spaces and digits, in any letter case
    '''
    items = list(conv_items(strings.ustr, items))
    try: _, value = items.pop() # last entry contains value
    except IndexError: return None # input is empty
    # The flag is passed as an argument because an inline (?i) that is not at
    # the very start of the pattern is rejected by newer versions of re
    if re.match(r'^CV *\d+$', value, re.IGNORECASE):
        raise FormatException(CvException())
    return value
funcs['_noCV'] = _noCV
310

    
311
#### Names
312

    
313
_name_parts_slices_items = [
314
    ('first', slice(None, 1)),
315
    ('middle', slice(1, -1)),
316
    ('last', slice(-1, None)),
317
]
318
name_parts_slices = dict(_name_parts_slices_items)
319
name_parts = [name for name, slice_ in _name_parts_slices_items]
320

    
321
def _name(items, node):
    '''Joins the parts of a name with spaces.
    @param items Any of the params listed in name_parts. Other params are
        ignored.
    @return The parts that are present, in name_parts order, joined by ' '
    '''
    params = dict(conv_items(strings.ustr, items))
    return ' '.join(params[part] for part in name_parts if part in params)
funcs['_name'] = _name
328

    
329
#### Angles
330

    
331
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @param items value=<direction>
    @return The result of angles.compass2heading(), cast with
        util.cast(str, ...); the value unchanged if it is not all uppercase;
        None if the value param is missing
    @throws FormatException if the conversion raises KeyError
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    
    if not value.isupper(): return value # pass through other coordinate formats
    try: return util.cast(str, angles.compass2heading(value)) # ignore None
    except KeyError as e: raise FormatException(e)
funcs['_compass'] = _compass
341

    
342
#### Paths
343

    
344
def _simplifyPath(items, node):
    '''Removes the empty elements from a chain of nodes.
    Starting at the "path" param, follows the next XPath from node to node,
    replacing each node whose require XPath has no value with its successor.
    @param items
        next=<path> XPath from a node in the chain to the following one
        require=<path> XPath that must have a value for a node to be kept
        path=<subtree> The first node of the chain
    @return The first node that was kept, or None if none was
    @throws SyntaxError if a param is missing
    '''
    items = dict(items)
    try:
        next_ = cast(strings.ustr, items['next']) # don't shadow builtin next
        require = cast(strings.ustr, items['require'])
        root = items['path']
    except KeyError as e: raise SyntaxError(e)
    
    node = root
    while node is not None:
        # Look up the successor first, while node is still in the tree
        new_node = xpath.get_1(node, next_, allow_rooted=False)
        if xpath.get_value(node, require, allow_rooted=False) is None: # empty
            xml_dom.replace(node, new_node) # remove current elem
            if node is root: root = new_node # also update root
        node = new_node
    return root
funcs['_simplifyPath'] = _simplifyPath
(34-34/37)