Project

General

Profile

1
# XML "function" nodes that transform their contents
2

    
3
import datetime
4
import re
5
import sre_constants
6
import warnings
7

    
8
import angles
9
import dates
10
import exc
11
import format
12
import maps
13
import sql_io
14
import strings
15
import term
16
import units
17
import util
18
import xml_dom
19
import xpath
20

    
21
##### Exceptions
22

    
23
class SyntaxError(exc.ExceptionWithCause):
    '''Signals that an XML function was invoked with invalid syntax.
    NOTE: This name shadows the builtin SyntaxError inside this module.
    @param cause The underlying exception that describes the problem
    '''
    def __init__(self, cause):
        exc.ExceptionWithCause.__init__(self, 'Invalid XML function syntax',
            cause)
27

    
28
class FormatException(exc.ExceptionWithCause):
    '''Signals that an input value could not be parsed or converted.
    @param cause The underlying exception that describes the problem
    '''
    def __init__(self, cause):
        exc.ExceptionWithCause.__init__(self, 'Invalid input value', cause)
31

    
32
##### Helper functions
33

    
34
def map_items(func, items):
    '''Applies func to the value of each (name, value) pair.
    @param func f(value) Transformation applied to every value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in input order
    '''
    return [(name, func(value)) for name, value in items]
36

    
37
def cast(type_, val):
    '''Converts val using type_.
    @param type_ f(val) Conversion callable (e.g. float)
    @param val The value to convert
    @return The converted value
    @throws FormatException if type_ raises ValueError (can't cast)
    '''
    try: return type_(val)
    except ValueError as e: raise FormatException(e)
41

    
42
def conv_items(type_, items):
    '''Casts the value of each text entry in items to type_.
    The items are first filtered through xml_dom.TextEntryOnlyIter.
    @param type_ f(val) Conversion callable passed to cast()
    @param items Iterable of (name, value) entries
    @return list of (name, converted_value) pairs
    @throws FormatException if a value can't be cast
    '''
    text_entries = xml_dom.TextEntryOnlyIter(items)
    return map_items(lambda val: cast(type_, val), text_entries)
45

    
46
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value.
    NOTE: The last entry is removed from items even when its name does not
    match, in which case None is returned.
    @param items Mutable list of (name, value) entries; modified in place
    @param name Name of value param, or None to accept any name
    @return The value of the last entry, or None if the input is empty
    '''
    try: last = items.pop() # last entry contains value
    except IndexError: return None # input is empty and no actions
    if name is not None and last[0] != name: return None # input is empty
    return last[1]
52

    
53
def merge_tagged(root):
    '''Merges siblings in root that are marked as mergeable.
    Used to recombine pieces of nodes that were split apart in the mappings.
    @param root The element whose children (and, recursively, descendants)
        are merged. Children are selected with the XPath *[@merge=1].
    '''
    mergeable = xpath.get(root, '*[@merge=1]')
    # Merge once per distinct tag name, however many children carry it
    for name in set(c.tagName for c in mergeable):
        xml_dom.merge_by_name(root, name)
    
    # Recurse
    for child in xml_dom.NodeElemIter(root): merge_tagged(child)
62

    
63
# Registry of XML functions: tag name (starting with "_") -> f(items, node)
funcs = {}

# Names of the functions that process() still evaluates in column-based mode
structural_funcs = set()
66

    
67
##### Public functions
68

    
69
def is_func_name(name):
    '''Tests whether a tag name denotes a function (of any kind).
    @param name str Tag name
    @return bool True if name starts with "_" and is not exactly "_"
    '''
    return name.startswith('_') and name != '_' # '_' is default root node name
71

    
72
def is_func(node):
    '''Tests whether an element's tag name denotes a function (of any kind)'''
    tag = node.tagName
    return is_func_name(tag)
73

    
74
def is_xml_func_name(name):
    '''Tests whether name is a function name that is registered in funcs'''
    if not is_func_name(name): return False
    return name in funcs
75

    
76
def is_xml_func(node):
    '''Tests whether an element's tag name is a function registered in funcs'''
    tag = node.tagName
    return is_xml_func_name(tag)
77

    
78
def process(node, on_error=exc.raise_, is_rel_func=None, db=None):
    '''Evaluates the XML functions in an XML tree.
    Children are processed before their parent, so a function sees params that
    have already been evaluated. A function node is replaced in the tree by the
    text of its result.
    @param node The root element of the (sub)tree to evaluate
    @param on_error f(e) Called with any exception raised by a local XML
        function, after the failing node has been replaced by a comment.
    @param is_rel_func None|f(str) Tests if a name is a relational function.
        * If != None: Non-relational functions are removed, or relational
          functions are treated specially, depending on the db param (below).
    @param db
        * If None: Non-relational functions other than structural functions are
          replaced with their last parameter (usually the value), not evaluated.
          This is used in column-based mode to remove XML-only functions.
        * If != None: Relational functions are evaluated directly. This is used
          in row-based mode to combine relational and XML functions.
    @return node if it is not a function; otherwise None
    '''
    has_rel_funcs = is_rel_func is not None
    assert db is None or has_rel_funcs # rel_funcs required if db set
    
    for child in xml_dom.NodeElemIter(node):
        process(child, on_error, is_rel_func, db)
    merge_tagged(node)
    
    name = node.tagName
    if not is_func_name(name): return node # not any kind of function
    
    row_mode = has_rel_funcs and db is not None
    column_mode = has_rel_funcs and db is None
    func = funcs.get(name, None)
    items = list(xml_dom.NodeTextEntryIter(node))
    
    # Parse function
    if len(items) == 1 and items[0][0].isdigit(): # has single numeric param
        # pass-through optimization for aggregating functions with one arg
        value = items[0][1] # pass through first arg
    elif row_mode and (is_rel_func(name) or func is None): # row-based mode
        value = sql_io.put(db, name, dict(items)) # evaluate using DB
    elif column_mode and name not in structural_funcs: # column-based mode
        if is_rel_func(name): return # preserve relational functions
        # otherwise XML-only, so just replace with last param
        value = pop_value(items, None)
    else: # local XML function
        # If name is not registered, func is None and calling it raises here,
        # so an unknown function is reported through on_error() as well
        try: value = func(items, node)
        except Exception as e: # also catch non-wrapped exceptions (XML func bugs)
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            # Leave a comment containing the failed function in its place
            xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
                '\n'+term.emph_multiline(str_)))
            
            on_error(e)
            return # in case on_error() returns
    
    xml_dom.replace_with_text(node, value)
129

    
130
##### XML functions
131

    
132
# Function names must start with _ to avoid collisions with real tags
133
# Functions take arguments (items)
134

    
135
#### Structural
136

    
137
def _ignore(items, node):
    '''Used to "comment out" an XML subtree.
    @return Always None, whatever the params are
    '''
    return None
funcs['_ignore'] = _ignore
structural_funcs.add('_ignore')
142

    
143
def _ref(items, node):
    '''Used to retrieve a value from another XML node
    @param items
        addr=<path> XPath to value, relative to the XML func's parent node
    @return The referenced value, or None (with a UserWarning) if the XPath
        has no target
    @throws SyntaxError if the addr param is missing
    '''
    items = dict(items)
    try: addr = items['addr']
    except KeyError as e: raise SyntaxError(e)
    
    value = xpath.get_value(node.parentNode, addr)
    if value is None:
        warnings.warn(UserWarning('_ref: XPath reference target missing: '
            +str(addr)))
    return value
funcs['_ref'] = _ref
structural_funcs.add('_ref')
159

    
160
#### Conditionals
161

    
162
def _eq(items, node):
    '''Tests whether two params are equal
    @param items
        left=<value>
        right=<value>
    @return util.bool2str() of the comparison, or '' if either param is
        missing
    '''
    items = dict(items)
    try:
        left = items['left']
        right = items['right']
    except KeyError: return '' # a value was None
    return util.bool2str(left == right)
funcs['_eq'] = _eq
170

    
171
def _if(items, node):
    '''Selects one of two values based on a condition
    @param items
        cond=<value> Treated as true iff its string form is non-empty
        then=<value> Returned when cond is true
        else=<value> Optional. Returned when cond is false (default None).
    @throws SyntaxError if cond or then is missing
    '''
    items = dict(items)
    try:
        cond = items['cond']
        then = items['then']
    except KeyError as e: raise SyntaxError(e)
    else_ = items.get('else', None)
    
    if bool(cast(strings.ustr, cond)): return then
    return else_
funcs['_if'] = _if
182

    
183
#### Combining values
184

    
185
def _alt(items, node):
    '''Picks the first available alternative
    @param items (name, value) entries; names are expected to be numbers
    @return The value of the entry that sorts first, or None if there are no
        entries
    '''
    ordered = sorted(items)
    try: return ordered[0][1] # value of lowest-numbered item
    except IndexError: return None # input got removed by e.g. FormatException
funcs['_alt'] = _alt
191

    
192
def _merge(items, node):
    '''Merges the values of all params using maps.merge_values()
    @param items (name, value) entries; values are passed in name order
    @throws FormatException if a value can't be converted to a string
    '''
    items = list(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    items.sort()
    values = [value for name, value in items]
    return maps.merge_values(*values)
funcs['_merge'] = _merge
198

    
199
def _label(items, node):
    '''Prefixes a value with a label
    @param items
        label=<str> Required whenever a value is present
        value=<str>
    @return "<label>: <value>", or None if the value is missing
    @throws SyntaxError if a value is present but the label is missing
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    value = items.get('value', None)
    if value is None: return None # input is empty
    try: label = items['label']
    except KeyError as e: raise SyntaxError(e)
    return label+': '+value
funcs['_label'] = _label
208

    
209
#### Transforming values
210

    
211
def _collapse(items, node):
    '''Collapses a subtree if the required element in it is NULL
    @param items
        require=<path> XPath, evaluated relative to the value
        value=<subtree> The subtree to keep or collapse
    @return The value, or None if the require XPath yields None in it
    @throws SyntaxError if the require param is missing
    '''
    items = dict(items)
    try: require = cast(strings.ustr, items['require'])
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)
    
    if xpath.get_value(value, require, allow_rooted=False) is None: return None
    return value
funcs['_collapse'] = _collapse
221

    
222
# Maps a type name to the callable that converts a string to that type
types_by_name = {
    None: strings.ustr,
    'str': strings.ustr,
    'float': float,
}
223

    
224
def _nullIf(items, node):
    '''Turns a value into None if it equals a designated null value
    @param items
        null=<str> The value that stands for NULL
        value=<str> The value to test
        type=<name> Optional. A key of types_by_name; the null value is
            converted to this type before comparing.
    @return The result of util.none_if(value, null), or the value itself if
        that comparison raises ValueError
    @throws SyntaxError if null is missing or type is not a known type name
    '''
    items = dict(conv_items(strings.ustr, items))
    try: null = items['null']
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)
    type_str = items.get('type', None)
    
    try: type_ = types_by_name[type_str]
    except KeyError as e: raise SyntaxError(e)
    null = type_(null)
    
    try: return util.none_if(value, null)
    except ValueError: return value # value not convertible, so can't equal null
funcs['_nullIf'] = _nullIf
238

    
239
def repl(repls, value):
    '''Raises error if value not in map and no special '*' entry
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @param value The input value to look up
    @return The replacement for value
    @throws FormatException if value is not in repls and there is no "*" entry
    '''
    try: new_value = repls[value]
    except KeyError as e:
        # Save traceback right away in case another exception raised
        fe = FormatException(e)
        try: new_value = repls['*']
        except KeyError: raise fe
    if new_value == '*': new_value = value # '*' means keep input value the same
    return new_value
254

    
255
def _map(items, node):
    '''See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value, or None if the input is empty or maps to ""
    @throws FormatException if the value has no mapping (see repl())
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    mapped = repl(dict(items), value)
    return util.none_if(mapped, u'') # empty value means None
funcs['_map'] = _map
266

    
267
def _replace(items, node):
    '''Applies regular expression replacements to a value, in entry order
    @param items
        <last_entry> Value
        <other_entries> regexp=replacement. A regexp that consists only of
            word characters is matched as a whole word.
    @return The resulting string with surrounding whitespace stripped, or None
        if the input is empty or the result is empty
    @throws SyntaxError if a regexp is invalid
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    try:
        # Loop variable is not named repl, to avoid shadowing repl() above
        for pattern, with_ in items:
            if re.match(r'^\w+$', pattern):
                # match whole word
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])'
            value = re.sub(pattern, with_, value)
    except re.error as e: raise SyntaxError(e) # same class as sre_constants.error
    return util.none_if(value.strip(), u'') # empty strings always mean None
funcs['_replace'] = _replace
279

    
280
#### Quantities
281

    
282
def _units(items, node):
    '''Applies unit actions to a quantity, in entry order
    @param items
        <last_entry> Value, parsed with units.str2quantity()
        <other_entries> action=units, where action is one of:
            default Sets the units to use if the quantity has none
            to Converts the quantity to the given units
            Empty units are passed on as None.
    @return The resulting quantity as a string, or None if the input is empty
    @throws SyntaxError if an action is not recognized
    @throws FormatException if a conversion fails or units are missing
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    
    quantity = units.str2quantity(value)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default': units.set_default_units(quantity, units_)
            elif action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise FormatException(e)
            else: raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise FormatException(e)
    return units.quantity2str(quantity)
funcs['_units'] = _units
299

    
300
def parse_range(str_, range_sep='-'):
    '''Splits a "start<sep>end" string at the first separator
    @param str_ The string to split
    @param range_sep The separator between start and end
    @return tuple (start, end), each stripped of surrounding whitespace.
        If str_ is not a range, or starts with a "-" separator (i.e. is a
        negative number), (str_, None) is returned with str_ unchanged.
    '''
    default = (str_, None)
    start, sep, end = str_.partition(range_sep)
    if sep == '': return default # not a range
    if start == '' and range_sep == '-': return default # negative number
    return tuple(d.strip() for d in (start, end))
306

    
307
def _rangeStart(items, node):
    '''Extracts the start of a range. See parse_range().
    @param items
        value=<str> The range
    @return The start of the range, or None if the input is empty
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    start, end = parse_range(value)
    return start
funcs['_rangeStart'] = _rangeStart
313

    
314
def _rangeEnd(items, node):
    '''Extracts the end of a range. See parse_range().
    @param items
        value=<str> The range
    @return The end of the range, or None if the input is empty or not a
        range
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    start, end = parse_range(value)
    return end
funcs['_rangeEnd'] = _rangeEnd
320

    
321
def _range(items, node):
    '''Computes the size of a numeric range
    @param items
        from=<number>
        to=<number>
    @return str(to - from), or None if either bound is missing
    @throws FormatException if a param is not a number
    '''
    items = dict(conv_items(float, items))
    from_ = items.get('from', None)
    to = items.get('to', None)
    if from_ is None or to is None: return None
    return str(to - from_)
funcs['_range'] = _range
328

    
329
def _avg(items, node):
    '''Computes the arithmetic mean of all params
    @param items (name, number) entries; the names are ignored
    @return The mean as a string, or None if the input is empty
    @throws FormatException if a param is not a number
    '''
    values = [value for name, value in conv_items(float, items)]
    if not values: return None # input is empty
    return str(sum(values, 0.)/len(values))
funcs['_avg'] = _avg
338

    
339
class CvException(Exception):
    '''Signals that a CV (coefficient of variation) value was found where it
    is not allowed. Takes no arguments; the message is fixed.
    '''
    def __init__(self):
        Exception.__init__(self, 'CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
344

    
345
def _noCV(items, node):
    '''Rejects values that are a CV (coefficient of variation)
    @param items
        <last_entry> Value (the name of the entry is ignored)
    @return The value unchanged, or None if the input is empty
    @throws FormatException (wrapping CvException) if the value consists of
        "CV" (in any case), optional spaces, and digits
    '''
    items = list(conv_items(strings.ustr, items))
    try: name, value = items.pop() # last entry contains value
    except IndexError: return None # input is empty
    # Flag is passed separately: an inline (?i) that is not at the very start
    # of the pattern is rejected by newer versions of the re module
    if re.match(r'^CV *\d+$', value, re.IGNORECASE):
        raise FormatException(CvException())
    return value
funcs['_noCV'] = _noCV
352

    
353
#### Dates
354

    
355
def _date(items, node):
    '''Normalizes a date to the format YYYY-MM-DD
    @param items Either:
        date=<str> A numeric (possibly fractional) year, or a date string
            that is parsed with dates.strtotime()
        or the separate parts:
        year=<int> Required unless all parts are missing
        month=<int|name> Optional (default 1). A month name is converted to
            its number.
        day=<int> Optional (default 1)
        If the month is out of range, month and day are swapped and the date
        is retried once.
    @return The formatted date; None if the input is empty; or the date
        string unchanged if dates.strtotime() raises ImportError
    @throws FormatException if the date is invalid or can't be parsed
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try: str_ = items['date']
    except KeyError:
        # Year is required
        try: items['year']
        except KeyError as e:
            if items == {}: return None # entire date is empty
            else: raise FormatException(e)
        
        # Convert month name to number
        try: month = items['month']
        except KeyError: pass
        else:
            if not month.isdigit(): # month is name
                try: items['month'] = str(dates.strtotime(month).month)
                except ValueError as e: raise FormatException(e)
        
        items = dict(conv_items(format.str2int, items.items()))
        items.setdefault('month', 1)
        items.setdefault('day', 1)
        
        for try_num in range(2):
            try:
                date = datetime.date(**items)
                break
            except ValueError as e:
                if try_num > 0: raise FormatException(e)
                    # exception still raised after retry
                msg = strings.ustr(e)
                if msg == 'month must be in 1..12': # try swapping month and day
                    items['month'], items['day'] = items['day'], items['month']
                else: raise FormatException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise FormatException(e)
        else:
            # Numeric year: the fractional part is an offset into a 365-day
            # year. An unrepresentable year (e.g. 0, inf) is an input error.
            try:
                date = (datetime.date(int(year), 1, 1) +
                    datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e: raise FormatException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
funcs['_date'] = _date
400

    
401
def _dateRangeStart(items, node):
    '''Extracts the start of a date range. See dates.parse_date_range().
    @param items
        value=<str> The date range
    @return The first element of the parsed range, or None if the input is
        empty
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    return dates.parse_date_range(value)[0]
funcs['_dateRangeStart'] = _dateRangeStart
407

    
408
def _dateRangeEnd(items, node):
    '''Extracts the end of a date range. See dates.parse_date_range().
    @param items
        value=<str> The date range
    @return The second element of the parsed range, or None if the input is
        empty
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    return dates.parse_date_range(value)[1]
funcs['_dateRangeEnd'] = _dateRangeEnd
414

    
415
#### Names
416

    
417
# Each name part, with the slice that selects it from a list of words.
# Kept as a list of pairs so that the order of the parts is preserved.
_name_parts_slices_items = [
    ('first', slice(None, 1)),
    ('middle', slice(1, -1)),
    ('last', slice(-1, None)),
]
name_parts_slices = dict(_name_parts_slices_items) # part name -> slice
name_parts = [name for name, slice_ in _name_parts_slices_items] # in order
424

    
425
def _name(items, node):
    '''Joins name parts into a full name
    @param items
        first=<str>, middle=<str>, last=<str> All optional. Other entries
        are ignored.
    @return The parts that are present, in the order of name_parts, separated
        by spaces ('' if none are present)
    '''
    items = dict(conv_items(strings.ustr, items))
    parts = [items[part] for part in name_parts if part in items]
    return ' '.join(parts)
funcs['_name'] = _name
432

    
433
def _namePart(items, node):
    '''Extracts name parts from full names and joins them. See _name().
    @param items part=<full name> entries, where part is a key of
        name_parts_slices. Each full name is split on spaces, and the words
        that the part's slice selects are kept.
    @return The extracted parts, joined by _name()
    @throws SyntaxError if an entry's name is not a known part
    '''
    items = list(conv_items(strings.ustr, items))
    out_items = []
    for part, value in items:
        try: slice_ = name_parts_slices[part]
        except KeyError as e: raise SyntaxError(e)
        words = value.split(' ')
        out_items.append((part, ' '.join(words[slice_])))
    return _name(out_items, node)
funcs['_namePart'] = _namePart
442

    
443
#### Angles
444

    
445
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @param items
        value=<str> The compass direction
    @return The heading as a string; the value unchanged if it is not
        all-uppercase; or None if the input is empty
    @throws FormatException if angles.compass2heading() raises KeyError
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    
    if not value.isupper(): return value # pass through other coordinate formats
    try: return util.cast(str, angles.compass2heading(value)) # ignore None
    except KeyError as e: raise FormatException(e)
funcs['_compass'] = _compass
455

    
456
#### Paths
457

    
458
def _simplifyPath(items, node):
    '''Removes the empty elements from a chain of nested elements
    @param items
        next=<path> XPath from one element of the chain to the next
        require=<path> XPath that must have a value for an element to be kept
        path=<subtree> The first element of the chain
    @return The first element of the chain that remains (as far as can be
        told from here, None if every element was removed)
    @throws SyntaxError if a param is missing
    '''
    items = dict(items)
    try:
        next_path = cast(strings.ustr, items['next'])
        require = cast(strings.ustr, items['require'])
        root = items['path']
    except KeyError as e: raise SyntaxError(e)
    
    # Separate local, so that the node param is not overwritten
    curr = root
    while curr is not None:
        # Look up the successor first, because curr may be replaced below
        new_node = xpath.get_1(curr, next_path, allow_rooted=False)
        if xpath.get_value(curr, require, allow_rooted=False) is None: # empty
            xml_dom.replace(curr, new_node) # remove current elem
            if curr is root: root = new_node # also update root
        curr = new_node
    return root
funcs['_simplifyPath'] = _simplifyPath
(34-34/37)