Project

General

Profile

1
# XML "function" nodes that transform their contents
2

    
3
import datetime
4
import re
5
import sre_constants
6
import warnings
7

    
8
import angles
9
import dates
10
import exc
11
import format
12
import maps
13
import sql_io
14
import strings
15
import term
16
import units
17
import util
18
import xml_dom
19
import xpath
20

    
21
##### Exceptions
22

    
23
class SyntaxError(exc.ExceptionWithCause):
    '''Signals invalid XML function syntax, wrapping the underlying cause.
    Note that this name shadows the builtin SyntaxError inside this module.
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax'
        exc.ExceptionWithCause.__init__(self, msg, cause)
27

    
28
class FormatException(exc.ExceptionWithCause):
    '''Signals an invalid input value, wrapping the underlying cause'''
    def __init__(self, cause):
        msg = 'Invalid input value'
        exc.ExceptionWithCause.__init__(self, msg, cause)
31

    
32
##### Helper functions
33

    
34
def map_items(func, items):
    '''Applies func to the value of each (name, value) pair.
    @param func Callable taking a single value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in input order
    '''
    mapped = []
    for name, value in items:
        mapped.append((name, func(value)))
    return mapped
36

    
37
def cast(type_, val):
    '''Converts val using type_.
    @param type_ Callable that performs the conversion (e.g. float)
    @param val Value to convert
    @return type_(val)
    @throws FormatException if type_ raises ValueError for val
    '''
    try: return type_(val)
    # `except ... as` is accepted by Python 2.6+ as well as Python 3
    except ValueError as e: raise FormatException(e)
41

    
42
def conv_items(type_, items):
    '''Casts the value of each entry yielded by xml_dom.TextEntryOnlyIter(items)
    to type_.
    @param type_ Callable that performs the conversion (see cast())
    @param items Iterable of (name, value) entries
    @return list of (name, converted_value) pairs
    @throws FormatException if a value can't be cast
    '''
    def convert(val): return cast(type_, val)
    text_entries = xml_dom.TextEntryOnlyIter(items)
    return map_items(convert, text_entries)
45

    
46
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value.
    @param items Mutable list of (name, value) entries; modified in place. The
        last entry is removed even when its name does not match.
    @param name Name of value param, or None to accept any name
    @return The last entry's value, or None if items is empty or the last
        entry is not named `name`
    '''
    try: last = items.pop() # last entry contains value
    except IndexError: return None # input is empty and no actions
    if name is not None and last[0] != name: return None # input is empty
    return last[1]
52

    
53
def merge_tagged(root):
    '''Merges siblings in root that are marked as mergeable (merge=1 attribute),
    then does the same for every descendant element.
    Used to recombine pieces of nodes that were split apart in the mappings.
    '''
    # Collect all distinct tag names first, then merge each name once
    mergeable_names = set()
    for elem in xpath.get(root, '*[@merge=1]'):
        mergeable_names.add(elem.tagName)
    for tag_name in mergeable_names:
        xml_dom.merge_by_name(root, tag_name)
    
    # Recurse
    for child in xml_dom.NodeElemIter(root):
        merge_tagged(child)
62

    
63
# Registry of XML functions: function (tag) name -> callable(items, node)
funcs = dict()

# Names of functions that are still evaluated locally in column-based mode
# (see process())
structural_funcs = set()
66

    
67
##### Public functions
68

    
69
def is_func_name(name):
    '''Returns whether name is a function name: it starts with '_' but is not
    just '_'.'''
    if name == '_': return False # '_' is default root node name
    return name.startswith('_')
71

    
72
def is_func(node):
    '''Returns whether node's tag name is a function name (see is_func_name())
    '''
    tag_name = node.tagName
    return is_func_name(tag_name)
73

    
74
def is_xml_func_name(name):
    '''Returns whether name is a function name that is registered in funcs'''
    if not is_func_name(name): return False
    return name in funcs
75

    
76
def is_xml_func(node):
    '''Returns whether node's tag name is a registered XML function name (see
    is_xml_func_name())'''
    tag_name = node.tagName
    return is_xml_func_name(tag_name)
77

    
78
def process(node, on_error=exc.raise_, rel_funcs=None, db=None):
    '''Evaluates the XML functions in an XML tree.
    Child nodes are processed first (depth-first), then mergeable siblings are
    merged, and finally node itself is evaluated if its tag is a function name.
    @param node Root element of the (sub)tree to evaluate; modified in place
    @param on_error Called with the exception when a local XML function raises.
        Before it is called, the function node is replaced with a comment
        containing the function's string form.
    @param rel_funcs None|set(str...) Relational functions
        * container can be any iterable type
        * If != None: Non-relational functions are removed, or relational
          functions are treated specially, depending on the db param (below).
    @param db
        * If None: Non-relational functions other than structural functions are
          replaced with their last parameter (usually the value), not evaluated.
          This is used in column-based mode to remove XML-only functions.
        * If != None: Relational functions are evaluated directly. This is used
          in row-based mode to combine relational and XML functions.
    @return node if its tag is not a function name; otherwise None
    '''
    has_rel_funcs = rel_funcs is not None
    assert db is None or has_rel_funcs # rel_funcs required if db set
    
    for child in xml_dom.NodeElemIter(node):
        process(child, on_error, rel_funcs, db)
    merge_tagged(node)
    
    name = node.tagName
    if not is_func_name(name): return node # not any kind of function
    
    # Change rel_funcs *after* processing child nodes, which needs orig value
    if not has_rel_funcs: rel_funcs = set()
    rel_funcs = set(rel_funcs)
    
    row_mode = has_rel_funcs and db is not None
    column_mode = has_rel_funcs and db is None
    items = list(xml_dom.NodeTextEntryIter(node))
    
    # Parse function
    if len(items) == 1 and items[0][0].isdigit(): # has single numeric param
        # pass-through optimization for aggregating functions with one arg
        value = items[0][1] # pass through first arg
    elif row_mode and name in rel_funcs: # row-based mode: evaluate using DB
        value = sql_io.put(db, name, dict(items))
    elif column_mode and name not in structural_funcs: # column-based mode
        if name in rel_funcs: return # preserve relational functions
        # otherwise XML-only, so just replace with last param
        value = pop_value(items, None)
    else: # local XML function
        try: value = funcs[name](items, node)
        # also catch non-wrapped exceptions (XML func bugs)
        except Exception as e:
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
                '\n'+term.emph_multiline(str_)))
            
            on_error(e)
            return # in case on_error() returns
    
    xml_dom.replace_with_text(node, value)
133

    
134
##### XML functions
135

    
136
# Function names must start with _ to avoid collisions with real tags
137
# Functions take arguments (items, node)
138

    
139
#### Structural
140

    
141
def _ignore(items, node):
142
    '''Used to "comment out" an XML subtree'''
143
    return None
144
funcs['_ignore'] = _ignore
145
structural_funcs.add('_ignore')
146

    
147
def _ref(items, node):
    '''Used to retrieve a value from another XML node
    @param items
        addr=<path> XPath to value, relative to the XML func's parent node
    @return The referenced value, or None (after issuing a UserWarning) if the
        XPath target is missing
    @throws SyntaxError if the addr param is missing
    '''
    params = dict(items)
    try: addr = params['addr']
    except KeyError as e: raise SyntaxError(e)
    
    value = xpath.get_value(node.parentNode, addr)
    if value is None:
        warnings.warn(UserWarning('_ref: XPath reference target missing: '
            +str(addr)))
    return value
161
funcs['_ref'] = _ref
162
structural_funcs.add('_ref')
163

    
164
#### Conditionals
165

    
166
def _eq(items, node):
167
    items = dict(items)
168
    try:
169
        left = items['left']
170
        right = items['right']
171
    except KeyError: return '' # a value was None
172
    return util.bool2str(left == right)
173
funcs['_eq'] = _eq
174

    
175
def _if(items, node):
    '''Selects between two params based on a condition.
    @param items
        cond=<value> Condition; true iff bool() of its strings.ustr() form is
            true
        then=<value> Result when cond is true
        else=<value> Result when cond is false (optional; defaults to None)
    @throws SyntaxError if cond or then is missing
    '''
    params = dict(items)
    try:
        cond = params['cond']
        then = params['then']
    except KeyError as e: raise SyntaxError(e)
    else_ = params.get('else', None)
    cond = bool(cast(strings.ustr, cond))
    if cond: return then
    else: return else_
185
funcs['_if'] = _if
186

    
187
#### Combining values
188

    
189
def _alt(items, node):
190
    items = list(items)
191
    items.sort()
192
    try: return items[0][1] # value of lowest-numbered item
193
    except IndexError: return None # input got removed by e.g. FormatException
194
funcs['_alt'] = _alt
195

    
196
def _merge(items, node):
    '''Passes the values of the entries, ordered by sorting the (name, value)
    entries, to maps.merge_values() and returns its result.'''
    entries = sorted(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    values = [value for name, value in entries]
    return maps.merge_values(*values)
201
funcs['_merge'] = _merge
202

    
203
def _label(items, node):
    '''Prefixes a value with a label.
    @param items
        label=<text> Label to prepend
        value=<text> Value to label
    @return '<label>: <value>', or None if value is missing
    @throws SyntaxError if value is present but label is missing
    '''
    params = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    value = params.get('value', None)
    if value is None: return None # input is empty
    try: label = params['label']
    except KeyError as e: raise SyntaxError(e)
    return label+': '+value
211
funcs['_label'] = _label
212

    
213
#### Transforming values
214

    
215
def _collapse(items, node):
    '''Collapses a subtree if the "value" element in it is NULL
    @param items
        require=<path> XPath, evaluated relative to value, that must have a
            value for the subtree to be kept
        value=<subtree> Subtree to keep or collapse
    @return value if the require path has a value in it; otherwise None
    @throws SyntaxError if the require param is missing
    '''
    params = dict(items)
    try: require = cast(strings.ustr, params['require'])
    except KeyError as e: raise SyntaxError(e)
    value = params.get('value', None)
    # The result is None for a missing value either way, so don't hand None to
    # the XPath lookup
    if value is None: return None # input is empty
    
    if xpath.get_value(value, require, allow_rooted=False) is None: return None
    else: return value
224
funcs['_collapse'] = _collapse
225

    
226
# Converters selectable by _nullIf's "type" param; the None key is used when no
# type is given
types_by_name = {
    None: strings.ustr,
    'str': strings.ustr,
    'float': float,
}
227

    
228
def _nullIf(items, node):
    '''Compares a value against a designated "null" value using util.none_if()
    (presumably yielding None when they are equal).
    @param items
        null=<text> The value to treat as null
        value=<text> Input value (optional)
        type=<name> Key of types_by_name used to convert null (optional)
    @return Result of util.none_if(value, null), or value unchanged if that
        raises ValueError
    @throws SyntaxError if null is missing or type is not in types_by_name
    '''
    params = dict(conv_items(strings.ustr, items))
    try: null = params['null']
    except KeyError as e: raise SyntaxError(e)
    value = params.get('value', None)
    type_str = params.get('type', None)
    
    try: type_ = types_by_name[type_str]
    except KeyError as e: raise SyntaxError(e)
    null = type_(null)
    
    try: return util.none_if(value, null)
    except ValueError: return value # value not convertible, so can't equal null
241
funcs['_nullIf'] = _nullIf
242

    
243
def repl(repls, value):
    '''Raises error if value not in map and no special '*' entry
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @param value Input value to look up
    @return The replacement for value (value itself if the replacement is "*")
    @throws FormatException if value is not in repls and there is no "*" entry
    '''
    try: new_value = repls[value]
    except KeyError as e:
        # Save traceback right away in case another exception raised
        fe = FormatException(e)
        try: new_value = repls['*']
        except KeyError: raise fe
    if new_value == '*': new_value = value # '*' means keep input value the same
    return new_value
258

    
259
def _map(items, node):
    '''See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value passed through util.none_if(..., u''), or None if
        the input is empty
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    mapped = repl(dict(items), value)
    return util.none_if(mapped, u'') # empty value means None
269
funcs['_map'] = _map
270

    
271
def _replace(items, node):
    '''Applies a sequence of regexp replacements to a value.
    @param items
        <last_entry> value=<text> Value to transform
        <other_entries> pattern=replacement Applied in order with re.sub(). A
            pattern made up only of word characters is matched as a whole word.
    @return The transformed value with surrounding whitespace stripped, passed
        through util.none_if(..., u''); None if the input is empty
    @throws SyntaxError if re raises an error for a pattern or replacement
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    try:
        for pattern, with_ in items:
            if re.match(r'^\w+$', pattern):
                # match whole word
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])'
            value = re.sub(pattern, with_, value)
    # re.error is the public name of the class exported by sre_constants
    except re.error as e: raise SyntaxError(e)
    return util.none_if(value.strip(), u'') # empty strings always mean None
282
funcs['_replace'] = _replace
283

    
284
#### Quantities
285

    
286
def _units(items, node):
    '''Applies unit actions to a quantity.
    @param items
        <last_entry> value=<text> Quantity, parsed with units.str2quantity()
        <other_entries> action=units Applied in order. Actions:
            default: units.set_default_units(quantity, units)
            to: quantity = units.convert(quantity, units)
            An empty units string is passed through util.none_if(..., u'').
    @return units.quantity2str() of the resulting quantity, or None if the
        input is empty
    @throws SyntaxError if an action is not one of the above
    @throws FormatException if conversion raises ValueError or units are missing
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    
    quantity = units.str2quantity(value)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default': units.set_default_units(quantity, units_)
            elif action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise FormatException(e)
            else: raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise FormatException(e)
    return units.quantity2str(quantity)
302
funcs['_units'] = _units
303

    
304
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first occurrence of range_sep.
    @param str_ String that may contain a range
    @param range_sep Separator between the start and end of the range
    @return (start, end) with surrounding whitespace stripped, or
        (str_, None), with str_ unmodified, if str_ is not a range
    '''
    start, sep, end = str_.partition(range_sep)
    is_range = sep != ''
    is_negative_number = start == '' and range_sep == '-'
    if not is_range or is_negative_number: return (str_, None)
    return (start.strip(), end.strip())
310

    
311
def _rangeStart(items, node):
    '''Returns the first element of parse_range() applied to the "value" param,
    or None if there is no "value" param.'''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    range_ = parse_range(params['value'])
    return range_[0]
316
funcs['_rangeStart'] = _rangeStart
317

    
318
def _rangeEnd(items, node):
    '''Returns the second element of parse_range() applied to the "value" param,
    or None if there is no "value" param.'''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    range_ = parse_range(params['value'])
    return range_[1]
323
funcs['_rangeEnd'] = _rangeEnd
324

    
325
def _range(items, node):
    '''Computes the size of a numeric range.
    @param items
        from=<number> Start of the range
        to=<number> End of the range
    @return str(to - from), or None if either param is missing
    @throws FormatException if a param can't be cast to float
    '''
    params = dict(conv_items(float, items))
    from_ = params.get('from', None)
    to = params.get('to', None)
    if from_ is None or to is None: return None
    return str(to - from_)
331
funcs['_range'] = _range
332

    
333
def _avg(items, node):
    '''Returns the arithmetic mean of the params' values (cast to float) as a
    string, or None if there are no values.'''
    values = [value for name, value in conv_items(float, items)]
    if not values: return None # input is empty
    return str(sum(values, 0.)/len(values))
341
funcs['_avg'] = _avg
342

    
343
class CvException(Exception):
    '''Raised (wrapped in a FormatException) by _noCV for CV values'''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
348

    
349
def _noCV(items, node):
350
    try: name, value = items.pop() # last entry contains value
351
    except IndexError: return None # input is empty
352
    if re.match('^(?i)CV *\d+$', value): raise FormatException(CvException())
353
    return value
354
funcs['_noCV'] = _noCV
355

    
356
#### Dates
357

    
358
def _date(items, node):
    '''Normalizes a date to 'YYYY-MM-DD' form.
    @param items Either
        date=<text> A date string, or a (possibly fractional) year number
        or date parts:
        year=<int> Required
        month=<int|name> Optional; defaults to 1. Names are parsed with
            dates.strtotime().
        day=<int> Optional; defaults to 1
    @return The formatted date; None if there are no params; or the date string
        unchanged if dates.strtotime() raises ImportError
    @throws FormatException if the date is invalid or year is missing
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try: str_ = items['date']
    except KeyError:
        # Year is required
        try: items['year']
        except KeyError as e:
            if items == {}: return None # entire date is empty
            else: raise FormatException(e)
        
        # Convert month name to number
        try: month = items['month']
        except KeyError: pass
        else:
            if not month.isdigit(): # month is name
                try: items['month'] = str(dates.strtotime(month).month)
                except ValueError as e: raise FormatException(e)
        
        items = dict(conv_items(format.str2int, items.items()))
        items.setdefault('month', 1)
        items.setdefault('day', 1)
        
        for try_num in range(2):
            try:
                date = datetime.date(**items)
                break
            except ValueError as e:
                if try_num > 0: raise FormatException(e)
                    # exception still raised after retry
                msg = strings.ustr(e)
                if msg == 'month must be in 1..12': # try swapping month and day
                    items['month'], items['day'] = items['day'], items['month']
                else: raise FormatException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise FormatException(e)
        else:
            # A fractional year is turned into a day offset within the year.
            # Out-of-range years and nan/inf are input errors, so report them
            # the same way as every other invalid date.
            try:
                date = (datetime.date(int(year), 1, 1) +
                    datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e: raise FormatException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
402
funcs['_date'] = _date
403

    
404
def _dateRangeStart(items, node):
    '''Returns element 0 of dates.parse_date_range() applied to the "value"
    param, or None if there is no "value" param.'''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[0]
409
funcs['_dateRangeStart'] = _dateRangeStart
410

    
411
def _dateRangeEnd(items, node):
    '''Returns element 1 of dates.parse_date_range() applied to the "value"
    param, or None if there is no "value" param.'''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[1]
416
funcs['_dateRangeEnd'] = _dateRangeEnd
417

    
418
#### Names
419

    
420
_name_parts_slices_items = [
421
    ('first', slice(None, 1)),
422
    ('middle', slice(1, -1)),
423
    ('last', slice(-1, None)),
424
]
425
name_parts_slices = dict(_name_parts_slices_items)
426
name_parts = [name for name, slice_ in _name_parts_slices_items]
427

    
428
def _name(items, node):
    '''Joins the name parts that are present in items with single spaces, in
    name_parts order. Params with other names are ignored.'''
    given = dict(items)
    present = [given[part] for part in name_parts if part in given]
    return ' '.join(present)
434
funcs['_name'] = _name
435

    
436
def _namePart(items, node):
    '''Extracts parts of full names.
    @param items part=full_name entries, where part is a key of
        name_parts_slices. The full name is split on single spaces and the
        words selected by that part's slice are kept.
    @return The extracted parts combined by _name()
    @throws SyntaxError if a param name is not a valid name part
    '''
    out_items = []
    for part, value in items:
        try: slice_ = name_parts_slices[part]
        except KeyError as e: raise SyntaxError(e)
        words = value.split(' ')
        out_items.append((part, ' '.join(words[slice_])))
    return _name(out_items, node)
443
funcs['_namePart'] = _namePart
444

    
445
#### Angles
446

    
447
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @param items
        value=<text> Compass direction, or a value in another format
    @return The heading as a string; the value unchanged if it is not
        all-uppercase; or None if value is missing
    @throws FormatException if the conversion raises KeyError
    '''
    params = dict(conv_items(strings.ustr, items))
    try: value = params['value']
    except KeyError: return None # input is empty
    
    if not value.isupper(): return value # pass through other coordinate formats
    try: return util.cast(str, angles.compass2heading(value)) # ignore None
    except KeyError as e: raise FormatException(e)
456
funcs['_compass'] = _compass
457

    
458
#### Paths
459

    
460
def _simplifyPath(items, node):
    '''Walks a chain of nodes and replaces each node that lacks a required
    value with the next node in the chain.
    @param items
        next=<path> XPath from a node to the next node in the chain
        require=<path> XPath that must have a value for a node to be kept
        path=<subtree> First node of the chain
    @return The first node of the chain after simplification (the last `next`
        lookup result if the original first node was replaced)
    @throws SyntaxError if a param is missing
    '''
    params = dict(items)
    try:
        next_path = cast(strings.ustr, params['next'])
        require = cast(strings.ustr, params['require'])
        root = params['path']
    except KeyError as e: raise SyntaxError(e)
    
    node = root
    while node is not None:
        new_node = xpath.get_1(node, next_path, allow_rooted=False)
        if xpath.get_value(node, require, allow_rooted=False) is None: # empty
            xml_dom.replace(node, new_node) # remove current elem
            if node is root: root = new_node # also update root
        node = new_node
    return root
476
funcs['_simplifyPath'] = _simplifyPath
(34-34/37)