Project

General

Profile

1
# XML "function" nodes that transform their contents
2

    
3
import datetime
4
import re
5
import sre_constants
6
import warnings
7

    
8
import angles
9
import dates
10
import exc
11
import format
12
import maps
13
import sql
14
import strings
15
import term
16
import units
17
import util
18
import xml_dom
19
import xpath
20

    
21
##### Exceptions
22

    
23
class SyntaxError(exc.ExceptionWithCause):
    '''Raised when an XML function is used incorrectly, e.g. a required
    parameter is missing or an unknown action is given.
    NOTE: within this module, this name shadows the builtin SyntaxError.
    @param cause The underlying exception describing the problem
    '''
    def __init__(self, cause):
        exc.ExceptionWithCause.__init__(self, 'Invalid XML function syntax',
            cause)
27

    
28
class FormatException(exc.ExceptionWithCause):
    '''Raised when an input value can't be interpreted by an XML function.
    @param cause The underlying exception describing the problem
    '''
    def __init__(self, cause):
        exc.ExceptionWithCause.__init__(self, 'Invalid input value', cause)
31

    
32
##### Helper functions
33

    
34
def map_items(func, items):
    '''Applies func to the value of each entry, leaving the names unchanged.
    @param func Callable taking a single value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in input order
    '''
    return [(name, func(value)) for name, value in items]
36

    
37
def cast(type_, val):
    '''Converts val to type_.
    @param type_ Callable performing the conversion (e.g. float)
    @return The converted value
    @throws FormatException if the conversion raises ValueError
    '''
    try: return type_(val)
    except ValueError as e: raise FormatException(e)
41

    
42
def conv_items(type_, items):
    '''Casts the value of each entry to type_.
    @param type_ Callable performing the conversion (see cast())
    @param items Iterable of (name, value) entries. It is passed through
        xml_dom.TextEntryOnlyIter before the values are cast.
    @return list of (name, converted value) pairs
    @throws FormatException if a value can't be cast
    '''
    return map_items(lambda val: cast(type_, val),
        xml_dom.TextEntryOnlyIter(items))
45

    
46
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value.
    @param items list of (name, value) entries; modified in place
    @param name Name of value param, or None to accept any name
    @return The last entry's value, or None if items is empty or the last entry
        has a different name (that entry is removed from items regardless)
    '''
    try: last = items.pop() # last entry contains value
    except IndexError: return None # input is empty and no actions
    if name is not None and last[0] != name: return None # input is empty
    return last[1]
52

    
53
funcs = {}
54

    
55
structural_funcs = set()
56

    
57
##### Public functions
58

    
59
def is_func_name(name):
    '''@return Whether name has the form of a function name: it starts with
    "_" but is not just "_", which is the default root node name'''
    return name.startswith('_') and name != '_'
61

    
62
def is_func(node):
    '''@return Whether node's tag name has the form of a function name'''
    tag_name = node.tagName
    return is_func_name(tag_name)
63

    
64
def is_xml_func_name(name):
    '''@return Whether name has the form of a function name and is also
    registered in funcs'''
    if not is_func_name(name): return False
    return name in funcs
65

    
66
def is_xml_func(node):
    '''@return Whether node's tag name is the name of a registered XML
    function'''
    tag_name = node.tagName
    return is_xml_func_name(tag_name)
67

    
68
def process(node, on_error=exc.raise_, rel_funcs=None, db=None):
    '''Evaluates the XML functions in an XML tree.
    Child elements are processed first, so a function sees the results of the
    functions nested inside it.
    @param node Root element of the (sub)tree to process
    @param on_error Called with the exception when a local XML function fails.
        Before it is called, the failing node is replaced with a comment
        containing the node's source. May return, in which case processing
        continues.
    @param rel_funcs None|set(str...) Relational functions
        * container can be any iterable type
        * If != None: Non-relational functions are removed, or relational
          functions are treated specially, depending on the db param (below).
    @param db
        * If None: Non-relational functions other than structural functions are
          replaced with their last parameter (usually the value), not evaluated.
          This is used in column-based mode to remove XML-only functions.
        * If != None: Relational functions are evaluated directly. This is used
          in row-based mode to combine relational and XML functions.
    '''
    has_rel_funcs = rel_funcs is not None
    assert db is None or has_rel_funcs # rel_funcs required if db set

    for child in xml_dom.NodeElemIter(node):
        process(child, on_error, rel_funcs, db)

    name = node.tagName
    if not is_func_name(name): return # not any kind of function

    # Change rel_funcs *after* processing child nodes, which needs orig value
    if has_rel_funcs: rel_funcs = set(rel_funcs)
    else: rel_funcs = set()

    row_mode = has_rel_funcs and db is not None
    column_mode = has_rel_funcs and db is None
    items = xml_dom.NodeTextEntryIter(node)

    if row_mode and name in rel_funcs: # row-based mode: evaluate using DB
        value = sql.put(db, name, dict(items))
    elif column_mode and name not in structural_funcs: # column-based mode
        if name in rel_funcs: return # preserve relational functions
        # otherwise XML-only, so just replace with last param
        value = pop_value(list(items), None)
    else: # local XML function
        try: value = funcs[name](items, node)
        except Exception as e: # also catch non-wrapped exceptions (XML func bugs)
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
                '\n'+term.emph_multiline(str_)))

            on_error(e)
            return # in case on_error() returns
    xml_dom.replace_with_text(node, value)
117

    
118
##### XML functions
119

    
120
# Function names must start with _ to avoid collisions with real tags
121
# Functions take arguments (items, node)
122

    
123
#### Structural
124

    
125
def _ignore(items, node):
126
    '''Used to "comment out" an XML subtree'''
127
    return None
128
funcs['_ignore'] = _ignore
129
structural_funcs.add('_ignore')
130

    
131
def _ref(items, node):
    '''Used to retrieve a value from another XML node
    @param items
        addr=<path> XPath to value, relative to the XML func's parent node
    @return The value found at addr. If it is None, a UserWarning is issued and
        None is returned.
    @throws SyntaxError if addr is not provided
    '''
    items = dict(items)
    try: addr = items['addr']
    except KeyError as e: raise SyntaxError(e)

    value = xpath.get_value(node.parentNode, addr)
    if value is None:
        warnings.warn(UserWarning('_ref: XPath reference target missing: '
            +str(addr)))
    return value
145
funcs['_ref'] = _ref
146
structural_funcs.add('_ref')
147

    
148
#### Conditionals
149

    
150
def _eq(items, node):
151
    items = dict(items)
152
    try:
153
        left = items['left']
154
        right = items['right']
155
    except KeyError: return '' # a value was None
156
    return util.bool2str(left == right)
157
funcs['_eq'] = _eq
158

    
159
def _if(items, node):
    '''Selects one of two values based on a condition.
    @param items
        cond=<value> Condition; tested by the truth value of its
            strings.ustr() form
        then=<value> Returned when cond is true
        else=<value> Returned when cond is false (optional; default None)
    @throws SyntaxError if cond or then is not provided
    '''
    items = dict(items)
    try:
        cond = items['cond']
        then = items['then']
    except KeyError as e: raise SyntaxError(e)
    else_ = items.get('else', None)
    if bool(cast(strings.ustr, cond)): return then
    return else_
169
funcs['_if'] = _if
170

    
171
#### Combining values
172

    
173
def _alt(items, node):
174
    items = list(items)
175
    items.sort()
176
    try: return items[0][1] # value of lowest-numbered item
177
    except IndexError: return None # input got removed by e.g. FormatException
178
funcs['_alt'] = _alt
179

    
180
def _merge(items, node):
    '''Merges the values of all entries into one value.
    @param items <name>=<value> entries; the values are passed to
        maps.merge_values() in sorted entry order
    @return The result of maps.merge_values()
    @throws FormatException if a value can't be converted to a string
    '''
    items = list(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    items.sort()
    return maps.merge_values(*[value for name, value in items])
185
funcs['_merge'] = _merge
186

    
187
def _label(items, node):
    '''Prefixes a value with a label.
    @param items
        label=<str> Text to put before the value
        value=<str> Input value
    @return "<label>: <value>", or None if value is not provided
    @throws SyntaxError if value is provided but label is not
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    value = items.get('value', None)
    if value is None: return None # input is empty
    try: label = items['label']
    except KeyError as e: raise SyntaxError(e)
    return label+': '+value
195
funcs['_label'] = _label
196

    
197
#### Transforming values
198

    
199
def _collapse(items, node):
    '''Collapses a subtree if the "value" element in it is NULL
    @param items
        require=<xpath> Path, evaluated against value (not rooted), whose
            target must be non-None for the subtree to be kept
        value=<subtree> The subtree to check (optional)
    @return value if the required target exists, otherwise None
    @throws SyntaxError if require is not provided
    '''
    items = dict(items)
    try: require = cast(strings.ustr, items['require'])
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)

    if xpath.get_value(value, require, allow_rooted=False) is None: return None
    return value
208
funcs['_collapse'] = _collapse
209

    
210
types_by_name = {None: strings.ustr, 'str': strings.ustr, 'float': float}
211

    
212
def _nullIf(items, node):
    '''Turns a value into None if it equals a designated NULL value.
    @param items
        null=<str> The value that stands for NULL
        value=<str> Input value (optional)
        type=<name> Key of types_by_name, selecting how null is converted
            before comparing (optional)
    @return util.none_if(value, null), or value unchanged if that comparison
        raises ValueError
    @throws SyntaxError if null is not provided or type is unknown
    '''
    items = dict(conv_items(strings.ustr, items))
    try: null = items['null']
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)
    type_str = items.get('type', None)

    try: type_ = types_by_name[type_str]
    except KeyError as e: raise SyntaxError(e)
    null = type_(null)

    try: return util.none_if(value, null)
    except ValueError: return value # value not convertible, so can't equal null
225
funcs['_nullIf'] = _nullIf
226

    
227
def repl(repls, value):
    '''Looks up the replacement for value.
    Raises error if value not in map and no special '*' entry
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @return The replacement, or value itself if the replacement is "*"
    @throws FormatException if value has no entry and there is no "*" entry
    '''
    try: new_value = repls[value]
    except KeyError as e:
        # Save traceback right away in case another exception raised
        fe = FormatException(e)
        try: new_value = repls['*']
        except KeyError: raise fe
    if new_value == '*': new_value = value # '*' means keep input value the same
    return new_value
242

    
243
def _map(items, node):
    '''Replaces the value according to a set of mappings. See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value, run through util.none_if(..., u''), or None if
        the input is empty
    @throws FormatException if the value has no mapping (see repl())
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    return util.none_if(repl(dict(items), value), u'') # empty value means None
253
funcs['_map'] = _map
254

    
255
def _replace(items, node):
    '''Applies regular expression replacements to the value, in order.
    @param items
        <last_entry> value=<str> Input value
        <other_entries> <regexp>=<replacement> Substitutions for re.sub(). A
            regexp consisting only of word characters matches whole words only
            (not adjacent to letters or digits).
    @return The stripped result, run through util.none_if(..., u''), or None
        if the input is empty
    @throws SyntaxError if a regexp is invalid
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    try:
        # Named `pattern` so the module-level repl() isn't shadowed
        for pattern, with_ in items:
            if re.match(r'^\w+$', pattern):
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])' # match whole word
            value = re.sub(pattern, with_, value)
    except re.error as e: raise SyntaxError(e) # same class as sre_constants.error
    return util.none_if(value.strip(), u'') # empty strings always mean None
266
funcs['_replace'] = _replace
267

    
268
#### Quantities
269

    
270
def _units(items, node):
    '''Parses a quantity and applies units actions to it, in order.
    @param items
        <last_entry> value=<str> Quantity, parsed by units.str2quantity()
        <other_entries> Actions. The units string is first run through
            util.none_if(..., u'').
            default=<units> Passed to units.set_default_units()
            to=<units> Quantity is converted using units.convert()
    @return units.quantity2str() of the result, or None if the input is empty
    @throws SyntaxError if an action is not one of the above
    @throws FormatException if the conversion raises ValueError or units are
        missing (units.MissingUnitsException)
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty

    quantity = units.str2quantity(value)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default': units.set_default_units(quantity, units_)
            elif action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise FormatException(e)
            else: raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise FormatException(e)
    return units.quantity2str(quantity)
286
funcs['_units'] = _units
287

    
288
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first occurrence of the separator.
    @param str_ The range, e.g. "1-5"
    @param range_sep Separator between the start and the end
    @return tuple (start, end), each with surrounding whitespace stripped.
        If str_ is not a range, (str_, None) with str_ unchanged. This is the
        case when there is no separator, or when range_sep is "-" and str_
        starts with it (treated as a negative number).
    '''
    default = (str_, None)
    start, sep, end = str_.partition(range_sep)
    if sep == '': return default # not a range
    if start == '' and range_sep == '-': return default # negative number
    return (start.strip(), end.strip())
294

    
295
def _rangeStart(items, node):
    '''Extracts the start of a range. See parse_range()
    @param items
        value=<str> The range, e.g. "1-5"
    @return The first element of parse_range(value), or None if value is not
        provided
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    start, end = parse_range(value)
    return start
300
funcs['_rangeStart'] = _rangeStart
301

    
302
def _rangeEnd(items, node):
    '''Extracts the end of a range. See parse_range()
    @param items
        value=<str> The range, e.g. "1-5"
    @return The second element of parse_range(value) (None if value is not a
        range), or None if value is not provided
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    start, end = parse_range(value)
    return end
307
funcs['_rangeEnd'] = _rangeEnd
308

    
309
def _range(items, node):
    '''Calculates the size of a range.
    @param items
        from=<float>
        to=<float>
    @return str(to - from), or None if either param is not provided
    @throws FormatException if a value is not a valid float
    '''
    items = dict(conv_items(float, items))
    from_ = items.get('from', None)
    to = items.get('to', None)
    if from_ is None or to is None: return None
    return str(to - from_)
315
funcs['_range'] = _range
316

    
317
def _avg(items, node):
    '''Calculates the arithmetic mean of the values of all entries.
    @param items <name>=<float> entries; the names are ignored
    @return str() of the mean, or None if there are no entries
    @throws FormatException if a value is not a valid float
    '''
    values = [value for name, value in conv_items(float, items)]
    if not values: return None # input is empty
    return str(sum(values, 0.)/len(values))
325
funcs['_avg'] = _avg
326

    
327
class CvException(Exception):
    '''Indicates that a CV (coefficient of variation) value was provided where
    it is not allowed. Takes no arguments; the message is fixed.'''
    def __init__(self):
        Exception.__init__(self, 'CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
332

    
333
def _noCV(items, node):
334
    try: name, value = items.next()
335
    except StopIteration: return None
336
    if re.match('^(?i)CV *\d+$', value): raise FormatException(CvException())
337
    return value
338
funcs['_noCV'] = _noCV
339

    
340
#### Dates
341

    
342
def _date(items, node):
    '''Converts a date to the format YYYY-MM-DD.
    @param items Either the whole date:
        date=<str> A date string parsed by dates.strtotime(), or a number,
            which is taken as a year whose fractional part is scaled to a
            number of days (out of 365) past January 1
    or its parts:
        year=<int> Required
        month=<int|name> Defaults to 1. A non-numeric month is parsed by
            dates.strtotime().
        day=<int> Defaults to 1
        If the month is out of range, month and day are swapped and the date
        is tried once more.
    @return The formatted date; None if no params are provided; or the date
        string unchanged if dates.strtotime() raises ImportError
    @throws FormatException if the date is invalid, or year is missing when
        other parts are provided
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try: str_ = items['date']
    except KeyError:
        # Year is required
        try: items['year']
        except KeyError as e:
            if items == {}: return None # entire date is empty
            else: raise FormatException(e)

        # Convert month name to number
        try: month = items['month']
        except KeyError: pass
        else:
            if not month.isdigit(): # month is name
                try: items['month'] = str(dates.strtotime(month).month)
                except ValueError as e: raise FormatException(e)

        # iter() so an iterator is passed on, as iteritems() did
        items = dict(conv_items(format.str2int, iter(items.items())))
        items.setdefault('month', 1)
        items.setdefault('day', 1)

        for try_num in range(2):
            try:
                date = datetime.date(**items)
                break
            except ValueError as e:
                if try_num > 0: raise FormatException(e)
                    # exception still raised after retry
                msg = strings.ustr(e)
                if msg == 'month must be in 1..12': # try swapping month and day
                    items['month'], items['day'] = items['day'], items['month']
                else: raise FormatException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise FormatException(e)
        else:
            # float() also accepts nan/inf and out-of-range years, which are
            # invalid input rather than XML function bugs
            try:
                date = (datetime.date(int(year), 1, 1) +
                    datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e: raise FormatException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
386
funcs['_date'] = _date
387

    
388
def _dateRangeStart(items, node):
    '''Extracts the start of a date range.
    @param items
        value=<str> The date range
    @return The first element of dates.parse_date_range(value), or None if
        value is not provided
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    return dates.parse_date_range(value)[0]
393
funcs['_dateRangeStart'] = _dateRangeStart
394

    
395
def _dateRangeEnd(items, node):
    '''Extracts the end of a date range.
    @param items
        value=<str> The date range
    @return The second element of dates.parse_date_range(value), or None if
        value is not provided
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    return dates.parse_date_range(value)[1]
400
funcs['_dateRangeEnd'] = _dateRangeEnd
401

    
402
#### Names
403

    
404
_name_parts_slices_items = [
405
    ('first', slice(None, 1)),
406
    ('middle', slice(1, -1)),
407
    ('last', slice(-1, None)),
408
]
409
name_parts_slices = dict(_name_parts_slices_items)
410
name_parts = [name for name, slice_ in _name_parts_slices_items]
411

    
412
def _name(items, node):
    '''Combines the parts of a name into a full name.
    @param items <part>=<str> entries, where part is one of name_parts. Other
        entries are ignored.
    @return The provided parts joined by spaces, in name_parts order ('' if
        none are provided)
    '''
    items = dict(items)
    return ' '.join(items[part] for part in name_parts if part in items)
418
funcs['_name'] = _name
419

    
420
def _namePart(items, node):
    '''Extracts parts of full names.
    @param items <part>=<full name> entries, where part is a key of
        name_parts_slices. The full name is split on single spaces and the
        words in that part's slice are kept.
    @return The extracted parts, combined by _name()
    @throws SyntaxError if a part is not a key of name_parts_slices
    '''
    out_items = []
    for part, value in items:
        try: slice_ = name_parts_slices[part]
        except KeyError as e: raise SyntaxError(e)
        words = value.split(' ')
        out_items.append((part, ' '.join(words[slice_])))
    return _name(out_items, node)
427
funcs['_namePart'] = _namePart
428

    
429
#### Angles
430

    
431
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @param items
        value=<str> The direction
    @return The heading from angles.compass2heading(), cast using
        util.cast(str, ...). A value that is not all uppercase is returned
        unchanged. None if value is not provided.
    @throws FormatException if angles.compass2heading() raises KeyError
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty

    if not value.isupper(): return value # pass through other coordinate formats
    try: return util.cast(str, angles.compass2heading(value)) # ignore None
    except KeyError as e: raise FormatException(e)
440
funcs['_compass'] = _compass
441

    
442
#### Paths
443

    
444
def _simplifyPath(items, node):
    '''Removes the empty elements from a chain of XML nodes.
    @param items
        path=<node> The first node in the chain
        next=<xpath> Path from a node to the next node in the chain (not
            rooted)
        require=<xpath> Path to a value that a node must have to be kept (not
            rooted)
    @return The first node of the simplified chain. This is the next kept node
        if the original first node was removed.
    @throws SyntaxError if a param is not provided
    '''
    items = dict(items)
    try:
        next_ = cast(strings.ustr, items['next']) # avoid shadowing next()
        require = cast(strings.ustr, items['require'])
        root = items['path']
    except KeyError as e: raise SyntaxError(e)

    curr = root
    while curr is not None:
        # Get the next node first, before curr is possibly replaced
        new_node = xpath.get_1(curr, next_, allow_rooted=False)
        if xpath.get_value(curr, require, allow_rooted=False) is None: # empty
            xml_dom.replace(curr, new_node) # remove current elem
            if curr is root: root = new_node # also update root
        curr = new_node
    return root
460
funcs['_simplifyPath'] = _simplifyPath
(33-33/36)