Project

General

Profile

1
# XML "function" nodes that transform their contents
2

    
3
import datetime
4
import re
5
import sre_constants
6
import warnings
7

    
8
import angles
9
import dates
10
import exc
11
import format
12
import maps
13
import sql
14
import strings
15
import term
16
import units
17
import util
18
import xml_dom
19
import xpath
20

    
21
##### Exceptions
22

    
23
class SyntaxError(exc.ExceptionWithCause):
    '''An XML function was invoked incorrectly (e.g. a required param is
    missing). Note that this shadows the builtin SyntaxError in this module.
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax'
        exc.ExceptionWithCause.__init__(self, msg, cause)
27

    
28
class FormatException(exc.ExceptionWithCause):
    '''An input value could not be parsed or converted
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        msg = 'Invalid input value'
        exc.ExceptionWithCause.__init__(self, msg, cause)
31

    
32
##### Helper functions
33

    
34
def map_items(func, items):
    '''Applies func to the value of each (name, value) pair
    @return list of (name, func(value)) pairs, in input order
    '''
    mapped = []
    for name, value in items:
        mapped.append((name, func(value)))
    return mapped
36

    
37
def cast(type_, val):
    '''Converts val by calling type_ on it
    @return type_(val)
    @raise FormatException if type_ rejects val with a ValueError
    '''
    try:
        converted = type_(val)
    except ValueError as e:
        raise FormatException(e)
    return converted
41

    
42
def conv_items(type_, items):
    '''Casts the value of each text entry to type_
    @param items (name, value) entries, filtered by xml_dom.TextEntryOnlyIter
    @return list of (name, converted value) pairs
    @raise FormatException (via cast()) if a value can't be converted
    '''
    def convert(val): return cast(type_, val)
    
    text_items = xml_dom.TextEntryOnlyIter(items)
    return map_items(convert, text_items)
45

    
46
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value
    @param items list of (name, value) entries; modified in place (the last
        entry is removed even when its name doesn't match)
    @param name Name of value param, or None to accept any name
    @return The last entry's value, or None if items is empty or the last
        entry is not named name
    '''
    try: entry = items.pop() # the value is passed as the last entry
    except IndexError: return None # input is empty and no actions
    
    accept_any = name is None
    if accept_any or entry[0] == name: return entry[1]
    return None # input is empty
52

    
53
funcs = {}
54

    
55
structural_funcs = set()
56

    
57
##### Public functions
58

    
59
def is_func_name(name):
    '''Whether name looks like an XML function name: it starts with "_" but
    is not just "_"'''
    if name == '_': return False # '_' is default root node name
    return name.startswith('_')
61

    
62
def is_func(node):
    '''Whether node's tagName looks like an XML function name'''
    tag = node.tagName
    return is_func_name(tag)
63

    
64
def is_xml_func_name(name):
    '''Whether name is a function name that is also registered in funcs'''
    if not is_func_name(name): return False
    return name in funcs
65

    
66
def is_xml_func(node):
    '''Whether node's tagName is a function name registered in funcs'''
    tag = node.tagName
    return is_xml_func_name(tag)
67

    
68
def process(node, on_error=exc.raise_, db=None, preserve=set(), strip=False):
    '''Evaluates the XML functions in an XML tree.
    Child elements are processed before node itself, and each evaluated
    function node is replaced in the tree by its result.
    @param node Root element of the (sub)tree to process
    @param on_error Called with the exception when evaluating a function
        fails. Before that, the failed function node is replaced by a comment
        containing the function's text.
    @param db Passed to sql.put() for function names that are not in funcs
    @param preserve set(str...) XML functions not to remove.
        * container can be any iterable type
    @param strip Whether to instead replace most XML functions with their last
        parameter (usually the value) and evaluate only structural functions
    '''
    preserve = set(preserve) # accept any iterable; never aliases the default
    
    # Evaluate the children first, so this node sees their results
    for child in xml_dom.NodeElemIter(node):
        process(child, on_error, db, preserve, strip)
    name = node.tagName
    if not is_xml_func_name(name) or name in preserve: pass
    elif strip and name not in structural_funcs: # just replace with last param
        value = pop_value(list(xml_dom.NodeTextEntryIter(node)), None)
        xml_dom.replace_with_text(node, value)
    else:
        try:
            items = xml_dom.NodeTextEntryIter(node)
            try: func = funcs[name]
            except KeyError:
                # NOTE(review): is_xml_func_name() above already requires name
                # to be in funcs, so this fallback looks unreachable -- confirm
                if db != None: # DB with relational functions available
                    value = sql.put(db, name, dict(items))
                else: value = pop_value(list(items)) # pass value through
            else: value = func(items, node) # local XML function
            
            xml_dom.replace_with_text(node, value)
        except Exception, e: # also catch non-wrapped exceptions (XML func bugs)
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            # Leave the failed function in the tree as a comment
            xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
                '\n'+term.emph_multiline(str_)))
                
            on_error(e)
104

    
105
##### XML functions
106

    
107
# Function names must start with _ to avoid collisions with real tags
108
# Functions take arguments (items, node) and return the replacement value
109

    
110
#### Structural
111

    
112
def _ignore(items, node):
113
    '''Used to "comment out" an XML subtree'''
114
    return None
115
funcs['_ignore'] = _ignore
116
structural_funcs.add('_ignore')
117

    
118
def _ref(items, node):
    '''Used to retrieve a value from another XML node
    @param items
        addr=<path> XPath to value, relative to the XML func's parent node
    @return The referenced value. If it is missing, a UserWarning is issued
        and None is returned.
    @raise SyntaxError if addr is not provided
    '''
    params = dict(items)
    try: addr = params['addr']
    except KeyError as e: raise SyntaxError(e)
    
    target = xpath.get_value(node.parentNode, addr)
    if target == None:
        msg = '_ref: XPath reference target missing: '+str(addr)
        warnings.warn(UserWarning(msg))
    return target
132
funcs['_ref'] = _ref
133
structural_funcs.add('_ref')
134

    
135
#### Conditionals
136

    
137
def _eq(items, node):
138
    items = dict(items)
139
    try:
140
        left = items['left']
141
        right = items['right']
142
    except KeyError: return '' # a value was None
143
    return util.bool2str(left == right)
144
funcs['_eq'] = _eq
145

    
146
def _if(items, node):
    '''Selects one of two values based on a condition
    @param items
        cond=<value> Condition; true iff its string form is non-empty
        then=<value> Result when cond is true
        else=<value> Result when cond is false (optional; defaults to None)
    @raise SyntaxError if cond or then is not provided
    '''
    params = dict(items)
    try: cond, then = params['cond'], params['then']
    except KeyError as e: raise SyntaxError(e)
    
    if cast(strings.ustr, cond): return then
    return params.get('else', None)
156
funcs['_if'] = _if
157

    
158
#### Combining values
159

    
160
def _alt(items, node):
161
    items = list(items)
162
    items.sort()
163
    try: return items[0][1] # value of lowest-numbered item
164
    except IndexError: return None # input got removed by e.g. FormatException
165
funcs['_alt'] = _alt
166

    
167
def _merge(items, node):
    '''Merges all the values, taken in sorted (name, value) order
    @return Result of maps.merge_values() on the string values
    '''
    # Sorting also reads the items *once* from the iter and checks the types
    entries = sorted(conv_items(strings.ustr, items))
    values = [value for name, value in entries]
    return maps.merge_values(*values)
172
funcs['_merge'] = _merge
173

    
174
def _label(items, node):
    '''Prefixes a value with a label
    @param items
        label=<str> Text to put in front of the value
        value=<str>
    @return "<label>: <value>", or None if there is no value
    @raise SyntaxError if there is a value but no label
    '''
    # get *once* from iter, check types
    params = dict(conv_items(strings.ustr, items))
    value = params.get('value', None)
    if value is None: return None # input is empty
    
    try: label = params['label']
    except KeyError as e: raise SyntaxError(e)
    return ': '.join((label, value))
182
funcs['_label'] = _label
183

    
184
#### Transforming values
185

    
186
def _collapse(items, node):
    '''Collapses a subtree if the "value" element in it is NULL
    @param items
        require=<path> XPath, relative to value, that must have a value
        value=<subtree>
    @return value, or None if the required path in it has no value
    @raise SyntaxError if require is not provided
    '''
    params = dict(items)
    try: require = cast(strings.ustr, params['require'])
    except KeyError as e: raise SyntaxError(e)
    subtree = params.get('value', None)
    
    required = xpath.get_value(subtree, require, allow_rooted=False)
    if required == None: return None
    return subtree
195
funcs['_collapse'] = _collapse
196

    
197
# Converters selectable via _nullIf's "type" param (None = param not provided)
types_by_name = {None: strings.ustr, 'str': strings.ustr, 'float': float}
198

    
199
def _nullIf(items, node):
    '''Turns a value into None if it equals the null value
    @param items
        null=<str> The value that stands for NULL
        value=<str>
        type=<name> Key of types_by_name to convert null with (optional)
    @return Result of util.none_if(value, null), or the unchanged value if
        that raises ValueError
    @raise SyntaxError if null is not provided or type is unknown
    '''
    params = dict(conv_items(strings.ustr, items))
    try: null_str = params['null']
    except KeyError as e: raise SyntaxError(e)
    
    try: type_ = types_by_name[params.get('type', None)]
    except KeyError as e: raise SyntaxError(e)
    value = params.get('value', None)
    null = type_(null_str)
    
    try: result = util.none_if(value, null)
    except ValueError: # value not convertible, so can't equal null
        result = value
    return result
212
funcs['_nullIf'] = _nullIf
213

    
214
def repl(repls, value):
    '''Looks up the replacement for value
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @return The replacement; the input value itself if the replacement is "*"
    @raise FormatException if value not in map and no special '*' entry
    '''
    try: replacement = repls[value]
    except KeyError as e:
        # Wrap right away, in case another exception gets raised
        not_found = FormatException(e)
        if '*' not in repls: raise not_found
        replacement = repls['*']
    # '*' means keep input value the same
    return value if replacement == '*' else replacement
229

    
230
def _map(items, node):
    '''Replaces the value according to the given mappings. See repl().
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value, or None if the input or the mapped value is
        empty
    '''
    # get *once* from iter, check types
    entries = conv_items(strings.ustr, items)
    value = pop_value(entries)
    if value == None: return None # input is empty
    
    mapped = repl(dict(entries), value)
    return util.none_if(mapped, u'') # empty value means None
240
funcs['_map'] = _map
241

    
242
def _replace(items, node):
    '''Applies regular expression replacements to the value, in order
    @param items
        <last_entry> Value
        <other_entries> regexp=replacement. A regexp made up of only word
            characters matches whole words only.
    @return The stripped result, or None if the input or the result is empty
    @raise SyntaxError if a regexp is invalid
    '''
    # get *once* from iter, check types
    entries = conv_items(strings.ustr, items)
    value = pop_value(entries)
    if value == None: return None # input is empty
    
    try:
        for pattern, with_ in entries:
            is_plain_word = re.match(r'^\w+$', pattern)
            if is_plain_word: # match whole word
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])'
            value = re.sub(pattern, with_, value)
    except re.error as e: raise SyntaxError(e)
    
    stripped = value.strip()
    return util.none_if(stripped, u'') # empty strings always mean None
253
funcs['_replace'] = _replace
254

    
255
#### Quantities
256

    
257
def _units(items, node):
    '''Applies units actions to a quantity
    @param items
        <last_entry> Value
        <other_entries> action=units, applied in order. Actions:
            default: units.set_default_units(); to: units.convert()
    @return The resulting quantity as a string, or None if input is empty
    @raise SyntaxError if an action is unknown
    @raise FormatException if the conversion fails or units are missing
    '''
    # get *once* from iter, check types
    entries = conv_items(strings.ustr, items)
    value = pop_value(entries)
    if value == None: return None # input is empty
    
    quantity = units.str2quantity(value)
    try:
        for action, units_ in entries:
            units_ = util.none_if(units_, u'')
            if action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise FormatException(e)
            elif action == 'default':
                # NOTE(review): the result is discarded, so this presumably
                # updates quantity in place -- confirm
                units.set_default_units(quantity, units_)
            else: raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise FormatException(e)
    return units.quantity2str(quantity)
273
funcs['_units'] = _units
274

    
275
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first range separator
    @param range_sep Separator between the start and the end of the range
    @return tuple (start, end) of stripped strings. If str_ is not a range,
        or is a negative number (starts with a "-" separator), returns
        (str_, None).
    '''
    start, sep, end = str_.partition(range_sep)
    is_range = sep != ''
    is_negative_num = start == '' and range_sep == '-'
    if not is_range or is_negative_num: return (str_, None)
    return (start.strip(), end.strip())
281

    
282
def _rangeStart(items, node):
    '''Gets the start of a range. See parse_range().
    @param items value=<str> The range
    @return The range's start, or None if there is no value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, end = parse_range(params['value'])
    return start
287
funcs['_rangeStart'] = _rangeStart
288

    
289
def _rangeEnd(items, node):
    '''Gets the end of a range. See parse_range().
    @param items value=<str> The range
    @return The range's end (None if the value is not a range), or None if
        there is no value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, end = parse_range(params['value'])
    return end
294
funcs['_rangeEnd'] = _rangeEnd
295

    
296
def _range(items, node):
    '''Calculates the size of a numeric range
    @param items
        from=<number>
        to=<number>
    @return str of (to - from), or None if either bound is missing
    '''
    bounds = dict(conv_items(float, items))
    start = bounds.get('from', None)
    end = bounds.get('to', None)
    if start is None or end is None: return None
    return str(end - start)
302
funcs['_range'] = _range
303

    
304
def _avg(items, node):
    '''Averages the values of all the items
    @return str of the mean of the values as floats, or None if no items
    '''
    values = [value for name, value in conv_items(float, items)]
    if not values: return None # input is empty
    total = sum(values, 0.)
    return str(total/len(values))
312
funcs['_avg'] = _avg
313

    
314
class CvException(Exception):
    '''A CV (coefficient of variation) value was found where it's not
    allowed'''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only allowed for '
            'ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
319

    
320
def _noCV(items, node):
321
    try: name, value = items.next()
322
    except StopIteration: return None
323
    if re.match('^(?i)CV *\d+$', value): raise FormatException(CvException())
324
    return value
325
funcs['_noCV'] = _noCV
326

    
327
#### Dates
328

    
329
def _date(items, node):
    '''Formats a date as YYYY-MM-DD
    @param items Either
        date=<str> A date string, or a number that is taken as a year (its
            fractional part is converted to days of a 365-day year)
        or the date's parts:
        year=<int> Required
        month=<int or name> Defaults to 1
        day=<int> Defaults to 1
    @return The formatted date; None if items is empty; or the unchanged date
        string if dates.strtotime() raises ImportError
    @raise FormatException if the date is invalid or the year is missing
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try: str_ = items['date']
    except KeyError: # no date string, so build the date from its parts
        # Year is required
        try: items['year']
        except KeyError, e:
            if items == {}: return None # entire date is empty
            else: raise FormatException(e)
        
        # Convert month name to number
        try: month = items['month']
        except KeyError: pass
        else:
            if not month.isdigit(): # month is name
                try: items['month'] = str(dates.strtotime(month).month)
                except ValueError, e: raise FormatException(e)
        
        # The parts are passed to datetime.date() as keyword args below
        items = dict(conv_items(format.str2int, items.iteritems()))
        items.setdefault('month', 1)
        items.setdefault('day', 1)
        
        # At most two tries: as given, then with month and day swapped
        for try_num in xrange(2):
            try:
                date = datetime.date(**items)
                break
            except ValueError, e:
                if try_num > 0: raise FormatException(e)
                    # exception still raised after retry
                msg = strings.ustr(e)
                # Note that this depends on datetime's exact error message
                if msg == 'month must be in 1..12': # try swapping month and day
                    items['month'], items['day'] = items['day'], items['month']
                else: raise FormatException(e)
    else: # a date string was provided
        try: year = float(str_)
        except ValueError: # not a plain number, so parse it as a date string
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError, e: raise FormatException(e)
        # Jan 1 of the year, plus the fraction of the year as a number of days
        else: date = (datetime.date(int(year), 1, 1) +
            datetime.timedelta(round((year % 1.)*365)))
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError, e: raise FormatException(e)
373
funcs['_date'] = _date
374

    
375
def _dateRangeStart(items, node):
    '''Gets the start of a date range
    @param items value=<str> The date range
    @return First element of dates.parse_date_range()'s result, or None if
        there is no value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[0]
380
funcs['_dateRangeStart'] = _dateRangeStart
381

    
382
def _dateRangeEnd(items, node):
    '''Gets the end of a date range
    @param items value=<str> The date range
    @return Second element of dates.parse_date_range()'s result, or None if
        there is no value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[1]
387
funcs['_dateRangeEnd'] = _dateRangeEnd
388

    
389
#### Names
390

    
391
_name_parts_slices_items = [
392
    ('first', slice(None, 1)),
393
    ('middle', slice(1, -1)),
394
    ('last', slice(-1, None)),
395
]
396
name_parts_slices = dict(_name_parts_slices_items)
397
name_parts = [name for name, slice_ in _name_parts_slices_items]
398

    
399
def _name(items, node):
    '''Joins the parts of a name with spaces
    @param items <part>=<str> for any of the parts in name_parts; other
        entries are ignored
    @return The provided parts, in name_parts order ('' if there are none)
    '''
    provided = dict(items)
    ordered = [provided[part] for part in name_parts if part in provided]
    return ' '.join(ordered)
405
funcs['_name'] = _name
406

    
407
def _namePart(items, node):
    '''Extracts parts from full names and joins them using _name()
    @param items <part>=<full name>, where part is a key of name_parts_slices
        and selects which of the name's space-separated words to keep
    @raise SyntaxError if a part name is unknown
    '''
    extracted = []
    for part, full_name in items:
        try: slice_ = name_parts_slices[part]
        except KeyError as e: raise SyntaxError(e)
        words = full_name.split(' ')
        extracted.append((part, ' '.join(words[slice_])))
    return _name(extracted, node)
414
funcs['_namePart'] = _namePart
415

    
416
#### Angles
417

    
418
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @param items value=<str>
    @return The heading as a str; the unchanged value if it is not all
        uppercase; or None if there is no value
    @raise FormatException if the lookup of the direction raises KeyError
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    direction = params['value']
    
    # pass through other coordinate formats
    if not direction.isupper(): return direction
    try: return util.cast(str, angles.compass2heading(direction)) # ignore None
    except KeyError as e: raise FormatException(e)
427
funcs['_compass'] = _compass
428

    
429
#### Paths
430

    
431
def _simplifyPath(items, node):
    '''Removes the empty elements from a chain of elements
    @param items
        next=<path> XPath from an element to the next element in the chain
        require=<path> XPath that has a value iff an element is not empty
        path=<subtree> First element in the chain
    @return The first remaining element of the chain (None if none remains)
    @raise SyntaxError if a param is not provided
    '''
    params = dict(items)
    try:
        next_path = cast(strings.ustr, params['next'])
        require = cast(strings.ustr, params['require'])
        root = params['path']
    except KeyError as e: raise SyntaxError(e)
    
    cur = root
    while cur != None:
        # Find the successor first, before cur may get replaced by it
        successor = xpath.get_1(cur, next_path, allow_rooted=False)
        is_empty = xpath.get_value(cur, require, allow_rooted=False) == None
        if is_empty:
            xml_dom.replace(cur, successor) # remove current elem
            if cur is root: root = successor # also update root
        cur = successor
    return root
447
funcs['_simplifyPath'] = _simplifyPath
(33-33/36)