Project

General

Profile

1
# XML "function" nodes that transform their contents
2

    
3
import datetime
4
import re
5
import sre_constants
6
import warnings
7

    
8
import angles
9
import dates
10
import exc
11
import format
12
import maps
13
import sql
14
import strings
15
import term
16
import units
17
import util
18
import xml_dom
19
import xpath
20

    
21
##### Exceptions
22

    
23
class SyntaxError(exc.ExceptionWithCause):
    '''Signals that an XML function was invoked incorrectly (e.g. a required
    param is missing). Within this module, this shadows the builtin of the same
    name.
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax'
        exc.ExceptionWithCause.__init__(self, msg, cause)
27

    
28
class FormatException(exc.ExceptionWithCause):
    '''Signals that an input value could not be converted or interpreted
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        msg = 'Invalid input value'
        exc.ExceptionWithCause.__init__(self, msg, cause)
31

    
32
##### Helper functions
33

    
34
def map_items(func, items):
    '''Applies func to the value of every (name, value) pair
    @param func Function taking a single value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in input order
    '''
    mapped = []
    for name, value in items:
        mapped.append((name, func(value)))
    return mapped
36

    
37
def cast(type_, val):
    '''Converts val by calling type_ on it
    @return type_(val)
    @throws FormatException if type_ raises a ValueError
    '''
    try:
        converted = type_(val)
    except ValueError as e:
        raise FormatException(e)
    return converted
41

    
42
def conv_items(type_, items):
    '''Casts the value of each entry yielded by xml_dom.TextEntryOnlyIter(items)
    @param type_ Conversion function passed to cast()
    @return list of (name, converted value) pairs
    @throws FormatException if a value can't be cast
    '''
    return [(name, cast(type_, value))
        for name, value in xml_dom.TextEntryOnlyIter(items)]
45

    
46
def pop_value(items, name='value'):
    '''Removes the last entry from items and returns its value
    @param items list of (name, value) entries. Modified in place: the last
        entry is removed even if its name doesn't match.
    @param name Name of value param, or None to accept any name
    @return The last entry's value, or None if items is empty or the last entry
        has a different name
    '''
    try:
        entry = items.pop() # last entry contains value
    except IndexError:
        return None # input is empty and no actions
    if name is None or entry[0] == name:
        return entry[1]
    return None # input is empty
52

    
53
# Registry of local XML functions: maps a function's tag name (e.g. '_ignore')
# to a callable taking (items, node). Filled in by the funcs[...] = ...
# assignments that follow each function definition.
funcs = {}
54

    
55
##### Public functions
56

    
57
def is_func_name(name):
    '''Whether name has the form of a function name: it starts with "_" but
    is not just "_"
    '''
    if name == '_': return False # '_' is default root node name
    return name.startswith('_')
59

    
60
def is_func(node):
    '''Whether node's tag name has the form of a function name'''
    return is_func_name(node.tagName)
61

    
62
def is_xml_func_name(name):
    '''Whether name is the name of a function registered in funcs'''
    if not is_func_name(name): return False
    return name in funcs
63

    
64
def is_xml_func(node):
    '''Whether node's tag name is the name of a function registered in funcs'''
    return is_xml_func_name(node.tagName)
65

    
66
def process(node, on_error=exc.raise_, db=None):
    '''Evaluates the functions in an XML subtree, children before parents,
    replacing each function node with the text of the value it produces
    @param node Root element of the subtree
    @param on_error Called with the exception when evaluating a function node
        fails. Before it is called, the failed node is replaced with the result
        of xml_dom.mk_comment() on the node's source.
    @param db If not None, function names that are not in funcs are evaluated
        using sql.put(). Otherwise, such nodes pass their value param through.
    '''
    # Children first, so a function sees its params already evaluated. db must
    # be passed along, or nested nodes would never use the relational functions.
    for child in xml_dom.NodeElemIter(node): process(child, on_error, db)
    name = node.tagName
    if not is_func_name(name): return
    
    try:
        items = xml_dom.NodeTextEntryIter(node)
        try: func = funcs[name]
        except KeyError:
            if db is not None: # DB with relational functions available
                value = sql.put(db, name, dict(items))
            else: value = pop_value(list(items)) # pass value through
        else: value = func(items, node) # local XML function
        
        xml_dom.replace_with_text(node, value)
    except Exception as e: # also catch non-wrapped exceptions (XML func bugs)
        # Save in case another exception raised, overwriting sys.exc_info()
        exc.add_traceback(e)
        str_ = strings.ustr(node)
        exc.add_msg(e, 'function:\n'+str_)
        xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
            '\n'+term.emph_multiline(str_)))
        
        on_error(e)
89

    
90
def strip(node, preserve=set()):
    '''Replaces every XML function with its last parameter (which is usually its
    value), except for _ignore, which is removed completely
    @param node Root element of the subtree to strip
    @param preserve set(str...) XML functions not to remove.
        * container can be any iterable type
    '''
    preserve = set(preserve) # private copy, so any iterable is accepted
    
    # Children first, so that nested functions are already replaced
    for child in xml_dom.NodeElemIter(node): strip(child, preserve)
    
    name = node.tagName
    if not is_xml_func_name(name) or name in preserve: return
    if name == '_ignore':
        value = None
    else:
        entries = list(xml_dom.NodeTextEntryIter(node))
        value = pop_value(entries, None) # last param, whatever its name
    xml_dom.replace_with_text(node, value)
104

    
105
##### XML functions
106

    
107
# Function names must start with _ to avoid collisions with real tags
108
# Functions take arguments (items)
109

    
110
#### General
111

    
112
def _ignore(items, node):
113
    '''Used to "comment out" an XML subtree'''
114
    return None
115
funcs['_ignore'] = _ignore
116

    
117
def _ref(items, node):
    '''Used to retrieve a value from another XML node
    @param items
        addr=<path> XPath to value, relative to the XML func's parent node
    @return The result of xpath.get_value(). If this is None, a UserWarning is
        also issued.
    @throws SyntaxError if addr is not provided
    '''
    params = dict(items)
    try:
        addr = params['addr']
    except KeyError as e:
        raise SyntaxError(e)
    
    target = xpath.get_value(node.parentNode, addr)
    if target is None:
        msg = '_ref: XPath reference target missing: '+str(addr)
        warnings.warn(UserWarning(msg))
    return target
131
funcs['_ref'] = _ref
132

    
133
#### Conditionals
134

    
135
def _eq(items, node):
    '''Tests whether two values are equal
    @param items
        left=<value>
        right=<value>
    @return util.bool2str() of the comparison, or '' if either param is missing
    '''
    params = dict(items)
    if 'left' not in params or 'right' not in params:
        return '' # a value was None
    return util.bool2str(params['left'] == params['right'])
142
funcs['_eq'] = _eq
143

    
144
def _if(items, node):
    '''Selects one of two values based on a condition
    @param items
        cond=<value> Condition; true if its string form is non-empty
        then=<value> Returned if cond is true
        else=<value> Returned if cond is false. Optional; defaults to None.
    @throws SyntaxError if cond or then is not provided
    '''
    params = dict(items)
    try:
        cond = params['cond']
        then = params['then']
    except KeyError as e:
        raise SyntaxError(e)
    otherwise = params.get('else')
    
    if bool(cast(strings.ustr, cond)):
        return then
    return otherwise
154
funcs['_if'] = _if
155

    
156
#### Combining values
157

    
158
def _alt(items, node):
159
    items = list(items)
160
    items.sort()
161
    try: return items[0][1] # value of lowest-numbered item
162
    except IndexError: return None # input got removed by e.g. FormatException
163
funcs['_alt'] = _alt
164

    
165
def _merge(items, node):
    '''Merges the values of all entries using maps.merge_values()
    @param items (name, value) entries. The values are passed in the sorted
        order of the entries.
    @throws FormatException if a value can't be converted to a string
    '''
    # get *once* from iter, check types
    ordered = sorted(conv_items(strings.ustr, items))
    values = [value for name, value in ordered]
    return maps.merge_values(*values)
170
funcs['_merge'] = _merge
171

    
172
def _label(items, node):
    '''Prefixes a value with a label
    @param items
        label=<str> Required if value is provided
        value=<str>
    @return 'label: value', or None if there is no value
    @throws SyntaxError if there is a value but no label
    '''
    # get *once* from iter, check types
    params = dict(conv_items(strings.ustr, items))
    value = params.get('value')
    if value is None: return None # input is empty
    try:
        label = params['label']
    except KeyError as e:
        raise SyntaxError(e)
    return label+': '+value
180
funcs['_label'] = _label
181

    
182
#### Transforming values
183

    
184
def _collapse(items, node):
    '''Collapses a subtree if the "value" element in it is NULL
    @param items
        require=<path> XPath, relative to value, of the node that must exist and
            be non-empty for the subtree to be kept
        value=<node> The subtree
    @return value, or None if the subtree is missing or the required node is
        missing or empty
    @throws SyntaxError if require is not provided
    '''
    items = dict(items)
    try: require = cast(strings.ustr, items['require'])
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)
    # As in the other functions, a missing value is empty input rather than an
    # error. There is also no subtree to search for the required node.
    if value is None: return None # input is empty
    
    required_node = xpath.get_1(value, require, allow_rooted=False)
    if required_node is None or xml_dom.is_empty(required_node): return None
    else: return value
194
funcs['_collapse'] = _collapse
195

    
196
# Converters selectable with _nullIf's "type" param. The None key is the one
# looked up when no type param is provided.
types_by_name = {None: strings.ustr, 'str': strings.ustr, 'float': float}
197

    
198
def _nullIf(items, node):
    '''Turns a value into None if it equals a designated null value
    @param items
        null=<str> The value to treat as None
        value=<str> The input value
        type=<str> Optional key of types_by_name, giving the type that null is
            converted to before comparing
    @return The result of util.none_if(value, null), or value unchanged if
        that raises a ValueError
    @throws SyntaxError if null is not provided or type is not recognized
    '''
    params = dict(conv_items(strings.ustr, items))
    try:
        null = params['null']
    except KeyError as e:
        raise SyntaxError(e)
    value = params.get('value')
    type_name = params.get('type')
    
    try:
        convert = types_by_name[type_name]
    except KeyError as e:
        raise SyntaxError(e)
    null = convert(null)
    
    try:
        return util.none_if(value, null)
    except ValueError:
        return value # value not convertible, so can't equal null
211
funcs['_nullIf'] = _nullIf
212

    
213
def repl(repls, value):
    '''Looks up the replacement for a value
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @return The replacement for value
    @throws FormatException if value not in map and no special '*' entry
    '''
    try:
        replacement = repls[value]
    except KeyError as e:
        # Wrap right away, in case another exception gets raised
        not_found = FormatException(e)
        if '*' not in repls: raise not_found
        replacement = repls['*']
    # '*' means keep input value the same
    return value if replacement == '*' else replacement
228

    
229
def _map(items, node):
    '''See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value, with an empty string turned into None. None if
        there is no value.
    '''
    # get *once* from iter, check types
    entries = conv_items(strings.ustr, items)
    value = pop_value(entries) # what is left in entries are the mappings
    if value is None: return None # input is empty
    mapped = repl(dict(entries), value)
    return util.none_if(mapped, u'') # empty value means None
239
funcs['_map'] = _map
240

    
241
def _replace(items, node):
    '''Applies regular expression replacements to a value, in order
    @param items
        <last_entry> value=<str> The input value
        <other_entries> pattern=replacement Passed to re.sub(). A pattern
            consisting only of word characters matches whole words only.
    @return The resulting string with surrounding whitespace removed, or None
        if it is empty or there is no value
    @throws SyntaxError if a pattern or replacement is not a valid regular
        expression
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    try:
        for pattern, with_ in items:
            if re.match(r'^\w+$', pattern):
                # match whole word
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])'
            value = re.sub(pattern, with_, value)
    # re.error is the public name of the class that the internal (and now
    # deprecated) sre_constants module exposes as sre_constants.error
    except re.error as e: raise SyntaxError(e)
    return util.none_if(value.strip(), u'') # empty strings always mean None
252
funcs['_replace'] = _replace
253

    
254
#### Quantities
255

    
256
def _units(items, node):
    '''Applies units actions to a quantity
    @param items
        <last_entry> value=<str> The quantity, parsed by units.str2quantity()
        <other_entries> action=units Applied in order. Actions:
            default: units.set_default_units() with the given units
            to: units.convert() to the given units
            An empty units string is passed as None.
    @return The resulting quantity as a string, or None if there is no value
    @throws SyntaxError if an action is not recognized
    @throws FormatException if units are missing or the conversion fails
    '''
    # get *once* from iter, check types
    actions = conv_items(strings.ustr, items)
    value = pop_value(actions)
    if value is None: return None # input is empty
    
    quantity = units.str2quantity(value)
    try:
        for action, units_ in actions:
            units_ = util.none_if(units_, u'')
            if action == 'to':
                try:
                    quantity = units.convert(quantity, units_)
                except ValueError as e:
                    raise FormatException(e)
            elif action == 'default':
                units.set_default_units(quantity, units_)
            else:
                raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e:
        raise FormatException(e)
    return units.quantity2str(quantity)
272
funcs['_units'] = _units
273

    
274
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first separator
    @param str_ The range, e.g. '1-2'
    @param range_sep The separator between the start and the end
    @return tuple (start, end), each with surrounding whitespace removed. If
        str_ is not a range, (str_, None) with str_ unchanged.
    '''
    start, sep, end = str_.partition(range_sep)
    is_range = sep != ''
    # A leading '-' is the sign of a negative number, not a separator
    if is_range and start == '' and range_sep == '-': is_range = False
    if not is_range: return (str_, None)
    return (start.strip(), end.strip())
280

    
281
def _rangeStart(items, node):
    '''Gets the start of a range. See parse_range().
    @param items
        value=<str> The range
    @return The start of the range, or None if there is no value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, end = parse_range(params['value'])
    return start
286
funcs['_rangeStart'] = _rangeStart
287

    
288
def _rangeEnd(items, node):
    '''Gets the end of a range. See parse_range().
    @param items
        value=<str> The range
    @return The end of the range; None if there is no value or the value is
        not a range
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, end = parse_range(params['value'])
    return end
293
funcs['_rangeEnd'] = _rangeEnd
294

    
295
def _range(items, node):
    '''Computes the size of a numeric range
    @param items
        from=<float>
        to=<float>
    @return str of to - from, or None if either param is missing
    @throws FormatException if a value is not a number
    '''
    bounds = dict(conv_items(float, items))
    from_ = bounds.get('from')
    to = bounds.get('to')
    if from_ is None or to is None: return None
    return str(to - from_)
301
funcs['_range'] = _range
302

    
303
def _avg(items, node):
    '''Averages the values of all entries
    @return str of the mean, or None if there are no entries
    @throws FormatException if a value is not a number
    '''
    values = [value for name, value in conv_items(float, items)]
    if not values: return None # input is empty
    return str(sum(values, 0.)/len(values))
311
funcs['_avg'] = _avg
312

    
313
class CvException(Exception):
    '''Describes why a CV (coefficient of variation) value was rejected'''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
318

    
319
def _noCV(items, node):
320
    try: name, value = items.next()
321
    except StopIteration: return None
322
    if re.match('^(?i)CV *\d+$', value): raise FormatException(CvException())
323
    return value
324
funcs['_noCV'] = _noCV
325

    
326
#### Dates
327

    
328
def _date(items, node):
    '''Formats a date as YYYY-MM-DD
    @param items Either
        date=<str> A date string, or a number giving the year. The fractional
            part of the number is taken as a fraction of 365 days.
        or
        year=<int> Required
        month=<int|str> Number or name. Optional; defaults to 1.
        day=<int> Optional; defaults to 1
        If month is not in 1..12, month and day are swapped and tried again.
    @return The formatted date; None if there are no params; or the date
        string unchanged if dates.strtotime() raises an ImportError
    @throws FormatException if the date is invalid
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try: str_ = items['date']
    except KeyError:
        # Year is required
        try: items['year']
        except KeyError as e:
            if items == {}: return None # entire date is empty
            else: raise FormatException(e)
        
        # Convert month name to number
        try: month = items['month']
        except KeyError: pass
        else:
            if not month.isdigit(): # month is name
                try: items['month'] = str(dates.strtotime(month).month)
                except ValueError as e: raise FormatException(e)
        
        items = dict(conv_items(format.str2int, iter(items.items())))
        items.setdefault('month', 1)
        items.setdefault('day', 1)
        
        for try_num in range(2):
            try:
                date = datetime.date(**items)
                break
            except ValueError as e:
                if try_num > 0: raise FormatException(e)
                    # exception still raised after retry
                msg = strings.ustr(e)
                if msg == 'month must be in 1..12': # try swapping month and day
                    items['month'], items['day'] = items['day'], items['month']
                else: raise FormatException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise FormatException(e)
        else:
            # float() also accepts nan, inf and years that datetime does not
            # support. These are invalid input values, not XML func bugs, so
            # wrap the resulting errors like all the other date errors.
            try:
                date = (datetime.date(int(year), 1, 1) +
                    datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e: raise FormatException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
372
funcs['_date'] = _date
373

    
374
def _dateRangeStart(items, node):
    '''Gets the first element of dates.parse_date_range() for a value
    @param items
        value=<str> The date range
    @return The start of the date range, or None if there is no value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[0]
379
funcs['_dateRangeStart'] = _dateRangeStart
380

    
381
def _dateRangeEnd(items, node):
    '''Gets the second element of dates.parse_date_range() for a value
    @param items
        value=<str> The date range
    @return The end of the date range, or None if there is no value
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[1]
386
funcs['_dateRangeEnd'] = _dateRangeEnd
387

    
388
#### Names
389

    
390
# For each name part, the slice of a full name's space-separated words that
# makes up that part. Listed in the order the parts appear in a full name.
_name_parts_slices_items = [
    ('first', slice(None, 1)),
    ('middle', slice(1, -1)),
    ('last', slice(-1, None)),
]
# Name parts in output order
name_parts = [pair[0] for pair in _name_parts_slices_items]
# Lookup of slice by name part
name_parts_slices = dict(_name_parts_slices_items)
397

    
398
def _name(items, node):
    '''Combines name parts into a full name
    @param items Entries named after the elements of name_parts. Other entries
        are ignored.
    @return The provided parts, in name_parts order, separated by spaces
    '''
    provided = dict(items)
    return ' '.join([provided[part] for part in name_parts if part in provided])
404
funcs['_name'] = _name
405

    
406
def _namePart(items, node):
    '''Extracts parts of a full name
    @param items part=full_name entries, where part is a key of
        name_parts_slices. Each full name is split on spaces, and only the
        words belonging to that part are kept.
    @return The extracted parts, combined by _name()
    @throws SyntaxError if a part is not recognized
    '''
    extracted = []
    for part, full_name in items:
        try:
            slice_ = name_parts_slices[part]
        except KeyError as e:
            raise SyntaxError(e)
        words = full_name.split(' ')
        extracted.append((part, ' '.join(words[slice_])))
    return _name(extracted, node)
413
funcs['_namePart'] = _namePart
414

    
415
#### Angles
416

    
417
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @param items
        value=<str> The direction
    @return The result of angles.compass2heading(), cast by util.cast(str,
        ...); the value unchanged if it is not all uppercase; or None if there
        is no value
    @throws FormatException if angles.compass2heading() raises a KeyError
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    value = params['value']
    
    # pass through other coordinate formats
    if not value.isupper(): return value
    try:
        heading = angles.compass2heading(value)
        return util.cast(str, heading) # ignore None
    except KeyError as e:
        raise FormatException(e)
426
funcs['_compass'] = _compass
427

    
428
#### Paths
429

    
430
def _simplifyPath(items, node):
    '''Removes the empty elements from a chain of elements
    @param items
        next=<path> XPath from an element of the chain to the next one
        require=<path> XPath from an element to the node that must exist and be
            non-empty for that element to be kept
        path=<node> The first element of the chain
    @return The first remaining element of the chain (None if there is none)
    @throws SyntaxError if a param is not provided
    '''
    params = dict(items)
    try:
        next_path = cast(strings.ustr, params['next'])
        require = cast(strings.ustr, params['require'])
        root = params['path']
    except KeyError as e:
        raise SyntaxError(e)
    
    curr = root
    while curr is not None:
        # Look up both nodes before curr is (possibly) replaced
        successor = xpath.get_1(curr, next_path, allow_rooted=False)
        required_node = xpath.get_1(curr, require, allow_rooted=False)
        if required_node is None or xml_dom.is_empty(required_node):
            # empty elem
            xml_dom.replace(curr, successor) # remove current elem
            if curr is root: root = successor # also update root
        curr = successor
    return root
447
funcs['_simplifyPath'] = _simplifyPath
(32-32/35)