Project

General

Profile

1
# XML "function" nodes that transform their contents
2

    
3
import datetime
4
import re
5
import sre_constants
6
import warnings
7

    
8
import angles
9
import dates
10
import exc
11
import format
12
import maps
13
import strings
14
import term
15
import units
16
import util
17
import xml_dom
18
import xpath
19

    
20
##### Exceptions
21

    
22
class SyntaxError(exc.ExceptionWithCause):
    '''Raised when an XML function is invoked with missing or invalid params.
    NOTE: shadows the builtin SyntaxError within this module.
    '''
    def __init__(self, cause):
        '''@param cause Underlying exception that revealed the problem'''
        message = 'Invalid XML function syntax'
        exc.ExceptionWithCause.__init__(self, message, cause)
26

    
27
class FormatException(exc.ExceptionWithCause):
    '''Raised when an input value cannot be converted or interpreted'''
    def __init__(self, cause):
        '''@param cause Underlying exception that revealed the problem'''
        message = 'Invalid input value'
        exc.ExceptionWithCause.__init__(self, message, cause)
30

    
31
##### Helper functions
32

    
33
def map_items(func, items):
    '''Applies func to the value of every (name, value) pair.
    @param func Callable taking a value and returning the new value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in input order
    '''
    mapped = []
    for name, value in items:
        mapped.append((name, func(value)))
    return mapped
35

    
36
def cast(type_, val):
    '''Converts val using type_.
    @param type_ Callable that performs the conversion (e.g. float)
    @param val Value to convert
    @return The converted value
    @throws FormatException if type_ raises ValueError for val
    '''
    try: return type_(val)
    # `as` form works on Python 2.6+ and 3.x (the `, e` form is 2.x-only)
    except ValueError as e: raise FormatException(e)
40

    
41
def conv_items(type_, items):
    '''Casts the value of every item to type_.
    Items are first passed through xml_dom.TextEntryOnlyIter (presumably keeps
    only text-valued entries -- confirm in xml_dom).
    @param type_ Callable that performs the conversion (see cast())
    @param items Iterable of (name, value) pairs
    @return list of (name, converted_value) pairs
    @throws FormatException if a value can't be cast
    '''
    text_items = xml_dom.TextEntryOnlyIter(items)
    def to_type(val): return cast(type_, val)
    return map_items(to_type, text_items)
44

    
45
def pop_value(items, name='value'):
    '''Removes the last entry from items and returns its value.
    NOTE: the last entry is popped even when its name doesn't match.
    @param items Mutable list of (name, value) pairs; modified in place
    @param name Name of value param, or None to accept any name
    @return The last entry's value, or None if items is empty or the last
        entry's name differs from name
    '''
    try: last = items.pop() # last entry contains value
    except IndexError: return None # input is empty and no actions
    if name is not None and last[0] != name: return None # input is empty
    return last[1]
51

    
52
# Registry of XML functions: maps a tag name (e.g. '_ignore') to the callable
# implementing it, which is invoked as func(items, node)
funcs = {}
53

    
54
##### Public functions
55

    
56
def process(node, on_error=exc.raise_):
    '''Evaluates every XML function in the subtree rooted at node, in place.
    Children are processed before node itself, so a function receives the
    already-evaluated results of any functions nested inside it.
    If a function fails, its node is replaced by a comment containing the
    node's text and on_error is called with the annotated exception.
    @param node Element to process
    @param on_error Callable taking the exception raised by an XML function
    '''
    for child in xml_dom.NodeElemIter(node): process(child, on_error)
    name = node.tagName
    if name.startswith('_') and name in funcs:
        try:
            value = funcs[name](xml_dom.NodeTextEntryIter(node), node)
            xml_dom.replace_with_text(node, value)
        # `as` form works on Python 2.6+ and 3.x (the `, e` form is 2.x-only)
        except Exception as e: # also catch non-wrapped exceptions (XML func bugs)
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
                '\n'+term.emph_multiline(str_)))
            
            on_error(e)
72

    
73
def strip(node):
    '''Replaces every XML function in the subtree with its last parameter
    (which is usually its value), except for _ignore, which is removed
    completely. Children are stripped before node itself.
    @param node Element to strip, modified in place
    '''
    for child in xml_dom.NodeElemIter(node): strip(child)
    tag = node.tagName
    if not (tag.startswith('_') and tag in funcs): return # not an XML function
    if tag == '_ignore': replacement = None
    else:
        entries = list(xml_dom.NodeTextEntryIter(node))
        replacement = pop_value(entries, None) # accept any param name
    xml_dom.replace_with_text(node, replacement)
82

    
83
##### XML functions
84

    
85
# Function names must start with _ to avoid collisions with real tags
86
# Functions take arguments (items)
87

    
88
#### General
89

    
90
def _ignore(items, node):
91
    '''Used to "comment out" an XML subtree'''
92
    return None
93
funcs['_ignore'] = _ignore
94

    
95
def _ref(items, node):
    '''Used to retrieve a value from another XML node
    @param items
        addr=<path> XPath to value, relative to the XML func's parent node
    @param node The XML function's own node
    @return The value found at addr; a UserWarning is issued if it is None
    @throws SyntaxError if the addr param is missing
    '''
    items = dict(items)
    try: addr = items['addr']
    # `as` form works on Python 2.6+ and 3.x (the `, e` form is 2.x-only)
    except KeyError as e: raise SyntaxError(e)
    
    value = xpath.get_value(node.parentNode, addr)
    if value is None:
        warnings.warn(UserWarning('_ref: XPath reference target missing: '
            +str(addr)))
    return value
109
funcs['_ref'] = _ref
110

    
111
#### Conditionals
112

    
113
def _eq(items, node):
114
    items = dict(items)
115
    try:
116
        left = items['left']
117
        right = items['right']
118
    except KeyError: return '' # a value was None
119
    return util.bool2str(left == right)
120
funcs['_eq'] = _eq
121

    
122
def _if(items, node):
    '''Selects one of two values based on a condition.
    @param items
        cond=<value> Condition; true iff its string form is non-empty
        then=<value> Result when cond is true
        else=<value> (optional) Result when cond is false; defaults to None
    @throws SyntaxError if cond or then is missing
    '''
    items = dict(items)
    try:
        cond = items['cond']
        then = items['then']
    # `as` form works on Python 2.6+ and 3.x (the `, e` form is 2.x-only)
    except KeyError as e: raise SyntaxError(e)
    else_ = items.get('else', None)
    cond = bool(cast(strings.ustr, cond))
    if cond: return then
    else: return else_
132
funcs['_if'] = _if
133

    
134
#### Combining values
135

    
136
def _alt(items, node):
137
    items = list(items)
138
    items.sort()
139
    try: return items[0][1] # value of lowest-numbered item
140
    except IndexError: return None # input got removed by e.g. FormatException
141
funcs['_alt'] = _alt
142

    
143
def _merge(items, node):
    '''Merges the values of all items, ordered by their (name, value) pairs,
    using maps.merge_values().
    @param items Iterable of (name, value) pairs; values are cast to strings
    @throws FormatException if a value can't be cast
    '''
    # sorted() consumes the iter exactly *once*; conv_items() checks types
    pairs = sorted(conv_items(strings.ustr, items))
    values = [value for _key, value in pairs]
    return maps.merge_values(*values)
148
funcs['_merge'] = _merge
149

    
150
def _label(items, node):
    '''Prefixes a value with a label.
    @param items
        label=<text> Label to prepend
        value=<text> Value to label
    @return '<label>: <value>', or None if value is missing
    @throws SyntaxError if value is present but label is missing
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    value = items.get('value', None)
    if value is None: return None # input is empty
    try: label = items['label']
    # `as` form works on Python 2.6+ and 3.x (the `, e` form is 2.x-only)
    except KeyError as e: raise SyntaxError(e)
    return label+': '+value
158
funcs['_label'] = _label
159

    
160
#### Transforming values
161

    
162
def _collapse(items, node):
    '''Collapses a subtree if a required node inside it is missing or empty.
    @param items
        require=<path> XPath, evaluated against value, of the node that must be
            non-empty (rooted paths are not allowed)
        value=<subtree> Subtree to keep or collapse
    @return value, or None if the required node is missing or empty
    @throws SyntaxError if the require param is missing
    '''
    items = dict(items)
    try: require = cast(strings.ustr, items['require'])
    # `as` form works on Python 2.6+ and 3.x (the `, e` form is 2.x-only)
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)
    
    required_node = xpath.get_1(value, require, allow_rooted=False)
    if required_node is None or xml_dom.is_empty(required_node): return None
    else: return value
172
funcs['_collapse'] = _collapse
173

    
174
# Converters selectable by _nullIf's type param; None is the default (string)
types_by_name = {None: strings.ustr, 'str': strings.ustr, 'float': float}

def _nullIf(items, node):
    '''Turns a value into None when it equals a given null marker.
    @param items
        null=<text> The marker that means "no value"
        value=<text> (optional) Value to test
        type=<name> (optional) Key of types_by_name used to convert null before
            comparing; defaults to string
    @return The result of util.none_if(value, null); value itself if that
        comparison raises ValueError
    @throws SyntaxError if null is missing or type is not in types_by_name
    '''
    items = dict(conv_items(strings.ustr, items))
    # `as` form works on Python 2.6+ and 3.x (the `, e` form is 2.x-only)
    try: null = items['null']
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)
    type_str = items.get('type', None)
    
    try: type_ = types_by_name[type_str]
    except KeyError as e: raise SyntaxError(e)
    null = type_(null)
    
    try: return util.none_if(value, null)
    except ValueError: return value # value not convertible, so can't equal null
189
funcs['_nullIf'] = _nullIf
190

    
191
def repl(repls, value):
    '''Looks up the replacement for value.
    Raises error if value not in map and no special '*' entry
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @param value Input value to replace
    @return The replacement, or value itself if the replacement is "*"
    @throws FormatException if value has no entry and there is no "*" entry
    '''
    try: new_value = repls[value]
    # `as` form works on Python 2.6+ and 3.x (the `, e` form is 2.x-only)
    except KeyError as e:
        # Save traceback right away in case another exception raised
        fe = FormatException(e)
        try: new_value = repls['*']
        except KeyError: raise fe
    if new_value == '*': new_value = value # '*' means keep input value the same
    return new_value
206

    
207
def _map(items, node):
    '''See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value, or None if the input is empty or maps to ""
    @throws FormatException if the value has no mapping (see repl())
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    return util.none_if(repl(dict(items), value), u'') # empty value means None
217
funcs['_map'] = _map
218

    
219
def _replace(items, node):
    '''Applies regular-expression replacements to a value, in order.
    @param items
        <last_entry> value=<text> Value to transform
        <other_entries> pattern=replacement, passed to re.sub(). A pattern
            consisting only of word characters matches whole words only, where
            a word is delimited by anything other than letters and digits.
    @return The transformed value with surrounding whitespace stripped, or None
        if the input is empty or the result is an empty string
    @throws SyntaxError if a pattern is not a valid regular expression
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    try:
        # Named `pattern` so the module-level repl() isn't shadowed
        for pattern, with_ in items:
            if re.match(r'^\w+$', pattern):
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])' # match whole word
            value = re.sub(pattern, with_, value)
    # re.error is the same class as sre_constants.error, without relying on
    # the deprecated sre_constants module
    except re.error as e: raise SyntaxError(e)
    return util.none_if(value.strip(), u'') # empty strings always mean None
230
funcs['_replace'] = _replace
231

    
232
#### Quantities
233

    
234
def _units(items, node):
    '''Parses a quantity and applies unit actions to it, in order.
    @param items
        <last_entry> value=<text> Quantity, parsed by units.str2quantity()
        <other_entries> action=units, where action is one of:
            default: units to assume (see units.set_default_units())
            to: units to convert to (see units.convert())
            An empty units string is passed on as None.
    @return The resulting quantity as a string, or None if the input is empty
    @throws SyntaxError if an action is not recognized
    @throws FormatException if units are missing or the conversion fails
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    
    quantity = units.str2quantity(value)
    # `as` form works on Python 2.6+ and 3.x (the `, e` form is 2.x-only)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default': units.set_default_units(quantity, units_)
            elif action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise FormatException(e)
            else: raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise FormatException(e)
    return units.quantity2str(quantity)
250
funcs['_units'] = _units
251

    
252
def parse_range(str_, range_sep='-'):
    '''Splits a "start<sep>end" string at the first separator.
    @param str_ String to split
    @param range_sep Separator between the start and end of the range
    @return (start, end) with surrounding whitespace stripped, or
        (str_, None) unchanged if str_ is not a range. With the '-' separator,
        a string starting with '-' is a negative number rather than a range.
    '''
    head, found_sep, tail = str_.partition(range_sep)
    is_negative_number = head == '' and range_sep == '-'
    if found_sep == '' or is_negative_number: return (str_, None)
    return (head.strip(), tail.strip())
258

    
259
def _rangeStart(items, node):
    '''Extracts the start of a range (see parse_range()).
    @param items
        value=<text> Range string
    @return The start of the range, or None if value is missing
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    start, _end = parse_range(params['value'])
    return start
264
funcs['_rangeStart'] = _rangeStart
265

    
266
def _rangeEnd(items, node):
    '''Extracts the end of a range (see parse_range()).
    @param items
        value=<text> Range string
    @return The end of the range (None if the value is not a range), or None
        if value is missing
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    _start, end = parse_range(params['value'])
    return end
271
funcs['_rangeEnd'] = _rangeEnd
272

    
273
def _range(items, node):
    '''Computes the size of a numeric range.
    @param items
        from=<number> Start of the range
        to=<number> End of the range
    @return str(to - from), or None if either param is missing
    @throws FormatException if a param is not a number
    '''
    items = dict(conv_items(float, items))
    from_ = items.get('from', None)
    to = items.get('to', None)
    if from_ is None or to is None: return None
    return str(to - from_)
279
funcs['_range'] = _range
280

    
281
def _avg(items, node):
    '''Computes the arithmetic mean of all item values.
    @param items Iterable of (name, value) pairs; values are cast to float
    @return The mean as a string, or None if there are no items
    @throws FormatException if a value is not a number
    '''
    total = 0.
    count = 0
    for count, (_key, value) in enumerate(conv_items(float, items), 1):
        total += value
    if count == 0: return None # input is empty
    return str(total/count)
289
funcs['_avg'] = _avg
290

    
291
class CvException(Exception):
    '''Raised for a CV (coefficient of variation) value where one is not
    allowed
    '''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only allowed for ratio'
            ' scale data'
            ' (see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
296

    
297
def _noCV(items, node):
298
    try: name, value = items.next()
299
    except StopIteration: return None
300
    if re.match('^(?i)CV *\d+$', value): raise FormatException(CvException())
301
    return value
302
funcs['_noCV'] = _noCV
303

    
304
#### Dates
305

    
306
def _date(items, node):
    '''Normalizes a date to the form YYYY-MM-DD.
    @param items Either
        date=<text> A number, taken as a (possibly fractional) year, or any
            string accepted by dates.strtotime()
        or
        year=<int> Required
        month=<int or name> (optional) Defaults to 1. Names are converted with
            dates.strtotime().
        day=<int> (optional) Defaults to 1
        If the month is out of range, month and day are swapped and the date is
        tried once more.
    @return The formatted date; None if no params were given; the date string
        unchanged if dates.strtotime() raises ImportError
    @throws FormatException if the year is missing or the date is invalid
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    # `as` form works on Python 2.6+ and 3.x (the `, e` form is 2.x-only)
    try: str_ = items['date']
    except KeyError:
        # Year is required
        try: items['year']
        except KeyError as e:
            if items == {}: return None # entire date is empty
            else: raise FormatException(e)
        
        # Convert month name to number
        try: month = items['month']
        except KeyError: pass
        else:
            if not month.isdigit(): # month is name
                try: items['month'] = str(dates.strtotime(month).month)
                except ValueError as e: raise FormatException(e)
        
        # iter(items.items()) still hands conv_items() an iterator, as
        # iteritems() did, but also exists on Python 3
        items = dict(conv_items(format.str2int, iter(items.items())))
        items.setdefault('month', 1)
        items.setdefault('day', 1)
        
        for try_num in range(2):
            try:
                date = datetime.date(**items)
                break
            except ValueError as e:
                if try_num > 0: raise FormatException(e)
                    # exception still raised after retry
                msg = strings.ustr(e)
                if msg == 'month must be in 1..12': # try swapping month and day
                    items['month'], items['day'] = items['day'], items['month']
                else: raise FormatException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise FormatException(e)
        # Fractional year: Jan 1 plus the fraction of a 365-day year, in days
        else: date = (datetime.date(int(year), 1, 1) +
            datetime.timedelta(round((year % 1.)*365)))
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
350
funcs['_date'] = _date
351

    
352
def _dateRangeStart(items, node):
    '''Extracts the start of a date range (see dates.parse_date_range()).
    @param items
        value=<text> Date range string
    @return The first element of the parsed range, or None if value is missing
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[0]
357
funcs['_dateRangeStart'] = _dateRangeStart
358

    
359
def _dateRangeEnd(items, node):
    '''Extracts the end of a date range (see dates.parse_date_range()).
    @param items
        value=<text> Date range string
    @return The second element of the parsed range, or None if value is missing
    '''
    params = dict(conv_items(strings.ustr, items))
    if 'value' not in params: return None # input is empty
    date_range = dates.parse_date_range(params['value'])
    return date_range[1]
364
funcs['_dateRangeEnd'] = _dateRangeEnd
365

    
366
#### Names
367

    
368
_name_parts_slices_items = [
369
    ('first', slice(None, 1)),
370
    ('middle', slice(1, -1)),
371
    ('last', slice(-1, None)),
372
]
373
name_parts_slices = dict(_name_parts_slices_items)
374
name_parts = [name for name, slice_ in _name_parts_slices_items]
375

    
376
def _name(items, node):
    '''Joins name parts into a full name.
    @param items (part, value) pairs; only the parts listed in name_parts are
        used, and they are emitted in name_parts order
    @return The present parts joined by single spaces ('' if none are present)
    '''
    by_part = dict(items)
    present = [by_part[part] for part in name_parts if part in by_part]
    return ' '.join(present)
382
funcs['_name'] = _name
383

    
384
def _namePart(items, node):
    '''Extracts parts of full names and joins them with _name().
    @param items (part, full_name) pairs, where part is a key of
        name_parts_slices. Each full name is split on spaces and only the words
        selected by that part's slice are kept.
    @return The extracted parts joined by _name()
    @throws SyntaxError if a part name is not recognized
    '''
    out_items = []
    for part, value in items:
        try: slice_ = name_parts_slices[part]
        # `as` form works on Python 2.6+ and 3.x (the `, e` form is 2.x-only)
        except KeyError as e: raise SyntaxError(e)
        out_items.append((part, ' '.join(value.split(' ')[slice_])))
    return _name(out_items, node)
391
funcs['_namePart'] = _namePart
392

    
393
#### Angles
394

    
395
def _compass(items, node):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @param items
        value=<text> Compass direction, or a coordinate in another format
    @return The heading converted with util.cast(str, ...); value unchanged if
        it is not all-uppercase; None if value is missing
    @throws FormatException if angles.compass2heading() raises KeyError
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    
    if not value.isupper(): return value # pass through other coordinate formats
    try: return util.cast(str, angles.compass2heading(value)) # ignore None
    # `as` form works on Python 2.6+ and 3.x (the `, e` form is 2.x-only)
    except KeyError as e: raise FormatException(e)
404
funcs['_compass'] = _compass
405

    
406
#### Paths
407

    
408
def _simplifyPath(items, node):
    '''Removes the empty elements from a chain of nested elements.
    Starting at path, each element whose required node is missing or empty is
    replaced by its next element; the walk then continues with that next
    element until there is none.
    @param items
        next=<path> XPath from an element to the next element in the chain
        require=<path> XPath from an element to the node that must be non-empty
        path=<subtree> First element of the chain
    @param node The XML function's own node (unused)
    @return The first remaining element of the chain (None if all were removed)
    @throws SyntaxError if a param is missing
    '''
    items = dict(items)
    try:
        next_path = cast(strings.ustr, items['next']) # don't shadow next()
        require = cast(strings.ustr, items['require'])
        root = items['path']
    # `as` form works on Python 2.6+ and 3.x (the `, e` form is 2.x-only)
    except KeyError as e: raise SyntaxError(e)
    
    elem = root
    while elem is not None:
        new_elem = xpath.get_1(elem, next_path, allow_rooted=False)
        required_node = xpath.get_1(elem, require, allow_rooted=False)
        if required_node is None or xml_dom.is_empty(required_node):# empty elem
            xml_dom.replace(elem, new_elem) # remove current elem
            if elem is root: root = new_elem # also update root
        elem = new_elem
    return root
425
funcs['_simplifyPath'] = _simplifyPath
(30-30/33)