Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 111 aaronmk
7 1607 aaronmk
import angles
8 818 aaronmk
import dates
9 300 aaronmk
import exc
10 1580 aaronmk
import format
11 917 aaronmk
import maps
12 1234 aaronmk
import strings
13 827 aaronmk
import term
14 1468 aaronmk
import units
15 1047 aaronmk
import util
16 86 aaronmk
import xml_dom
17 1321 aaronmk
import xpath
18 86 aaronmk
19 995 aaronmk
##### Exceptions
20
21 1612 aaronmk
class SyntaxError(exc.ExceptionWithCause):
    '''Raised when an XML function is invoked with invalid syntax, e.g. a
    required parameter is missing.
    Note that inside this module this name shadows the builtin SyntaxError.
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        exc.ExceptionWithCause.__init__(self, 'Invalid XML function syntax',
            cause)
25 278 aaronmk
26 1613 aaronmk
class FormatException(exc.ExceptionWithCause):
    '''Raised when an input value can't be processed by an XML function
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        exc.ExceptionWithCause.__init__(self, 'Invalid input value', cause)
29 843 aaronmk
30 1992 aaronmk
##### Helper functions
31 995 aaronmk
32 1992 aaronmk
def map_items(func, items):
    '''Applies func to the value of every item
    @param func Callable taking one value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in input order
    '''
    return [(name, func(value)) for name, value in items]
34
35
def cast(type_, val):
    '''Converts val by calling type_(val)
    @return The converted value
    @throws FormatException if type_ raises a ValueError (can't cast)
    '''
    try: return type_(val)
    except ValueError as e: raise FormatException(e)
39
40
def conv_items(type_, items):
    '''Casts the value of every item to type_
    Items are first passed through xml_dom.TextEntryOnlyIter.
    @param items Iterable of (name, value) pairs
    @return The result of map_items(): (name, converted value) pairs
    @throws FormatException if a value can't be cast
    '''
    return map_items(lambda val: cast(type_, val),
        xml_dom.TextEntryOnlyIter(items))
43
44
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value
    @param items Mutable list of (name, value) pairs. The last entry is always
        popped, even if its name doesn't match.
    @param name Name of value param, or None to accept any name
    @return The value, or None if items is empty or the last entry's name
        doesn't match
    '''
    try: last = items.pop() # last entry contains value
    except IndexError: return None # input is empty and no actions
    if name is not None and last[0] != name: return None # input is empty
    return last[1]
50
51 995 aaronmk
funcs = {}
52
53 1992 aaronmk
##### Public functions
54
55 995 aaronmk
def process(node, on_error=exc.raise_):
    '''Evaluates the XML functions in node's subtree, children first
    A node is treated as a function if its tag name starts with _ and is
    registered in funcs. On success the node is replaced with the function's
    result as text. On failure the node is replaced with a comment containing
    its string form, and the exception is passed to on_error.
    @param node The element to process
    @param on_error Callable taking the exception
    '''
    for child in xml_dom.NodeElemIter(node): process(child, on_error)
    name = node.tagName
    if name.startswith('_') and name in funcs:
        try:
            value = funcs[name](xml_dom.NodeTextEntryIter(node))
            xml_dom.replace_with_text(node, value)
        except Exception as e: # also catch non-wrapped exceptions (XML func bugs)
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
                '\n'+term.emph_multiline(str_)))
            
            on_error(e)
71
72 1992 aaronmk
def strip(node):
    '''Replaces every XML function with its last parameter (which is usually its
    value), except for _ignore, which is removed completely.
    Works on node's subtree, children first, without evaluating any function.
    '''
    for child in xml_dom.NodeElemIter(node): strip(child)
    name = node.tagName
    if name.startswith('_') and name in funcs:
        if name == '_ignore': value = None
        else: value = pop_value(list(xml_dom.NodeTextEntryIter(node)), None)
        xml_dom.replace_with_text(node, value)
81 86 aaronmk
82 1469 aaronmk
##### XML functions
83 995 aaronmk
84
# Function names must start with _ to avoid collisions with real tags
85
# Functions take arguments (items)
86
87 1469 aaronmk
#### General
88
89 995 aaronmk
def _ignore(items):
    '''Used to "comment out" an XML subtree
    @param items Ignored
    @return Always None
    '''
    return None
92 995 aaronmk
funcs['_ignore'] = _ignore
93 994 aaronmk
94 1469 aaronmk
#### Conditionals
95
96 1234 aaronmk
def _eq(items):
    '''Tests whether two values are equal
    @param items
        left, right The values to compare
    @return util.bool2str() of the comparison, or '' if either side is missing
    '''
    items = dict(items)
    try:
        left = items['left']
        right = items['right']
    except KeyError: return '' # a value was None
    return util.bool2str(left == right)
103
funcs['_eq'] = _eq
104
105
def _if(items):
    '''Selects between two values based on a condition
    @param items
        cond Condition; true iff its string form is non-empty
        then Returned when cond is true
        else (optional) Returned when cond is false; defaults to None
    @throws SyntaxError if cond or then is missing
    '''
    items = dict(items)
    try:
        cond = items['cond']
        then = items['then']
    except KeyError as e: raise SyntaxError(e)
    else_ = items.get('else', None)
    cond = bool(cast(strings.ustr, cond))
    if cond: return then
    else: return else_
115
funcs['_if'] = _if
116
117 1469 aaronmk
#### Combining values
118
119 995 aaronmk
def _alt(items):
    '''Picks the first available alternative
    @param items (name, value) pairs, where the names determine the order
    @return The value of the item that sorts first, or None if there are no
        items
    '''
    items = sorted(items)
    try: return items[0][1] # value of lowest-numbered item
    except IndexError: return None # input got removed by e.g. FormatException
124 995 aaronmk
funcs['_alt'] = _alt
125 113 aaronmk
126 995 aaronmk
def _merge(items):
    '''Merges all the values, taken in order of their names
    @param items (name, value) pairs; the values are converted to strings
    @return The result of maps.merge_values() on the ordered values
    '''
    items = list(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    items.sort()
    return maps.merge_values(*[v for k, v in items])
131 995 aaronmk
funcs['_merge'] = _merge
132 917 aaronmk
133 995 aaronmk
def _label(items):
    '''Prefixes a value with a label
    @param items
        label The label
        value The value
    @return label+': '+value
    @throws SyntaxError if label or value is missing
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try:
        label = items['label']
        value = items['value']
    except KeyError as e: raise SyntaxError(e)
    return label+': '+value
141 995 aaronmk
funcs['_label'] = _label
142 917 aaronmk
143 1469 aaronmk
#### Transforming values
144
145 1478 aaronmk
types_by_name = {None: strings.ustr, 'str': strings.ustr, 'float': float}
146 1477 aaronmk
147 1047 aaronmk
def _nullIf(items):
    '''Passes the value and the null marker to util.none_if()
    @param items
        null The marker value
        value (optional) The input value
        type (optional) Name of the type to convert null to; must be a key of
            types_by_name
    @throws SyntaxError if null is missing or type is not a known type name
    '''
    items = dict(conv_items(strings.ustr, items))
    try: null = items['null']
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)
    type_str = items.get('type', None)
    
    try: type_ = types_by_name[type_str]
    except KeyError as e: raise SyntaxError(e)
    null = type_(null)
    
    try: return util.none_if(value, null)
    except ValueError: return value # value not convertible, so can't equal null
160 1047 aaronmk
funcs['_nullIf'] = _nullIf
161
162 1602 aaronmk
def repl(repls, value):
    '''Looks up the replacement for value.
    Raises error if value not in map and no special '*' entry
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @return The replacement value
    @throws FormatException if value has no entry and there is no "*" entry
    '''
    try: new_value = repls[value]
    except KeyError as e:
        # Save traceback right away in case another exception raised
        fe = FormatException(e)
        try: new_value = repls['*']
        except KeyError: raise fe
    if new_value == '*': new_value = value # '*' means keep input value the same
    return new_value
177 1602 aaronmk
178
def _map(items):
    '''See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value, or None if the input is empty or maps to ""
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    return util.none_if(repl(dict(items), value), u'') # empty value means None
188 1219 aaronmk
funcs['_map'] = _map
189
190
def _replace(items):
    '''Applies regular expression replacements to a value
    @param items
        <last_entry> Value
        <other_entries> pattern=replacement, applied in order with re.sub().
            A pattern consisting only of word characters matches whole words
            only.
    @return The stripped result, passed through util.none_if() with u''
    @throws SyntaxError if a pattern or replacement is not a valid regexp
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    try:
        # The loop variable is named pattern so it doesn't shadow repl()
        for pattern, with_ in items:
            if re.match(r'^\w+$', pattern):
                # match whole word
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])'
            value = re.sub(pattern, with_, value)
    except re.error as e: raise SyntaxError(e) # same class as sre_constants.error
    return util.none_if(value.strip(), u'') # empty strings always mean None
201 1219 aaronmk
funcs['_replace'] = _replace
202
203 1469 aaronmk
#### Quantities
204
205 1225 aaronmk
def _units(items):
    '''Parses a quantity and applies units actions to it
    @param items
        <last_entry> Value
        <other_entries> action=units, applied in order:
            default Passed to units.set_default_units()
            to Passed to units.convert()
    @return The quantity as a string, or None if the input is empty
    @throws SyntaxError if an action is not one of the above
    @throws FormatException if the conversion fails or units are missing
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    
    quantity = units.str2quantity(value)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default': units.set_default_units(quantity, units_)
            elif action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise FormatException(e)
            else: raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise FormatException(e)
    return units.quantity2str(quantity)
221 1225 aaronmk
funcs['_units'] = _units
222
223 1399 aaronmk
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first occurrence of the separator
    @param str_ The string to split
    @param range_sep The separator between start and end
    @return tuple (start, end), both stripped of surrounding whitespace.
        If str_ is not a range, or starts with a "-" separator (negative
        number), returns (str_, None) with str_ unchanged.
    '''
    default = (str_, None)
    start, sep, end = str_.partition(range_sep)
    if sep == '': return default # not a range
    if start == '' and range_sep == '-': return default # negative number
    return tuple(d.strip() for d in (start, end))
229
230
def _rangeStart(items):
    '''Returns the start of a range
    @param items
        value The range string; see parse_range()
    @return The first element of parse_range(), or None if there is no value
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    return parse_range(value)[0]
235
funcs['_rangeStart'] = _rangeStart
236
237
def _rangeEnd(items):
    '''Returns the end of a range
    @param items
        value The range string; see parse_range()
    @return The second element of parse_range() (None if value is not a
        range), or None if there is no value
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    return parse_range(value)[1]
242
funcs['_rangeEnd'] = _rangeEnd
243
244 1472 aaronmk
def _range(items):
    '''Computes the size of a numeric range
    @param items
        from, to The endpoints, converted to float
    @return str(to - from), or None if either endpoint is missing
    @throws FormatException if a value is not a valid float
    '''
    items = dict(conv_items(float, items))
    from_ = items.get('from', None)
    to = items.get('to', None)
    if from_ is None or to is None: return None
    return str(to - from_)
250
funcs['_range'] = _range
251
252 995 aaronmk
def _avg(items):
    '''Computes the arithmetic mean of all the values
    @param items (name, value) pairs; the values are converted to float and the
        names are ignored
    @return The mean as a string, or None if there are no values
    @throws FormatException if a value is not a valid float
    '''
    count = 0
    sum_ = 0.
    for name, value in conv_items(float, items):
        count += 1
        sum_ += value
    if count == 0: return None # input is empty
    else: return str(sum_/count)
260 995 aaronmk
funcs['_avg'] = _avg
261 86 aaronmk
262 968 aaronmk
class CvException(Exception):
    '''Raised for a CV (coefficient of variation) value; carries a fixed
    message'''
    def __init__(self):
        Exception.__init__(self, 'CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
267
268 995 aaronmk
def _noCV(items):
    '''Rejects CV (coefficient of variation) values
    @param items Iterator of (name, value) pairs; only the first is used
    @return The first value unchanged, or None if there are no items
    @throws FormatException (wrapping CvException) if the value is "CV"
        followed by optional spaces and digits, case-insensitively
    '''
    try: name, value = next(items)
    except StopIteration: return None
    # Flag passed as an argument: an inline (?i) after ^ is rejected by newer
    # versions of re
    if re.match(r'^CV *\d+$', value, re.I):
        raise FormatException(CvException())
    return value
273 995 aaronmk
funcs['_noCV'] = _noCV
274 968 aaronmk
275 1469 aaronmk
#### Dates
276
277 995 aaronmk
def _date(items):
    '''Normalizes a date to the form YYYY-MM-DD
    @param items Either
        date A date string or a (possibly fractional) year number
        or
        year (required), month, day The date's parts. month may be a month
            name. month and day default to 1. If the month is out of range,
            month and day are swapped and the date is tried again.
    @return The formatted date; None if items is empty; or the date string
        unchanged if parsing it raises an ImportError
    @throws FormatException if the date is invalid or year is missing
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try: str_ = items['date']
    except KeyError:
        # Year is required
        try: items['year']
        except KeyError as e:
            if items == {}: return None # entire date is empty
            else: raise FormatException(e)
        
        # Convert month name to number
        try: month = items['month']
        except KeyError: pass
        else:
            if not month.isdigit(): # month is name
                try: items['month'] = str(dates.strtotime(month).month)
                except ValueError as e: raise FormatException(e)
        
        items = dict(conv_items(format.str2int, items.iteritems()))
        items.setdefault('month', 1)
        items.setdefault('day', 1)
        
        for try_num in range(2):
            try:
                date = datetime.date(**items)
                break
            except ValueError as e:
                if try_num > 0: raise FormatException(e)
                    # exception still raised after retry
                msg = strings.ustr(e)
                if msg == 'month must be in 1..12': # try swapping month and day
                    items['month'], items['day'] = items['day'], items['month']
                else: raise FormatException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise FormatException(e)
        else:
            # Fractional year: add the fraction of a 365-day year to Jan 1.
            # Out-of-range, NaN and infinite years are input errors.
            try:
                date = (datetime.date(int(year), 1, 1) +
                    datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e: raise FormatException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
321 995 aaronmk
funcs['_date'] = _date
322 86 aaronmk
323 1366 aaronmk
def _dateRangeStart(items):
    '''Returns the start of a date range
    @param items
        value The date range string
    @return The first element of dates.parse_date_range(), or None if there is
        no value
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    return dates.parse_date_range(value)[0]
328
funcs['_dateRangeStart'] = _dateRangeStart
329 1311 aaronmk
330 1366 aaronmk
def _dateRangeEnd(items):
    '''Returns the end of a date range
    @param items
        value The date range string
    @return The second element of dates.parse_date_range(), or None if there is
        no value
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    return dates.parse_date_range(value)[1]
335
funcs['_dateRangeEnd'] = _dateRangeEnd
336 1311 aaronmk
337 1469 aaronmk
#### Names
338
339 328 aaronmk
# Each name part, in output order, with the slice that selects it from the list
# of words in a full name
_name_parts_slices_items = [
    ('first', slice(None, 1)),
    ('middle', slice(1, -1)),
    ('last', slice(-1, None)),
]
name_parts_slices = dict(_name_parts_slices_items) # part name -> slice
name_parts = [name for name, slice_ in _name_parts_slices_items] # ordered names
346
347 995 aaronmk
def _name(items):
    '''Joins name parts into a full name
    @param items (part, value) pairs; only parts listed in name_parts are used
    @return The values of the parts present, in name_parts order, separated by
        spaces
    '''
    items = dict(items)
    return ' '.join([items[part] for part in name_parts if part in items])
353 995 aaronmk
funcs['_name'] = _name
354 102 aaronmk
355 995 aaronmk
def _namePart(items):
    '''Extracts parts of full names
    @param items (part, full name) pairs, where part is a key of
        name_parts_slices. The full name is split on spaces and the words
        selected by the part's slice are kept.
    @return The extracted parts, combined using _name()
    @throws SyntaxError if a part is not a valid name part
    '''
    out_items = []
    for part, value in items:
        try: slice_ = name_parts_slices[part]
        except KeyError as e: raise SyntaxError(e)
        out_items.append((part, ' '.join(value.split(' ')[slice_])))
    return _name(out_items)
362
funcs['_namePart'] = _namePart
363 1321 aaronmk
364 1607 aaronmk
#### Angles
365
366
def _compass(items):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @param items
        value The compass direction
    @return None if there is no value; the value unchanged if isupper() is
        false for it; otherwise the result of angles.compass2heading(),
        converted with util.cast(str, ...)
    @throws FormatException if angles.compass2heading() raises a KeyError
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    
    if not value.isupper(): return value # pass through other coordinate formats
    try: return util.cast(str, angles.compass2heading(value)) # ignore None
    except KeyError as e: raise FormatException(e)
375
funcs['_compass'] = _compass
376
377 1469 aaronmk
#### Paths
378
379 1321 aaronmk
def _simplifyPath(items):
    '''Removes elements along a path whose required node is missing or empty
    Starting at the path's root, each element is replaced with its next
    element if the required node under it is not found or is empty.
    @param items
        next XPath from an element to the next element on the path
        require XPath from an element to the node it is required to have
        path The root element of the path
    @return The (possibly updated) root of the path
    @throws SyntaxError if a parameter is missing
    '''
    items = dict(items)
    try:
        next_path = cast(strings.ustr, items['next'])
        require = cast(strings.ustr, items['require'])
        root = items['path']
    except KeyError as e: raise SyntaxError(e)
    
    node = root
    while node is not None:
        new_node = xpath.get_1(node, next_path, allow_rooted=False)
        required_node = xpath.get_1(node, require, allow_rooted=False)
        if required_node is None or xml_dom.is_empty(required_node):# empty elem
            xml_dom.replace(node, new_node) # remove current elem
            if node is root: root = new_node # also update root
        node = new_node
    return root
396
funcs['_simplifyPath'] = _simplifyPath