Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 111 aaronmk
7 1607 aaronmk
import angles
8 818 aaronmk
import dates
9 300 aaronmk
import exc
10 1580 aaronmk
import format
11 917 aaronmk
import maps
12 1234 aaronmk
import strings
13 827 aaronmk
import term
14 1468 aaronmk
import units
15 1047 aaronmk
import util
16 86 aaronmk
import xml_dom
17 1321 aaronmk
import xpath
18 86 aaronmk
19 995 aaronmk
##### Exceptions
20
21 1612 aaronmk
class SyntaxError(exc.ExceptionWithCause):
    '''Signals that an XML function was called with invalid arguments.

    NOTE: within this module, this name shadows the builtin SyntaxError.
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax'
        exc.ExceptionWithCause.__init__(self, msg, cause)
25 278 aaronmk
26 1613 aaronmk
class FormatException(exc.ExceptionWithCause):
    '''Signals that an input value could not be interpreted.
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        msg = 'Invalid input value'
        exc.ExceptionWithCause.__init__(self, msg, cause)
29 843 aaronmk
30 995 aaronmk
##### Functions
31
32
# Registry of XML functions: maps a tag name (e.g. '_eq') to the callable that
# implements it. Each function below adds itself right after its definition.
funcs = {}
33
34
def process(node, on_error=exc.raise_):
    '''Evaluates the XML functions in a DOM subtree, children first.

    A node counts as a function call when its tag name starts with "_" and is
    registered in funcs. The function is called with the node's text entries
    and its result is passed to xml_dom.replace_with_text() for that node.
    If anything in that step raises, the node is replaced by a comment built
    from its string form and the exception is handed to on_error.

    @param node The element at the root of the subtree to process
    @param on_error Callable invoked with the (annotated) exception
    '''
    for child in xml_dom.NodeElemIter(node):
        process(child, on_error)

    name = node.tagName
    if not (name.startswith('_') and name in funcs): return # not a function

    try:
        result = funcs[name](xml_dom.NodeTextEntryIter(node))
        xml_dom.replace_with_text(node, result)
    except Exception as e: # also catch non-wrapped exceptions (XML func bugs)
        # Save in case another exception raised, overwriting sys.exc_info()
        exc.add_traceback(e)
        node_str = strings.ustr(node)
        exc.add_msg(e, 'function:\n'+node_str)
        comment = xml_dom.mk_comment(node.ownerDocument,
            '\n'+term.emph_multiline(node_str))
        xml_dom.replace(node, comment)

        on_error(e)
50
51 86 aaronmk
def map_items(func, items):
    '''Applies func to the value of every (name, value) pair.
    @param func Callable taking one value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in input order
    '''
    mapped = []
    for name, value in items:
        mapped.append((name, func(value)))
    return mapped
53
54 1234 aaronmk
def cast(type_, val):
    '''Converts val by calling type_ on it.
    @param type_ Callable performing the conversion (e.g. float)
    @param val The value to convert
    @return The converted value
    @throws FormatException if the conversion raises ValueError
    '''
    try:
        converted = type_(val)
    except ValueError as e:
        raise FormatException(e)
    return converted
58 1234 aaronmk
59 278 aaronmk
def conv_items(type_, items):
    '''Casts the value of every entry yielded by xml_dom.TextEntryOnlyIter().
    @param type_ Callable performing the conversion; see cast()
    @param items The entries to wrap in xml_dom.TextEntryOnlyIter()
    @return list of (name, converted value) pairs
    @throws FormatException if a value can't be cast
    '''
    return [(name, cast(type_, value))
        for name, value in xml_dom.TextEntryOnlyIter(items)]
62 278 aaronmk
63 1581 aaronmk
def pop_value(items):
    '''Removes the last entry of items and returns its value.
    @param items Mutable list of (name, value) entries; its last entry is
        always removed, even when it turns out not to be the value
    @return The value of the last entry if that entry is named 'value';
        None if items is empty or the last entry has another name
    '''
    try:
        entry = items.pop() # last entry contains value
    except IndexError:
        return None # input is empty and no actions
    if entry[0] == 'value':
        return entry[1]
    return None # input is empty
68
69 1469 aaronmk
##### XML functions
70 995 aaronmk
71
# Function names must start with _ to avoid collisions with real tags
72
# Functions take arguments (items)
73
74 1469 aaronmk
#### General
75
76 995 aaronmk
def _ignore(items):
77 994 aaronmk
    '''Used to "comment out" an XML subtree'''
78
    return None
79 995 aaronmk
funcs['_ignore'] = _ignore
80 994 aaronmk
81 1469 aaronmk
#### Conditionals
82
83 1234 aaronmk
def _eq(items):
    '''Compares the "left" and "right" entries for equality.
    @return util.bool2str() of the comparison, or '' if either entry is
        missing
    '''
    items = dict(items)
    if 'left' not in items or 'right' not in items:
        return '' # a value was None
    return util.bool2str(items['left'] == items['right'])
funcs['_eq'] = _eq
91
92
def _if(items):
    '''Selects between the "then" and "else" entries.
    @param items Entries "cond" and "then" (both required) and "else"
        (optional, defaults to None)
    @return "then" if the string form of "cond" is non-empty, otherwise "else"
    @throws SyntaxError if "cond" or "then" is missing
    '''
    items = dict(items)
    try:
        cond, then = items['cond'], items['then']
    except KeyError as e:
        raise SyntaxError(e)
    else_ = items.get('else', None)
    return then if cast(strings.ustr, cond) else else_
funcs['_if'] = _if
103
104 1469 aaronmk
#### Combining values
105
106 995 aaronmk
def _alt(items):
107 113 aaronmk
    items = list(items)
108
    items.sort()
109 1186 aaronmk
    try: return items[0][1] # value of lowest-numbered item
110 1609 aaronmk
    except IndexError: return None # input got removed by e.g. FormatException
111 995 aaronmk
funcs['_alt'] = _alt
112 113 aaronmk
113 995 aaronmk
def _merge(items):
    '''Combines all values, ordered by entry, using maps.merge_values().
    @param items Entries whose values are cast to strings.ustr
    @return Whatever maps.merge_values() returns for the sorted values
    '''
    entries = sorted(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    values = [value for name, value in entries]
    return maps.merge_values(*values)
funcs['_merge'] = _merge
119 917 aaronmk
120 995 aaronmk
def _label(items):
    '''Prefixes a value with a label.
    @param items Entries "label" and "value" (both required)
    @return "<label>: <value>"
    @throws SyntaxError if "label" or "value" is missing
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try:
        label, value = items['label'], items['value']
    except KeyError as e:
        raise SyntaxError(e)
    return ': '.join((label, value))
funcs['_label'] = _label
129 917 aaronmk
130 1469 aaronmk
#### Transforming values
131
132 1478 aaronmk
# Converters selectable through the "type" entry of _nullIf
types_by_name = {
    None: strings.ustr,
    'str': strings.ustr,
    'float': float,
}

def _nullIf(items):
    '''Turns a value into None when it matches a designated null value.
    @param items Entries:
        "null" (required) The value that stands for null
        "value" (optional) The value to test
        "type" (optional) A key of types_by_name; "null" is converted with
            that type before the comparison
    @return util.none_if(value, null); the unchanged value if that raises
        ValueError
    @throws SyntaxError if "null" is missing or "type" is not recognized
    '''
    items = dict(conv_items(strings.ustr, items))
    try:
        null = items['null']
    except KeyError as e:
        raise SyntaxError(e)
    value = items.get('value', None)

    try:
        convert = types_by_name[items.get('type', None)]
    except KeyError as e:
        raise SyntaxError(e)
    null = convert(null)

    try:
        return util.none_if(value, null)
    except ValueError:
        return value # value not convertible, so can't equal null
funcs['_nullIf'] = _nullIf
148
149 1602 aaronmk
def repl(repls, value):
    '''Looks up the replacement for a value.
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @param value The input value to replace
    @return The replacement, or value itself if the replacement is "*"
    @throws FormatException if value is not in repls and there is no "*" entry
    '''
    try:
        replacement = repls[value]
    except KeyError as e:
        # Save traceback right away in case another exception raised
        not_found = FormatException(e)
        if '*' not in repls:
            raise not_found
        replacement = repls['*']
    if replacement == '*':
        return value # '*' means keep input value the same
    return replacement
164 1602 aaronmk
165
def _map(items):
    '''Replaces the input value according to a set of mappings. See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value; None if there is no input value or the mapped
        value is empty
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    mapped = repl(dict(items), value)
    return util.none_if(mapped, u'') # empty value means None
funcs['_map'] = _map
176
177
def _replace(items):
    '''Applies a series of regular expression substitutions to a value.
    @param items
        <last_entry> Value
        <other_entries> pattern=replacement, applied in order with re.sub().
            A pattern consisting only of word characters matches whole words
            only.
    @return The stripped result; None if there is no input value or the result
        is empty
    @throws SyntaxError if a pattern or replacement is not valid for re
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    try:
        # The loop variable is not called "repl", so that it doesn't hide the
        # module-level repl() function
        for pattern, with_ in items:
            if re.match(r'^\w+$', pattern):
                # match whole word
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])'
            value = re.sub(pattern, with_, value)
    except re.error as e: # public name of sre_constants.error
        raise SyntaxError(e)
    return util.none_if(value.strip(), u'') # empty strings always mean None
funcs['_replace'] = _replace
189
190 1469 aaronmk
#### Quantities
191
192 1225 aaronmk
def _units(items):
    '''Applies unit actions to a quantity.
    @param items
        <last_entry> Value, parsed with units.str2quantity()
        <other_entries> action=units, applied in order:
            "default" calls units.set_default_units()
            "to" calls units.convert()
            An empty units string is passed on as None.
    @return units.quantity2str() of the result; None if there is no input value
    @throws SyntaxError if an action is not recognized
    @throws FormatException if converting raises ValueError or
        units.MissingUnitsException is raised
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty

    quantity = units.str2quantity(value)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default':
                units.set_default_units(quantity, units_)
                continue
            if action != 'to':
                raise SyntaxError(ValueError('Invalid action: '+action))
            try:
                quantity = units.convert(quantity, units_)
            except ValueError as e:
                raise FormatException(e)
    except units.MissingUnitsException as e:
        raise FormatException(e)
    return units.quantity2str(quantity)
funcs['_units'] = _units
209
210 1399 aaronmk
def parse_range(str_, range_sep='-'):
    '''Splits a string at the first range separator.
    @param str_ The string to split
    @param range_sep The separator between the start and end of the range
    @return tuple (start, end), each stripped of surrounding whitespace.
        If str_ contains no separator, or starts with a "-" separator (i.e. is
        a negative number), returns (str_, None) with str_ unchanged.
    '''
    start, sep, end = str_.partition(range_sep)
    is_range = sep != ''
    is_negative_number = start == '' and range_sep == '-'
    if not is_range or is_negative_number:
        return (str_, None)
    return (start.strip(), end.strip())
216
217
def _rangeStart(items):
    '''Extracts the start of a range. See parse_range()
    @param items Entry "value": the range string
    @return The first element of parse_range()'s result; None if there is no
        "value" entry
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items: return None # input is empty
    start, end = parse_range(items['value'])
    return start
funcs['_rangeStart'] = _rangeStart
223
224
def _rangeEnd(items):
    '''Extracts the end of a range. See parse_range()
    @param items Entry "value": the range string
    @return The second element of parse_range()'s result (None if the value
        is not a range); None if there is no "value" entry
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items: return None # input is empty
    start, end = parse_range(items['value'])
    return end
funcs['_rangeEnd'] = _rangeEnd
230
231 1472 aaronmk
def _range(items):
    '''Computes the size of a numeric range.
    @param items Entries "from" and "to", each cast to float
    @return str(to - from); None if either entry is missing
    @throws FormatException if a value can't be cast to float
    '''
    items = dict(conv_items(float, items))
    from_ = items.get('from')
    to = items.get('to')
    if None in (from_, to): return None
    return str(to - from_)
funcs['_range'] = _range
238
239 995 aaronmk
def _avg(items):
    '''Computes the arithmetic mean of all values.
    @param items Entries whose values are cast to float; names are ignored
    @return str() of the mean; None if there are no entries
    @throws FormatException if a value can't be cast to float
    '''
    values = [value for name, value in conv_items(float, items)]
    if not values: return None # input is empty
    total = 0.
    for value in values:
        total += value
    return str(total/len(values))
funcs['_avg'] = _avg
248 86 aaronmk
249 968 aaronmk
class CvException(Exception):
    '''Raised by _noCV for a CV (coefficient of variation) value. It takes no
    arguments and always carries the same message.
    '''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
254
255 995 aaronmk
def _noCV(items):
256 968 aaronmk
    try: name, value = items.next()
257
    except StopIteration: return None
258 1609 aaronmk
    if re.match('^(?i)CV *\d+$', value): raise FormatException(CvException())
259 968 aaronmk
    return value
260 995 aaronmk
funcs['_noCV'] = _noCV
261 968 aaronmk
262 1469 aaronmk
#### Dates
263
264 995 aaronmk
def _date(items):
    '''Normalizes a date to the form YYYY-MM-DD.
    @param items Either
        "date" The whole date: a (possibly fractional) year number, or a
            string to parse with dates.strtotime()
        or the separate parts
        "year" (required), "month" (number or name; defaults to 1) and
            "day" (defaults to 1). If the month is out of range, month and
            day are swapped and the date is tried once more.
    @return The formatted date; None if there are no entries at all; the
        unparsed "date" string if dates.strtotime() raises ImportError
    @throws FormatException if the year is missing or the date is invalid
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    if 'date' in items:
        str_ = items['date']
        try:
            year = float(str_)
        except ValueError: # not a plain number, so parse it as a date string
            try:
                date = dates.strtotime(str_)
            except ImportError:
                return str_
            except ValueError as e:
                raise FormatException(e)
        else: # the fractional part of the year selects the day of the year
            year_start = datetime.date(int(year), 1, 1)
            date = year_start + datetime.timedelta(round((year % 1.)*365))
    else:
        # Year is required
        try:
            items['year']
        except KeyError as e:
            if items == {}: return None # entire date is empty
            raise FormatException(e)

        # Convert month name to number
        month = items.get('month')
        if month is not None and not month.isdigit(): # month is name
            try:
                items['month'] = str(dates.strtotime(month).month)
            except ValueError as e:
                raise FormatException(e)

        items = dict(conv_items(format.str2int, items.iteritems()))
        items.setdefault('month', 1)
        items.setdefault('day', 1)

        try:
            date = datetime.date(**items)
        except ValueError as e:
            if strings.ustr(e) != 'month must be in 1..12':
                raise FormatException(e)
            # try swapping month and day
            items['month'], items['day'] = items['day'], items['month']
            try:
                date = datetime.date(**items)
            except ValueError as e2: # exception still raised after retry
                raise FormatException(e2)

    try:
        return dates.strftime('%Y-%m-%d', date)
    except ValueError as e:
        raise FormatException(e)
funcs['_date'] = _date
309 86 aaronmk
310 1366 aaronmk
def _dateRangeStart(items):
    '''Extracts the start of a date range.
    @param items Entry "value": the date range string
    @return Element 0 of dates.parse_date_range()'s result; None if there is
        no "value" entry
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items: return None # input is empty
    date_range = dates.parse_date_range(items['value'])
    return date_range[0]
funcs['_dateRangeStart'] = _dateRangeStart
316 1311 aaronmk
317 1366 aaronmk
def _dateRangeEnd(items):
    '''Extracts the end of a date range.
    @param items Entry "value": the date range string
    @return Element 1 of dates.parse_date_range()'s result; None if there is
        no "value" entry
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items: return None # input is empty
    date_range = dates.parse_date_range(items['value'])
    return date_range[1]
funcs['_dateRangeEnd'] = _dateRangeEnd
323 1311 aaronmk
324 1469 aaronmk
#### Names
325
326 328 aaronmk
# Each name part, in output order, with the slice that selects it from the
# list of words of a full name
_name_parts_slices_items = [
    ('first', slice(None, 1)), # the first word
    ('middle', slice(1, -1)), # everything between the first and last words
    ('last', slice(-1, None)), # the last word
]
# Name part -> slice
name_parts_slices = dict(_name_parts_slices_items)
# Name parts, in output order
name_parts = [item[0] for item in _name_parts_slices_items]
333
334 995 aaronmk
def _name(items):
    '''Joins name parts into a full name.
    @param items Entries named after name_parts; other entries are ignored
    @return The parts that are present, in name_parts order, joined by spaces
    '''
    items = dict(items)
    present = [items[part] for part in name_parts if part in items]
    return ' '.join(present)
funcs['_name'] = _name
341 102 aaronmk
342 995 aaronmk
def _namePart(items):
    '''Extracts parts of full names and joins them with _name().
    @param items Entries part=full name, where part is a key of
        name_parts_slices. The full name is split on single spaces and the
        words selected by the part's slice are kept.
    @return _name() of the extracted parts
    @throws SyntaxError if an entry's name is not a known name part
    '''
    out_items = []
    for part, value in items:
        try:
            part_slice = name_parts_slices[part]
        except KeyError as e:
            raise SyntaxError(e)
        words = value.split(' ')
        out_items.append((part, ' '.join(words[part_slice])))
    return _name(out_items)
funcs['_namePart'] = _namePart
350 1321 aaronmk
351 1607 aaronmk
#### Angles
352
353
def _compass(items):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @param items Entry "value": the direction
    @return util.cast(str, ...) of angles.compass2heading()'s result; the
        unchanged value if it is not all uppercase; None if there is no
        "value" entry
    @throws FormatException if the conversion raises KeyError
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items: return None # input is empty
    value = items['value']

    # pass through other coordinate formats
    if not value.isupper(): return value
    try:
        heading = angles.compass2heading(value)
        return util.cast(str, heading) # ignore None
    except KeyError as e:
        raise FormatException(e)
funcs['_compass'] = _compass
363
364 1469 aaronmk
#### Paths
365
366 1321 aaronmk
def _simplifyPath(items):
    '''Removes the elements along a path that lack a required child.
    @param items Entries (all required):
        "next" XPath from an element to the following element of the path
        "require" XPath that an element must match in order to be kept
        "path" The first element of the path
    @return The first element of the path after removals (the value that
        xpath.get_1() returned for it, if the original first element was
        removed)
    @throws SyntaxError if a required entry is missing
    '''
    items = dict(items)
    try:
        next_path = cast(strings.ustr, items['next'])
        require_path = cast(strings.ustr, items['require'])
        root = items['path']
    except KeyError as e:
        raise SyntaxError(e)

    node = root
    while node != None:
        # Look up the successor first, while node is still in place
        successor = xpath.get_1(node, next_path, allow_rooted=False)
        required = xpath.get_1(node, require_path, allow_rooted=False)
        if required == None: # empty elem
            xml_dom.replace(node, successor) # remove current elem
            if node is root: root = successor # also update root
        node = successor
    return root
funcs['_simplifyPath'] = _simplifyPath