Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 111 aaronmk
7 1607 aaronmk
import angles
8 818 aaronmk
import dates
9 300 aaronmk
import exc
10 1580 aaronmk
import format
11 917 aaronmk
import maps
12 1234 aaronmk
import strings
13 827 aaronmk
import term
14 1468 aaronmk
import units
15 1047 aaronmk
import util
16 86 aaronmk
import xml_dom
17 1321 aaronmk
import xpath
18 86 aaronmk
19 995 aaronmk
##### Exceptions
20
21 1518 aaronmk
class SyntaxException(exc.ExceptionWithCause):
    '''Signals that an XML function was given invalid syntax or arguments
    @param cause The underlying exception. Its string form (exc.str_()) is
        appended to the message.
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax: '+exc.str_(cause)
        exc.ExceptionWithCause.__init__(self, msg)
25 278 aaronmk
26 843 aaronmk
class FormatException(SyntaxException):
    '''SyntaxException subtype; in this file it is raised by _date() and
    _compass()'''
27
28 995 aaronmk
##### Functions
29
30
# Registry of XML functions, keyed by tag name (e.g. '_alt'). process() looks
# up each element's tag name here.
funcs = {}
31
32
def process(node, on_error=exc.raise_):
    '''Evaluates the XML functions in a tree, innermost first
    @param node Element whose subtree is processed. Child elements are
        processed before node itself. If node's tag name starts with _ and is
        registered in funcs, the function is called with node's entries and
        node is replaced with the returned text.
    @param on_error Called with the exception if the function or the
        replacement raises. By then the exception has been annotated with the
        function's string form, and node has been replaced with a comment
        containing that string form.
    '''
    for child in xml_dom.NodeElemIter(node): process(child, on_error)
    name = node.tagName
    if not (name.startswith('_') and name in funcs): return # not a function
    
    try:
        value = funcs[name](xml_dom.NodeTextEntryIter(node))
        xml_dom.replace_with_text(node, value)
    except Exception as e: # also catch XML func internal errors
        # Save in case another exception raised, overwriting sys.exc_info()
        exc.add_traceback(e)
        str_ = strings.ustr(node)
        exc.add_msg(e, 'function:\n'+str_)
        
        # Comments can't contain '--'. Collapse whole runs of dashes: a single
        # str.replace('--', '-') pass would turn '---' into '--'.
        comment = re.sub(r'-{2,}', '-', term.emph_multiline(str_))
        # A comment also can't end in '-', because that would form '--' with
        # the closing '-->'
        if comment.endswith('-'): comment += ' '
        xml_dom.replace(node, node.ownerDocument.createComment('\n'+comment))
        on_error(e)
48
49 86 aaronmk
def map_items(func, items):
    '''Applies func to the value of each (name, value) pair
    @return list of (name, func(value)) tuples, in input order
    '''
    mapped = []
    for name, value in items:
        mapped.append((name, func(value)))
    return mapped
51
52 1234 aaronmk
def cast(type_, val):
    '''Converts val by calling type_ on it
    @throws SyntaxException if type_ raises ValueError for val
    '''
    try:
        converted = type_(val)
    except ValueError as e:
        raise SyntaxException(e)
    return converted
56
57 278 aaronmk
def conv_items(type_, items):
    '''Casts the value of each item to type_
    @param items (name, value) pairs; they are first passed through
        xml_dom.TextEntryOnlyIter
    @return list of (name, converted_value) tuples
    @throws SyntaxException if a value can't be cast (see cast())
    '''
    def to_type(val): return cast(type_, val)
    return map_items(to_type, xml_dom.TextEntryOnlyIter(items))
60 278 aaronmk
61 1581 aaronmk
def pop_value(items):
    '''Removes the last entry of items and returns its value
    @param items list of (name, value) entries; modified in place
    @return The last entry's value, or None if items is empty or the last
        entry is not named 'value' (that entry is still removed)
    '''
    try:
        last = items.pop() # last entry contains value
    except IndexError:
        return None # input is empty and no actions
    if last[0] == 'value': return last[1]
    return None # input is empty
66
67 1469 aaronmk
##### XML functions
68 995 aaronmk
69
# Function names must start with _ to avoid collisions with real tags
70
# Functions take arguments (items)
71
72 1469 aaronmk
#### General
73
74 995 aaronmk
def _ignore(items):
    '''Discards its contents. Used to "comment out" an XML subtree.
    @return None, regardless of items
    '''
    return None
funcs['_ignore'] = _ignore
78 994 aaronmk
79 1469 aaronmk
#### Conditionals
80
81 1234 aaronmk
def _eq(items):
    '''Compares the `left` and `right` entries for equality
    @return util.bool2str() of the comparison, or '' if either entry is
        missing
    '''
    items = dict(items)
    if 'left' not in items or 'right' not in items:
        return '' # a value was None
    return util.bool2str(items['left'] == items['right'])
funcs['_eq'] = _eq
89
90
def _if(items):
    '''Selects between the `then` and `else` entries
    @param items `cond` and `then` are required; `else` is optional
    @return `then` if `cond`, cast to a string, is non-empty; otherwise
        `else` (None if not given)
    @throws SyntaxException if `cond` or `then` is missing
    '''
    items = dict(items)
    try:
        cond = items['cond']
        then = items['then']
    except KeyError as e:
        raise SyntaxException(e)
    if bool(cast(strings.ustr, cond)): return then
    return items.get('else', None)
funcs['_if'] = _if
101
102 1469 aaronmk
#### Combining values
103
104 995 aaronmk
def _alt(items):
    '''Picks the value of the item that sorts first
    @return The value of the lowest-numbered item, or None if there are no
        items
    '''
    ordered = sorted(items)
    if not ordered:
        return None # input got removed by e.g. SyntaxException
    return ordered[0][1] # value of lowest-numbered item
funcs['_alt'] = _alt
110 113 aaronmk
111 995 aaronmk
def _merge(items):
    '''Merges the item values, taken in sorted item order, using
    maps.merge_values()
    @throws SyntaxException if a value can't be cast to a string
    '''
    # sorted() consumes the iter only *once*; conv_items() checks types
    pairs = sorted(conv_items(strings.ustr, items))
    values = [value for name, value in pairs]
    return maps.merge_values(*values)
funcs['_merge'] = _merge
117 917 aaronmk
118 995 aaronmk
def _label(items):
    '''Prefixes a value with a label
    @param items `label` and `value` entries
    @return "<label>: <value>"
    @throws SyntaxException if either entry is missing
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try:
        parts = (items['label'], items['value'])
    except KeyError as e:
        raise SyntaxException(e)
    return parts[0]+': '+parts[1]
funcs['_label'] = _label
127 917 aaronmk
128 1469 aaronmk
#### Transforming values
129
130 1478 aaronmk
# Converters selectable by the `type` entry of _nullIf; the None key is used
# when no type is given
types_by_name = {
    None: strings.ustr,
    'str': strings.ustr,
    'float': float,
}
131 1477 aaronmk
132 1047 aaronmk
def _nullIf(items):
    '''Turns a value into None if it equals a designated null value
    @param items
        null The value that means None (required)
        value The input value (optional)
        type Key of types_by_name used to convert `null` (optional; defaults
            to string)
    @return util.none_if(value, null); or value unchanged if that comparison
        raises ValueError
    @throws SyntaxException if `null` is missing, `type` is not a key of
        types_by_name, or `null` can't be converted to the type
    '''
    items = dict(conv_items(strings.ustr, items))
    try: null = items['null']
    except KeyError as e: raise SyntaxException(e)
    value = items.get('value', None)
    type_str = items.get('type', None)
    
    try: type_ = types_by_name[type_str]
    except KeyError as e: raise SyntaxException(e)
    # Use cast() so an unconvertible `null` is reported as a SyntaxException,
    # like every other conversion here, instead of a bare ValueError
    null = cast(type_, null)
    
    try: return util.none_if(value, null)
    except ValueError: return value # value not convertible, so can't equal null
funcs['_nullIf'] = _nullIf
146
147 1602 aaronmk
def repl(repls, value):
    '''Looks up the replacement for value
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @return The replacement, or value itself if the replacement is "*"
    @throws SyntaxException if value is not in repls and there is no "*"
        entry
    '''
    try:
        new_value = repls[value]
    except KeyError as e:
        # Wrap the error right away, in case another exception is raised
        not_found = SyntaxException(e)
        if '*' not in repls: raise not_found
        new_value = repls['*']
    if new_value == '*': return value # '*' means keep input value the same
    return new_value
162 1602 aaronmk
163
def _map(items):
    '''Replaces the value according to a mapping. See repl().
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value, or None if the input is empty or the mapped
        value is the empty string
    '''
    entries = conv_items(strings.ustr, items)
        # get *once* from iter, check types
    value = pop_value(entries)
    if value is None: return None # input is empty
    mapped = repl(dict(entries), value)
    return util.none_if(mapped, u'') # empty value means None
funcs['_map'] = _map
174
175
def _replace(items):
    '''Applies regular expression replacements to the value, in order
    @param items
        <last_entry> Value
        <other_entries> pattern=replacement. A pattern consisting only of word
            characters is matched as a whole word.
    @return The resulting value, or None if the input or the result is empty
    @throws SyntaxException if a pattern or replacement is rejected by re
    '''
    entries = conv_items(strings.ustr, items)
        # get *once* from iter, check types
    value = pop_value(entries)
    if value is None: return None # input is empty
    try:
        for pattern, with_ in entries:
            is_word = re.match(r'^\w+$', pattern)
            if is_word: # match whole word
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])'
            value = re.sub(pattern, with_, value)
    except re.error as e: # same class as sre_constants.error
        raise SyntaxException(e)
    return util.none_if(value, u'') # empty strings always mean None
funcs['_replace'] = _replace
187
188 1469 aaronmk
#### Quantities
189
190 1225 aaronmk
def _units(items):
    '''Parses a quantity and applies units actions to it, in order
    @param items
        <last_entry> Value, parsed with units.str2quantity()
        <other_entries> action=units, where action is one of:
            default Passed to units.set_default_units()
            to Passed to units.convert()
            An empty units string is treated as None.
    @return units.quantity2str() of the result, or None if the input is empty
    @throws SyntaxException if an action is invalid, the conversion raises
        ValueError, or units.MissingUnitsException is raised
    '''
    entries = conv_items(strings.ustr, items)
        # get *once* from iter, check types
    value = pop_value(entries)
    if value is None: return None # input is empty
    
    quantity = units.str2quantity(value)
    try:
        for action, units_ in entries:
            units_ = util.none_if(units_, u'')
            if action == 'default':
                units.set_default_units(quantity, units_)
                continue
            if action != 'to':
                raise SyntaxException(ValueError('Invalid action: '+action))
            try:
                quantity = units.convert(quantity, units_)
            except ValueError as e:
                raise SyntaxException(e)
    except units.MissingUnitsException as e:
        raise SyntaxException(e)
    return units.quantity2str(quantity)
funcs['_units'] = _units
207
208 1399 aaronmk
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first range_sep
    @return (start, end), each stripped of surrounding whitespace; or
        (str_, None), with str_ unchanged, if str_ does not contain range_sep
        or if range_sep is '-' and str_ starts with it (a negative number)
    '''
    start, sep, end = str_.partition(range_sep)
    not_a_range = sep == ''
    negative_number = start == '' and range_sep == '-'
    if not_a_range or negative_number: return (str_, None)
    return (start.strip(), end.strip())
214
215
def _rangeStart(items):
    '''Extracts the start of a range. See parse_range().
    @param items `value` entry containing the range string
    @return First element of parse_range(value), or None if there is no
        `value` entry
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items: return None # input is empty
    start, end = parse_range(items['value'])
    return start
funcs['_rangeStart'] = _rangeStart
221
222
def _rangeEnd(items):
    '''Extracts the end of a range. See parse_range().
    @param items `value` entry containing the range string
    @return Second element of parse_range(value), or None if there is no
        `value` entry
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items: return None # input is empty
    start, end = parse_range(items['value'])
    return end
funcs['_rangeEnd'] = _rangeEnd
228
229 1472 aaronmk
def _range(items):
    '''Computes the size of a numeric range
    @param items `from` and `to` entries, cast to float
    @return str(to - from), or None if either entry is missing
    @throws SyntaxException if a value can't be cast to float
    '''
    bounds = dict(conv_items(float, items))
    from_ = bounds.get('from')
    to = bounds.get('to')
    if from_ is None or to is None: return None
    return str(to - from_)
funcs['_range'] = _range
236
237 995 aaronmk
def _avg(items):
    '''Averages the item values
    @return str() of the mean of the values cast to float, or None if there
        are no items
    @throws SyntaxException if a value can't be cast to float
    '''
    values = [value for name, value in conv_items(float, items)]
    if not values: return None # input is empty
    return str(sum(values, 0.)/len(values))
funcs['_avg'] = _avg
246 86 aaronmk
247 968 aaronmk
class CvException(Exception):
    '''Raised for a CV (coefficient of variation) value; the message is fixed'''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only allowed for '
            'ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
252
253 995 aaronmk
def _noCV(items):
    '''Passes the first value through, rejecting coefficient-of-variation
    values
    @param items Iterator of (name, value) pairs; only the first is used
    @return The first value, or None if there are no items
    @throws SyntaxException (wrapping a CvException) if the value is "CV"
        followed by optional spaces and then digits, case-insensitively
    '''
    try: name, value = next(items)
    except StopIteration: return None
    # The case-insensitive flag is passed as an argument rather than as an
    # inline (?i) after the ^: newer versions of re reject global inline flags
    # that are not at the very start of the pattern
    if re.match(r'^CV *\d+$', value, re.IGNORECASE):
        raise SyntaxException(CvException())
    return value
funcs['_noCV'] = _noCV
259 968 aaronmk
260 1469 aaronmk
#### Dates
261
262 995 aaronmk
def _date(items):
    '''Formats a date as YYYY-MM-DD
    @param items Either a `date` entry (a year number, optionally fractional,
        or a date string parsed with dates.strtotime()), or a `year` entry
        with optional `month` (number or name) and `day` entries, which both
        default to 1
    @return The formatted date; None if no entries were given; or the `date`
        string unchanged if dates.strtotime() raises ImportError
    @throws SyntaxException if `year` is missing, a part is invalid, or the
        date string can't be parsed
    @throws FormatException if dates.strftime() raises ValueError
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    if 'date' in items:
        str_ = items['date']
        try:
            year = float(str_)
        except ValueError: # not a plain number, so parse as a date string
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise SyntaxException(e)
        else: # fractional part of the year becomes days after Jan 1
            date = (datetime.date(int(year), 1, 1) +
                datetime.timedelta(round((year % 1.)*365)))
    else:
        # Year is required
        try:
            items['year']
        except KeyError as e:
            if not items: return None # entire date is empty
            raise SyntaxException(e)
        
        # Convert month name to number
        if 'month' in items and not items['month'].isdigit(): # month is name
            try:
                items['month'] = str(dates.strtotime(items['month']).month)
            except ValueError as e:
                raise SyntaxException(e)
        
        items = dict(conv_items(format.str2int, items.iteritems()))
        items.setdefault('month', 1)
        items.setdefault('day', 1)
        
        swapped = False
        while True:
            try:
                date = datetime.date(**items)
                break
            except ValueError as e:
                # Only an out-of-range month is retried, and only once
                can_retry = (not swapped
                    and strings.ustr(e) == 'month must be in 1..12')
                if not can_retry: raise SyntaxException(e)
            # try swapping month and day
            items['month'], items['day'] = items['day'], items['month']
            swapped = True
    
    try:
        return dates.strftime('%Y-%m-%d', date)
    except ValueError as e:
        raise FormatException(e)
funcs['_date'] = _date
307 86 aaronmk
308 1366 aaronmk
def _dateRangeStart(items):
    '''Extracts the start of a date range
    @param items `value` entry containing the date range string
    @return First element of dates.parse_date_range(value), or None if there
        is no `value` entry
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items: return None # input is empty
    return dates.parse_date_range(items['value'])[0]
funcs['_dateRangeStart'] = _dateRangeStart
314 1311 aaronmk
315 1366 aaronmk
def _dateRangeEnd(items):
    '''Extracts the end of a date range
    @param items `value` entry containing the date range string
    @return Second element of dates.parse_date_range(value), or None if there
        is no `value` entry
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items: return None # input is empty
    return dates.parse_date_range(items['value'])[1]
funcs['_dateRangeEnd'] = _dateRangeEnd
321 1311 aaronmk
322 1469 aaronmk
#### Names
323
324 328 aaronmk
# Each name part paired with the slice that selects it from a list of the
# name's words: the first word, the words in between, and the last word
_name_parts_slices_items = [
    ('first', slice(None, 1)),
    ('middle', slice(1, -1)),
    ('last', slice(-1, None)),
]
# Part names, in output order
name_parts = [name for name, slice_ in _name_parts_slices_items]
# Part name -> slice
name_parts_slices = dict(_name_parts_slices_items)
331
332 995 aaronmk
def _name(items):
    '''Joins name parts into a full name
    @param items Entries keyed by name part (see name_parts); other entries
        are ignored
    @return The parts that are present, in name_parts order, separated by
        spaces
    '''
    items = dict(items)
    present = [items[part] for part in name_parts if part in items]
    return ' '.join(present)
funcs['_name'] = _name
339 102 aaronmk
340 995 aaronmk
def _namePart(items):
    '''Extracts name parts from full names and joins them with _name()
    @param items part=full_name entries, where part is a key of
        name_parts_slices. The full name is split on spaces and the words
        selected by the part's slice are kept.
    @throws SyntaxException if part is not a key of name_parts_slices
    '''
    out_items = []
    for part, value in items:
        try:
            slice_ = name_parts_slices[part]
        except KeyError as e:
            raise SyntaxException(e)
        words = value.split(' ')
        out_items.append((part, ' '.join(words[slice_])))
    return _name(out_items)
funcs['_namePart'] = _namePart
348 1321 aaronmk
349 1607 aaronmk
#### Angles
350
351
def _compass(items):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @param items `value` entry containing the direction
    @return None if there is no `value` entry; the value unchanged if it is
        not all uppercase; otherwise the result of angles.compass2heading(),
        passed through util.cast(str, ...)
    @throws FormatException if the conversion raises KeyError
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items: return None # input is empty
    value = items['value']
    
    if not value.isupper(): return value # pass through other coordinate formats
    try:
        heading = angles.compass2heading(value)
        return util.cast(str, heading) # ignore None
    except KeyError as e:
        raise FormatException(e)
funcs['_compass'] = _compass
361
362 1469 aaronmk
#### Paths
363
364 1321 aaronmk
def _simplifyPath(items):
    '''Removes the elements of a path that lack a required subpath
    @param items
        next XPath from an element to the next element in the path
        require XPath that an element must contain to be kept
        path The first element of the path
    @return The first element that was kept. This is `path` itself unless it
        was replaced; it is the last `next` result if none was kept.
    @throws SyntaxException if an entry is missing
    '''
    items = dict(items)
    try:
        next_path = cast(strings.ustr, items['next'])
        require = cast(strings.ustr, items['require'])
        root = items['path']
    except KeyError as e:
        raise SyntaxException(e)
    
    node = root
    while node != None:
        following = xpath.get_1(node, next_path, allow_rooted=False)
        required = xpath.get_1(node, require, allow_rooted=False)
        if required == None: # empty elem
            xml_dom.replace(node, following) # remove current elem
            if node is root: root = following # also update root
        node = following
    return root
funcs['_simplifyPath'] = _simplifyPath