Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 111 aaronmk
7 818 aaronmk
import dates
8 300 aaronmk
import exc
9 1580 aaronmk
import format
10 917 aaronmk
import maps
11 1234 aaronmk
import strings
12 827 aaronmk
import term
13 1468 aaronmk
import units
14 1047 aaronmk
import util
15 86 aaronmk
import xml_dom
16 1321 aaronmk
import xpath
17 86 aaronmk
18 995 aaronmk
##### Exceptions
19
20 1518 aaronmk
class SyntaxException(exc.ExceptionWithCause):
    '''Raised when an XML function is used with invalid syntax/arguments'''
    def __init__(self, cause):
        '''@param cause The underlying error; its exc.str_() form is appended to
            the message
        '''
        msg = 'Invalid XML function syntax: '+exc.str_(cause)
        exc.ExceptionWithCause.__init__(self, msg)
24 278 aaronmk
25 843 aaronmk
class FormatException(SyntaxException):
    '''SyntaxException subtype that _date() raises when a parsed date can't be
    formatted
    '''
    pass
26
27 995 aaronmk
##### Functions
28
29
# Registry of XML functions, keyed by tag name (e.g. '_map'); process() looks
# up each element's tag name here
funcs = {}
30
31
def process(node, on_error=exc.raise_):
    '''Evaluates the XML functions in a DOM subtree, children before parents.
    An element whose tag name starts with '_' and is registered in funcs is
    replaced with the text its function returns. If the function fails, the
    element is instead replaced with a comment containing the element's string
    form, and the error is passed to on_error.
    @param node The element to process (modified in place)
    @param on_error Callback invoked with each exception raised by a function
    '''
    for child in xml_dom.NodeElemIter(node): process(child, on_error)
    name = node.tagName
    if name.startswith('_') and name in funcs:
        try:
            value = funcs[name](xml_dom.NodeTextEntryIter(node))
            xml_dom.replace_with_text(node, value)
        except Exception as e: # also catch XML func internal errors
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            # Comments can't contain '--'. Collapse *every* run of dashes: a
            # single str.replace('--','-') pass would turn '---' into '--'.
            comment = re.sub(r'-{2,}', '-', '\n'+term.emph_multiline(str_))
            xml_dom.replace(node, node.ownerDocument.createComment(comment))
            on_error(e)
47
48 86 aaronmk
def map_items(func, items):
    '''Applies func to the value of every (name, value) pair
    @return list of (name, func(value)) pairs, in input order
    '''
    mapped = []
    for name, value in items: mapped.append((name, func(value)))
    return mapped
50
51 1234 aaronmk
def cast(type_, val):
    '''Converts val by calling type_ on it
    @throws SyntaxException if type_(val) raises a ValueError
    '''
    try: converted = type_(val)
    except ValueError as e: raise SyntaxException(e)
    return converted
55
56 278 aaronmk
def conv_items(type_, items):
    '''Casts the value of each item to type_
    @param items (name, value) entries; filtered through
        xml_dom.TextEntryOnlyIter before conversion
    @return list of (name, converted value) pairs
    @throws SyntaxException if a value can't be cast (see cast())
    '''
    def conv(val): return cast(type_, val)
    return map_items(conv, xml_dom.TextEntryOnlyIter(items))
59 278 aaronmk
60 1581 aaronmk
def pop_value(items):
    '''Removes the last entry of items and returns its value
    @param items list of (name, value) entries; modified in place
    @return The last entry's value if it is named 'value'; otherwise None
        (the last entry, if any, is removed either way)
    '''
    try: entry = items.pop() # last entry contains value
    except IndexError: return None # input is empty and no actions
    if entry[0] == 'value': return entry[1]
    return None # input is empty
65
66 1469 aaronmk
##### XML functions
67 995 aaronmk
68
# Function names must start with _ to avoid collisions with real tags
69
# Functions take arguments (items)
70
71 1469 aaronmk
#### General
72
73 995 aaronmk
def _ignore(items):
74 994 aaronmk
    '''Used to "comment out" an XML subtree'''
75
    return None
76 995 aaronmk
funcs['_ignore'] = _ignore
77 994 aaronmk
78 1469 aaronmk
#### Conditionals
79
80 1234 aaronmk
def _eq(items):
    '''Compares the 'left' and 'right' entries
    @return util.bool2str() of whether they are equal, or '' if either entry
        is missing
    '''
    args = dict(items)
    if 'left' not in args or 'right' not in args: return '' # a value was None
    return util.bool2str(args['left'] == args['right'])
funcs['_eq'] = _eq
88
89
def _if(items):
    '''Chooses between the 'then' and 'else' entries
    @return The 'then' entry if strings.ustr() of the 'cond' entry is truthy;
        otherwise the 'else' entry, or None if there is none
    @throws SyntaxException if the 'cond' or 'then' entry is missing
    '''
    args = dict(items)
    try: cond, then = args['cond'], args['then']
    except KeyError as e: raise SyntaxException(e)
    if bool(cast(strings.ustr, cond)): return then
    return args.get('else', None)
funcs['_if'] = _if
100
101 1469 aaronmk
#### Combining values
102
103 995 aaronmk
def _alt(items):
104 113 aaronmk
    items = list(items)
105
    items.sort()
106 1186 aaronmk
    try: return items[0][1] # value of lowest-numbered item
107 1187 aaronmk
    except IndexError: return None # input got removed by e.g. SyntaxException
108 995 aaronmk
funcs['_alt'] = _alt
109 113 aaronmk
110 995 aaronmk
def _merge(items):
    '''Combines all the values with maps.merge_values(), passing them in the
    sort order of their (name, value) entries
    '''
    entries = sorted(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    values = [value for name, value in entries]
    return maps.merge_values(*values)
funcs['_merge'] = _merge
116 917 aaronmk
117 995 aaronmk
def _label(items):
    '''Prefixes the 'value' entry with the 'label' entry
    @return '<label>: <value>'
    @throws SyntaxException if the 'label' or 'value' entry is missing
    '''
    args = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try: return args['label']+': '+args['value']
    except KeyError as e: raise SyntaxException(e)
funcs['_label'] = _label
126 917 aaronmk
127 1469 aaronmk
#### Transforming values
128
129 1478 aaronmk
# Converters selectable by name through _nullIf's optional 'type' entry; the
# None key is used when no 'type' entry is given
types_by_name = {None: strings.ustr, 'str': strings.ustr, 'float': float}
130 1477 aaronmk
131 1047 aaronmk
def _nullIf(items):
    '''Turns the value into None if it equals a designated null value
    @param items
        null The value to treat as null (required)
        value The input value (optional)
        type Name of the type to convert null to; a key of types_by_name
            (optional)
    @return util.none_if(value, null), or the value unchanged if that raises
        a ValueError
    @throws SyntaxException if 'null' is missing or 'type' is not recognized
    '''
    args = dict(conv_items(strings.ustr, items))
    try:
        null = args['null']
        type_ = types_by_name[args.get('type', None)]
    except KeyError as e: raise SyntaxException(e)
    value = args.get('value', None)
    null = type_(null)

    try: result = util.none_if(value, null)
    except ValueError: result = value # value not convertible, so can't equal null
    return result
funcs['_nullIf'] = _nullIf
145
146 1602 aaronmk
def repl(repls, value):
    '''Looks up the replacement for a value
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @return The replacement, passed through util.none_if(..., u'')
    @throws SyntaxException if value not in map and no special '*' entry
    '''
    try: replacement = repls[value]
    except KeyError as e:
        # Create right away, in case another exception gets raised
        not_found = SyntaxException(e)
        if '*' not in repls: raise not_found
        replacement = repls['*']
    if replacement == '*': replacement = value # keep input value the same
    return util.none_if(replacement, u'') # empty map entry means None
161 1602 aaronmk
162
def _map(items):
    '''Replaces the value according to a mapping; see repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value, or None if there is no input value
    '''
    entries = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(entries) # leaves only the mappings in entries
    if value is None: return None # input is empty
    mappings = dict(entries)
    return repl(mappings, value)
funcs['_map'] = _map
173
174
def _replace(items):
    '''Applies regular-expression replacements to the value, in entry order
    @param items
        <last_entry> Value
        <other_entries> pattern=replacement. A pattern consisting only of
            word characters is matched only where it isn't adjacent to a
            letter or digit (i.e. as a whole word).
    @return The resulting string, or None if it (or the input) is empty
    @throws SyntaxException if the re module rejects a pattern or replacement
    '''
    entries = conv_items(strings.ustr, items) # get *once* from iter, check types
    text = pop_value(entries)
    if text is None: return None # input is empty
    try:
        for pattern, with_ in entries:
            if re.match(r'^\w+$', pattern):
                # match whole word
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])'
            text = re.sub(pattern, with_, text)
    except re.error as e: raise SyntaxException(e)
    return util.none_if(text, u'') # empty strings always mean None
funcs['_replace'] = _replace
186
187 1469 aaronmk
#### Quantities
188
189 1225 aaronmk
def _units(items):
    '''Applies unit actions to a quantity
    @param items
        <last_entry> Value, parsed with units.str2quantity()
        <other_entries> action=units, applied in order. Actions:
            default: units.set_default_units()
            to: units.convert()
            An empty units string is passed as None.
    @return units.quantity2str() of the result, or None if no input value
    @throws SyntaxException on an unknown action, a ValueError from
        units.convert(), or a units.MissingUnitsException
    '''
    entries = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(entries)
    if value is None: return None # input is empty

    quantity = units.str2quantity(value)
    try:
        for action, units_ in entries:
            units_ = util.none_if(units_, u'')
            if action == 'default':
                units.set_default_units(quantity, units_)
                continue
            if action != 'to':
                raise SyntaxException(ValueError('Invalid action: '+action))
            try: quantity = units.convert(quantity, units_)
            except ValueError as e: raise SyntaxException(e)
    except units.MissingUnitsException as e: raise SyntaxException(e)
    return units.quantity2str(quantity)
funcs['_units'] = _units
206
207 1399 aaronmk
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first occurrence of the separator
    @return tuple (start, end), each with surrounding whitespace stripped; or
        (str_, None), unstripped, if str_ is not a range: it has no separator,
        or it starts with a '-' separator (negative number)
    '''
    start, sep, end = str_.partition(range_sep)
    no_sep = sep == '' # not a range
    negative = start == '' and range_sep == '-' # negative number
    if no_sep or negative: return (str_, None)
    return (start.strip(), end.strip())
213
214
def _rangeStart(items):
    '''Gets the start of the range in the 'value' entry; see parse_range()
    @return The range start, or None if there is no 'value' entry
    '''
    args = dict(conv_items(strings.ustr, items))
    if 'value' not in args: return None # input is empty
    start, end = parse_range(args['value'])
    return start
funcs['_rangeStart'] = _rangeStart
220
221
def _rangeEnd(items):
    '''Gets the end of the range in the 'value' entry; see parse_range()
    @return The range end (None if the value is not a range), or None if there
        is no 'value' entry
    '''
    args = dict(conv_items(strings.ustr, items))
    if 'value' not in args: return None # input is empty
    start, end = parse_range(args['value'])
    return end
funcs['_rangeEnd'] = _rangeEnd
227
228 1472 aaronmk
def _range(items):
    '''Computes the size of a numeric range
    @return str() of the 'to' entry minus the 'from' entry (both cast to
        float), or None if either entry is missing
    '''
    args = dict(conv_items(float, items))
    from_, to = args.get('from', None), args.get('to', None)
    if from_ is None or to is None: return None
    return str(to - from_)
funcs['_range'] = _range
235
236 995 aaronmk
def _avg(items):
    '''Averages the values, after casting each to float
    @return str() of the mean, or None if there are no values
    '''
    values = [value for name, value in conv_items(float, items)]
    if not values: return None # input is empty
    total = 0.
    for value in values: total += value
    return str(total/len(values))
funcs['_avg'] = _avg
245 86 aaronmk
246 968 aaronmk
class CvException(Exception):
    '''Indicates that a CV value was supplied where it isn't allowed; raised
    (wrapped in a SyntaxException) by _noCV()
    '''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
251
252 995 aaronmk
def _noCV(items):
253 968 aaronmk
    try: name, value = items.next()
254
    except StopIteration: return None
255
    if re.match('^(?i)CV *\d+$', value): raise SyntaxException(CvException())
256
    return value
257 995 aaronmk
funcs['_noCV'] = _noCV
258 968 aaronmk
259 1469 aaronmk
#### Dates
260
261 995 aaronmk
def _date(items):
    '''Normalizes a date to %Y-%m-%d form
    @param items Either
        date A date string: a (possibly fractional) year number, or any other
            string, which is parsed with dates.strtotime()
        or
        year (required)
        month Number or month name (optional; defaults to 1)
        day (optional; defaults to 1)
    @return The formatted date; None if there are no entries; or the 'date'
        string unchanged if dates.strtotime() raises an ImportError
    @throws SyntaxException if the date can't be parsed or is out of range
    @throws FormatException if the parsed date can't be formatted
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try: str_ = items['date']
    except KeyError:
        # Year is required
        try: items['year']
        except KeyError as e:
            if items == {}: return None # entire date is empty
            else: raise SyntaxException(e)

        # Convert month name to number
        try: month = items['month']
        except KeyError: pass
        else:
            if not month.isdigit(): # month is name
                try: items['month'] = str(dates.strtotime(month).month)
                except ValueError as e: raise SyntaxException(e)

        items = dict(conv_items(format.str2int, items.iteritems()))
        items.setdefault('month', 1)
        items.setdefault('day', 1)

        for try_num in xrange(2):
            try:
                date = datetime.date(**items)
                break
            except ValueError as e:
                if try_num > 0: raise SyntaxException(e)
                    # exception still raised after retry
                msg = strings.ustr(e)
                if msg == 'month must be in 1..12': # try swapping month and day
                    items['month'], items['day'] = items['day'], items['month']
                else: raise SyntaxException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise SyntaxException(e)
        else:
            # A number that isn't a usable year (e.g. 0, 20120315, nan, inf) is
            # a syntax error like any other unparseable date, so don't let the
            # bare ValueError/OverflowError escape
            try:
                # fractional part of the year becomes a day offset from Jan 1
                date = (datetime.date(int(year), 1, 1) +
                    datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e: raise SyntaxException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
funcs['_date'] = _date
306 86 aaronmk
307 1366 aaronmk
def _dateRangeStart(items):
    '''Gets the first element of dates.parse_date_range() of the 'value' entry
    @return That element, or None if there is no 'value' entry
    '''
    args = dict(conv_items(strings.ustr, items))
    if 'value' not in args: return None # input is empty
    return dates.parse_date_range(args['value'])[0]
funcs['_dateRangeStart'] = _dateRangeStart
313 1311 aaronmk
314 1366 aaronmk
def _dateRangeEnd(items):
    '''Gets the second element of dates.parse_date_range() of the 'value' entry
    @return That element, or None if there is no 'value' entry
    '''
    args = dict(conv_items(strings.ustr, items))
    if 'value' not in args: return None # input is empty
    return dates.parse_date_range(args['value'])[1]
funcs['_dateRangeEnd'] = _dateRangeEnd
320 1311 aaronmk
321 1469 aaronmk
#### Names
322
323 328 aaronmk
_name_parts_slices_items = [
324
    ('first', slice(None, 1)),
325
    ('middle', slice(1, -1)),
326
    ('last', slice(-1, None)),
327
]
328
name_parts_slices = dict(_name_parts_slices_items)
329
name_parts = [name for name, slice_ in _name_parts_slices_items]
330
331 995 aaronmk
def _name(items):
    '''Joins the name parts that are present, in name_parts order
    @param items part=value entries; parts not in name_parts are ignored
    @return The parts' values, separated by single spaces
    '''
    args = dict(items)
    present = [args[part] for part in name_parts if part in args]
    return ' '.join(present)
funcs['_name'] = _name
338 102 aaronmk
339 995 aaronmk
def _namePart(items):
    '''Extracts part(s) of full names
    @param items part=full_name entries; part must be a key of
        name_parts_slices, and selects which of the full name's
        space-separated words to keep
    @return The extracted parts, combined by _name()
    @throws SyntaxException if a part name is not recognized
    '''
    extracted = []
    for part, full_name in items:
        try: words_slice = name_parts_slices[part]
        except KeyError as e: raise SyntaxException(e)
        words = full_name.split(' ')[words_slice]
        extracted.append((part, ' '.join(words)))
    return _name(extracted)
funcs['_namePart'] = _namePart
347 1321 aaronmk
348 1469 aaronmk
#### Paths
349
350 1321 aaronmk
def _simplifyPath(items):
    '''Walks a chain of elements, removing the ones that lack a required part
    @param items
        path The element to start at
        next XPath (as used by xpath.get_1()) from an element to the next
            element in the chain
        require XPath that must yield a result on an element for the element
            to be kept
    @return The starting element, or the element that took its place if it
        was removed
    @throws SyntaxException if any of the entries is missing
    '''
    args = dict(items)
    try:
        next_path = cast(strings.ustr, args['next'])
        require_path = cast(strings.ustr, args['require'])
        root = args['path']
    except KeyError as e: raise SyntaxException(e)

    node = root
    while node != None:
        # Look up the successor first, while node is still in the tree
        successor = xpath.get_1(node, next_path, allow_rooted=False)
        required = xpath.get_1(node, require_path, allow_rooted=False)
        if required == None: # empty elem
            xml_dom.replace(node, successor) # remove current elem
            if node is root: root = successor # also update root
        node = successor
    return root
funcs['_simplifyPath'] = _simplifyPath