# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 111 aaronmk
7 818 aaronmk
import dates
8 300 aaronmk
import exc
9 1580 aaronmk
import format
10 917 aaronmk
import maps
11 1234 aaronmk
import strings
12 827 aaronmk
import term
13 1468 aaronmk
import units
14 1047 aaronmk
import util
15 86 aaronmk
import xml_dom
16 1321 aaronmk
import xpath
17 86 aaronmk
18 995 aaronmk
##### Exceptions
19
20 1518 aaronmk
class SyntaxException(exc.ExceptionWithCause):
    '''Signals that an XML function was given invalid syntax or input

    @param cause The underlying exception. Its string form (exc.str_(cause))
        is appended to the message 'Invalid XML function syntax: '.
    '''
    def __init__(self, cause):
        exc.ExceptionWithCause.__init__(self, 'Invalid XML function syntax: '
            +exc.str_(cause))
24 278 aaronmk
25 843 aaronmk
class FormatException(SyntaxException):
    '''A SyntaxException for values that can't be formatted for output'''
26
27 995 aaronmk
##### Functions
28
29
# Registry of XML functions: maps a tag name (e.g. '_ignore') to the function
# that process() calls for nodes with that tag name
funcs = {}
30
31
def process(node, on_error=exc.raise_):
    '''Evaluates the XML functions in the subtree rooted at node, children first

    A node is treated as a function call if its tag name starts with '_' and is
    registered in funcs. The function receives the node's entries
    (xml_dom.NodeTextEntryIter(node)) and the node is replaced with the text of
    the returned value.

    If the function raises, the node is replaced with an XML comment containing
    the node's string form, and the exception is passed to on_error.

    @param node The root element to process (modified in place)
    @param on_error Called with each exception raised by an XML function
    '''
    for child in xml_dom.NodeElemIter(node): process(child, on_error)
    name = node.tagName
    if name.startswith('_') and name in funcs:
        try:
            value = funcs[name](xml_dom.NodeTextEntryIter(node))
            xml_dom.replace_with_text(node, value)
        except Exception as e: # also catch XML func internal errors
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            # Comments can't contain '--', so collapse *every* run of hyphens
            # (a plain replace('--','-') would turn '---' into '--')
            comment = '\n'+re.sub(r'-{2,}', '-', term.emph_multiline(str_))
            xml_dom.replace(node, node.ownerDocument.createComment(comment))
            on_error(e)
47
48 86 aaronmk
def map_items(func, items):
    '''Applies func to the value of each (name, value) pair

    @param func Function taking a value and returning the new value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in the original order
    '''
    return [(name, func(value)) for name, value in items]
50
51 1234 aaronmk
def cast(type_, val):
    '''Converts val using type_
    @param type_ Callable (usually a type) that performs the conversion
    @param val The value to convert
    @return type_(val)
    @throws SyntaxException if type_ raises ValueError (i.e. can't cast)
    '''
    try: return type_(val)
    except ValueError as e: raise SyntaxException(e)
55
56 278 aaronmk
def conv_items(type_, items):
    '''Casts the value of each item to type_
    @param type_ Callable (usually a type) to cast each value with
    @param items Iterable of (name, value) pairs. It is first wrapped in
        xml_dom.TextEntryOnlyIter (presumably to reject non-text values --
        verify against xml_dom).
    @return list of (name, converted_value) pairs
    @throws SyntaxException if a value can't be cast
    '''
    def conv(val): return cast(type_, val)
    return map_items(conv, xml_dom.TextEntryOnlyIter(items))
59 278 aaronmk
60 1581 aaronmk
def pop_value(items):
    '''Removes and returns the input value from the end of an items list
    @param items list of (name, value) pairs. Its last entry is always removed
        if there is one, even when that entry is not the 'value' entry.
    @return The value of the last entry if it is named 'value', otherwise None
        (the input is empty)
    '''
    if not items: return None # input is empty and no actions
    name, value = items.pop() # last entry contains value
    if name != 'value': return None # input is empty
    return value
65
66 1469 aaronmk
##### XML functions
67 995 aaronmk
68
# Function names must start with _ to avoid collisions with real tags
69
# Functions take arguments (items)
70
71 1469 aaronmk
#### General
72
73 995 aaronmk
def _ignore(items):
74 994 aaronmk
    '''Used to "comment out" an XML subtree'''
75
    return None
76 995 aaronmk
funcs['_ignore'] = _ignore
77 994 aaronmk
78 1469 aaronmk
#### Conditionals
79
80 1234 aaronmk
def _eq(items):
81
    items = dict(items)
82
    try:
83
        left = items['left']
84
        right = items['right']
85
    except KeyError: return '' # a value was None
86
    return util.bool2str(left == right)
87
funcs['_eq'] = _eq
88
89
def _if(items):
    '''Selects one of two values based on a condition
    @param items
        cond The condition: true iff its string form is non-empty
        then The value returned if cond is true
        else (optional) The value returned if cond is false (default None)
    @throws SyntaxException if cond or then is missing
    '''
    items = dict(items)
    try:
        cond = items['cond']
        then = items['then']
    except KeyError as e: raise SyntaxException(e)
    else_ = items.get('else', None)
    cond = bool(cast(strings.ustr, cond))
    if cond: return then
    else: return else_
funcs['_if'] = _if
100
101 1469 aaronmk
#### Combining values
102
103 995 aaronmk
def _alt(items):
104 113 aaronmk
    items = list(items)
105
    items.sort()
106 1186 aaronmk
    try: return items[0][1] # value of lowest-numbered item
107 1187 aaronmk
    except IndexError: return None # input got removed by e.g. SyntaxException
108 995 aaronmk
funcs['_alt'] = _alt
109 113 aaronmk
110 995 aaronmk
def _merge(items):
    '''Merges the item values, taken in sorted item order, into one value
    @param items Iterable of (name, value) pairs; values are cast to strings
    @return The result of maps.merge_values() on the sorted values
    @throws SyntaxException if a value can't be cast
    '''
    items = list(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    items.sort()
    return maps.merge_values(*[v for k, v in items])
funcs['_merge'] = _merge
116 917 aaronmk
117 995 aaronmk
def _label(items):
    '''Prefixes a value with a label
    @param items
        label The label text
        value The value to label
    @return label+': '+value
    @throws SyntaxException if label or value is missing
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try:
        label = items['label']
        value = items['value']
    except KeyError as e: raise SyntaxException(e)
    return label+': '+value
funcs['_label'] = _label
126 917 aaronmk
127 1469 aaronmk
#### Transforming values
128
129 1478 aaronmk
# Maps the `type` argument of _nullIf to the callable used to cast the null
# value. None is the key used when no type was given.
types_by_name = {None: strings.ustr, 'str': strings.ustr, 'float': float}
130 1477 aaronmk
131 1047 aaronmk
def _nullIf(items):
    '''Turns a value into None if it equals a designated null value
    @param items
        null The value to treat as null
        value (optional) The input value
        type (optional) Name of the type to compare as (a key of
            types_by_name)
    @return util.none_if(value, null), or value itself if that raises
        ValueError
    @throws SyntaxException if null is missing or type is not a known type name
    '''
    items = dict(conv_items(strings.ustr, items))
    try: null = items['null']
    except KeyError as e: raise SyntaxException(e)
    value = items.get('value', None)
    type_str = items.get('type', None)

    try: type_ = types_by_name[type_str]
    except KeyError as e: raise SyntaxException(e)
    null = type_(null)

    try: return util.none_if(value, null)
    except ValueError: return value # value not convertible, so can't equal null
funcs['_nullIf'] = _nullIf
145
146 1219 aaronmk
def _map(items):
    '''Raises error if value not in map and no special '*' entry
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings
            name "*" means all other input values
            value "*" means keep input value the same
            value "" means ignore input value
    @return The mapped value, or None if the input is empty
    @throws SyntaxException if the value has no mapping and there is no "*"
        entry
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    map_ = dict(items)

    try: new_value = map_[value]
    except KeyError as e:
        # Wrap right away in case another exception raised
        se = SyntaxException(e)
        try: new_value = map_['*']
        except KeyError: raise se
    if new_value == '*': new_value = value # '*' means keep input value the same
    return util.none_if(new_value, u'') # empty map entry means None
funcs['_map'] = _map
169
170
def _replace(items):
    '''Applies a series of regular expression replacements to a value
    @param items
        <last_entry> Value
        <other_entries> pattern=replacement Substitutions, applied in order
            A pattern consisting only of word characters matches whole words
            only
    @return The value after all replacements, or None if the input is empty
    @throws SyntaxException if a pattern or replacement is not a valid regular
        expression
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    try:
        for repl, with_ in items:
            if re.match(r'^\w+$', repl):
                repl = r'(?<![^\W_])'+repl+r'(?![^\W_])' # match whole word
            value = re.sub(repl, with_, value)
    # re.error is the same class as sre_constants.error
    except re.error as e: raise SyntaxException(e)
    return util.none_if(value, u'') # empty strings always mean None
funcs['_replace'] = _replace
182
183 1469 aaronmk
#### Quantities
184
185 1225 aaronmk
def _units(items):
    '''Applies units actions to a quantity
    @param items
        <last_entry> Value, parsed with units.str2quantity()
        <other_entries> action=units Actions, applied in order
            action "default" calls units.set_default_units(quantity, units)
            action "to" calls units.convert(quantity, units)
            units "" is passed as None
    @return The resulting quantity as a string, or None if the input is empty
    @throws SyntaxException if an action is invalid, a conversion raises
        ValueError, or units are missing (units.MissingUnitsException)
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty

    quantity = units.str2quantity(value)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default': units.set_default_units(quantity, units_)
            elif action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise SyntaxException(e)
            else: raise SyntaxException(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise SyntaxException(e)
    return units.quantity2str(quantity)
funcs['_units'] = _units
202
203 1399 aaronmk
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first separator
    @param str_ The string to parse
    @param range_sep The separator between the start and end of the range
    @return tuple (start, end) with surrounding whitespace stripped, or
        (str_, None) if str_ is not a range. With the default '-' separator, a
        string starting with '-' is treated as a negative number rather than a
        range.
    '''
    default = (str_, None)
    start, sep, end = str_.partition(range_sep)
    if sep == '': return default # not a range
    if start == '' and range_sep == '-': return default # negative number
    return tuple(d.strip() for d in (start, end))
209
210
def _rangeStart(items):
    '''Gets the start of a range
    @param items
        value The range string (see parse_range())
    @return The first element returned by parse_range(), or None if the input
        is empty
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items: return None # input is empty
    return parse_range(items['value'])[0]
funcs['_rangeStart'] = _rangeStart
216
217
def _rangeEnd(items):
    '''Gets the end of a range
    @param items
        value The range string (see parse_range())
    @return The second element returned by parse_range() (None if the value is
        not a range), or None if the input is empty
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items: return None # input is empty
    return parse_range(items['value'])[1]
funcs['_rangeEnd'] = _rangeEnd
223
224 1472 aaronmk
def _range(items):
    '''Computes the size of a numeric range
    @param items
        from The start of the range
        to The end of the range
    @return str(to - from), or None if either is missing
    @throws SyntaxException if a value can't be cast to float
    '''
    items = dict(conv_items(float, items))
    from_ = items.get('from', None)
    to = items.get('to', None)
    if from_ is None or to is None: return None
    return str(to - from_)
funcs['_range'] = _range
231
232 995 aaronmk
def _avg(items):
    '''Computes the arithmetic mean of the item values
    @param items Iterable of (name, value) pairs; the names are ignored
    @return The mean as a string, or None if the input is empty
    @throws SyntaxException if a value can't be cast to float
    '''
    values = [value for name, value in conv_items(float, items)]
    if not values: return None # input is empty
    return str(sum(values, 0.)/len(values))
funcs['_avg'] = _avg
241 86 aaronmk
242 968 aaronmk
class CvException(Exception):
    '''Signals that a CV (coefficient of variation) value was found where it
    is not allowed'''
    def __init__(self):
        Exception.__init__(self, 'CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
247
248 995 aaronmk
def _noCV(items):
249 968 aaronmk
    try: name, value = items.next()
250
    except StopIteration: return None
251
    if re.match('^(?i)CV *\d+$', value): raise SyntaxException(CvException())
252
    return value
253 995 aaronmk
funcs['_noCV'] = _noCV
254 968 aaronmk
255 1469 aaronmk
#### Dates
256
257 995 aaronmk
def _date(items):
    '''Formats a date as YYYY-MM-DD
    @param items Either a whole date string, or its parts:
        date The date as a string: a (possibly fractional) year number, or
            anything dates.strtotime() can parse
        year, month, day The parts of the date, used only if there is no date
            entry. year is required; month and day default to 1. month may be
            a month name. If the month is out of range, month and day are
            swapped and the date is tried again.
    @return The formatted date; None if there are no items; or the date string
        unchanged if dates.strtotime() raises ImportError
    @throws SyntaxException if the date is invalid or year is missing
    @throws FormatException if the date can't be formatted
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    if 'date' in items:
        str_ = items['date']
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise SyntaxException(e)
        else:
            # Fractional years are mapped to a day within the year. An
            # out-of-range year is reported like any other invalid date.
            try: date = (datetime.date(int(year), 1, 1) +
                datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e: raise SyntaxException(e)
    else:
        # Year is required
        if 'year' not in items:
            if not items: return None # entire date is empty
            raise SyntaxException(KeyError('year'))

        # Convert month name to number
        month = items.get('month', None)
        if month is not None and not month.isdigit(): # month is name
            try: items['month'] = str(dates.strtotime(month).month)
            except ValueError as e: raise SyntaxException(e)

        items = dict(conv_items(format.str2int, items.items()))
        items.setdefault('month', 1)
        items.setdefault('day', 1)

        try: date = datetime.date(**items)
        except ValueError as e:
            # Only a bad month is retried. Newer Python versions append the
            # offending value to this message, so match the prefix only.
            if not strings.ustr(e).startswith('month must be in 1..12'):
                raise SyntaxException(e)
            # try swapping month and day
            items['month'], items['day'] = items['day'], items['month']
            try: date = datetime.date(**items)
            except ValueError as e2: raise SyntaxException(e2)
                # exception still raised after retry
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
funcs['_date'] = _date
302 86 aaronmk
303 1366 aaronmk
def _dateRangeStart(items):
    '''Gets the start of a date range
    @param items
        value The date range string
    @return The first element returned by dates.parse_date_range(), or None if
        the input is empty
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items: return None # input is empty
    return dates.parse_date_range(items['value'])[0]
funcs['_dateRangeStart'] = _dateRangeStart
309 1311 aaronmk
310 1366 aaronmk
def _dateRangeEnd(items):
    '''Gets the end of a date range
    @param items
        value The date range string
    @return The second element returned by dates.parse_date_range(), or None
        if the input is empty
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items: return None # input is empty
    return dates.parse_date_range(items['value'])[1]
funcs['_dateRangeEnd'] = _dateRangeEnd
316 1311 aaronmk
317 1469 aaronmk
#### Names
318
319 328 aaronmk
# Each name part, in output order, with the slice that selects that part from
# the list of words in a full name
_name_parts_slices_items = [
    ('first', slice(None, 1)),
    ('middle', slice(1, -1)),
    ('last', slice(-1, None)),
]
# Name part -> slice lookup
name_parts_slices = dict(_name_parts_slices_items)
# Name parts only, in output order
name_parts = [name for name, slice_ in _name_parts_slices_items]
326
327 995 aaronmk
def _name(items):
    '''Joins name parts into a full name
    @param items Iterable of (part, value) pairs. Only the parts listed in
        name_parts are used; they are output in name_parts order.
    @return The values of the parts that are present, separated by spaces
    '''
    items = dict(items)
    return ' '.join(items[part] for part in name_parts if part in items)
funcs['_name'] = _name
334 102 aaronmk
335 995 aaronmk
def _namePart(items):
    '''Extracts parts of full names
    @param items Iterable of (part, full_name) pairs. Each full name is split
        on spaces and reduced to the words selected by name_parts_slices[part].
    @return The extracted parts, joined by _name()
    @throws SyntaxException if a part is not a key of name_parts_slices
    '''
    out_items = []
    for part, value in items:
        try: slice_ = name_parts_slices[part]
        except KeyError as e: raise SyntaxException(e)
        out_items.append((part, ' '.join(value.split(' ')[slice_])))
    return _name(out_items)
funcs['_namePart'] = _namePart
343 1321 aaronmk
344 1469 aaronmk
#### Paths
345
346 1321 aaronmk
def _simplifyPath(items):
    '''Removes the elements along a path that lack a required subpath
    @param items
        next XPath from one element on the path to the next element
        require XPath that an element must contain in order to be kept
        path The first element of the path (a node, not text)
    @return The first remaining element of the path, or None if all were
        removed
    @throws SyntaxException if next, require, or path is missing
    '''
    items = dict(items)
    try:
        next_ = cast(strings.ustr, items['next'])
        require = cast(strings.ustr, items['require'])
        root = items['path']
    except KeyError as e: raise SyntaxException(e)

    node = root
    while node is not None:
        # Look up the successor before node is possibly detached from the tree
        new_node = xpath.get_1(node, next_, allow_rooted=False)
        if xpath.get_1(node, require, allow_rooted=False) is None: # empty elem
            xml_dom.replace(node, new_node) # remove current elem
            if node is root: root = new_node # also update root
        node = new_node
    return root
funcs['_simplifyPath'] = _simplifyPath