Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 111 aaronmk
7 818 aaronmk
import dates
8 300 aaronmk
import exc
9 917 aaronmk
import maps
10 1234 aaronmk
import strings
11 827 aaronmk
import term
12 1468 aaronmk
import units
13 1047 aaronmk
import util
14 86 aaronmk
import xml_dom
15 1321 aaronmk
import xpath
16 86 aaronmk
17 995 aaronmk
##### Exceptions
18
19 1518 aaronmk
class SyntaxException(exc.ExceptionWithCause):
    '''Raised when an XML function is given invalid syntax or input.

    The message is 'Invalid XML function syntax: ' followed by the string form
    of `cause`, as produced by exc.str_().
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax: '+exc.str_(cause)
        exc.ExceptionWithCause.__init__(self, msg)
23 278 aaronmk
24 843 aaronmk
class FormatException(SyntaxException):
    '''SyntaxException subtype that _date() raises when formatting fails'''
25
26 995 aaronmk
##### Functions
27
28
# Registry of XML functions: maps each tag name (starting with '_') to the
# function that process() calls for nodes with that tag
funcs = {}
29
30
def process(node, on_error=exc.raise_):
    '''Evaluates the XML functions in a DOM subtree, children first.

    Every child element is processed before `node` itself. If node's tag name
    starts with '_' and is registered in funcs, the function is called with
    xml_dom.NodeTextEntryIter(node) and its result is substituted for the node
    using xml_dom.replace_with_text().

    If that raises any exception, the node is instead replaced by an XML comment
    containing the node's string form, and the exception (with its traceback
    and the function's source added) is passed to on_error.
    '''
    for child in xml_dom.NodeElemIter(node): process(child, on_error)
    name = node.tagName
    if not (name.startswith('_') and name in funcs): return # not a function

    try:
        value = funcs[name](xml_dom.NodeTextEntryIter(node))
        xml_dom.replace_with_text(node, value)
    except Exception as e: # also catch XML func internal errors
        # Save in case another exception raised, overwriting sys.exc_info()
        exc.add_traceback(e)
        str_ = str(node)
        exc.add_msg(e, 'function:\n'+str_)
        # Comments can't contain '--'. Collapse each whole *run* of hyphens: a
        # single str.replace('--', '-') pass would still leave '--' behind for
        # inputs such as '---' or '----'.
        comment = re.sub(r'-{2,}', '-', '\n'+term.emph_multiline(str_))
        xml_dom.replace(node, node.ownerDocument.createComment(comment))
        on_error(e)
46
47 86 aaronmk
def map_items(func, items):
    '''Applies func to the value of each (name, value) pair in items.

    Returns a list of (name, func(value)) pairs in the original order.
    '''
    return [(name, func(value)) for name, value in items]
49
50 1234 aaronmk
def cast(type_, val):
    '''Converts val by calling type_(val).

    Throws SyntaxException (wrapping the ValueError) if can't cast.
    '''
    try: return type_(val)
    except ValueError as e: raise SyntaxException(e)
54
55 278 aaronmk
def conv_items(type_, items):
    '''Casts the value of every item to type_.

    The items are first passed through xml_dom.TextEntryOnlyIter. Returns a list
    of (name, converted value) pairs; throws SyntaxException if a value can't
    be cast.
    '''
    text_items = xml_dom.TextEntryOnlyIter(items)
    return map_items(lambda val: cast(type_, val), text_items)
58 278 aaronmk
59 1469 aaronmk
##### XML functions
60 995 aaronmk
61
# Function names must start with _ to avoid collisions with real tags
62
# Functions take arguments (items)
63
64 1469 aaronmk
#### General
65
66 995 aaronmk
def _ignore(items):
    '''Used to "comment out" an XML subtree: always returns None'''
    return None
69 995 aaronmk
funcs['_ignore'] = _ignore
70 994 aaronmk
71 1469 aaronmk
#### Conditionals
72
73 1234 aaronmk
def _eq(items):
    '''Compares the `left` and `right` items for equality.

    Returns util.bool2str() of the comparison, or '' if either item is missing.
    '''
    items = dict(items)
    try:
        left = items['left']
        right = items['right']
    except KeyError: return '' # a value was None
    return util.bool2str(left == right)
80
funcs['_eq'] = _eq
81
82
def _if(items):
    '''Returns the `then` item if `cond` is true, otherwise the `else` item.

    cond is true iff its str form is non-empty. `else` is optional and
    defaults to None. Throws SyntaxException if `cond` or `then` is missing.
    '''
    items = dict(items)
    try:
        cond = items['cond']
        then = items['then']
    except KeyError as e: raise SyntaxException(e)
    else_ = items.get('else', None)

    if bool(cast(str, cond)): return then
    else: return else_
92
funcs['_if'] = _if
93
94 1469 aaronmk
#### Combining values
95
96 995 aaronmk
def _alt(items):
    '''Returns the value of the item that sorts first.

    Items are compared as whole (name, value) pairs. Returns None if there are
    no items.
    '''
    items = sorted(items)
    try: return items[0][1] # value of lowest-numbered item
    except IndexError: return None # input got removed by e.g. SyntaxException
101 995 aaronmk
funcs['_alt'] = _alt
102 113 aaronmk
103 995 aaronmk
def _merge(items):
    '''Merges the values of all items with maps.merge_values().

    The values are converted with strings.ustr and passed in the sorted order
    of their (name, value) pairs.
    '''
    items = list(conv_items(strings.ustr, items))
        # get *once* from iter and check types
    items.sort()
    return maps.merge_values(*[value for name, value in items])
108 995 aaronmk
funcs['_merge'] = _merge
109 917 aaronmk
110 995 aaronmk
def _label(items):
    '''Returns the `value` item prefixed with the `label` item and ': '.

    Throws SyntaxException if `label` or `value` is missing.
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter and check types
    try:
        label = items['label']
        value = items['value']
    except KeyError as e: raise SyntaxException(e)
    return label+': '+value
118 995 aaronmk
funcs['_label'] = _label
119 917 aaronmk
120 1469 aaronmk
#### Transforming values
121
122 1478 aaronmk
# Converters selectable by the `type` item of _nullIf()
types_by_name = {
    None: strings.ustr, # used when no type is given
    'str': strings.ustr,
    'float': float,
}
123 1477 aaronmk
124 1047 aaronmk
def _nullIf(items):
    '''Returns None if the `value` item equals the `null` item, else the value.

    The optional `type` item names an entry of types_by_name; `null` is
    converted to that type and the comparison is delegated to util.none_if().
    Throws SyntaxException if `null` is missing, `type` is unknown, or `null`
    can't be converted to the type.
    '''
    items = dict(conv_items(str, items))
    try: null = items['null']
    except KeyError as e: raise SyntaxException(e)
    value = items.get('value', None)
    type_str = items.get('type', None)

    try: type_ = types_by_name[type_str]
    except KeyError as e: raise SyntaxException(e)
    # Use cast() so that a null which isn't valid for the type is reported as
    # a SyntaxException like every other input error, not a raw ValueError
    null = cast(type_, null)

    try: return util.none_if(value, null)
    except ValueError: return value # value not convertible, so can't equal null
137 1047 aaronmk
funcs['_nullIf'] = _nullIf
138
139 1219 aaronmk
def _map(items):
    '''Looks up the value (the last item) in the map formed by the other items.

    If the value has no map entry, it is returned unchanged, unless the map has
    a `_closed` entry with a non-empty value, in which case SyntaxException is
    thrown. A result of u'' is turned into None by util.none_if(). Throws
    SyntaxException if there are no items.
    '''
    items = conv_items(str, items) # get *once* from iter and check types
    try: value = items.pop()[1] # last entry contains value
    except IndexError as e: raise SyntaxException(e)
    map_ = dict(items)
    closed = bool(map_.pop('_closed', False))

    try: value = map_[value]
    except KeyError as e:
        if closed: raise SyntaxException(e)
        else: return value
    return util.none_if(value, u'') # empty map entry means None
150 1219 aaronmk
funcs['_map'] = _map
151
152
def _replace(items):
    '''Applies regexp replacements to the value (the last item).

    Each other item is a (pattern, replacement) pair, applied in order using
    re.sub(). A pattern consisting only of word characters matches whole words
    only. A result of u'' is turned into None by util.none_if(). Throws
    SyntaxException if there are no items or a pattern is not a valid regexp.
    '''
    items = conv_items(str, items) # get *once* from iter and check types
    try: value = items.pop()[1] # last entry contains value
    except IndexError as e: raise SyntaxException(e)

    try:
        for repl, with_ in items:
            if re.match(r'^\w+$', repl):
                repl = r'(?<![^\W_])'+repl+r'(?![^\W_])' # match whole word
            value = re.sub(repl, with_, value)
    except re.error as e: raise SyntaxException(e)
        # re.error is the same class as sre_constants.error
    return util.none_if(value, u'') # empty strings always mean None
163 1219 aaronmk
funcs['_replace'] = _replace
164
165 1469 aaronmk
#### Quantities
166
167 1225 aaronmk
def _units(items):
    '''Applies unit actions to the quantity in the `value` item (the last item).

    Each preceding item is an (action, units) pair, applied in order:
    `default` calls units.set_default_units() and `to` calls units.convert().
    Returns the resulting quantity as a string, or None if the input is empty.
    Throws SyntaxException for an unknown action or on
    units.MissingUnitsException.
    '''
    items = conv_items(str, items) # get *once* from iter and check types
    try: last = items.pop() # last entry contains value
    except IndexError: return None # input is empty and no actions
    if last[0] != 'value': return None # input is empty
    str_ = last[1]

    quantity = units.str2quantity(str_)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default': units.set_default_units(quantity, units_)
            elif action == 'to': quantity = units.convert(quantity, units_)
            else: raise SyntaxException(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise SyntaxException(e)
    return units.quantity2str(quantity)
183 1225 aaronmk
funcs['_units'] = _units
184
185 1399 aaronmk
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first range_sep.

    Returns (start, end) with surrounding whitespace stripped from both. If
    str_ is not a range, or is a negative number (nothing but whitespace before
    a '-' separator), returns (str_, None) with str_ unchanged.
    '''
    default = (str_, None)
    start, sep, end = str_.partition(range_sep)
    if sep == '': return default # not a range
    # Strip before testing so that a negative number with leading whitespace
    # (' -5') isn't mistaken for a range with an empty start
    if start.strip() == '' and range_sep == '-': return default # negative number
    return tuple(d.strip() for d in (start, end))
191
192
def _rangeStart(items):
    '''Returns the start of the range in the `value` item (see parse_range()).

    Returns None if there is no `value` item.
    '''
    items = dict(conv_items(str, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    start, end = parse_range(value)
    return start
197
funcs['_rangeStart'] = _rangeStart
198
199
def _rangeEnd(items):
    '''Returns the end of the range in the `value` item (see parse_range()).

    Returns None if there is no `value` item or the value is not a range.
    '''
    items = dict(conv_items(str, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    start, end = parse_range(value)
    return end
204
funcs['_rangeEnd'] = _rangeEnd
205
206 1472 aaronmk
def _range(items):
    '''Returns the str of the `to` item minus the `from` item, both as floats.

    Returns None if either item is missing.
    '''
    items = dict(conv_items(float, items))
    from_ = items.get('from', None)
    to = items.get('to', None)
    if from_ is None or to is None: return None
    return str(to - from_)
212
funcs['_range'] = _range
213
214 995 aaronmk
def _avg(items):
    '''Returns the str of the mean of all item values, converted to floats.

    Returns None if there are no items.
    '''
    count = 0
    sum_ = 0.
    for name, value in conv_items(float, items):
        count += 1
        sum_ += value
    if count == 0: return None # input is empty
    return str(sum_/count)
222 995 aaronmk
funcs['_avg'] = _avg
223 86 aaronmk
224 968 aaronmk
class CvException(Exception):
    '''Signals that a CV (coefficient of variation) value was given.

    Takes no arguments; the message is fixed.
    '''
    def __init__(self):
        Exception.__init__(self, 'CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
229
230 995 aaronmk
def _noCV(items):
    '''Returns the value of the first item, rejecting CV values.

    items must be an iterator. Returns None if it is empty. Throws
    SyntaxException (wrapping a CvException) if the value is 'CV' followed by
    optional spaces and digits, in any letter case.
    '''
    try: name, value = next(items)
    except StopIteration: return None
    # Pass the flag as an argument: an inline (?i) that is not at the very
    # start of the pattern is rejected by newer versions of re
    if re.match(r'^CV *\d+$', value, re.IGNORECASE):
        raise SyntaxException(CvException())
    return value
235 995 aaronmk
funcs['_noCV'] = _noCV
236 968 aaronmk
237 1469 aaronmk
#### Dates
238
239 995 aaronmk
def _date(items):
    '''Builds a date formatted as '%Y-%m-%d' from the items.

    If there is a `date` item, it is parsed as either a (possibly fractional)
    year or a date string understood by dates.strtotime(). Otherwise the date
    is assembled from the `year` (required), `month`, and `day` items, where
    month may be a name and month and day default to 1.

    Returns None if there are no items at all. If dates.strtotime() throws
    ImportError for a `date` string, the string is returned unchanged.
    Throws SyntaxException if the year is missing or the date is invalid, and
    FormatException if the date can't be formatted.
    '''
    items = dict(conv_items(str, items)) # get *once* from iter and check types
    try: str_ = items['date']
    except KeyError:
        # Year is required
        try: items['year']
        except KeyError as e:
            if not items: return None # entire date is empty
            else: raise SyntaxException(e)

        # Convert month name to number
        try: month = items['month']
        except KeyError: pass
        else:
            if not month.isdigit(): # month is name
                try: items['month'] = str(dates.strtotime(month).month)
                except ValueError as e: raise SyntaxException(e)

        # iter(items.items()) yields an iterator on both Python 2 and 3
        items = dict(conv_items(int, iter(items.items())))
        items.setdefault('month', 1)
        items.setdefault('day', 1)
        try: date = datetime.date(**items)
        except ValueError as e: raise SyntaxException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise SyntaxException(e)
        else:
            # The fractional part of the year is an offset into a 365-day year.
            # Out-of-range or non-finite numbers (e.g. '0', '20100501', 'nan')
            # are invalid input, so report them as such.
            try:
                date = (datetime.date(int(year), 1, 1) +
                    datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e: raise SyntaxException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
271 995 aaronmk
funcs['_date'] = _date
272 86 aaronmk
273 1366 aaronmk
def _dateRangeStart(items):
    '''Returns the first element of dates.parse_date_range() of the `value` item.

    Returns None if there is no `value` item.
    '''
    items = dict(conv_items(str, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    return dates.parse_date_range(value)[0]
278
funcs['_dateRangeStart'] = _dateRangeStart
279 1311 aaronmk
280 1366 aaronmk
def _dateRangeEnd(items):
    '''Returns the second element of dates.parse_date_range() of the `value` item.

    Returns None if there is no `value` item.
    '''
    items = dict(conv_items(str, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    return dates.parse_date_range(value)[1]
285
funcs['_dateRangeEnd'] = _dateRangeEnd
286 1311 aaronmk
287 1469 aaronmk
#### Names
288
289 328 aaronmk
# Name parts in output order, each with the slice that selects its words from
# a name that has been split on spaces
_name_parts_slices_items = [
    ('first', slice(None, 1)),
    ('middle', slice(1, -1)),
    ('last', slice(-1, None)),
]
name_parts_slices = dict(_name_parts_slices_items) # part name -> slice
name_parts = [name for name, slice_ in _name_parts_slices_items] # ordered names
296
297 995 aaronmk
def _name(items):
    '''Joins the name part items with spaces, in the order given by name_parts.

    Missing parts are skipped; items that are not name parts are ignored.
    '''
    items = dict(items)
    parts = [items[part] for part in name_parts if part in items]
    return ' '.join(parts)
303 995 aaronmk
funcs['_name'] = _name
304 102 aaronmk
305 995 aaronmk
def _namePart(items):
    '''Extracts name parts from full names.

    Each item's name is a name part and its value is a full name: the value is
    split on ' ' and reduced to the words selected by that part's slice in
    name_parts_slices. The results are combined by _name(). Throws
    SyntaxException if an item's name is not a known name part.
    '''
    out_items = []
    for part, value in items:
        try: slice_ = name_parts_slices[part]
        except KeyError as e: raise SyntaxException(e)
        words = value.split(' ')[slice_]
        out_items.append((part, ' '.join(words)))
    return _name(out_items)
312
funcs['_namePart'] = _namePart
313 1321 aaronmk
314 1469 aaronmk
#### Paths
315
316 1321 aaronmk
def _simplifyPath(items):
    '''Removes the empty elements along a chain of nodes.

    Starting at the `path` item, follows the `next` XPath from node to node.
    Every node for which the `require` XPath finds nothing is replaced by its
    successor using xml_dom.replace(). Returns the first node of the chain,
    updated if that node itself was replaced. Throws SyntaxException if
    `next`, `require`, or `path` is missing.
    '''
    items = dict(items)
    try:
        next_xpath = cast(str, items['next'])
        require = cast(str, items['require'])
        root = items['path']
    except KeyError as e: raise SyntaxException(e)

    node = root
    while node is not None:
        # Look up the successor first, before node is replaced
        new_node = xpath.get_1(node, next_xpath, allow_rooted=False)
        if xpath.get_1(node, require, allow_rooted=False) is None: # empty elem
            xml_dom.replace(node, new_node) # remove current elem
            if node is root: root = new_node # also update root
        node = new_node
    return root
332
funcs['_simplifyPath'] = _simplifyPath