Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 111 aaronmk
7 818 aaronmk
import dates
8 300 aaronmk
import exc
9 1463 aaronmk
import format
10 917 aaronmk
import maps
11 1234 aaronmk
import strings
12 827 aaronmk
import term
13 1047 aaronmk
import util
14 86 aaronmk
import xml_dom
15 1321 aaronmk
import xpath
16 86 aaronmk
17 995 aaronmk
##### Exceptions
18
19 962 aaronmk
class SyntaxException(Exception):
    '''Signals that an XML function was given invalid arguments or input.

    The message is the prefix 'Invalid XML function syntax: ' followed by
    exc.str_(cause).
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax: ' + exc.str_(cause)
        Exception.__init__(self, msg)


class FormatException(SyntaxException):
    '''A SyntaxException for values that can't be formatted for output'''
    pass
25
26 995 aaronmk
##### Functions
27
28
# Registry of XML functions, keyed by tag name. process() only applies an
# element's function when its tag name starts with '_' and is a key here.
funcs = {}
29
30
def process(node, on_error=exc.raise_):
    '''Evaluates the XML functions in node's subtree, innermost first.

    Each element child is processed recursively before node itself. If node's
    tag name starts with '_' and is registered in funcs, the function is called
    with xml_dom.NodeTextEntryIter(node) and node is replaced by the returned
    value as text.

    If the function (or the replacement) raises, the traceback and the
    function's XML are attached to the exception, node is replaced by a
    comment containing that XML, and the exception is passed to on_error.

    node: the element to process (modified in place)
    on_error: callable taking the exception; defaults to exc.raise_
    '''
    for child in xml_dom.NodeElemIter(node): process(child, on_error)

    name = node.tagName
    if not (name.startswith('_') and name in funcs): return # not an XML func

    try:
        value = funcs[name](xml_dom.NodeTextEntryIter(node))
        xml_dom.replace_with_text(node, value)
    except Exception as e: # also catch XML func internal errors
        # Save in case another exception raised, overwriting sys.exc_info()
        exc.add_traceback(e)
        str_ = str(node)
        exc.add_msg(e, 'function:\n'+str_)
        # Comments can't contain '--'. Collapse each whole *run* of dashes:
        # str.replace('--', '-') is non-overlapping, so '---' would become
        # '--' and the comment would still be invalid.
        comment_text = re.sub(r'-{2,}', '-', term.emph_multiline(str_))
        xml_dom.replace(node, node.ownerDocument.createComment(
            '\n'+comment_text))
        on_error(e)
46
47 86 aaronmk
def map_items(func, items):
    '''Applies func to the value of every (name, value) pair in items.
    Returns a new list of (name, func(value)) pairs in the original order.'''
    mapped = []
    for name, value in items:
        mapped.append((name, func(value)))
    return mapped
49
50 1234 aaronmk
def cast(type_, val):
    '''Converts val by calling type_(val).
    Throws SyntaxException (wrapping the ValueError) if can't cast.'''
    try:
        converted = type_(val)
    except ValueError as e:
        raise SyntaxException(e)
    return converted
54
55 278 aaronmk
def conv_items(type_, items):
    '''Casts the value of each entry yielded by
    xml_dom.TextEntryOnlyIter(items) to type_.
    Returns a list of (name, converted value) pairs.
    Throws SyntaxException if a value can't be cast.'''
    return [(name, cast(type_, value))
        for name, value in xml_dom.TextEntryOnlyIter(items)]
58 278 aaronmk
59 995 aaronmk
#### XML functions
60
61
# Function names must start with _ to avoid collisions with real tags
62
# Functions take arguments (items)
63
64
def _ignore(items):
65 994 aaronmk
    '''Used to "comment out" an XML subtree'''
66
    return None
67 995 aaronmk
funcs['_ignore'] = _ignore
68 994 aaronmk
69 1234 aaronmk
def _eq(items):
70
    items = dict(items)
71
    try:
72
        left = items['left']
73
        right = items['right']
74
    except KeyError: return '' # a value was None
75
    return util.bool2str(left == right)
76
funcs['_eq'] = _eq
77
78
def _if(items):
    '''Returns the 'then' entry if 'cond' is true, otherwise the 'else' entry
    (None if there is no 'else').
    The condition is true when its string form is non-empty.
    Throws SyntaxException if 'cond' or 'then' is missing.'''
    args = dict(items)
    try:
        cond, then = args['cond'], args['then']
    except KeyError as e:
        raise SyntaxException(e)
    else_ = args.get('else')

    if bool(cast(str, cond)):
        return then
    return else_
88
funcs['_if'] = _if
89
90 995 aaronmk
def _alt(items):
91 113 aaronmk
    items = list(items)
92
    items.sort()
93 1186 aaronmk
    try: return items[0][1] # value of lowest-numbered item
94 1187 aaronmk
    except IndexError: return None # input got removed by e.g. SyntaxException
95 995 aaronmk
funcs['_alt'] = _alt
96 113 aaronmk
97 995 aaronmk
def _merge(items):
    '''Passes the entries' values, ordered by entry name, to
    maps.merge_values() and returns its result.
    Throws SyntaxException if a value can't be cast to strings.ustr.'''
    # sorted() reads the iter *once*; conv_items() checks types
    ordered = sorted(conv_items(strings.ustr, items))
    values = [value for name, value in ordered]
    return maps.merge_values(*values)
102 995 aaronmk
funcs['_merge'] = _merge
103 917 aaronmk
104 995 aaronmk
def _label(items):
    '''Prefixes the 'value' entry with the 'label' entry and ': '.
    Throws SyntaxException if either entry is missing or can't be cast.'''
    # dict() reads the iter *once*; conv_items() checks types
    fields = dict(conv_items(strings.ustr, items))
    try:
        label, value = fields['label'], fields['value']
    except KeyError as e:
        raise SyntaxException(e)
    prefix = label+': '
    return prefix+value
112 995 aaronmk
funcs['_label'] = _label
113 917 aaronmk
114 1047 aaronmk
def _nullIf(items):
    '''Returns util.none_if(value, null) for the 'value' and 'null' entries.

    If the optional 'type' entry is 'float', null is converted to a float
    before being passed on; otherwise it stays a str.
    Throws SyntaxException if 'null' or 'value' is missing, or if null can't
    be converted to the requested type.'''
    args = dict(conv_items(str, items))
    try:
        null = args['null']
        value = args['value']
    except KeyError as e:
        raise SyntaxException(e)

    type_ = str
    if args.get('type', None) == 'float': type_ = float
    # Use cast() so an unparseable null is reported as a SyntaxException like
    # every other invalid argument, instead of leaking a raw ValueError
    return util.none_if(value, cast(type_, null))
124 1047 aaronmk
funcs['_nullIf'] = _nullIf
125
126 1219 aaronmk
def _map(items):
    '''Looks up the last entry's value in a map built from the other entries.

    An unmapped value is returned unchanged, unless the map has a truthy
    '_closed' entry, in which case SyntaxException is thrown.
    Throws SyntaxException if there are no entries.'''
    entries = conv_items(str, items) # get *once* from iter and check types
    try:
        value = entries.pop()[1] # value is last entry's value
    except IndexError as e:
        raise SyntaxException(e)

    mapping = dict(entries)
    closed = bool(mapping.pop('_closed', False))
    try:
        return mapping[value]
    except KeyError as e:
        if not closed: return value
        raise SyntaxException(e)
136 1219 aaronmk
funcs['_map'] = _map
137
138
def _replace(items):
    '''Applies regexp replacements to the last entry's value.

    Each other entry is a (pattern, replacement) pair, applied in order with
    re.sub(). A pattern made up only of word characters matches whole words
    only. An empty result is returned as None.
    Throws SyntaxException if there are no entries or a regexp is invalid.'''
    entries = conv_items(str, items) # get *once* from iter and check types
    try:
        value = entries.pop()[1] # last entry contains value
    except IndexError as e:
        raise SyntaxException(e)

    try:
        for pattern, replacement in entries:
            if re.match(r'^\w+$', pattern):
                # match whole word
                pattern = ''.join((r'(?<![^\W_])', pattern, r'(?![^\W_])'))
            value = re.sub(pattern, replacement, value)
    except re.error as e: # same class as sre_constants.error
        raise SyntaxException(e)
    return util.none_if(value, u'') # empty strings always mean None
149 1219 aaronmk
funcs['_replace'] = _replace
150
151 1225 aaronmk
def _units(items):
    '''Returns the part before the first space of
    format.cleanup_units(value, units), where 'units' is optional.
    Returns None if there is no 'value' entry.
    Throws SyntaxException on format.MissingUnitsException.'''
    args = dict(conv_items(str, items))
    if 'value' not in args: return None # input is empty
    value = args['value']
    default_units = args.get('units')

    # DB unit conversion isn't ready yet, so just return number
    try:
        number = format.cleanup_units(value, default_units).split(' ')[0]
    except format.MissingUnitsException as e:
        raise SyntaxException(e)
    return number
159 1225 aaronmk
funcs['_units'] = _units
160
161 995 aaronmk
def _range(items):
    '''Returns str(to - from) for the float entries 'from' and 'to', or None
    if either is missing.
    Throws SyntaxException if a value can't be cast to float.'''
    bounds = dict(conv_items(float, items))
    from_ = bounds.get('from')
    to = bounds.get('to')
    if from_ is None or to is None:
        return None
    return str(to - from_)
167 995 aaronmk
funcs['_range'] = _range
168 86 aaronmk
169 1399 aaronmk
def parse_range(str_, range_sep='-'):
    '''Splits a string of the form "start<range_sep>end" into its endpoints.

    Returns (start, end) with surrounding whitespace stripped. Only the first
    occurrence of range_sep is used as the separator.
    Returns (str_, None), with str_ unchanged, when str_ isn't a range: either
    it doesn't contain range_sep, or range_sep is '-' and only whitespace
    precedes the first '-' (a negative number rather than a range).
    '''
    start, sep, end = str_.partition(range_sep)
    if sep == '': return (str_, None) # not a range
    # Strip before testing so a padded negative number such as ' -5' is
    # recognized instead of being split into ('', '5')
    if range_sep == '-' and start.strip() == '':
        return (str_, None) # negative number
    return (start.strip(), end.strip())
175
176
def _rangeStart(items):
    '''Returns the first element of parse_range() for the 'value' entry, or
    None if there is no 'value' entry.'''
    args = dict(conv_items(str, items))
    if 'value' not in args: return None # input is empty
    start, end = parse_range(args['value'])
    return start
181
funcs['_rangeStart'] = _rangeStart
182
183
def _rangeEnd(items):
    '''Returns the second element of parse_range() for the 'value' entry, or
    None if there is no 'value' entry.'''
    args = dict(conv_items(str, items))
    if 'value' not in args: return None # input is empty
    start, end = parse_range(args['value'])
    return end
188
funcs['_rangeEnd'] = _rangeEnd
189
190 995 aaronmk
def _avg(items):
    '''Returns the arithmetic mean of the entries' values, as a str.

    Returns None if there are no entries, instead of failing with
    ZeroDivisionError; this matches how the other XML functions treat empty
    input.
    Throws SyntaxException if a value can't be cast to float.'''
    values = [value for name, value in conv_items(float, items)]
    if not values: return None # input is empty
    return str(sum(values)/len(values))
197 995 aaronmk
funcs['_avg'] = _avg
198 86 aaronmk
199 968 aaronmk
class CvException(Exception):
    '''Reports a CV (coefficient of variation) value where one isn't allowed.
    Takes no arguments; the message is fixed.'''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only allowed for '
            'ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
204
205 995 aaronmk
def _noCV(items):
206 968 aaronmk
    try: name, value = items.next()
207
    except StopIteration: return None
208
    if re.match('^(?i)CV *\d+$', value): raise SyntaxException(CvException())
209
    return value
210 995 aaronmk
funcs['_noCV'] = _noCV
211 968 aaronmk
212 995 aaronmk
def _date(items):
    '''Builds a date and returns it formatted as '%Y-%m-%d'.

    The date comes from either:
    - a 'date' entry: a number is taken as a (possibly fractional) year, with
      the fraction scaled to a 365-day year; anything else is parsed with
      dates.strtotime(). If that raises ImportError, the string is returned
      unchanged.
    - otherwise, int entries passed as keywords to datetime.date(): 'year'
      (required), 'month' and 'day' (each defaulting to 1).

    Returns None if there are no entries at all.
    Throws SyntaxException if the year is missing or the date is invalid or
    unparseable, and FormatException if the date can't be formatted.'''
    items = conv_items(str, items) # get *once* from iter and check types
    try: str_ = dict(items)['date']
    except KeyError:
        # No whole-date string, so assemble the date from its parts
        parts = dict(conv_items(int, items))
        try: parts['year'] # year is required
        except KeyError as e:
            if parts == {}: return None # entire date is empty
            else: raise SyntaxException(e)
        parts.setdefault('month', 1)
        parts.setdefault('day', 1)
        try: date = datetime.date(**parts)
        except ValueError as e: raise SyntaxException(e)
    else:
        try: year = float(str_)
        except ValueError:
            # Not a number, so treat it as a free-form date string
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise SyntaxException(e)
        else:
            # A non-finite or out-of-range year (e.g. 'nan', 'inf', '0',
            # '99999') is invalid input, so report it as a SyntaxException
            # rather than leaking the raw error
            try:
                date = (datetime.date(int(year), 1, 1) +
                    datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e: raise SyntaxException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
235 995 aaronmk
funcs['_date'] = _date
236 86 aaronmk
237 1366 aaronmk
def _dateRangeStart(items):
    '''Returns element 0 of dates.parse_date_range() for the 'value' entry, or
    None if there is no 'value' entry.'''
    args = dict(conv_items(str, items))
    if 'value' not in args: return None # input is empty
    date_range = dates.parse_date_range(args['value'])
    return date_range[0]
242
funcs['_dateRangeStart'] = _dateRangeStart
243 1311 aaronmk
244 1366 aaronmk
def _dateRangeEnd(items):
    '''Returns element 1 of dates.parse_date_range() for the 'value' entry, or
    None if there is no 'value' entry.'''
    args = dict(conv_items(str, items))
    if 'value' not in args: return None # input is empty
    date_range = dates.parse_date_range(args['value'])
    return date_range[1]
249
funcs['_dateRangeEnd'] = _dateRangeEnd
250 1311 aaronmk
251 328 aaronmk
# Each name part paired with the slice that selects it from a list of words,
# in the order the parts appear in a full name
_name_parts_slices_items = [
    ('first', slice(1)), # the first word
    ('middle', slice(1, -1)), # everything in between
    ('last', slice(-1, None)), # the last word
]
# Name part -> slice
name_parts_slices = dict(_name_parts_slices_items)
# Name parts in output order
name_parts = [name for name, slice_ in _name_parts_slices_items]
258
259 995 aaronmk
def _name(items):
    '''Joins the name parts present in items with single spaces, in the
    order given by name_parts. Entries with other names are ignored.'''
    given = dict(items)
    return ' '.join([given[part] for part in name_parts if part in given])
265 995 aaronmk
funcs['_name'] = _name
266 102 aaronmk
267 995 aaronmk
def _namePart(items):
    '''Extracts the named part from each entry's space-separated value and
    joins the results with _name().

    Each entry's name selects the slice of words to keep (see
    name_parts_slices).
    Throws SyntaxException if an entry's name isn't a known name part.'''
    selected = []
    for part, value in items:
        try:
            slice_ = name_parts_slices[part]
        except KeyError as e:
            raise SyntaxException(e)
        words = value.split(' ')
        selected.append((part, ' '.join(words[slice_])))
    return _name(selected)
274
funcs['_namePart'] = _namePart
275 1321 aaronmk
276
def _simplifyPath(items):
    '''Walks a chain of elements starting at the 'path' entry and removes
    every element that lacks the required content.

    From each element, the following one is found with the 'next' XPath. An
    element for which the 'require' XPath finds nothing is replaced by the
    element that follows it.
    Returns the (possibly replaced) root of the chain.
    Throws SyntaxException if 'next', 'require', or 'path' is missing.'''
    args = dict(items)
    try:
        next_xpath = cast(str, args['next'])
        require_xpath = cast(str, args['require'])
        root = args['path']
    except KeyError as e:
        raise SyntaxException(e)

    node = root
    while node is not None:
        new_node = xpath.get_1(node, next_xpath, allow_rooted=False)
        required = xpath.get_1(node, require_xpath, allow_rooted=False)
        if required is None: # empty elem
            xml_dom.replace(node, new_node) # remove current elem
            if node is root: root = new_node # also update root
        node = new_node
    return root
292
funcs['_simplifyPath'] = _simplifyPath