Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 111 aaronmk
7 818 aaronmk
import dates
8 300 aaronmk
import exc
9 917 aaronmk
import maps
10 1234 aaronmk
import strings
11 827 aaronmk
import term
12 1047 aaronmk
import util
13 86 aaronmk
import xml_dom
14 1321 aaronmk
import xpath
15 86 aaronmk
16 995 aaronmk
##### Exceptions
17
18 962 aaronmk
class SyntaxException(Exception):
    '''Signals that an XML function was given input it can't process.

    The message is 'Invalid XML function syntax: ' followed by
    exc.str_(cause).
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax: ' + exc.str_(cause)
        Exception.__init__(self, msg)
22 278 aaronmk
23 843 aaronmk
class FormatException(SyntaxException):
    '''SyntaxException subtype raised by _date when the resulting date can't be
    formatted'''
24
25 995 aaronmk
##### Functions
26
27
# Registry of the XML functions: maps a tag name (e.g. '_eq') to the function
# that implements it. process() looks functions up here.
funcs = dict()
28
29
def process(node, on_error=exc.raise_):
    '''Evaluates, in place, the XML functions in the subtree rooted at node.

    Children are processed before node itself, so a function sees the
    already-evaluated results of any functions nested inside it. A node is a
    function if its tag name starts with '_' and is registered in funcs; it is
    replaced by the text the function returns.

    If the function raises SyntaxException, the node is replaced by a comment
    containing the node's string form, and then on_error is called with the
    exception (whose message has been extended with that string form).
    '''
    for child in xml_dom.NodeElemIter(node): process(child, on_error)

    name = node.tagName
    if not (name.startswith('_') and name in funcs): return # not a function

    try: value = funcs[name](xml_dom.NodeTextEntryIter(node))
    except SyntaxException as e:
        str_ = str(node)
        exc.add_msg(e, 'function:\n'+str_)
        # Comments can't contain '--', so collapse every run of hyphens to one.
        # A single str.replace('--', '-') pass would turn '---' into '--'.
        comment = '\n'+re.sub(r'-{2,}', '-', term.emph_multiline(str_))
        xml_dom.replace(node, node.ownerDocument.createComment(comment))
        on_error(e)
    else: xml_dom.replace_with_text(node, value)
42
43 86 aaronmk
def map_items(func, items):
    '''Returns a list with one (name, func(value)) pair per (name, value) item'''
    mapped = []
    for name, value in items:
        mapped.append((name, func(value)))
    return mapped
45
46 1234 aaronmk
def cast(type_, val):
    '''Returns type_(val).
    Throws SyntaxException if the conversion fails with a ValueError.'''
    try: return type_(val)
    except ValueError as e: raise SyntaxException(e)
50
51 278 aaronmk
def conv_items(type_, items):
    '''Returns the items as a list of (name, value) pairs with every value cast
    to type_. The items are first passed through xml_dom.TextEntryOnlyIter.
    Throws SyntaxException (via cast) if a value can't be converted.'''
    def convert(val): return cast(type_, val)
    text_items = xml_dom.TextEntryOnlyIter(items)
    return map_items(convert, text_items)
54 278 aaronmk
55 995 aaronmk
#### XML functions
56
57
# Function names must start with _ to avoid collisions with real tags
58
# Functions take arguments (items)
59
60
def _ignore(items):
61 994 aaronmk
    '''Used to "comment out" an XML subtree'''
62
    return None
63 995 aaronmk
funcs['_ignore'] = _ignore
64 994 aaronmk
65 1234 aaronmk
def _eq(items):
    '''Compares the 'left' and 'right' items.
    Returns util.bool2str(left == right), or '' if either item is absent.'''
    entries = dict(items)
    if 'left' not in entries or 'right' not in entries:
        return '' # a value was None
    return util.bool2str(entries['left'] == entries['right'])
72
funcs['_eq'] = _eq
73
74
def _if(items):
    '''Returns the 'then' item if 'cond' is true, otherwise the 'else' item
    (None if there is no 'else').

    cond counts as true when its str() form is non-empty.
    Throws SyntaxException if 'cond' or 'then' is missing.
    '''
    items = dict(items)
    try:
        cond = items['cond']
        then = items['then']
    except KeyError as e: raise SyntaxException(e)
    else_ = items.get('else')
    if bool(cast(str, cond)): return then
    else: return else_
84
funcs['_if'] = _if
85
86 995 aaronmk
def _alt(items):
87 113 aaronmk
    items = list(items)
88
    items.sort()
89 1186 aaronmk
    try: return items[0][1] # value of lowest-numbered item
90 1187 aaronmk
    except IndexError: return None # input got removed by e.g. SyntaxException
91 995 aaronmk
funcs['_alt'] = _alt
92 113 aaronmk
93 995 aaronmk
def _merge(items):
    '''Passes the item values, ordered by sorting the (name, value) pairs, to
    maps.merge_values() and returns its result.
    Values are cast with strings.ustr first.'''
    # sorted() drains the iter exactly *once*; conv_items checks the types
    ordered = sorted(conv_items(strings.ustr, items))
    values = [pair[1] for pair in ordered]
    return maps.merge_values(*values)
98 995 aaronmk
funcs['_merge'] = _merge
99 917 aaronmk
100 995 aaronmk
def _label(items):
    '''Returns '<label>: <value>', built from the 'label' and 'value' items.
    Throws SyntaxException if either item is missing.'''
    items = dict(conv_items(str, items)) # get *once* from iter and check types
    try:
        label = items['label']
        value = items['value']
    except KeyError as e: raise SyntaxException(e)
    return label+': '+value
107 995 aaronmk
funcs['_label'] = _label
108 917 aaronmk
109 1047 aaronmk
def _nullIf(items):
    '''Returns util.none_if(value, null) for the 'value' and 'null' items.

    If the optional 'type' item is 'float', null is converted to a float
    first; otherwise it stays a str.
    NOTE(review): presumably util.none_if returns None when value equals
    null and value otherwise -- confirm against util.

    Throws SyntaxException if 'null' or 'value' is missing, or if null can't
    be converted to the requested type.
    '''
    items = dict(conv_items(str, items))
    try:
        null = items['null']
        value = items['value']
    except KeyError as e: raise SyntaxException(e)
    type_ = str
    if items.get('type') == 'float': type_ = float
    # cast() instead of a bare type_(null), so that an unparseable null is
    # reported as a SyntaxException rather than an uncaught ValueError
    return util.none_if(value, cast(type_, null))
119 1047 aaronmk
funcs['_nullIf'] = _nullIf
120
121 1219 aaronmk
def _map(items):
    '''Translates a value through a lookup table given by the items.

    The last item's value is the value to translate; all preceding items form
    the table (name -> replacement). If the table has a non-empty '_closed'
    entry, a value that isn't in the table is an error; otherwise such a value
    is returned unchanged.

    Throws SyntaxException if there are no items, or if the map is closed and
    the value isn't in it.
    '''
    items = conv_items(str, items) # get *once* from iter and check types
    try: value = items.pop()[1] # value is last entry's value
    except IndexError as e: raise SyntaxException(e)
    map_ = dict(items)
    closed = bool(map_.pop('_closed', False))
    try: return map_[value]
    except KeyError as e:
        if closed: raise SyntaxException(e)
        else: return value
131 1219 aaronmk
funcs['_map'] = _map
132
133
def _replace(items):
    '''Applies regular-expression replacements to a value.

    The last item's value is the string to edit. Every preceding item is a
    (pattern, replacement) pair, applied in order with re.sub(). A pattern
    consisting only of word characters is matched as a whole word.

    Throws SyntaxException if there are no items or a pattern is not a valid
    regular expression.
    '''
    items = conv_items(str, items) # get *once* from iter and check types
    # The entry is a (name, value) pair; only its value is the string to edit
    try: value = items.pop()[1] # value is last entry's value
    except IndexError as e: raise SyntaxException(e)
    try:
        for repl, with_ in items:
            if re.match(r'^\w+$', repl):
                # Not preceded or followed by a letter or digit
                repl = r'(?<![^\W_])'+repl+r'(?![^\W_])' # match whole word
            value = re.sub(repl, with_, value)
    except re.error as e: raise SyntaxException(e) # same as sre_constants.error
    return value
144
funcs['_replace'] = _replace
145
146 1225 aaronmk
def _units(items):
    '''Returns the 'value' item unchanged.

    The 'units' item is required but not used yet (see the comment on the
    return statement).
    Throws SyntaxException if 'units' or 'value' is missing.
    '''
    items = dict(conv_items(str, items))
    try:
        units = items['units']
        value = items['value']
    except KeyError as e: raise SyntaxException(e)
    return value#+' '+units # don't add yet because unit conversion isn't ready
153
funcs['_units'] = _units
154
155 995 aaronmk
def _range(items):
    '''Returns str(to - from) for the 'from' and 'to' items, which are cast to
    float. Returns None if either item is missing.'''
    items = dict(conv_items(float, items))
    from_ = items.get('from')
    to = items.get('to')
    if from_ is None or to is None: return None
    return str(to - from_)
161 995 aaronmk
funcs['_range'] = _range
162 86 aaronmk
163 995 aaronmk
def _avg(items):
    '''Returns the arithmetic mean of the item values (cast to float) as a str,
    or None if there are no items'''
    values = [value for name, value in conv_items(float, items)]
    if not values: return None # nothing to average; avoids ZeroDivisionError
    return str(sum(values, 0.)/len(values))
170 995 aaronmk
funcs['_avg'] = _avg
171 86 aaronmk
172 968 aaronmk
class CvException(Exception):
    '''Explains why a CV (coefficient of variation) value was rejected.
    Takes no arguments; the message is fixed.'''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
177
178 995 aaronmk
def _noCV(items):
179 968 aaronmk
    try: name, value = items.next()
180
    except StopIteration: return None
181
    if re.match('^(?i)CV *\d+$', value): raise SyntaxException(CvException())
182
    return value
183 995 aaronmk
funcs['_noCV'] = _noCV
184 968 aaronmk
185 995 aaronmk
def _date(items):
    '''Returns a date formatted as YYYY-MM-DD.

    The date comes from either:
    - a 'date' item. A number is taken as a year whose fractional part is a
      fraction of a 365-day year; anything else is parsed with
      dates.strtotime(). If that raises ImportError, the string is returned
      unchanged.
    - otherwise, integer items passed as keyword arguments to datetime.date():
      'year' (required), 'month' and 'day' (both default to 1). Returns None if
      there are no items at all.

    Throws SyntaxException if the date is missing its year, can't be parsed, or
    is out of range, and FormatException if it can't be formatted.
    '''
    items = conv_items(str, items) # get *once* from iter and check types
    try: str_ = dict(items)['date']
    except KeyError:
        items = dict(conv_items(int, items))
        try: items['year'] # year is required
        except KeyError as e:
            if items == {}: return None # entire date is empty
            else: raise SyntaxException(e)
        items.setdefault('month', 1)
        items.setdefault('day', 1)
        # TypeError: an item name that datetime.date() doesn't accept
        try: date = datetime.date(**items)
        except (ValueError, TypeError) as e: raise SyntaxException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise SyntaxException(e)
        else:
            # Out-of-range, NaN and infinite years are input errors too
            try: date = (datetime.date(int(year), 1, 1) +
                datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e: raise SyntaxException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
208 995 aaronmk
funcs['_date'] = _date
209 86 aaronmk
210 1311 aaronmk
# Separates the start of a date range from its end
date_range_sep = '-'
# Separates the parts within a single date
date_range_date_part_sep = ' '
212
213
def _dateRangePart(items):
    '''Unfinished: handles the 'value' item as a possible date range.

    The stripped value is returned unchanged if it contains no
    date_range_date_part_sep or no date_range_sep (i.e. is not a range).
    NOTE(review): for an actual range the function currently returns None and
    never uses the 'part' item; presumably it should return the requested part
    of the range -- confirm the intended behavior before relying on it.

    Throws SyntaxException if 'part' or 'value' is missing.
    '''
    items = dict(conv_items(str, items))
    try:
        part = items['part']
        value = items['value']
    except KeyError as e: raise SyntaxException(e)
    value = value.strip()
    if date_range_date_part_sep not in value: return value
        # date_range_sep might be used as date part separator instead

    start, sep, end = value.partition(date_range_sep)
    if sep == '': return value # not a range
    start = start.split(date_range_date_part_sep)
    end = end.split(date_range_date_part_sep)
    # TODO: pick the requested part; the original sketch stopped here:
    #if len(end) < 3
    #if len(start) < len(end): # range
    return None # explicit, instead of silently falling off the end
229
funcs['_dateRangePart'] = _dateRangePart
230
231 328 aaronmk
_name_parts_slices_items = [
232
    ('first', slice(None, 1)),
233
    ('middle', slice(1, -1)),
234
    ('last', slice(-1, None)),
235
]
236
name_parts_slices = dict(_name_parts_slices_items)
237
name_parts = [name for name, slice_ in _name_parts_slices_items]
238
239 995 aaronmk
def _name(items):
    '''Joins the name parts that are present in items with single spaces, in
    name_parts order. Items with other names are ignored.'''
    given = dict(items)
    present = [given[part] for part in name_parts if part in given]
    return ' '.join(present)
245 995 aaronmk
funcs['_name'] = _name
246 102 aaronmk
247 995 aaronmk
def _namePart(items):
    '''Extracts name parts from full names.

    Each item's name is the part to extract and its value is a full name. The
    value is split on single spaces and the words selected by
    name_parts_slices[part] are kept. The extracted parts are then combined by
    _name().

    Throws SyntaxException if an item's name is not a known name part.
    '''
    out_items = []
    for part, value in items:
        try: slice_ = name_parts_slices[part]
        except KeyError as e: raise SyntaxException(e)
        out_items.append((part, ' '.join(value.split(' ')[slice_])))
    return _name(out_items)
254
funcs['_namePart'] = _namePart
255 1321 aaronmk
256
def _simplifyPath(items):
    '''Removes the empty elements from a chain of nodes.

    Starting at the 'path' item, repeatedly follows the 'next' XPath from the
    current node. A node for which the 'require' XPath finds nothing is
    replaced by its next node.
    Returns the resulting first node of the chain.

    Throws SyntaxException if 'next', 'require' or 'path' is missing.
    '''
    items = dict(items)
    try:
        next_ = cast(str, items['next']) # next_: don't shadow the builtin
        require = cast(str, items['require'])
        root = items['path']
    except KeyError as e: raise SyntaxException(e)

    node = root
    while node is not None:
        new_node = xpath.get_1(node, next_, allow_rooted=False)
        if xpath.get_1(node, require, allow_rooted=False) is None: # empty elem
            xml_dom.replace(node, new_node) # remove current elem
            if node is root: root = new_node # also update root
        node = new_node
    return root
272
funcs['_simplifyPath'] = _simplifyPath