Project

General

Profile

1
# XML "function" nodes that transform their contents
2

    
3
import datetime
4
import re
5
import sre_constants
6

    
7
import dates
8
import exc
9
import maps
10
import strings
11
import term
12
import units
13
import util
14
import xml_dom
15
import xpath
16

    
17
##### Exceptions
18

    
19
class SyntaxException(exc.ExceptionWithCause):
    '''Raised when an XML function is used with invalid syntax or arguments.

    @param cause The underlying exception; its string form (via exc.str_())
        is appended to the message
    '''
    def __init__(self, cause):
        exc.ExceptionWithCause.__init__(self, 'Invalid XML function syntax: '
            +exc.str_(cause))
23

    
24
class FormatException(SyntaxException):
    '''SyntaxException subtype raised by _date() when a date can't be formatted
    '''
25

    
26
##### Functions
27

    
28
# Registry of XML functions: tag name (starting with '_') -> handler.
# process() looks tags up here and calls the handler with the node's items.
funcs = {}
29

    
30
def process(node, on_error=exc.raise_):
    '''Evaluates every XML function in the subtree rooted at node, in place.

    Children are processed before node itself, so a function receives the
    results of the functions nested inside it.
    @param node Element whose tagName is looked up in funcs
    @param on_error Called with the exception when a function fails (after the
        failed node has been replaced by a comment); defaults to exc.raise_
    '''
    for child in xml_dom.NodeElemIter(node): process(child, on_error)
    name = node.tagName
    if name.startswith('_') and name in funcs:
        try:
            value = funcs[name](xml_dom.NodeTextEntryIter(node))
            xml_dom.replace_with_text(node, value)
        except Exception as e: # also catch XML func internal errors
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            # Leave the failed function in the document as a comment
            xml_dom.replace(node, node.ownerDocument.createComment(
                '\n'+term.emph_multiline(str_).replace('--','-')))
                # comments can't contain '--'
            on_error(e)
46

    
47
def map_items(func, items):
    '''Applies func to the value of every (name, value) pair.

    @param func Callable taking one value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in input order
    '''
    return [(name, func(value)) for name, value in items]
49

    
50
def cast(type_, val):
    '''Converts val using type_.

    @param type_ Callable performing the conversion (e.g. int, float)
    @param val The value to convert
    @return type_(val)
    Throws SyntaxException if can't cast (i.e. type_ raised ValueError)
    '''
    try: return type_(val)
    except ValueError as e: raise SyntaxException(e)
54

    
55
def conv_items(type_, items):
    '''Casts the value of every item to type_.

    The items are first wrapped in xml_dom.TextEntryOnlyIter (presumably
    restricting them to text entries -- see xml_dom).
    @param type_ Callable performing the conversion, applied through cast()
    @param items Iterable of (name, value) pairs
    @return list of (name, converted value) pairs
    Throws SyntaxException if a value can't be cast
    '''
    return map_items(lambda val: cast(type_, val),
        xml_dom.TextEntryOnlyIter(items))
58

    
59
##### XML functions
60

    
61
# Function names must start with _ to avoid collisions with real tags
62
# Functions take arguments (items)
63

    
64
#### General
65

    
66
def _ignore(items):
67
    '''Used to "comment out" an XML subtree'''
68
    return None
69
funcs['_ignore'] = _ignore
70

    
71
#### Conditionals
72

    
73
def _eq(items):
74
    items = dict(items)
75
    try:
76
        left = items['left']
77
        right = items['right']
78
    except KeyError: return '' # a value was None
79
    return util.bool2str(left == right)
80
funcs['_eq'] = _eq
81

    
82
def _if(items):
    '''Selects between the "then" and "else" entries based on "cond".

    The condition holds when its string form is non-empty.
    @param items Iterable of (name, value) pairs; "cond" and "then" are
        required, "else" is optional
    @return The "then" value if cond holds, otherwise the "else" value (None if
        there is no "else" entry)
    Throws SyntaxException if "cond" or "then" is missing
    '''
    items = dict(items)
    try:
        cond = items['cond']
        then = items['then']
    except KeyError as e: raise SyntaxException(e)
    else_ = items.get('else', None)
    cond = bool(cast(strings.ustr, cond))
    if cond: return then
    else: return else_
funcs['_if'] = _if
93

    
94
#### Combining values
95

    
96
def _alt(items):
97
    items = list(items)
98
    items.sort()
99
    try: return items[0][1] # value of lowest-numbered item
100
    except IndexError: return None # input got removed by e.g. SyntaxException
101
funcs['_alt'] = _alt
102

    
103
def _merge(items):
    '''Merges all values, taken in order of their entry names.

    @param items Iterable of (name, value) pairs
    @return The result of maps.merge_values() on the string values
    Throws SyntaxException if a value can't be converted to a string
    '''
    items = list(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    items.sort()
    return maps.merge_values(*[v for k, v in items])
funcs['_merge'] = _merge
109

    
110
def _label(items):
    '''Prefixes a value with a label.

    @param items Iterable of (name, value) pairs containing "label" and "value"
    @return The string "<label>: <value>"
    Throws SyntaxException if "label" or "value" is missing
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try:
        label = items['label']
        value = items['value']
    except KeyError as e: raise SyntaxException(e)
    return label+': '+value
funcs['_label'] = _label
119

    
120
#### Transforming values
121

    
122
# Converters selectable through _nullIf's optional "type" entry.
# The None key is used when no type is given, i.e. values are strings.
types_by_name = {None: strings.ustr, 'str': strings.ustr, 'float': float}
123

    
124
def _nullIf(items):
    '''Turns a value into None when it equals a designated null value.

    @param items Iterable of (name, value) pairs
        "null" (required) The value that stands for None
        "value" (optional) The input value
        "type" (optional) Key of types_by_name to compare as; default string
    @return None if the value equals null, otherwise the value
    Throws SyntaxException if "null" is missing, the type name is unknown, or
        null can't be converted to the type
    '''
    items = dict(conv_items(strings.ustr, items))
    try: null = items['null']
    except KeyError as e: raise SyntaxException(e)
    value = items.get('value', None)
    type_str = items.get('type', None)

    try: type_ = types_by_name[type_str]
    except KeyError as e: raise SyntaxException(e)
    # An unconvertible null is a usage error, so report it like the others
    null = cast(type_, null)

    try: return util.none_if(value, null)
    except ValueError: return value # value not convertible, so can't equal null
funcs['_nullIf'] = _nullIf
138

    
139
def _map(items):
    '''Raises error if value not in map and no special '*' entry
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings
            name "*" means all other input values
            value "*" means keep input value the same
            value "" means ignore input value
    @return The mapped value, or None if it maps to the empty string
    Throws SyntaxException if there are no entries at all, or if the value has
        no mapping and there is no "*" entry
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    try: value = items.pop()[1] # last entry contains value
    except IndexError as e: raise SyntaxException(e)
    map_ = dict(items)

    try: new_value = map_[value]
    except KeyError as e:
        # Save traceback right away in case another exception raised
        se = SyntaxException(e)
        try: new_value = map_['*']
        except KeyError: raise se
    if new_value == '*': new_value = value # '*' means keep input value the same
    return util.none_if(new_value, u'') # empty map entry means None
funcs['_map'] = _map
162

    
163
def _replace(items):
    '''Applies a sequence of regular-expression replacements to a value.

    @param items
        <last_entry> Value
        <other_entries> pattern=replacement pairs, applied in order. A pattern
            made up only of word characters matches whole words only.
    @return The resulting string, or None if it is empty
    Throws SyntaxException if there are no entries or a pattern is invalid
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    try: value = items.pop()[1] # last entry contains value
    except IndexError as e: raise SyntaxException(e)
    try:
        for repl, with_ in items:
            if re.match(r'^\w+$', repl):
                repl = r'(?<![^\W_])'+repl+r'(?![^\W_])' # match whole word
            value = re.sub(repl, with_, value)
    except re.error as e: raise SyntaxException(e)
    return util.none_if(value, u'') # empty strings always mean None
funcs['_replace'] = _replace
175

    
176
#### Quantities
177

    
178
def _units(items):
    '''Applies unit actions to a quantity.

    @param items
        <last_entry> Must be named "value"; the quantity as a string
        <other_entries> action=units pairs, applied in order
            action "default" sets the units to use if the quantity has none
            action "to" converts the quantity to the given units
            units "" is passed on as None
    @return The resulting quantity as a string, or None if there is no value
    Throws SyntaxException on an unknown action, a failed conversion, or
        missing units
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    try: last = items.pop() # last entry contains value
    except IndexError: return None # input is empty and no actions
    if last[0] != 'value': return None # input is empty
    str_ = last[1]

    quantity = units.str2quantity(str_)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default': units.set_default_units(quantity, units_)
            elif action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise SyntaxException(e)
            else: raise SyntaxException(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise SyntaxException(e)
    return units.quantity2str(quantity)
funcs['_units'] = _units
197

    
198
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first occurrence of the separator.

    @param str_ The string to split
    @param range_sep The separator between the start and end of the range
    @return (start, end) with surrounding whitespace stripped, or
        (str_, None), unchanged, if str_ is not a range
    '''
    default = (str_, None)
    start, sep, end = str_.partition(range_sep)
    if sep == '': return default # not a range
    if start == '' and range_sep == '-': return default # negative number
    return tuple(d.strip() for d in (start, end))
204

    
205
def _rangeStart(items):
    '''Extracts the start of a range.

    @param items Iterable of (name, value) pairs; the range is in "value"
    @return The first element of parse_range()'s result, or None if there is no
        "value" entry
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    return parse_range(value)[0]
funcs['_rangeStart'] = _rangeStart
211

    
212
def _rangeEnd(items):
    '''Extracts the end of a range.

    @param items Iterable of (name, value) pairs; the range is in "value"
    @return The second element of parse_range()'s result (None if the value is
        not a range), or None if there is no "value" entry
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    return parse_range(value)[1]
funcs['_rangeEnd'] = _rangeEnd
218

    
219
def _range(items):
    '''Computes the size of a numeric range.

    @param items Iterable of (name, value) pairs with "from" and "to" entries
    @return str(to - from), or None if either entry is missing
    Throws SyntaxException if a value can't be converted to a float
    '''
    items = dict(conv_items(float, items))
    from_ = items.get('from', None)
    to = items.get('to', None)
    if from_ is None or to is None: return None
    return str(to - from_)
funcs['_range'] = _range
226

    
227
def _avg(items):
    '''Computes the arithmetic mean of all values.

    @param items Iterable of (name, value) pairs; the names are not used
    @return The mean as a string, or None if there are no values
    Throws SyntaxException if a value can't be converted to a float
    '''
    count = 0
    sum_ = 0.
    for name, value in conv_items(float, items):
        count += 1
        sum_ += value
    if count == 0: return None # input is empty
    else: return str(sum_/count)
funcs['_avg'] = _avg
236

    
237
class CvException(Exception):
    '''Reports a CV (coefficient of variation) value where none is allowed'''
    def __init__(self):
        Exception.__init__(self, 'CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
242

    
243
def _noCV(items):
244
    try: name, value = items.next()
245
    except StopIteration: return None
246
    if re.match('^(?i)CV *\d+$', value): raise SyntaxException(CvException())
247
    return value
248
funcs['_noCV'] = _noCV
249

    
250
#### Dates
251

    
252
def _date(items):
    '''Normalizes a date to YYYY-MM-DD.

    @param items Iterable of (name, value) pairs, either
        "date" A date string or a (possibly fractional) year number, or
        "year" (required), "month", "day" Date parts. month may be a month
            name. month and day default to 1, and are swapped if month is out
            of range.
    @return The formatted date, or None if there are no entries. The "date"
        string is returned unchanged if dates.strtotime() raises ImportError.
    Throws SyntaxException if the year is missing or the date is invalid
    Throws FormatException if the date can't be formatted
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try: str_ = items['date']
    except KeyError:
        # Year is required
        try: items['year']
        except KeyError as e:
            if items == {}: return None # entire date is empty
            else: raise SyntaxException(e)

        # Convert month name to number
        try: month = items['month']
        except KeyError: pass
        else:
            if not month.isdigit(): # month is name
                items['month'] = str(dates.strtotime(month).month)

        items = dict(conv_items(int, items.iteritems()))
        items.setdefault('month', 1)
        items.setdefault('day', 1)

        for try_num in xrange(2):
            try:
                date = datetime.date(**items)
                break
            except ValueError as e:
                if try_num > 0: raise SyntaxException(e)
                    # exception still raised after retry
                msg = strings.ustr(e)
                if msg == 'month must be in 1..12': # try swapping month and day
                    items['month'], items['day'] = items['day'], items['month']
                else: raise SyntaxException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise SyntaxException(e)
        else:
            # Numeric input is a year; its fraction selects the day of the year
            try:
                date = (datetime.date(int(year), 1, 1) +
                    datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e:
                # e.g. year 0, nan or inf: invalid input, not an internal error
                raise SyntaxException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
funcs['_date'] = _date
296

    
297
def _dateRangeStart(items):
    '''Extracts the start of a date range.

    @param items Iterable of (name, value) pairs; the range is in "value"
    @return The first element of dates.parse_date_range()'s result, or None if
        there is no "value" entry
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    return dates.parse_date_range(value)[0]
funcs['_dateRangeStart'] = _dateRangeStart
303

    
304
def _dateRangeEnd(items):
    '''Extracts the end of a date range.

    @param items Iterable of (name, value) pairs; the range is in "value"
    @return The second element of dates.parse_date_range()'s result, or None if
        there is no "value" entry
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    return dates.parse_date_range(value)[1]
funcs['_dateRangeEnd'] = _dateRangeEnd
310

    
311
#### Names
312

    
313
# Name parts, in output order, each with the slice that selects its words from
# a space-separated full name
_name_parts_slices_items = [
    ('first', slice(None, 1)),
    ('middle', slice(1, -1)),
    ('last', slice(-1, None)),
]
name_parts_slices = dict(_name_parts_slices_items) # part name -> slice
name_parts = [name for name, slice_ in _name_parts_slices_items] # ordered
320

    
321
def _name(items):
    '''Joins name parts into a full name.

    @param items Iterable of (part, value) pairs; only the parts listed in
        name_parts are used
    @return The values present, in name_parts order, separated by spaces
    '''
    items = dict(items)
    parts = []
    for part in name_parts:
        if part in items: parts.append(items[part])
    return ' '.join(parts)
funcs['_name'] = _name
328

    
329
def _namePart(items):
    '''Extracts parts of full names.

    @param items Iterable of (part, full name) pairs, where part is a key of
        name_parts_slices
    @return The requested part of each full name, combined by _name()
    Throws SyntaxException if a part name is not recognized
    '''
    out_items = []
    for part, value in items:
        try: slice_ = name_parts_slices[part]
        except KeyError as e: raise SyntaxException(e)
        # Select the words of the space-separated name belonging to this part
        out_items.append((part, ' '.join(value.split(' ')[slice_])))
    return _name(out_items)
funcs['_namePart'] = _namePart
337

    
338
#### Paths
339

    
340
def _simplifyPath(items):
    '''Removes the elements of a chain that lack a required subpath.

    Starting at the "path" node, each element is followed to its successor
    using the "next" XPath. An element for which the "require" XPath finds
    nothing is replaced by its successor.
    @param items Iterable of (name, value) pairs
        "next" XPath leading from an element to the next one in the chain
        "require" XPath that must match for an element to be kept
        "path" The first node of the chain
    @return The first node of the simplified chain
    Throws SyntaxException if any of the three entries is missing
    '''
    items = dict(items)
    try:
        next = cast(strings.ustr, items['next'])
        require = cast(strings.ustr, items['require'])
        root = items['path']
    except KeyError as e: raise SyntaxException(e)

    node = root
    while node is not None:
        new_node = xpath.get_1(node, next, allow_rooted=False)
        if xpath.get_1(node, require, allow_rooted=False) is None: # empty elem
            xml_dom.replace(node, new_node) # remove current elem
            if node is root: root = new_node # also update root
        node = new_node
    return root
funcs['_simplifyPath'] = _simplifyPath
(17-17/19)