Project

General

Profile

1
# XML "function" nodes that transform their contents
2

    
3
import datetime
4
import re
5
import sre_constants
6

    
7
import dates
8
import exc
9
import format
10
import maps
11
import strings
12
import term
13
import units
14
import util
15
import xml_dom
16
import xpath
17

    
18
##### Exceptions
19

    
20
class SyntaxException(exc.ExceptionWithCause):
    '''Raised by XML functions whose arguments are missing or invalid.

    @param cause The underlying exception. Its text, as rendered by
        exc.str_(), is appended to the 'Invalid XML function syntax: ' prefix.
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax: ' + exc.str_(cause)
        exc.ExceptionWithCause.__init__(self, msg)
24

    
25
class FormatException(SyntaxException):
    '''A SyntaxException for a value that could not be formatted for output'''
    pass
26

    
27
##### Functions
28

    
29
funcs = {}
30

    
31
def process(node, on_error=exc.raise_):
    '''Evaluates the XML function nodes in the subtree rooted at node, in place.

    Child elements are processed before node itself, so a function receives
    the already-evaluated results of any functions nested inside it.

    @param node The element to process. If its tag name starts with '_' and is
        a key of funcs, that function is called with the node's entries and its
        result is passed to xml_dom.replace_with_text().
    @param on_error Called with the exception when a function fails, after the
        failing node has been replaced by a comment built from the node's
        string form.
    '''
    for child in xml_dom.NodeElemIter(node):
        process(child, on_error)

    name = node.tagName
    if not (name.startswith('_') and name in funcs):
        return # ordinary element: nothing to evaluate

    try:
        value = funcs[name](xml_dom.NodeTextEntryIter(node))
        xml_dom.replace_with_text(node, value)
    except Exception as e: # also catch XML func internal errors
        # Save in case another exception raised, overwriting sys.exc_info()
        exc.add_traceback(e)
        str_ = strings.ustr(node)
        exc.add_msg(e, 'function:\n' + str_)
        # Comments can't contain '--'. Collapse whole runs of hyphens: a plain
        # .replace('--', '-') turns '---' into '--', which is still invalid.
        comment_text = re.sub(r'-{2,}', '-', term.emph_multiline(str_))
        xml_dom.replace(node,
            node.ownerDocument.createComment('\n' + comment_text))
        on_error(e)
47

    
48
def map_items(func, items):
    '''Applies func to the value of every (name, value) pair, keeping the names.

    @param func Callable taking one value and returning its replacement
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in input order
    '''
    return [(name, func(value)) for name, value in items]
50

    
51
def cast(type_, val):
    '''Converts val with type_, reporting a failed conversion as a syntax error.

    @param type_ Callable that converts a value, e.g. float
    @param val The value to convert
    @return type_(val)
    @throws SyntaxException if type_ raises ValueError (can't cast)
    '''
    try:
        return type_(val)
    except ValueError as e:
        raise SyntaxException(e)
55

    
56
def conv_items(type_, items):
    '''Casts the value of every item to type_.

    The items are first wrapped in xml_dom.TextEntryOnlyIter.

    @param type_ Callable that converts a value, e.g. float
    @param items Iterable of (name, value) pairs
    @return list of (name, type_(value)) pairs
    @throws SyntaxException if a value can't be cast
    '''
    def convert(val):
        return cast(type_, val)
    return map_items(convert, xml_dom.TextEntryOnlyIter(items))
59

    
60
def pop_value(items):
    '''Removes the last entry of items and returns its value.

    The last entry is removed even when it turns out not to be the 'value'
    entry.

    @param items Mutable list of (name, value) pairs; modified in place
    @return The value of the trailing 'value' entry, or None if items is empty
        or its last entry is not named 'value'
    '''
    try:
        last = items.pop() # last entry contains value
    except IndexError:
        return None # input is empty and no actions
    if last[0] != 'value':
        return None # input is empty
    return last[1]
65

    
66
##### XML functions
67

    
68
# Function names must start with _ to avoid collisions with real tags
69
# Functions take arguments (items)
70

    
71
#### General
72

    
73
def _ignore(items):
74
    '''Used to "comment out" an XML subtree'''
75
    return None
76
funcs['_ignore'] = _ignore
77

    
78
#### Conditionals
79

    
80
def _eq(items):
81
    items = dict(items)
82
    try:
83
        left = items['left']
84
        right = items['right']
85
    except KeyError: return '' # a value was None
86
    return util.bool2str(left == right)
87
funcs['_eq'] = _eq
88

    
89
def _if(items):
    '''Selects the 'then' or the 'else' argument depending on 'cond'.

    @param items (name, value) pairs. 'cond' and 'then' are required; 'else'
        is optional.
    @return The 'then' value if 'cond', converted with strings.ustr, is truthy;
        otherwise the 'else' value, or None if there is no 'else'
    @throws SyntaxException if 'cond' or 'then' is missing, or 'cond' can't be
        cast
    '''
    items = dict(items)
    try:
        cond = items['cond']
        then = items['then']
    except KeyError as e:
        raise SyntaxException(e)
    else_ = items.get('else', None)

    if bool(cast(strings.ustr, cond)):
        return then
    return else_
99
funcs['_if'] = _if
100

    
101
#### Combining values
102

    
103
def _alt(items):
104
    items = list(items)
105
    items.sort()
106
    try: return items[0][1] # value of lowest-numbered item
107
    except IndexError: return None # input got removed by e.g. SyntaxException
108
funcs['_alt'] = _alt
109

    
110
def _merge(items):
    '''Merges all the values, taken in sorted item order.

    @param items Iterable of (name, value) pairs
    @return The result of maps.merge_values() called with the values as
        positional arguments
    @throws SyntaxException if a value can't be cast with strings.ustr
    '''
    # get *once* from iter, check types
    ordered = sorted(conv_items(strings.ustr, items))
    values = [value for name, value in ordered]
    return maps.merge_values(*values)
115
funcs['_merge'] = _merge
116

    
117
def _label(items):
    '''Prefixes a value with a label, as "label: value".

    @param items (name, value) pairs containing 'label' and 'value'
    @return label+': '+value
    @throws SyntaxException if 'label' or 'value' is missing
    '''
    # get *once* from iter, check types
    items = dict(conv_items(strings.ustr, items))
    try:
        label = items['label']
        value = items['value']
    except KeyError as e:
        raise SyntaxException(e)
    return label + ': ' + value
125
funcs['_label'] = _label
126

    
127
#### Transforming values
128

    
129
types_by_name = {None: strings.ustr, 'str': strings.ustr, 'float': float}
130

    
131
def _nullIf(items):
    '''Returns the result of util.none_if(value, null).

    @param items (name, value) pairs:
        null (required): the value to compare against
        value: the input value; None if absent
        type: a key of types_by_name naming the type null is converted to;
            if absent, the None entry of types_by_name is used
    @return util.none_if(value, null), or value unchanged if that raises
        ValueError
    @throws SyntaxException if 'null' is missing or 'type' is not a key of
        types_by_name
    '''
    items = dict(conv_items(strings.ustr, items))
    try:
        null = items['null']
    except KeyError as e:
        raise SyntaxException(e)
    value = items.get('value', None)
    type_str = items.get('type', None)

    try:
        type_ = types_by_name[type_str]
    except KeyError as e:
        raise SyntaxException(e)
    # NOTE(review): a ValueError here (e.g. a non-numeric null with type float)
    # propagates as-is instead of going through cast() -- confirm intended
    null = type_(null)

    try:
        return util.none_if(value, null)
    except ValueError:
        return value # value not convertible, so can't equal null
144
funcs['_nullIf'] = _nullIf
145

    
146
def repl(repls, value):
    '''Looks up the replacement for value in a map.
    Raises error if value not in map and no special '*' entry
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @param value The input value to look up
    @return The result of util.none_if(new_value, u''), where new_value is the
        mapped value
    @throws SyntaxException if value is not in repls and there is no '*' entry
    '''
    try:
        new_value = repls[value]
    except KeyError as e:
        # Save traceback right away in case another exception raised
        se = SyntaxException(e)
        try:
            new_value = repls['*']
        except KeyError:
            raise se
    if new_value == '*':
        new_value = value # '*' means keep input value the same
    return util.none_if(new_value, u'') # empty map entry means None
161

    
162
def _map(items):
    '''Maps the input value to a replacement. See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value, or None if the input is empty
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None:
        return None # input is empty
    return repl(dict(items), value)
172
funcs['_map'] = _map
173

    
174
def _replace(items):
    '''Applies a sequence of regular expression substitutions to a value.

    @param items
        <last_entry> Value
        <other_entries> pattern=replacement pairs, applied in order with
            re.sub(). A pattern consisting only of word characters matches
            whole words only.
    @return The result of util.none_if(value, u'') on the substituted value, or
        None if the input is empty
    @throws SyntaxException if a pattern or replacement is not valid for re
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None:
        return None # input is empty
    try:
        # the loop variable is not named repl so as not to shadow repl()
        for pattern, with_ in items:
            if re.match(r'^\w+$', pattern):
                # match whole word
                pattern = r'(?<![^\W_])' + pattern + r'(?![^\W_])'
            value = re.sub(pattern, with_, value)
    except re.error as e: # re.error is the public name of sre_constants.error
        raise SyntaxException(e)
    return util.none_if(value, u'') # empty strings always mean None
185
funcs['_replace'] = _replace
186

    
187
#### Quantities
188

    
189
def _units(items):
    '''Applies unit actions to a quantity.

    @param items
        <last_entry> Value, parsed with units.str2quantity()
        <other_entries> action=units pairs, applied in order:
            default: passed to units.set_default_units()
            to: the quantity is replaced by units.convert(quantity, units)
    @return units.quantity2str() of the resulting quantity, or None if the
        input is empty
    @throws SyntaxException on an unknown action, on a ValueError from
        units.convert(), or on a units.MissingUnitsException
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None:
        return None # input is empty

    quantity = units.str2quantity(value)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default':
                units.set_default_units(quantity, units_)
            elif action == 'to':
                try:
                    quantity = units.convert(quantity, units_)
                except ValueError as e:
                    raise SyntaxException(e)
            else:
                raise SyntaxException(ValueError('Invalid action: ' + action))
    except units.MissingUnitsException as e:
        raise SyntaxException(e)
    return units.quantity2str(quantity)
205
funcs['_units'] = _units
206

    
207
def parse_range(str_, range_sep='-'):
    '''Splits a "start<sep>end" string at the first separator.

    @param str_ The string to split
    @param range_sep The separator between the two endpoints
    @return (start, end), each stripped of surrounding whitespace. If str_ does
        not contain the separator, or the separator is '-' and str_ begins with
        it (a negative number), returns (str_, None) with str_ unmodified.
    '''
    not_a_range = (str_, None)
    start, sep, end = str_.partition(range_sep)
    if sep == '':
        return not_a_range # not a range
    if start == '' and range_sep == '-':
        return not_a_range # negative number
    return (start.strip(), end.strip())
213

    
214
def _rangeStart(items):
    '''Returns the start of the range in 'value'. See parse_range()

    @param items (name, value) pairs containing 'value'
    @return The first element of parse_range(value), or None if there is no
        'value'
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items:
        return None # input is empty
    start, end = parse_range(items['value'])
    return start
219
funcs['_rangeStart'] = _rangeStart
220

    
221
def _rangeEnd(items):
    '''Returns the end of the range in 'value'. See parse_range()

    @param items (name, value) pairs containing 'value'
    @return The second element of parse_range(value), which is None when value
        is not a range, or None if there is no 'value'
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items:
        return None # input is empty
    start, end = parse_range(items['value'])
    return end
226
funcs['_rangeEnd'] = _rangeEnd
227

    
228
def _range(items):
    '''Returns the size of the range between 'from' and 'to'.

    @param items (name, value) pairs containing 'from' and 'to'; the values are
        cast to float
    @return str(to - from), or None if either endpoint is missing
    @throws SyntaxException if a value can't be cast to float
    '''
    items = dict(conv_items(float, items))
    from_ = items.get('from', None)
    to = items.get('to', None)
    if from_ is None or to is None:
        return None
    return str(to - from_)
234
funcs['_range'] = _range
235

    
236
def _avg(items):
    '''Returns the arithmetic mean of all the values.

    @param items (name, value) pairs; the values are cast to float and the
        names are ignored
    @return str() of the mean, or None if there are no items
    @throws SyntaxException if a value can't be cast to float
    '''
    values = [value for name, value in conv_items(float, items)]
    if not values:
        return None # input is empty
    return str(sum(values, 0.)/len(values))
244
funcs['_avg'] = _avg
245

    
246
class CvException(Exception):
    '''Signals that a CV (coefficient of variation) value was found where it is
    not allowed. Takes no arguments; the message is fixed.
    '''
    def __init__(self):
        Exception.__init__(self, 'CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
251

    
252
def _noCV(items):
253
    try: name, value = items.next()
254
    except StopIteration: return None
255
    if re.match('^(?i)CV *\d+$', value): raise SyntaxException(CvException())
256
    return value
257
funcs['_noCV'] = _noCV
258

    
259
#### Dates
260

    
261
def _date(items):
    '''Converts a date, given whole or in parts, to YYYY-MM-DD form.

    @param items Either
        date: A date string parsed with dates.strtotime(), or a number, which
            is taken as a year whose fractional part selects the day within a
            365-day year
        or
        year (required), month, day: The parts, converted with
            format.str2int(). month may instead be a month name, which is
            resolved with dates.strtotime(). month and day default to 1. If
            the month is out of range, month and day are swapped and the date
            is tried once more.
    @return The date formatted by dates.strftime(); None if there are no
        items; the date string unchanged if dates.strtotime() raises
        ImportError
    @throws SyntaxException if the year is missing or the date is invalid
    @throws FormatException if dates.strftime() raises ValueError
    '''
    # get *once* from iter, check types
    items = dict(conv_items(strings.ustr, items))
    try:
        str_ = items['date']
    except KeyError:
        # Year is required
        try:
            items['year']
        except KeyError as e:
            if items == {}:
                return None # entire date is empty
            else:
                raise SyntaxException(e)

        # Convert month name to number
        try:
            month = items['month']
        except KeyError:
            pass
        else:
            if not month.isdigit(): # month is name
                try:
                    items['month'] = str(dates.strtotime(month).month)
                except ValueError as e:
                    raise SyntaxException(e)

        items = dict(conv_items(format.str2int, items.items()))
        items.setdefault('month', 1)
        items.setdefault('day', 1)

        for try_num in range(2):
            try:
                date = datetime.date(**items)
                break
            except ValueError as e:
                if try_num > 0:
                    # exception still raised after retry
                    raise SyntaxException(e)
                msg = strings.ustr(e)
                # Prefix test, so that a message with extra detail after the
                # range still triggers the retry
                if msg.startswith('month must be in 1..12'):
                    # try swapping month and day
                    items['month'], items['day'] = items['day'], items['month']
                else:
                    raise SyntaxException(e)
    else:
        try:
            year = float(str_)
        except ValueError:
            try:
                date = dates.strtotime(str_)
            except ImportError:
                return str_
            except ValueError as e:
                raise SyntaxException(e)
        else:
            # Fractional year: offset from Jan 1 by that fraction of 365 days
            date = (datetime.date(int(year), 1, 1) +
                datetime.timedelta(round((year % 1.)*365)))
    try:
        return dates.strftime('%Y-%m-%d', date)
    except ValueError as e:
        raise FormatException(e)
305
funcs['_date'] = _date
306

    
307
def _dateRangeStart(items):
    '''Returns the start of the date range in 'value'.

    @param items (name, value) pairs containing 'value'
    @return The first element of dates.parse_date_range(value), or None if
        there is no 'value'
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items:
        return None # input is empty
    return dates.parse_date_range(items['value'])[0]
312
funcs['_dateRangeStart'] = _dateRangeStart
313

    
314
def _dateRangeEnd(items):
    '''Returns the end of the date range in 'value'.

    @param items (name, value) pairs containing 'value'
    @return The second element of dates.parse_date_range(value), or None if
        there is no 'value'
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items:
        return None # input is empty
    return dates.parse_date_range(items['value'])[1]
319
funcs['_dateRangeEnd'] = _dateRangeEnd
320

    
321
#### Names
322

    
323
# Each name part paired with the slice that selects it from the list of words
# of a space-separated name, in the order the parts are output
_name_parts_slices_items = [
    ('first', slice(None, 1)),
    ('middle', slice(1, -1)),
    ('last', slice(-1, None)),
]
# Name part -> slice
name_parts_slices = dict(_name_parts_slices_items)
# Name parts, in output order
name_parts = [name for name, slice_ in _name_parts_slices_items]
330

    
331
def _name(items):
    '''Joins the given name parts with spaces, in name_parts order.

    @param items (part, value) pairs; parts not listed in name_parts are
        ignored
    @return The values of the parts that are present, joined with ' '
    '''
    items = dict(items)
    return ' '.join([items[part] for part in name_parts if part in items])
337
funcs['_name'] = _name
338

    
339
def _namePart(items):
    '''Extracts the named part(s) from full, space-separated names.

    @param items (part, full name) pairs, where part is a key of
        name_parts_slices. The slice for that part is applied to the words of
        the full name.
    @return The extracted parts, combined by _name()
    @throws SyntaxException if a part is not a key of name_parts_slices
    '''
    out_items = []
    for part, value in items:
        try:
            slice_ = name_parts_slices[part]
        except KeyError as e:
            raise SyntaxException(e)
        words = value.split(' ')
        out_items.append((part, ' '.join(words[slice_])))
    return _name(out_items)
345
    return _name(out_items)
346
funcs['_namePart'] = _namePart
347

    
348
#### Paths
349

    
350
def _simplifyPath(items):
    '''Removes the elements along a path that lack a required sub-path.

    Starting at the 'path' node, repeatedly follows the 'next' XPath to reach
    the following node. Every visited node for which the 'require' XPath finds
    nothing is replaced, using xml_dom.replace(), by its 'next' node.

    @param items (name, value) pairs:
        next: XPath from a node to the following node
        require: XPath that must find something for a node to be kept
        path: the node to start at
    @return The starting node, or whatever replaced it
    @throws SyntaxException if 'next', 'require', or 'path' is missing
    '''
    items = dict(items)
    try:
        # not named next, so as not to shadow the builtin
        next_path = cast(strings.ustr, items['next'])
        require = cast(strings.ustr, items['require'])
        root = items['path']
    except KeyError as e:
        raise SyntaxException(e)

    node = root
    while node is not None:
        new_node = xpath.get_1(node, next_path, allow_rooted=False)
        if xpath.get_1(node, require, allow_rooted=False) is None: # empty elem
            xml_dom.replace(node, new_node) # remove current elem
            if node is root:
                root = new_node # also update root
        node = new_node
    return root
366
funcs['_simplifyPath'] = _simplifyPath
(17-17/19)