Project

General

Profile

1
# XML "function" nodes that transform their contents
2

    
3
import datetime
4
import re
5
import sre_constants
6

    
7
import angles
8
import dates
9
import exc
10
import format
11
import maps
12
import strings
13
import term
14
import units
15
import util
16
import xml_dom
17
import xpath
18

    
19
##### Exceptions
20

    
21
class SyntaxError(exc.ExceptionWithCause):
    '''An XML function was invoked with invalid syntax (e.g. a required
    argument is missing). Wraps the underlying exception as its cause.
    
    Note: within this module, this name shadows the builtin SyntaxError.
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax'
        exc.ExceptionWithCause.__init__(self, msg, cause)
25

    
26
class FormatException(exc.ExceptionWithCause):
    '''An input value could not be interpreted by an XML function.
    Wraps the underlying exception as its cause.
    '''
    def __init__(self, cause):
        msg = 'Invalid input value'
        exc.ExceptionWithCause.__init__(self, msg, cause)
29

    
30
##### Functions
31

    
32
# Registry of XML functions: maps a tag name (e.g. '_ignore') to the callable
# that process() invokes for nodes with that tag name
funcs = {}
33

    
34
def process(node, on_error=exc.raise_):
    '''Evaluates the XML functions in the subtree rooted at node, in place.
    Child elements are processed first, so each function receives arguments
    whose own nested functions have already been evaluated.
    A node is a function call if its tag name starts with "_" and is registered
    in funcs; it is then replaced with the text of the function's result.
    If the function raises, the node is instead replaced with a comment
    containing the node's string form, and the exception is passed to on_error.
    @param node The element to process
    @param on_error Callable taking the (annotated) exception
    '''
    for child in xml_dom.NodeElemIter(node):
        process(child, on_error)
    
    name = node.tagName
    if not (name.startswith('_') and name in funcs): return # not a function
    
    try:
        result = funcs[name](xml_dom.NodeTextEntryIter(node))
        xml_dom.replace_with_text(node, result)
    except Exception as e: # also catch non-wrapped exceptions (XML func bugs)
        # Save in case another exception raised, overwriting sys.exc_info()
        exc.add_traceback(e)
        node_str = strings.ustr(node)
        exc.add_msg(e, 'function:\n'+node_str)
        comment = xml_dom.mk_comment(node.ownerDocument,
            '\n'+term.emph_multiline(node_str))
        xml_dom.replace(node, comment)
        
        on_error(e)
50

    
51
def map_items(func, items):
    '''Applies func to the value of each (name, value) pair, keeping the names.
    @param func Callable taking a single value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in input order
    '''
    mapped = []
    for name, value in items:
        mapped.append((name, func(value)))
    return mapped
53

    
54
def cast(type_, val):
    '''Converts val by calling type_ on it.
    @param type_ Callable performing the conversion (e.g. float)
    @param val The value to convert
    @return The converted value
    @throws FormatException if type_ raises ValueError
    '''
    try:
        converted = type_(val)
    except ValueError as e:
        raise FormatException(e)
    return converted
58

    
59
def conv_items(type_, items):
    '''Casts the value of each entry yielded by xml_dom.TextEntryOnlyIter(items)
    to type_.
    @param type_ Callable passed to cast() for each value
    @param items The entries to convert
    @return The (name, converted value) pairs, as built by map_items()
    @throws FormatException (via cast()) if a value can't be converted
    '''
    def to_type(val): return cast(type_, val)
    
    text_items = xml_dom.TextEntryOnlyIter(items)
    return map_items(to_type, text_items)
62

    
63
def pop_value(items):
    '''Removes the last entry of items and returns its value if it is the
    "value" entry.
    @param items Mutable list of (name, value) entries; its last entry is
        always removed (if there is one), even when it is not named "value"
    @return The value of the "value" entry, or None if items is empty or its
        last entry has a different name (i.e. the input is empty)
    '''
    try:
        entry = items.pop() # the value, if present, is the last entry
    except IndexError:
        return None # neither a value nor any actions
    
    if entry[0] == 'value': return entry[1]
    return None # only actions were given
68

    
69
##### XML functions
70

    
71
# Function names must start with _ to avoid collisions with real tags
72
# Functions take arguments (items)
73

    
74
#### General
75

    
76
def _ignore(items):
    '''Used to "comment out" an XML subtree: the input is disregarded.
    @param items Unused
    @return None, always
    '''
    return None
79
funcs['_ignore'] = _ignore
80

    
81
#### Conditionals
82

    
83
def _eq(items):
    '''Compares the "left" and "right" arguments for equality.
    @param items (name, value) entries; "left" and "right" are used
    @return util.bool2str() of whether left equals right, or '' if either
        argument is missing (its value was None)
    '''
    args = dict(items)
    if 'left' not in args or 'right' not in args:
        return '' # a value was None
    return util.bool2str(args['left'] == args['right'])
90
funcs['_eq'] = _eq
91

    
92
def _if(items):
    '''Selects between the "then" and "else" arguments.
    @param items (name, value) entries: "cond" and "then" are required, "else"
        is optional
    @return "then" if the string form of "cond" (via strings.ustr) is truthy,
        otherwise "else" (None if no "else" was given)
    @throws SyntaxError if "cond" or "then" is missing
    '''
    args = dict(items)
    try:
        cond = args['cond']
        then = args['then']
    except KeyError as e:
        raise SyntaxError(e)
    else_ = args.get('else', None)
    
    is_true = bool(cast(strings.ustr, cond))
    return then if is_true else else_
102
funcs['_if'] = _if
103

    
104
#### Combining values
105

    
106
def _alt(items):
    '''Picks the first available alternative.
    @param items (name, value) entries; they are sorted as tuples, so the entry
        with the lowest name comes first
    @return The value of the first entry after sorting, or None if there are no
        entries (the input got removed by e.g. a FormatException)
    '''
    ranked = sorted(items)
    try:
        lowest = ranked[0]
        return lowest[1] # value of lowest-numbered item
    except IndexError:
        return None # no alternatives left
111
funcs['_alt'] = _alt
112

    
113
def _merge(items):
    '''Merges the values of all entries using maps.merge_values().
    @param items (name, value) entries; values are converted with strings.ustr
    @return The result of maps.merge_values() applied to the values, ordered by
        sorting the (name, value) entries
    '''
    # Materialize once from the iterator (this also checks the types)
    entries = sorted(conv_items(strings.ustr, items))
    values = [value for name, value in entries]
    return maps.merge_values(*values)
118
funcs['_merge'] = _merge
119

    
120
def _label(items):
    '''Prefixes a value with a label.
    @param items (name, value) entries: "label" and "value" are both required
    @return "<label>: <value>"
    @throws SyntaxError if "label" or "value" is missing
    '''
    # Materialize once from the iterator (this also checks the types)
    args = dict(conv_items(strings.ustr, items))
    try:
        label = args['label']
        value = args['value']
    except KeyError as e:
        raise SyntaxError(e)
    return label+': '+value
128
funcs['_label'] = _label
129

    
130
#### Transforming values
131

    
132
# Converters selectable through _nullIf's optional "type" argument; the None key
# is what _nullIf looks up when no "type" is given
types_by_name = {None: strings.ustr, 'str': strings.ustr, 'float': float}
133

    
134
def _nullIf(items):
    '''Turns a value into None if it equals a designated null value.
    @param items (name, value) entries:
        null The value that stands for null (required)
        value The input value (optional)
        type Name of the type to compare as; a key of types_by_name (optional)
    @return The result of util.none_if(value, null), or value unchanged if that
        raises ValueError
    @throws SyntaxError if "null" is missing or "type" is not a known type name
    '''
    args = dict(conv_items(strings.ustr, items))
    try:
        null = args['null']
    except KeyError as e:
        raise SyntaxError(e)
    value = args.get('value', None)
    type_name = args.get('type', None)
    
    try:
        type_ = types_by_name[type_name]
    except KeyError as e:
        raise SyntaxError(e)
    null = type_(null)
    
    try:
        return util.none_if(value, null)
    except ValueError:
        return value # value not convertible, so can't equal null
147
funcs['_nullIf'] = _nullIf
148

    
149
def repl(repls, value):
    '''Looks up the replacement for value.
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @param value The input value
    @return The replacement for value
    @throws FormatException if value is not in repls and there is no "*" entry
    '''
    try:
        new_value = repls[value]
    except KeyError as e:
        # Wrap right away, in case another exception gets raised
        fe = FormatException(e)
        if '*' not in repls: raise fe
        new_value = repls['*'] # catch-all entry
    
    # '*' as the replacement means keep input value the same
    return value if new_value == '*' else new_value
164

    
165
def _map(items):
    '''Replaces the input value according to a set of mappings. See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value; None if the input is empty or maps to ""
    '''
    # Materialize once from the iterator (this also checks the types)
    entries = conv_items(strings.ustr, items)
    value = pop_value(entries)
    if value is None: return None # input is empty
    
    mapped = repl(dict(entries), value)
    return util.none_if(mapped, u'') # empty value means None
175
funcs['_map'] = _map
176

    
177
def _replace(items):
    '''Applies regular expression replacements to the input value, in order.
    @param items
        <last_entry> Value
        <other_entries> regex=replacement. A regex consisting only of word
            characters is only matched as a whole word.
    @return The value after all replacements, stripped; None if the input is
        empty or the result is the empty string
    @throws SyntaxError if a regular expression operation raises re.error
    '''
    # Materialize once from the iterator (this also checks the types)
    entries = conv_items(strings.ustr, items)
    value = pop_value(entries)
    if value is None: return None # input is empty
    
    try:
        for pattern, replacement in entries:
            if re.match(r'^\w+$', pattern):
                # match whole word
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])'
            value = re.sub(pattern, replacement, value)
    except re.error as e: # same class as sre_constants.error
        raise SyntaxError(e)
    return util.none_if(value.strip(), u'') # empty strings always mean None
188
funcs['_replace'] = _replace
189

    
190
#### Quantities
191

    
192
def _units(items):
    '''Applies unit actions to a quantity string.
    @param items
        <last_entry> Value (parsed with units.str2quantity())
        <other_entries> action=units, applied in order:
            default Passed to units.set_default_units()
            to Passed to units.convert()
        Empty units are passed on as None.
    @return The resulting quantity as a string (units.quantity2str()); None if
        the input is empty
    @throws SyntaxError if an action is not one of the above
    @throws FormatException if conversion raises ValueError, or units are
        missing (units.MissingUnitsException)
    '''
    # Materialize once from the iterator (this also checks the types)
    entries = conv_items(strings.ustr, items)
    value = pop_value(entries)
    if value is None: return None # input is empty
    
    quantity = units.str2quantity(value)
    try:
        for action, units_ in entries:
            units_ = util.none_if(units_, u'')
            if action == 'default':
                units.set_default_units(quantity, units_)
            elif action == 'to':
                try:
                    quantity = units.convert(quantity, units_)
                except ValueError as e:
                    raise FormatException(e)
            else:
                raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e:
        raise FormatException(e)
    return units.quantity2str(quantity)
208
funcs['_units'] = _units
209

    
210
def parse_range(str_, range_sep='-'):
    '''Splits a range string into its start and end.
    @param str_ The string to parse, e.g. "1-5"
    @param range_sep The separator between start and end
    @return tuple (start, end), each stripped of surrounding whitespace.
        If str_ is not a range (including a lone negative number such as "-5"),
        returns (str_, None) with str_ unchanged.
        With the "-" separator, a leading "-" is treated as the sign of the
        start, so "-5-3" yields ("-5", "3") and "-5--3" yields ("-5", "-3").
    '''
    default = (str_, None)
    start, sep, end = str_.partition(range_sep)
    if sep == '': return default # not a range
    if start == '' and range_sep == '-':
        # The leading '-' is a minus sign, not the separator, so look for the
        # actual separator after it
        start, sep, end = end.partition(range_sep)
        if sep == '' or start.strip() == '': return default # negative number
        start = '-'+start
    return tuple(d.strip() for d in (start, end))
216

    
217
def _rangeStart(items):
    '''Extracts the start of a range. See parse_range()
    @param items (name, value) entries; "value" is the range string
    @return The first element of parse_range(value); None if there is no value
    '''
    args = dict(conv_items(strings.ustr, items))
    if 'value' not in args: return None # input is empty
    
    start, end = parse_range(args['value'])
    return start
222
funcs['_rangeStart'] = _rangeStart
223

    
224
def _rangeEnd(items):
    '''Extracts the end of a range. See parse_range()
    @param items (name, value) entries; "value" is the range string
    @return The second element of parse_range(value) (None if the value is not
        a range); None if there is no value
    '''
    args = dict(conv_items(strings.ustr, items))
    if 'value' not in args: return None # input is empty
    
    start, end = parse_range(args['value'])
    return end
229
funcs['_rangeEnd'] = _rangeEnd
230

    
231
def _range(items):
    '''Computes the size of a numeric range.
    @param items (name, value) entries: "from" and "to", converted to float
    @return str(to - from), or None if either bound is missing
    @throws FormatException (via conv_items()) if a bound is not a number
    '''
    bounds = dict(conv_items(float, items))
    try:
        from_, to = bounds['from'], bounds['to']
    except KeyError:
        return None # a bound is missing
    return str(to - from_)
237
funcs['_range'] = _range
238

    
239
def _avg(items):
    '''Computes the arithmetic mean of the values of all entries.
    @param items (name, value) entries; values are converted to float
    @return The mean as a string, or None if there are no entries
    @throws FormatException (via conv_items()) if a value is not a number
    '''
    values = [value for name, value in conv_items(float, items)]
    if not values: return None # input is empty
    
    # Accumulate left to right, starting from a float
    total = 0.
    for value in values: total += value
    return str(total/len(values))
247
funcs['_avg'] = _avg
248

    
249
class CvException(Exception):
    '''Signals a CV (coefficient of variation) value where one is not allowed.
    Takes no arguments; the message is fixed.
    '''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only allowed for ratio'
            ' scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
254

    
255
def _noCV(items):
    '''Rejects CV (coefficient of variation) values.
    @param items Iterator of (name, value) entries; only the first is used
    @return The first entry's value, unchanged; None if there are no entries
    @throws FormatException (caused by CvException) if the value consists of
        "CV" (case-insensitive), optional spaces, and then digits
    '''
    try: name, value = next(items)
    except StopIteration: return None # input is empty
    
    # Raw pattern with an explicit flag: a global inline flag like (?i) that is
    # not at the very start of the pattern is rejected by newer versions of re
    if re.match(r'^CV *\d+$', value, re.I): raise FormatException(CvException())
    return value
260
funcs['_noCV'] = _noCV
261

    
262
#### Dates
263

    
264
def _date(items):
    '''Normalizes a date to the format '%Y-%m-%d' (via dates.strftime()).
    @param items (name, value) entries, in one of two forms:
        date A date string parsed by dates.strtotime(), or a year number whose
            fractional part selects the day within the year (fraction * 365
            days after January 1)
        year, month, day The parts of the date. year is required. month may be
            a number or a month name. month and day default to 1. If the month
            is out of range, month and day are swapped and the date is retried
            once.
    @return The formatted date; None if there are no entries; the date string
        unchanged if dates.strtotime() raises ImportError
    @throws FormatException if the date is invalid or out of range
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try: str_ = items['date']
    except KeyError:
        # Year is required
        try: items['year']
        except KeyError as e:
            if items == {}: return None # entire date is empty
            else: raise FormatException(e)
        
        # Convert month name to number
        try: month = items['month']
        except KeyError: pass
        else:
            if not month.isdigit(): # month is name
                try: items['month'] = str(dates.strtotime(month).month)
                except ValueError as e: raise FormatException(e)
        
        items = dict(conv_items(format.str2int, items.iteritems()))
        items.setdefault('month', 1)
        items.setdefault('day', 1)
        
        for try_num in xrange(2):
            try:
                date = datetime.date(**items)
                break
            except ValueError as e:
                if try_num > 0: raise FormatException(e)
                    # exception still raised after retry
                msg = strings.ustr(e)
                if msg == 'month must be in 1..12': # try swapping month and day
                    items['month'], items['day'] = items['day'], items['month']
                else: raise FormatException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise FormatException(e)
        else:
            # A numeric year can be NaN, infinite, or outside the range that
            # datetime.date supports; report that as an invalid input value
            # like all the other invalid dates
            try:
                date = (datetime.date(int(year), 1, 1) +
                    datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e: raise FormatException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
308
funcs['_date'] = _date
309

    
310
def _dateRangeStart(items):
    '''Extracts the start of a date range.
    @param items (name, value) entries; "value" is the date range string
    @return The first element of dates.parse_date_range(value); None if there
        is no value
    '''
    args = dict(conv_items(strings.ustr, items))
    if 'value' not in args: return None # input is empty
    
    date_range = dates.parse_date_range(args['value'])
    return date_range[0]
315
funcs['_dateRangeStart'] = _dateRangeStart
316

    
317
def _dateRangeEnd(items):
    '''Extracts the end of a date range.
    @param items (name, value) entries; "value" is the date range string
    @return The second element of dates.parse_date_range(value); None if there
        is no value
    '''
    args = dict(conv_items(strings.ustr, items))
    if 'value' not in args: return None # input is empty
    
    date_range = dates.parse_date_range(args['value'])
    return date_range[1]
322
funcs['_dateRangeEnd'] = _dateRangeEnd
323

    
324
#### Names
325

    
326
# Each name part, in output order, with the slice that selects it from the list
# of space-separated words of a name
_name_parts_slices_items = [
    ('first', slice(None, 1)), # the first word
    ('middle', slice(1, -1)), # everything in between
    ('last', slice(-1, None)), # the last word
]
# The part names, in output order
name_parts = [name for name, slice_ in _name_parts_slices_items]
# Lookup of a part's slice by part name
name_parts_slices = dict(_name_parts_slices_items)
333

    
334
def _name(items):
    '''Assembles a name from its parts.
    @param items (part, value) entries; parts not listed in name_parts are
        ignored
    @return The values of the parts that are present, in name_parts order,
        joined by single spaces
    '''
    given = dict(items)
    present = [given[part] for part in name_parts if part in given]
    return ' '.join(present)
340
funcs['_name'] = _name
341

    
342
def _namePart(items):
    '''Extracts parts of full names: for each entry, keeps only the words of
    the value that belong to the part the entry is named after, then combines
    the results with _name().
    @param items (part, full name) entries; part must be a key of
        name_parts_slices
    @return The extracted parts, assembled by _name()
    @throws SyntaxError if an entry's name is not a valid part
    '''
    extracted = []
    for part, value in items:
        try:
            slice_ = name_parts_slices[part]
        except KeyError as e:
            raise SyntaxError(e)
        words = value.split(' ')
        extracted.append((part, ' '.join(words[slice_])))
    return _name(extracted)
349
funcs['_namePart'] = _namePart
350

    
351
#### Angles
352

    
353
def _compass(items):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @param items (name, value) entries; "value" is the direction
    @return The result of angles.compass2heading(), cast to str by util.cast();
        the value unchanged if it is not all uppercase (other coordinate
        formats are passed through); None if there is no value
    @throws FormatException if the conversion raises KeyError
    '''
    args = dict(conv_items(strings.ustr, items))
    if 'value' not in args: return None # input is empty
    value = args['value']
    
    if not value.isupper(): return value # pass through other coordinate formats
    try:
        heading = angles.compass2heading(value)
        return util.cast(str, heading) # ignore None
    except KeyError as e:
        raise FormatException(e)
362
funcs['_compass'] = _compass
363

    
364
#### Paths
365

    
366
def _simplifyPath(items):
    '''Removes the empty elements along a path of nodes.
    Starting at the "path" node, repeatedly follows the "next" XPath. Each
    visited node for which the "require" XPath finds nothing is replaced (via
    xml_dom.replace()) with the node that "next" leads to.
    @param items (name, value) entries, all required:
        next XPath from a node to the next node on the path
        require XPath that must match for a node to be kept
        path The first node of the path
    @return The first node of the path, or its replacement if it was removed
    @throws SyntaxError if an argument is missing
    '''
    args = dict(items)
    try:
        next_xpath = cast(strings.ustr, args['next'])
        require_xpath = cast(strings.ustr, args['require'])
        root = args['path']
    except KeyError as e:
        raise SyntaxError(e)
    
    node = root
    while node != None:
        next_node = xpath.get_1(node, next_xpath, allow_rooted=False)
        required = xpath.get_1(node, require_xpath, allow_rooted=False)
        if required == None: # empty elem
            xml_dom.replace(node, next_node) # remove current elem
            if node is root: root = next_node # also update root
        node = next_node
    return root
382
funcs['_simplifyPath'] = _simplifyPath
(30-30/33)