Project

General

Profile

1
# XML "function" nodes that transform their contents
2

    
3
import datetime
4
import re
5
import sre_constants
6

    
7
import angles
8
import dates
9
import exc
10
import format
11
import maps
12
import strings
13
import term
14
import units
15
import util
16
import xml_dom
17
import xpath
18

    
19
##### Exceptions
20

    
21
class SyntaxError(exc.ExceptionWithCause):
    '''Raised when an XML function is used with invalid syntax, e.g. a required
    param is missing
    NOTE: This name shadows the builtin SyntaxError within this module.
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        exc.ExceptionWithCause.__init__(self, 'Invalid XML function syntax',
            cause)
25

    
26
class FormatException(exc.ExceptionWithCause):
    '''Raised when an input value passed to an XML function is invalid
    @param cause The underlying exception
    '''
    def __init__(self, cause):
        exc.ExceptionWithCause.__init__(self, 'Invalid input value', cause)
29

    
30
##### Helper functions
31

    
32
def map_items(func, items):
    '''Applies func to the value of each (name, value) pair, keeping the name
    @param func Callable taking one value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) tuples, in input order
    '''
    return [(name, func(value)) for name, value in items]
34

    
35
def cast(type_, val):
    '''Converts val using type_
    @param type_ Callable that performs the conversion (e.g. float)
    @return type_(val)
    @throws FormatException if type_ raises ValueError (can't cast)
    '''
    try: return type_(val)
    except ValueError as e: raise FormatException(e)
39

    
40
def conv_items(type_, items):
    '''Casts the value of each entry that xml_dom.TextEntryOnlyIter yields from
    items to type_
    @return list of (name, converted value) tuples
    @throws FormatException if a value can't be cast
    '''
    def to_type(val): return cast(type_, val)
    
    return map_items(to_type, xml_dom.TextEntryOnlyIter(items))
43

    
44
def pop_value(items, name='value'):
    '''Removes the last entry of items and returns its value
    @param items Mutable list of (name, value) entries. The last entry is
        popped even if its name does not match.
    @param name Name of value param, or None to accept any name
    @return The value, or None if items is empty or the last entry does not have
        the expected name
    '''
    try: last = items.pop() # last entry contains value
    except IndexError: return None # input is empty and no actions
    if name is not None and last[0] != name: return None # input is empty
    return last[1]
50

    
51
# Registry of XML functions: maps each function's tag name (e.g. '_ignore') to
# the callable that implements it. Filled in below, next to each definition.
funcs = {}
52

    
53
##### Public functions
54

    
55
def process(node, on_error=exc.raise_):
    '''Evaluates the XML functions in the subtree rooted at node, children
    first. Each function node is replaced, via xml_dom.replace_with_text(), by
    the result of calling its function on the node's entries.
    @param node Element whose subtree is processed in place
    @param on_error Called with the exception if a function fails. Before it is
        called, the failed function node is replaced by a comment containing
        the node's string form.
    '''
    for child in xml_dom.NodeElemIter(node): process(child, on_error)
    name = node.tagName
    if name.startswith('_') and name in funcs:
        try:
            value = funcs[name](xml_dom.NodeTextEntryIter(node))
            xml_dom.replace_with_text(node, value)
        # also catch non-wrapped exceptions (XML func bugs)
        except Exception as e:
            # Save in case another exception raised, overwriting sys.exc_info()
            exc.add_traceback(e)
            str_ = strings.ustr(node)
            exc.add_msg(e, 'function:\n'+str_)
            xml_dom.replace(node, xml_dom.mk_comment(node.ownerDocument,
                '\n'+term.emph_multiline(str_)))
            
            on_error(e)
71

    
72
def strip(node):
    '''Replaces every XML function with its last parameter (which is usually its
    value), except for _ignore, which is removed completely
    @param node Element whose subtree is processed in place, children first
    '''
    for child in xml_dom.NodeElemIter(node): strip(child)
    name = node.tagName
    if not (name.startswith('_') and name in funcs): return # not a function
    
    if name == '_ignore': value = None
    else: value = pop_value(list(xml_dom.NodeTextEntryIter(node)), None)
    xml_dom.replace_with_text(node, value)
81

    
82
##### XML functions
83

    
84
# Function names must start with _ to avoid collisions with real tags
85
# Functions take arguments (items)
86

    
87
#### General
88

    
89
def _ignore(items):
90
    '''Used to "comment out" an XML subtree'''
91
    return None
92
funcs['_ignore'] = _ignore
93

    
94
#### Conditionals
95

    
96
def _eq(items):
97
    items = dict(items)
98
    try:
99
        left = items['left']
100
        right = items['right']
101
    except KeyError: return '' # a value was None
102
    return util.bool2str(left == right)
103
funcs['_eq'] = _eq
104

    
105
def _if(items):
    '''Selects between the "then" and "else" params
    @param items
        cond Condition; true if it is truthy after conversion by strings.ustr
        then Value returned if cond is true
        else Optional value returned if cond is false
    @return The selected value; None if cond is false and there is no "else"
    @throws SyntaxError if "cond" or "then" is missing
    '''
    items = dict(items)
    try:
        cond = items['cond']
        then = items['then']
    except KeyError as e: raise SyntaxError(e)
    else_ = items.get('else', None)
    cond = bool(cast(strings.ustr, cond))
    if cond: return then
    else: return else_
115
funcs['_if'] = _if
116

    
117
#### Combining values
118

    
119
def _alt(items):
120
    items = list(items)
121
    items.sort()
122
    try: return items[0][1] # value of lowest-numbered item
123
    except IndexError: return None # input got removed by e.g. FormatException
124
funcs['_alt'] = _alt
125

    
126
def _merge(items):
    '''Merges the values of all params using maps.merge_values(), passing them
    in the sorted order of their (name, value) entries
    @throws FormatException if a value can't be converted by strings.ustr
    '''
    # get *once* from iter, check types
    entries = sorted(conv_items(strings.ustr, items))
    return maps.merge_values(*[value for _, value in entries])
131
funcs['_merge'] = _merge
132

    
133
def _label(items):
    '''Prefixes the "value" param with the "label" param
    @return '<label>: <value>', or None if there is no value
    @throws SyntaxError if there is a value but no "label"
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    value = items.get('value', None)
    if value is None: return None # input is empty
    try: label = items['label']
    except KeyError as e: raise SyntaxError(e)
    return label+': '+value
141
funcs['_label'] = _label
142

    
143
#### Transforming values
144

    
145
def _collapse(items):
    '''Collapses a subtree if the "value" element in it is NULL
    @param items
        require XPath, evaluated against value, of the node that must exist and
            be non-empty
        value The subtree
    @return value, or None if the required node is missing or empty
    @throws SyntaxError if "require" is missing
    '''
    items = dict(items)
    try: require = cast(strings.ustr, items['require'])
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)
    
    required_node = xpath.get_1(value, require, allow_rooted=False)
    if required_node is None or xml_dom.is_empty(required_node): return None
    else: return value
155
funcs['_collapse'] = _collapse
156

    
157
# Converters selectable by the "type" param of _nullIf. The None key is the
# converter used when no type is given.
types_by_name = {None: strings.ustr, 'str': strings.ustr, 'float': float}
158

    
159
def _nullIf(items):
    '''Passes the "value" and "null" params to util.none_if(), which decides
    whether value counts as NULL
    @param items
        null Value to treat as NULL; converted using the selected type
        value Input value
        type Optional name of a converter in types_by_name
    @return Result of util.none_if(); value unchanged if that raises ValueError
    @throws SyntaxError if "null" is missing or "type" is not a known name
    '''
    items = dict(conv_items(strings.ustr, items))
    try: null = items['null']
    except KeyError as e: raise SyntaxError(e)
    value = items.get('value', None)
    type_str = items.get('type', None)
    
    try: type_ = types_by_name[type_str]
    except KeyError as e: raise SyntaxError(e)
    null = type_(null)
    
    try: return util.none_if(value, null)
    except ValueError: return value # value not convertible, so can't equal null
172
funcs['_nullIf'] = _nullIf
173

    
174
def repl(repls, value):
    '''Raises error if value not in map and no special '*' entry
    @param repls dict repl:with
        repl "*" means all other input values
        with "*" means keep input value the same
        with "" means ignore input value
    @param value Input value to look up
    @return The replacement, or value itself if the replacement is "*"
    @throws FormatException if value has no entry and there is no "*" entry
    '''
    try: new_value = repls[value]
    except KeyError as e:
        # Save traceback right away in case another exception raised
        fe = FormatException(e)
        try: new_value = repls['*']
        except KeyError: raise fe
    if new_value == '*': new_value = value # '*' means keep input value the same
    return new_value
189

    
190
def _map(items):
    '''See repl()
    @param items
        <last_entry> Value
        <other_entries> name=value Mappings. Special values: See repl() repls.
    @return The mapped value; None if the input is empty or maps to ""
    @throws FormatException if the value has no mapping; see repl()
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    return util.none_if(repl(dict(items), value), u'') # empty value means None
200
funcs['_map'] = _map
201

    
202
def _replace(items):
    '''Applies regular-expression replacements to a value
    @param items
        <last_entry> Value
        <other_entries> pattern=replacement, applied in order with re.sub(). A
            pattern made only of word characters matches whole words only.
    @return The stripped result; None if the input is empty or the result is ""
    @throws SyntaxError if a pattern or replacement is not a valid regexp
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    try:
        # Named `pattern` so that the module-level repl() is not shadowed
        for pattern, with_ in items:
            if re.match(r'^\w+$', pattern):
                # match whole word
                pattern = r'(?<![^\W_])'+pattern+r'(?![^\W_])'
            value = re.sub(pattern, with_, value)
    # re.error is the same class as sre_constants.error
    except re.error as e: raise SyntaxError(e)
    return util.none_if(value.strip(), u'') # empty strings always mean None
213
funcs['_replace'] = _replace
214

    
215
#### Quantities
216

    
217
def _units(items):
    '''Parses a quantity and applies unit actions to it
    @param items
        <last_entry> Value, parsed with units.str2quantity()
        <other_entries> action=units, applied in order:
            default Passed to units.set_default_units()
            to Passed to units.convert()
            An empty units string is passed on as None.
    @return units.quantity2str() of the result, or None if the input is empty
    @throws SyntaxError if an action is not "default" or "to"
    @throws FormatException if units are missing or the conversion fails
    '''
    items = conv_items(strings.ustr, items) # get *once* from iter, check types
    value = pop_value(items)
    if value is None: return None # input is empty
    
    quantity = units.str2quantity(value)
    try:
        for action, units_ in items:
            units_ = util.none_if(units_, u'')
            if action == 'default': units.set_default_units(quantity, units_)
            elif action == 'to':
                try: quantity = units.convert(quantity, units_)
                except ValueError as e: raise FormatException(e)
            else: raise SyntaxError(ValueError('Invalid action: '+action))
    except units.MissingUnitsException as e: raise FormatException(e)
    return units.quantity2str(quantity)
233
funcs['_units'] = _units
234

    
235
def parse_range(str_, range_sep='-'):
    '''Splits a range string at the first occurrence of range_sep
    @param str_ The range, e.g. "1-5"
    @param range_sep Separator between the start and end
    @return tuple (start, end), each stripped of whitespace. If str_ is not a
        range, or starts with "-" when that is the separator (a negative
        number), returns (str_, None) with str_ unchanged.
    '''
    default = (str_, None)
    start, sep, end = str_.partition(range_sep)
    if sep == '': return default # not a range
    if start == '' and range_sep == '-': return default # negative number
    return (start.strip(), end.strip())
241

    
242
def _rangeStart(items):
    '''Returns the start of the range in the "value" param; see parse_range()
    @return The start, or None if there is no value
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items: return None # input is empty
    start, end = parse_range(items['value'])
    return start
247
funcs['_rangeStart'] = _rangeStart
248

    
249
def _rangeEnd(items):
    '''Returns the end of the range in the "value" param; see parse_range()
    @return The end, or None if there is no value or it is not a range
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items: return None # input is empty
    start, end = parse_range(items['value'])
    return end
254
funcs['_rangeEnd'] = _rangeEnd
255

    
256
def _range(items):
    '''Computes the size of a numeric range
    @param items
        from Start of the range
        to End of the range
    @return str(to - from), or None if either param is missing
    @throws FormatException if a param is not a valid float
    '''
    items = dict(conv_items(float, items))
    from_ = items.get('from', None)
    to = items.get('to', None)
    if from_ is None or to is None: return None
    return str(to - from_)
262
funcs['_range'] = _range
263

    
264
def _avg(items):
    '''Computes the mean of all param values
    @return str() of the mean, or None if there are no values
    @throws FormatException if a value is not a valid float
    '''
    values = [value for _, value in conv_items(float, items)]
    if not values: return None # input is empty
    return str(sum(values, 0.)/len(values))
272
funcs['_avg'] = _avg
273

    
274
class CvException(Exception):
    '''Raised for a CV (coefficient of variation) value where one is not
    allowed'''
    def __init__(self):
        Exception.__init__(self, 'CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
279

    
280
def _noCV(items):
281
    try: name, value = items.next()
282
    except StopIteration: return None
283
    if re.match('^(?i)CV *\d+$', value): raise FormatException(CvException())
284
    return value
285
funcs['_noCV'] = _noCV
286

    
287
#### Dates
288

    
289
def _date(items):
    '''Normalizes a date to YYYY-MM-DD form
    @param items Either
        date A date string, or a year number whose fractional part is taken as
            a fraction of 365 days
      or
        year Required
        month Number or month name; defaults to 1
        day Defaults to 1
        If the month is out of range, month and day are swapped and the date
        is tried once more.
    @return The formatted date; None if there are no params; the date string
        unchanged if dates.strtotime() raises ImportError
    @throws FormatException if the date is invalid
    '''
    items = dict(conv_items(strings.ustr, items))
        # get *once* from iter, check types
    try: str_ = items['date']
    except KeyError:
        # Year is required
        try: items['year']
        except KeyError as e:
            if not items: return None # entire date is empty
            else: raise FormatException(e)
        
        # Convert month name to number
        try: month = items['month']
        except KeyError: pass
        else:
            if not month.isdigit(): # month is name
                try: items['month'] = str(dates.strtotime(month).month)
                except ValueError as e: raise FormatException(e)
        
        # iter() so that conv_items() receives an iterator, as it did with
        # iteritems()
        items = dict(conv_items(format.str2int, iter(items.items())))
        items.setdefault('month', 1)
        items.setdefault('day', 1)
        
        for try_num in range(2):
            try:
                date = datetime.date(**items)
                break
            except ValueError as e:
                if try_num > 0: raise FormatException(e)
                    # exception still raised after retry
                msg = strings.ustr(e)
                if msg == 'month must be in 1..12': # try swapping month and day
                    items['month'], items['day'] = items['day'], items['month']
                else: raise FormatException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: date = dates.strtotime(str_)
            except ImportError: return str_
            except ValueError as e: raise FormatException(e)
        else:
            # An out-of-range, NaN, or infinite year is invalid input too
            try: date = (datetime.date(int(year), 1, 1) +
                datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e: raise FormatException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
333
funcs['_date'] = _date
334

    
335
def _dateRangeStart(items):
    '''Returns the first element of dates.parse_date_range() for the "value"
    param
    @return That element, or None if there is no value
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items: return None # input is empty
    return dates.parse_date_range(items['value'])[0]
340
funcs['_dateRangeStart'] = _dateRangeStart
341

    
342
def _dateRangeEnd(items):
    '''Returns the second element of dates.parse_date_range() for the "value"
    param
    @return That element, or None if there is no value
    '''
    items = dict(conv_items(strings.ustr, items))
    if 'value' not in items: return None # input is empty
    return dates.parse_date_range(items['value'])[1]
347
funcs['_dateRangeEnd'] = _dateRangeEnd
348

    
349
#### Names
350

    
351
# Name parts in output order, each with the slice that selects it from the
# list of space-separated words of a name
_name_parts_slices_items = [
    ('first', slice(None, 1)),
    ('middle', slice(1, -1)),
    ('last', slice(-1, None)),
]
name_parts_slices = dict(_name_parts_slices_items) # part name -> slice
name_parts = [name for name, slice_ in _name_parts_slices_items] # in order
358

    
359
def _name(items):
    '''Joins the name part params with spaces, in name_parts order
    @param items Entries named after name parts; other entries are ignored
    @return The joined name; '' if no name part is present
    '''
    items = dict(items)
    return ' '.join([items[part] for part in name_parts if part in items])
365
funcs['_name'] = _name
366

    
367
def _namePart(items):
    '''Extracts name parts from full names
    @param items Entries part=full_name. From each full name, the words
        selected by the part's slice in name_parts_slices are kept.
    @return The extracted parts, joined by _name()
    @throws SyntaxError if an entry's name is not a known name part
    '''
    out_items = []
    for part, value in items:
        try: slice_ = name_parts_slices[part]
        except KeyError as e: raise SyntaxError(e)
        out_items.append((part, ' '.join(value.split(' ')[slice_])))
    return _name(out_items)
374
funcs['_namePart'] = _namePart
375

    
376
#### Angles
377

    
378
def _compass(items):
    '''Converts a compass direction (N, NE, NNE, etc.) into a degree heading
    @return The heading from angles.compass2heading(), cast with
        util.cast(str, ...); the value unchanged if it is not all uppercase;
        None if there is no value
    @throws FormatException if the conversion raises KeyError
    '''
    items = dict(conv_items(strings.ustr, items))
    try: value = items['value']
    except KeyError: return None # input is empty
    
    if not value.isupper(): return value # pass through other coordinate formats
    try: return util.cast(str, angles.compass2heading(value)) # ignore None
    except KeyError as e: raise FormatException(e)
387
funcs['_compass'] = _compass
388

    
389
#### Paths
390

    
391
def _simplifyPath(items):
    '''Removes the empty elements from a chain of elements
    @param items
        next XPath, evaluated against an element, of the next element in the
            chain
        require XPath, evaluated against an element, of the node that must
            exist and be non-empty for the element to be kept
        path First element of the chain
    @return The first element of the chain that was kept, or None if none was
    @throws SyntaxError if a param is missing
    '''
    items = dict(items)
    try:
        next_ = cast(strings.ustr, items['next']) # not `next`: that's a builtin
        require = cast(strings.ustr, items['require'])
        root = items['path']
    except KeyError as e: raise SyntaxError(e)
    
    node = root
    while node is not None:
        # Look up the next element before the current one can be replaced
        new_node = xpath.get_1(node, next_, allow_rooted=False)
        required_node = xpath.get_1(node, require, allow_rooted=False)
        if required_node is None or xml_dom.is_empty(required_node):# empty elem
            xml_dom.replace(node, new_node) # remove current elem
            if node is root: root = new_node # also update root
        node = new_node
    return root
408
funcs['_simplifyPath'] = _simplifyPath
(30-30/33)