Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 111 aaronmk
6 818 aaronmk
import dates
7 300 aaronmk
import exc
8 917 aaronmk
import maps
9 827 aaronmk
import term
10 1047 aaronmk
import util
11 86 aaronmk
import xml_dom
12
13 995 aaronmk
##### Exceptions
14
15 962 aaronmk
class SyntaxException(Exception):
    '''Signals that an XML function was given invalid input.
    
    The message is the prefix 'Invalid XML function syntax: ' followed by
    exc.str_(cause).
    
    @param cause The underlying exception describing what was wrong
    '''
    def __init__(self, cause):
        msg = 'Invalid XML function syntax: ' + exc.str_(cause)
        Exception.__init__(self, msg)
19 278 aaronmk
20 843 aaronmk
class FormatException(SyntaxException):
    '''SyntaxException subtype raised by _date when the resulting date can't be
    formatted'''
    pass
21
22 995 aaronmk
##### Functions
23
24
# Registry of XML functions, keyed by tag name (e.g. '_alt'). process() looks
# up a node's tag name here to find the function to apply.
funcs = {}
25
26
def process(node, on_error=exc.raise_):
    '''Evaluates the XML functions in node's subtree, modifying it in place.
    
    Each child element (as yielded by xml_dom.NodeElemIter) is processed
    before node itself, so a function's arguments are already evaluated when
    the function runs.
    
    If node's tag name starts with '_' and is registered in funcs, the
    function is called with xml_dom.NodeTextEntryIter(node) and the node is
    replaced with the returned value via xml_dom.replace_with_text().
    
    @param node The XML element to process
    @param on_error Called with the SyntaxException if the function rejects
        its input. By then the node has already been replaced with a comment
        containing its (emphasized) string form.
    '''
    for child in xml_dom.NodeElemIter(node): process(child, on_error)
    
    name = node.tagName
    if not (name.startswith('_') and name in funcs): return # not a function
    
    try: value = funcs[name](xml_dom.NodeTextEntryIter(node))
    except SyntaxException as e:
        str_ = str(node)
        exc.add_msg(e, 'function:\n'+str_)
        # Keep the rejected input visible in the document as a comment
        xml_dom.replace(node, node.ownerDocument.createComment(
            '\n'+term.emph_multiline(str_)))
        on_error(e)
    else: xml_dom.replace_with_text(node, value)
38
39 86 aaronmk
def map_items(func, items):
    '''Applies func to the value of every (name, value) pair in items.
    @return A list of (name, func(value)) pairs, in input order
    '''
    mapped = []
    for name, value in items:
        mapped.append((name, func(value)))
    return mapped
41
42 278 aaronmk
def conv_items(type_, items):
    '''Converts the value of every item to type_.
    
    The items are first passed through xml_dom.TextEntryOnlyIter().
    
    @param type_ Callable applied to each value (e.g. str, int, float)
    @return A list of (name, converted value) pairs
    @throws SyntaxException if type_ raises ValueError for a value
    '''
    def conv(val):
        try: return type_(val)
        except ValueError as e: raise SyntaxException(e)
    return map_items(conv, xml_dom.TextEntryOnlyIter(items))
47 278 aaronmk
48 995 aaronmk
#### XML functions
49
50
# Function names must start with _ to avoid collisions with real tags
51
# Functions take arguments (items)
52
53
def _ignore(items):
54 994 aaronmk
    '''Used to "comment out" an XML subtree'''
55
    return None
56 995 aaronmk
funcs['_ignore'] = _ignore
57 994 aaronmk
58 995 aaronmk
def _alt(items):
59 113 aaronmk
    items = list(items)
60
    items.sort()
61 1186 aaronmk
    try: return items[0][1] # value of lowest-numbered item
62 1187 aaronmk
    except IndexError: return None # input got removed by e.g. SyntaxException
63 995 aaronmk
funcs['_alt'] = _alt
64 113 aaronmk
65 995 aaronmk
def _merge(items):
    '''Sorts the items and passes their values, in that order, as separate
    arguments to maps.merge_values(), returning its result'''
    ordered = sorted(items)
    values = [value for name, value in ordered]
    return maps.merge_values(*values)
69 995 aaronmk
funcs['_merge'] = _merge
70 917 aaronmk
71 995 aaronmk
def _label(items):
    '''Prefixes a value with a label.
    
    Expects items named 'label' and 'value'.
    
    @return The string '<label>: <value>'
    @throws SyntaxException if either item is missing
    '''
    items = dict(conv_items(str, items)) # get *once* from iter and check types
    try:
        label = items['label']
        value = items['value']
    except KeyError as e: raise SyntaxException(e)
    return label+': '+value
78 995 aaronmk
funcs['_label'] = _label
79 917 aaronmk
80 1047 aaronmk
def _nullIf(items):
    '''Compares a value against a designated "null" value.
    
    Items:
    - 'null' (required): the value that stands for null
    - 'value' (required): the value to test
    - 'type' (optional): if 'float', null is converted to float before the
      comparison; otherwise it stays a str
    
    @return util.none_if(value, null) -- presumably None when value equals
        null and value otherwise; see util.none_if
    @throws SyntaxException if 'null' or 'value' is missing, or if null can't
        be converted to the requested type
    '''
    items = dict(conv_items(str, items))
    try:
        null = items['null']
        value = items['value']
    except KeyError as e: raise SyntaxException(e)
    
    type_ = str
    if items.get('type', None) == 'float': type_ = float
    # A malformed null (e.g. 'abc' with type float) is bad function input, so
    # report it the same way as any other syntax error rather than letting
    # the ValueError escape process()
    try: null = type_(null)
    except ValueError as e: raise SyntaxException(e)
    # NOTE(review): value is passed on as a str even when type is 'float';
    # confirm that util.none_if handles the str/float comparison as intended
    return util.none_if(value, null)
91
funcs['_nullIf'] = _nullIf
92
93 995 aaronmk
def _range(items):
    '''Computes the size of a numeric range.
    
    Expects items named 'from' and 'to'. All item values are converted to
    float (conv_items raises SyntaxException for a value that isn't a number).
    
    @return str(to - from), or None if either endpoint is missing
    '''
    items = dict(conv_items(float, items))
    from_ = items.get('from')
    to = items.get('to')
    if from_ is None or to is None: return None
    return str(to - from_)
99 995 aaronmk
funcs['_range'] = _range
100 86 aaronmk
101 995 aaronmk
def _avg(items):
    '''Computes the arithmetic mean of the item values.
    
    All values are converted to float (conv_items raises SyntaxException for
    a value that isn't a number).
    
    @return The mean as a str, or None if there are no items
    '''
    count = 0
    sum_ = 0.
    for name, value in conv_items(float, items):
        count += 1
        sum_ += value
    # Empty input (e.g. all arguments were removed) has no average; return
    # None instead of dividing by zero
    if count == 0: return None
    return str(sum_/count)
108 995 aaronmk
funcs['_avg'] = _avg
109 86 aaronmk
110 968 aaronmk
class CvException(Exception):
    '''Raised for a CV (coefficient of variation) value, which is rejected by
    _noCV. Takes no arguments; the message is fixed.'''
    def __init__(self):
        msg = ('CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
        Exception.__init__(self, msg)
115
116 995 aaronmk
def _noCV(items):
117 968 aaronmk
    try: name, value = items.next()
118
    except StopIteration: return None
119
    if re.match('^(?i)CV *\d+$', value): raise SyntaxException(CvException())
120
    return value
121 995 aaronmk
funcs['_noCV'] = _noCV
122 968 aaronmk
123 995 aaronmk
def _date(items):
    '''Normalizes a date to the form YYYY-MM-DD.
    
    Accepts either:
    - a 'date' item, which is either a number, treated as a (possibly
      fractional) year whose fraction becomes an offset of
      round(fraction*365) days from January 1, or any other string, which is
      parsed with dateutil (if dateutil isn't installed, the string is
      returned unchanged)
    - or, if there is no 'date' item, integer 'year', 'month' and 'day'
      items. Parts that are missing or 0 default to 1900, 1 and 1.
    
    @return The formatted date string
    @throws SyntaxException if a value can't be converted or the parts don't
        form a valid date
    @throws FormatException if the date can't be formatted
    '''
    items = conv_items(str, items) # get *once* from iter and check types
    try: str_ = dict(items)['date']
    except KeyError:
        # Drop zero-valued parts so that the defaults below apply to them
        parts = dict((k, v) for k, v in conv_items(int, items) if v != 0)
        parts.setdefault('year', 1900)
        parts.setdefault('month', 1)
        parts.setdefault('day', 1)
        # TypeError: an item name that isn't a date part
        # ValueError: a part that is out of range
        try: date = datetime.date(**parts)
        except (TypeError, ValueError) as e: raise SyntaxException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: import dateutil.parser
            except ImportError: return str_
            try: date = dateutil.parser.parse(str_)
            except ValueError as e: raise SyntaxException(e)
        else:
            # A numeric string can still be an invalid year (e.g. '0',
            # '99999', 'nan', 'inf'); report it as a syntax error instead of
            # letting the exception escape process()
            try:
                date = (datetime.date(int(year), 1, 1) +
                    datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e: raise SyntaxException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
144 995 aaronmk
funcs['_date'] = _date
145 86 aaronmk
146 328 aaronmk
# Name parts in output order, each paired with the slice that selects that
# part's words from a name split into a list of words
_name_parts_slices_items = [
    ('first', slice(None, 1)), # the first word
    ('middle', slice(1, -1)), # everything between the first and last words
    ('last', slice(-1, None)), # the last word
]
# Slice for each part, by part name
name_parts_slices = dict(_name_parts_slices_items)
# Part names only, in output order
name_parts = [name for name, slice_ in _name_parts_slices_items]
153
154 995 aaronmk
def _name(items):
    '''Assembles a full name from its parts.
    
    Items are keyed by part name ('first', 'middle', 'last'); items with any
    other name are ignored.
    
    @return The parts that are present, in the order given by name_parts,
        joined by single spaces
    '''
    by_part = dict(items)
    present = [by_part[part] for part in name_parts if part in by_part]
    return ' '.join(present)
160 995 aaronmk
funcs['_name'] = _name
161 102 aaronmk
162 995 aaronmk
def _namePart(items):
    '''Extracts parts of a full name.
    
    Each item is named after the part to extract ('first', 'middle' or
    'last') and its value is a full name, which is split on single spaces.
    The words selected by that part's slice in name_parts_slices are kept.
    
    @return The extracted parts combined by _name()
    @throws SyntaxException if an item's name isn't a known name part
    '''
    out_items = []
    for part, value in items:
        try: slice_ = name_parts_slices[part]
        except KeyError as e: raise SyntaxException(e)
        words = value.split(' ')
        out_items.append((part, ' '.join(words[slice_])))
    return _name(out_items)
169
funcs['_namePart'] = _namePart