Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 111 aaronmk
7 818 aaronmk
import dates
8 300 aaronmk
import exc
9 917 aaronmk
import maps
10 1234 aaronmk
import strings
11 827 aaronmk
import term
12 1047 aaronmk
import util
13 86 aaronmk
import xml_dom
14
15 995 aaronmk
##### Exceptions
16
17 962 aaronmk
class SyntaxException(Exception):
    '''Signals that an XML function was given input it can't handle.

    The message is 'Invalid XML function syntax: ' followed by
    exc.str_(cause).
    @param cause The underlying error (e.g. the caught KeyError/ValueError)
    '''
    def __init__(self, cause):
        Exception.__init__(self, 'Invalid XML function syntax: '
            +exc.str_(cause))
21 278 aaronmk
22 843 aaronmk
class FormatException(SyntaxException):
    '''SyntaxException raised by _date() when the date can't be formatted'''
23
24 995 aaronmk
##### Functions
25
26
# Registry of XML functions: tag name (e.g. '_ignore') -> implementing function.
# process() looks up each element's tag name here.
funcs = {}
27
28
def process(node, on_error=exc.raise_):
    '''Evaluates the XML functions in node's subtree, children before parents.

    An element whose tag name starts with '_' and is registered in funcs is
    passed (as xml_dom.NodeTextEntryIter(node)) to its function and replaced
    with the returned value via xml_dom.replace_with_text(). If the function
    raises SyntaxException, the element is instead replaced with a comment
    containing its source and the exception is passed to on_error.
    @param node The element to process
    @param on_error Called with each SyntaxException; defaults to exc.raise_
    '''
    # Children first, so a function sees its already-evaluated arguments
    for child in xml_dom.NodeElemIter(node): process(child, on_error)

    name = node.tagName
    if not (name.startswith('_') and name in funcs): return

    try: value = funcs[name](xml_dom.NodeTextEntryIter(node))
    except SyntaxException as e:
        str_ = str(node)
        exc.add_msg(e, 'function:\n'+str_)
        # Comments can't contain '--'. Collapse whole *runs* of dashes:
        # str.replace('--', '-') would still leave '--' behind for '---'.
        comment = re.sub(r'-{2,}', '-', '\n'+term.emph_multiline(str_))
        xml_dom.replace(node, node.ownerDocument.createComment(comment))
        on_error(e)
    else: xml_dom.replace_with_text(node, value)
41
42 86 aaronmk
def map_items(func, items):
    '''Applies func to the value of each (name, value) pair.
    @param func Function taking one value
    @param items Iterable of (name, value) pairs
    @return list of (name, func(value)) pairs, in input order
    '''
    return [(name, func(value)) for name, value in items]
44
45 1234 aaronmk
def cast(type_, val):
    '''Converts val using type_(val).
    @param type_ Callable that converts a value (e.g. int, float, str)
    @return type_(val)
    @throws SyntaxException if type_(val) raises ValueError
    '''
    try: return type_(val)
    except ValueError as e: raise SyntaxException(e)
49
50 278 aaronmk
def conv_items(type_, items):
    '''Casts each item's value to type_.

    items is first wrapped in xml_dom.TextEntryOnlyIter.
    @return list of (name, type_(value)) pairs
    @throws SyntaxException if a value can't be cast (see cast())
    '''
    return map_items(lambda val: cast(type_, val),
        xml_dom.TextEntryOnlyIter(items))
53 278 aaronmk
54 995 aaronmk
#### XML functions
55
56
# Function names must start with _ to avoid collisions with real tags
57
# Functions take arguments (items)
58
59
def _ignore(items):
60 994 aaronmk
    '''Used to "comment out" an XML subtree'''
61
    return None
62 995 aaronmk
# Make _ignore available to process() under the '_ignore' tag name
funcs['_ignore'] = _ignore
63 994 aaronmk
64 1234 aaronmk
def _eq(items):
65
    items = dict(items)
66
    try:
67
        left = items['left']
68
        right = items['right']
69
    except KeyError: return '' # a value was None
70
    return util.bool2str(left == right)
71
# Make _eq available to process() under the '_eq' tag name
funcs['_eq'] = _eq
72
73
def _if(items):
    '''Selects between the 'then' and 'else' items based on 'cond'.

    The condition is true when str(cond) is non-empty.
    @param items Iterable of (name, value) pairs; 'cond' and 'then' are
        required, 'else' is optional
    @return The 'then' value if the condition is true; otherwise the 'else'
        value, or None if there is no 'else'
    @throws SyntaxException if 'cond' or 'then' is missing
    '''
    items = dict(items)
    try:
        cond = items['cond']
        then = items['then']
    except KeyError as e: raise SyntaxException(e)
    else_ = items.get('else', None)
    cond = bool(cast(str, cond))
    if cond: return then
    else: return else_
funcs['_if'] = _if
84
85 995 aaronmk
def _alt(items):
86 113 aaronmk
    items = list(items)
87
    items.sort()
88 1186 aaronmk
    try: return items[0][1] # value of lowest-numbered item
89 1187 aaronmk
    except IndexError: return None # input got removed by e.g. SyntaxException
90 995 aaronmk
# Make _alt available to process() under the '_alt' tag name
funcs['_alt'] = _alt
91 113 aaronmk
92 995 aaronmk
def _merge(items):
    '''Merges the items' values, taken in sorted item order.
    @param items Iterable of (name, value) pairs; each value is cast with
        strings.ustr
    @return maps.merge_values() applied to the values
    @throws SyntaxException if a value can't be cast
    '''
    # get *once* from iter and check types
    items = sorted(conv_items(strings.ustr, items))
    return maps.merge_values(*[v for k, v in items])
funcs['_merge'] = _merge
98 917 aaronmk
99 995 aaronmk
def _label(items):
    '''Prefixes the 'value' item with the 'label' item.
    @param items Iterable of (name, value) pairs containing 'label' and 'value'
    @return label+': '+value
    @throws SyntaxException if 'label' or 'value' is missing
    '''
    items = dict(conv_items(str, items)) # get *once* from iter and check types
    try:
        label = items['label']
        value = items['value']
    except KeyError as e: raise SyntaxException(e)
    return label+': '+value
funcs['_label'] = _label
107 917 aaronmk
108 1047 aaronmk
def _nullIf(items):
    '''Nulls out the 'value' item via util.none_if() using the 'null' item.
    @param items Iterable of (name, value) pairs containing 'null' and
        'value'; an optional 'type' of 'float' makes 'null' be parsed as a
        float instead of being used as a string
    @return util.none_if(value, null)
    @throws SyntaxException if 'null' or 'value' is missing, or if 'null'
        can't be converted to the requested type
    '''
    items = dict(conv_items(str, items))
    try:
        null = items['null']
        value = items['value']
    except KeyError as e: raise SyntaxException(e)
    type_ = str
    if items.get('type', None) == 'float': type_ = float
    # cast() rather than type_(): a malformed number must surface as a
    # SyntaxException (which process() handles), not as a raw ValueError
    return util.none_if(value, cast(type_, null))
funcs['_nullIf'] = _nullIf
119
120 1219 aaronmk
def _map(items):
    '''Translates a value using a lookup table given by the other items.

    The last item supplies the value to translate; every preceding
    (name, value) item is a table entry mapping name -> value.
    @return The table entry for the value, or the value itself if it has none
    @throws SyntaxException if there are no items
    '''
    items = conv_items(str, items) # get *once* from iter and check types
    try: value = items.pop()[1] # value is last entry's value
    except IndexError as e: raise SyntaxException(e)
    map_ = dict(items)
    return map_.get(value, value)
funcs['_map'] = _map
128
129
def _replace(items):
    '''Applies regular-expression replacements to a value.

    The last item supplies the value; every preceding (name, value) item is a
    replacement: name is the pattern and value is the substitution. A pattern
    consisting only of word characters matches whole words only.
    @return The value after all replacements, applied in item order
    @throws SyntaxException if there are no items or a pattern is invalid
    '''
    items = conv_items(str, items) # get *once* from iter and check types
    # Take the entry's *value*: the entry itself is a (name, value) tuple,
    # which re.sub() can't operate on
    try: value = items.pop()[1] # value is last entry's value
    except IndexError as e: raise SyntaxException(e)
    try:
        for repl, with_ in items:
            if re.match(r'^\w+$', repl):
                repl = r'(?<![^\W_])'+repl+r'(?![^\W_])' # match whole word
            value = re.sub(repl, with_, value)
    except re.error as e: raise SyntaxException(e)
    return value
funcs['_replace'] = _replace
141
142 1225 aaronmk
def _units(items):
    '''Returns the 'value' item unchanged; the 'units' item is only required.
    @param items Iterable of (name, value) pairs containing 'units' and 'value'
    @return The 'value' item
    @throws SyntaxException if 'units' or 'value' is missing
    '''
    items = dict(conv_items(str, items))
    try:
        units = items['units'] # looked up only to require its presence
        value = items['value']
    except KeyError as e: raise SyntaxException(e)
    return value#+' '+units # don't add yet because unit conversion isn't ready
funcs['_units'] = _units
150
151 995 aaronmk
def _range(items):
    '''Computes the width of the interval given by the 'from' and 'to' items.
    @param items Iterable of (name, value) pairs; values are cast to float
    @return str(to - from), or None if 'from' or 'to' is missing
    @throws SyntaxException if a value can't be cast to float
    '''
    items = dict(conv_items(float, items))
    from_ = items.get('from', None)
    to = items.get('to', None)
    if from_ is None or to is None: return None
    return str(to - from_)
funcs['_range'] = _range
158 86 aaronmk
159 995 aaronmk
def _avg(items):
    '''Computes the arithmetic mean of the items' values.
    @param items Iterable of (name, value) pairs; values are cast to float
    @return str() of the mean, or None if there are no items
    @throws SyntaxException if a value can't be cast to float
    '''
    values = [value for name, value in conv_items(float, items)]
    # No input (e.g. removed upstream): return None instead of dividing by 0
    if not values: return None
    return str(sum(values, 0.)/len(values))
funcs['_avg'] = _avg
167 86 aaronmk
168 968 aaronmk
class CvException(Exception):
    '''Explains why a CV (coefficient of variation) value was rejected'''
    def __init__(self):
        Exception.__init__(self, 'CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
173
174 995 aaronmk
def _noCV(items):
175 968 aaronmk
    try: name, value = items.next()
176
    except StopIteration: return None
177
    if re.match('^(?i)CV *\d+$', value): raise SyntaxException(CvException())
178
    return value
179 995 aaronmk
# Make _noCV available to process() under the '_noCV' tag name
funcs['_noCV'] = _noCV
180 968 aaronmk
181 995 aaronmk
def _date(items):
    '''Normalizes a date to 'YYYY-MM-DD' form.

    The date is given either as a 'date' item or as 'year'/'month'/'day'
    items. A 'date' that parses as a number is treated as a (possibly
    fractional) year; any other 'date' is parsed with dateutil if it is
    installed, and returned unchanged if it is not.
    @param items Iterable of (name, value) pairs
    @return The formatted date (or the raw 'date' string, without dateutil)
    @throws SyntaxException if the date is invalid
    @throws FormatException if the date can't be formatted
    '''
    items = conv_items(str, items) # get *once* from iter and check types
    try: str_ = dict(items)['date']
    except KeyError:
        # No 'date' item: build the date from its parts. Parts equal to 0 are
        # dropped so that they get the defaults below.
        parts = dict((k, v) for k, v in conv_items(int, items) if v != 0)
        parts.setdefault('year', 1900)
        parts.setdefault('month', 1)
        parts.setdefault('day', 1)
        try: date = datetime.date(**parts)
        except ValueError as e: raise SyntaxException(e)
    else:
        try: year = float(str_)
        except ValueError:
            try: import dateutil.parser
            except ImportError: return str_
            try: date = dateutil.parser.parse(str_)
            except ValueError as e: raise SyntaxException(e)
        else:
            # Numeric year: the fractional part becomes a day offset. Years
            # outside datetime's range, nan and inf must be reported as
            # SyntaxException rather than escaping as a raw error.
            try:
                date = (datetime.date(int(year), 1, 1) +
                    datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e: raise SyntaxException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
funcs['_date'] = _date
203 86 aaronmk
204 328 aaronmk
# Name parts in output order, each with the slice that selects it from a list
# of the words in a full name
_name_parts_slices_items = [
    ('first', slice(None, 1)),
    ('middle', slice(1, -1)),
    ('last', slice(-1, None)),
]
# part name -> slice
name_parts_slices = dict(_name_parts_slices_items)
# part names only, in output order
name_parts = [name for name, slice_ in _name_parts_slices_items]
211
212 995 aaronmk
def _name(items):
    '''Joins the name-part items into one space-separated name.
    @param items Iterable of (part, value) pairs; parts not listed in
        name_parts are ignored
    @return The values of the parts that are present, in name_parts order,
        joined with ' '
    '''
    items = dict(items)
    return ' '.join(items[part] for part in name_parts if part in items)
funcs['_name'] = _name
219 102 aaronmk
220 995 aaronmk
def _namePart(items):
    '''Extracts name parts from full names.

    For each (part, value) item, value is split on ' ' and reduced to the
    words selected by name_parts_slices[part]. The results are then combined
    by _name().
    @param items Iterable of (part, full name) pairs
    @return The extracted parts, combined by _name()
    @throws SyntaxException if a part is not a key of name_parts_slices
    '''
    out_items = []
    for part, value in items:
        try: slice_ = name_parts_slices[part]
        except KeyError as e: raise SyntaxException(e)
        out_items.append((part, ' '.join(value.split(' ')[slice_])))
    return _name(out_items)
funcs['_namePart'] = _namePart