Project

General

Profile

1 996 aaronmk
# XML "function" nodes that transform their contents
2 86 aaronmk
3 111 aaronmk
import datetime
4 968 aaronmk
import re
5 1219 aaronmk
import sre_constants
6 111 aaronmk
7 818 aaronmk
import dates
8 300 aaronmk
import exc
9 917 aaronmk
import maps
10 827 aaronmk
import term
11 1047 aaronmk
import util
12 86 aaronmk
import xml_dom
13
14 995 aaronmk
##### Exceptions
15
16 962 aaronmk
class SyntaxException(Exception):
    '''Signals that an XML function was given invalid input.

    cause: the underlying problem, usually the exception that was caught;
        its description (as rendered by exc.str_()) is appended to the message
    '''
    def __init__(self, cause):
        Exception.__init__(self,
            'Invalid XML function syntax: '+exc.str_(cause))
20 278 aaronmk
21 843 aaronmk
class FormatException(SyntaxException):
    '''A SyntaxException raised when a value could not be formatted for output
    (see _date(), which raises it when formatting the date fails)'''
22
23 995 aaronmk
##### Functions
24
25
# Registry of XML functions: maps a function's tag name to the callable that
# implements it. Looked up by process(); each function registers itself right
# after its definition.
funcs = {}
26
27
def process(node, on_error=exc.raise_):
    '''Evaluates, in place, the XML functions in the subtree rooted at node.

    An element is a function call if its tag name starts with '_' and is
    registered in funcs. The function receives the element's text entries and
    its return value is passed to xml_dom.replace_with_text() to replace the
    element. If the function raises a SyntaxException, the element is instead
    replaced by a comment containing the offending XML, and the exception is
    handed to on_error.

    node: the DOM element to process
    on_error: callable invoked with each SyntaxException (raises it by default)
    '''
    # Children first, so a function only sees already-evaluated arguments
    for child in xml_dom.NodeElemIter(node): process(child, on_error)

    name = node.tagName
    if not (name.startswith('_') and name in funcs): return # not a function

    try: value = funcs[name](xml_dom.NodeTextEntryIter(node))
    except SyntaxException as e:
        node_str = str(node) # capture the XML before the node is replaced
        exc.add_msg(e, 'function:\n'+node_str)
        xml_dom.replace(node, node.ownerDocument.createComment(
            '\n'+term.emph_multiline(node_str)))
        on_error(e)
    else: xml_dom.replace_with_text(node, value)
39
40 86 aaronmk
def map_items(func, items):
    '''Applies func to the value of every (name, value) pair in items.

    Returns a new list of (name, func(value)) pairs, in the original order.
    '''
    return [(name, func(value)) for name, value in items]
42
43 278 aaronmk
def conv_items(type_, items):
    '''Converts the value of every item to type_.

    items is first filtered through xml_dom.TextEntryOnlyIter().
    Returns a list of (name, converted value) pairs.
    Raises SyntaxException if type_ rejects a value with a ValueError.
    '''
    def conv(val):
        try: return type_(val)
        except ValueError as e: raise SyntaxException(e)
    return map_items(conv, xml_dom.TextEntryOnlyIter(items))
48 278 aaronmk
49 995 aaronmk
#### XML functions
50
51
# Function names must start with _ to avoid collisions with real tags
52
# Functions take arguments (items)
53
54
def _ignore(items):
55 994 aaronmk
    '''Used to "comment out" an XML subtree'''
56
    return None
57 995 aaronmk
funcs['_ignore'] = _ignore
58 994 aaronmk
59 995 aaronmk
def _alt(items):
60 113 aaronmk
    items = list(items)
61
    items.sort()
62 1186 aaronmk
    try: return items[0][1] # value of lowest-numbered item
63 1187 aaronmk
    except IndexError: return None # input got removed by e.g. SyntaxException
64 995 aaronmk
funcs['_alt'] = _alt
65 113 aaronmk
66 995 aaronmk
def _merge(items):
    '''Merges the item values, taken in sorted item order, by passing them as
    separate arguments to maps.merge_values()'''
    ordered = sorted(items)
    return maps.merge_values(*[value for name, value in ordered])
70 995 aaronmk
funcs['_merge'] = _merge
71 917 aaronmk
72 995 aaronmk
def _label(items):
    '''Prefixes a value with a label, returning '<label>: <value>'.

    Requires a 'label' and a 'value' item.
    Raises SyntaxException if either one is missing.
    '''
    items = dict(conv_items(str, items)) # get *once* from iter and check types
    try:
        label = items['label']
        value = items['value']
    except KeyError as e: raise SyntaxException(e)
    return label+': '+value
79 995 aaronmk
funcs['_label'] = _label
80 917 aaronmk
81 1047 aaronmk
def _nullIf(items):
    '''Returns util.none_if(value, null), i.e. presumably None when the value
    equals the designated null value (see util.none_if() for the exact rule).

    Requires a 'null' and a 'value' item. An optional 'type' item of 'float'
    makes null be converted to a float first; otherwise it stays a string.
    Raises SyntaxException if a required item is missing or if null can't be
    converted to the requested type.
    '''
    items = dict(conv_items(str, items))
    try:
        null = items['null']
        value = items['value']
    except KeyError as e: raise SyntaxException(e)

    type_ = str
    if items.get('type') == 'float': type_ = float
    # A malformed null is bad input, so report it like any other syntax error
    try: null = type_(null)
    except ValueError as e: raise SyntaxException(e)
    return util.none_if(value, null)
91 1047 aaronmk
funcs['_nullIf'] = _nullIf
92
93 1219 aaronmk
def _map(items):
    '''Translates a value using a lookup table supplied by the other items.

    The last item's value is the value to translate. Every preceding item
    maps its name to its value. Returns the translation if the table has one
    and the value unchanged otherwise.
    Raises SyntaxException if there are no items.
    '''
    items = conv_items(str, items) # get *once* from iter and check types
    try: value = items.pop()[1] # value is last entry's value
    except IndexError as e: raise SyntaxException(e)
    return dict(items).get(value, value)
100
funcs['_map'] = _map
101
102
def _replace(items):
    '''Applies a series of regexp replacements to a value.

    The last item's value is the text to modify. For every preceding item, in
    order, the name is a regexp and the value is its replacement (as for
    re.sub()). A regexp made only of word characters matches whole words only,
    i.e. not when directly adjacent to a letter or digit.
    Raises SyntaxException if there are no items or if a regexp is invalid.
    '''
    items = conv_items(str, items) # get *once* from iter and check types
    # Take the value out of the (name, value) entry; passing the whole tuple on
    # would make re.sub() fail
    try: value = items.pop()[1] # value is last entry's value
    except IndexError as e: raise SyntaxException(e)
    try:
        for repl, with_ in items:
            if re.match(r'^\w+$', repl):
                repl = r'(?<![^\W_])'+repl+r'(?![^\W_])' # match whole word
            value = re.sub(repl, with_, value)
    except re.error as e: raise SyntaxException(e)
    return value
113
funcs['_replace'] = _replace
114
115 1225 aaronmk
def _units(items):
    '''Returns the value unchanged; the units are required but not used yet.

    Requires a 'units' and a 'value' item.
    Raises SyntaxException if either one is missing.
    '''
    items = dict(conv_items(str, items))
    try:
        units = items['units'] # looked up so that a missing entry is reported
        value = items['value']
    except KeyError as e: raise SyntaxException(e)
    return value#+' '+units # don't add yet because unit conversion isn't ready
122
funcs['_units'] = _units
123
124 995 aaronmk
def _range(items):
    '''Returns the width of a numeric range, 'to' minus 'from', as a string.

    Returns None if either endpoint is missing.
    Raises SyntaxException (via conv_items()) if a value is not a number.
    '''
    items = dict(conv_items(float, items))
    from_ = items.get('from')
    to = items.get('to')
    if from_ is None or to is None: return None
    return str(to - from_)
130 995 aaronmk
funcs['_range'] = _range
131 86 aaronmk
132 995 aaronmk
def _avg(items):
    '''Returns the arithmetic mean of the item values, as a string.

    Returns None if there are no items, instead of dividing by zero.
    Raises SyntaxException (via conv_items()) if a value is not a number.
    '''
    values = [value for name, value in conv_items(float, items)]
    if not values: return None # input is empty, e.g. removed after an error
    return str(sum(values, 0.)/len(values))
139 995 aaronmk
funcs['_avg'] = _avg
140 86 aaronmk
141 968 aaronmk
class CvException(Exception):
    '''Explains why a CV (coefficient of variation) value was rejected'''
    def __init__(self):
        Exception.__init__(self, 'CV (coefficient of variation) values are only'
            ' allowed for ratio scale data '
            '(see <http://en.wikipedia.org/wiki/Coefficient_of_variation>)')
146
147 995 aaronmk
def _noCV(items):
148 968 aaronmk
    try: name, value = items.next()
149
    except StopIteration: return None
150
    if re.match('^(?i)CV *\d+$', value): raise SyntaxException(CvException())
151
    return value
152 995 aaronmk
funcs['_noCV'] = _noCV
153 968 aaronmk
154 995 aaronmk
def _date(items):
    '''Normalizes a date to the form YYYY-MM-DD.

    The date is given either as a single 'date' item or as separate 'year',
    'month', and 'day' items:
    - Separate parts: parts that are missing or 0 default to 1900 for the year
      and to 1 for the month and day.
    - A numeric 'date': taken to be a year whose fractional part, if any,
      selects a day within that year (as a fraction of 365 days).
    - Any other 'date': parsed with dateutil, if that is installed; otherwise
      the string is returned unchanged.

    Raises SyntaxException if the input is not a valid date, and
    FormatException if the date can't be formatted.
    '''
    items = conv_items(str, items) # get *once* from iter and check types
    try: str_ = dict(items)['date']
    except KeyError:
        # No 'date' item, so assemble the date from its parts
        parts = dict([(k, v) for k, v in conv_items(int, items) if v != 0])
        parts.setdefault('year', 1900)
        parts.setdefault('month', 1)
        parts.setdefault('day', 1)
        # ValueError: part out of range; TypeError: unknown part name
        try: date = datetime.date(**parts)
        except (ValueError, TypeError) as e: raise SyntaxException(e)
    else:
        try: year = float(str_)
        except ValueError:
            # Not a plain number, so treat it as a free-form date string
            try: import dateutil.parser
            except ImportError: return str_ # can't parse it, so pass it through
            try: date = dateutil.parser.parse(str_)
            except ValueError as e: raise SyntaxException(e)
        else:
            # Out-of-range and non-finite years (e.g. 0, inf, nan) are invalid
            # input rather than internal errors
            try:
                date = (datetime.date(int(year), 1, 1)
                    + datetime.timedelta(round((year % 1.)*365)))
            except (ValueError, OverflowError) as e: raise SyntaxException(e)
    try: return dates.strftime('%Y-%m-%d', date)
    except ValueError as e: raise FormatException(e)
175 995 aaronmk
funcs['_date'] = _date
176 86 aaronmk
177 328 aaronmk
# Each part of a name, paired with the slice of the name's space-separated
# words that makes up that part. Listed in the order the parts are output.
_name_parts_slices_items = [
    ('first', slice(None, 1)),
    ('middle', slice(1, -1)),
    ('last', slice(-1, None)),
]
name_parts_slices = dict(_name_parts_slices_items) # part name -> slice
name_parts = [name for name, slice_ in _name_parts_slices_items] # in order
184
185 995 aaronmk
def _name(items):
    '''Joins name parts into a full name.

    items: (part name, value) pairs. Only the parts listed in name_parts are
        used, and they are output in name_parts order, separated by spaces.
    '''
    items = dict(items)
    return ' '.join([items[part] for part in name_parts if part in items])
191 995 aaronmk
funcs['_name'] = _name
192 102 aaronmk
193 995 aaronmk
def _namePart(items):
    '''Extracts parts of full names.

    Each item is named after the part to extract (a key of name_parts_slices)
    and its value is the full name to take that part from. The name is split
    on spaces, and the extracted parts are combined using _name().
    Raises SyntaxException if an item names an unknown part.
    '''
    out_items = []
    for part, value in items:
        try: slice_ = name_parts_slices[part]
        except KeyError as e: raise SyntaxException(e)
        out_items.append((part, ' '.join(value.split(' ')[slice_])))
    return _name(out_items)
200
funcs['_namePart'] = _namePart