Revision 86
Added by Aaron Marcuse-Kubitza about 13 years ago
scripts/lib/xml_func.py | ||
---|---|---|
1 |
# XML "function" nodes that evaluate their contents to text |
|
2 |
|
|
3 |
import xml_dom |
|
4 |
|
|
5 |
def map_items(func, items):
    """Apply func to the value of each (name, value) pair in items.

    Returns a new list of (name, func(value)) pairs, in the same order as
    items. Names are passed through untouched.
    """
    converted = []
    for name, value in items:
        converted.append((name, func(value)))
    return converted
|
7 |
|
|
8 |
def range_(items):
    """Return the width of a numeric range as a string.

    items is an iterable of (name, value) pairs; every value is converted
    with float(). The result is str(to - from), using the entries named
    'to' and 'from'. A missing 'to' or 'from' entry raises KeyError.
    """
    bounds = {}
    for name, number in map_items(float, items):
        bounds[name] = number
    upper = bounds['to']
    lower = bounds['from']
    return str(upper - lower)
|
11 |
|
|
12 |
def avg(items):
    """Return the arithmetic mean of the values in items as a string.

    items is an iterable of (name, value) pairs; the names are ignored and
    every value is converted with float(). An empty items raises
    ZeroDivisionError, since there is nothing to divide by.
    """
    numbers = [number for _name, number in map_items(float, items)]
    # Start from 0. so the total is a float and the division is never integer
    # division
    total = sum(numbers, 0.)
    return str(total/len(numbers))
|
19 |
|
|
20 |
def date(items):
    """Join year/month/day entries into a 'year-month-day' string.

    items is an iterable of (name, value) pairs with string values. 'year' is
    required (KeyError if absent); 'month' and 'day' each default to '1'.
    The parts are joined as given, without zero-padding.
    """
    parts = dict(items)
    year = parts['year']
    month = parts.get('month', '1')
    day = parts.get('day', '1')
    return '-'.join((year, month, day))
|
24 |
|
|
25 |
# Function names must start with _ to avoid collisions with real tags |
|
26 |
# Functions take a single argument: an iterable of (name, value) text entries |
|
27 |
funcs = {'_range': range_, '_avg': avg, '_date': date} |
|
28 |
|
|
29 |
def process(doc, node=None):
    """Evaluate XML function nodes under node, replacing each with its text.

    doc  -- the DOM document; used to find the root and to create text nodes
    node -- the subtree to process; defaults to doc.documentElement

    A node whose name is a key of funcs is replaced by a text node holding
    the function's result. The function is passed the node's text entries as
    (name, value) pairs. Such a node's children are not recursed into. Any
    other node is left in place and each of its child elements is processed
    recursively.
    """
    # Identity test: a DOM node class is free to overload ==
    if node is None: node = doc.documentElement
    name = xml_dom.name_of(node)
    if name in funcs:
        result = funcs[name](xml_dom.NodeTextEntryIter(node))
        xml_dom.replace_with_text(doc, node, result)
    else:
        # NOTE(review): a matching child gets replaced while its parent is
        # still being iterated; presumably NodeElemIter tolerates this --
        # confirm against its implementation
        for child in xml_dom.NodeElemIter(node): process(doc, child)
scripts/lib/xml_dom.py | ||
---|---|---|
83 | 83 |
node.appendChild(doc.createTextNode(value)) |
84 | 84 |
else: node.nodeValue = value |
85 | 85 |
|
86 |
class NodeTextEntryIter:
    """Iterator over the text entries among a node's child elements.

    Wraps a NodeElemIter over node and skips every child for which is_text()
    is false. Each remaining child is returned as a
    (name_of(child), value(child)) pair.
    """

    def __init__(self, node): self.iter_ = NodeElemIter(node)

    def __iter__(self): return self

    def curr(self):
        """Return the current text entry without consuming it.

        Non-text children in front of it are skipped (and consumed).
        """
        # NOTE(review): the loop has no exit of its own when no text child
        # remains; presumably iter_.curr() or iter_.next() raises
        # StopIteration at the end -- confirm in NodeElemIter
        while True:
            child = self.iter_.curr()
            if is_text(child): return (name_of(child), value(child))
            self.iter_.next()

    def next(self):
        """Return the current text entry and advance past it."""
        entry = self.curr()
        self.iter_.next()
        return entry

    # Python 3 looks up __next__ rather than next; expose both names so the
    # class can be used in a for loop under either version
    __next__ = next
|
101 |
|
|
102 |
def replace(old_node, new_node):
    """Put new_node in old_node's place under old_node's parent."""
    parent = old_node.parentNode
    # DOM replaceChild() takes (new, old), the reverse of this function's order
    parent.replaceChild(new_node, old_node)
|
104 |
|
|
105 |
def replace_with_text(doc, node, str_):
    """Replace node with a new text node, created by doc, containing str_."""
    text_node = doc.createTextNode(str_)
    replace(node, text_node)
|
106 |
|
|
86 | 107 |
def by_tag_name(node, name, last_only=False): |
87 | 108 |
# last_only optimization returns last (most recently inserted) matching node |
88 | 109 |
children = [] |
scripts/map | ||
---|---|---|
11 | 11 |
sys.path.append(os.path.dirname(__file__)+"/lib") |
12 | 12 |
|
13 | 13 |
import opts |
14 |
import xml_func |
|
14 | 15 |
|
15 | 16 |
def metadata_value(name): |
16 | 17 |
if name.startswith(':'): return name[1:] |
... | ... | |
87 | 88 |
for value, out in metadata: put_col(out, value) |
88 | 89 |
for i, col in enumerate(cols): |
89 | 90 |
if row[i] != '' and col in map_: put_col(map_[col], row[i]) |
91 |
xml_func.process(out_doc) |
|
90 | 92 |
doc = out_doc |
91 | 93 |
|
92 | 94 |
# Output XML tree |
scripts/util/NYSpecimenDataAmericas.xml | ||
---|---|---|
21 | 21 |
<plotObservations> |
22 | 22 |
<plotObservation id="0"> |
23 | 23 |
<plotUniqueIdentifierID>0</plotUniqueIdentifierID> |
24 |
<obsStartDate> |
|
25 |
<_date> |
|
26 |
<year>1984</year> |
|
27 |
<month>8</month> |
|
28 |
<day>20</day> |
|
29 |
</_date> |
|
30 |
</obsStartDate> |
|
24 |
<obsStartDate>1984-8-20</obsStartDate> |
|
31 | 25 |
</plotObservation> |
32 | 26 |
<plotObservation id="1"> |
33 | 27 |
<plotUniqueIdentifierID>1</plotUniqueIdentifierID> |
34 |
<obsStartDate> |
|
35 |
<_date> |
|
36 |
<year>1994</year> |
|
37 |
<month>1</month> |
|
38 |
<day>17</day> |
|
39 |
</_date> |
|
40 |
</obsStartDate> |
|
28 |
<obsStartDate>1994-1-17</obsStartDate> |
|
41 | 29 |
</plotObservation> |
42 | 30 |
</plotObservations> |
43 | 31 |
<plots> |
Also available in: Unified diff
Added xml_func.py to process mappings whose output needs postprocessing