Revision 164
Added by Aaron Marcuse-Kubitza about 13 years ago
scripts/test/accepted_output/NYBG.organisms.VegBank.xml | ||
---|---|---|
1 | 1 |
<?xml version="1.0" ?> |
2 | 2 |
<VegBank> |
3 | 3 |
<taxonObservation id="0"> |
4 |
<taxonImportance> |
|
5 |
<stemCount> |
|
6 |
<stemCount>1</stemCount> |
|
7 |
<stemLocation> |
|
8 |
<definedValue fkey="tableRecord_ID"> |
|
9 |
<userDefined_ID> |
|
10 |
<userDefined> |
|
11 |
<tableName>stemLocation</tableName> |
|
12 |
<userDefinedName>habitat</userDefinedName> |
|
13 |
<userDefinedType>varchar</userDefinedType> |
|
14 |
</userDefined> |
|
15 |
</userDefined_ID> |
|
16 |
<definedValue>Floodplain forest</definedValue> |
|
17 |
</definedValue> |
|
18 |
<definedValue fkey="tableRecord_ID"> |
|
19 |
<userDefined_ID> |
|
20 |
<userDefined> |
|
21 |
<tableName>stemLocation</tableName> |
|
22 |
<userDefinedName>plantFungusDescription</userDefinedName> |
|
23 |
<userDefinedType>varchar</userDefinedType> |
|
24 |
</userDefined> |
|
25 |
</userDefined_ID> |
|
26 |
<definedValue>with rough brown bark, almost no buttressing, 1 m diam; fallen fruit and leaves</definedValue> |
|
27 |
</definedValue> |
|
28 |
</stemLocation> |
|
29 |
</stemCount> |
|
30 |
</taxonImportance> |
|
31 | 4 |
<observation_ID> |
32 | 5 |
<observation> |
33 | 6 |
<plot_ID> |
... | ... | |
68 | 41 |
<obsStartDate>1984-08-20</obsStartDate> |
69 | 42 |
</observation> |
70 | 43 |
</observation_ID> |
44 |
<taxonImportance> |
|
45 |
<stemCount> |
|
46 |
<stemCount>1</stemCount> |
|
47 |
<stemLocation> |
|
48 |
<definedValue fkey="tableRecord_ID"> |
|
49 |
<userDefined_ID> |
|
50 |
<userDefined> |
|
51 |
<tableName>stemLocation</tableName> |
|
52 |
<userDefinedName>habitat</userDefinedName> |
|
53 |
<userDefinedType>varchar</userDefinedType> |
|
54 |
</userDefined> |
|
55 |
</userDefined_ID> |
|
56 |
<definedValue>Floodplain forest</definedValue> |
|
57 |
</definedValue> |
|
58 |
<definedValue fkey="tableRecord_ID"> |
|
59 |
<userDefined_ID> |
|
60 |
<userDefined> |
|
61 |
<tableName>stemLocation</tableName> |
|
62 |
<userDefinedName>plantFungusDescription</userDefinedName> |
|
63 |
<userDefinedType>varchar</userDefinedType> |
|
64 |
</userDefined> |
|
65 |
</userDefined_ID> |
|
66 |
<definedValue>with rough brown bark, almost no buttressing, 1 m diam; fallen fruit and leaves</definedValue> |
|
67 |
</definedValue> |
|
68 |
</stemLocation> |
|
69 |
</stemCount> |
|
70 |
</taxonImportance> |
|
71 | 71 |
<taxonInterpretation> |
72 | 72 |
<ROLE_ID> |
73 | 73 |
<aux_Role> |
... | ... | |
315 | 315 |
</taxonInterpretation> |
316 | 316 |
</taxonObservation> |
317 | 317 |
<taxonObservation id="1"> |
318 |
<taxonImportance> |
|
319 |
<stemCount> |
|
320 |
<stemCount>1</stemCount> |
|
321 |
</stemCount> |
|
322 |
</taxonImportance> |
|
323 | 318 |
<observation_ID> |
324 | 319 |
<observation> |
325 | 320 |
<plot_ID> |
... | ... | |
352 | 347 |
<obsStartDate>1994-01-17</obsStartDate> |
353 | 348 |
</observation> |
354 | 349 |
</observation_ID> |
350 |
<taxonImportance> |
|
351 |
<stemCount> |
|
352 |
<stemCount>1</stemCount> |
|
353 |
</stemCount> |
|
354 |
</taxonImportance> |
|
355 | 355 |
<taxonInterpretation> |
356 | 356 |
<ROLE_ID> |
357 | 357 |
<aux_Role> |
scripts/map | ||
---|---|---|
16 | 16 |
import xml_func |
17 | 17 |
|
18 | 18 |
def metadata_value(name): |
19 |
if name.startswith(':'): return name[1:] |
|
19 |
if type(name) == str and name.startswith(':'): return name[1:] |
|
20 | 20 |
else: return None |
21 | 21 |
|
22 | 22 |
def main(): |
... | ... | |
66 | 66 |
for row in reader: |
67 | 67 |
in_, out = row[:2] |
68 | 68 |
if out != '': |
69 |
value = metadata_value(in_) |
|
70 |
if out_is_xpaths: out = xpath.parse(out_root+out) |
|
71 |
if value != None: metadata.append((value, out)) |
|
72 |
else: mappings.append((in_, out)) |
|
69 |
if out_is_xpaths: out = out_root+out |
|
70 |
mappings.append((in_, out)) |
|
73 | 71 |
stream.close() |
74 | 72 |
in_is_xml = in_is_xpaths and not in_is_db |
75 | 73 |
|
... | ... | |
85 | 83 |
import db_xml |
86 | 84 |
|
87 | 85 |
in_root_xml = xpath.path2xml(in_root) |
88 |
mappings = [(xpath.path2xml(in_root+in_), out) for in_, out in mappings] |
|
86 |
for i, mapping in enumerate(mappings): |
|
87 |
in_, out = mapping |
|
88 |
if metadata_value(in_) == None: |
|
89 |
mappings[i] = (xpath.path2xml(in_root+in_), out) |
|
89 | 90 |
|
90 | 91 |
in_db = sql.connect(in_db_config) |
91 | 92 |
in_pkeys = {} |
... | ... | |
93 | 94 |
limit)): |
94 | 95 |
row_id, = row |
95 | 96 |
row_id = str(row_id) |
96 |
|
|
97 |
def put_col(path, value): |
|
98 |
xpath.put_obj(root, path, row_id, has_types, value) |
|
99 |
for value, out in metadata: put_col(out, value) |
|
100 | 97 |
for in_, out in mappings: |
101 |
in_ = in_.cloneNode(True) # don't modify orig value! |
|
102 |
xml_dom.set_id(xpath.get(in_, in_root), row_id) |
|
103 |
value = sql.value_or_none(db_xml.get(in_db, in_, in_pkeys)) |
|
104 |
if value != None: put_col(out, str(value)) |
|
98 |
value = metadata_value(in_) |
|
99 |
if value == None: |
|
100 |
in_ = in_.cloneNode(True) # don't modify orig value! |
|
101 |
xml_dom.set_id(xpath.get(in_, in_root), row_id) |
|
102 |
value = sql.value_or_none(db_xml.get(in_db, in_, |
|
103 |
in_pkeys)) |
|
104 |
if value != None: |
|
105 |
xpath.put_obj(root, out, row_id, has_types, str(value)) |
|
105 | 106 |
in_db.close() |
106 | 107 |
elif in_is_xml: |
107 | 108 |
row = xpath.get(doc0.documentElement, in_root) |
108 | 109 |
for row_idx, row in enumerate(xml_dom.NodeElemIter(row.parentNode)): |
109 | 110 |
if not (limit == None or row_idx < limit): break |
110 | 111 |
row_id = str(row_idx) |
111 |
|
|
112 |
def put_col(path, value): |
|
113 |
xpath.put_obj(root, path, row_id, has_types, value) |
|
114 |
for value, out in metadata: put_col(out, value) |
|
115 | 112 |
for in_, out in mappings: |
116 |
node = xpath.get(row, in_) |
|
117 |
if node != None: put_col(out, xml_dom.value(node)) |
|
113 |
value = metadata_value(in_) |
|
114 |
if value == None: |
|
115 |
node = xpath.get(row, in_) |
|
116 |
if node != None: value = xml_dom.value(node) |
|
117 |
if value != None: |
|
118 |
xpath.put_obj(root, out, row_id, has_types, value) |
|
118 | 119 |
else: # input is CSV |
119 | 120 |
map_ = dict(mappings) |
120 | 121 |
reader = csv.reader(sys.stdin) |
121 | 122 |
cols = reader.next() |
122 | 123 |
col_idxs = dict([(value, idx) for idx, value in enumerate(cols)]) |
123 |
mappings = [(col_idxs.get(in_, None), out) for in_, out in mappings] |
|
124 |
mappings = filter(lambda item: item[0] != None, mappings) |
|
124 |
for i, mapping in enumerate(mappings): |
|
125 |
in_, out = mapping |
|
126 |
if metadata_value(in_) == None: |
|
127 |
try: mappings[i] = (col_idxs[in_], out) |
|
128 |
except KeyError: pass |
|
125 | 129 |
|
126 | 130 |
for row_idx, row in enumerate(reader): |
127 | 131 |
if not (limit == None or row_idx < limit): break |
128 | 132 |
row_id = str(row_idx) |
129 |
|
|
130 |
def put_col(path, value): |
|
131 |
xpath.put_obj(root, path, row_id, has_types, value) |
|
132 |
for value, out in metadata: put_col(out, value) |
|
133 | 133 |
for in_, out in mappings: |
134 |
if row[in_] != '': put_col(out, row[in_]) |
|
134 |
value = metadata_value(in_) |
|
135 |
if value == None: |
|
136 |
value = row[in_] |
|
137 |
if value == '': value = None |
|
138 |
if value != None: |
|
139 |
xpath.put_obj(root, out, row_id, has_types, value) |
|
135 | 140 |
xml_func.process(root) |
136 | 141 |
else: doc1 = doc0 |
137 | 142 |
|
Also available in: Unified diff
map: Handle metadata in order with regular mappings