Revision 297
Added by Aaron Marcuse-Kubitza about 13 years ago
bin/map | ||
---|---|---|
78 | 78 |
'''Maps datasource to XML tree''' |
79 | 79 |
doc1 = xml_dom.create_doc(out_label) |
80 | 80 |
root = doc1.documentElement |
81 |
|
|
82 |
def process(get_value, rows): |
|
83 |
'''Processes input values |
|
84 |
@param get_value f(in_, row):str |
|
85 |
''' |
|
86 |
for i, row in enumerate(rows): |
|
87 |
if not (limit == None or i < limit): break |
|
88 |
row_id = str(i) |
|
89 |
for in_, out in mappings: |
|
90 |
value = metadata_value(in_) |
|
91 |
if value == None: value = get_value(in_, row) |
|
92 |
if value != None: |
|
93 |
try: xpath.put_obj(root, out, row_id, has_types, value) |
|
94 |
except Exception: traceback.print_exc() |
|
95 |
|
|
81 | 96 |
if in_is_db: |
82 | 97 |
assert in_is_xpaths |
83 | 98 |
|
... | ... | |
91 | 106 |
|
92 | 107 |
in_db = sql.connect(in_db_config) |
93 | 108 |
in_pkeys = {} |
94 |
for row_idx, row in enumerate(sql.rows(db_xml.get(in_db, |
|
95 |
in_root_xml, in_pkeys, limit))): |
|
96 |
row_id = str(row_idx) |
|
109 |
def get_value(in_, row): |
|
97 | 110 |
pkey, = row |
98 |
for in_, out in mappings: |
|
99 |
value = metadata_value(in_) |
|
100 |
if value == None: |
|
101 |
in_ = in_.cloneNode(True) # don't modify orig value! |
|
102 |
xml_dom.set_id(xpath.get(in_, in_root), pkey) |
|
103 |
value = sql.value_or_none(db_xml.get(in_db, in_, |
|
104 |
in_pkeys)) |
|
105 |
if value != None: |
|
106 |
try: xpath.put_obj(root, out, row_id, has_types, |
|
107 |
str(value)) |
|
108 |
except Exception: traceback.print_exc() |
|
111 |
in_ = in_.cloneNode(True) # don't modify orig value! |
|
112 |
xml_dom.set_id(xpath.get(in_, in_root), pkey) |
|
113 |
value = sql.value_or_none(db_xml.get(in_db, in_, in_pkeys)) |
|
114 |
if value != None: return str(value) |
|
115 |
else: return None |
|
116 |
process(get_value, sql.rows(db_xml.get(in_db, in_root_xml, in_pkeys, |
|
117 |
limit))) |
|
109 | 118 |
in_db.close() |
110 | 119 |
elif in_is_xml: |
111 |
row = xpath.get(doc0.documentElement, in_root) |
|
112 |
for row_idx, row in enumerate(xml_dom.NodeElemIter(row.parentNode)): |
|
113 |
if not (limit == None or row_idx < limit): break |
|
114 |
row_id = str(row_idx) |
|
115 |
for in_, out in mappings: |
|
116 |
value = metadata_value(in_) |
|
117 |
if value == None: |
|
118 |
node = xpath.get(row, in_) |
|
119 |
if node != None: value = xml_dom.value(node) |
|
120 |
if value != None: |
|
121 |
try: xpath.put_obj(root, out, row_id, has_types, value) |
|
122 |
except Exception: traceback.print_exc() |
|
120 |
def get_value(in_, row): |
|
121 |
node = xpath.get(row, in_) |
|
122 |
if node != None: return xml_dom.value(node) |
|
123 |
else: return None |
|
124 |
row0 = xpath.get(doc0.documentElement, in_root) |
|
125 |
process(get_value, xml_dom.NodeElemIter(row0.parentNode)) |
|
123 | 126 |
else: # input is CSV |
124 | 127 |
map_ = dict(mappings) |
125 | 128 |
reader = csv.reader(sys.stdin) |
... | ... | |
131 | 134 |
try: mappings[i] = (col_idxs[in_], out) |
132 | 135 |
except KeyError: pass |
133 | 136 |
|
134 |
for row_idx, row in enumerate(reader): |
|
135 |
if not (limit == None or row_idx < limit): break |
|
136 |
row_id = str(row_idx) |
|
137 |
for in_, out in mappings: |
|
138 |
value = metadata_value(in_) |
|
139 |
if value == None: |
|
140 |
value = row[in_] |
|
141 |
if value == '': value = None |
|
142 |
if value != None: |
|
143 |
try: xpath.put_obj(root, out, row_id, has_types, value) |
|
144 |
except Exception: traceback.print_exc() |
|
137 |
def get_value(in_, row): |
|
138 |
value = row[in_] |
|
139 |
if value != '': return value |
|
140 |
else: return None |
|
141 |
process(get_value, reader) |
|
145 | 142 |
xml_func.process(root) |
146 | 143 |
return doc1 |
147 | 144 |
|
Also available in: Unified diff
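bin/map: Factored out the per-row input processing loop that was duplicated across the DB, XML, and CSV input branches into a shared process(get_value, rows) helper; each branch now supplies only its own get_value callback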