Revision 512
Added by Aaron Marcuse-Kubitza about 13 years ago
| Old | New | bin/map |
|---|---|---|
43 | 43 |
out_is_db = 'engine' in out_db_config |
44 | 44 |
|
45 | 45 |
# Parse args |
46 |
# Parse args |
|
47 | 46 |
map_paths = sys.argv[1:] |
48 |
if map_paths == [] and (in_is_db or not out_is_db): usage_err()
|
|
49 |
map_path = None
|
|
50 |
if map_paths != []: map_path = map_paths[0]
|
|
47 |
if map_paths == []:
|
|
48 |
if in_is_db or not out_is_db: usage_err()
|
|
49 |
else: map_paths = [None]
|
|
51 | 50 |
|
52 |
# Load map header |
|
53 |
in_is_xpaths = True |
|
54 |
out_label = None |
|
55 |
if map_path != None: |
|
56 |
import copy |
|
57 |
import csv |
|
51 |
def process_input(root, process_row, map_path): |
|
52 |
'''Inputs datasource to XML tree, mapping if needed''' |
|
53 |
# Load map header |
|
54 |
in_is_xpaths = True |
|
55 |
out_label = None |
|
56 |
if map_path != None: |
|
57 |
import copy |
|
58 |
import csv |
|
59 |
|
|
60 |
import xpath |
|
61 |
|
|
62 |
metadata = [] |
|
63 |
mappings = [] |
|
64 |
stream = open(map_path, 'rb') |
|
65 |
reader = csv.reader(stream) |
|
66 |
in_label, out_label = reader.next()[:2] |
|
67 |
def split_col_name(name): |
|
68 |
name, sep, root = name.partition(':') |
|
69 |
return name, sep != '', root |
|
70 |
in_label, in_is_xpaths, in_root = split_col_name(in_label) |
|
71 |
out_label, out_is_xpaths, out_root = split_col_name(out_label) |
|
72 |
assert out_is_xpaths # CSV output not supported yet |
|
73 |
has_types = out_root.startswith('/*s/') # outer elements are types |
|
74 |
for row in reader: |
|
75 |
in_, out = row[:2] |
|
76 |
if out != '': |
|
77 |
if out_is_xpaths: out = xpath.parse(out_root+out) |
|
78 |
mappings.append((in_, out)) |
|
79 |
stream.close() |
|
80 |
|
|
81 |
root.ownerDocument.documentElement.tagName = out_label |
|
82 |
in_is_xml = in_is_xpaths and not in_is_db |
|
58 | 83 |
|
59 |
import xpath |
|
84 |
if in_is_xml: |
|
85 |
doc0 = minidom.parse(sys.stdin) |
|
86 |
if out_label == None: out_label = doc0.documentElement.tagName |
|
60 | 87 |
|
61 |
metadata = [] |
|
62 |
mappings = [] |
|
63 |
stream = open(map_path, 'rb') |
|
64 |
reader = csv.reader(stream) |
|
65 |
in_label, out_label = reader.next()[:2] |
|
66 |
def split_col_name(name): |
|
67 |
name, sep, root = name.partition(':') |
|
68 |
return name, sep != '', root |
|
69 |
in_label, in_is_xpaths, in_root = split_col_name(in_label) |
|
70 |
out_label, out_is_xpaths, out_root = split_col_name(out_label) |
|
71 |
assert out_is_xpaths # CSV output not supported yet |
|
72 |
has_types = out_root.startswith('/*s/') # outer elements are types |
|
73 |
for row in reader: |
|
74 |
in_, out = row[:2] |
|
75 |
if out != '': |
|
76 |
if out_is_xpaths: out = xpath.parse(out_root+out) |
|
77 |
mappings.append((in_, out)) |
|
78 |
stream.close() |
|
79 |
in_is_xml = in_is_xpaths and not in_is_db |
|
80 |
|
|
81 |
if in_is_xml: |
|
82 |
doc0 = minidom.parse(sys.stdin) |
|
83 |
if out_label == None: out_label = doc0.documentElement.tagName |
|
84 |
|
|
85 |
def process_input(root, process_row): |
|
86 |
'''Inputs datasource to XML tree, mapping if needed''' |
|
87 | 88 |
def process_rows(get_value, rows): |
88 | 89 |
'''Processes input values |
89 | 90 |
@param get_value f(in_, row):str |
... | ... | |
152 | 153 |
else: return None |
153 | 154 |
process_rows(get_value, reader) |
154 | 155 |
|
156 |
def process_inputs(root, process_row): |
|
157 |
for map_path in map_paths: process_input(root, process_row, map_path) |
|
158 |
|
|
155 | 159 |
# Output XML tree |
156 |
doc = xml_dom.create_doc(out_label)
|
|
160 |
doc = xml_dom.create_doc() |
|
157 | 161 |
root = doc.documentElement |
158 | 162 |
if out_is_db: |
159 | 163 |
from psycopg2.extensions import ISOLATION_LEVEL_SERIALIZABLE |
... | ... | |
182 | 186 |
except sql.DatabaseErrors, e: on_error(e) |
183 | 187 |
root.clear() |
184 | 188 |
|
185 |
process_input(root, process_row) |
|
189 |
process_inputs(root, process_row)
|
|
186 | 190 |
sys.stderr.write('Inserted '+str(row_ct_ref[0])+ |
187 | 191 |
' new rows into database\n') |
188 | 192 |
finally: |
... | ... | |
190 | 194 |
out_db.close() |
191 | 195 |
else: # output is XML |
192 | 196 |
def process_row(input_row): pass |
193 |
process_input(root, process_row) |
|
197 |
process_inputs(root, process_row)
|
|
194 | 198 |
xml_func.process(root) |
195 | 199 |
doc.writexml(sys.stdout, **xml_dom.prettyxml_config) |
196 | 200 |
|
Also available in: Unified diff
bin/map: Added support for processing multiple map_paths at once in the same transaction