Revision 161
Added by Aaron Marcuse-Kubitza about 13 years ago
map | ||
---|---|---|
55 | 55 |
mappings = [] |
56 | 56 |
stream = open(map_path, 'rb') |
57 | 57 |
reader = csv.reader(stream) |
58 |
src, dest = reader.next()[:2]
|
|
58 |
in_label, out_label = reader.next()[:2]
|
|
59 | 59 |
def split_col_name(name): |
60 | 60 |
name, sep, root = name.partition(':') |
61 | 61 |
return name, sep != '', root |
62 |
src, in_is_xpaths, src_root = split_col_name(src)
|
|
63 |
dest, out_is_xpaths, dest_root = split_col_name(dest)
|
|
62 |
in_label, in_is_xpaths, in_root = split_col_name(in_label)
|
|
63 |
out_label, out_is_xpaths, out_root = split_col_name(out_label)
|
|
64 | 64 |
assert out_is_xpaths # CSV output not supported yet |
65 |
has_types = dest_root.startswith('/*s/') # outer elements are types
|
|
65 |
has_types = out_root.startswith('/*s/') # outer elements are types
|
|
66 | 66 |
for row in reader: |
67 | 67 |
in_, out = row[:2] |
68 | 68 |
if out != '': |
69 | 69 |
value = metadata_value(in_) |
70 |
is_metadata = value != None |
|
71 |
if in_is_xpaths and not is_metadata: |
|
72 |
in_ = xpath.parse(src_root+in_) |
|
73 |
if out_is_xpaths: out = xpath.parse(dest_root+out) |
|
74 |
if is_metadata: metadata.append((value, out)) |
|
70 |
if out_is_xpaths: out = xpath.parse(out_root+out) |
|
71 |
if value != None: metadata.append((value, out)) |
|
75 | 72 |
else: mappings.append((in_, out)) |
76 | 73 |
stream.close() |
77 | 74 |
in_is_xml = in_is_xpaths and not in_is_db |
78 | 75 |
|
79 | 76 |
# Input datasource to XML tree, mapping if needed |
80 |
if in_is_xml: doc0 = xml.dom.minidom.parse(sys.stdin) |
|
77 |
if in_is_xml: |
|
78 |
doc0 = xml.dom.minidom.parse(sys.stdin) |
|
81 | 79 |
if map_path != None: |
82 |
doc1 = xml_dom.create_doc(dest)
|
|
80 |
doc1 = xml_dom.create_doc(out_label)
|
|
83 | 81 |
root = doc1.documentElement |
84 | 82 |
if in_is_db: |
85 | 83 |
assert in_is_xpaths |
86 | 84 |
|
87 | 85 |
import db_xml |
88 | 86 |
|
89 |
src_root = xpath.parse(src_root) |
|
90 |
src_root_xml = xpath.path2xml(src_root) |
|
91 |
mappings = [(xpath.path2xml(in_), out) for in_, out in mappings] |
|
87 |
in_root_xml = xpath.path2xml(in_root) |
|
88 |
mappings = [(xpath.path2xml(in_root+in_), out) for in_, out in mappings] |
|
92 | 89 |
|
93 | 90 |
in_db = sql.connect(in_db_config) |
94 | 91 |
in_pkeys = {} |
95 | 92 |
for row_idx, row in enumerate(sql.rows(db_xml.get(in_db, |
96 |
src_root_xml, in_pkeys, limit))):
|
|
93 |
in_root_xml, in_pkeys, limit))):
|
|
97 | 94 |
row_id, = row |
98 | 95 |
row_id = str(row_id) |
99 | 96 |
|
... | ... | |
102 | 99 |
for value, out in metadata: put_col(out, value) |
103 | 100 |
for in_, out in mappings: |
104 | 101 |
in_ = in_.cloneNode(True) # don't modify orig value! |
105 |
xml_dom.set_id(xpath.get(in_, src_root), row_id)
|
|
102 |
xml_dom.set_id(xpath.get(in_, in_root), row_id)
|
|
106 | 103 |
value = sql.value_or_none(db_xml.get(in_db, in_, in_pkeys)) |
107 | 104 |
if value != None: put_col(out, str(value)) |
108 | 105 |
in_db.close() |
109 |
elif in_is_xml: raise SystemExit('XML input not supported yet') |
|
106 |
elif in_is_xml: |
|
107 |
row = xpath.get(doc0.documentElement, in_root) |
|
108 |
for row_idx, row in enumerate(xml_dom.NodeElemIter(row.parentNode)): |
|
109 |
if not (limit == None or row_idx < limit): break |
|
110 |
row_id = str(row_idx) |
|
111 |
|
|
112 |
def put_col(path, value): |
|
113 |
xpath.put_obj(root, path, row_id, has_types, value) |
|
114 |
for value, out in metadata: put_col(out, value) |
|
115 |
for in_, out in mappings: |
|
116 |
node = xpath.get(row, in_) |
|
117 |
if node != None: put_col(out, xml_dom.value(node)) |
|
110 | 118 |
else: # input is CSV |
111 | 119 |
map_ = dict(mappings) |
112 | 120 |
reader = csv.reader(sys.stdin) |
Also available in: Unified diff
map: Added support for XML input