Revision 133
Added by Aaron Marcuse-Kubitza about 13 years ago
scripts/lib/util.py | ||
---|---|---|
1 | 1 |
# Useful functions and classes |
2 | 2 |
|
3 |
def skip(iter_, func): |
|
4 |
# Advance iter while func is True |
|
5 |
try: |
|
6 |
while func(iter_.curr()): iter_.next() |
|
7 |
except StopIteration: pass # nothing after the matching elements |
|
8 |
|
|
3 | 9 |
def rename_key(dict_, orig, new): |
4 | 10 |
try: dict_[new] = dict_.pop(orig) |
5 | 11 |
except KeyError: pass |
scripts/lib/xml_dom.py | ||
---|---|---|
113 | 113 |
if last_only: break |
114 | 114 |
return children |
115 | 115 |
|
116 |
def create_doc(root='_'): |
|
117 |
return xml.dom.minidom.getDOMImplementation().createDocument(None, root, |
|
118 |
None) |
|
119 |
|
|
116 | 120 |
# xml.dom.minidom modifications |
117 | 121 |
|
118 | 122 |
def _write_data(writer, data): writer.write(escape(data)) |
scripts/lib/xpath.py | ||
---|---|---|
215 | 215 |
set_id(xpath, id_, has_types) |
216 | 216 |
if value != None: set_value(xpath, value) |
217 | 217 |
get(doc, xpath, True) |
218 |
|
|
219 |
def path2xml(xpath): |
|
220 |
doc = xml_dom.create_doc() |
|
221 |
get(doc, xpath, True) |
|
222 |
return doc.documentElement |
|
223 |
|
|
224 |
def str2xml(xpath): return path2xml(parse(xpath)) |
scripts/lib/db_xml.py | ||
---|---|---|
5 | 5 |
|
6 | 6 |
import sql |
7 | 7 |
import strings |
8 |
import util |
|
8 | 9 |
import xml_dom |
9 | 10 |
|
10 | 11 |
def name_of(node): return re.sub(r'^.*\.', r'', xml_dom.name_of(node)) |
... | ... | |
32 | 33 |
elif child_name == name: return child |
33 | 34 |
return None |
34 | 35 |
|
35 |
def get(db, node, pkeys=None, parent_id=None):
|
|
36 |
def get(db, node, pkeys=None): |
|
36 | 37 |
if pkeys == None: pkeys = {} |
37 | 38 |
def pkey(table): return sql.pkey(db, pkeys, table) |
38 | 39 |
|
40 |
for child in xml_dom.NodeElemIter(node): |
|
41 |
assert xml_dom.is_text(child) |
|
42 |
|
|
43 |
return [] |
|
39 | 44 |
|
40 | 45 |
def put(db, node, store_ids=False, row_ct_ref=None, pkeys=None, parent_id=None): |
41 | 46 |
# store_ids enables searching the tree for missing fields |
... | ... | |
86 | 91 |
|
87 | 92 |
def xml2db(db, node, row_ct_ref=None): |
88 | 93 |
iter_ = xml_dom.NodeElemIter(node) |
89 |
while xml_dom.is_text(iter_.curr()): iter_.next() # skip metadata
|
|
94 |
util.skip(iter_, xml_dom.is_text) # skip metadata
|
|
90 | 95 |
for child in iter_: put(db, child, True, row_ct_ref) |
scripts/map | ||
---|---|---|
11 | 11 |
sys.path.append(os.path.dirname(__file__)+"/lib") |
12 | 12 |
|
13 | 13 |
import opts |
14 |
from Parser import SyntaxException |
|
14 | 15 |
import sql |
16 |
import xml_dom |
|
15 | 17 |
import xml_func |
16 | 18 |
|
17 | 19 |
def metadata_value(name): |
... | ... | |
46 | 48 |
import copy |
47 | 49 |
import csv |
48 | 50 |
|
49 |
from Parser import SyntaxException |
|
50 | 51 |
import xpath |
51 | 52 |
|
53 |
metadata = [] |
|
52 | 54 |
mappings = [] |
53 | 55 |
stream = open(map_path, 'rb') |
54 | 56 |
reader = csv.reader(stream) |
... | ... | |
58 | 60 |
return name, sep != '', root |
59 | 61 |
src, in_is_xpaths, src_root = split_col_name(src) |
60 | 62 |
dest, out_is_xpaths, dest_root = split_col_name(dest) |
61 |
assert out_is_xpaths |
|
63 |
assert out_is_xpaths # CSV output not supported yet
|
|
62 | 64 |
has_types = dest_root.startswith('/*s/') # outer elements are types |
63 | 65 |
for row in reader: |
64 | 66 |
in_, out = row[:2] |
65 | 67 |
if out != '': |
66 |
try: mappings.append((in_, xpath.parse(dest_root+out))) |
|
67 |
except SyntaxException, ex: raise SystemExit(str(ex)) |
|
68 |
value = metadata_value(in_) |
|
69 |
is_metadata = value != None |
|
70 |
if in_is_xpaths and not is_metadata: |
|
71 |
in_ = xpath.parse(src_root+in_) |
|
72 |
if out_is_xpaths: out = xpath.parse(dest_root+out) |
|
73 |
if is_metadata: metadata.append((value, out)) |
|
74 |
else: mappings.append((in_, out)) |
|
68 | 75 |
stream.close() |
69 | 76 |
in_is_xml = in_is_xpaths and not in_is_db |
70 | 77 |
|
71 | 78 |
# Input datasource to XML tree, mapping if needed |
72 | 79 |
if in_is_xml: doc0 = xml.dom.minidom.parse(sys.stdin) |
73 | 80 |
if map_path != None: |
74 |
doc1 = xml.dom.minidom.getDOMImplementation().createDocument(None, dest, |
|
75 |
None) |
|
81 |
doc1 = xml_dom.create_doc(dest) |
|
76 | 82 |
if in_is_db: |
77 | 83 |
assert in_is_xpaths |
78 | 84 |
|
79 | 85 |
import db_xml |
80 | 86 |
|
81 |
try: src_root = xpath.parse(src_root)
|
|
82 |
except SyntaxException, ex: raise SystemExit(str(ex))
|
|
87 |
src_root = xpath.str2xml(src_root)
|
|
88 |
mappings = [(in_, xpath.path2xml(out)) for in_, out in mappings]
|
|
83 | 89 |
|
84 | 90 |
in_db = sql.connect(in_db_config) |
85 |
for in_, out in mappings: |
|
86 |
value = metadata_value(in_) |
|
87 |
if value == None: |
|
88 |
raise SystemExit('DB-XML mapping not supported yet') |
|
91 |
in_pkeys = {} |
|
92 |
for row_id in db_xml.get(in_db, src_root, in_pkeys): |
|
93 |
def put_col(path, value): |
|
94 |
xpath.put_obj(doc1, path, row_id, has_types, value) |
|
95 |
for value, out in metadata: put_col(out, value) |
|
96 |
for in_, out in mappings: |
|
97 |
put_col(out, db_xml.get(in_db, in_, in_pkeys)) |
|
98 |
xpath.put_obj(doc1, out, row_id, has_types, ) |
|
89 | 99 |
in_db.close() |
90 |
elif in_is_xml: raise SystemExit('XML-XML mapping not supported yet')
|
|
100 |
elif in_is_xml: raise SystemExit('XML input not supported yet')
|
|
91 | 101 |
else: # input is CSV |
92 |
metadata = [] |
|
93 |
map_ = {} |
|
94 |
for in_, out in mappings: |
|
95 |
value = metadata_value(in_) |
|
96 |
if value != None: metadata.append((value, out)) |
|
97 |
else: map_[in_] = out |
|
98 |
|
|
102 |
map_ = dict(mappings) |
|
99 | 103 |
reader = csv.reader(sys.stdin) |
100 | 104 |
cols = reader.next() |
101 | 105 |
for row_idx, row in enumerate(reader): |
... | ... | |
113 | 117 |
from psycopg2.extensions import ISOLATION_LEVEL_SERIALIZABLE |
114 | 118 |
import db_xml |
115 | 119 |
|
116 |
db = sql.connect(out_db_config) |
|
117 |
db.set_isolation_level(ISOLATION_LEVEL_SERIALIZABLE) |
|
120 |
out_db = sql.connect(out_db_config)
|
|
121 |
out_db.set_isolation_level(ISOLATION_LEVEL_SERIALIZABLE)
|
|
118 | 122 |
try: |
119 | 123 |
row_ct_ref = [0] |
120 |
db_xml.xml2db(db, doc1.documentElement, row_ct_ref) |
|
124 |
db_xml.xml2db(out_db, doc1.documentElement, row_ct_ref)
|
|
121 | 125 |
print 'Inserted '+str(row_ct_ref[0])+' rows' |
122 |
if commit: db.commit() |
|
126 |
if commit: out_db.commit()
|
|
123 | 127 |
finally: |
124 |
db.rollback() |
|
125 |
db.close() |
|
128 |
out_db.rollback()
|
|
129 |
out_db.close()
|
|
126 | 130 |
else: doc1.writexml(sys.stdout, addindent=' ', newl='\n') # output is XML |
127 | 131 |
|
128 |
main() |
|
132 |
try: main() |
|
133 |
except SyntaxException, ex: raise SystemExit(str(ex)) |
Also available in: Unified diff
map: Continued to add DB input support