Revision 13
Added by Aaron Marcuse-Kubitza about 13 years ago
scripts/xml2db/test | ||
---|---|---|
6 | 6 |
export host= user=postgres database=vegbank |
7 | 7 |
if test -z "${password+_}" |
8 | 8 |
then |
9 |
echo "Password can also be specified with: export password=..." |
|
9 | 10 |
read -s -p "Password for $user@$host: "; echo |
10 | 11 |
export password="$REPLY" |
11 | 12 |
fi |
scripts/xml2db/ex_util.py | ||
---|---|---|
1 | 1 |
# Exception handling |
2 | 2 |
|
3 | 3 |
def add_msg(ex, msg): ex.args = (str(ex).rstrip()+'\n'+msg,) |
4 |
|
|
5 |
class ExceptionWithCause(Exception): |
|
6 |
def __init__(self, msg, cause=None): |
|
7 |
Exception.__init__(self, msg) |
|
8 |
if cause != None: add_msg(self, 'cause: '+str(cause)) |
scripts/xml2db/xml_util.py | ||
---|---|---|
2 | 2 |
|
3 | 3 |
from xml.dom import Node |
4 | 4 |
|
5 |
def name_of(node): return node.tagName.lower() |
|
6 |
|
|
7 |
def get_id(node): return node.getAttribute('id') |
|
8 |
|
|
9 |
def set_id(node, id_): node.setAttribute('id', id_) |
|
10 |
|
|
5 | 11 |
class NodeElemIter: |
6 | 12 |
def __init__(self, node): self.child = node.firstChild |
7 | 13 |
|
... | ... | |
20 | 26 |
|
21 | 27 |
def first_elem(node): return NodeElemIter(node).next() |
22 | 28 |
|
29 |
class NodeParentIter: |
|
30 |
def __init__(self, node): self.node = node |
|
31 |
|
|
32 |
def __iter__(self): return self |
|
33 |
|
|
34 |
def curr(self): |
|
35 |
if self.node != None and self.node.nodeType == Node.ELEMENT_NODE: |
|
36 |
return self.node |
|
37 |
raise StopIteration |
|
38 |
|
|
39 |
def next(self): |
|
40 |
node = self.curr() |
|
41 |
self.node = self.node.parentNode |
|
42 |
return node |
|
43 |
|
|
23 | 44 |
def is_text(node): |
24 | 45 |
for child in NodeElemIter(node): return False # has an element node |
25 | 46 |
return True |
... | ... | |
27 | 48 |
def value(node): |
28 | 49 |
if node.firstChild != None: return node.firstChild.nodeValue |
29 | 50 |
else: return None |
30 |
|
|
31 |
def get_id(node): return node.getAttribute('id') |
|
32 |
|
|
33 |
def set_id(node, get_id): node.setAttribute('id', get_id) |
scripts/xml2db/xml2db | ||
---|---|---|
8 | 8 |
import sys |
9 | 9 |
import xml.dom.minidom |
10 | 10 |
|
11 |
import db_util |
|
12 |
import xml_util |
|
11 |
import xml_db |
|
13 | 12 |
|
14 |
def table_name(node): return node.tagName.lower() |
|
15 |
|
|
16 |
def is_pointer(node): return node.tagName.lower().endswith('_id') |
|
17 |
|
|
18 |
def pkey_name(node): return table_name(node)+'_id' |
|
19 |
|
|
20 |
def is_field(node): return node.tagName.find('.') >= 0 |
|
21 |
|
|
22 |
def field_name(node): return node.tagName.partition('.')[2].lower() |
|
23 |
|
|
24 |
def is_db_export(node): return node.tagName.find('.') >= 1 # has prefix |
|
25 |
|
|
26 |
class Importer: |
|
27 |
def __init__(self, db): |
|
28 |
self.db = db |
|
29 |
|
|
30 |
def process(self, node): |
|
31 |
self._main(node) |
|
32 |
|
|
33 |
def _main(self, node): |
|
34 |
for child in xml_util.NodeElemIter(node): |
|
35 |
if not xml_util.is_text(child): self._obj(child) # not XML metadata |
|
36 |
|
|
37 |
def _obj(self, node): |
|
38 |
table = table_name(node) |
|
39 |
pkey = pkey_name(node) |
|
40 |
db_util.check_name(table) |
|
41 |
row = {} |
|
42 |
children = [] |
|
43 |
iter_ = xml_util.NodeElemIter(node) |
|
44 |
|
|
45 |
# Skip any pkey |
|
46 |
child = iter_.curr() # first child |
|
47 |
if is_db_export(child) and is_pointer(child)\ |
|
48 |
or field_name(child) == pkey: iter_.next() |
|
49 |
|
|
50 |
for child in iter_: |
|
51 |
if is_field(child): row.update([self._field(child)]) |
|
52 |
else: children.append(child) |
|
53 |
|
|
54 |
# Add fkey to parent |
|
55 |
parent_id = xml_util.get_id(node.parentNode) |
|
56 |
if parent_id != '': row[pkey_name(node.parentNode)] = parent_id |
|
57 |
|
|
58 |
xml_util.set_id(node, db_util.insert_or_get(self.db, table, row, pkey)) |
|
59 |
|
|
60 |
# Add children with fkeys to parent |
|
61 |
for child in children: self._obj(child) |
|
62 |
|
|
63 |
def _field(self, node): |
|
64 |
if xml_util.is_text(node): value = xml_util.value(node) |
|
65 |
else: |
|
66 |
child = xml_util.first_elem(node) |
|
67 |
self._obj(child) |
|
68 |
value = xml_util.get_id(child) |
|
69 |
return (field_name(node), value) |
|
70 |
|
|
71 |
def _pointer(self, node): return None |
|
72 |
|
|
73 | 13 |
def env_flag(name): return name in os.environ and os.environ[name] != '' |
74 | 14 |
|
75 | 15 |
def main(): |
... | ... | |
87 | 27 |
db.set_isolation_level(ISOLATION_LEVEL_SERIALIZABLE) |
88 | 28 |
try: |
89 | 29 |
doc = xml.dom.minidom.parse(sys.stdin) |
90 |
Importer(db).process(doc.documentElement)
|
|
30 |
print 'Inserted '+str(xml_db.xml2db(doc.documentElement, db))+' rows'
|
|
91 | 31 |
if commit: db.commit() |
92 | 32 |
finally: |
93 | 33 |
db.rollback() |
scripts/xml2db/db_util.py | ||
---|---|---|
8 | 8 |
|
9 | 9 |
class NameException(Exception): pass |
10 | 10 |
|
11 |
class DuplicateKeyException(Exception): pass |
|
11 |
class ExceptionWithColumn(ex_util.ExceptionWithCause): |
|
12 |
def __init__(self, col, cause=None): |
|
13 |
ex_util.ExceptionWithCause.__init__(self, 'column: '+col, cause) |
|
14 |
self.col = col |
|
12 | 15 |
|
16 |
class DuplicateKeyException(ExceptionWithColumn): pass |
|
17 |
|
|
18 |
class NullValueException(ExceptionWithColumn): pass |
|
19 |
|
|
13 | 20 |
def check_name(name): |
14 | 21 |
if re.search(r'\W', name) != None: raise NameException('Name "'+name |
15 | 22 |
+'" may contain only alphanumeric characters and _') |
... | ... | |
39 | 46 |
|
40 | 47 |
def select(db, table, fields, conds): |
41 | 48 |
for field in fields: check_name(field) |
42 |
for key in conds.keys(): check_name(key)
|
|
49 |
for col in conds.keys(): check_name(col)
|
|
43 | 50 |
def cond(entry): |
44 |
key, value = entry
|
|
45 |
cond_ = key+' '
|
|
51 |
col, value = entry
|
|
52 |
cond_ = col+' '
|
|
46 | 53 |
if value == None: cond_ += 'IS' |
47 | 54 |
else: cond_ += '=' |
48 | 55 |
cond_ += ' %s' |
... | ... | |
50 | 57 |
return run_query(db, 'SELECT '+', '.join(fields)+' FROM '+table+' WHERE ' |
51 | 58 |
+' AND '.join(map(cond, conds.iteritems())), conds.values()) |
52 | 59 |
|
53 |
def insert(db, table, row, get_id=False):
|
|
60 |
def insert(db, table, row): |
|
54 | 61 |
check_name(table) |
55 |
keys = row.keys() |
|
56 |
for key in keys: check_name(key) |
|
57 |
query = 'INSERT INTO '+table+' ('+', '.join(keys)+') VALUES ('\ |
|
58 |
+', '.join(['%s']*len(keys))+')' |
|
59 |
if get_id: query += ' RETURNING lastval()' |
|
60 |
cur = run_query(db, query, row.values()) |
|
61 |
if get_id: return value(cur) |
|
62 |
cols = row.keys() |
|
63 |
for col in cols: check_name(col) |
|
64 |
return run_query(db, 'INSERT INTO '+table+' ('+', '.join(cols) |
|
65 |
+') VALUES ('+', '.join(['%s']*len(cols))+')', row.values()) |
|
62 | 66 |
|
63 |
def insert_ignore(db, table, row, get_id=False): |
|
64 |
try: return with_savepoint(db, lambda: insert(db, table, row, get_id)) |
|
67 |
def last_insert_id(db): return value(run_query(db, 'SELECT lastval()')) |
|
68 |
|
|
69 |
def insert_ignore(db, table, row): |
|
70 |
try: return with_savepoint(db, lambda: insert(db, table, row)) |
|
65 | 71 |
except Exception, ex: |
72 |
msg = str(ex) |
|
66 | 73 |
match = re.search(r'duplicate key value violates unique constraint "' |
67 |
+table+'_(\w+)_index"', str(ex)) |
|
68 |
if match: raise DuplicateKeyException(match.group(1)) |
|
69 |
else: raise |
|
74 |
+table+'_(\w+)_index"', msg) |
|
75 |
if match: raise DuplicateKeyException(match.group(1), ex) |
|
76 |
match = re.search(r'null value in column "(\w+)" violates not-null ' |
|
77 |
'constraint', msg) |
|
78 |
if match: raise NullValueException(match.group(1), ex) |
|
79 |
raise # no specific exception raised |
|
70 | 80 |
|
71 |
def insert_or_get(db, table, row, pkey): |
|
72 |
try: return insert_ignore(db, table, row, True) |
|
81 |
def insert_or_get(db, table, row, pkey, row_ct_ref=None): |
|
82 |
try: |
|
83 |
row_ct = insert_ignore(db, table, row).rowcount |
|
84 |
if row_ct_ref != None and row_ct >= 0: row_ct_ref[0] += row_ct |
|
85 |
return last_insert_id(db) |
|
73 | 86 |
except DuplicateKeyException, ex: |
74 |
dup_key = str(ex) |
|
75 |
return value(select(db, table, [pkey], {dup_key: row[dup_key]})) |
|
87 |
return value(select(db, table, [pkey], {ex.col: row[ex.col]})) |
scripts/xml2db/vegbank.sql | ||
---|---|---|
6777 | 6777 |
-- PostgreSQL database dump complete |
6778 | 6778 |
-- |
6779 | 6779 |
|
6780 |
-- Modifications |
|
6781 |
|
|
6782 |
ALTER TABLE aux_role RENAME role_id TO aux_role_id; |
|
6783 |
ALTER TABLE place RENAME plotplace_id TO place_id; |
scripts/xml2db/xml_db.py | ||
---|---|---|
1 |
# XML-database conversion |
|
2 |
|
|
3 |
from xml.dom import Node |
|
4 |
|
|
5 |
import db_util |
|
6 |
import xml_util |
|
7 |
from xml_util import name_of |
|
8 |
|
|
9 |
ptr_suffix = '_id' |
|
10 |
|
|
11 |
def is_ptr(node_name): return node_name.endswith(ptr_suffix) |
|
12 |
|
|
13 |
def ptr_type(node_name): |
|
14 |
assert is_ptr(node_name) |
|
15 |
return node_name[:-len(ptr_suffix)] |
|
16 |
|
|
17 |
def ptr_target(node): |
|
18 |
assert is_ptr(name_of(node)) |
|
19 |
return xml_util.first_elem(node) |
|
20 |
|
|
21 |
def pkey_name(node_name): return node_name+ptr_suffix |
|
22 |
|
|
23 |
def is_field(node_name): return node_name.find('.') >= 0 |
|
24 |
|
|
25 |
def field_name(node_name): |
|
26 |
if is_field(node_name): return node_name.partition('.')[2] |
|
27 |
else: return node_name |
|
28 |
|
|
29 |
def is_db_export(node_name): return node_name.find('.') >= 1 # has prefix |
|
30 |
|
|
31 |
def find_by_name(node, name): |
|
32 |
for parent in xml_util.NodeParentIter(node): |
|
33 |
if name_of(parent) == name: return parent |
|
34 |
else: |
|
35 |
for child in xml_util.NodeElemIter(parent): |
|
36 |
child_name = field_name(name_of(child)) |
|
37 |
if is_ptr(child_name): |
|
38 |
if ptr_type(child_name) == name: return ptr_target(child) |
|
39 |
elif child_name == name: return child |
|
40 |
return None |
|
41 |
|
|
42 |
def xml2db(node, db): |
|
43 |
def _main(node): |
|
44 |
for child in xml_util.NodeElemIter(node): |
|
45 |
if not xml_util.is_text(child): _obj(child) # not XML metadata |
|
46 |
|
|
47 |
def _obj(node): |
|
48 |
table = name_of(node) |
|
49 |
db_util.check_name(table) |
|
50 |
pkey = pkey_name(table) |
|
51 |
row = {} |
|
52 |
children = [] |
|
53 |
iter_ = xml_util.NodeElemIter(node) |
|
54 |
|
|
55 |
# Skip any pkey |
|
56 |
child_name = name_of(iter_.curr()) # first child |
|
57 |
if is_db_export(child_name) and is_ptr(child_name)\ |
|
58 |
or field_name(child_name) == pkey: iter_.next() |
|
59 |
|
|
60 |
# Divide children into fields and children with fkeys to parent |
|
61 |
for child in iter_: |
|
62 |
if is_field(name_of(child)): row.update([_field(child)]) |
|
63 |
else: children.append(child) |
|
64 |
|
|
65 |
# Add fkey to parent |
|
66 |
parent_id = xml_util.get_id(node.parentNode) |
|
67 |
if parent_id != '': row[pkey_name(name_of(node.parentNode))] = parent_id |
|
68 |
|
|
69 |
# Insert node |
|
70 |
for try_num in range(2): |
|
71 |
try: |
|
72 |
xml_util.set_id(node, db_util.insert_or_get(db, table, row, |
|
73 |
pkey, row_ct_ref)) |
|
74 |
break |
|
75 |
except db_util.NullValueException, ex: |
|
76 |
if try_num > 0: raise # exception still raised after retry |
|
77 |
target = find_by_name(node, ptr_type(ex.col)) |
|
78 |
if target == None: raise |
|
79 |
row[ex.col] = xml_util.get_id(target) |
|
80 |
|
|
81 |
# Insert children with fkeys to parent |
|
82 |
for child in children: _obj(child) |
|
83 |
|
|
84 |
def _field(node): |
|
85 |
if xml_util.is_text(node): value = xml_util.value(node) |
|
86 |
else: |
|
87 |
child = xml_util.first_elem(node) |
|
88 |
_obj(child) |
|
89 |
value = xml_util.get_id(child) |
|
90 |
return (field_name(name_of(node)), value) |
|
91 |
|
|
92 |
row_ct_ref = [0] |
|
93 |
_main(node) |
|
94 |
return row_ct_ref[0] |
Also available in: Unified diff
Initial version of xml2db. Doesn't yet handle all duplicate rows correctly.