Revision 46
Added by Aaron Marcuse-Kubitza about 13 years ago
scripts/lib/xml_db.py | ||
---|---|---|
1 |
# XML-database conversion |
|
2 |
|
|
3 |
import re |
|
4 |
from xml.dom import Node |
|
5 |
|
|
6 |
import db_util |
|
7 |
import xml_util |
|
8 |
|
|
9 |
def name_of(node): return re.sub(r'^.*\.', r'', xml_util.name_of(node)) |
|
10 |
|
|
11 |
ptr_suffix = '_id' |
|
12 |
|
|
13 |
def is_ptr(node_name): return node_name.endswith(ptr_suffix) |
|
14 |
|
|
15 |
def ptr_type(node_name): |
|
16 |
assert is_ptr(node_name) |
|
17 |
return node_name[:-len(ptr_suffix)] |
|
18 |
|
|
19 |
def ptr_target(node): |
|
20 |
assert is_ptr(name_of(node)) |
|
21 |
return xml_util.first_elem(node) |
|
22 |
|
|
23 |
def find_by_name(node, name): |
|
24 |
for parent in xml_util.NodeParentIter(node): |
|
25 |
if name_of(parent) == name: return parent |
|
26 |
else: |
|
27 |
for child in xml_util.NodeElemIter(parent): |
|
28 |
child_name = name_of(child) |
|
29 |
if is_ptr(child_name): |
|
30 |
if ptr_type(child_name) == name: return ptr_target(child) |
|
31 |
elif child_name == name: return child |
|
32 |
return None |
|
33 |
|
|
34 |
def get(db, node, create=False, store_ids=False, row_ct_ref=None, pkeys=None): |
|
35 |
# store_ids enables searching the tree for missing fields |
|
36 |
if pkeys == None: pkeys = {} |
|
37 |
def pkey(table): |
|
38 |
if table not in pkeys: pkeys[table] = db_util.pkey(db, table) |
|
39 |
return pkeys[table] |
|
40 |
|
|
41 |
def obj(node, parent_id=None): |
|
42 |
table = name_of(node) |
|
43 |
pkey_ = pkey(table) |
|
44 |
row = {} |
|
45 |
children = [] |
|
46 |
|
|
47 |
# Divide children into fields and children with fkeys to parent |
|
48 |
for child in xml_util.NodeElemIter(node): |
|
49 |
child_name = name_of(child) |
|
50 |
if xml_util.is_text(child): row[child_name] = xml_util.value(child) |
|
51 |
elif is_ptr(child_name): row[child_name] = obj(ptr_target(child)) |
|
52 |
else: children.append(child) |
|
53 |
try: del row[pkey_] |
|
54 |
except KeyError: pass |
|
55 |
|
|
56 |
# Add fkey to parent |
|
57 |
if parent_id != None: row[pkey(name_of(node.parentNode))] = parent_id |
|
58 |
|
|
59 |
# Insert node |
|
60 |
for try_num in range(2): |
|
61 |
try: |
|
62 |
id_ = db_util.get(db, table, row, pkey_, create, row_ct_ref) |
|
63 |
if store_ids: xml_util.set_id(node, id_) |
|
64 |
break |
|
65 |
except db_util.NullValueException, ex: |
|
66 |
if try_num > 0: raise # exception still raised after retry |
|
67 |
# Search for required column in ancestors and their children |
|
68 |
target = find_by_name(node, ptr_type(ex.col)) |
|
69 |
if target == None: raise |
|
70 |
row[ex.col] = xml_util.get_id(target) |
|
71 |
|
|
72 |
# Insert children with fkeys to parent |
|
73 |
for child in children: obj(child, id_) |
|
74 |
|
|
75 |
return id_ |
|
76 |
|
|
77 |
return obj(node) |
|
78 |
|
|
79 |
def xml2db(db, node, row_ct_ref=None): |
|
80 |
for child in xml_util.NodeElemIter(node): |
|
81 |
if not xml_util.is_text(child): # not XML metadata |
|
82 |
get(db, child, True, True, row_ct_ref) |
scripts/lib/db_util.py | ||
---|---|---|
1 |
# Database access |
|
2 |
|
|
3 |
import random |
|
4 |
import re |
|
5 |
import sys |
|
6 |
|
|
7 |
import ex_util |
|
8 |
|
|
9 |
def _add_cursor_info(ex, cur): ex_util.add_msg(ex, 'query: '+cur.query) |
|
10 |
|
|
11 |
class NameException(Exception): pass |
|
12 |
|
|
13 |
class DbException(ex_util.ExceptionWithCause): |
|
14 |
def __init__(self, msg, cause=None, cur=None): |
|
15 |
ex_util.ExceptionWithCause.__init__(self, msg, cause) |
|
16 |
if cur != None: _add_cursor_info(self, cur) |
|
17 |
|
|
18 |
class ExceptionWithColumn(DbException): |
|
19 |
def __init__(self, col, cause=None): |
|
20 |
DbException.__init__(self, 'column: '+col, cause) |
|
21 |
self.col = col |
|
22 |
|
|
23 |
class DuplicateKeyException(ExceptionWithColumn): pass |
|
24 |
|
|
25 |
class NullValueException(ExceptionWithColumn): pass |
|
26 |
|
|
27 |
def check_name(name): |
|
28 |
if re.search(r'\W', name) != None: raise NameException('Name "'+name |
|
29 |
+'" may contain only alphanumeric characters and _') |
|
30 |
|
|
31 |
def run_query(db, query, params=None): |
|
32 |
cur = db.cursor() |
|
33 |
try: cur.execute(query, params) |
|
34 |
except Exception, ex: |
|
35 |
_add_cursor_info(ex, cur) |
|
36 |
raise |
|
37 |
return cur |
|
38 |
|
|
39 |
def col(cur, idx): return cur.description[idx][0] |
|
40 |
|
|
41 |
def row(cur): return iter(lambda: cur.fetchone(), None).next() |
|
42 |
|
|
43 |
def value(cur): return row(cur)[0] |
|
44 |
|
|
45 |
def with_savepoint(db, func): |
|
46 |
savepoint = 'savepoint_'+str(random.randint(0, sys.maxint)) # must be unique |
|
47 |
run_query(db, 'SAVEPOINT '+savepoint) |
|
48 |
try: return_val = func() |
|
49 |
except: |
|
50 |
run_query(db, 'ROLLBACK TO SAVEPOINT '+savepoint) |
|
51 |
raise |
|
52 |
else: |
|
53 |
run_query(db, 'RELEASE SAVEPOINT '+savepoint) |
|
54 |
return return_val |
|
55 |
|
|
56 |
def select(db, table, fields, conds): |
|
57 |
check_name(table) |
|
58 |
map(check_name, fields) |
|
59 |
map(check_name, conds.keys()) |
|
60 |
def cond(entry): |
|
61 |
col, value = entry |
|
62 |
cond_ = col+' ' |
|
63 |
if value == None: cond_ += 'IS' |
|
64 |
else: cond_ += '=' |
|
65 |
cond_ += ' %s' |
|
66 |
return cond_ |
|
67 |
return run_query(db, 'SELECT '+', '.join(fields)+' FROM '+table+' WHERE ' |
|
68 |
+' AND '.join(map(cond, conds.iteritems())), conds.values()) |
|
69 |
|
|
70 |
def insert(db, table, row): |
|
71 |
check_name(table) |
|
72 |
cols = row.keys() |
|
73 |
map(check_name, cols) |
|
74 |
return run_query(db, 'INSERT INTO '+table+' ('+', '.join(cols) |
|
75 |
+') VALUES ('+', '.join(['%s']*len(cols))+')', row.values()) |
|
76 |
|
|
77 |
def last_insert_id(db): return value(run_query(db, 'SELECT lastval()')) |
|
78 |
|
|
79 |
def try_insert(db, table, row): |
|
80 |
try: return with_savepoint(db, lambda: insert(db, table, row)) |
|
81 |
except Exception, ex: |
|
82 |
msg = str(ex) |
|
83 |
match = re.search(r'duplicate key value violates unique constraint "' |
|
84 |
+table+'_(\w+)_index"', msg) |
|
85 |
if match: raise DuplicateKeyException(match.group(1), ex) |
|
86 |
match = re.search(r'null value in column "(\w+)" violates not-null ' |
|
87 |
'constraint', msg) |
|
88 |
if match: raise NullValueException(match.group(1), ex) |
|
89 |
raise # no specific exception raised |
|
90 |
|
|
91 |
def pkey(db, table): # Assumed to be first column in table |
|
92 |
check_name(table) |
|
93 |
return col(run_query(db, 'SELECT * FROM '+table+' LIMIT 0'), 0) |
|
94 |
|
|
95 |
def get(db, table, row, pkey, create=False, row_ct_ref=None): |
|
96 |
try: return value(select(db, table, [pkey], row)) |
|
97 |
except StopIteration: |
|
98 |
if not create: raise |
|
99 |
# Insert new row |
|
100 |
try: |
|
101 |
row_ct = try_insert(db, table, row).rowcount |
|
102 |
if row_ct_ref != None and row_ct >= 0: row_ct_ref[0] += row_ct |
|
103 |
return last_insert_id(db) |
|
104 |
except DuplicateKeyException, ex: |
|
105 |
return value(select(db, table, [pkey], {ex.col: row[ex.col]})) |
scripts/lib/ex_util.py | ||
---|---|---|
1 |
# Exception handling |
|
2 |
|
|
3 |
def add_msg(ex, msg): ex.args = (str(ex).rstrip()+'\n'+msg,) |
|
4 |
|
|
5 |
class ExceptionWithCause(Exception): |
|
6 |
def __init__(self, msg, cause=None): |
|
7 |
Exception.__init__(self, msg) |
|
8 |
if cause != None: add_msg(self, 'cause: '+str(cause)) |
scripts/lib/xml_util.py | ||
---|---|---|
1 |
# XML DOM tree manipulation |
|
2 |
|
|
3 |
from xml.dom import Node |
|
4 |
import xml.dom.minidom |
|
5 |
|
|
6 |
def name_of(node): return node.tagName.lower() |
|
7 |
|
|
8 |
def get_id(node): return node.getAttribute('id') |
|
9 |
|
|
10 |
def set_id(node, id_): node.setAttribute('id', id_) |
|
11 |
|
|
12 |
class NodeElemIter: |
|
13 |
def __init__(self, node): self.child = node.firstChild |
|
14 |
|
|
15 |
def __iter__(self): return self |
|
16 |
|
|
17 |
def curr(self): |
|
18 |
while self.child != None: |
|
19 |
if self.child.nodeType == Node.ELEMENT_NODE: return self.child |
|
20 |
self.child = self.child.nextSibling |
|
21 |
raise StopIteration |
|
22 |
|
|
23 |
def next(self): |
|
24 |
child = self.curr() |
|
25 |
self.child = self.child.nextSibling |
|
26 |
return child |
|
27 |
|
|
28 |
def first_elem(node): return NodeElemIter(node).next() |
|
29 |
|
|
30 |
class NodeElemReverseIter: |
|
31 |
def __init__(self, node): self.child = node.lastChild |
|
32 |
|
|
33 |
def __iter__(self): return self |
|
34 |
|
|
35 |
def curr(self): |
|
36 |
while self.child != None: |
|
37 |
if self.child.nodeType == Node.ELEMENT_NODE: return self.child |
|
38 |
self.child = self.child.previousSibling |
|
39 |
raise StopIteration |
|
40 |
|
|
41 |
def next(self): |
|
42 |
child = self.curr() |
|
43 |
self.child = self.child.previousSibling |
|
44 |
return child |
|
45 |
|
|
46 |
def last_elem(node): return NodeElemReverseIter(node).next() |
|
47 |
|
|
48 |
class NodeParentIter: |
|
49 |
def __init__(self, node): self.node = node |
|
50 |
|
|
51 |
def __iter__(self): return self |
|
52 |
|
|
53 |
def curr(self): |
|
54 |
if self.node != None and self.node.nodeType == Node.ELEMENT_NODE: |
|
55 |
return self.node |
|
56 |
raise StopIteration |
|
57 |
|
|
58 |
def next(self): |
|
59 |
node = self.curr() |
|
60 |
self.node = self.node.parentNode |
|
61 |
return node |
|
62 |
|
|
63 |
def is_text(node): |
|
64 |
for child in NodeElemIter(node): return False # has an element node |
|
65 |
return True |
|
66 |
|
|
67 |
def value(node): |
|
68 |
if node.firstChild != None: return node.firstChild.nodeValue |
|
69 |
else: return node.nodeValue |
|
70 |
|
|
71 |
def set_value(doc, node, value): |
|
72 |
if node.nodeType == Node.ELEMENT_NODE: |
|
73 |
node.appendChild(doc.createTextNode(value)) |
|
74 |
else: node.nodeValue = value |
|
75 |
|
|
76 |
def by_tag_name(node, name, last_only=False): |
|
77 |
# last_only optimization returns last (most recently inserted) matching node |
|
78 |
children = [] |
|
79 |
for child in NodeElemReverseIter(node): |
|
80 |
if child.tagName == name: |
|
81 |
children.append(child) |
|
82 |
if last_only: break |
|
83 |
return children |
|
84 |
|
|
85 |
_writexml_orig = xml.dom.minidom.Element.writexml |
|
86 |
|
|
87 |
def _writexml(self, writer, indent="", addindent="", newl=""): |
|
88 |
if self.firstChild != None and self.firstChild.nextSibling == None\ |
|
89 |
and self.firstChild.nodeType == Node.TEXT_NODE: # a single text node |
|
90 |
writer.write(indent+'<'+self.tagName) |
|
91 |
for attr_idx in range(self.attributes.length): |
|
92 |
attr = self.attributes.item(attr_idx) |
|
93 |
writer.write(' '+attr.name+'='+attr.value) |
|
94 |
writer.write('>'+value(self)+'</'+self.tagName+'>'+newl) |
|
95 |
else: _writexml_orig(self, writer, indent, addindent, newl) |
|
96 |
|
|
97 |
xml.dom.minidom.Element.writexml = _writexml |
scripts/lib/ex.py | ||
---|---|---|
1 |
# Exception handling |
|
2 |
|
|
3 |
def add_msg(ex, msg):
    """Append msg, on a new line, to the message of exception ex (in place).

    The current text of ex (str(ex) with trailing whitespace removed) and msg
    are joined by a newline and stored as the only element of ex.args.
    """
    current = str(ex).rstrip()
    ex.args = (current+'\n'+msg,)
|
4 |
|
|
5 |
class ExceptionWithCause(Exception):
    """Exception whose message can include the text of an underlying cause."""

    def __init__(self, msg, cause=None):
        """Create the exception.

        msg: the message of this exception.
        cause: optional underlying exception (or other object); when given,
            'cause: '+str(cause) is appended to the message on a new line.
        """
        Exception.__init__(self, msg)
        # Keep the original object too, so handlers are not limited to its text
        self.cause = cause
        if cause is not None:
            add_msg(self, 'cause: '+str(cause))
scripts/lib/sql.py | ||
---|---|---|
1 |
# Database access |
|
2 |
|
|
3 |
import random |
|
4 |
import re |
|
5 |
import sys |
|
6 |
|
|
7 |
import ex |
|
8 |
|
|
9 |
def _add_cursor_info(e, cur):
    """Append the query held by cursor cur to the message of exception e.

    cur.query is converted with str() so that a non-string value (e.g. None;
    presumably the case when execute() failed before building the query --
    verify against the DB driver) cannot raise a TypeError here and hide e.
    """
    ex.add_msg(e, 'query: '+str(cur.query))
|
10 |
|
|
11 |
class NameException(Exception):
    """Raised by check_name() for a name that contains a non-word character."""
|
12 |
|
|
13 |
class DbException(ex.ExceptionWithCause):
    """Database error that can carry a cause and the query of a cursor."""

    def __init__(self, msg, cause=None, cur=None):
        """Create the exception.

        msg: the message of this exception.
        cause: optional underlying exception, passed to ExceptionWithCause.
        cur: optional cursor; when given, its query is appended to the message.
        """
        ex.ExceptionWithCause.__init__(self, msg, cause)
        if cur is not None:
            _add_cursor_info(self, cur)
|
17 |
|
|
18 |
class ExceptionWithColumn(DbException):
    """DbException about one column, whose name is available as .col."""

    def __init__(self, col, cause=None):
        """col: name of the column concerned; cause: optional underlying error."""
        message = 'column: '+col
        DbException.__init__(self, message, cause)
        self.col = col
|
22 |
|
|
23 |
class DuplicateKeyException(ExceptionWithColumn):
    """Raised by try_insert() when the DB reports a unique-constraint violation."""
|
24 |
|
|
25 |
class NullValueException(ExceptionWithColumn):
    """Raised by try_insert() when the DB reports a not-null violation."""
|
26 |
|
|
27 |
def check_name(name):
    """Validate a table/column name before it is spliced into SQL text.

    Returns None when name contains no non-word character (regex class W);
    otherwise raises NameException.
    """
    if re.search(r'\W', name) is None:
        return
    raise NameException('Name "'+name
        +'" may contain only alphanumeric characters and _')
|
30 |
|
|
31 |
def run_query(db, query, params=None):
    """Execute query on a new cursor of connection db and return the cursor.

    params: optional parameters, passed to cursor.execute() with the query.
    Any exception raised by execute() gets the cursor's query appended to its
    message and is then re-raised.
    """
    cur = db.cursor()
    try:
        cur.execute(query, params)
    except Exception as e: # "as" form is valid on Python 2.6+ and Python 3
        _add_cursor_info(e, cur)
        raise
    return cur
|
38 |
|
|
39 |
def col(cur, idx):
    """Return the first item (the name) of entry idx of cur.description."""
    column_info = cur.description[idx]
    return column_info[0]
|
40 |
|
|
41 |
def row(cur):
    """Return the next row fetched from cursor cur.

    Raises StopIteration when cur.fetchone() returns None (no row left).
    """
    # iter(callable, sentinel) ends as soon as fetchone() returns None; the
    # next() builtin works on Python 2.6+ and 3, unlike the .next() method.
    return next(iter(cur.fetchone, None))
|
42 |
|
|
43 |
def value(cur):
    """Return the first column of the next row of cursor cur.

    StopIteration from row() propagates when there is no row.
    """
    next_row = row(cur)
    return next_row[0]
|
44 |
|
|
45 |
def with_savepoint(db, func):
    """Call func() inside a savepoint on connection db and return its result.

    If func() raises, the savepoint is rolled back and the exception is
    re-raised; otherwise the savepoint is released.
    """
    # sys.maxsize exists on Python 2.6+ and 3 (sys.maxint is Python 2 only)
    savepoint = 'savepoint_'+str(random.randint(0, sys.maxsize)) # must be unique
    run_query(db, 'SAVEPOINT '+savepoint)
    try:
        return_val = func()
    except: # deliberately bare: roll back on any failure, then re-raise
        run_query(db, 'ROLLBACK TO SAVEPOINT '+savepoint)
        raise
    else:
        run_query(db, 'RELEASE SAVEPOINT '+savepoint)
        return return_val
|
55 |
|
|
56 |
def select(db, table, fields, conds):
    """Run a SELECT of fields from table and return the cursor.

    fields: column names to select.
    conds: dict mapping column name -> required value; entries are ANDed.
        A value of None is matched with IS instead of =. An empty dict selects
        without a WHERE clause (previously this produced invalid SQL).
    Table and column names are validated with check_name(); values are passed
    as query parameters.
    """
    check_name(table)
    # Explicit loops: map() is lazy on Python 3 and would skip the validation
    for field in fields:
        check_name(field)
    cond_cols = list(conds.keys())
    for cond_col in cond_cols:
        check_name(cond_col)

    def cond(cond_col):
        """Return the SQL condition text for one column."""
        if conds[cond_col] is None:
            operator = 'IS'
        else:
            operator = '='
        return cond_col+' '+operator+' %s'

    query = 'SELECT '+', '.join(fields)+' FROM '+table
    if cond_cols:
        query += ' WHERE '+' AND '.join([cond(c) for c in cond_cols])
    # Parameters are listed in the same column order as the conditions
    return run_query(db, query, [conds[c] for c in cond_cols])
|
69 |
|
|
70 |
def insert(db, table, row):
    """Insert row (dict of column name -> value) into table; return the cursor.

    Table and column names are validated with check_name(); values are passed
    as query parameters. An empty row inserts a row of column defaults, since
    "() VALUES ()" is not valid SQL.
    """
    check_name(table)
    cols = list(row.keys())
    # Explicit loop: map() is lazy on Python 3 and would skip the validation
    for col_name in cols:
        check_name(col_name)
    if not cols:
        return run_query(db, 'INSERT INTO '+table+' DEFAULT VALUES')
    placeholders = ', '.join(['%s']*len(cols))
    return run_query(db, 'INSERT INTO '+table+' ('+', '.join(cols)
        +') VALUES ('+placeholders+')', [row[c] for c in cols])
|
76 |
|
|
77 |
def last_insert_id(db):
    """Return the result of SELECT lastval() on connection db."""
    cur = run_query(db, 'SELECT lastval()')
    return value(cur)
|
78 |
|
|
79 |
def try_insert(db, table, row):
    """Insert row into table inside a savepoint and return the cursor.

    The text of any exception is inspected and translated:
    - unique-constraint violation on "<table>_<col>_index" ->
      DuplicateKeyException(<col>)
    - not-null violation on column <col> -> NullValueException(<col>)
    Any other exception is re-raised unchanged.
    """
    try:
        return with_savepoint(db, lambda: insert(db, table, row))
    except Exception as e: # "as" form is valid on Python 2.6+ and Python 3
        msg = str(e)
        # re.escape() keeps an unusual table name from being read as a pattern
        match = re.search(r'duplicate key value violates unique constraint "'
            +re.escape(table)+r'_(\w+)_index"', msg)
        if match:
            raise DuplicateKeyException(match.group(1), e)
        match = re.search(r'null value in column "(\w+)" violates not-null '
            r'constraint', msg)
        if match:
            raise NullValueException(match.group(1), e)
        raise # no specific exception raised
|
90 |
|
|
91 |
def pkey(db, table): # Assumed to be first column in table
    """Return the name of the first column of table (taken as its pkey).

    The name is read from the description of a zero-row SELECT * query.
    """
    check_name(table)
    cur = run_query(db, 'SELECT * FROM '+table+' LIMIT 0')
    return col(cur, 0)
|
94 |
|
|
95 |
def get(db, table, row, pkey, create=False, row_ct_ref=None):
    """Return the pkey value of the row of table matching row.

    row: dict of column name -> value used both as the lookup condition and,
        when inserting, as the new row.
    pkey: name of the column whose value is returned.
    create: when true, a missing row is inserted; otherwise the StopIteration
        raised for "no matching row" propagates.
    row_ct_ref: optional one-element list; the cursor's rowcount of an insert
        is added to row_ct_ref[0] when it is not negative.
    """
    try:
        return value(select(db, table, [pkey], row))
    except StopIteration:
        if not create:
            raise
        # Insert new row
        try:
            row_ct = try_insert(db, table, row).rowcount
            if row_ct_ref is not None and row_ct >= 0:
                row_ct_ref[0] += row_ct
            return last_insert_id(db)
        except DuplicateKeyException as e:
            # A row with the same unique column value exists: return that one
            return value(select(db, table, [pkey], {e.col: row[e.col]}))
scripts/lib/xml_dom.py | ||
---|---|---|
1 |
# XML DOM tree manipulation |
|
2 |
|
|
3 |
from xml.dom import Node |
|
4 |
import xml.dom.minidom |
|
5 |
|
|
6 |
def name_of(node):
    """Return the tag name of element node, converted to lower case."""
    tag = node.tagName
    return tag.lower()
|
7 |
|
|
8 |
def get_id(node):
    """Return the value of the 'id' attribute of element node."""
    id_ = node.getAttribute('id')
    return id_
|
9 |
|
|
10 |
def set_id(node, id_):
    """Set the 'id' attribute of element node to id_, stored as a string.

    id_ may be a non-string value such as a numeric database key; it is
    formatted with %s because DOM attribute values are strings and a
    non-string value cannot be written back out as XML.
    """
    node.setAttribute('id', '%s' % (id_,))
|
11 |
|
|
12 |
class NodeElemIter(object):
    """Iterator over the element children of a DOM node, first to last.

    Child nodes that are not elements (text, comments, ...) are skipped.
    """

    def __init__(self, node):
        # Next candidate child; advanced by curr() and next()
        self.child = node.firstChild

    def __iter__(self):
        return self

    def curr(self):
        """Return the current element child without consuming it.

        Skips over non-element siblings. Raises StopIteration when no element
        child is left.
        """
        while self.child is not None:
            if self.child.nodeType == Node.ELEMENT_NODE:
                return self.child
            self.child = self.child.nextSibling
        raise StopIteration

    def next(self):
        """Return the current element child and advance past it."""
        child = self.curr()
        self.child = self.child.nextSibling
        return child

    __next__ = next # Python 3 iterator protocol
|
27 |
|
|
28 |
def first_elem(node):
    """Return the first element child of node.

    Raises StopIteration when node has no element child.
    """
    elems = NodeElemIter(node)
    return elems.next()
|
29 |
|
|
30 |
class NodeElemReverseIter(object):
    """Iterator over the element children of a DOM node, last to first.

    Child nodes that are not elements (text, comments, ...) are skipped.
    """

    def __init__(self, node):
        # Next candidate child; moved backwards by curr() and next()
        self.child = node.lastChild

    def __iter__(self):
        return self

    def curr(self):
        """Return the current element child without consuming it.

        Skips over non-element siblings. Raises StopIteration when no element
        child is left.
        """
        while self.child is not None:
            if self.child.nodeType == Node.ELEMENT_NODE:
                return self.child
            self.child = self.child.previousSibling
        raise StopIteration

    def next(self):
        """Return the current element child and step to the one before it."""
        child = self.curr()
        self.child = self.child.previousSibling
        return child

    __next__ = next # Python 3 iterator protocol
|
45 |
|
|
46 |
def last_elem(node):
    """Return the last element child of node.

    Raises StopIteration when node has no element child.
    """
    elems = NodeElemReverseIter(node)
    return elems.next()
|
47 |
|
|
48 |
class NodeParentIter(object):
    """Iterator over a node and its ancestors, innermost first.

    Iteration ends at the first node that is None or not an element.
    """

    def __init__(self, node):
        # Node to be returned next
        self.node = node

    def __iter__(self):
        return self

    def curr(self):
        """Return the current node without consuming it.

        Raises StopIteration when the current node is None or not an element.
        """
        if self.node is not None and self.node.nodeType == Node.ELEMENT_NODE:
            return self.node
        raise StopIteration

    def next(self):
        """Return the current node and move on to its parent."""
        node = self.curr()
        self.node = self.node.parentNode
        return node

    __next__ = next # Python 3 iterator protocol
|
62 |
|
|
63 |
def is_text(node):
    """Return whether node has no element child.

    A node whose children are all non-elements (or that has no children at
    all) counts as text.
    """
    child = node.firstChild
    while child is not None:
        if child.nodeType == Node.ELEMENT_NODE:
            return False # has an element node
        child = child.nextSibling
    return True
|
66 |
|
|
67 |
def value(node):
    """Return the nodeValue of node's first child, or of node if it has none."""
    first = node.firstChild
    if first is None:
        return node.nodeValue
    return first.nodeValue
|
70 |
|
|
71 |
def set_value(doc, node, value):
    """Give node the text value.

    For an element, a new text node created by doc is appended to its
    children; for any other node type, nodeValue is assigned.
    """
    if node.nodeType != Node.ELEMENT_NODE:
        node.nodeValue = value
        return
    text_node = doc.createTextNode(value)
    node.appendChild(text_node)
|
75 |
|
|
76 |
def by_tag_name(node, name, last_only=False):
    """Return the element children of node whose tagName equals name.

    Children are visited, and therefore returned, in last-to-first order.
    last_only: stop at the first match found, i.e. return at most the last
        (most recently inserted) matching child.
    """
    matches = []
    for elem in NodeElemReverseIter(node):
        if elem.tagName != name:
            continue
        matches.append(elem)
        if last_only:
            break
    return matches
|
84 |
|
|
85 |
# Original minidom implementation, used for every element that is not handled
# specially by _writexml()
_writexml_orig = xml.dom.minidom.Element.writexml

def _writexml(self, writer, indent="", addindent="", newl=""):
    """Replacement for minidom's Element.writexml().

    An element whose only child is a text node is written on one line
    (<tag attrs>text</tag>), so that no whitespace is added around the text.
    All other elements are written by the original implementation.
    Attribute values are quoted and, like the text, escaped so that the
    output is well-formed XML.
    """
    # Imported here so the module's import block need not change
    from xml.sax.saxutils import escape, quoteattr

    first = self.firstChild
    if (first is not None and first.nextSibling is None
        and first.nodeType == Node.TEXT_NODE): # a single text node
        writer.write(indent+'<'+self.tagName)
        for attr_idx in range(self.attributes.length):
            attr = self.attributes.item(attr_idx)
            writer.write(' '+attr.name+'='+quoteattr(attr.value))
        writer.write('>'+escape(first.nodeValue)+'</'+self.tagName+'>'+newl)
    else:
        _writexml_orig(self, writer, indent, addindent, newl)

# Install the replacement for all minidom elements
xml.dom.minidom.Element.writexml = _writexml
scripts/lib/xpath.py | ||
---|---|---|
4 | 4 |
from xml.dom import Node |
5 | 5 |
|
6 | 6 |
from Parser import Parser |
7 |
import xml_util
|
|
7 |
import xml_dom
|
|
8 | 8 |
|
9 | 9 |
class XpathElem: |
10 | 10 |
def __init__(self, name, value=None, attrs=None, is_attr=False, |
... | ... | |
122 | 122 |
child = parent.getAttributeNode(elem.name) |
123 | 123 |
if child != None: children = [child] |
124 | 124 |
elif elem.name == '.': children = [parent] |
125 |
else: children = xml_util.by_tag_name(parent, elem.name, last_only)
|
|
125 |
else: children = xml_dom.by_tag_name(parent, elem.name, last_only)
|
|
126 | 126 |
|
127 | 127 |
# Check each match |
128 | 128 |
node = None |
129 | 129 |
for child in children: |
130 |
is_match = elem.value == None or xml_util.value(child) == elem.value
|
|
130 |
is_match = elem.value == None or xml_dom.value(child) == elem.value
|
|
131 | 131 |
for attr in elem.attrs: |
132 | 132 |
if not is_match: break |
133 | 133 |
is_match = get(doc, attr, False, last_only, child) != None |
... | ... | |
140 | 140 |
parent.setAttribute(elem.name, '') |
141 | 141 |
node = parent.getAttributeNode(elem.name) |
142 | 142 |
else: node = parent.appendChild(doc.createElement(elem.name)) |
143 |
if elem.value != None: xml_util.set_value(doc, node, elem.value)
|
|
143 |
if elem.value != None: xml_dom.set_value(doc, node, elem.value)
|
|
144 | 144 |
for attr in elem.attrs: get(doc, attr, create, last_only, node) |
145 | 145 |
|
146 | 146 |
# Follow pointer |
... | ... | |
149 | 149 |
id_elem = backward_id(path[instance_level]) |
150 | 150 |
if id_elem != None: |
151 | 151 |
# backward (child-to-parent) pointer with target ID attr |
152 |
set_value(id_elem, xml_util.get_id(node))
|
|
152 |
set_value(id_elem, xml_dom.get_id(node))
|
|
153 | 153 |
else: # forward (parent-to-child) pointer |
154 |
id_ = xml_util.value(node)
|
|
154 |
id_ = xml_dom.value(node)
|
|
155 | 155 |
obj_path = obj(path) # target object |
156 | 156 |
if id_ == None or get(doc, obj_path, False, True) == None: |
157 | 157 |
# no target or target attrs don't match |
... | ... | |
160 | 160 |
# Use last target object's ID + 1 |
161 | 161 |
obj_path[-1].attrs = [] # just get by tag name |
162 | 162 |
last = get(doc, obj_path, False, True) |
163 |
if last != None: id_ = str(int(xml_util.get_id(last)) + 1)
|
|
163 |
if last != None: id_ = str(int(xml_dom.get_id(last)) + 1)
|
|
164 | 164 |
else: id_ = '0' |
165 | 165 |
|
166 | 166 |
# Will append if target attrs didn't match. Place ! in XPath |
167 | 167 |
# after element to fork at to avoid this. |
168 |
xml_util.set_value(doc, node, id_)
|
|
168 |
xml_dom.set_value(doc, node, id_)
|
|
169 | 169 |
else: last_only = False |
170 | 170 |
set_id(path, id_) |
171 | 171 |
return get(doc, path, create, last_only) |
scripts/lib/db_xml.py | ||
---|---|---|
1 |
# XML-database conversion |
|
2 |
|
|
3 |
import re |
|
4 |
from xml.dom import Node |
|
5 |
|
|
6 |
import sql |
|
7 |
import xml_dom |
|
8 |
|
|
9 |
def name_of(node):
    """Return the lower-cased tag name of node without any dotted prefix.

    Everything up to and including the last '.' of the name is removed.
    """
    qualified = xml_dom.name_of(node)
    return re.sub(r'^.*\.', r'', qualified)
|
10 |
|
|
11 |
# Suffix that marks a node name as a pointer to another object
ptr_suffix = '_id'

def is_ptr(node_name):
    """Return whether node_name ends with the pointer suffix."""
    return node_name.endswith(ptr_suffix)
|
14 |
|
|
15 |
def ptr_type(node_name):
    """Return node_name with the pointer suffix removed.

    node_name must be a pointer name (asserted).
    """
    assert is_ptr(node_name)
    suffix_len = len(ptr_suffix)
    return node_name[:-suffix_len]
|
18 |
|
|
19 |
def ptr_target(node):
    """Return the first element child of pointer node.

    node's name must be a pointer name (asserted).
    """
    node_name = name_of(node)
    assert is_ptr(node_name)
    return xml_dom.first_elem(node)
|
22 |
|
|
23 |
def find_by_name(node, name):
    """Search node, its ancestors and their element children for name.

    Going from node outwards, returns the first of:
    - an ancestor (or node itself) named name,
    - the target of a pointer child whose type is name,
    - a non-pointer child named name.
    Returns None when nothing matches.
    """
    for ancestor in xml_dom.NodeParentIter(node):
        if name_of(ancestor) == name:
            return ancestor
        for child in xml_dom.NodeElemIter(ancestor):
            child_name = name_of(child)
            if not is_ptr(child_name):
                if child_name == name:
                    return child
            elif ptr_type(child_name) == name:
                return ptr_target(child)
    return None
|
33 |
|
|
34 |
def get(db, node, create=False, store_ids=False, row_ct_ref=None, pkeys=None):
    """Return the database ID of the row described by XML element node.

    The element name is the table; text children are fields; pointer children
    are resolved recursively to the ID of their target; remaining children are
    processed afterwards with a foreign key to this row.

    db: database connection, passed to the sql functions.
    create: passed to sql.get(); when true, missing rows are inserted.
    store_ids: when true, each row's ID is stored on its element.
    row_ct_ref: passed to sql.get() (one-element list counting inserted rows).
    pkeys: optional dict caching table name -> primary-key column.
    """
    # store_ids enables searching the tree for missing fields
    if pkeys is None:
        pkeys = {}

    def pkey(table):
        """Return the primary-key column of table, caching it in pkeys."""
        if table not in pkeys:
            pkeys[table] = sql.pkey(db, table)
        return pkeys[table]

    def obj(node, parent_id=None):
        """Get the row for node (and its children); return the row's ID."""
        table = name_of(node)
        pkey_ = pkey(table)
        row = {}
        children = []

        # Divide children into fields and children with fkeys to parent
        for child in xml_dom.NodeElemIter(node):
            child_name = name_of(child)
            if xml_dom.is_text(child):
                row[child_name] = xml_dom.value(child)
            elif is_ptr(child_name):
                row[child_name] = obj(ptr_target(child))
            else:
                children.append(child)
        # The row's own pkey is never used to look it up or insert it
        row.pop(pkey_, None)

        # Add fkey to parent
        if parent_id is not None:
            row[pkey(name_of(node.parentNode))] = parent_id

        # Insert node
        for try_num in range(2):
            try:
                id_ = sql.get(db, table, row, pkey_, create, row_ct_ref)
                if store_ids:
                    xml_dom.set_id(node, id_)
                break
            except sql.NullValueException as ex:
                if try_num > 0:
                    raise # exception still raised after retry
                # Only a pointer column can be filled in from the tree; for
                # any other column re-raise instead of tripping ptr_type()'s
                # assert and hiding the real error
                if not is_ptr(ex.col):
                    raise
                # Search for required column in ancestors and their children
                target = find_by_name(node, ptr_type(ex.col))
                if target is None:
                    raise
                row[ex.col] = xml_dom.get_id(target)

        # Insert children with fkeys to parent
        for child in children:
            obj(child, id_)

        return id_

    return obj(node)
|
78 |
|
|
79 |
def xml2db(db, node, row_ct_ref=None):
    """Store every non-text element child of node in database db.

    Each such child is passed to get() with create and store_ids enabled.
    row_ct_ref: passed on to get().
    """
    for child in xml_dom.NodeElemIter(node):
        if xml_dom.is_text(child):
            continue # XML metadata
        get(db, child, True, True, row_ct_ref)
scripts/xml2db | ||
---|---|---|
10 | 10 |
import xml.dom.minidom |
11 | 11 |
|
12 | 12 |
sys.path.append(os.path.dirname(__file__)+"/lib") |
13 |
import xml_db
|
|
13 |
import db_xml
|
|
14 | 14 |
|
15 | 15 |
def env_flag(name): return name in os.environ and os.environ[name] != '' |
16 | 16 |
|
... | ... | |
30 | 30 |
try: |
31 | 31 |
doc = xml.dom.minidom.parse(sys.stdin) |
32 | 32 |
row_ct_ref = [0] |
33 |
xml_db.xml2db(db, doc.documentElement, row_ct_ref)
|
|
33 |
db_xml.xml2db(db, doc.documentElement, row_ct_ref)
|
|
34 | 34 |
print 'Inserted '+str(row_ct_ref[0])+' rows' |
35 | 35 |
if commit: db.commit() |
36 | 36 |
finally: |
Also available in: Unified diff
Renamed modules to remove _util