Revision 42
Added by Aaron Marcuse-Kubitza over 13 years ago
scripts/xml2db/xml_db.py | ||
---|---|---|
1 |
# XML-database conversion |
|
2 |
|
|
3 |
import re |
|
4 |
from xml.dom import Node |
|
5 |
|
|
6 |
import db_util |
|
7 |
import xml_util |
|
8 |
|
|
9 |
def name_of(node):
    """Return xml_util.name_of(node) with any dotted prefix stripped."""
    full_name = xml_util.name_of(node)
    # Greedy match removes everything up to and including the last '.'
    return re.sub(r'^.*\.', r'', full_name)
|
10 |
|
|
11 |
# Suffix that marks a field name as a pointer to another object
ptr_suffix = '_id'

def is_ptr(node_name):
    """Return whether node_name ends with the pointer suffix."""
    has_suffix = node_name.endswith(ptr_suffix)
    return has_suffix
|
14 |
|
|
15 |
def ptr_type(node_name):
    """Return a pointer field's name with the pointer suffix removed.

    node_name must satisfy is_ptr(); this is checked with an assertion.
    """
    assert is_ptr(node_name)
    suffix_len = len(ptr_suffix)
    return node_name[:-suffix_len]
|
18 |
|
|
19 |
def ptr_target(node):
    """Return the first element child of a pointer node.

    The node's name must satisfy is_ptr(); this is checked with an assertion.
    """
    node_name = name_of(node)
    assert is_ptr(node_name)
    return xml_util.first_elem(node)
|
22 |
|
|
23 |
def find_by_name(node, name):
    """Search node, its ancestors and their element children for `name`.

    Walks from node upward. An ancestor whose name matches is returned
    directly. Otherwise each of its element children is checked: a pointer
    child whose type matches yields its target, and a non-pointer child whose
    name matches yields the child itself. Returns None when nothing matches.
    """
    for ancestor in xml_util.NodeParentIter(node):
        if name_of(ancestor) == name:
            return ancestor
        for elem in xml_util.NodeElemIter(ancestor):
            elem_name = name_of(elem)
            if is_ptr(elem_name):
                if ptr_type(elem_name) == name:
                    return ptr_target(elem)
            elif elem_name == name:
                return elem
    return None
|
33 |
|
|
34 |
def get(db, node, create=False, store_ids=False, row_ct_ref=None, pkeys=None):
    """Look up (or, with create, insert) the row for node and its subtree.

    Text children become columns, pointer children are resolved recursively
    to the ID of their target, and the remaining children are stored afterward
    with a foreign key to this row.

    db: database connection passed through to db_util
    node: XML element whose (undotted) name is the table name
    create: passed to db_util.get() to allow inserting a missing row
    store_ids: record each row's ID on its node, which enables searching the
        tree for missing fields
    row_ct_ref: optional one-element list passed to db_util.get() as a counter
    pkeys: optional cache mapping table name to primary key column
    Returns the ID of the row for node.
    """
    if pkeys is None: pkeys = {}
    def pkey(table):
        # Cache primary key lookups, one query per table
        if table not in pkeys: pkeys[table] = db_util.pkey(db, table)
        return pkeys[table]

    def obj(node, parent_id=None):
        table = name_of(node)
        pkey_ = pkey(table)
        row = {}
        children = []

        # Divide children into fields and children with fkeys to parent
        for child in xml_util.NodeElemIter(node):
            child_name = name_of(child)
            if xml_util.is_text(child): row[child_name] = xml_util.value(child)
            elif is_ptr(child_name): row[child_name] = obj(ptr_target(child))
            else: children.append(child)
        row.pop(pkey_, None) # the pkey is what gets looked up, not a condition

        # Add fkey to parent
        if parent_id is not None:
            row[pkey(name_of(node.parentNode))] = parent_id

        # Insert node
        for try_num in range(2):
            try:
                id_ = db_util.get(db, table, row, pkey_, create, row_ct_ref)
                if store_ids: xml_util.set_id(node, id_)
                break
            except db_util.NullValueException as ex:
                if try_num > 0: raise # exception still raised after retry
                # Only pointer columns can be filled in from the tree. Without
                # this check ptr_type()'s assertion would hide the DB error.
                if not is_ptr(ex.col): raise
                # Search for required column in ancestors and their children
                target = find_by_name(node, ptr_type(ex.col))
                if target is None: raise
                row[ex.col] = xml_util.get_id(target)

        # Insert children with fkeys to parent
        for child in children: obj(child, id_)

        return id_

    return obj(node)
|
78 |
|
|
79 |
def xml2db(db, node, row_ct_ref=None):
    """Store each non-text element child of node via get().

    Every such child is passed to get() with create and store_ids enabled.
    row_ct_ref is forwarded unchanged.
    """
    for elem in xml_util.NodeElemIter(node):
        if xml_util.is_text(elem):
            continue # XML metadata
        get(db, elem, True, True, row_ct_ref)
scripts/xml2db/db_util.py | ||
---|---|---|
1 |
# Database access |
|
2 |
|
|
3 |
import random |
|
4 |
import re |
|
5 |
import sys |
|
6 |
|
|
7 |
import ex_util |
|
8 |
|
|
9 |
def _add_cursor_info(ex, cur):
    """Append the cursor's query text to the message of exception ex.

    A cursor that has not recorded a query may report None. That case is
    substituted here so the annotation itself cannot raise a TypeError and
    mask ex.
    """
    query = cur.query
    if query is None: query = '(none)'
    ex_util.add_msg(ex, 'query: '+query)
|
10 |
|
|
11 |
class NameException(Exception):
    """Raised by check_name() for a name containing non-word characters."""
    pass
|
12 |
|
|
13 |
class DbException(ex_util.ExceptionWithCause):
    """Database error that can record its cause and the cursor's query."""
    def __init__(self, msg, cause=None, cur=None):
        ex_util.ExceptionWithCause.__init__(self, msg, cause)
        if cur is None:
            return
        # Append the query text held by the cursor to the message
        _add_cursor_info(self, cur)
|
17 |
|
|
18 |
class ExceptionWithColumn(DbException):
    """DbException about one column, whose name is kept in self.col."""
    def __init__(self, col, cause=None):
        message = 'column: '+col
        DbException.__init__(self, message, cause)
        self.col = col
|
22 |
|
|
23 |
class DuplicateKeyException(ExceptionWithColumn):
    """Raised by try_insert() when a unique constraint on self.col is hit."""
    pass
|
24 |
|
|
25 |
class NullValueException(ExceptionWithColumn):
    """Raised by try_insert() when self.col violates a not-null constraint."""
    pass
|
26 |
|
|
27 |
def check_name(name):
    """Raise NameException if name contains any non-word (\\W) character.

    Used before a name is concatenated into SQL text.
    """
    if re.search(r'\W', name) is None:
        return
    raise NameException('Name "'+name
        +'" may contain only alphanumeric characters and _')
|
30 |
|
|
31 |
def run_query(db, query, params=None):
    """Execute query on a new cursor of db and return that cursor.

    params is passed to cursor.execute() unchanged. If execution fails, the
    exception is annotated with the cursor's query and re-raised.
    """
    cur = db.cursor()
    try: cur.execute(query, params)
    except Exception as ex: # "as" form is valid on Python 2.6+ and 3
        _add_cursor_info(ex, cur)
        raise
    return cur
|
38 |
|
|
39 |
def col(cur, idx):
    """Return the first item (the name) of entry idx in cur.description."""
    column_desc = cur.description[idx]
    return column_desc[0]
|
40 |
|
|
41 |
def row(cur):
    """Return the next row from cur.

    Raises StopIteration when cur.fetchone() returns None (no more rows).
    """
    # iter() with a sentinel stops at None. The next() builtin works on both
    # Python 2.6+ and 3, unlike the .next() method.
    return next(iter(cur.fetchone, None))
|
42 |
|
|
43 |
def value(cur):
    """Return the first column of the next row from cur (see row())."""
    next_row = row(cur)
    return next_row[0]
|
44 |
|
|
45 |
def with_savepoint(db, func):
    """Call func() inside a savepoint and return its result.

    If func() raises anything, the savepoint is rolled back and the exception
    re-raised. Otherwise the savepoint is released.
    """
    suffix = random.randint(0, sys.maxint)
    savepoint = 'savepoint_'+str(suffix) # must be unique
    run_query(db, 'SAVEPOINT '+savepoint)
    try:
        return_val = func()
    except:
        # Deliberately catches everything: undo the work, then propagate
        run_query(db, 'ROLLBACK TO SAVEPOINT '+savepoint)
        raise
    run_query(db, 'RELEASE SAVEPOINT '+savepoint)
    return return_val
|
55 |
|
|
56 |
def select(db, table, fields, conds):
    """Run SELECT fields FROM table WHERE every entry of conds holds.

    table, fields and the keys of conds are validated with check_name()
    because they are concatenated into the SQL text. The values of conds are
    passed as query parameters. Returns the cursor from run_query().

    NOTE: conds must not be empty. An empty dict leaves a dangling WHERE.
    """
    check_name(table)
    for field in fields: check_name(field)
    for col_name in conds: check_name(col_name)

    cond_strs = []
    params = []
    for col_name, val in conds.items():
        # A None parameter needs IS rather than =, which never matches NULL
        if val is None: operator = 'IS'
        else: operator = '='
        cond_strs.append(col_name+' '+operator+' %s')
        params.append(val)
    return run_query(db, 'SELECT '+', '.join(fields)+' FROM '+table+' WHERE '
        +' AND '.join(cond_strs), params)
|
69 |
|
|
70 |
def insert(db, table, row):
    """Insert row (a dict of column name to value) into table.

    The table and column names are validated with check_name() because they
    are concatenated into the SQL text. The values are passed as query
    parameters. Returns the cursor from run_query().
    """
    check_name(table)
    cols = list(row.keys())
    for col_name in cols: check_name(col_name)
    if not cols:
        # "() VALUES ()" is not valid SQL. Request a row of defaults instead.
        return run_query(db, 'INSERT INTO '+table+' DEFAULT VALUES')
    params = [row[col_name] for col_name in cols]
    return run_query(db, 'INSERT INTO '+table+' ('+', '.join(cols)
        +') VALUES ('+', '.join(['%s']*len(cols))+')', params)
|
76 |
|
|
77 |
def last_insert_id(db):
    """Return the single value produced by the query SELECT lastval()."""
    cur = run_query(db, 'SELECT lastval()')
    return value(cur)
|
78 |
|
|
79 |
def try_insert(db, table, row):
    """Insert row inside a savepoint, translating known constraint errors.

    Returns the cursor from insert(). The error message is inspected to raise
    DuplicateKeyException for a violated unique constraint named
    <table>_<col>_index, or NullValueException for a not-null violation.
    Any other exception is re-raised unchanged.
    """
    try: return with_savepoint(db, lambda: insert(db, table, row))
    except Exception as ex:
        msg = str(ex)
        # Escape the table name so it is matched literally
        match = re.search(r'duplicate key value violates unique constraint "'
            +re.escape(table)+r'_(\w+)_index"', msg)
        if match: raise DuplicateKeyException(match.group(1), ex)
        # Newer PostgreSQL inserts 'of relation "<table>"' after the column
        match = re.search(r'null value in column "(\w+)"'
            r'(?: of relation "\w+")? violates not-null constraint', msg)
        if match: raise NullValueException(match.group(1), ex)
        raise # no specific exception raised
|
90 |
|
|
91 |
def pkey(db, table): # Assumed to be first column in table
    """Return the name of the first column of table.

    A zero-row SELECT * is run only to obtain the column description.
    """
    check_name(table)
    cur = run_query(db, 'SELECT * FROM '+table+' LIMIT 0')
    return col(cur, 0)
|
94 |
|
|
95 |
def get(db, table, row, pkey, create=False, row_ct_ref=None):
    """Return the pkey value of the row in table matching row.

    If no row matches, StopIteration propagates unless create is set, in
    which case the row is inserted and the new ID returned. If the insert
    hits a duplicate key, the existing row is looked up by that column.

    row_ct_ref: optional one-element list whose item is incremented by the
        number of inserted rows
    """
    try: return value(select(db, table, [pkey], row))
    except StopIteration:
        if not create: raise

    # Insert new row
    try:
        row_ct = try_insert(db, table, row).rowcount
        if row_ct_ref is not None and row_ct >= 0: row_ct_ref[0] += row_ct
        return last_insert_id(db)
    except DuplicateKeyException as ex:
        # The row already exists under that unique column, so fetch it
        return value(select(db, table, [pkey], {ex.col: row[ex.col]}))
scripts/xml2db/ex_util.py | ||
---|---|---|
1 |
# Exception handling |
|
2 |
|
|
3 |
def add_msg(ex, msg):
    """Append msg on a new line to the message of exception ex.

    ex.args is replaced with a one-item tuple holding the combined text.
    Trailing whitespace of the existing message is dropped first.
    """
    current = str(ex).rstrip()
    ex.args = (current+'\n'+msg,)
|
4 |
|
|
5 |
class ExceptionWithCause(Exception):
    """Exception whose message can include the text of a causing exception."""
    def __init__(self, msg, cause=None):
        Exception.__init__(self, msg)
        if cause is None:
            return
        add_msg(self, 'cause: '+str(cause))
scripts/xml2db/xml_util.py | ||
---|---|---|
1 |
# XML DOM tree manipulation |
|
2 |
|
|
3 |
from xml.dom import Node |
|
4 |
import xml.dom.minidom |
|
5 |
|
|
6 |
def name_of(node):
    """Return the node's tag name converted to lowercase."""
    tag = node.tagName
    return tag.lower()
|
7 |
|
|
8 |
def get_id(node):
    """Return the value of the node's 'id' attribute."""
    id_ = node.getAttribute('id')
    return id_
|
9 |
|
|
10 |
def set_id(node, id_):
    """Store id_ in the node's 'id' attribute."""
    node.setAttribute('id', id_)
|
11 |
|
|
12 |
class NodeElemIter:
    """Iterator over the element children of a node, in document order.

    Children that are not element nodes are skipped.
    """
    def __init__(self, node): self.child = node.firstChild

    def __iter__(self): return self

    def curr(self):
        """Return the current element child without consuming it.

        Skips over non-element siblings. Raises StopIteration at the end.
        """
        while self.child is not None:
            if self.child.nodeType == Node.ELEMENT_NODE: return self.child
            self.child = self.child.nextSibling
        raise StopIteration

    def next(self):
        """Return the current element child and advance past it."""
        child = self.curr()
        self.child = self.child.nextSibling
        return child

    __next__ = next # Python 3 iterator protocol; next() kept for callers
|
27 |
|
|
28 |
def first_elem(node):
    """Return node's first element child.

    StopIteration from the iterator propagates if there is none.
    """
    elem_iter = NodeElemIter(node)
    return elem_iter.next()
|
29 |
|
|
30 |
class NodeElemReverseIter:
    """Iterator over the element children of a node, last child first.

    Children that are not element nodes are skipped.
    """
    def __init__(self, node): self.child = node.lastChild

    def __iter__(self): return self

    def curr(self):
        """Return the current element child without consuming it.

        Skips over non-element siblings. Raises StopIteration at the end.
        """
        while self.child is not None:
            if self.child.nodeType == Node.ELEMENT_NODE: return self.child
            self.child = self.child.previousSibling
        raise StopIteration

    def next(self):
        """Return the current element child and step to the one before it."""
        child = self.curr()
        self.child = self.child.previousSibling
        return child

    __next__ = next # Python 3 iterator protocol; next() kept for callers
|
45 |
|
|
46 |
def last_elem(node):
    """Return node's last element child.

    StopIteration from the iterator propagates if there is none.
    """
    elem_iter = NodeElemReverseIter(node)
    return elem_iter.next()
|
47 |
|
|
48 |
class NodeParentIter:
    """Iterator over a node and then its ancestors, innermost first.

    Iteration stops at the first node that is missing or is not an element.
    """
    def __init__(self, node): self.node = node

    def __iter__(self): return self

    def curr(self):
        """Return the current node, or raise StopIteration when finished."""
        if self.node is not None and self.node.nodeType == Node.ELEMENT_NODE:
            return self.node
        raise StopIteration

    def next(self):
        """Return the current node and move up to its parent."""
        node = self.curr()
        self.node = self.node.parentNode
        return node

    __next__ = next # Python 3 iterator protocol; next() kept for callers
|
62 |
|
|
63 |
def is_text(node):
    """Return True when node has no element children."""
    try:
        NodeElemIter(node).curr()
    except StopIteration:
        return True # ran out of children without finding an element
    return False # has an element node
|
66 |
|
|
67 |
def value(node):
    """Return the nodeValue of node's first child, or of node if childless."""
    first = node.firstChild
    if first is None:
        return node.nodeValue
    return first.nodeValue
|
70 |
|
|
71 |
def set_value(doc, node, value):
    """Give node the text value.

    An element gets a new text node appended (existing children are kept).
    Any other node has its nodeValue assigned.
    """
    if node.nodeType != Node.ELEMENT_NODE:
        node.nodeValue = value
        return
    text_node = doc.createTextNode(value)
    node.appendChild(text_node)
|
75 |
|
|
76 |
def by_tag_name(node, name, last_only=False):
    """Return node's element children whose tagName equals name, last first.

    The last_only optimization returns only the last (most recently inserted)
    matching node, in a one-item list.
    """
    matches = []
    for elem in NodeElemReverseIter(node):
        if elem.tagName != name:
            continue
        matches.append(elem)
        if last_only:
            break
    return matches
|
84 |
|
|
85 |
_writexml_orig = xml.dom.minidom.Element.writexml |
|
86 |
|
|
87 |
def _writexml(self, writer, indent="", addindent="", newl=""): |
|
88 |
if self.firstChild != None and self.firstChild.nextSibling == None\ |
|
89 |
and self.firstChild.nodeType == Node.TEXT_NODE: # a single text node |
|
90 |
writer.write(indent+'<'+self.tagName) |
|
91 |
for attr_idx in range(self.attributes.length): |
|
92 |
attr = self.attributes.item(attr_idx) |
|
93 |
writer.write(' '+attr.name+'='+attr.value) |
|
94 |
writer.write('>'+value(self)+'</'+self.tagName+'>'+newl) |
|
95 |
else: _writexml_orig(self, writer, indent, addindent, newl) |
|
96 |
|
|
97 |
xml.dom.minidom.Element.writexml = _writexml |
scripts/data2xml/util.py | ||
---|---|---|
1 |
# Useful functions and classes |
|
2 |
|
|
3 |
class Obj:
    """Plain object whose attributes are the keyword arguments given."""
    def __init__(self, **attrs):
        # The keyword dict itself becomes the instance namespace
        self.__dict__ = attrs

    def __repr__(self):
        namespace = self.__dict__
        return repr(namespace)
|
7 | 0 |
scripts/data2xml/xpath.py | ||
---|---|---|
1 |
# XPath-based XML tree manipulation |
|
2 |
|
|
3 |
from copy import deepcopy |
|
4 |
from xml.dom import Node |
|
5 |
|
|
6 |
from Parser import Parser |
|
7 |
import xml_util |
|
8 |
|
|
9 |
class XpathElem:
    """One step of a parsed XPath.

    name: element or attribute name
    value: required/assigned text value, or None
    attrs: list of sub-paths (each a list of XpathElem) attached to this step
    is_attr: whether the step names an attribute (written with a leading @)
    is_ptr: whether the step is followed by a pointer (written as ->)
    """
    def __init__(self, name, value=None, attrs=None, is_attr=False,
        is_ptr=False):
        if attrs is None: attrs = [] # fresh list for each instance
        self.name = name
        self.value = value
        self.attrs = attrs
        self.is_attr = is_attr
        self.is_ptr = is_ptr

    def __repr__(self):
        str_ = ''
        if self.is_attr: str_ += '@'
        str_ += self.name
        if self.attrs != []: str_ += repr(self.attrs)
        if self.value is not None: str_ += '='+repr(self.value)
        if self.is_ptr: str_ += '->'
        return str_

    def __eq__(self, other):
        # Objects without attributes (e.g. None) are simply not equal
        other_attrs = getattr(other, '__dict__', None)
        if other_attrs is None: return NotImplemented
        return self.__dict__ == other_attrs

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it to agree
        result = self.__eq__(other)
        if result is NotImplemented: return result
        return not result
|
29 |
|
|
30 |
def value(path):
    """Return the value of the last element of path."""
    last_elem = path[-1]
    return last_elem.value
|
31 |
|
|
32 |
def set_value(path, value):
    """Assign value to the last element of path."""
    last_elem = path[-1]
    last_elem.value = value
|
33 |
|
|
34 |
def backward_id(elem):
    """Return elem's first attr path if that path has no value, else None.

    None is also returned when elem has no attrs.
    """
    attrs = elem.attrs
    if len(attrs) < 1:
        return None
    first_attr = attrs[0]
    if value(first_attr) is None:
        return first_attr
    return None
|
38 |
|
|
39 |
class XpathParser(Parser):
    """Parser turning an XPath-like string into a list of XpathElem.

    Syntax handled by _path(): steps separated by /, @name for attributes,
    [path=value,...] for attrs, -> for pointers, (/path) for a lookahead
    assertion, {a,b} for a split path, and * as a name abbreviation.
    """
    def _main(self):
        self._match_str('/') # optional leading /
        return self._path()

    def _path(self):
        """Parse one path and return it as a list of XpathElem."""
        tree = []
        while True:
            # Split path
            if self._match_str('{'):
                paths = []
                while True:
                    paths.append(tree + self._path())
                    if not self._match_str(','): break
                self._match_str('}', required=True)
                tree = paths[0] # just use first subpath for now
                break # nothing allowed after split path

            # is_attr is evaluated first, so an @ is consumed before the name
            elem = XpathElem(is_attr=self._match_str('@'),
                name=self._match_re(r'[\w.*]+', required=True))

            # Attrs
            if self._match_str('['):
                elem.attrs = []
                while True:
                    path = self._path()
                    if self._match_str('='):
                        set_value(path, self._match_re(r'[\w.|]*'))
                    elem.attrs.append(path)
                    if not self._match_str(','): break
                self._match_str(']', required=True)

            elem.is_ptr = self._match_str('->')
            tree.append(elem)

            # Lookahead assertion
            if self._match_str('('):
                self._match_str('/', required=True) # next / is inside ()
                path = self._path()
                self._match_str(')', required=True)
                elem.attrs.append(path)
                tree += path

            if not self._match_str('/'): break

        # Expand * abbrs
        for elem_idx, elem in enumerate(tree):
            id_ = backward_id(elem)
            if id_ is not None: elem = id_[0]; offset = -2
            elif elem.is_ptr: offset = 2
            else: offset = 1
            before, abbr, after = elem.name.partition('*')
            if abbr == '': continue
            src_idx = elem_idx+offset
            # A negative index would silently wrap around to the end of the
            # tree, so treat it like a too-large one: no replacement elem
            if 0 <= src_idx < len(tree):
                elem.name = before+tree[src_idx].name+after

        return tree
|
99 |
|
|
100 |
# Index within a path of the element that represents an object instance
instance_level = 1

def obj(path):
    """Return a deep copy of path truncated after the instance level.

    The copy's last element has is_ptr cleared. The original path is not
    modified.
    """
    end = instance_level+1
    obj_path = deepcopy(path[:end])
    last_elem = obj_path[-1]
    last_elem.is_ptr = False # prevent pointer w/o target
    return obj_path
|
106 |
|
|
107 |
def set_id(path, id_, has_types=True):
    """Append an @id=id_ attr to one element of path.

    The element at instance_level is used when has_types is set, otherwise
    the first element.
    """
    id_level = instance_level if has_types else 0
    id_attr = [XpathElem('id', id_, is_attr=True)]
    path[id_level].attrs.append(id_attr)
|
111 |
|
|
112 |
def get(doc, path, create=False, last_only=None, parent=None):
    """Find, and optionally create, the node that path leads to.

    doc: the DOM document
    path: list of XpathElem
    create: create missing nodes instead of returning None
    last_only: consider only the last matching child at each step. Defaults
        to the value of create.
    parent: node to start from. Defaults to the document element.
    Returns the node for the last path element, or None if it was not found
    and create is off.
    """
    # Warning: The last_only optimization may put data that should be together
    # into separate nodes
    if parent is None: parent = doc.documentElement
    if last_only is None: last_only = create
    for elem_idx, elem in enumerate(path):
        # Find possible matches
        if elem.is_attr:
            attr_node = parent.getAttributeNode(elem.name)
            if attr_node is not None: candidates = [attr_node]
            else: candidates = []
        elif elem.name == '.':
            candidates = [parent]
        else:
            candidates = xml_util.by_tag_name(parent, elem.name, last_only)

        # Check each match
        node = None
        for candidate in candidates:
            is_match = (elem.value is None
                or xml_util.value(candidate) == elem.value)
            for attr in elem.attrs:
                if not is_match: break
                found = get(doc, attr, False, last_only, candidate)
                is_match = found is not None
            if is_match:
                node = candidate
                break

        # Create node
        if node is None:
            if not create: return None
            if elem.is_attr:
                parent.setAttribute(elem.name, '')
                node = parent.getAttributeNode(elem.name)
            else:
                node = parent.appendChild(doc.createElement(elem.name))
            if elem.value is not None:
                xml_util.set_value(doc, node, elem.value)
            for attr in elem.attrs:
                get(doc, attr, create, last_only, node)

        # Follow pointer
        if elem.is_ptr:
            path = deepcopy(path[elem_idx+1:]) # rest of path
            id_elem = backward_id(path[instance_level])
            if id_elem is not None:
                # backward (child-to-parent) pointer with target ID attr
                set_value(id_elem, xml_util.get_id(node))
            else: # forward (parent-to-child) pointer
                id_ = xml_util.value(node)
                obj_path = obj(path) # target object
                if id_ is None or get(doc, obj_path, False, True) is None:
                    # no target or target attrs don't match
                    if not create: return None

                    # Use last target object's ID + 1
                    obj_path[-1].attrs = [] # just get by tag name
                    last = get(doc, obj_path, False, True)
                    if last is None: id_ = '0'
                    else: id_ = str(int(xml_util.get_id(last)) + 1)

                    # Will append if target attrs didn't match. Place ! in
                    # XPath after element to fork at to avoid this.
                    xml_util.set_value(doc, node, id_)
                else:
                    last_only = False
                set_id(path, id_)
            # The rest of the path is resolved from the document root
            return get(doc, path, create, last_only)

        parent = node
    return parent
scripts/data2xml/Parser.py | ||
---|---|---|
1 |
# A general recursive descent parser |
|
2 |
|
|
3 |
import re |
|
4 |
|
|
5 |
class SyntaxException(Exception):
    """Raised by Parser when the input does not contain an expected token."""
    pass
|
6 |
|
|
7 |
class Parser:
    """Base class for recursive descent parsers over a string.

    parse() calls self._main(), which is not defined here and must come from
    a subclass. The _match_* helpers consume input at the current position.
    """
    def __init__(self, string):
        self._str = string
        self._pos = 0

    def parse(self):
        """Parse the whole string and return the tree built by _main()."""
        tree = self._main()
        if self._pos != len(self._str): self._syntax_err('End of string')
        return tree

    def _match_re(self, pattern, required=False):
        """Consume and return text matching pattern at the current position.

        Returns None when there is no match, or raises SyntaxException if
        required is set.
        """
        found = re.compile(pattern).match(self._str, self._pos)
        if not found:
            if required: self._syntax_err(pattern)
            return None
        self._pos = found.end(0)
        return found.group(0)

    def _match_str(self, string, required=False):
        """Consume string if the input continues with it. Return True if so.

        Returns False when it does not, or raises SyntaxException if required
        is set.
        """
        end_pos = self._pos + len(string)
        if self._str[self._pos:end_pos] != string:
            if required: self._syntax_err(string)
            return False
        self._pos = end_pos
        return True

    def _syntax_err(self, token):
        """Raise SyntaxException naming token and the unparsed remainder."""
        remainder = self._str[self._pos:]
        raise SyntaxException(token+' expected in '+remainder)
|
35 | 0 |
scripts/data2xml/xml_util.py | ||
---|---|---|
1 |
# XML DOM tree manipulation |
|
2 |
|
|
3 |
from xml.dom import Node |
|
4 |
import xml.dom.minidom |
|
5 |
|
|
6 |
def name_of(node):
    """Return the node's tag name converted to lowercase."""
    tag = node.tagName
    return tag.lower()
|
7 |
|
|
8 |
def get_id(node):
    """Return the value of the node's 'id' attribute."""
    id_ = node.getAttribute('id')
    return id_
|
9 |
|
|
10 |
def set_id(node, id_):
    """Store id_ in the node's 'id' attribute."""
    node.setAttribute('id', id_)
|
11 |
|
|
12 |
class NodeElemIter:
    """Iterator over the element children of a node, in document order.

    Children that are not element nodes are skipped.
    """
    def __init__(self, node): self.child = node.firstChild

    def __iter__(self): return self

    def curr(self):
        """Return the current element child without consuming it.

        Skips over non-element siblings. Raises StopIteration at the end.
        """
        while self.child is not None:
            if self.child.nodeType == Node.ELEMENT_NODE: return self.child
            self.child = self.child.nextSibling
        raise StopIteration

    def next(self):
        """Return the current element child and advance past it."""
        child = self.curr()
        self.child = self.child.nextSibling
        return child

    __next__ = next # Python 3 iterator protocol; next() kept for callers
|
27 |
|
|
28 |
def first_elem(node):
    """Return node's first element child.

    StopIteration from the iterator propagates if there is none.
    """
    elem_iter = NodeElemIter(node)
    return elem_iter.next()
|
29 |
|
|
30 |
class NodeElemReverseIter:
    """Iterator over the element children of a node, last child first.

    Children that are not element nodes are skipped.
    """
    def __init__(self, node): self.child = node.lastChild

    def __iter__(self): return self

    def curr(self):
        """Return the current element child without consuming it.

        Skips over non-element siblings. Raises StopIteration at the end.
        """
        while self.child is not None:
            if self.child.nodeType == Node.ELEMENT_NODE: return self.child
            self.child = self.child.previousSibling
        raise StopIteration

    def next(self):
        """Return the current element child and step to the one before it."""
        child = self.curr()
        self.child = self.child.previousSibling
        return child

    __next__ = next # Python 3 iterator protocol; next() kept for callers
|
45 |
|
|
46 |
def last_elem(node):
    """Return node's last element child.

    StopIteration from the iterator propagates if there is none.
    """
    elem_iter = NodeElemReverseIter(node)
    return elem_iter.next()
|
47 |
|
|
48 |
class NodeParentIter:
    """Iterator over a node and then its ancestors, innermost first.

    Iteration stops at the first node that is missing or is not an element.
    """
    def __init__(self, node): self.node = node

    def __iter__(self): return self

    def curr(self):
        """Return the current node, or raise StopIteration when finished."""
        if self.node is not None and self.node.nodeType == Node.ELEMENT_NODE:
            return self.node
        raise StopIteration

    def next(self):
        """Return the current node and move up to its parent."""
        node = self.curr()
        self.node = self.node.parentNode
        return node

    __next__ = next # Python 3 iterator protocol; next() kept for callers
|
62 |
|
|
63 |
def is_text(node):
    """Return True when node has no element children."""
    try:
        NodeElemIter(node).curr()
    except StopIteration:
        return True # ran out of children without finding an element
    return False # has an element node
|
66 |
|
|
67 |
def value(node):
    """Return the nodeValue of node's first child, or of node if childless."""
    first = node.firstChild
    if first is None:
        return node.nodeValue
    return first.nodeValue
|
70 |
|
|
71 |
def set_value(doc, node, value):
    """Give node the text value.

    An element gets a new text node appended (existing children are kept).
    Any other node has its nodeValue assigned.
    """
    if node.nodeType != Node.ELEMENT_NODE:
        node.nodeValue = value
        return
    text_node = doc.createTextNode(value)
    node.appendChild(text_node)
|
75 |
|
|
76 |
def by_tag_name(node, name, last_only=False):
    """Return node's element children whose tagName equals name, last first.

    The last_only optimization returns only the last (most recently inserted)
    matching node, in a one-item list.
    """
    matches = []
    for elem in NodeElemReverseIter(node):
        if elem.tagName != name:
            continue
        matches.append(elem)
        if last_only:
            break
    return matches
|
84 |
|
|
85 |
_writexml_orig = xml.dom.minidom.Element.writexml |
|
86 |
|
|
87 |
def _writexml(self, writer, indent="", addindent="", newl=""): |
|
88 |
if self.firstChild != None and self.firstChild.nextSibling == None\ |
|
89 |
and self.firstChild.nodeType == Node.TEXT_NODE: # a single text node |
|
90 |
writer.write(indent+'<'+self.tagName) |
|
91 |
for attr_idx in range(self.attributes.length): |
|
92 |
attr = self.attributes.item(attr_idx) |
|
93 |
writer.write(' '+attr.name+'='+attr.value) |
|
94 |
writer.write('>'+value(self)+'</'+self.tagName+'>'+newl) |
|
95 |
else: _writexml_orig(self, writer, indent, addindent, newl) |
|
96 |
|
|
97 |
xml.dom.minidom.Element.writexml = _writexml |
scripts/lib/ex_util.py | ||
---|---|---|
1 |
# Exception handling |
|
2 |
|
|
3 |
def add_msg(ex, msg):
    """Append msg on a new line to the message of exception ex.

    ex.args is replaced with a one-item tuple holding the combined text.
    Trailing whitespace of the existing message is dropped first.
    """
    current = str(ex).rstrip()
    ex.args = (current+'\n'+msg,)
|
4 |
|
|
5 |
class ExceptionWithCause(Exception):
    """Exception whose message can include the text of a causing exception."""
    def __init__(self, msg, cause=None):
        Exception.__init__(self, msg)
        if cause is None:
            return
        add_msg(self, 'cause: '+str(cause))
scripts/lib/Parser.py | ||
---|---|---|
1 |
# A general recursive descent parser |
|
2 |
|
|
3 |
import re |
|
4 |
|
|
5 |
class SyntaxException(Exception):
    """Raised by Parser when the input does not contain an expected token."""
    pass
|
6 |
|
|
7 |
class Parser:
    """Base class for recursive descent parsers over a string.

    parse() calls self._main(), which is not defined here and must come from
    a subclass. The _match_* helpers consume input at the current position.
    """
    def __init__(self, string):
        self._str = string
        self._pos = 0

    def parse(self):
        """Parse the whole string and return the tree built by _main()."""
        tree = self._main()
        if self._pos != len(self._str): self._syntax_err('End of string')
        return tree

    def _match_re(self, pattern, required=False):
        """Consume and return text matching pattern at the current position.

        Returns None when there is no match, or raises SyntaxException if
        required is set.
        """
        found = re.compile(pattern).match(self._str, self._pos)
        if not found:
            if required: self._syntax_err(pattern)
            return None
        self._pos = found.end(0)
        return found.group(0)

    def _match_str(self, string, required=False):
        """Consume string if the input continues with it. Return True if so.

        Returns False when it does not, or raises SyntaxException if required
        is set.
        """
        end_pos = self._pos + len(string)
        if self._str[self._pos:end_pos] != string:
            if required: self._syntax_err(string)
            return False
        self._pos = end_pos
        return True

    def _syntax_err(self, token):
        """Raise SyntaxException naming token and the unparsed remainder."""
        remainder = self._str[self._pos:]
        raise SyntaxException(token+' expected in '+remainder)
|
0 | 35 |
scripts/lib/util.py | ||
---|---|---|
1 |
# Useful functions and classes |
|
2 |
|
|
3 |
class Obj:
    """Plain object whose attributes are the keyword arguments given."""
    def __init__(self, **attrs):
        # The keyword dict itself becomes the instance namespace
        self.__dict__ = attrs

    def __repr__(self):
        namespace = self.__dict__
        return repr(namespace)
|
0 | 7 |
scripts/lib/xml_util.py | ||
---|---|---|
1 |
# XML DOM tree manipulation |
|
2 |
|
|
3 |
from xml.dom import Node |
|
4 |
import xml.dom.minidom |
|
5 |
|
|
6 |
def name_of(node):
    """Return the node's tag name converted to lowercase."""
    tag = node.tagName
    return tag.lower()
|
7 |
|
|
8 |
def get_id(node):
    """Return the value of the node's 'id' attribute."""
    id_ = node.getAttribute('id')
    return id_
|
9 |
|
|
10 |
def set_id(node, id_):
    """Store id_ in the node's 'id' attribute."""
    node.setAttribute('id', id_)
|
11 |
|
|
12 |
class NodeElemIter:
    """Iterator over the element children of a node, in document order.

    Children that are not element nodes are skipped.
    """
    def __init__(self, node): self.child = node.firstChild

    def __iter__(self): return self

    def curr(self):
        """Return the current element child without consuming it.

        Skips over non-element siblings. Raises StopIteration at the end.
        """
        while self.child is not None:
            if self.child.nodeType == Node.ELEMENT_NODE: return self.child
            self.child = self.child.nextSibling
        raise StopIteration

    def next(self):
        """Return the current element child and advance past it."""
        child = self.curr()
        self.child = self.child.nextSibling
        return child

    __next__ = next # Python 3 iterator protocol; next() kept for callers
|
27 |
|
|
28 |
def first_elem(node):
    """Return node's first element child.

    StopIteration from the iterator propagates if there is none.
    """
    elem_iter = NodeElemIter(node)
    return elem_iter.next()
|
29 |
|
|
30 |
class NodeElemReverseIter:
    """Iterator over the element children of a node, last child first.

    Children that are not element nodes are skipped.
    """
    def __init__(self, node): self.child = node.lastChild

    def __iter__(self): return self

    def curr(self):
        """Return the current element child without consuming it.

        Skips over non-element siblings. Raises StopIteration at the end.
        """
        while self.child is not None:
            if self.child.nodeType == Node.ELEMENT_NODE: return self.child
            self.child = self.child.previousSibling
        raise StopIteration

    def next(self):
        """Return the current element child and step to the one before it."""
        child = self.curr()
        self.child = self.child.previousSibling
        return child

    __next__ = next # Python 3 iterator protocol; next() kept for callers
|
45 |
|
|
46 |
def last_elem(node):
    """Return node's last element child.

    StopIteration from the iterator propagates if there is none.
    """
    elem_iter = NodeElemReverseIter(node)
    return elem_iter.next()
|
47 |
|
|
48 |
class NodeParentIter:
    """Iterator over a node and then its ancestors, innermost first.

    Iteration stops at the first node that is missing or is not an element.
    """
    def __init__(self, node): self.node = node

    def __iter__(self): return self

    def curr(self):
        """Return the current node, or raise StopIteration when finished."""
        if self.node is not None and self.node.nodeType == Node.ELEMENT_NODE:
            return self.node
        raise StopIteration

    def next(self):
        """Return the current node and move up to its parent."""
        node = self.curr()
        self.node = self.node.parentNode
        return node

    __next__ = next # Python 3 iterator protocol; next() kept for callers
|
62 |
|
|
63 |
def is_text(node):
    """Return True when node has no element children."""
    try:
        NodeElemIter(node).curr()
    except StopIteration:
        return True # ran out of children without finding an element
    return False # has an element node
|
66 |
|
|
67 |
def value(node):
    """Return the nodeValue of node's first child, or of node if childless."""
    first = node.firstChild
    if first is None:
        return node.nodeValue
    return first.nodeValue
|
70 |
|
|
71 |
def set_value(doc, node, value):
    """Give node the text value.

    An element gets a new text node appended (existing children are kept).
    Any other node has its nodeValue assigned.
    """
    if node.nodeType != Node.ELEMENT_NODE:
        node.nodeValue = value
        return
    text_node = doc.createTextNode(value)
    node.appendChild(text_node)
|
75 |
|
|
76 |
def by_tag_name(node, name, last_only=False):
    """Return node's element children whose tagName equals name, last first.

    The last_only optimization returns only the last (most recently inserted)
    matching node, in a one-item list.
    """
    matches = []
    for elem in NodeElemReverseIter(node):
        if elem.tagName != name:
            continue
        matches.append(elem)
        if last_only:
            break
    return matches
|
84 |
|
|
85 |
_writexml_orig = xml.dom.minidom.Element.writexml |
|
86 |
|
|
87 |
def _writexml(self, writer, indent="", addindent="", newl=""): |
|
88 |
if self.firstChild != None and self.firstChild.nextSibling == None\ |
|
89 |
and self.firstChild.nodeType == Node.TEXT_NODE: # a single text node |
|
90 |
writer.write(indent+'<'+self.tagName) |
|
91 |
for attr_idx in range(self.attributes.length): |
|
92 |
attr = self.attributes.item(attr_idx) |
|
93 |
writer.write(' '+attr.name+'='+attr.value) |
|
94 |
writer.write('>'+value(self)+'</'+self.tagName+'>'+newl) |
|
95 |
else: _writexml_orig(self, writer, indent, addindent, newl) |
|
96 |
|
|
97 |
xml.dom.minidom.Element.writexml = _writexml |
scripts/lib/db_util.py | ||
---|---|---|
1 |
# Database access |
|
2 |
|
|
3 |
import random |
|
4 |
import re |
|
5 |
import sys |
|
6 |
|
|
7 |
import ex_util |
|
8 |
|
|
9 |
def _add_cursor_info(ex, cur):
    """Append the cursor's query text to the message of exception ex.

    A cursor that has not recorded a query may report None. That case is
    substituted here so the annotation itself cannot raise a TypeError and
    mask ex.
    """
    query = cur.query
    if query is None: query = '(none)'
    ex_util.add_msg(ex, 'query: '+query)
|
10 |
|
|
11 |
class NameException(Exception):
    """Raised by check_name() for a name containing non-word characters."""
    pass
|
12 |
|
|
13 |
class DbException(ex_util.ExceptionWithCause):
    """Database error that can record its cause and the cursor's query."""
    def __init__(self, msg, cause=None, cur=None):
        ex_util.ExceptionWithCause.__init__(self, msg, cause)
        if cur is None:
            return
        # Append the query text held by the cursor to the message
        _add_cursor_info(self, cur)
|
17 |
|
|
18 |
class ExceptionWithColumn(DbException):
    """DbException about one column, whose name is kept in self.col."""
    def __init__(self, col, cause=None):
        message = 'column: '+col
        DbException.__init__(self, message, cause)
        self.col = col
|
22 |
|
|
23 |
class DuplicateKeyException(ExceptionWithColumn):
    """Raised by try_insert() when a unique constraint on self.col is hit."""
    pass
|
24 |
|
|
25 |
class NullValueException(ExceptionWithColumn):
    """Raised by try_insert() when self.col violates a not-null constraint."""
    pass
|
26 |
|
|
27 |
def check_name(name):
    """Raise NameException if name contains any non-word (\\W) character.

    Used before a name is concatenated into SQL text.
    """
    if re.search(r'\W', name) is None:
        return
    raise NameException('Name "'+name
        +'" may contain only alphanumeric characters and _')
|
30 |
|
|
31 |
def run_query(db, query, params=None):
    """Execute query on a new cursor of db and return that cursor.

    params is passed to cursor.execute() unchanged. If execution fails, the
    exception is annotated with the cursor's query and re-raised.
    """
    cur = db.cursor()
    try: cur.execute(query, params)
    except Exception as ex: # "as" form is valid on Python 2.6+ and 3
        _add_cursor_info(ex, cur)
        raise
    return cur
|
38 |
|
|
39 |
def col(cur, idx):
    """Return the first item (the name) of entry idx in cur.description."""
    column_desc = cur.description[idx]
    return column_desc[0]
|
40 |
|
|
41 |
def row(cur):
    """Return the next row from cur.

    Raises StopIteration when cur.fetchone() returns None (no more rows).
    """
    # iter() with a sentinel stops at None. The next() builtin works on both
    # Python 2.6+ and 3, unlike the .next() method.
    return next(iter(cur.fetchone, None))
|
42 |
|
|
43 |
def value(cur):
    """Return the first column of the next row from cur (see row())."""
    next_row = row(cur)
    return next_row[0]
|
44 |
|
|
45 |
def with_savepoint(db, func):
    """Call func() inside a savepoint and return its result.

    If func() raises anything, the savepoint is rolled back and the exception
    re-raised. Otherwise the savepoint is released.
    """
    suffix = random.randint(0, sys.maxint)
    savepoint = 'savepoint_'+str(suffix) # must be unique
    run_query(db, 'SAVEPOINT '+savepoint)
    try:
        return_val = func()
    except:
        # Deliberately catches everything: undo the work, then propagate
        run_query(db, 'ROLLBACK TO SAVEPOINT '+savepoint)
        raise
    run_query(db, 'RELEASE SAVEPOINT '+savepoint)
    return return_val
|
55 |
|
|
56 |
def select(db, table, fields, conds):
    """Run SELECT fields FROM table WHERE every entry of conds holds.

    table, fields and the keys of conds are validated with check_name()
    because they are concatenated into the SQL text. The values of conds are
    passed as query parameters. Returns the cursor from run_query().

    NOTE: conds must not be empty. An empty dict leaves a dangling WHERE.
    """
    check_name(table)
    for field in fields: check_name(field)
    for col_name in conds: check_name(col_name)

    cond_strs = []
    params = []
    for col_name, val in conds.items():
        # A None parameter needs IS rather than =, which never matches NULL
        if val is None: operator = 'IS'
        else: operator = '='
        cond_strs.append(col_name+' '+operator+' %s')
        params.append(val)
    return run_query(db, 'SELECT '+', '.join(fields)+' FROM '+table+' WHERE '
        +' AND '.join(cond_strs), params)
|
69 |
|
|
70 |
def insert(db, table, row):
    """Insert row (a dict of column name to value) into table.

    The table and column names are validated with check_name() because they
    are concatenated into the SQL text. The values are passed as query
    parameters. Returns the cursor from run_query().
    """
    check_name(table)
    cols = list(row.keys())
    for col_name in cols: check_name(col_name)
    if not cols:
        # "() VALUES ()" is not valid SQL. Request a row of defaults instead.
        return run_query(db, 'INSERT INTO '+table+' DEFAULT VALUES')
    params = [row[col_name] for col_name in cols]
    return run_query(db, 'INSERT INTO '+table+' ('+', '.join(cols)
        +') VALUES ('+', '.join(['%s']*len(cols))+')', params)
|
76 |
|
|
77 |
def last_insert_id(db):
    """Return the single value produced by the query SELECT lastval()."""
    cur = run_query(db, 'SELECT lastval()')
    return value(cur)
|
78 |
|
|
79 |
def try_insert(db, table, row):
    """Insert row inside a savepoint, translating known constraint errors.

    Returns the cursor from insert(). The error message is inspected to raise
    DuplicateKeyException for a violated unique constraint named
    <table>_<col>_index, or NullValueException for a not-null violation.
    Any other exception is re-raised unchanged.
    """
    try: return with_savepoint(db, lambda: insert(db, table, row))
    except Exception as ex:
        msg = str(ex)
        # Escape the table name so it is matched literally
        match = re.search(r'duplicate key value violates unique constraint "'
            +re.escape(table)+r'_(\w+)_index"', msg)
        if match: raise DuplicateKeyException(match.group(1), ex)
        # Newer PostgreSQL inserts 'of relation "<table>"' after the column
        match = re.search(r'null value in column "(\w+)"'
            r'(?: of relation "\w+")? violates not-null constraint', msg)
        if match: raise NullValueException(match.group(1), ex)
        raise # no specific exception raised
|
90 |
|
|
91 |
def pkey(db, table): # Assumed to be first column in table
    """Return the name of the first column of table.

    A zero-row SELECT * is run only to obtain the column description.
    """
    check_name(table)
    cur = run_query(db, 'SELECT * FROM '+table+' LIMIT 0')
    return col(cur, 0)
|
94 |
|
|
95 |
def get(db, table, row, pkey, create=False, row_ct_ref=None):
    """Return the pkey value of the row in table matching row.

    If no row matches, StopIteration propagates unless create is set, in
    which case the row is inserted and the new ID returned. If the insert
    hits a duplicate key, the existing row is looked up by that column.

    row_ct_ref: optional one-element list whose item is incremented by the
        number of inserted rows
    """
    try: return value(select(db, table, [pkey], row))
    except StopIteration:
        if not create: raise

    # Insert new row
    try:
        row_ct = try_insert(db, table, row).rowcount
        if row_ct_ref is not None and row_ct >= 0: row_ct_ref[0] += row_ct
        return last_insert_id(db)
    except DuplicateKeyException as ex:
        # The row already exists under that unique column, so fetch it
        return value(select(db, table, [pkey], {ex.col: row[ex.col]}))
scripts/lib/xpath.py | ||
---|---|---|
1 |
# XPath-based XML tree manipulation |
|
2 |
|
|
3 |
from copy import deepcopy |
|
4 |
from xml.dom import Node |
|
5 |
|
|
6 |
from Parser import Parser |
|
7 |
import xml_util |
|
8 |
|
|
9 |
class XpathElem:
    """One step of a parsed XPath.

    name: element or attribute name
    value: required/assigned text value, or None
    attrs: list of sub-paths (each a list of XpathElem) attached to this step
    is_attr: whether the step names an attribute (written with a leading @)
    is_ptr: whether the step is followed by a pointer (written as ->)
    """
    def __init__(self, name, value=None, attrs=None, is_attr=False,
        is_ptr=False):
        if attrs is None: attrs = [] # fresh list for each instance
        self.name = name
        self.value = value
        self.attrs = attrs
        self.is_attr = is_attr
        self.is_ptr = is_ptr

    def __repr__(self):
        str_ = ''
        if self.is_attr: str_ += '@'
        str_ += self.name
        if self.attrs != []: str_ += repr(self.attrs)
        if self.value is not None: str_ += '='+repr(self.value)
        if self.is_ptr: str_ += '->'
        return str_

    def __eq__(self, other):
        # Objects without attributes (e.g. None) are simply not equal
        other_attrs = getattr(other, '__dict__', None)
        if other_attrs is None: return NotImplemented
        return self.__dict__ == other_attrs

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it to agree
        result = self.__eq__(other)
        if result is NotImplemented: return result
        return not result
|
29 |
|
|
30 |
def value(path):
    """Return the value of the last element of path."""
    last_elem = path[-1]
    return last_elem.value
|
31 |
|
|
32 |
def set_value(path, value):
    """Store value on the last element of path."""
    last_elem = path[-1]
    last_elem.value = value
|
33 |
|
|
34 |
def backward_id(elem):
    """Return elem's first attr path if that path has no value, else None.

    get() treats such an attr as the target ID attr of a backward
    (child-to-parent) pointer.
    """
    if len(elem.attrs) < 1: return None
    first_attr = elem.attrs[0]
    if value(first_attr) == None: return first_attr
    return None
|
38 |
|
|
39 |
class XpathParser(Parser):
    """Parser that turns an XPath string into a list of XpathElems.

    Syntax handled, as read by _path():
    - elements separated by /, with an optional leading /
    - @name for an attribute
    - name[path=value,...] for attrs (each attr is itself a path)
    - name-> for a pointer
    - name(/path) for a lookahead assertion: path is added to name's attrs
      and is also appended to the returned tree
    - {path,...} for a split path; only the first subpath is currently used
    - * inside a name as an abbreviation for a nearby element's name
    """
    def _main(self):
        self._match_str('/') # optional leading /
        return self._path()

    def _path(self):
        """Parse one path and return it as a list of XpathElems."""
        tree = []
        while True:
            # Split path
            if self._match_str('{'):
                paths = []
                while True:
                    paths.append(tree + self._path())
                    if not self._match_str(','): break
                self._match_str('}', required=True)
                tree = paths[0] # just use first subpath for now
                break # nothing allowed after split path

            # is_attr is listed first so that the @ is matched before the name
            elem = XpathElem(is_attr=self._match_str('@'),
                name=self._match_re(r'[\w.*]+', required=True))

            # Attrs
            if self._match_str('['):
                elem.attrs = []
                while True:
                    path = self._path()
                    if self._match_str('='):
                        set_value(path, self._match_re(r'[\w.|]*'))
                    elem.attrs.append(path)
                    if not self._match_str(','): break
                self._match_str(']', required=True)

            elem.is_ptr = self._match_str('->')
            tree.append(elem)

            # Lookahead assertion
            if self._match_str('('):
                self._match_str('/', required=True) # next / is inside ()
                path = self._path()
                self._match_str(')', required=True)
                elem.attrs.append(path)
                tree += path

            if not self._match_str('/'): break

        # Expand * abbrs
        for elem_idx, elem in enumerate(tree):
            # A backward ID attr takes its name from the elem 2 before, a
            # pointer from the elem 2 after, and anything else from the next
            id_ = backward_id(elem)
            if id_ is not None: elem = id_[0]; offset = -2
            elif elem.is_ptr: offset = 2
            else: offset = 1
            before, abbr, after = elem.name.partition('*')
            if abbr != '':
                src_idx = elem_idx + offset
                # Check both bounds explicitly: a negative index would
                # silently wrap around to the end of tree instead of meaning
                # "no replacement elem"
                if 0 <= src_idx < len(tree):
                    elem.name = before+tree[src_idx].name+after

        return tree
|
99 |
|
|
100 |
# Index, within a path, of the element that stands for an object instance:
# obj() keeps path[:instance_level+1] and set_id() adds the ID attr there
instance_level = 1
|
101 |
|
|
102 |
def obj(path):
    """Return a deep copy of path cut off after the instance_level element.

    The last element of the copy is marked as not being a pointer.
    """
    end = instance_level + 1
    obj_path = deepcopy(path[:end])
    last_elem = obj_path[-1]
    last_elem.is_ptr = False # prevent pointer w/o target
    return obj_path
|
106 |
|
|
107 |
def set_id(path, id_, has_types=True):
    """Append an @id attr with value id_ to one element of path.

    The element is path[instance_level] when has_types is true and path[0]
    otherwise.
    """
    id_level = instance_level if has_types else 0
    id_attr = [XpathElem('id', id_, is_attr=True)]
    path[id_level].attrs.append(id_attr)
|
111 |
|
|
112 |
def get(doc, path, create=False, last_only=None, parent=None):
    """Find, and optionally create, the node that path refers to.

    doc -- DOM document; supplies the default parent and creates elements
    path -- list of XpathElems to follow
    create -- if true, create missing nodes instead of returning None
    last_only -- passed to xml_util.by_tag_name(); defaults to create
    parent -- node to start from; defaults to doc.documentElement

    Returns the node reached by the last element of path, or None if some
    element was not found and create is false.
    """
    # Warning: The last_only optimization may put data that should be together
    # into separate nodes
    if parent is None: parent = doc.documentElement
    if last_only is None: last_only = create
    for elem_idx, elem in enumerate(path):
        # Find possible matches
        children = []
        if elem.is_attr:
            child = parent.getAttributeNode(elem.name)
            if child is not None: children = [child]
        elif elem.name == '.': children = [parent]
        else: children = xml_util.by_tag_name(parent, elem.name, last_only)

        # Check each match
        node = None
        for child in children:
            is_match = (elem.value is None
                or xml_util.value(child) == elem.value)
            for attr in elem.attrs:
                if not is_match: break
                is_match = get(doc, attr, False, last_only, child) is not None
            if is_match: node = child; break

        # Create node
        if node is None:
            if not create: return None
            if elem.is_attr:
                parent.setAttribute(elem.name, '')
                node = parent.getAttributeNode(elem.name)
            else: node = parent.appendChild(doc.createElement(elem.name))
            if elem.value is not None:
                xml_util.set_value(doc, node, elem.value)
            for attr in elem.attrs: get(doc, attr, create, last_only, node)

        # Follow pointer
        if elem.is_ptr:
            # Copy the rest of the path, because set_value() and set_id()
            # below modify its elements
            path = deepcopy(path[elem_idx+1:]) # rest of path
            id_elem = backward_id(path[instance_level])
            if id_elem is not None:
                # backward (child-to-parent) pointer with target ID attr
                set_value(id_elem, xml_util.get_id(node))
            else: # forward (parent-to-child) pointer
                id_ = xml_util.value(node)
                obj_path = obj(path) # target object
                if id_ is None or get(doc, obj_path, False, True) is None:
                    # no target or target attrs don't match
                    if not create: return None

                    # Use last target object's ID + 1
                    obj_path[-1].attrs = [] # just get by tag name
                    last = get(doc, obj_path, False, True)
                    if last is not None:
                        id_ = str(int(xml_util.get_id(last)) + 1)
                    else: id_ = '0'

                    # Will append if target attrs didn't match. Place ! in XPath
                    # after element to fork at to avoid this.
                    xml_util.set_value(doc, node, id_)
                else: last_only = False
                set_id(path, id_)
            # The rest of the path is resolved from the document root again
            return get(doc, path, create, last_only)

        parent = node
    return parent
scripts/lib/xml_db.py | ||
---|---|---|
1 |
# XML-database conversion |
|
2 |
|
|
3 |
import re |
|
4 |
from xml.dom import Node |
|
5 |
|
|
6 |
import db_util |
|
7 |
import xml_util |
|
8 |
|
|
9 |
def name_of(node):
    """Return node's name with everything up to its last "." removed."""
    full_name = xml_util.name_of(node)
    return re.sub(r'^.*\.', r'', full_name)
|
10 |
|
|
11 |
# Suffix that marks a node name as a pointer (see is_ptr() and ptr_type())
ptr_suffix = '_id'
|
12 |
|
|
13 |
def is_ptr(node_name):
    """Return whether node_name ends with ptr_suffix."""
    has_suffix = node_name.endswith(ptr_suffix)
    return has_suffix
|
14 |
|
|
15 |
def ptr_type(node_name):
    """Return node_name without its trailing ptr_suffix.

    node_name must be a pointer name; this is checked with an assert.
    """
    assert is_ptr(node_name)
    suffix_len = len(ptr_suffix)
    return node_name[:-suffix_len]
|
18 |
|
|
19 |
def ptr_target(node):
    """Return xml_util.first_elem(node) for a pointer node.

    node's name must be a pointer name; this is checked with an assert.
    """
    node_name = name_of(node)
    assert is_ptr(node_name)
    return xml_util.first_elem(node)
|
22 |
|
|
23 |
def find_by_name(node, name):
    """Search around node for an element called name.

    Each node yielded by xml_util.NodeParentIter(node) is checked first by its
    own name and then through its child elements. A child whose pointer type
    is name yields the pointer's target; a non-pointer child called name
    yields the child itself. Returns None if nothing matches.
    """
    for ancestor in xml_util.NodeParentIter(node):
        if name_of(ancestor) == name: return ancestor
        # Not this node itself, so look among its child elements
        for candidate in xml_util.NodeElemIter(ancestor):
            candidate_name = name_of(candidate)
            if not is_ptr(candidate_name):
                if candidate_name == name: return candidate
            elif ptr_type(candidate_name) == name:
                return ptr_target(candidate)
    return None
|
33 |
|
|
34 |
def get(db, node, create=False, store_ids=False, row_ct_ref=None, pkeys=None):
    """Look up (and optionally insert) the database rows described by node.

    db -- database connection, passed through to db_util
    node -- XML element whose name is used as the table name
    create -- passed to db_util.get() for node and all its descendants
    store_ids -- if true, store each row's ID on its XML node
    row_ct_ref -- passed to db_util.get() as its row_ct_ref argument
    pkeys -- optional dict caching each table's pkey column name

    Returns the ID that db_util.get() returned for node itself.
    """
    # store_ids enables searching the tree for missing fields
    if pkeys is None: pkeys = {}
    def pkey(table):
        # Cached so that each table's pkey is only looked up once
        if table not in pkeys: pkeys[table] = db_util.pkey(db, table)
        return pkeys[table]

    def obj(node, parent_id=None):
        table = name_of(node)
        pkey_ = pkey(table)
        row = {}
        children = []

        # Divide children into fields and children with fkeys to parent
        for child in xml_util.NodeElemIter(node):
            child_name = name_of(child)
            if xml_util.is_text(child): row[child_name] = xml_util.value(child)
            elif is_ptr(child_name): row[child_name] = obj(ptr_target(child))
            else: children.append(child)
        row.pop(pkey_, None) # the pkey itself is never part of the row

        # Add fkey to parent
        if parent_id is not None:
            row[pkey(name_of(node.parentNode))] = parent_id

        # Insert node
        for try_num in range(2):
            try:
                id_ = db_util.get(db, table, row, pkey_, create, row_ct_ref)
                if store_ids: xml_util.set_id(node, id_)
                break
            except db_util.NullValueException as ex:
                if try_num > 0: raise # exception still raised after retry
                # Only pointer columns can be filled in from the tree. For
                # any other column, re-raise the original exception rather
                # than letting ptr_type()'s assert replace it
                if not is_ptr(ex.col): raise
                # Search for required column in ancestors and their children
                target = find_by_name(node, ptr_type(ex.col))
                if target is None: raise
                row[ex.col] = xml_util.get_id(target)

        # Insert children with fkeys to parent
        for child in children: obj(child, id_)

        return id_

    return obj(node)
|
78 |
|
|
79 |
def xml2db(db, node, row_ct_ref=None):
    """Call get() with create and store_ids enabled on node's children.

    Child elements for which xml_util.is_text() is true are skipped.
    row_ct_ref is passed through to get().
    """
    for child in xml_util.NodeElemIter(node):
        if xml_util.is_text(child): continue # XML metadata
        get(db, child, True, True, row_ct_ref)
scripts/xml2db/xml2db | ||
---|---|---|
3 | 3 |
# Format: see http://vegbank.org/vegdocs/xml/vegbank_example_ver1.0.2.xml |
4 | 4 |
|
5 | 5 |
import os |
6 |
import os.path |
|
6 | 7 |
import psycopg2 |
7 | 8 |
from psycopg2.extensions import ISOLATION_LEVEL_SERIALIZABLE |
8 | 9 |
import sys |
9 | 10 |
import xml.dom.minidom |
10 | 11 |
|
12 |
sys.path.append(os.path.dirname(__file__)+"/../lib") |
|
11 | 13 |
import xml_db |
12 | 14 |
|
13 | 15 |
def env_flag(name):
    """Return whether environment variable name is set and non-empty."""
    if name not in os.environ: return False
    return os.environ[name] != ''
scripts/data2xml/data2xml | ||
---|---|---|
2 | 2 |
# Converts a CSV dataset to XML using a mappings spreadsheet |
3 | 3 |
|
4 | 4 |
import csv |
5 |
import os.path |
|
5 | 6 |
import re |
6 | 7 |
import sys |
7 | 8 |
from copy import deepcopy |
8 | 9 |
from xml.dom.minidom import getDOMImplementation |
9 | 10 |
|
11 |
sys.path.append(os.path.dirname(__file__)+"/../lib") |
|
10 | 12 |
import xpath |
11 | 13 |
|
12 | 14 |
def main(): |
Also available in: Unified diff
Moved Python modules to shared lib folder