Revision 77
Added by Aaron Marcuse-Kubitza about 13 years ago
scripts/lib/xml_xpath.py | ||
---|---|---|
1 |
# XPath-based XML tree manipulation |
|
2 |
|
|
3 |
import copy |
|
4 |
|
|
5 |
import xml_dom |
|
6 |
import xpath |
|
7 |
|
|
8 |
def get(doc, path, create=False, last_only=None, parent=None):
    """Resolve `path` against an XML DOM tree, optionally creating nodes.

    Each element of `path` is matched, in order, against the current parent:
    an attribute node when `elem.is_attr` is set, the parent itself when the
    element name is '.', and otherwise the children returned by
    `xml_dom.by_tag_name`. A candidate matches when its value equals
    `elem.value` (if one is given) and every sub-path in `elem.attrs`
    resolves beneath it. When an element is a pointer (`elem.is_ptr`), the
    rest of the path is deep-copied, given the appropriate ID, and resolved
    again starting from the document element.

    Warning: The last_only optimization may put data that should be together
    into separate nodes.

    doc       -- DOM document; used to create nodes and as the default root
    path      -- sequence of XPath elements to follow
    create    -- if true, missing nodes are created instead of returning None
    last_only -- passed on to `xml_dom.by_tag_name` for elements that have no
                 attrs or are instances; defaults to the value of `create`
    parent    -- node to start from; defaults to `doc.documentElement`

    Returns the node reached at the end of the path, or None if an element
    could not be found and `create` is false.
    """
    if parent is None:
        parent = doc.documentElement
    if last_only is None:
        last_only = create

    for elem_idx, elem in enumerate(path):
        # Find possible matches
        children = []
        if elem.is_attr:
            child = parent.getAttributeNode(elem.name)
            if child is not None:
                children = [child]
        elif elem.name == '.':
            children = [parent]
        else:
            children = xml_dom.by_tag_name(parent, elem.name,
                last_only and (elem.attrs == [] or xpath.is_instance(elem)))

        # Check each match; the first candidate satisfying the value and all
        # attr sub-paths wins
        node = None
        for child in children:
            is_match = (elem.value is None
                or xml_dom.value(child) == elem.value)
            for attr in elem.attrs:
                if not is_match:
                    break
                # attrs are only looked up here, never created
                is_match = get(doc, attr, False, last_only, child) is not None
            if is_match:
                node = child
                break

        # Create node
        if node is None:
            if not create:
                return None
            if elem.is_attr:
                parent.setAttribute(elem.name, '')
                node = parent.getAttributeNode(elem.name)
            else:
                node = parent.appendChild(doc.createElement(elem.name))
            if elem.value is not None:
                xml_dom.set_value(doc, node, elem.value)
            for attr in elem.attrs:
                get(doc, attr, create, last_only, node)

        # Follow pointer
        if elem.is_ptr:
            # Copy the rest of the path so the caller's path is not modified
            rest = copy.deepcopy(path[elem_idx+1:])
            id_elem = xpath.backward_id(rest[xpath.instance_level])
            if id_elem is not None:
                # backward (child-to-parent) pointer with target ID attr
                xpath.set_value(id_elem, xml_dom.get_id(node))
            else: # forward (parent-to-child) pointer
                id_ = xml_dom.value(node)
                obj_path = xpath.obj(rest) # target object
                if id_ is None or get(doc, obj_path, False, True) is None:
                    # no target or target attrs don't match
                    if not create:
                        return None

                    # Use last target object's ID + 1
                    obj_path[-1].attrs = [] # just get by tag name
                    last = get(doc, obj_path, False, True)
                    if last is not None:
                        id_ = str(int(xml_dom.get_id(last)) + 1)
                    else:
                        id_ = '0'

                    # Will append if target attrs didn't match. Place ! in
                    # XPath after element to fork at to avoid this.
                    xml_dom.set_value(doc, node, id_)
                else:
                    last_only = False
                xpath.set_id(rest, id_)
            # The pointer target is resolved from the document element
            return get(doc, rest, create, last_only)

        parent = node
    return parent
scripts/lib/xpath.py | ||
---|---|---|
3 | 3 |
import copy |
4 | 4 |
|
5 | 5 |
from Parser import Parser |
6 |
import xml_dom |
|
6 | 7 |
|
7 | 8 |
class XpathElem: |
8 | 9 |
def __init__(self, name, value=None, attrs=None, is_attr=False, |
... | ... | |
112 | 113 |
def is_id(path):
    """Tell whether `path` starts with an attribute element named 'id'."""
    first = path[0]
    return first.is_attr and first.name == 'id'
113 | 114 |
|
114 | 115 |
def is_instance(elem):
    """Tell whether `elem` has attrs and its first attr path is an ID path."""
    attrs = elem.attrs
    return attrs != [] and is_id(attrs[0])
116 |
|
|
117 |
def get(doc, xpath, create=False, last_only=None, parent=None):
    """Resolve `xpath` against an XML DOM tree, optionally creating nodes.

    Each element of `xpath` is matched, in order, against the current parent:
    an attribute node when `elem.is_attr` is set, the parent itself when the
    element name is '.', and otherwise the children returned by
    `xml_dom.by_tag_name`. A candidate matches when its value equals
    `elem.value` (if one is given) and every sub-path in `elem.attrs`
    resolves beneath it. When an element is a pointer (`elem.is_ptr`), the
    rest of the XPath is deep-copied, given the appropriate ID, and resolved
    again starting from the document element.

    Warning: The last_only optimization may put data that should be together
    into separate nodes.

    doc       -- DOM document; used to create nodes and as the default root
    xpath     -- sequence of XPath elements to follow
    create    -- if true, missing nodes are created instead of returning None
    last_only -- passed on to `xml_dom.by_tag_name` for elements that have no
                 attrs or are instances; defaults to the value of `create`
    parent    -- node to start from; defaults to `doc.documentElement`

    Returns the node reached at the end of the XPath, or None if an element
    could not be found and `create` is false.
    """
    if parent is None:
        parent = doc.documentElement
    if last_only is None:
        last_only = create

    for elem_idx, elem in enumerate(xpath):
        # Find possible matches
        children = []
        if elem.is_attr:
            child = parent.getAttributeNode(elem.name)
            if child is not None:
                children = [child]
        elif elem.name == '.':
            children = [parent]
        else:
            children = xml_dom.by_tag_name(parent, elem.name,
                last_only and (elem.attrs == [] or is_instance(elem)))

        # Check each match; the first candidate satisfying the value and all
        # attr sub-paths wins
        node = None
        for child in children:
            is_match = (elem.value is None
                or xml_dom.value(child) == elem.value)
            for attr in elem.attrs:
                if not is_match:
                    break
                # attrs are only looked up here, never created
                is_match = get(doc, attr, False, last_only, child) is not None
            if is_match:
                node = child
                break

        # Create node
        if node is None:
            if not create:
                return None
            if elem.is_attr:
                parent.setAttribute(elem.name, '')
                node = parent.getAttributeNode(elem.name)
            else:
                node = parent.appendChild(doc.createElement(elem.name))
            if elem.value is not None:
                xml_dom.set_value(doc, node, elem.value)
            for attr in elem.attrs:
                get(doc, attr, create, last_only, node)

        # Follow pointer
        if elem.is_ptr:
            # Copy the rest of the XPath so the caller's XPath is not modified
            rest = copy.deepcopy(xpath[elem_idx+1:])
            id_elem = backward_id(rest[instance_level])
            if id_elem is not None:
                # backward (child-to-parent) pointer with target ID attr
                set_value(id_elem, xml_dom.get_id(node))
            else: # forward (parent-to-child) pointer
                id_ = xml_dom.value(node)
                obj_xpath = obj(rest) # target object
                if id_ is None or get(doc, obj_xpath, False, True) is None:
                    # no target or target attrs don't match
                    if not create:
                        return None

                    # Use last target object's ID + 1
                    obj_xpath[-1].attrs = [] # just get by tag name
                    last = get(doc, obj_xpath, False, True)
                    if last is not None:
                        id_ = str(int(xml_dom.get_id(last)) + 1)
                    else:
                        id_ = '0'

                    # Will append if target attrs didn't match. Place ! in
                    # XPath after element to fork at to avoid this.
                    xml_dom.set_value(doc, node, id_)
                else:
                    last_only = False
                set_id(rest, id_)
            # The pointer target is resolved from the document element
            return get(doc, rest, create, last_only)

        parent = node
    return parent
scripts/map | ||
---|---|---|
63 | 63 |
# Input datasource to XML tree, mapping if needed |
64 | 64 |
if in_is_xml: doc = xml.dom.minidom.parse(sys.stdin) |
65 | 65 |
if map_path != None: |
66 |
import xml_xpath |
|
67 |
|
|
68 | 66 |
out_doc = xml.dom.minidom.getDOMImplementation().createDocument(None, |
69 | 67 |
dest, None) |
70 | 68 |
if in_is_xml: raise Exception('XML-XML mapping not supported yet') |
... | ... | |
82 | 80 |
path = copy.deepcopy(map_[name]) # don't modify value! |
83 | 81 |
xpath.set_id(path, row_id, has_types) |
84 | 82 |
xpath.set_value(path, value) |
85 |
xml_xpath.get(out_doc, path, True)
|
|
83 |
xpath.get(out_doc, path, True) |
|
86 | 84 |
row_idx += 1 |
87 | 85 |
doc = out_doc |
88 | 86 |
|
Also available in: Unified diff
Merged xml_xpath.py into xpath.py in preparation for changing the XPath parse tree to be the XML DOM tree itself