Revision 55
Added by Aaron Marcuse-Kubitza almost 13 years ago
scripts/lib/xpath.py | ||
---|---|---|
1 |
# XPath-based XML tree manipulation
|
|
1 |
# XPath parsing
|
|
2 | 2 |
|
3 |
from copy import deepcopy |
|
4 |
from xml.dom import Node |
|
3 |
import copy |
|
5 | 4 |
|
6 | 5 |
from Parser import Parser |
7 |
import xml_dom |
|
8 | 6 |
|
9 | 7 |
class XpathElem: |
10 | 8 |
def __init__(self, name, value=None, attrs=None, is_attr=False, |
... | ... | |
100 | 98 |
instance_level = 1 |
101 | 99 |
|
102 | 100 |
def obj(path): |
103 |
obj_path = deepcopy(path[:instance_level+1]) |
|
101 |
obj_path = copy.deepcopy(path[:instance_level+1])
|
|
104 | 102 |
obj_path[-1].is_ptr = False # prevent pointer w/o target |
105 | 103 |
return obj_path |
106 | 104 |
|
... | ... | |
108 | 106 |
if has_types: id_level = instance_level |
109 | 107 |
else: id_level = 0 |
110 | 108 |
path[id_level].attrs.append([XpathElem('id', id_, is_attr=True)]) |
111 |
|
|
112 |
def get(doc, path, create=False, last_only=None, parent=None): |
|
113 |
# Warning: The last_only optimization may put data that should be together |
|
114 |
# into separate nodes |
|
115 |
if parent == None: parent = doc.documentElement |
|
116 |
if last_only == None: last_only = create |
|
117 |
elem_idx = 0 |
|
118 |
for elem in path: |
|
119 |
# Find possible matches |
|
120 |
children = [] |
|
121 |
if elem.is_attr: |
|
122 |
child = parent.getAttributeNode(elem.name) |
|
123 |
if child != None: children = [child] |
|
124 |
elif elem.name == '.': children = [parent] |
|
125 |
else: children = xml_dom.by_tag_name(parent, elem.name, last_only) |
|
126 |
|
|
127 |
# Check each match |
|
128 |
node = None |
|
129 |
for child in children: |
|
130 |
is_match = elem.value == None or xml_dom.value(child) == elem.value |
|
131 |
for attr in elem.attrs: |
|
132 |
if not is_match: break |
|
133 |
is_match = get(doc, attr, False, last_only, child) != None |
|
134 |
if is_match: node = child; break |
|
135 |
|
|
136 |
# Create node |
|
137 |
if node == None: |
|
138 |
if not create: return None |
|
139 |
if elem.is_attr: |
|
140 |
parent.setAttribute(elem.name, '') |
|
141 |
node = parent.getAttributeNode(elem.name) |
|
142 |
else: node = parent.appendChild(doc.createElement(elem.name)) |
|
143 |
if elem.value != None: xml_dom.set_value(doc, node, elem.value) |
|
144 |
for attr in elem.attrs: get(doc, attr, create, last_only, node) |
|
145 |
|
|
146 |
# Follow pointer |
|
147 |
if elem.is_ptr: |
|
148 |
path = deepcopy(path[elem_idx+1:]) # rest of path |
|
149 |
id_elem = backward_id(path[instance_level]) |
|
150 |
if id_elem != None: |
|
151 |
# backward (child-to-parent) pointer with target ID attr |
|
152 |
set_value(id_elem, xml_dom.get_id(node)) |
|
153 |
else: # forward (parent-to-child) pointer |
|
154 |
id_ = xml_dom.value(node) |
|
155 |
obj_path = obj(path) # target object |
|
156 |
if id_ == None or get(doc, obj_path, False, True) == None: |
|
157 |
# no target or target attrs don't match |
|
158 |
if not create: return None |
|
159 |
|
|
160 |
# Use last target object's ID + 1 |
|
161 |
obj_path[-1].attrs = [] # just get by tag name |
|
162 |
last = get(doc, obj_path, False, True) |
|
163 |
if last != None: id_ = str(int(xml_dom.get_id(last)) + 1) |
|
164 |
else: id_ = '0' |
|
165 |
|
|
166 |
# Will append if target attrs didn't match. Place ! in XPath |
|
167 |
# after element to fork at to avoid this. |
|
168 |
xml_dom.set_value(doc, node, id_) |
|
169 |
else: last_only = False |
|
170 |
set_id(path, id_) |
|
171 |
return get(doc, path, create, last_only) |
|
172 |
|
|
173 |
parent = node |
|
174 |
elem_idx += 1 |
|
175 |
return parent |
scripts/lib/xml_xpath.py | ||
---|---|---|
1 |
# XPath-based XML tree manipulation |
|
2 |
|
|
3 |
import copy |
|
4 |
|
|
5 |
import xml_dom |
|
6 |
import xpath |
|
7 |
|
|
8 |
def get(doc, path, create=False, last_only=None, parent=None):
    '''Walks `path` through the XML tree and returns the node it ends on.

    Each element of `path` is resolved against the node reached so far:
    * an attribute element (elem.is_attr) is looked up with
      getAttributeNode() on the current node;
    * an element named '.' stands for the current node itself;
    * anything else is looked up with xml_dom.by_tag_name().
    A candidate is accepted when its value equals elem.value (if one is set)
    and every sub-path in elem.attrs can be resolved beneath it. The first
    accepted candidate is used.

    If the element is a pointer (elem.is_ptr), the remainder of the path is
    copied, adjusted to refer to the pointer's target, and resolved from the
    document root by a recursive call whose result is returned.

    @param doc The DOM document; used to create nodes and as the default root
    @param path A sequence of path elements providing name, value, attrs,
        is_attr and is_ptr
    @param create If true, missing nodes are created instead of giving up
    @param last_only Passed through to xml_dom.by_tag_name(); defaults to the
        value of `create`. (Presumably restricts the search to the last
        matching child -- confirm against xml_dom.)
    @param parent The node to start from; defaults to doc.documentElement
    @return The node the path ends on (`parent` itself for an empty path), or
        None if some element could not be found and `create` is false
    '''
    # NOTE(review): block nesting below was reconstructed from a flattened
    # diff view of this file; confirm against the repository copy.

    # Warning: The last_only optimization may put data that should be together
    # into separate nodes
    if parent is None: parent = doc.documentElement
    if last_only is None: last_only = create
    for elem_idx, elem in enumerate(path):
        # Find possible matches
        children = []
        if elem.is_attr:
            child = parent.getAttributeNode(elem.name)
            if child is not None: children = [child]
        elif elem.name == '.': children = [parent]
        else: children = xml_dom.by_tag_name(parent, elem.name, last_only)
        
        # Check each match
        node = None
        for child in children:
            is_match = (elem.value is None
                or xml_dom.value(child) == elem.value)
            for attr in elem.attrs:
                if not is_match: break
                # Attribute sub-paths are only tested here, never created
                is_match = get(doc, attr, False, last_only, child) is not None
            if is_match: node = child; break
        
        # Create node
        if node is None:
            if not create: return None
            if elem.is_attr:
                parent.setAttribute(elem.name, '')
                node = parent.getAttributeNode(elem.name)
            else: node = parent.appendChild(doc.createElement(elem.name))
            if elem.value is not None:
                xml_dom.set_value(doc, node, elem.value)
            for attr in elem.attrs: get(doc, attr, create, last_only, node)
        
        # Follow pointer
        if elem.is_ptr:
            # Copy so that the caller's path is not modified. Rebinding `path`
            # is safe because this branch always returns.
            path = copy.deepcopy(path[elem_idx+1:]) # rest of path
            id_elem = xpath.backward_id(path[xpath.instance_level])
            if id_elem is not None:
                # backward (child-to-parent) pointer with target ID attr
                xpath.set_value(id_elem, xml_dom.get_id(node))
            else: # forward (parent-to-child) pointer
                id_ = xml_dom.value(node)
                obj_path = xpath.obj(path) # target object
                if id_ is None or get(doc, obj_path, False, True) is None:
                    # no target or target attrs don't match
                    if not create: return None
                    
                    # Use last target object's ID + 1
                    obj_path[-1].attrs = [] # just get by tag name
                    last = get(doc, obj_path, False, True)
                    if last is not None:
                        id_ = str(int(xml_dom.get_id(last)) + 1)
                    else: id_ = '0'
                    
                    # Will append if target attrs didn't match. Place ! in XPath
                    # after element to fork at to avoid this.
                    xml_dom.set_value(doc, node, id_)
                else: last_only = False
                xpath.set_id(path, id_)
            return get(doc, path, create, last_only)
        
        parent = node
    return parent
scripts/map | ||
---|---|---|
10 | 10 |
import re |
11 | 11 |
import sys |
12 | 12 |
import xml.dom.minidom |
13 |
from xml.dom.minidom import getDOMImplementation |
|
14 | 13 |
|
15 | 14 |
sys.path.append(os.path.dirname(__file__)+"/lib") |
16 | 15 |
|
... | ... | |
47 | 46 |
|
48 | 47 |
# Input datasource to XML tree |
49 | 48 |
if uses_map: # input is CSV |
49 |
import xml_xpath |
|
50 | 50 |
import xpath |
51 | 51 |
|
52 | 52 |
# Load map |
... | ... | |
64 | 64 |
stream.close() |
65 | 65 |
|
66 | 66 |
# Load and map CSV |
67 |
doc = getDOMImplementation().createDocument(None, dest, None) |
|
67 |
doc = xml.dom.minidom.getDOMImplementation().createDocument(None, dest, |
|
68 |
None) |
|
68 | 69 |
reader = csv.reader(sys.stdin, **csv_config) |
69 | 70 |
fieldnames = reader.next() |
70 | 71 |
row_idx = 0 |
... | ... | |
76 | 77 |
path = copy.deepcopy(map_[name]) # don't modify main value! |
77 | 78 |
xpath.set_id(path, row_id, has_types) |
78 | 79 |
xpath.set_value(path, value) |
79 |
xpath.get(doc, path, True) |
|
80 |
xml_xpath.get(doc, path, True)
|
|
80 | 81 |
row_idx += 1 |
81 | 82 |
else: doc = xml.dom.minidom.parse(sys.stdin) # input is XML |
82 | 83 |
|
Also available in: Unified diff
Split off xpath.py XML functionality into xml_xpath.py