Revision 141
Added by Aaron Marcuse-Kubitza about 13 years ago
scripts/test/accepted_output/SALVIAS_db.organisms.VegX.xml | ||
---|---|---|
30 | 30 |
</simpleUserdefined> |
31 | 31 |
<simpleUserdefined> |
32 | 32 |
<name>individualCode</name> |
33 |
<value>1</value>
|
|
33 |
<value>2</value>
|
|
34 | 34 |
</simpleUserdefined> |
35 | 35 |
<simpleUserdefined> |
36 | 36 |
<name>habit</name> |
... | ... | |
55 | 55 |
<voucher>1</voucher> |
56 | 56 |
</taxonNameUsageConcept> |
57 | 57 |
<taxonNameUsageConcept id="1"> |
58 |
<voucher>1</voucher>
|
|
58 |
<voucher>2</voucher>
|
|
59 | 59 |
</taxonNameUsageConcept> |
60 | 60 |
</taxonNameUsageConcepts> |
61 | 61 |
<plotObservations> |
... | ... | |
198 | 198 |
</taxonConcept> |
199 | 199 |
<taxonConcept id="5"> |
200 | 200 |
<Rank code="fam"/> |
201 |
<Name>Arecaceae</Name>
|
|
201 |
<Name>Clusiaceae</Name>
|
|
202 | 202 |
</taxonConcept> |
203 | 203 |
<taxonConcept id="6"> |
204 | 204 |
<Rank code="gen"/> |
205 |
<Name>Geonoma</Name>
|
|
205 |
<Name>Tovomitopsis</Name>
|
|
206 | 206 |
</taxonConcept> |
207 | 207 |
<taxonConcept id="7"> |
208 | 208 |
<Rank code="sp"/> |
209 |
<Name>seleri</Name>
|
|
209 |
<Name>allenii</Name>
|
|
210 | 210 |
<AccordingTo> |
211 |
<Simple>Burret</Simple>
|
|
211 |
<Simple>Maguire</Simple>
|
|
212 | 212 |
</AccordingTo> |
213 | 213 |
</taxonConcept> |
214 | 214 |
<taxonConcept id="8"> |
215 | 215 |
<Rank code="fam"/> |
216 |
<Name>Arecaceae</Name>
|
|
216 |
<Name>Clusiaceae</Name>
|
|
217 | 217 |
</taxonConcept> |
218 | 218 |
<taxonConcept id="9"> |
219 | 219 |
<Rank code="sp"/> |
220 |
<Name>seleri</Name>
|
|
220 |
<Name>allenii</Name>
|
|
221 | 221 |
</taxonConcept> |
222 | 222 |
</taxonConcepts> |
223 | 223 |
</VegX> |
scripts/test/accepted_output/SALVIAS_db.organisms.VegBank.xml | ||
---|---|---|
289 | 289 |
</reference_ID> |
290 | 290 |
<plantName_ID> |
291 | 291 |
<plantName> |
292 |
<plantName>seleri</plantName>
|
|
292 |
<plantName>allenii</plantName>
|
|
293 | 293 |
</plantName> |
294 | 294 |
</plantName_ID> |
295 | 295 |
</plantConcept> |
... | ... | |
297 | 297 |
<party_ID> |
298 | 298 |
<party/> |
299 | 299 |
</party_ID> |
300 |
<museumAccessionNumber>1</museumAccessionNumber>
|
|
300 |
<museumAccessionNumber>2</museumAccessionNumber>
|
|
301 | 301 |
<definedValue fkey="tableRecord_ID"> |
302 | 302 |
<userDefined_ID> |
303 | 303 |
<userDefined> |
... | ... | |
316 | 316 |
<userDefinedType>varchar</userDefinedType> |
317 | 317 |
</userDefined> |
318 | 318 |
</userDefined_ID> |
319 |
<definedValue>1</definedValue>
|
|
319 |
<definedValue>2</definedValue>
|
|
320 | 320 |
</definedValue> |
321 | 321 |
<definedValue fkey="tableRecord_ID"> |
322 | 322 |
<userDefined_ID> |
... | ... | |
360 | 360 |
</reference_ID> |
361 | 361 |
<plantName_ID> |
362 | 362 |
<plantName> |
363 |
<plantName>Arecaceae</plantName>
|
|
363 |
<plantName>Clusiaceae</plantName>
|
|
364 | 364 |
</plantName> |
365 | 365 |
</plantName_ID> |
366 | 366 |
</plantConcept> |
... | ... | |
394 | 394 |
</reference_ID> |
395 | 395 |
<plantName_ID> |
396 | 396 |
<plantName> |
397 |
<plantName>Geonoma</plantName>
|
|
397 |
<plantName>Tovomitopsis</plantName>
|
|
398 | 398 |
</plantName> |
399 | 399 |
</plantName_ID> |
400 | 400 |
</plantConcept> |
... | ... | |
428 | 428 |
</reference_ID> |
429 | 429 |
<plantName_ID> |
430 | 430 |
<plantName> |
431 |
<plantName>seleri</plantName>
|
|
431 |
<plantName>allenii</plantName>
|
|
432 | 432 |
<reference_ID> |
433 | 433 |
<reference> |
434 |
<shortName>Burret</shortName>
|
|
434 |
<shortName>Maguire</shortName>
|
|
435 | 435 |
</reference> |
436 | 436 |
</reference_ID> |
437 | 437 |
</plantName> |
... | ... | |
472 | 472 |
</reference_ID> |
473 | 473 |
<plantName_ID> |
474 | 474 |
<plantName> |
475 |
<plantName>Arecaceae</plantName>
|
|
475 |
<plantName>Clusiaceae</plantName>
|
|
476 | 476 |
</plantName> |
477 | 477 |
</plantName_ID> |
478 | 478 |
</plantConcept> |
... | ... | |
506 | 506 |
</reference_ID> |
507 | 507 |
<plantName_ID> |
508 | 508 |
<plantName> |
509 |
<plantName>seleri</plantName>
|
|
509 |
<plantName>allenii</plantName>
|
|
510 | 510 |
</plantName> |
511 | 511 |
</plantName_ID> |
512 | 512 |
</plantConcept> |
scripts/lib/xpath.py | ||
---|---|---|
18 | 18 |
|
19 | 19 |
def __repr__(self): |
20 | 20 |
str_ = '' |
21 |
if self.is_positive: str_ += '!' |
|
21 |
if not self.is_positive: str_ += '!'
|
|
22 | 22 |
if self.is_attr: str_ += '@' |
23 | 23 |
str_ += self.name |
24 | 24 |
if self.keys != []: str_ += repr(self.keys) |
... | ... | |
135 | 135 |
|
136 | 136 |
def is_instance(elem): return elem.keys != [] and is_id(elem.keys[0]) |
137 | 137 |
|
138 |
def get(doc, xpath, create=False, last_only=None, parent=None): |
|
139 |
# Warning: The last_only optimization may put data that should be together |
|
140 |
# into separate nodes |
|
141 |
if parent == None: parent = doc.documentElement |
|
138 |
def get(parent, xpath, create=False, last_only=None): |
|
139 |
'''Warning: The last_only optimization may put data that should be together |
|
140 |
into separate nodes''' |
|
142 | 141 |
if last_only == None: last_only = create |
143 | 142 |
|
144 | 143 |
if create and not is_positive(xpath): return None |
145 | 144 |
doc = parent.ownerDocument |
145 |
root = doc.documentElement |
|
146 | 146 |
for elem_idx, elem in enumerate(xpath): |
147 | 147 |
# Find possible matches |
148 | 148 |
children = [] |
... | ... | |
160 | 160 |
is_match = elem.value == None or xml_dom.value(child) == elem.value |
161 | 161 |
for attr in elem.keys: |
162 | 162 |
if not is_match: break |
163 |
is_match = (get(doc, attr, False, last_only, child) != None)\
|
|
163 |
is_match = (get(child, attr, False, last_only) != None)\
|
|
164 | 164 |
== is_positive(attr) |
165 | 165 |
if is_match: node = child; break |
166 | 166 |
|
... | ... | |
174 | 174 |
if elem.value != None: xml_dom.set_value(doc, node, elem.value) |
175 | 175 |
if create: |
176 | 176 |
for attr in elem.keys + elem.attrs: |
177 |
get(doc, attr, create, last_only, node)
|
|
177 |
get(node, attr, create, last_only)
|
|
178 | 178 |
|
179 | 179 |
for branch in elem.other_branches: |
180 | 180 |
branch = copy.deepcopy(branch) |
181 | 181 |
set_value(branch, value(xpath)) |
182 |
get(doc, branch, create, last_only, node)
|
|
182 |
get(node, branch, create, last_only)
|
|
183 | 183 |
|
184 | 184 |
# Follow pointer |
185 | 185 |
if elem.is_ptr: |
... | ... | |
191 | 191 |
else: # forward (parent-to-child) pointer |
192 | 192 |
id_ = xml_dom.value(node) |
193 | 193 |
obj_xpath = obj(xpath) # target object |
194 |
if id_ == None or get(doc, obj_xpath, False, True) == None:
|
|
194 |
if id_ == None or get(root, obj_xpath, False, True) == None:
|
|
195 | 195 |
# no target or target keys don't match |
196 | 196 |
if not create: return None |
197 | 197 |
|
198 | 198 |
# Use last target object's ID + 1 |
199 | 199 |
obj_xpath[-1].keys = [] # just get by tag name |
200 |
last = get(doc, obj_xpath, False, True)
|
|
200 |
last = get(root, obj_xpath, False, True)
|
|
201 | 201 |
if last != None: id_ = str(int(xml_dom.get_id(last)) + 1) |
202 | 202 |
else: id_ = '0' |
203 | 203 |
|
... | ... | |
206 | 206 |
xml_dom.set_value(doc, node, id_) |
207 | 207 |
else: last_only = False |
208 | 208 |
set_id(xpath, id_) |
209 |
return get(doc, xpath, create, last_only)
|
|
209 |
return get(root, xpath, create, last_only)
|
|
210 | 210 |
|
211 | 211 |
parent = node |
212 | 212 |
return parent |
213 | 213 |
|
214 |
def put_obj(doc, xpath, id_, has_types, value=None):
|
|
214 |
def put_obj(root, xpath, id_, has_types, value=None):
|
|
215 | 215 |
xpath = copy.deepcopy(xpath) # don't modify input! |
216 | 216 |
set_id(xpath, id_, has_types) |
217 | 217 |
if value != None: set_value(xpath, value) |
218 |
get(doc, xpath, True)
|
|
218 |
get(root, xpath, True)
|
|
219 | 219 |
|
220 | 220 |
def path2xml(xpath, first_branch=True): |
221 | 221 |
root = xml_dom.create_doc().documentElement |
222 |
get(root.ownerDocument, xpath, True) |
|
223 |
if first_branch: root = root.firstChild |
|
222 |
get(root, xpath, True) |
|
224 | 223 |
return root |
225 | 224 |
|
226 | 225 |
def str2xml(xpath): return path2xml(parse(xpath)) |
226 |
|
|
227 |
def xml_set_id(root, id_): xml_dom.set_id(root.firstChild, id_) |
scripts/lib/db_xml.py | ||
---|---|---|
37 | 37 |
if pkeys == None: pkeys = {} |
38 | 38 |
def pkey(table): return sql.pkey(db, pkeys, table) |
39 | 39 |
|
40 |
node = node.firstChild |
|
40 | 41 |
table = name_of(node) |
41 | 42 |
pkey_ = pkey(table) |
42 | 43 |
|
scripts/map | ||
---|---|---|
79 | 79 |
if in_is_xml: doc0 = xml.dom.minidom.parse(sys.stdin) |
80 | 80 |
if map_path != None: |
81 | 81 |
doc1 = xml_dom.create_doc(dest) |
82 |
root = doc1.documentElement |
|
82 | 83 |
if in_is_db: |
83 | 84 |
assert in_is_xpaths |
84 | 85 |
|
... | ... | |
97 | 98 |
row_id = str(row_id) |
98 | 99 |
|
99 | 100 |
def put_col(path, value): |
100 |
xpath.put_obj(doc1, path, row_id, has_types, value)
|
|
101 |
xpath.put_obj(root, path, row_id, has_types, value)
|
|
101 | 102 |
for value, out in metadata: put_col(out, value) |
102 | 103 |
for in_, out in mappings: |
103 |
root = xpath.get(in_.ownerDocument, src_root) |
|
104 |
xml_dom.replace(root, root.cloneNode(False)) |
|
105 |
xml_dom.set_id(root, row_id) |
|
106 |
|
|
104 |
in_ = in_.cloneNode(True) # don't modify orig value! |
|
105 |
xml_dom.set_id(xpath.get(in_, src_root), row_id) |
|
107 | 106 |
value = sql.value_or_none(db_xml.get(in_db, in_, in_pkeys)) |
108 | 107 |
if value != None: put_col(out, str(value)) |
109 | 108 |
in_db.close() |
... | ... | |
117 | 116 |
row_id = str(row_idx) |
118 | 117 |
|
119 | 118 |
def put_col(path, value): |
120 |
xpath.put_obj(doc1, path, row_id, has_types, value)
|
|
119 |
xpath.put_obj(root, path, row_id, has_types, value)
|
|
121 | 120 |
for value, out in metadata: put_col(out, value) |
122 | 121 |
for i, col in enumerate(cols): |
123 | 122 |
if row[i] != '' and col in map_: put_col(map_[col], row[i]) |
Also available in: Unified diff
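xpath.py: Refactored get() and put_obj() to take the starting node (parent/root) instead of a separate doc parameter; the XML document is now obtained from the node's ownerDocument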