Project

General

Profile

« Previous | Next » 

Revision 141

xpath.py: Refactored to avoid needing a doc parameter for the XML document

View differences:

scripts/test/accepted_output/SALVIAS_db.organisms.VegX.xml
30 30
            </simpleUserdefined>
31 31
            <simpleUserdefined>
32 32
                <name>individualCode</name>
33
                <value>1</value>
33
                <value>2</value>
34 34
            </simpleUserdefined>
35 35
            <simpleUserdefined>
36 36
                <name>habit</name>
......
55 55
            <voucher>1</voucher>
56 56
        </taxonNameUsageConcept>
57 57
        <taxonNameUsageConcept id="1">
58
            <voucher>1</voucher>
58
            <voucher>2</voucher>
59 59
        </taxonNameUsageConcept>
60 60
    </taxonNameUsageConcepts>
61 61
    <plotObservations>
......
198 198
        </taxonConcept>
199 199
        <taxonConcept id="5">
200 200
            <Rank code="fam"/>
201
            <Name>Arecaceae</Name>
201
            <Name>Clusiaceae</Name>
202 202
        </taxonConcept>
203 203
        <taxonConcept id="6">
204 204
            <Rank code="gen"/>
205
            <Name>Geonoma</Name>
205
            <Name>Tovomitopsis</Name>
206 206
        </taxonConcept>
207 207
        <taxonConcept id="7">
208 208
            <Rank code="sp"/>
209
            <Name>seleri</Name>
209
            <Name>allenii</Name>
210 210
            <AccordingTo>
211
                <Simple>Burret</Simple>
211
                <Simple>Maguire</Simple>
212 212
            </AccordingTo>
213 213
        </taxonConcept>
214 214
        <taxonConcept id="8">
215 215
            <Rank code="fam"/>
216
            <Name>Arecaceae</Name>
216
            <Name>Clusiaceae</Name>
217 217
        </taxonConcept>
218 218
        <taxonConcept id="9">
219 219
            <Rank code="sp"/>
220
            <Name>seleri</Name>
220
            <Name>allenii</Name>
221 221
        </taxonConcept>
222 222
    </taxonConcepts>
223 223
</VegX>
scripts/test/accepted_output/SALVIAS_db.organisms.VegBank.xml
289 289
                    </reference_ID>
290 290
                    <plantName_ID>
291 291
                        <plantName>
292
                            <plantName>seleri</plantName>
292
                            <plantName>allenii</plantName>
293 293
                        </plantName>
294 294
                    </plantName_ID>
295 295
                </plantConcept>
......
297 297
            <party_ID>
298 298
                <party/>
299 299
            </party_ID>
300
            <museumAccessionNumber>1</museumAccessionNumber>
300
            <museumAccessionNumber>2</museumAccessionNumber>
301 301
            <definedValue fkey="tableRecord_ID">
302 302
                <userDefined_ID>
303 303
                    <userDefined>
......
316 316
                        <userDefinedType>varchar</userDefinedType>
317 317
                    </userDefined>
318 318
                </userDefined_ID>
319
                <definedValue>1</definedValue>
319
                <definedValue>2</definedValue>
320 320
            </definedValue>
321 321
            <definedValue fkey="tableRecord_ID">
322 322
                <userDefined_ID>
......
360 360
                    </reference_ID>
361 361
                    <plantName_ID>
362 362
                        <plantName>
363
                            <plantName>Arecaceae</plantName>
363
                            <plantName>Clusiaceae</plantName>
364 364
                        </plantName>
365 365
                    </plantName_ID>
366 366
                </plantConcept>
......
394 394
                    </reference_ID>
395 395
                    <plantName_ID>
396 396
                        <plantName>
397
                            <plantName>Geonoma</plantName>
397
                            <plantName>Tovomitopsis</plantName>
398 398
                        </plantName>
399 399
                    </plantName_ID>
400 400
                </plantConcept>
......
428 428
                    </reference_ID>
429 429
                    <plantName_ID>
430 430
                        <plantName>
431
                            <plantName>seleri</plantName>
431
                            <plantName>allenii</plantName>
432 432
                            <reference_ID>
433 433
                                <reference>
434
                                    <shortName>Burret</shortName>
434
                                    <shortName>Maguire</shortName>
435 435
                                </reference>
436 436
                            </reference_ID>
437 437
                        </plantName>
......
472 472
                    </reference_ID>
473 473
                    <plantName_ID>
474 474
                        <plantName>
475
                            <plantName>Arecaceae</plantName>
475
                            <plantName>Clusiaceae</plantName>
476 476
                        </plantName>
477 477
                    </plantName_ID>
478 478
                </plantConcept>
......
506 506
                    </reference_ID>
507 507
                    <plantName_ID>
508 508
                        <plantName>
509
                            <plantName>seleri</plantName>
509
                            <plantName>allenii</plantName>
510 510
                        </plantName>
511 511
                    </plantName_ID>
512 512
                </plantConcept>
scripts/lib/xpath.py
18 18
    
19 19
    def __repr__(self):
20 20
        str_ = ''
21
        if self.is_positive: str_ += '!'
21
        if not self.is_positive: str_ += '!'
22 22
        if self.is_attr: str_ += '@'
23 23
        str_ += self.name
24 24
        if self.keys != []: str_ += repr(self.keys)
......
135 135

  
136 136
def is_instance(elem): return elem.keys != [] and is_id(elem.keys[0]) 
137 137

  
138
def get(doc, xpath, create=False, last_only=None, parent=None):
139
    # Warning: The last_only optimization may put data that should be together
140
    # into separate nodes
141
    if parent == None: parent = doc.documentElement
138
def get(parent, xpath, create=False, last_only=None):
139
    '''Warning: The last_only optimization may put data that should be together
140
    into separate nodes'''
142 141
    if last_only == None: last_only = create
143 142
    
144 143
    if create and not is_positive(xpath): return None
145 144
    doc = parent.ownerDocument
145
    root = doc.documentElement
146 146
    for elem_idx, elem in enumerate(xpath):
147 147
        # Find possible matches
148 148
        children = []
......
160 160
            is_match = elem.value == None or xml_dom.value(child) == elem.value
161 161
            for attr in elem.keys:
162 162
                if not is_match: break
163
                is_match = (get(doc, attr, False, last_only, child) != None)\
163
                is_match = (get(child, attr, False, last_only) != None)\
164 164
                == is_positive(attr)
165 165
            if is_match: node = child; break
166 166
        
......
174 174
            if elem.value != None: xml_dom.set_value(doc, node, elem.value)
175 175
        if create:
176 176
            for attr in elem.keys + elem.attrs:
177
                get(doc, attr, create, last_only, node)
177
                get(node, attr, create, last_only)
178 178
        
179 179
        for branch in elem.other_branches:
180 180
            branch = copy.deepcopy(branch)
181 181
            set_value(branch, value(xpath))
182
            get(doc, branch, create, last_only, node)
182
            get(node, branch, create, last_only)
183 183
        
184 184
        # Follow pointer
185 185
        if elem.is_ptr:
......
191 191
            else: # forward (parent-to-child) pointer
192 192
                id_ = xml_dom.value(node)
193 193
                obj_xpath = obj(xpath) # target object
194
                if id_ == None or get(doc, obj_xpath, False, True) == None:
194
                if id_ == None or get(root, obj_xpath, False, True) == None:
195 195
                    # no target or target keys don't match
196 196
                    if not create: return None
197 197
                    
198 198
                    # Use last target object's ID + 1
199 199
                    obj_xpath[-1].keys = [] # just get by tag name
200
                    last = get(doc, obj_xpath, False, True)
200
                    last = get(root, obj_xpath, False, True)
201 201
                    if last != None: id_ = str(int(xml_dom.get_id(last)) + 1)
202 202
                    else: id_ = '0'
203 203
                    
......
206 206
                    xml_dom.set_value(doc, node, id_)
207 207
                else: last_only = False
208 208
                set_id(xpath, id_)
209
            return get(doc, xpath, create, last_only)
209
            return get(root, xpath, create, last_only)
210 210
        
211 211
        parent = node
212 212
    return parent
213 213

  
214
def put_obj(doc, xpath, id_, has_types, value=None):
214
def put_obj(root, xpath, id_, has_types, value=None):
215 215
    xpath = copy.deepcopy(xpath) # don't modify input!
216 216
    set_id(xpath, id_, has_types)
217 217
    if value != None: set_value(xpath, value)
218
    get(doc, xpath, True)
218
    get(root, xpath, True)
219 219

  
220 220
def path2xml(xpath, first_branch=True):
221 221
    root = xml_dom.create_doc().documentElement
222
    get(root.ownerDocument, xpath, True)
223
    if first_branch: root = root.firstChild
222
    get(root, xpath, True)
224 223
    return root
225 224

  
226 225
def str2xml(xpath): return path2xml(parse(xpath))
226

  
227
def xml_set_id(root, id_): xml_dom.set_id(root.firstChild, id_)
scripts/lib/db_xml.py
37 37
    if pkeys == None: pkeys = {}
38 38
    def pkey(table): return sql.pkey(db, pkeys, table)
39 39
    
40
    node = node.firstChild
40 41
    table = name_of(node)
41 42
    pkey_ = pkey(table)
42 43
    
scripts/map
79 79
    if in_is_xml: doc0 = xml.dom.minidom.parse(sys.stdin)
80 80
    if map_path != None:
81 81
        doc1 = xml_dom.create_doc(dest)
82
        root = doc1.documentElement
82 83
        if in_is_db:
83 84
            assert in_is_xpaths
84 85
            
......
97 98
                row_id = str(row_id)
98 99
                
99 100
                def put_col(path, value):
100
                    xpath.put_obj(doc1, path, row_id, has_types, value)
101
                    xpath.put_obj(root, path, row_id, has_types, value)
101 102
                for value, out in metadata: put_col(out, value)
102 103
                for in_, out in mappings:
103
                    root = xpath.get(in_.ownerDocument, src_root)
104
                    xml_dom.replace(root, root.cloneNode(False))
105
                    xml_dom.set_id(root, row_id)
106
                    
104
                    in_ = in_.cloneNode(True) # don't modify orig value!
105
                    xml_dom.set_id(xpath.get(in_, src_root), row_id)
107 106
                    value = sql.value_or_none(db_xml.get(in_db, in_, in_pkeys))
108 107
                    if value != None: put_col(out, str(value))
109 108
            in_db.close()
......
117 116
                row_id = str(row_idx)
118 117
                
119 118
                def put_col(path, value):
120
                    xpath.put_obj(doc1, path, row_id, has_types, value)
119
                    xpath.put_obj(root, path, row_id, has_types, value)
121 120
                for value, out in metadata: put_col(out, value)
122 121
                for i, col in enumerate(cols):
123 122
                    if row[i] != '' and col in map_: put_col(map_[col], row[i])

Also available in: Unified diff