Project

General

Profile

« Previous | Next » 

Revision 32

data2xml: Added support for * abbreviations (abbrs) in backward (child-to-parent) pointers

View differences:

scripts/data2xml/NYBG-VegBank-VegX_mapping.csv
4 4
"InstitutionCode","/taxonInterpretation/museum_ID->/party/OrganizationName/_name/firstName","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept/partyWithRole/*ID->/parties/party/organizationName/_name/firstName"
5 5
"CollectionCode","/taxonInterpretation/museum_ID->/party/OrganizationName/_name/lastName","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept/partyWithRole/*ID->/parties/party/organizationName/_name/lastName"
6 6
"CatalogNumber","/taxonInterpretation/museumAccessionNumber","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept/voucher"
7
"ScientificName","/plantName/plantName","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[taxonNameUsageConceptID]/*s/taxonRelationshipAssertion!/assertion/*ID->/*s/taxonName/Simple"
7
"ScientificName","/plantName/plantName","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[*ID]/*s/taxonRelationshipAssertion!/assertion/*ID->/*s/taxonName/Simple"
8 8
"BasisOfRecord",,
9
"Kingdom","/plantName[plantStatus/plantLevel=Kingdom]/plantName","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[taxonNameUsageConceptID]/*s/taxonRelationshipAssertion!/assertion/*ID->/*s/taxonConcept[Rank/@code=reg]/Name"
10
"Phylum","/plantName[plantStatus/plantLevel=Subkingdom]/plantName","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[taxonNameUsageConceptID]/*s/taxonRelationshipAssertion!/assertion/*ID->/*s/taxonConcept[Rank/@code=phyl_div]/Name"
11
"Class","/plantName[plantStatus/plantLevel=Class]/plantName","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[taxonNameUsageConceptID]/*s/taxonRelationshipAssertion!/assertion/*ID->/*s/taxonConcept[Rank/@code=cl]/Name"
12
"Order","/plantName[plantStatus/plantLevel=Order]/plantName","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[taxonNameUsageConceptID]/*s/taxonRelationshipAssertion!/assertion/*ID->/*s/taxonConcept[Rank/@code=ord]/Name"
13
"Family","/plantName[plantStatus/plantLevel=Family]/plantName","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[taxonNameUsageConceptID]/*s/taxonRelationshipAssertion!/assertion/*ID->/*s/taxonConcept[Rank/@code=fam]/Name"
14
"Genus","/plantName[plantStatus/plantLevel=Genus]/plantName","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[taxonNameUsageConceptID]/*s/taxonRelationshipAssertion!/assertion/*ID->/*s/taxonConcept[Rank/@code=gen]/Name"
15
"Species","/plantName[plantStatus/plantLevel=Species]/plantName","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[taxonNameUsageConceptID]/*s/taxonRelationshipAssertion!/assertion/*ID->/*s/taxonConcept[Rank/@code=sp]/Name"
16
"Subspecies","/plantName[plantStatus/plantLevel=Subspecies]/plantName","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[taxonNameUsageConceptID]/*s/taxonRelationshipAssertion!/assertion/*ID->/*s/taxonConcept[Rank/@code=ssp]/Name"
17
"ScientificNameAuthor","/plantConcept/reference_ID->/referenceParty/{givenName,surname}","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[taxonNameUsageConceptID]/*s/taxonRelationshipAssertion!/assertion/*ID->/*s/taxonConcept/AccordingTo/Simple"
18
"IdentifiedBy","/taxonInterpretation/PARTY_ID->/party/{givenName,middleName,surName}","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[taxonNameUsageConceptID]/partyWithRole[role=identifier]/*ID->/parties/party/individualName/{givenName,surName}"
19
"YearIdentified","/taxonInterpretation/interpretationDate/_date/year","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[taxonNameUsageConceptID]/date/_date/year"
20
"MonthIdentified","/taxonInterpretation/interpretationDate/_date/month","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[taxonNameUsageConceptID]/date/_date/month"
21
"DayIdentified","/taxonInterpretation/interpretationDate/_date/day","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[taxonNameUsageConceptID]/date/_date/day"
9
"Kingdom","/plantName[plantStatus/plantLevel=Kingdom]/plantName","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[*ID]/*s/taxonRelationshipAssertion!/assertion/*ID->/*s/taxonConcept[Rank/@code=reg]/Name"
10
"Phylum","/plantName[plantStatus/plantLevel=Subkingdom]/plantName","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[*ID]/*s/taxonRelationshipAssertion!/assertion/*ID->/*s/taxonConcept[Rank/@code=phyl_div]/Name"
11
"Class","/plantName[plantStatus/plantLevel=Class]/plantName","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[*ID]/*s/taxonRelationshipAssertion!/assertion/*ID->/*s/taxonConcept[Rank/@code=cl]/Name"
12
"Order","/plantName[plantStatus/plantLevel=Order]/plantName","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[*ID]/*s/taxonRelationshipAssertion!/assertion/*ID->/*s/taxonConcept[Rank/@code=ord]/Name"
13
"Family","/plantName[plantStatus/plantLevel=Family]/plantName","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[*ID]/*s/taxonRelationshipAssertion!/assertion/*ID->/*s/taxonConcept[Rank/@code=fam]/Name"
14
"Genus","/plantName[plantStatus/plantLevel=Genus]/plantName","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[*ID]/*s/taxonRelationshipAssertion!/assertion/*ID->/*s/taxonConcept[Rank/@code=gen]/Name"
15
"Species","/plantName[plantStatus/plantLevel=Species]/plantName","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[*ID]/*s/taxonRelationshipAssertion!/assertion/*ID->/*s/taxonConcept[Rank/@code=sp]/Name"
16
"Subspecies","/plantName[plantStatus/plantLevel=Subspecies]/plantName","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[*ID]/*s/taxonRelationshipAssertion!/assertion/*ID->/*s/taxonConcept[Rank/@code=ssp]/Name"
17
"ScientificNameAuthor","/plantConcept/reference_ID->/referenceParty/{givenName,surname}","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[*ID]/*s/taxonRelationshipAssertion!/assertion/*ID->/*s/taxonConcept/AccordingTo/Simple"
18
"IdentifiedBy","/taxonInterpretation/PARTY_ID->/party/{givenName,middleName,surName}","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[*ID]/partyWithRole[role=identifier]/*ID->/parties/party/individualName/{givenName,surName}"
19
"YearIdentified","/taxonInterpretation/interpretationDate/_date/year","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[*ID]/date/_date/year"
20
"MonthIdentified","/taxonInterpretation/interpretationDate/_date/month","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[*ID]/date/_date/month"
21
"DayIdentified","/taxonInterpretation/interpretationDate/_date/day","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[*ID]/date/_date/day"
22 22
"TypeStatus",,
23 23
"CollectorNumber",,
24 24
"FieldNumber","/taxonInterpretation/collectionNumber","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept/authorCode"
25
"Collector","/taxonInterpretation/collector_ID->/party/{givenName,middleName,surName}","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[taxonNameUsageConceptID]/partyWithRole[role=collector]/*ID->/parties/party/individualName/{givenName,surName}"
25
"Collector","/taxonInterpretation/collector_ID->/party/{givenName,middleName,surName}","/*s/individualOrganismObservation/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept->/*s/taxonDetermination[*ID]/partyWithRole[role=collector]/*ID->/parties/party/individualName/{givenName,surName}"
26 26
"YearCollected","/taxonInterpretation/collectionDate/_date/year","/*s/individualOrganismObservation/*ID->/*s/plotObservation/obsStartDate/_date/year"
27 27
"MonthCollected","/taxonInterpretation/collectionDate/_date/month","/*s/individualOrganismObservation/*ID->/*s/plotObservation/obsStartDate/_date/month"
28 28
"DayCollected","/taxonInterpretation/collectionDate/_date/day","/*s/individualOrganismObservation/*ID->/*s/plotObservation/obsStartDate/_date/day"
scripts/data2xml/xpath.py
31 31

  
32 32
def set_value(path, value):
    # Store `value` on the final element of `path`
    last_elem = path[-1]
    last_elem.value = value
33 33

  
34
def backward_id(elem):
    # Returns the first attr of `elem` when that attr has no value yet, as
    # reported by value() (defined elsewhere in this module); callers treat
    # such an attr as the target ID of a backward (child-to-parent) pointer
    # and fill it in. Returns None when `elem` has no attrs or when its
    # first attr already has a value.
    # `is None` is used instead of `== None` so that a value object with a
    # custom __eq__ cannot be mistaken for a missing value.
    if elem.attrs and value(elem.attrs[0]) is None:
        return elem.attrs[0]
    return None
38

  
34 39
class XpathParser(Parser):
35 40
    def _main(self):
36 41
        self._match_str('/', required=True)
......
50 55
            tree.append(elem)
51 56
            if not self._match_str('/'): break
52 57
            elem_idx += 1
58
        
59
        # Expand * abbrs
60
        elem_idx = 0
61
        for elem in tree:
62
            id_ = backward_id(elem)
63
            if id_ != None: elem = id_[0]; offset = -2
64
            elif elem.is_ptr: offset = 2
65
            else: offset = 1
66
            before, abbr, after = elem.name.partition('*')
67
            if abbr != '':
68
                try: elem.name = before+tree[elem_idx+offset].name+after
69
                except IndexError: pass # no replacement elem
70
            elem_idx += 1
71
        
53 72
        # Add lookahead assertion for rest of path
54 73
        if fork_idx != None: tree[fork_idx].attrs.append(tree[fork_idx+1:])
74
        
55 75
        return tree
56 76
    
57 77
    def _fields(self):
......
78 98
    def _field(self):
79 99
        return self._name()
80 100
    
81
    def _name(self): return self._match_re(r'[\w.]+', required=True)
101
    def _name(self):
        # A name is one or more word chars, dots, or * (abbr placeholder)
        name_pattern = r'[\w.*]+'
        return self._match_re(name_pattern, required=True)
82 102
    
83 103
    def _value(self):
        # A value is one or more word chars, dots, or | characters
        value_pattern = r'[\w.|]+'
        return self._match_re(value_pattern, required=True)
84 104

  
......
95 115
    path[id_level].attrs.append([XpathElem('id', id_, is_attr=True)])
96 116

  
97 117
def get(doc, path, create=False, last_only=None, parent=None):
98
    # Warning: The last_only optimization may split data between multiple nodes
118
    # Warning: The last_only optimization may put data that should be together
119
    # into separate nodes
99 120
    if parent == None: parent = doc.documentElement
100 121
    if last_only == None: last_only = create
101 122
    elem_idx = 0
......
130 151
        # Follow pointer
131 152
        if elem.is_ptr:
132 153
            path = deepcopy(path[elem_idx+1:]) # rest of path
133
            attrs = path[instance_level].attrs
134
            if len(attrs) >= 1 and value(attrs[0]) == None:
154
            id_elem = backward_id(path[instance_level])
155
            if id_elem != None:
135 156
                # backward (child-to-parent) pointer with target ID attr
136
                set_value(attrs[0], xml_util.get_id(node))
157
                set_value(id_elem, xml_util.get_id(node))
137 158
            else: # forward (parent-to-child) pointer
138 159
                id_ = xml_util.value(node)
139 160
                obj_path = obj(path) # target object
scripts/data2xml/data2xml
29 29
        name = row[0]
30 30
        path = row[dest_idx]
31 31
        if name != '' and path != '':
32
            if path.startswith('?'): path = path[1:]
33 32
            if path.startswith('/*s/'): has_types = True # *s used for type elem
34 33
            path = path.replace('<name>', name)
35
            path = re.sub(r'\*(?=\w*(?:->/[^/]+)?/(\w+))', r'\1', path)
36 34
            mappings[name] = xpath.XpathParser(path).parse()
37 35
    stream.close()
38 36
    

Also available in: Unified diff