Project

General

Profile

« Previous | Next » 

Revision 84

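Added support for mapping datasource metadata: a mapping whose input name starts with ':' is treated as a constant value (the text after the ':') and is written to its output path for every input row, rather than being looked up as a CSV column.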

View differences:

scripts/lib/db_xml.py
4 4
from xml.dom import Node
5 5

  
6 6
import sql
7
import strings
7 8
import xml_dom
8 9

  
9 10
def name_of(node): return re.sub(r'^.*\.', r'', xml_dom.name_of(node))
......
46 47
    # Divide children into fields and children with fkeys to parent
47 48
    for child in xml_dom.NodeElemIter(node):
48 49
        child_name = name_of(child)
49
        if xml_dom.is_text(child): row[child_name] = xml_dom.value(child)
50
        if xml_dom.is_text(child):
51
            row[child_name] = strings.to_unicode(xml_dom.value(child))
50 52
        elif is_ptr(child_name): row[child_name] = put(db, ptr_target(child),
51 53
            store_ids, row_ct_ref, pkeys)
52 54
        else: children.append(child)
scripts/map
12 12

  
13 13
import opts
14 14

  
15
def metadata_value(name):
16
    if name.startswith(':'): return name[1:]
17
    else: return None
18

  
15 19
def main():
16 20
    # Get db config from env vars
17 21
    db_config_names = ['host', 'user', 'password', 'database']
......
55 59
        for row in reader:
56 60
            in_, out = row[:2]
57 61
            if out != '':
58
                try: out = xpath.parse(dest_root+out)
62
                try: mappings.append((in_, xpath.parse(dest_root+out)))
59 63
                except SyntaxException, ex: raise SystemExit(str(ex))
60
                mappings.append((in_, out))
61 64
        stream.close()
62 65
    
63 66
    # Input datasource to XML tree, mapping if needed
......
68 71
        if in_is_xml: raise Exception('XML-XML mapping not supported yet')
69 72
        elif in_is_db: raise Exception('DB-XML mapping not supported yet')
70 73
        else: # input is CSV
71
            map_ = dict(mappings)
74
            metadata = []
75
            map_ = {}
76
            for in_, out in mappings:
77
                value = metadata_value(in_)
78
                if value != None: metadata.append((value, out))
79
                else: map_[in_] = out
80
            
72 81
            reader = csv.reader(sys.stdin)
73
            fieldnames = reader.next()
82
            cols = reader.next()
74 83
            for row_idx, row in enumerate(reader):
75 84
                row_id = str(row_idx)
76
                def put_col(name, value):
77
                    xpath.put_obj(out_doc, map_[name], row_id, has_types, value)
78
                for idx, name in enumerate(fieldnames):
79
                    if row[idx] != '' and name in map_: put_col(name, row[idx])
85
                def put_col(path, value):
86
                    xpath.put_obj(out_doc, path, row_id, has_types, value)
87
                for value, out in metadata: put_col(out, value)
88
                for i, col in enumerate(cols):
89
                    if row[i] != '' and col in map_: put_col(map_[col], row[i])
80 90
        doc = out_doc
81 91
    
82 92
    # Output XML tree
scripts/util/NYSpecimenDataAmericas.xml
2 2
<VegX>
3 3
    <individualOrganismObservations>
4 4
        <individualOrganismObservation id="0">
5
            <plotObservationID>0</plotObservationID>
5 6
            <individualOrganismID>0</individualOrganismID>
6
            <plotObservationID>0</plotObservationID>
7 7
            <simpleUserdefined>
8 8
                <name>habitat</name>
9 9
                <value>Floodplain forest</value>
......
14 14
            </simpleUserdefined>
15 15
        </individualOrganismObservation>
16 16
        <individualOrganismObservation id="1">
17
            <plotObservationID>1</plotObservationID>
17 18
            <individualOrganismID>1</individualOrganismID>
18
            <plotObservationID>1</plotObservationID>
19 19
        </individualOrganismObservation>
20 20
    </individualOrganismObservations>
21
    <plotObservations>
22
        <plotObservation id="0">
23
            <plotUniqueIdentifierID>0</plotUniqueIdentifierID>
24
            <obsStartDate>
25
                <_date>
26
                    <year>1984</year>
27
                    <month>8</month>
28
                    <day>20</day>
29
                </_date>
30
            </obsStartDate>
31
        </plotObservation>
32
        <plotObservation id="1">
33
            <plotUniqueIdentifierID>1</plotUniqueIdentifierID>
34
            <obsStartDate>
35
                <_date>
36
                    <year>1994</year>
37
                    <month>1</month>
38
                    <day>17</day>
39
                </_date>
40
            </obsStartDate>
41
        </plotObservation>
42
    </plotObservations>
43
    <plots>
44
        <plot id="0">
45
            <simpleUserdefined>
46
                <name>confidentialityStatus</name>
47
                <value>0</value>
48
            </simpleUserdefined>
49
            <country>Peru</country>
50
            <state>Madre de Dios</state>
51
            <county>Man&#250;</county>
52
            <simpleUserdefined>
53
                <name>locality</name>
54
                <value>Parque Nacional del Manu. R&#237;o Manu: Cocha Casha Station</value>
55
            </simpleUserdefined>
56
            <geospatial>
57
                <DecimalLongitude>-71.40</DecimalLongitude>
58
                <DecimalLatitude>-11.80</DecimalLatitude>
59
                <minimumElevationInMeters>350</minimumElevationInMeters>
60
                <maximumElevationInMeters>350</maximumElevationInMeters>
61
            </geospatial>
62
        </plot>
63
        <plot id="1">
64
            <simpleUserdefined>
65
                <name>confidentialityStatus</name>
66
                <value>0</value>
67
            </simpleUserdefined>
68
            <country>Belize</country>
69
            <state>Belize District</state>
70
            <simpleUserdefined>
71
                <name>locality</name>
72
                <value>Belize Zoo, in savanna plot immediately behind zoo. Mile 31 on Western Highway</value>
73
            </simpleUserdefined>
74
            <geospatial>
75
                <DecimalLongitude>-88.50</DecimalLongitude>
76
                <DecimalLatitude>17.40</DecimalLatitude>
77
                <minimumElevationInMeters>15</minimumElevationInMeters>
78
                <maximumElevationInMeters>15</maximumElevationInMeters>
79
            </geospatial>
80
        </plot>
81
    </plots>
21 82
    <individualOrganisms>
22 83
        <individualOrganism id="0">
23 84
            <taxonNameUsageConceptsID>0</taxonNameUsageConceptsID>
......
187 248
            <Name>Scrophulariaceae</Name>
188 249
        </taxonConcept>
189 250
    </taxonConcepts>
190
    <plotObservations>
191
        <plotObservation id="0">
192
            <obsStartDate>
193
                <_date>
194
                    <year>1984</year>
195
                    <month>8</month>
196
                    <day>20</day>
197
                </_date>
198
            </obsStartDate>
199
            <plotUniqueIdentifierID>0</plotUniqueIdentifierID>
200
        </plotObservation>
201
        <plotObservation id="1">
202
            <obsStartDate>
203
                <_date>
204
                    <year>1994</year>
205
                    <month>1</month>
206
                    <day>17</day>
207
                </_date>
208
            </obsStartDate>
209
            <plotUniqueIdentifierID>1</plotUniqueIdentifierID>
210
        </plotObservation>
211
    </plotObservations>
212
    <plots>
213
        <plot id="0">
214
            <country>Peru</country>
215
            <state>Madre de Dios</state>
216
            <county>Man&#250;</county>
217
            <simpleUserdefined>
218
                <name>locality</name>
219
                <value>Parque Nacional del Manu. R&#237;o Manu: Cocha Casha Station</value>
220
            </simpleUserdefined>
221
            <geospatial>
222
                <DecimalLongitude>-71.40</DecimalLongitude>
223
                <DecimalLatitude>-11.80</DecimalLatitude>
224
                <minimumElevationInMeters>350</minimumElevationInMeters>
225
                <maximumElevationInMeters>350</maximumElevationInMeters>
226
            </geospatial>
227
        </plot>
228
        <plot id="1">
229
            <country>Belize</country>
230
            <state>Belize District</state>
231
            <simpleUserdefined>
232
                <name>locality</name>
233
                <value>Belize Zoo, in savanna plot immediately behind zoo. Mile 31 on Western Highway</value>
234
            </simpleUserdefined>
235
            <geospatial>
236
                <DecimalLongitude>-88.50</DecimalLongitude>
237
                <DecimalLatitude>17.40</DecimalLatitude>
238
                <minimumElevationInMeters>15</minimumElevationInMeters>
239
                <maximumElevationInMeters>15</maximumElevationInMeters>
240
            </geospatial>
241
        </plot>
242
    </plots>
243 251
</VegX>
mappings/VegX-VegBank.organisms.csv
67 67
"/*ID->/*s/plotObservation/*UniqueIdentifierID->/*s/plot/plotUniqueIdentifier","/*_ID/observation/*_ID/plot/authorPlotCode"
68 68
"/*ID->/*s/plotObservation/*UniqueIdentifierID->/*s/plot/plotUniqueIdentifier","/*_ID/observation/{*_ID/plot/authorPlotCode,authorObsCode}"
69 69
"/*ID->/*s/plotObservation/*UniqueIdentifierID->/*s/plot/relatedSpatialItem/relatedItem[relationshipType=parentPlot]/relatedItemID->/*s/plot/plotUniqueIdentifier","/*_ID/observation/*_ID/plot/PARENT_ID->/*_ID/observation/*_ID/plot/authorPlotCode"
70
"/*ID->/*s/plotObservation/*UniqueIdentifierID->/*s/plot/simpleUserdefined[name=confidentialityStatus]/value","/*_ID/observation/*_ID/plot/confidentialityStatus"
70 71
"/*ID->/*s/plotObservation/*UniqueIdentifierID->/*s/plot/simpleUserdefined[name=locality]/value","/*_ID/observation/*_ID/plot/place/*_ID/namedPlace/placeDescription"
71 72
"/*ID->/*s/plotObservation/*UniqueIdentifierID->/*s/plot/simpleUserdefined[name=localityDescription]/value","/*_ID/observation/*_ID/plot/place/*_ID/namedPlace/placeDescription"
72 73
"/*ID->/*s/plotObservation/*UniqueIdentifierID->/*s/plot/simpleUserdefined[name=majorGeo]/value","/*_ID/observation/*_ID/plot/place(/*_ID/namedPlace[placeSystem=continent])/placeName"
mappings/NYBG-VegX.organisms.csv
1 1
"NYBG","VegX:/*s/individualOrganismObservation"
2
":0","/*ID->/*s/plotObservation/*UniqueIdentifierID->/*s/plot/simpleUserdefined[name=confidentialityStatus]/value"
2 3
"key",
3 4
"DateLastModified",
4 5
"InstitutionCode","/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept/partyWithRole/*ID->/parties/party/organizationName/_name/firstName"
mappings/joins/NYBG-VegBank.organisms.csv
1 1
NYBG,VegBank:/taxonObservation
2
:0,/*_ID/observation/*_ID/plot/confidentialityStatus
2 3
InstitutionCode,/taxonImportance/stemCount/stemLocation/taxonInterpretation[PLANTCONCEPT_ID=]/museum_ID/party/OrganizationName/_name/firstName
3 4
CollectionCode,/taxonImportance/stemCount/stemLocation/taxonInterpretation[PLANTCONCEPT_ID=]/museum_ID/party/OrganizationName/_name/lastName
4 5
ScientificName,/taxonImportance/stemCount/stemLocation/taxonInterpretation(/*_ID/plantConcept[plantStatus/plantLevel=Species])/*_ID/*/plantName
mappings/VegX-VegBank.plots.csv
35 35
/*UniqueIdentifierID->/*s/plot/plotUniqueIdentifier,/*_ID/plot/authorPlotCode
36 36
/*UniqueIdentifierID->/*s/plot/plotUniqueIdentifier,"/{*_ID/plot/authorPlotCode,authorObsCode}"
37 37
/*UniqueIdentifierID->/*s/plot/relatedSpatialItem/relatedItem[relationshipType=parentPlot]/relatedItemID->/*s/plot/plotUniqueIdentifier,/*_ID/plot/PARENT_ID->/*_ID/observation/*_ID/plot/authorPlotCode
38
/*UniqueIdentifierID->/*s/plot/simpleUserdefined[name=confidentialityStatus]/value,/*_ID/plot/confidentialityStatus
38 39
/*UniqueIdentifierID->/*s/plot/simpleUserdefined[name=locality]/value,/*_ID/plot/place/*_ID/namedPlace/placeDescription
39 40
/*UniqueIdentifierID->/*s/plot/simpleUserdefined[name=localityDescription]/value,/*_ID/plot/place/*_ID/namedPlace/placeDescription
40 41
/*UniqueIdentifierID->/*s/plot/simpleUserdefined[name=majorGeo]/value,/*_ID/plot/place(/*_ID/namedPlace[placeSystem=continent])/placeName

Also available in: Unified diff