Revision 84
Added by Aaron Marcuse-Kubitza about 13 years ago
scripts/lib/db_xml.py | ||
---|---|---|
4 | 4 |
from xml.dom import Node |
5 | 5 |
|
6 | 6 |
import sql |
7 |
import strings |
|
7 | 8 |
import xml_dom |
8 | 9 |
|
9 | 10 |
def name_of(node): return re.sub(r'^.*\.', r'', xml_dom.name_of(node)) |
... | ... | |
46 | 47 |
# Divide children into fields and children with fkeys to parent |
47 | 48 |
for child in xml_dom.NodeElemIter(node): |
48 | 49 |
child_name = name_of(child) |
49 |
if xml_dom.is_text(child): row[child_name] = xml_dom.value(child) |
|
50 |
if xml_dom.is_text(child): |
|
51 |
row[child_name] = strings.to_unicode(xml_dom.value(child)) |
|
50 | 52 |
elif is_ptr(child_name): row[child_name] = put(db, ptr_target(child), |
51 | 53 |
store_ids, row_ct_ref, pkeys) |
52 | 54 |
else: children.append(child) |
scripts/map | ||
---|---|---|
12 | 12 |
|
13 | 13 |
import opts |
14 | 14 |
|
15 |
def metadata_value(name): |
|
16 |
if name.startswith(':'): return name[1:] |
|
17 |
else: return None |
|
18 |
|
|
15 | 19 |
def main(): |
16 | 20 |
# Get db config from env vars |
17 | 21 |
db_config_names = ['host', 'user', 'password', 'database'] |
... | ... | |
55 | 59 |
for row in reader: |
56 | 60 |
in_, out = row[:2] |
57 | 61 |
if out != '': |
58 |
try: out = xpath.parse(dest_root+out) |
|
62 |
try: mappings.append((in_, xpath.parse(dest_root+out))) |
|
59 | 63 |
except SyntaxException, ex: raise SystemExit(str(ex)) |
60 |
mappings.append((in_, out)) |
|
61 | 64 |
stream.close() |
62 | 65 |
|
63 | 66 |
# Input datasource to XML tree, mapping if needed |
... | ... | |
68 | 71 |
if in_is_xml: raise Exception('XML-XML mapping not supported yet') |
69 | 72 |
elif in_is_db: raise Exception('DB-XML mapping not supported yet') |
70 | 73 |
else: # input is CSV |
71 |
map_ = dict(mappings) |
|
74 |
metadata = [] |
|
75 |
map_ = {} |
|
76 |
for in_, out in mappings: |
|
77 |
value = metadata_value(in_) |
|
78 |
if value != None: metadata.append((value, out)) |
|
79 |
else: map_[in_] = out |
|
80 |
|
|
72 | 81 |
reader = csv.reader(sys.stdin) |
73 |
fieldnames = reader.next() |
|
82 |
cols = reader.next() |
|
74 | 83 |
for row_idx, row in enumerate(reader): |
75 | 84 |
row_id = str(row_idx) |
76 |
def put_col(name, value): |
|
77 |
xpath.put_obj(out_doc, map_[name], row_id, has_types, value) |
|
78 |
for idx, name in enumerate(fieldnames): |
|
79 |
if row[idx] != '' and name in map_: put_col(name, row[idx]) |
|
85 |
def put_col(path, value): |
|
86 |
xpath.put_obj(out_doc, path, row_id, has_types, value) |
|
87 |
for value, out in metadata: put_col(out, value) |
|
88 |
for i, col in enumerate(cols): |
|
89 |
if row[i] != '' and col in map_: put_col(map_[col], row[i]) |
|
80 | 90 |
doc = out_doc |
81 | 91 |
|
82 | 92 |
# Output XML tree |
scripts/util/NYSpecimenDataAmericas.xml | ||
---|---|---|
2 | 2 |
<VegX> |
3 | 3 |
<individualOrganismObservations> |
4 | 4 |
<individualOrganismObservation id="0"> |
5 |
<plotObservationID>0</plotObservationID> |
|
5 | 6 |
<individualOrganismID>0</individualOrganismID> |
6 |
<plotObservationID>0</plotObservationID> |
|
7 | 7 |
<simpleUserdefined> |
8 | 8 |
<name>habitat</name> |
9 | 9 |
<value>Floodplain forest</value> |
... | ... | |
14 | 14 |
</simpleUserdefined> |
15 | 15 |
</individualOrganismObservation> |
16 | 16 |
<individualOrganismObservation id="1"> |
17 |
<plotObservationID>1</plotObservationID> |
|
17 | 18 |
<individualOrganismID>1</individualOrganismID> |
18 |
<plotObservationID>1</plotObservationID> |
|
19 | 19 |
</individualOrganismObservation> |
20 | 20 |
</individualOrganismObservations> |
21 |
<plotObservations> |
|
22 |
<plotObservation id="0"> |
|
23 |
<plotUniqueIdentifierID>0</plotUniqueIdentifierID> |
|
24 |
<obsStartDate> |
|
25 |
<_date> |
|
26 |
<year>1984</year> |
|
27 |
<month>8</month> |
|
28 |
<day>20</day> |
|
29 |
</_date> |
|
30 |
</obsStartDate> |
|
31 |
</plotObservation> |
|
32 |
<plotObservation id="1"> |
|
33 |
<plotUniqueIdentifierID>1</plotUniqueIdentifierID> |
|
34 |
<obsStartDate> |
|
35 |
<_date> |
|
36 |
<year>1994</year> |
|
37 |
<month>1</month> |
|
38 |
<day>17</day> |
|
39 |
</_date> |
|
40 |
</obsStartDate> |
|
41 |
</plotObservation> |
|
42 |
</plotObservations> |
|
43 |
<plots> |
|
44 |
<plot id="0"> |
|
45 |
<simpleUserdefined> |
|
46 |
<name>confidentialityStatus</name> |
|
47 |
<value>0</value> |
|
48 |
</simpleUserdefined> |
|
49 |
<country>Peru</country> |
|
50 |
<state>Madre de Dios</state> |
|
51 |
<county>Manú</county> |
|
52 |
<simpleUserdefined> |
|
53 |
<name>locality</name> |
|
54 |
<value>Parque Nacional del Manu. Río Manu: Cocha Casha Station</value> |
|
55 |
</simpleUserdefined> |
|
56 |
<geospatial> |
|
57 |
<DecimalLongitude>-71.40</DecimalLongitude> |
|
58 |
<DecimalLatitude>-11.80</DecimalLatitude> |
|
59 |
<minimumElevationInMeters>350</minimumElevationInMeters> |
|
60 |
<maximumElevationInMeters>350</maximumElevationInMeters> |
|
61 |
</geospatial> |
|
62 |
</plot> |
|
63 |
<plot id="1"> |
|
64 |
<simpleUserdefined> |
|
65 |
<name>confidentialityStatus</name> |
|
66 |
<value>0</value> |
|
67 |
</simpleUserdefined> |
|
68 |
<country>Belize</country> |
|
69 |
<state>Belize District</state> |
|
70 |
<simpleUserdefined> |
|
71 |
<name>locality</name> |
|
72 |
<value>Belize Zoo, in savanna plot immediately behind zoo. Mile 31 on Western Highway</value> |
|
73 |
</simpleUserdefined> |
|
74 |
<geospatial> |
|
75 |
<DecimalLongitude>-88.50</DecimalLongitude> |
|
76 |
<DecimalLatitude>17.40</DecimalLatitude> |
|
77 |
<minimumElevationInMeters>15</minimumElevationInMeters> |
|
78 |
<maximumElevationInMeters>15</maximumElevationInMeters> |
|
79 |
</geospatial> |
|
80 |
</plot> |
|
81 |
</plots> |
|
21 | 82 |
<individualOrganisms> |
22 | 83 |
<individualOrganism id="0"> |
23 | 84 |
<taxonNameUsageConceptsID>0</taxonNameUsageConceptsID> |
... | ... | |
187 | 248 |
<Name>Scrophulariaceae</Name> |
188 | 249 |
</taxonConcept> |
189 | 250 |
</taxonConcepts> |
190 |
<plotObservations> |
|
191 |
<plotObservation id="0"> |
|
192 |
<obsStartDate> |
|
193 |
<_date> |
|
194 |
<year>1984</year> |
|
195 |
<month>8</month> |
|
196 |
<day>20</day> |
|
197 |
</_date> |
|
198 |
</obsStartDate> |
|
199 |
<plotUniqueIdentifierID>0</plotUniqueIdentifierID> |
|
200 |
</plotObservation> |
|
201 |
<plotObservation id="1"> |
|
202 |
<obsStartDate> |
|
203 |
<_date> |
|
204 |
<year>1994</year> |
|
205 |
<month>1</month> |
|
206 |
<day>17</day> |
|
207 |
</_date> |
|
208 |
</obsStartDate> |
|
209 |
<plotUniqueIdentifierID>1</plotUniqueIdentifierID> |
|
210 |
</plotObservation> |
|
211 |
</plotObservations> |
|
212 |
<plots> |
|
213 |
<plot id="0"> |
|
214 |
<country>Peru</country> |
|
215 |
<state>Madre de Dios</state> |
|
216 |
<county>Manú</county> |
|
217 |
<simpleUserdefined> |
|
218 |
<name>locality</name> |
|
219 |
<value>Parque Nacional del Manu. Río Manu: Cocha Casha Station</value> |
|
220 |
</simpleUserdefined> |
|
221 |
<geospatial> |
|
222 |
<DecimalLongitude>-71.40</DecimalLongitude> |
|
223 |
<DecimalLatitude>-11.80</DecimalLatitude> |
|
224 |
<minimumElevationInMeters>350</minimumElevationInMeters> |
|
225 |
<maximumElevationInMeters>350</maximumElevationInMeters> |
|
226 |
</geospatial> |
|
227 |
</plot> |
|
228 |
<plot id="1"> |
|
229 |
<country>Belize</country> |
|
230 |
<state>Belize District</state> |
|
231 |
<simpleUserdefined> |
|
232 |
<name>locality</name> |
|
233 |
<value>Belize Zoo, in savanna plot immediately behind zoo. Mile 31 on Western Highway</value> |
|
234 |
</simpleUserdefined> |
|
235 |
<geospatial> |
|
236 |
<DecimalLongitude>-88.50</DecimalLongitude> |
|
237 |
<DecimalLatitude>17.40</DecimalLatitude> |
|
238 |
<minimumElevationInMeters>15</minimumElevationInMeters> |
|
239 |
<maximumElevationInMeters>15</maximumElevationInMeters> |
|
240 |
</geospatial> |
|
241 |
</plot> |
|
242 |
</plots> |
|
243 | 251 |
</VegX> |
mappings/VegX-VegBank.organisms.csv | ||
---|---|---|
67 | 67 |
"/*ID->/*s/plotObservation/*UniqueIdentifierID->/*s/plot/plotUniqueIdentifier","/*_ID/observation/*_ID/plot/authorPlotCode" |
68 | 68 |
"/*ID->/*s/plotObservation/*UniqueIdentifierID->/*s/plot/plotUniqueIdentifier","/*_ID/observation/{*_ID/plot/authorPlotCode,authorObsCode}" |
69 | 69 |
"/*ID->/*s/plotObservation/*UniqueIdentifierID->/*s/plot/relatedSpatialItem/relatedItem[relationshipType=parentPlot]/relatedItemID->/*s/plot/plotUniqueIdentifier","/*_ID/observation/*_ID/plot/PARENT_ID->/*_ID/observation/*_ID/plot/authorPlotCode" |
70 |
"/*ID->/*s/plotObservation/*UniqueIdentifierID->/*s/plot/simpleUserdefined[name=confidentialityStatus]/value","/*_ID/observation/*_ID/plot/confidentialityStatus" |
|
70 | 71 |
"/*ID->/*s/plotObservation/*UniqueIdentifierID->/*s/plot/simpleUserdefined[name=locality]/value","/*_ID/observation/*_ID/plot/place/*_ID/namedPlace/placeDescription" |
71 | 72 |
"/*ID->/*s/plotObservation/*UniqueIdentifierID->/*s/plot/simpleUserdefined[name=localityDescription]/value","/*_ID/observation/*_ID/plot/place/*_ID/namedPlace/placeDescription" |
72 | 73 |
"/*ID->/*s/plotObservation/*UniqueIdentifierID->/*s/plot/simpleUserdefined[name=majorGeo]/value","/*_ID/observation/*_ID/plot/place(/*_ID/namedPlace[placeSystem=continent])/placeName" |
mappings/NYBG-VegX.organisms.csv | ||
---|---|---|
1 | 1 |
"NYBG","VegX:/*s/individualOrganismObservation" |
2 |
":0","/*ID->/*s/plotObservation/*UniqueIdentifierID->/*s/plot/simpleUserdefined[name=confidentialityStatus]/value" |
|
2 | 3 |
"key", |
3 | 4 |
"DateLastModified", |
4 | 5 |
"InstitutionCode","/*ID->/*s/individualOrganism/*sID->/*s/taxonNameUsageConcept/partyWithRole/*ID->/parties/party/organizationName/_name/firstName" |
mappings/joins/NYBG-VegBank.organisms.csv | ||
---|---|---|
1 | 1 |
NYBG,VegBank:/taxonObservation |
2 |
:0,/*_ID/observation/*_ID/plot/confidentialityStatus |
|
2 | 3 |
InstitutionCode,/taxonImportance/stemCount/stemLocation/taxonInterpretation[PLANTCONCEPT_ID=]/museum_ID/party/OrganizationName/_name/firstName |
3 | 4 |
CollectionCode,/taxonImportance/stemCount/stemLocation/taxonInterpretation[PLANTCONCEPT_ID=]/museum_ID/party/OrganizationName/_name/lastName |
4 | 5 |
ScientificName,/taxonImportance/stemCount/stemLocation/taxonInterpretation(/*_ID/plantConcept[plantStatus/plantLevel=Species])/*_ID/*/plantName |
mappings/VegX-VegBank.plots.csv | ||
---|---|---|
35 | 35 |
/*UniqueIdentifierID->/*s/plot/plotUniqueIdentifier,/*_ID/plot/authorPlotCode |
36 | 36 |
/*UniqueIdentifierID->/*s/plot/plotUniqueIdentifier,"/{*_ID/plot/authorPlotCode,authorObsCode}" |
37 | 37 |
/*UniqueIdentifierID->/*s/plot/relatedSpatialItem/relatedItem[relationshipType=parentPlot]/relatedItemID->/*s/plot/plotUniqueIdentifier,/*_ID/plot/PARENT_ID->/*_ID/observation/*_ID/plot/authorPlotCode |
38 |
/*UniqueIdentifierID->/*s/plot/simpleUserdefined[name=confidentialityStatus]/value,/*_ID/plot/confidentialityStatus |
|
38 | 39 |
/*UniqueIdentifierID->/*s/plot/simpleUserdefined[name=locality]/value,/*_ID/plot/place/*_ID/namedPlace/placeDescription |
39 | 40 |
/*UniqueIdentifierID->/*s/plot/simpleUserdefined[name=localityDescription]/value,/*_ID/plot/place/*_ID/namedPlace/placeDescription |
40 | 41 |
/*UniqueIdentifierID->/*s/plot/simpleUserdefined[name=majorGeo]/value,/*_ID/plot/place(/*_ID/namedPlace[placeSystem=continent])/placeName |
Also available in: Unified diff
Added support for mapping datasource metadata