Revision 10339
Added by Aaron Marcuse-Kubitza over 11 years ago
inputs/REMIB/Specimen/postprocess.sql | ||
---|---|---|
1 | 1 |
SELECT util.search_path_append('util'); |
2 | 2 |
|
3 |
SELECT util.rename_cols(:table_str, $$ |
|
4 |
country => country_verbatim, |
|
5 |
state => stateProvince_verbatim, |
|
6 |
county => county_verbatim, |
|
7 |
locality => locality_verbatim, |
|
8 |
long_deg => longitude_deg_verbatim, |
|
9 |
long_min => longitude_min_verbatim, |
|
10 |
long_sec => longitude_sec_verbatim, |
|
11 |
lat_deg => latitude_deg_verbatim, |
|
12 |
lat_min => latitude_min_verbatim, |
|
13 |
lat_sec => latitude_sec_verbatim, |
|
14 |
habitat => habitat_verbatim, |
|
15 |
$$::hstore); |
|
16 |
|
|
3 | 17 |
SELECT create_if_not_exists($$CREATE INDEX "Specimen.acronym" ON $$||:table_str||$$ (acronym)$$); |
4 | 18 |
|
5 | 19 |
-- remove frameshifted rows |
6 | 20 |
-- rerun time: 6 s ("Time: 5537.211 ms") thanks to index |
7 | 21 |
DELETE FROM :table WHERE acronym IS NULL; |
8 | 22 |
DELETE FROM :table WHERE coll_year !~ '^(?:1[7-9]|20)\d{2}$'; |
9 |
DELETE FROM :table WHERE country ~ '\d'; |
|
10 |
DELETE FROM :table WHERE long_deg ~ '[[:alpha:]]' AND long_deg NOT IN ('RESTRINGIDO');
|
|
11 |
DELETE FROM :table WHERE long_min !~ '^\d*$';
|
|
12 |
DELETE FROM :table WHERE long_sec !~ '^\d*$';
|
|
13 |
DELETE FROM :table WHERE lat_min !~ '^\d*$';
|
|
14 |
DELETE FROM :table WHERE lat_sec !~ '^\d*$';
|
|
23 |
DELETE FROM :table WHERE country_verbatim ~ '\d';
|
|
24 |
DELETE FROM :table WHERE longitude_deg_verbatim ~ '[[:alpha:]]' AND longitude_deg_verbatim NOT IN ('RESTRINGIDO');
|
|
25 |
DELETE FROM :table WHERE longitude_min_verbatim !~ '^\d*$';
|
|
26 |
DELETE FROM :table WHERE longitude_sec_verbatim !~ '^\d*$';
|
|
27 |
DELETE FROM :table WHERE latitude_min_verbatim !~ '^\d*$';
|
|
28 |
DELETE FROM :table WHERE latitude_sec_verbatim !~ '^\d*$';
|
|
15 | 29 |
|
16 | 30 |
-- Remove institutions that we have direct data for |
17 | 31 |
DELETE FROM :table |
... | ... | |
30 | 44 |
ORDER BY dataprovider |
31 | 45 |
*/ |
32 | 46 |
; |
47 |
|
|
48 |
|
|
49 |
CREATE OR REPLACE FUNCTION map_nulls__text(value text) |
|
50 |
RETURNS text AS |
|
51 |
$BODY$ |
|
52 |
-- "ND = no disponible = not available" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB) |
|
53 |
SELECT util.map_nulls('{ND,NA}', $1) |
|
54 |
$BODY$ |
|
55 |
LANGUAGE sql IMMUTABLE STRICT |
|
56 |
COST 100; |
|
57 |
|
|
58 |
SELECT mk_derived_col((:table_str, 'country' ), $$map_nulls__text("country_verbatim" )$$); -- runtime: 3 min ("Time: 183171.010 ms") |
|
59 |
SELECT mk_derived_col((:table_str, 'stateProvince'), $$map_nulls__text("stateProvince_verbatim")$$); -- runtime: 2.5 min ("Time: 159935.791 ms") |
|
60 |
SELECT mk_derived_col((:table_str, 'county' ), $$map_nulls__text("county_verbatim" )$$); -- runtime: 2.5 min ("Time: 151076.487 ms") |
|
61 |
SELECT mk_derived_col((:table_str, 'locality' ), $$map_nulls__text("locality_verbatim" )$$); -- runtime: 2.5 min ("Time: 155188.486 ms") |
|
62 |
SELECT mk_derived_col((:table_str, 'habitat' ), $$map_nulls__text("habitat_verbatim" )$$); -- runtime: 2 min ("Time: 131904.068 ms") |
|
63 |
|
|
64 |
|
|
65 |
CREATE OR REPLACE FUNCTION map_nulls__coord__longitude(value text) |
|
66 |
RETURNS double precision AS |
|
67 |
$BODY$ |
|
68 |
-- TODO: sometimes also +-99, but not when min/sec are valid |
|
69 |
SELECT util.map_nulls('{-999,999,1000}', |
|
70 |
util.map_nulls('{RESTRINGIDO}', $1)::double precision) |
|
71 |
$BODY$ |
|
72 |
LANGUAGE sql IMMUTABLE STRICT |
|
73 |
COST 100; |
|
74 |
|
|
75 |
CREATE OR REPLACE FUNCTION map_nulls__coord__other(value text) |
|
76 |
RETURNS double precision AS |
|
77 |
$BODY$ |
|
78 |
-- also map nulls that are valid longitudes |
|
79 |
SELECT util.map_nulls('{-99,99}', map_nulls__coord__longitude($1)) |
|
80 |
$BODY$ |
|
81 |
LANGUAGE sql IMMUTABLE STRICT |
|
82 |
COST 100; |
|
83 |
|
|
84 |
SELECT mk_derived_col((:table_str, 'longitude_deg'), $$map_nulls__coord__longitude("longitude_deg_verbatim")$$); -- runtime: 6 min ("Time: 367070.877 ms") |
|
85 |
SELECT mk_derived_col((:table_str, 'longitude_min'), $$map_nulls__coord__other ("longitude_min_verbatim")$$); |
|
86 |
SELECT mk_derived_col((:table_str, 'longitude_sec'), $$map_nulls__coord__other ("longitude_sec_verbatim")$$); -- runtime: 8 min ("Time: 473198.465 ms") |
|
87 |
SELECT mk_derived_col((:table_str, 'latitude_deg' ), $$map_nulls__coord__other ("latitude_deg_verbatim" )$$); -- runtime: 8.5 min ("Time: 508835.119 ms") |
|
88 |
SELECT mk_derived_col((:table_str, 'latitude_min' ), $$map_nulls__coord__other ("latitude_min_verbatim" )$$); -- runtime: 8 min ("Time: 484806.499 ms") |
|
89 |
SELECT mk_derived_col((:table_str, 'latitude_sec' ), $$map_nulls__coord__other ("latitude_sec_verbatim" )$$); -- runtime: 8.5 min ("Time: 505171.027 ms") |
inputs/REMIB/Specimen/map.csv | ||
---|---|---|
13 | 13 |
family,family,, |
14 | 14 |
genus,genus,, |
15 | 15 |
specificEpithet,specificEpithet,, |
16 |
country,country,"/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)"
|
|
17 |
state,stateProvince,"/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)"
|
|
18 |
county,county,"/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)"
|
|
19 |
locality,locality,"/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)"
|
|
20 |
long_deg,longitude_deg,"/_map:[./{""-999"",99,999,1000}=,*=*]/value",-99 is a valid longitude for Mexico
|
|
21 |
long_min,longitude_min,"/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
|
|
22 |
long_sec,longitude_sec,"/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
|
|
23 |
lat_deg,latitude_deg,"/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
|
|
24 |
lat_min,latitude_min,"/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
|
|
25 |
lat_sec,latitude_sec,"/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
|
|
16 |
country,country_verbatim,,
|
|
17 |
state,state_verbatim,,
|
|
18 |
county,county_verbatim,,
|
|
19 |
locality,locality_verbatim,,
|
|
20 |
long_deg,longitude_deg_verbatim,,
|
|
21 |
long_min,longitude_min_verbatim,,
|
|
22 |
long_sec,longitude_sec_verbatim,,
|
|
23 |
lat_deg,latitude_deg_verbatim,,
|
|
24 |
lat_min,latitude_min_verbatim,,
|
|
25 |
lat_sec,latitude_sec_verbatim,,
|
|
26 | 26 |
coll_day,dayCollected,, |
27 | 27 |
coll_month,monthCollected,, |
28 | 28 |
coll_year,yearCollected,, |
29 | 29 |
collector,recordedBy,, |
30 |
habitat,habitat,"/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)"
|
|
30 |
habitat,habitat_verbatim,,
|
|
31 | 31 |
preparation,preparations,, |
32 |
country,country,, |
|
33 |
stateProvince,stateProvince,, |
|
34 |
county,county,, |
|
35 |
locality,locality,, |
|
36 |
habitat,habitat,, |
|
37 |
longitude_deg,longitude_deg,, |
|
38 |
longitude_min,longitude_min,, |
|
39 |
longitude_sec,longitude_sec,, |
|
40 |
latitude_deg,latitude_deg,, |
|
41 |
latitude_min,latitude_min,, |
|
42 |
latitude_sec,latitude_sec,, |
inputs/REMIB/Specimen/VegBIEN.csv | ||
---|---|---|
37 | 37 |
GROUP BY acronym, accession_number |
38 | 38 |
HAVING count(*) > 1 |
39 | 39 |
-----" |
40 |
locality,"/location/iscultivated/_or/2/_locationnarrative_is_cultivated/locationnarrative/_join/1/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)"
|
|
41 |
habitat,"/location/iscultivated/_or/2/_locationnarrative_is_cultivated/locationnarrative/_join/3/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)"
|
|
40 |
locality,/location/iscultivated/_or/2/_locationnarrative_is_cultivated/locationnarrative/_join/1,
|
|
41 |
habitat,/location/iscultivated/_or/2/_locationnarrative_is_cultivated/locationnarrative/_join/3,
|
|
42 | 42 |
coll_day,"/location/locationevent/taxonoccurrence/aggregateoccurrence/collectiondate/_alt/2/_date/day/_nullIf:[null=0,type=float]/value", |
43 | 43 |
coll_month,"/location/locationevent/taxonoccurrence/aggregateoccurrence/collectiondate/_alt/2/_date/month/_nullIf:[null=0,type=float]/value", |
44 | 44 |
coll_year,"/location/locationevent/taxonoccurrence/aggregateoccurrence/collectiondate/_alt/2/_date/year/_nullIf:[null=0,type=float]/value", |
... | ... | |
81 | 81 |
GROUP BY acronym, accession_number |
82 | 82 |
HAVING count(*) > 1 |
83 | 83 |
-----" |
84 |
locality,"/location/locationnarrative/_merge/1/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)" |
|
85 |
habitat,"/location/locationnarrative/_merge/3/_label[label=""habitat""]/value/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB); Brad: Free-text description of vegetation community where collected, frequently redundane wrt 'Vegetation'. Bob, Nick: keep as user defined or create special element?" |
|
86 |
lat_deg,"/location/locationplace/*_id/place/*_id/coordinates/latitude_deg/_dms_to_dd/deg/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value", |
|
87 |
lat_min,"/location/locationplace/*_id/place/*_id/coordinates/latitude_deg/_dms_to_dd/min/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value", |
|
88 |
lat_sec,"/location/locationplace/*_id/place/*_id/coordinates/latitude_deg/_dms_to_dd/sec/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value", |
|
89 |
long_deg,"/location/locationplace/*_id/place/*_id/coordinates/longitude_deg/_dms_to_dd/deg/_map:[./{""-999"",99,999,1000}=,*=*]/value",-99 is a valid longitude for Mexico |
|
90 |
long_min,"/location/locationplace/*_id/place/*_id/coordinates/longitude_deg/_dms_to_dd/min/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value", |
|
91 |
long_sec,"/location/locationplace/*_id/place/*_id/coordinates/longitude_deg/_dms_to_dd/sec/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value", |
|
92 |
country,"/location/locationplace/*_id/place/country/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)" |
|
93 |
county,"/location/locationplace/*_id/place/county/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)" |
|
94 |
lat_deg,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/latitude_deg/_dms_to_dd/deg/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value", |
|
95 |
lat_min,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/latitude_deg/_dms_to_dd/min/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value", |
|
96 |
lat_sec,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/latitude_deg/_dms_to_dd/sec/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value", |
|
97 |
long_deg,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/longitude_deg/_dms_to_dd/deg/_map:[./{""-999"",99,999,1000}=,*=*]/value",-99 is a valid longitude for Mexico |
|
98 |
long_min,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/longitude_deg/_dms_to_dd/min/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value", |
|
99 |
long_sec,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/longitude_deg/_dms_to_dd/sec/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value", |
|
100 |
country,"/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/country/_first/2/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)" |
|
101 |
county,"/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/county/_first/2/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)" |
|
102 |
state,"/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/stateprovince/_first/2/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)" |
|
103 |
state,"/location/locationplace/*_id/place/stateprovince/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)" |
|
84 |
locality,/location/locationnarrative/_merge/1, |
|
85 |
habitat,"/location/locationnarrative/_merge/3/_label[label=""habitat""]/value","Brad: Free-text description of vegetation community where collected, frequently redundane wrt 'Vegetation'. Bob, Nick: keep as user defined or create special element?" |
|
86 |
latitude_deg,/location/locationplace/*_id/place/*_id/coordinates/latitude_deg/_dms_to_dd/deg, |
|
87 |
latitude_min,/location/locationplace/*_id/place/*_id/coordinates/latitude_deg/_dms_to_dd/min, |
|
88 |
latitude_sec,/location/locationplace/*_id/place/*_id/coordinates/latitude_deg/_dms_to_dd/sec, |
|
89 |
longitude_deg,/location/locationplace/*_id/place/*_id/coordinates/longitude_deg/_dms_to_dd/deg, |
|
90 |
longitude_min,/location/locationplace/*_id/place/*_id/coordinates/longitude_deg/_dms_to_dd/min, |
|
91 |
longitude_sec,/location/locationplace/*_id/place/*_id/coordinates/longitude_deg/_dms_to_dd/sec, |
|
92 |
country,/location/locationplace/*_id/place/country, |
|
93 |
county,/location/locationplace/*_id/place/county, |
|
94 |
latitude_deg,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/latitude_deg/_dms_to_dd/deg", |
|
95 |
latitude_min,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/latitude_deg/_dms_to_dd/min", |
|
96 |
latitude_sec,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/latitude_deg/_dms_to_dd/sec", |
|
97 |
longitude_deg,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/longitude_deg/_dms_to_dd/deg", |
|
98 |
longitude_min,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/longitude_deg/_dms_to_dd/min", |
|
99 |
longitude_sec,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/longitude_deg/_dms_to_dd/sec", |
|
100 |
country,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/country/_first/2, |
|
101 |
county,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/county/_first/2, |
|
102 |
stateProvince,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/stateprovince/_first/2, |
|
103 |
stateProvince,/location/locationplace/*_id/place/stateprovince, |
|
104 |
country,,** No join mapping for country_verbatim ** |
|
105 |
county,,** No join mapping for county_verbatim ** |
|
106 |
habitat,,** No join mapping for habitat_verbatim ** |
|
107 |
lat_deg,,** No join mapping for latitude_deg_verbatim ** |
|
108 |
lat_min,,** No join mapping for latitude_min_verbatim ** |
|
109 |
lat_sec,,** No join mapping for latitude_sec_verbatim ** |
|
110 |
locality,,** No join mapping for locality_verbatim ** |
|
111 |
long_deg,,** No join mapping for longitude_deg_verbatim ** |
|
112 |
long_min,,** No join mapping for longitude_min_verbatim ** |
|
113 |
long_sec,,** No join mapping for longitude_sec_verbatim ** |
|
104 | 114 |
preparation,,** No join mapping for preparations ** |
105 | 115 |
row_num,,** No join mapping for *row_num ** |
116 |
state,,** No join mapping for state_verbatim ** |
inputs/REMIB/Specimen/test.xml.ref | ||
---|---|---|
17 | 17 |
<_locationnarrative_is_cultivated> |
18 | 18 |
<locationnarrative> |
19 | 19 |
<_join> |
20 |
<1> |
|
21 |
<_map> |
|
22 |
<ND></ND> |
|
23 |
<NA></NA> |
|
24 |
<*>*</*> |
|
25 |
<value>$locality</value> |
|
26 |
</_map> |
|
27 |
</1> |
|
28 |
<3> |
|
29 |
<_map> |
|
30 |
<ND></ND> |
|
31 |
<NA></NA> |
|
32 |
<*>*</*> |
|
33 |
<value>$habitat</value> |
|
34 |
</_map> |
|
35 |
</3> |
|
20 |
<1>$locality</1> |
|
21 |
<3>$habitat</3> |
|
36 | 22 |
</_join> |
37 | 23 |
</locationnarrative> |
38 | 24 |
</_locationnarrative_is_cultivated> |
... | ... | |
155 | 141 |
</locationevent> |
156 | 142 |
<locationnarrative> |
157 | 143 |
<_merge> |
158 |
<1> |
|
159 |
<_map> |
|
160 |
<ND></ND> |
|
161 |
<NA></NA> |
|
162 |
<*>*</*> |
|
163 |
<value>$locality</value> |
|
164 |
</_map> |
|
165 |
</1> |
|
144 |
<1>$locality</1> |
|
166 | 145 |
<3> |
167 | 146 |
<_label> |
168 | 147 |
<label>habitat</label> |
169 |
<value> |
|
170 |
<_map> |
|
171 |
<ND></ND> |
|
172 |
<NA></NA> |
|
173 |
<*>*</*> |
|
174 |
<value>$habitat</value> |
|
175 |
</_map> |
|
176 |
</value> |
|
148 |
<value>$habitat</value> |
|
177 | 149 |
</_label> |
178 | 150 |
</3> |
179 | 151 |
</_merge> |
... | ... | |
185 | 157 |
<coordinates> |
186 | 158 |
<latitude_deg> |
187 | 159 |
<_dms_to_dd> |
188 |
<deg> |
|
189 |
<_map> |
|
190 |
<-999></-999> |
|
191 |
<-99></-99> |
|
192 |
<99></99> |
|
193 |
<999></999> |
|
194 |
<1000></1000> |
|
195 |
<*>*</*> |
|
196 |
<value>$lat_deg</value> |
|
197 |
</_map> |
|
198 |
</deg> |
|
199 |
<min> |
|
200 |
<_map> |
|
201 |
<-999></-999> |
|
202 |
<-99></-99> |
|
203 |
<99></99> |
|
204 |
<999></999> |
|
205 |
<1000></1000> |
|
206 |
<*>*</*> |
|
207 |
<value>$lat_min</value> |
|
208 |
</_map> |
|
209 |
</min> |
|
210 |
<sec> |
|
211 |
<_map> |
|
212 |
<-999></-999> |
|
213 |
<-99></-99> |
|
214 |
<99></99> |
|
215 |
<999></999> |
|
216 |
<1000></1000> |
|
217 |
<*>*</*> |
|
218 |
<value>$lat_sec</value> |
|
219 |
</_map> |
|
220 |
</sec> |
|
160 |
<deg>$latitude_deg</deg> |
|
161 |
<min>$latitude_min</min> |
|
162 |
<sec>$latitude_sec</sec> |
|
221 | 163 |
</_dms_to_dd> |
222 | 164 |
</latitude_deg> |
223 | 165 |
<longitude_deg> |
224 | 166 |
<_dms_to_dd> |
225 |
<deg> |
|
226 |
<_map> |
|
227 |
<-999></-999> |
|
228 |
<99></99> |
|
229 |
<999></999> |
|
230 |
<1000></1000> |
|
231 |
<*>*</*> |
|
232 |
<value>$long_deg</value> |
|
233 |
</_map> |
|
234 |
</deg> |
|
235 |
<min> |
|
236 |
<_map> |
|
237 |
<-999></-999> |
|
238 |
<-99></-99> |
|
239 |
<99></99> |
|
240 |
<999></999> |
|
241 |
<1000></1000> |
|
242 |
<*>*</*> |
|
243 |
<value>$long_min</value> |
|
244 |
</_map> |
|
245 |
</min> |
|
246 |
<sec> |
|
247 |
<_map> |
|
248 |
<-999></-999> |
|
249 |
<-99></-99> |
|
250 |
<99></99> |
|
251 |
<999></999> |
|
252 |
<1000></1000> |
|
253 |
<*>*</*> |
|
254 |
<value>$long_sec</value> |
|
255 |
</_map> |
|
256 |
</sec> |
|
167 |
<deg>$longitude_deg</deg> |
|
168 |
<min>$longitude_min</min> |
|
169 |
<sec>$longitude_sec</sec> |
|
257 | 170 |
</_dms_to_dd> |
258 | 171 |
</longitude_deg> |
259 | 172 |
</coordinates> |
260 | 173 |
</coordinates_id> |
261 |
<country> |
|
262 |
<_map> |
|
263 |
<ND></ND> |
|
264 |
<NA></NA> |
|
265 |
<*>*</*> |
|
266 |
<value>$country</value> |
|
267 |
</_map> |
|
268 |
</country> |
|
269 |
<county> |
|
270 |
<_map> |
|
271 |
<ND></ND> |
|
272 |
<NA></NA> |
|
273 |
<*>*</*> |
|
274 |
<value>$county</value> |
|
275 |
</_map> |
|
276 |
</county> |
|
174 |
<country>$country</country> |
|
175 |
<county>$county</county> |
|
277 | 176 |
<matched_place_id> |
278 | 177 |
<place> |
279 | 178 |
<source_id><source><shortname>geoscrub</shortname></source></source_id> |
... | ... | |
282 | 181 |
<source_id><source><shortname>geoscrub</shortname></source></source_id> |
283 | 182 |
<latitude_deg> |
284 | 183 |
<_dms_to_dd> |
285 |
<deg> |
|
286 |
<_map> |
|
287 |
<-999></-999> |
|
288 |
<-99></-99> |
|
289 |
<99></99> |
|
290 |
<999></999> |
|
291 |
<1000></1000> |
|
292 |
<*>*</*> |
|
293 |
<value>$lat_deg</value> |
|
294 |
</_map> |
|
295 |
</deg> |
|
296 |
<min> |
|
297 |
<_map> |
|
298 |
<-999></-999> |
|
299 |
<-99></-99> |
|
300 |
<99></99> |
|
301 |
<999></999> |
|
302 |
<1000></1000> |
|
303 |
<*>*</*> |
|
304 |
<value>$lat_min</value> |
|
305 |
</_map> |
|
306 |
</min> |
|
307 |
<sec> |
|
308 |
<_map> |
|
309 |
<-999></-999> |
|
310 |
<-99></-99> |
|
311 |
<99></99> |
|
312 |
<999></999> |
|
313 |
<1000></1000> |
|
314 |
<*>*</*> |
|
315 |
<value>$lat_sec</value> |
|
316 |
</_map> |
|
317 |
</sec> |
|
184 |
<deg>$latitude_deg</deg> |
|
185 |
<min>$latitude_min</min> |
|
186 |
<sec>$latitude_sec</sec> |
|
318 | 187 |
</_dms_to_dd> |
319 | 188 |
</latitude_deg> |
320 | 189 |
<longitude_deg> |
321 | 190 |
<_dms_to_dd> |
322 |
<deg> |
|
323 |
<_map> |
|
324 |
<-999></-999> |
|
325 |
<99></99> |
|
326 |
<999></999> |
|
327 |
<1000></1000> |
|
328 |
<*>*</*> |
|
329 |
<value>$long_deg</value> |
|
330 |
</_map> |
|
331 |
</deg> |
|
332 |
<min> |
|
333 |
<_map> |
|
334 |
<-999></-999> |
|
335 |
<-99></-99> |
|
336 |
<99></99> |
|
337 |
<999></999> |
|
338 |
<1000></1000> |
|
339 |
<*>*</*> |
|
340 |
<value>$long_min</value> |
|
341 |
</_map> |
|
342 |
</min> |
|
343 |
<sec> |
|
344 |
<_map> |
|
345 |
<-999></-999> |
|
346 |
<-99></-99> |
|
347 |
<99></99> |
|
348 |
<999></999> |
|
349 |
<1000></1000> |
|
350 |
<*>*</*> |
|
351 |
<value>$long_sec</value> |
|
352 |
</_map> |
|
353 |
</sec> |
|
191 |
<deg>$longitude_deg</deg> |
|
192 |
<min>$longitude_min</min> |
|
193 |
<sec>$longitude_sec</sec> |
|
354 | 194 |
</_dms_to_dd> |
355 | 195 |
</longitude_deg> |
356 | 196 |
</coordinates> |
357 | 197 |
</coordinates_id> |
358 |
<country> |
|
359 |
<_map> |
|
360 |
<ND></ND> |
|
361 |
<NA></NA> |
|
362 |
<*>*</*> |
|
363 |
<value>$country</value> |
|
364 |
</_map> |
|
365 |
</country> |
|
366 |
<county> |
|
367 |
<_map> |
|
368 |
<ND></ND> |
|
369 |
<NA></NA> |
|
370 |
<*>*</*> |
|
371 |
<value>$county</value> |
|
372 |
</_map> |
|
373 |
</county> |
|
374 |
<stateprovince> |
|
375 |
<_map> |
|
376 |
<ND></ND> |
|
377 |
<NA></NA> |
|
378 |
<*>*</*> |
|
379 |
<value>$state</value> |
|
380 |
</_map> |
|
381 |
</stateprovince> |
|
198 |
<country>$country</country> |
|
199 |
<county>$county</county> |
|
200 |
<stateprovince>$stateProvince</stateprovince> |
|
382 | 201 |
</place> |
383 | 202 |
</matched_place_id> |
384 |
<stateprovince> |
|
385 |
<_map> |
|
386 |
<ND></ND> |
|
387 |
<NA></NA> |
|
388 |
<*>*</*> |
|
389 |
<value>$state</value> |
|
390 |
</_map> |
|
391 |
</stateprovince> |
|
203 |
<stateprovince>$stateProvince</stateprovince> |
|
392 | 204 |
</place> |
393 | 205 |
</place_id> |
394 | 206 |
</locationplace> |
inputs/REMIB/Specimen/new_terms.csv | ||
---|---|---|
1 | 1 |
acronym,specimenHolderInstitutions,, |
2 |
long_deg,longitude_deg,"/_map:[./{""-999"",99,999,1000}=,*=*]/value",-99 is a valid longitude for Mexico
|
|
3 |
long_min,longitude_min,"/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
|
|
4 |
long_sec,longitude_sec,"/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
|
|
5 |
lat_deg,latitude_deg,"/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
|
|
6 |
lat_min,latitude_min,"/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
|
|
7 |
lat_sec,latitude_sec,"/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
|
|
2 |
long_deg,longitude_deg_verbatim,,
|
|
3 |
long_min,longitude_min_verbatim,,
|
|
4 |
long_sec,longitude_sec_verbatim,,
|
|
5 |
lat_deg,latitude_deg_verbatim,,
|
|
6 |
lat_min,latitude_min_verbatim,,
|
|
7 |
lat_sec,latitude_sec_verbatim,,
|
|
8 | 8 |
coll_day,dayCollected,, |
9 | 9 |
coll_month,monthCollected,, |
10 | 10 |
coll_year,yearCollected,, |
inputs/REMIB/Specimen/unmapped_terms.csv | ||
---|---|---|
1 | 1 |
*row_num |
2 |
country_verbatim |
|
3 |
state_verbatim |
|
4 |
county_verbatim |
|
5 |
locality_verbatim |
|
6 |
longitude_deg_verbatim |
|
7 |
longitude_min_verbatim |
|
8 |
longitude_sec_verbatim |
|
9 |
latitude_deg_verbatim |
|
10 |
latitude_min_verbatim |
|
11 |
latitude_sec_verbatim |
|
12 |
habitat_verbatim |
|
2 | 13 |
preparations |
Also available in: Unified diff
inputs/REMIB/Specimen/: translated single-column filters to postprocessing derived columns, using the steps at wiki.vegpath.org/Switching_to_new-style_import#stage-I-source-specific > "translate single-column filters to postprocessing derived columns". null-mapping filters now use wrappers around new util.map_nulls(). note that the verbatim columns input to the filters need to be renamed to avoid name collisions with their filtered columns, which must be VegCore terms for new-style import.