Project

General

Profile

« Previous | Next » 

Revision 10339

inputs/REMIB/Specimen/: translated single-column filters to postprocessing derived columns, using the steps at wiki.vegpath.org/Switching_to_new-style_import#stage-I-source-specific > "translate single-column filters to postprocessing derived columns". null-mapping filters now use wrappers around new util.map_nulls(). note that the verbatim columns input to the filters need to be renamed to avoid name collisions with their filtered columns, which must be VegCore terms for new-style import.

View differences:

inputs/REMIB/Specimen/postprocess.sql
1 1
SELECT util.search_path_append('util');
2 2

  
3
SELECT util.rename_cols(:table_str, $$
4
country  => country_verbatim,
5
state    => stateProvince_verbatim,
6
county   => county_verbatim,
7
locality => locality_verbatim,
8
long_deg => longitude_deg_verbatim,
9
long_min => longitude_min_verbatim,
10
long_sec => longitude_sec_verbatim,
11
lat_deg  => latitude_deg_verbatim,
12
lat_min  => latitude_min_verbatim,
13
lat_sec  => latitude_sec_verbatim,
14
habitat  => habitat_verbatim,
15
$$::hstore);
16

  
3 17
SELECT create_if_not_exists($$CREATE INDEX "Specimen.acronym" ON $$||:table_str||$$ (acronym)$$);
4 18

  
5 19
-- remove frameshifted rows
6 20
-- rerun time: 6 s ("Time: 5537.211 ms") thanks to index
7 21
DELETE FROM :table WHERE acronym IS NULL;
8 22
DELETE FROM :table WHERE coll_year !~ '^(?:1[7-9]|20)\d{2}$';
9
DELETE FROM :table WHERE country ~ '\d';
10
DELETE FROM :table WHERE long_deg ~ '[[:alpha:]]' AND long_deg NOT IN ('RESTRINGIDO');
11
DELETE FROM :table WHERE long_min !~ '^\d*$';
12
DELETE FROM :table WHERE long_sec !~ '^\d*$';
13
DELETE FROM :table WHERE lat_min  !~ '^\d*$';
14
DELETE FROM :table WHERE lat_sec  !~ '^\d*$';
23
DELETE FROM :table WHERE country_verbatim ~ '\d';
24
DELETE FROM :table WHERE longitude_deg_verbatim ~ '[[:alpha:]]' AND longitude_deg_verbatim NOT IN ('RESTRINGIDO');
25
DELETE FROM :table WHERE longitude_min_verbatim !~ '^\d*$';
26
DELETE FROM :table WHERE longitude_sec_verbatim !~ '^\d*$';
27
DELETE FROM :table WHERE latitude_min_verbatim  !~ '^\d*$';
28
DELETE FROM :table WHERE latitude_sec_verbatim  !~ '^\d*$';
15 29

  
16 30
-- Remove institutions that we have direct data for
17 31
DELETE FROM :table
......
30 44
ORDER BY dataprovider
31 45
*/
32 46
;
47

  
48

  
49
CREATE OR REPLACE FUNCTION map_nulls__text(value text)
50
  RETURNS text AS
51
$BODY$
52
-- "ND = no disponible = not available" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)
53
SELECT util.map_nulls('{ND,NA}', $1)
54
$BODY$
55
  LANGUAGE sql IMMUTABLE STRICT
56
  COST 100;
57

  
58
SELECT mk_derived_col((:table_str, 'country'      ), $$map_nulls__text("country_verbatim"      )$$); -- runtime: 3   min ("Time: 183171.010 ms")
59
SELECT mk_derived_col((:table_str, 'stateProvince'), $$map_nulls__text("stateProvince_verbatim")$$); -- runtime: 2.5 min ("Time: 159935.791 ms")
60
SELECT mk_derived_col((:table_str, 'county'       ), $$map_nulls__text("county_verbatim"       )$$); -- runtime: 2.5 min ("Time: 151076.487 ms")
61
SELECT mk_derived_col((:table_str, 'locality'     ), $$map_nulls__text("locality_verbatim"     )$$); -- runtime: 2.5 min ("Time: 155188.486 ms")
62
SELECT mk_derived_col((:table_str, 'habitat'      ), $$map_nulls__text("habitat_verbatim"      )$$); -- runtime: 2   min ("Time: 131904.068 ms")
63

  
64

  
65
CREATE OR REPLACE FUNCTION map_nulls__coord__longitude(value text)
66
  RETURNS double precision AS
67
$BODY$
68
-- TODO: sometimes also +-99, but not when min/sec are valid
69
SELECT util.map_nulls('{-999,999,1000}',
70
       util.map_nulls('{RESTRINGIDO}', $1)::double precision)
71
$BODY$
72
  LANGUAGE sql IMMUTABLE STRICT
73
  COST 100;
74

  
75
CREATE OR REPLACE FUNCTION map_nulls__coord__other(value text)
76
  RETURNS double precision AS
77
$BODY$
78
-- also map nulls that are valid longitudes
79
SELECT util.map_nulls('{-99,99}', map_nulls__coord__longitude($1))
80
$BODY$
81
  LANGUAGE sql IMMUTABLE STRICT
82
  COST 100;
83

  
84
SELECT mk_derived_col((:table_str, 'longitude_deg'), $$map_nulls__coord__longitude("longitude_deg_verbatim")$$); -- runtime: 6   min ("Time: 367070.877 ms")
85
SELECT mk_derived_col((:table_str, 'longitude_min'), $$map_nulls__coord__other    ("longitude_min_verbatim")$$);
86
SELECT mk_derived_col((:table_str, 'longitude_sec'), $$map_nulls__coord__other    ("longitude_sec_verbatim")$$); -- runtime: 8   min ("Time: 473198.465 ms")
87
SELECT mk_derived_col((:table_str, 'latitude_deg' ), $$map_nulls__coord__other    ("latitude_deg_verbatim" )$$); -- runtime: 8.5 min ("Time: 508835.119 ms")
88
SELECT mk_derived_col((:table_str, 'latitude_min' ), $$map_nulls__coord__other    ("latitude_min_verbatim" )$$); -- runtime: 8   min ("Time: 484806.499 ms")
89
SELECT mk_derived_col((:table_str, 'latitude_sec' ), $$map_nulls__coord__other    ("latitude_sec_verbatim" )$$); -- runtime: 8.5 min ("Time: 505171.027 ms")
inputs/REMIB/Specimen/map.csv
13 13
family,family,,
14 14
genus,genus,,
15 15
specificEpithet,specificEpithet,,
16
country,country,"/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)"
17
state,stateProvince,"/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)"
18
county,county,"/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)"
19
locality,locality,"/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)"
20
long_deg,longitude_deg,"/_map:[./{""-999"",99,999,1000}=,*=*]/value",-99 is a valid longitude for Mexico
21
long_min,longitude_min,"/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
22
long_sec,longitude_sec,"/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
23
lat_deg,latitude_deg,"/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
24
lat_min,latitude_min,"/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
25
lat_sec,latitude_sec,"/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
16
country,country_verbatim,,
17
state,state_verbatim,,
18
county,county_verbatim,,
19
locality,locality_verbatim,,
20
long_deg,longitude_deg_verbatim,,
21
long_min,longitude_min_verbatim,,
22
long_sec,longitude_sec_verbatim,,
23
lat_deg,latitude_deg_verbatim,,
24
lat_min,latitude_min_verbatim,,
25
lat_sec,latitude_sec_verbatim,,
26 26
coll_day,dayCollected,,
27 27
coll_month,monthCollected,,
28 28
coll_year,yearCollected,,
29 29
collector,recordedBy,,
30
habitat,habitat,"/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)"
30
habitat,habitat_verbatim,,
31 31
preparation,preparations,,
32
country,country,,
33
stateProvince,stateProvince,,
34
county,county,,
35
locality,locality,,
36
habitat,habitat,,
37
longitude_deg,longitude_deg,,
38
longitude_min,longitude_min,,
39
longitude_sec,longitude_sec,,
40
latitude_deg,latitude_deg,,
41
latitude_min,latitude_min,,
42
latitude_sec,latitude_sec,,
inputs/REMIB/Specimen/VegBIEN.csv
37 37
GROUP BY acronym, accession_number
38 38
HAVING count(*) > 1
39 39
-----"
40
locality,"/location/iscultivated/_or/2/_locationnarrative_is_cultivated/locationnarrative/_join/1/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)"
41
habitat,"/location/iscultivated/_or/2/_locationnarrative_is_cultivated/locationnarrative/_join/3/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)"
40
locality,/location/iscultivated/_or/2/_locationnarrative_is_cultivated/locationnarrative/_join/1,
41
habitat,/location/iscultivated/_or/2/_locationnarrative_is_cultivated/locationnarrative/_join/3,
42 42
coll_day,"/location/locationevent/taxonoccurrence/aggregateoccurrence/collectiondate/_alt/2/_date/day/_nullIf:[null=0,type=float]/value",
43 43
coll_month,"/location/locationevent/taxonoccurrence/aggregateoccurrence/collectiondate/_alt/2/_date/month/_nullIf:[null=0,type=float]/value",
44 44
coll_year,"/location/locationevent/taxonoccurrence/aggregateoccurrence/collectiondate/_alt/2/_date/year/_nullIf:[null=0,type=float]/value",
......
81 81
GROUP BY acronym, accession_number
82 82
HAVING count(*) > 1
83 83
-----"
84
locality,"/location/locationnarrative/_merge/1/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)"
85
habitat,"/location/locationnarrative/_merge/3/_label[label=""habitat""]/value/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB); Brad: Free-text description of vegetation community where collected, frequently redundane wrt 'Vegetation'. Bob, Nick: keep as user defined or create special element?"
86
lat_deg,"/location/locationplace/*_id/place/*_id/coordinates/latitude_deg/_dms_to_dd/deg/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
87
lat_min,"/location/locationplace/*_id/place/*_id/coordinates/latitude_deg/_dms_to_dd/min/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
88
lat_sec,"/location/locationplace/*_id/place/*_id/coordinates/latitude_deg/_dms_to_dd/sec/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
89
long_deg,"/location/locationplace/*_id/place/*_id/coordinates/longitude_deg/_dms_to_dd/deg/_map:[./{""-999"",99,999,1000}=,*=*]/value",-99 is a valid longitude for Mexico
90
long_min,"/location/locationplace/*_id/place/*_id/coordinates/longitude_deg/_dms_to_dd/min/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
91
long_sec,"/location/locationplace/*_id/place/*_id/coordinates/longitude_deg/_dms_to_dd/sec/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
92
country,"/location/locationplace/*_id/place/country/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)"
93
county,"/location/locationplace/*_id/place/county/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)"
94
lat_deg,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/latitude_deg/_dms_to_dd/deg/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
95
lat_min,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/latitude_deg/_dms_to_dd/min/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
96
lat_sec,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/latitude_deg/_dms_to_dd/sec/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
97
long_deg,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/longitude_deg/_dms_to_dd/deg/_map:[./{""-999"",99,999,1000}=,*=*]/value",-99 is a valid longitude for Mexico
98
long_min,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/longitude_deg/_dms_to_dd/min/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
99
long_sec,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/longitude_deg/_dms_to_dd/sec/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
100
country,"/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/country/_first/2/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)"
101
county,"/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/county/_first/2/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)"
102
state,"/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/stateprovince/_first/2/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)"
103
state,"/location/locationplace/*_id/place/stateprovince/_map:[./{ND,NA}=,*=*]/value","""ND = no disponible = not available"" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)"
84
locality,/location/locationnarrative/_merge/1,
85
habitat,"/location/locationnarrative/_merge/3/_label[label=""habitat""]/value","Brad: Free-text description of vegetation community where collected, frequently redundane wrt 'Vegetation'. Bob, Nick: keep as user defined or create special element?"
86
latitude_deg,/location/locationplace/*_id/place/*_id/coordinates/latitude_deg/_dms_to_dd/deg,
87
latitude_min,/location/locationplace/*_id/place/*_id/coordinates/latitude_deg/_dms_to_dd/min,
88
latitude_sec,/location/locationplace/*_id/place/*_id/coordinates/latitude_deg/_dms_to_dd/sec,
89
longitude_deg,/location/locationplace/*_id/place/*_id/coordinates/longitude_deg/_dms_to_dd/deg,
90
longitude_min,/location/locationplace/*_id/place/*_id/coordinates/longitude_deg/_dms_to_dd/min,
91
longitude_sec,/location/locationplace/*_id/place/*_id/coordinates/longitude_deg/_dms_to_dd/sec,
92
country,/location/locationplace/*_id/place/country,
93
county,/location/locationplace/*_id/place/county,
94
latitude_deg,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/latitude_deg/_dms_to_dd/deg",
95
latitude_min,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/latitude_deg/_dms_to_dd/min",
96
latitude_sec,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/latitude_deg/_dms_to_dd/sec",
97
longitude_deg,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/longitude_deg/_dms_to_dd/deg",
98
longitude_min,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/longitude_deg/_dms_to_dd/min",
99
longitude_sec,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/longitude_deg/_dms_to_dd/sec",
100
country,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/country/_first/2,
101
county,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/county/_first/2,
102
stateProvince,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/stateprovince/_first/2,
103
stateProvince,/location/locationplace/*_id/place/stateprovince,
104
country,,** No join mapping for country_verbatim ** 
105
county,,** No join mapping for county_verbatim ** 
106
habitat,,** No join mapping for habitat_verbatim ** 
107
lat_deg,,** No join mapping for latitude_deg_verbatim ** 
108
lat_min,,** No join mapping for latitude_min_verbatim ** 
109
lat_sec,,** No join mapping for latitude_sec_verbatim ** 
110
locality,,** No join mapping for locality_verbatim ** 
111
long_deg,,** No join mapping for longitude_deg_verbatim ** 
112
long_min,,** No join mapping for longitude_min_verbatim ** 
113
long_sec,,** No join mapping for longitude_sec_verbatim ** 
104 114
preparation,,** No join mapping for preparations ** 
105 115
row_num,,** No join mapping for *row_num ** 
116
state,,** No join mapping for state_verbatim ** 
inputs/REMIB/Specimen/test.xml.ref
17 17
                            <_locationnarrative_is_cultivated>
18 18
                                <locationnarrative>
19 19
                                    <_join>
20
                                        <1>
21
                                            <_map>
22
                                                <ND></ND>
23
                                                <NA></NA>
24
                                                <*>*</*>
25
                                                <value>$locality</value>
26
                                            </_map>
27
                                        </1>
28
                                        <3>
29
                                            <_map>
30
                                                <ND></ND>
31
                                                <NA></NA>
32
                                                <*>*</*>
33
                                                <value>$habitat</value>
34
                                            </_map>
35
                                        </3>
20
                                        <1>$locality</1>
21
                                        <3>$habitat</3>
36 22
                                    </_join>
37 23
                                </locationnarrative>
38 24
                            </_locationnarrative_is_cultivated>
......
155 141
                        </locationevent>
156 142
                        <locationnarrative>
157 143
                            <_merge>
158
                                <1>
159
                                    <_map>
160
                                        <ND></ND>
161
                                        <NA></NA>
162
                                        <*>*</*>
163
                                        <value>$locality</value>
164
                                    </_map>
165
                                </1>
144
                                <1>$locality</1>
166 145
                                <3>
167 146
                                    <_label>
168 147
                                        <label>habitat</label>
169
                                        <value>
170
                                            <_map>
171
                                                <ND></ND>
172
                                                <NA></NA>
173
                                                <*>*</*>
174
                                                <value>$habitat</value>
175
                                            </_map>
176
                                        </value>
148
                                        <value>$habitat</value>
177 149
                                    </_label>
178 150
                                </3>
179 151
                            </_merge>
......
185 157
                                        <coordinates>
186 158
                                            <latitude_deg>
187 159
                                                <_dms_to_dd>
188
                                                    <deg>
189
                                                        <_map>
190
                                                            <-999></-999>
191
                                                            <-99></-99>
192
                                                            <99></99>
193
                                                            <999></999>
194
                                                            <1000></1000>
195
                                                            <*>*</*>
196
                                                            <value>$lat_deg</value>
197
                                                        </_map>
198
                                                    </deg>
199
                                                    <min>
200
                                                        <_map>
201
                                                            <-999></-999>
202
                                                            <-99></-99>
203
                                                            <99></99>
204
                                                            <999></999>
205
                                                            <1000></1000>
206
                                                            <*>*</*>
207
                                                            <value>$lat_min</value>
208
                                                        </_map>
209
                                                    </min>
210
                                                    <sec>
211
                                                        <_map>
212
                                                            <-999></-999>
213
                                                            <-99></-99>
214
                                                            <99></99>
215
                                                            <999></999>
216
                                                            <1000></1000>
217
                                                            <*>*</*>
218
                                                            <value>$lat_sec</value>
219
                                                        </_map>
220
                                                    </sec>
160
                                                    <deg>$latitude_deg</deg>
161
                                                    <min>$latitude_min</min>
162
                                                    <sec>$latitude_sec</sec>
221 163
                                                </_dms_to_dd>
222 164
                                            </latitude_deg>
223 165
                                            <longitude_deg>
224 166
                                                <_dms_to_dd>
225
                                                    <deg>
226
                                                        <_map>
227
                                                            <-999></-999>
228
                                                            <99></99>
229
                                                            <999></999>
230
                                                            <1000></1000>
231
                                                            <*>*</*>
232
                                                            <value>$long_deg</value>
233
                                                        </_map>
234
                                                    </deg>
235
                                                    <min>
236
                                                        <_map>
237
                                                            <-999></-999>
238
                                                            <-99></-99>
239
                                                            <99></99>
240
                                                            <999></999>
241
                                                            <1000></1000>
242
                                                            <*>*</*>
243
                                                            <value>$long_min</value>
244
                                                        </_map>
245
                                                    </min>
246
                                                    <sec>
247
                                                        <_map>
248
                                                            <-999></-999>
249
                                                            <-99></-99>
250
                                                            <99></99>
251
                                                            <999></999>
252
                                                            <1000></1000>
253
                                                            <*>*</*>
254
                                                            <value>$long_sec</value>
255
                                                        </_map>
256
                                                    </sec>
167
                                                    <deg>$longitude_deg</deg>
168
                                                    <min>$longitude_min</min>
169
                                                    <sec>$longitude_sec</sec>
257 170
                                                </_dms_to_dd>
258 171
                                            </longitude_deg>
259 172
                                        </coordinates>
260 173
                                    </coordinates_id>
261
                                    <country>
262
                                        <_map>
263
                                            <ND></ND>
264
                                            <NA></NA>
265
                                            <*>*</*>
266
                                            <value>$country</value>
267
                                        </_map>
268
                                    </country>
269
                                    <county>
270
                                        <_map>
271
                                            <ND></ND>
272
                                            <NA></NA>
273
                                            <*>*</*>
274
                                            <value>$county</value>
275
                                        </_map>
276
                                    </county>
174
                                    <country>$country</country>
175
                                    <county>$county</county>
277 176
                                    <matched_place_id>
278 177
                                        <place>
279 178
                                            <source_id><source><shortname>geoscrub</shortname></source></source_id>
......
282 181
                                                    <source_id><source><shortname>geoscrub</shortname></source></source_id>
283 182
                                                    <latitude_deg>
284 183
                                                        <_dms_to_dd>
285
                                                            <deg>
286
                                                                <_map>
287
                                                                    <-999></-999>
288
                                                                    <-99></-99>
289
                                                                    <99></99>
290
                                                                    <999></999>
291
                                                                    <1000></1000>
292
                                                                    <*>*</*>
293
                                                                    <value>$lat_deg</value>
294
                                                                </_map>
295
                                                            </deg>
296
                                                            <min>
297
                                                                <_map>
298
                                                                    <-999></-999>
299
                                                                    <-99></-99>
300
                                                                    <99></99>
301
                                                                    <999></999>
302
                                                                    <1000></1000>
303
                                                                    <*>*</*>
304
                                                                    <value>$lat_min</value>
305
                                                                </_map>
306
                                                            </min>
307
                                                            <sec>
308
                                                                <_map>
309
                                                                    <-999></-999>
310
                                                                    <-99></-99>
311
                                                                    <99></99>
312
                                                                    <999></999>
313
                                                                    <1000></1000>
314
                                                                    <*>*</*>
315
                                                                    <value>$lat_sec</value>
316
                                                                </_map>
317
                                                            </sec>
184
                                                            <deg>$latitude_deg</deg>
185
                                                            <min>$latitude_min</min>
186
                                                            <sec>$latitude_sec</sec>
318 187
                                                        </_dms_to_dd>
319 188
                                                    </latitude_deg>
320 189
                                                    <longitude_deg>
321 190
                                                        <_dms_to_dd>
322
                                                            <deg>
323
                                                                <_map>
324
                                                                    <-999></-999>
325
                                                                    <99></99>
326
                                                                    <999></999>
327
                                                                    <1000></1000>
328
                                                                    <*>*</*>
329
                                                                    <value>$long_deg</value>
330
                                                                </_map>
331
                                                            </deg>
332
                                                            <min>
333
                                                                <_map>
334
                                                                    <-999></-999>
335
                                                                    <-99></-99>
336
                                                                    <99></99>
337
                                                                    <999></999>
338
                                                                    <1000></1000>
339
                                                                    <*>*</*>
340
                                                                    <value>$long_min</value>
341
                                                                </_map>
342
                                                            </min>
343
                                                            <sec>
344
                                                                <_map>
345
                                                                    <-999></-999>
346
                                                                    <-99></-99>
347
                                                                    <99></99>
348
                                                                    <999></999>
349
                                                                    <1000></1000>
350
                                                                    <*>*</*>
351
                                                                    <value>$long_sec</value>
352
                                                                </_map>
353
                                                            </sec>
191
                                                            <deg>$longitude_deg</deg>
192
                                                            <min>$longitude_min</min>
193
                                                            <sec>$longitude_sec</sec>
354 194
                                                        </_dms_to_dd>
355 195
                                                    </longitude_deg>
356 196
                                                </coordinates>
357 197
                                            </coordinates_id>
358
                                            <country>
359
                                                <_map>
360
                                                    <ND></ND>
361
                                                    <NA></NA>
362
                                                    <*>*</*>
363
                                                    <value>$country</value>
364
                                                </_map>
365
                                            </country>
366
                                            <county>
367
                                                <_map>
368
                                                    <ND></ND>
369
                                                    <NA></NA>
370
                                                    <*>*</*>
371
                                                    <value>$county</value>
372
                                                </_map>
373
                                            </county>
374
                                            <stateprovince>
375
                                                <_map>
376
                                                    <ND></ND>
377
                                                    <NA></NA>
378
                                                    <*>*</*>
379
                                                    <value>$state</value>
380
                                                </_map>
381
                                            </stateprovince>
198
                                            <country>$country</country>
199
                                            <county>$county</county>
200
                                            <stateprovince>$stateProvince</stateprovince>
382 201
                                        </place>
383 202
                                    </matched_place_id>
384
                                    <stateprovince>
385
                                        <_map>
386
                                            <ND></ND>
387
                                            <NA></NA>
388
                                            <*>*</*>
389
                                            <value>$state</value>
390
                                        </_map>
391
                                    </stateprovince>
203
                                    <stateprovince>$stateProvince</stateprovince>
392 204
                                </place>
393 205
                            </place_id>
394 206
                        </locationplace>
inputs/REMIB/Specimen/new_terms.csv
1 1
acronym,specimenHolderInstitutions,,
2
long_deg,longitude_deg,"/_map:[./{""-999"",99,999,1000}=,*=*]/value",-99 is a valid longitude for Mexico
3
long_min,longitude_min,"/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
4
long_sec,longitude_sec,"/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
5
lat_deg,latitude_deg,"/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
6
lat_min,latitude_min,"/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
7
lat_sec,latitude_sec,"/_map:[./{""-999"",""-99"",99,999,1000}=,*=*]/value",
2
long_deg,longitude_deg_verbatim,,
3
long_min,longitude_min_verbatim,,
4
long_sec,longitude_sec_verbatim,,
5
lat_deg,latitude_deg_verbatim,,
6
lat_min,latitude_min_verbatim,,
7
lat_sec,latitude_sec_verbatim,,
8 8
coll_day,dayCollected,,
9 9
coll_month,monthCollected,,
10 10
coll_year,yearCollected,,
inputs/REMIB/Specimen/unmapped_terms.csv
1 1
*row_num
2
country_verbatim
3
state_verbatim
4
county_verbatim
5
locality_verbatim
6
longitude_deg_verbatim
7
longitude_min_verbatim
8
longitude_sec_verbatim
9
latitude_deg_verbatim
10
latitude_min_verbatim
11
latitude_sec_verbatim
12
habitat_verbatim
2 13
preparations

Also available in: Unified diff