Revision 9771
Added by Aaron Marcuse-Kubitza over 11 years ago
inputs/GBIF/raw_occurrence_record/new_terms.csv | ||
---|---|---|
1 |
row_num,raw_occurrence_record-row_num,, |
|
2 |
catalogue_number,accessionNumber,, |
|
3 |
rank,taxonRank,, |
|
4 |
order_rank,order,, |
|
5 |
lat_long_precision,coordinatePrecision_deg,,Tentative |
|
6 |
max_altitude,maximumElevationInMeters,, |
|
7 |
min_altitude,minimumElevationInMeters,, |
|
8 |
min_depth,minimumDepthInMeters,, |
|
9 |
max_depth,maximumDepthInMeters,, |
|
10 |
collector_name,recordedBy,, |
|
11 |
identifier_name,identifiedBy,, |
|
12 |
identification_date,dateIdentified,, |
inputs/GBIF/raw_occurrence_record/unmapped_terms.csv | ||
---|---|---|
1 |
raw_occurrence_record-row_num |
|
2 |
*data_provider_id |
|
3 |
*data_resource_id |
|
4 |
*resource_access_point_id |
|
5 |
authors |
|
6 |
coordinatePrecision_deg |
|
7 |
*altitude_precision |
|
8 |
*depth_precision |
|
9 |
year |
|
10 |
month |
|
11 |
day |
|
12 |
basisOfRecord |
|
13 |
*unit_qualifier |
|
14 |
*created |
|
15 |
modified |
|
16 |
*deleted |
inputs/GBIF/raw_occurrence_record/header.csv | ||
---|---|---|
1 |
row_num,id,data_provider_id,data_resource_id,resource_access_point_id,institution_code,collection_code,catalogue_number,scientific_name,author,rank,kingdom,phylum,class,order_rank,family,genus,species,subspecies,latitude,longitude,lat_long_precision,max_altitude,min_altitude,altitude_precision,min_depth,max_depth,depth_precision,continent_ocean,country,state_province,county,collector_name,locality,year,month,day,basis_of_record,identifier_name,identification_date,unit_qualifier,created,modified,deleted |
inputs/GBIF/raw_occurrence_record/postprocess.sql | ||
---|---|---|
1 |
CREATE INDEX ON :table ("institution_code"); |
|
2 |
|
|
3 |
-- Remove institutions that we have direct data for |
|
4 |
DELETE FROM :table |
|
5 |
WHERE institution_code IN ( |
|
6 |
'ACAD' |
|
7 |
, 'ARIZ' |
|
8 |
, 'BRIT' |
|
9 |
, 'MO' |
|
10 |
, 'MT' |
|
11 |
, 'NY' |
|
12 |
, 'QFA' |
|
13 |
, 'TEX' |
|
14 |
, 'TRT' |
|
15 |
, 'TRTE' |
|
16 |
, 'U' |
|
17 |
, 'UBC' |
|
18 |
, 'WIN' |
|
19 |
) |
|
20 |
/* list obtained using the following on r9459: |
|
21 |
SELECT DISTINCT dataprovider |
|
22 |
FROM sourcelist |
|
23 |
JOIN provider_count ON provider_count.dataprovider = sourcelist.name |
|
24 |
WHERE source_id = source_by_shortname('GBIF') |
|
25 |
ORDER BY dataprovider |
|
26 |
*/ |
|
27 |
; |
inputs/GBIF/raw_occurrence_record/run | ||
---|---|---|
1 |
#!/bin/bash -e |
|
2 |
. "$(dirname "${BASH_SOURCE[0]}")"/../table.run |
|
3 |
|
|
4 |
if self_not_included; then |
|
5 |
|
|
6 |
herbaria_filter.ih.csv_/make() |
|
7 |
{ |
|
8 |
echo_func; set_make_vars; check_target_exists |
|
9 |
|
|
10 |
use_local |
|
11 |
limit= query='SELECT DISTINCT acronym FROM herbaria.ih ORDER BY acronym' \ |
|
12 |
to_target pg_export |
|
13 |
} |
|
14 |
|
|
15 |
plant_fraction/make() # runtime: 1 hr ("10603 rows affected (53 min 44.68 sec)") |
|
16 |
{ |
|
17 |
echo_func; set_make_vars; mk_table_esc |
|
18 |
|
|
19 |
use_local_remote |
|
20 |
if remaking; then table=plant_fraction table_esc= mk_drop|mysql_ANSI; fi |
|
21 |
(benign_error=1 mysql <<'EOF' |
|
22 |
CREATE TABLE `plant_fraction` ( |
|
23 |
`institution_code` varchar(255) NOT NULL PRIMARY KEY, |
|
24 |
`plant_fraction` float NOT NULL, |
|
25 |
KEY `plant_fraction` (`plant_fraction`) |
|
26 |
) ENGINE=MyISAM DEFAULT CHARSET=utf8 |
|
27 |
EOF |
|
28 |
) || table=plant_fraction skip_table || return 0 |
|
29 |
mysql_ANSI <<EOF |
|
30 |
INSERT INTO "plant_fraction" |
|
31 |
SELECT |
|
32 |
institution_code |
|
33 |
, COUNT(NULLIF( |
|
34 |
family LIKE '%aceae%' OR family IN ('Compositae','Gramineae','Palmae','Guttiferae','Cruciferae','Labiatae','Umbelliferae','Leguminosae') |
|
35 |
, false))/COUNT(*) |
|
36 |
AS plant_fraction |
|
37 |
FROM ${limit:+(SELECT * FROM }$table_esc ${limit:+$(prefix= limit)) s} |
|
38 |
GROUP BY institution_code |
|
39 |
ORDER BY institution_code |
|
40 |
EOF |
|
41 |
} |
|
42 |
|
|
43 |
plant_fraction/seal() |
|
44 |
{ |
|
45 |
echo_func; set_make_vars |
|
46 |
|
|
47 |
use_local_remote |
|
48 |
table=plant_fraction table_esc= mysql_seal_table |
|
49 |
} |
|
50 |
|
|
51 |
plant_fraction_for_herbaria_filter/make() |
|
52 |
{ |
|
53 |
echo_func; set_make_vars; mk_table_esc |
|
54 |
|
|
55 |
plant_fraction/make |
|
56 |
use_local_remote |
|
57 |
if remaking; then |
|
58 |
table=plant_fraction_for_herbaria_filter table_esc= mk_drop|mysql_ANSI |
|
59 |
fi |
|
60 |
(benign_error=1 mysql <<'EOF' |
|
61 |
CREATE VIEW `plant_fraction_for_herbaria_filter` AS |
|
62 |
SELECT * FROM `plant_fraction` |
|
63 |
WHERE plant_fraction >= 0.8 |
|
64 |
EOF |
|
65 |
) || table=plant_fraction_for_herbaria_filter skip_table || return 0 |
|
66 |
} |
|
67 |
|
|
68 |
herbaria_filter/make() # runtime: ~0 ("0m2.235s"): CSV import, index scans only |
|
69 |
{ |
|
70 |
echo_func; set_make_vars |
|
71 |
|
|
72 |
use_local_remote |
|
73 |
local table="$target_stem" |
|
74 |
if remaking; then mk_drop|mysql_ANSI; fi |
|
75 |
|
|
76 |
herbaria_filter.ih.csv_/make |
|
77 |
plant_fraction_for_herbaria_filter/make |
|
78 |
|
|
79 |
(benign_error=1 mysql <<'EOF' |
|
80 |
CREATE TABLE `herbaria_filter` ( |
|
81 |
`institution_code` varchar(255) NOT NULL PRIMARY KEY |
|
82 |
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='contains all of IH.vegpath.org?&Index+Herbariorum' |
|
83 |
EOF |
|
84 |
) || skip_table || return 0 |
|
85 |
|
|
86 |
time ( |
|
87 |
mysql_ANSI <<EOF |
|
88 |
INSERT INTO "herbaria_filter" |
|
89 |
SELECT institution_code FROM plant_fraction_for_herbaria_filter |
|
90 |
EOF |
|
91 |
append=1 mysql_import <"$top_dir"/herbaria_filter.ih.csv_ |
|
92 |
) || return |
|
93 |
} |
|
94 |
|
|
95 |
herbaria_filter/seal() |
|
96 |
{ |
|
97 |
echo_func; set_make_vars |
|
98 |
|
|
99 |
use_local_remote |
|
100 |
table=herbaria_filter table_esc= mysql_seal_table |
|
101 |
} |
|
102 |
|
|
103 |
# table.tsv.gz/make() runtime: 35 min ("34m55.802s") |
|
104 |
|
|
105 |
func_override table.tsv/make__src_table_run |
|
106 |
table.tsv/make() # usage: [remake=1] [limit=...] .../run table.tsv/make |
|
107 |
{ |
|
108 |
echo_func; set_make_vars; check_target_exists |
|
109 |
|
|
110 |
herbaria_filter/make |
|
111 |
filter=" |
|
112 |
deleted IS NULL |
|
113 |
AND institution_code IN (SELECT * FROM herbaria_filter)" \ |
|
114 |
self_make table.tsv/make__src_table_run |
|
115 |
} |
|
116 |
|
|
117 |
fi |
|
118 | 0 |
inputs/GBIF/raw_occurrence_record/map.csv | ||
---|---|---|
1 |
GBIF,VegCore,Filter,Comments |
|
2 |
row_num,raw_occurrence_record-row_num,, |
|
3 |
id,dataProviderRecordID,, |
|
4 |
data_provider_id,*data_provider_id,, |
|
5 |
data_resource_id,*data_resource_id,, |
|
6 |
resource_access_point_id,*resource_access_point_id,, |
|
7 |
institution_code,specimenHolderInstitutions,, |
|
8 |
collection_code,collection,, |
|
9 |
catalogue_number,accessionNumber,, |
|
10 |
scientific_name,scientificName,, |
|
11 |
author,authors,, |
|
12 |
rank,taxonRank,, |
|
13 |
kingdom,kingdom,, |
|
14 |
phylum,phylum,, |
|
15 |
class,class,, |
|
16 |
order_rank,order,, |
|
17 |
family,family,, |
|
18 |
genus,genus,, |
|
19 |
species,specificEpithet,, |
|
20 |
subspecies,subspecies,, |
|
21 |
latitude,decimalLatitude,, |
|
22 |
longitude,decimalLongitude,, |
|
23 |
lat_long_precision,coordinatePrecision_deg,,Tentative |
|
24 |
max_altitude,maximumElevationInMeters,, |
|
25 |
min_altitude,minimumElevationInMeters,, |
|
26 |
altitude_precision,*altitude_precision,, |
|
27 |
min_depth,minimumDepthInMeters,, |
|
28 |
max_depth,maximumDepthInMeters,, |
|
29 |
depth_precision,*depth_precision,, |
|
30 |
continent_ocean,continent,, |
|
31 |
country,country,, |
|
32 |
state_province,stateProvince,, |
|
33 |
county,county,, |
|
34 |
collector_name,recordedBy,, |
|
35 |
locality,locality,, |
|
36 |
year,year,, |
|
37 |
month,month,, |
|
38 |
day,day,, |
|
39 |
basis_of_record,basisOfRecord,, |
|
40 |
identifier_name,identifiedBy,, |
|
41 |
identification_date,dateIdentified,, |
|
42 |
unit_qualifier,*unit_qualifier,, |
|
43 |
created,*created,, |
|
44 |
modified,modified,, |
|
45 |
deleted,*deleted,, |
inputs/GBIF/raw_occurrence_record/test.xml.ref | ||
---|---|---|
1 |
Put template: |
|
2 |
<VegBIEN> |
|
3 |
<_setDefault id="-1"> |
|
4 |
<source_id><source><shortname>GBIF</shortname></source></source_id> |
|
5 |
<path> |
|
6 |
<_simplifyPath> |
|
7 |
<next>parent_id</next> |
|
8 |
<path> |
|
9 |
<location> |
|
10 |
<authorlocationcode> |
|
11 |
<_alt> |
|
12 |
<1>$id</1> |
|
13 |
<2> |
|
14 |
<_join> |
|
15 |
<1>$institution_code</1> |
|
16 |
<2>$collection_code</2> |
|
17 |
<3>$catalogue_number</3> |
|
18 |
</_join> |
|
19 |
</2> |
|
20 |
</_alt> |
|
21 |
</authorlocationcode> |
|
22 |
<elevation_m> |
|
23 |
<_avg> |
|
24 |
<1>$min_altitude</1> |
|
25 |
<2>$max_altitude</2> |
|
26 |
</_avg> |
|
27 |
</elevation_m> |
|
28 |
<iscultivated><_locationnarrative_is_cultivated><locationnarrative>$locality</locationnarrative></_locationnarrative_is_cultivated></iscultivated> |
|
29 |
<locationevent> |
|
30 |
<taxonoccurrence> |
|
31 |
<aggregateoccurrence> |
|
32 |
<plantobservation> |
|
33 |
<specimenreplicate> |
|
34 |
<catalognumber_dwc>$catalogue_number</catalognumber_dwc> |
|
35 |
<collectioncode_dwc>$collection_code</collectioncode_dwc> |
|
36 |
<institution_id> |
|
37 |
<sourcelist> |
|
38 |
<name>$institution_code</name> |
|
39 |
<sourcename> |
|
40 |
<name> |
|
41 |
<_split> |
|
42 |
<separator>[,;] *</separator> |
|
43 |
<value>$institution_code</value> |
|
44 |
</_split> |
|
45 |
</name> |
|
46 |
</sourcename> |
|
47 |
</sourcelist> |
|
48 |
</institution_id> |
|
49 |
<sourceaccessioncode>$id</sourceaccessioncode> |
|
50 |
</specimenreplicate> |
|
51 |
</plantobservation> |
|
52 |
</aggregateoccurrence> |
|
53 |
<collector_id><party><fullname>$collector_name</fullname></party></collector_id> |
|
54 |
<sourceaccessioncode>$id</sourceaccessioncode> |
|
55 |
<taxondetermination> |
|
56 |
<party_id><party><fullname>$identifier_name</fullname></party></party_id> |
|
57 |
<taxonverbatim_id> |
|
58 |
<taxonverbatim> |
|
59 |
<taxonlabel_id> |
|
60 |
<taxonlabel> |
|
61 |
<parent_id> |
|
62 |
<taxonlabel> |
|
63 |
<rank>$rank</rank> |
|
64 |
<parent_id> |
|
65 |
<taxonlabel> |
|
66 |
<rank>cultivar</rank> |
|
67 |
<parent_id> |
|
68 |
<taxonlabel> |
|
69 |
<rank>forma</rank> |
|
70 |
<parent_id> |
|
71 |
<taxonlabel> |
|
72 |
<rank>variety</rank> |
|
73 |
<parent_id> |
|
74 |
<taxonlabel> |
|
75 |
<rank>subspecies</rank> |
|
76 |
<taxonepithet>$subspecies</taxonepithet> |
|
77 |
<parent_id> |
|
78 |
<taxonlabel> |
|
79 |
<rank>species</rank> |
|
80 |
<taxonepithet>$species</taxonepithet> |
|
81 |
<parent_id> |
|
82 |
<taxonlabel> |
|
83 |
<rank>genus</rank> |
|
84 |
<taxonepithet>$genus</taxonepithet> |
|
85 |
<parent_id> |
|
86 |
<taxonlabel> |
|
87 |
<rank>family</rank> |
|
88 |
<taxonepithet>$family</taxonepithet> |
|
89 |
<parent_id> |
|
90 |
<taxonlabel> |
|
91 |
<rank>order</rank> |
|
92 |
<taxonepithet>$order_rank</taxonepithet> |
|
93 |
<parent_id> |
|
94 |
<taxonlabel> |
|
95 |
<rank>class</rank> |
|
96 |
<taxonepithet>$class</taxonepithet> |
|
97 |
<parent_id> |
|
98 |
<taxonlabel> |
|
99 |
<rank>phylum</rank> |
|
100 |
<taxonepithet>$phylum</taxonepithet> |
|
101 |
<parent_id> |
|
102 |
<taxonlabel> |
|
103 |
<rank>kingdom</rank> |
|
104 |
<taxonepithet>$kingdom</taxonepithet> |
|
105 |
</taxonlabel> |
|
106 |
</parent_id> |
|
107 |
</taxonlabel> |
|
108 |
</parent_id> |
|
109 |
</taxonlabel> |
|
110 |
</parent_id> |
|
111 |
</taxonlabel> |
|
112 |
</parent_id> |
|
113 |
</taxonlabel> |
|
114 |
</parent_id> |
|
115 |
</taxonlabel> |
|
116 |
</parent_id> |
|
117 |
</taxonlabel> |
|
118 |
</parent_id> |
|
119 |
</taxonlabel> |
|
120 |
</parent_id> |
|
121 |
</taxonlabel> |
|
122 |
</parent_id> |
|
123 |
</taxonlabel> |
|
124 |
</parent_id> |
|
125 |
</taxonlabel> |
|
126 |
</parent_id> |
|
127 |
</taxonlabel> |
|
128 |
</parent_id> |
|
129 |
<rank>$rank</rank> |
|
130 |
<taxonomicname> |
|
131 |
<_merge_prefix> |
|
132 |
<prefix><_taxon_family_require_std><family>$family</family></_taxon_family_require_std></prefix> |
|
133 |
<value> |
|
134 |
<_alt> |
|
135 |
<1>$scientific_name</1> |
|
136 |
<2> |
|
137 |
<_join_words> |
|
138 |
<2>$genus</2> |
|
139 |
<3>$species</3> |
|
140 |
<4>$subspecies</4> |
|
141 |
</_join_words> |
|
142 |
</2> |
|
143 |
</_alt> |
|
144 |
</value> |
|
145 |
</_merge_prefix> |
|
146 |
</taxonomicname> |
|
147 |
</taxonlabel> |
|
148 |
</taxonlabel_id> |
|
149 |
<family>$family</family> |
|
150 |
<genus>$genus</genus> |
|
151 |
<specific_epithet>$species</specific_epithet> |
|
152 |
<taxonomicname>$scientific_name</taxonomicname> |
|
153 |
<verbatimrank>$rank</verbatimrank> |
|
154 |
</taxonverbatim> |
|
155 |
</taxonverbatim_id> |
|
156 |
<determinationdate><_dateRangeStart><value>$identification_date</value></_dateRangeStart></determinationdate> |
|
157 |
</taxondetermination> |
|
158 |
</taxonoccurrence> |
|
159 |
<waterdepth_m> |
|
160 |
<_avg> |
|
161 |
<1>$min_depth</1> |
|
162 |
<2>$max_depth</2> |
|
163 |
</_avg> |
|
164 |
</waterdepth_m> |
|
165 |
</locationevent> |
|
166 |
<locationnarrative>$locality</locationnarrative> |
|
167 |
<locationplace> |
|
168 |
<place_id> |
|
169 |
<place> |
|
170 |
<coordinates_id> |
|
171 |
<coordinates> |
|
172 |
<latitude_deg> |
|
173 |
<_nullIf> |
|
174 |
<null>0</null> |
|
175 |
<type>float</type> |
|
176 |
<value>$latitude</value> |
|
177 |
</_nullIf> |
|
178 |
</latitude_deg> |
|
179 |
<longitude_deg> |
|
180 |
<_nullIf> |
|
181 |
<null>0</null> |
|
182 |
<type>float</type> |
|
183 |
<value>$longitude</value> |
|
184 |
</_nullIf> |
|
185 |
</longitude_deg> |
|
186 |
</coordinates> |
|
187 |
</coordinates_id> |
|
188 |
<continent>$continent_ocean</continent> |
|
189 |
<country>$country</country> |
|
190 |
<county>$county</county> |
|
191 |
<matched_place_id> |
|
192 |
<place> |
|
193 |
<source_id><source><shortname>geoscrub</shortname></source></source_id> |
|
194 |
<coordinates_id> |
|
195 |
<coordinates> |
|
196 |
<source_id><source><shortname>geoscrub</shortname></source></source_id> |
|
197 |
<latitude_deg> |
|
198 |
<_nullIf> |
|
199 |
<null>0</null> |
|
200 |
<type>float</type> |
|
201 |
<value>$latitude</value> |
|
202 |
</_nullIf> |
|
203 |
</latitude_deg> |
|
204 |
<longitude_deg> |
|
205 |
<_nullIf> |
|
206 |
<null>0</null> |
|
207 |
<type>float</type> |
|
208 |
<value>$longitude</value> |
|
209 |
</_nullIf> |
|
210 |
</longitude_deg> |
|
211 |
</coordinates> |
|
212 |
</coordinates_id> |
|
213 |
<country>$country</country> |
|
214 |
<county>$county</county> |
|
215 |
<stateprovince>$state_province</stateprovince> |
|
216 |
</place> |
|
217 |
</matched_place_id> |
|
218 |
<stateprovince>$state_province</stateprovince> |
|
219 |
</place> |
|
220 |
</place_id> |
|
221 |
</locationplace> |
|
222 |
</location> |
|
223 |
</path> |
|
224 |
</_simplifyPath> |
|
225 |
</path> |
|
226 |
</_setDefault> |
|
227 |
</VegBIEN> |
|
228 |
Inserted 0 new rows into database |
inputs/GBIF/raw_occurrence_record/VegBIEN.csv | ||
---|---|---|
1 |
GBIF,VegBIEN:/_setDefault:[source_id/source/shortname/_env:[name=source]]/path/_simplifyPath:[next=parent_id]/path,Comments |
|
2 |
catalogue_number,"/_if[@name=""if specimen""]/cond/_exists", |
|
3 |
id,"/_if[@name=""if specimen""]/cond/_exists", |
|
4 |
institution_code,"/_if[@name=""if specimen""]/else/source/shortname/_first/1", |
|
5 |
id,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/1", |
|
6 |
catalogue_number,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/cond/_exists", |
|
7 |
catalogue_number,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/then/_join/1/_if[@name=""if specimen""]/cond/_exists", |
|
8 |
id,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/then/_join/1/_if[@name=""if specimen""]/cond/_exists", |
|
9 |
institution_code,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/then/_join/1/_if[@name=""if specimen""]/then/_first/2", |
|
10 |
collection_code,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/then/_join/2/_join/1", |
|
11 |
catalogue_number,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/then/_join/3/_if[@name=""if indirect voucher""]/else", |
|
12 |
min_altitude,/location/elevation_m/_alt/2/_avg/1, |
|
13 |
max_altitude,/location/elevation_m/_alt/2/_avg/2, |
|
14 |
locality,/location/iscultivated/_or/2/_locationnarrative_is_cultivated/locationnarrative/_join/1, |
|
15 |
identification_date,"/location/locationevent/taxonoccurrence/_if[@name=""if has accepted name""]/then/taxondetermination:[determinationtype=accepted]/determinationdate/_alt/1/_dateRangeStart/value", |
|
16 |
catalogue_number,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/catalognumber_dwc/_if[@name=""if indirect voucher""]/else", |
|
17 |
collection_code,/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/collectioncode_dwc/_alt/1,Brad: Not sure if mapping correct. DwC element; code for entire collection; often same as InstitutionCode; Aaron: Combining with InstitutionCode to create collection name |
|
18 |
catalogue_number,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/institution_id/sourcelist/_if[@name=""if specimen""]/cond/_exists", |
|
19 |
id,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/institution_id/sourcelist/_if[@name=""if specimen""]/cond/_exists", |
|
20 |
institution_code,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/institution_id/sourcelist/_if[@name=""if specimen""]/then/name/_first/2", |
|
21 |
institution_code,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/institution_id/sourcelist/_if[@name=""if specimen""]/then/sourcename/name/_first/2/_split:[separator=""[,;] *""]/value", |
|
22 |
id,/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/sourceaccessioncode, |
|
23 |
scientific_name,"/location/locationevent/taxonoccurrence/authortaxoncode/_if[@name=""if in plot""]/then/_first/3/_first/2", |
|
24 |
collector_name,/location/locationevent/taxonoccurrence/collector_id/party/fullname, |
|
25 |
id,"/location/locationevent/taxonoccurrence/iscultivated/_alt/1/_if[@name=""if TaxonOccurrence""]/cond/_exists", |
|
26 |
id,/location/locationevent/taxonoccurrence/sourceaccessioncode/_first/3, |
|
27 |
identifier_name,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/party/fullname, |
|
28 |
rank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/_if[@name=""if has explicit parent""]/else/rank", |
|
29 |
subspecies,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=subspecies]/taxonepithet", |
|
30 |
species,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=species]/taxonepithet", |
|
31 |
genus,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=genus]/taxonepithet", |
|
32 |
family,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,genus,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=family]/taxonepithet", |
|
33 |
order_rank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,genus,family,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=order]/taxonepithet", |
|
34 |
class,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,genus,family,order,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=class]/taxonepithet", |
|
35 |
phylum,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,genus,family,order,class,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=phylum]/taxonepithet", |
|
36 |
kingdom,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,genus,family,order,class,phylum,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=kingdom]/taxonepithet", |
|
37 |
rank,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/rank, |
|
38 |
rank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonepithet/_if[@name=""if taxonomic name is epithet""]/cond/_alt:[2=true]/1/_taxonomic_name_is_epithet/rank", |
|
39 |
family,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/prefix/_taxon_family_require_std/family, |
|
40 |
scientific_name,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/value/_alt/1, |
|
41 |
genus,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/value/_alt/2/_join_words/1/_alt/2/_join_words/2, |
|
42 |
species,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/value/_alt/2/_join_words/1/_alt/2/_join_words/3/_join_words/1, |
|
43 |
subspecies,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/value/_alt/2/_join_words/1/_alt/2/_join_words/4, |
|
44 |
rank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/value/_alt/2/_join_words/1/_alt/2/_join_words/5/_join_words/1/_if[@name=""if has infraspecificEpithet""]/then", |
|
45 |
family,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/family, |
|
46 |
genus,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/genus, |
|
47 |
species,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/specific_epithet, |
|
48 |
scientific_name,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/taxonomicname, |
|
49 |
rank,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/verbatimrank, |
|
50 |
identification_date,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/determinationdate/_alt/1/_dateRangeStart/value, |
|
51 |
catalogue_number,"/location/locationevent/taxonoccurrence/voucher/*_id/specimenreplicate/catalognumber_dwc/_if[@name=""if indirect voucher""]/then", |
|
52 |
min_depth,/location/locationevent/waterdepth_m/_avg/1, |
|
53 |
max_depth,/location/locationevent/waterdepth_m/_avg/2, |
|
54 |
locality,/location/locationnarrative/_merge/1, |
|
55 |
latitude,"/location/locationplace/*_id/place/*_id/coordinates/latitude_deg/_nullIf:[null=0,type=float]/value", |
|
56 |
longitude,"/location/locationplace/*_id/place/*_id/coordinates/longitude_deg/_nullIf:[null=0,type=float]/value", |
|
57 |
continent_ocean,/location/locationplace/*_id/place/continent, |
|
58 |
country,/location/locationplace/*_id/place/country, |
|
59 |
county,/location/locationplace/*_id/place/county, |
|
60 |
latitude,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/latitude_deg/_nullIf:[null=0,type=float]/value",""".,"" sorts it with other coordinates mappings" |
|
61 |
longitude,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/longitude_deg/_nullIf:[null=0,type=float]/value",""".,"" sorts it with other coordinates mappings" |
|
62 |
country,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/country/_first/2, |
|
63 |
county,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/county/_first/2, |
|
64 |
state_province,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/stateprovince/_first/2, |
|
65 |
state_province,/location/locationplace/*_id/place/stateprovince, |
|
66 |
altitude_precision,,** No join mapping for *altitude_precision ** |
|
67 |
author,,** No join mapping for authors ** |
|
68 |
basis_of_record,,** No join mapping for basisOfRecord ** |
|
69 |
created,,** No join mapping for *created ** |
|
70 |
data_provider_id,,** No join mapping for *data_provider_id ** |
|
71 |
data_resource_id,,** No join mapping for *data_resource_id ** |
|
72 |
day,,** No join mapping for day ** |
|
73 |
deleted,,** No join mapping for *deleted ** |
|
74 |
depth_precision,,** No join mapping for *depth_precision ** |
|
75 |
lat_long_precision,,** No join mapping for coordinatePrecision_deg ** Tentative |
|
76 |
modified,,** No join mapping for modified ** |
|
77 |
month,,** No join mapping for month ** |
|
78 |
resource_access_point_id,,** No join mapping for *resource_access_point_id ** |
|
79 |
unit_qualifier,,** No join mapping for *unit_qualifier ** |
|
80 |
year,,** No join mapping for year ** |
inputs/GBIF/raw_occurrence_record_plants/run | ||
---|---|---|
1 |
#!/bin/bash -e |
|
2 |
. "$(dirname "${BASH_SOURCE[0]}")"/../table.run |
|
3 |
|
|
4 |
if self_not_included; then |
|
5 |
|
|
6 |
full_table="${table%_plants}" |
|
7 |
|
|
8 |
herbaria_filter.ih.csv_/make() |
|
9 |
{ |
|
10 |
echo_func; set_make_vars; check_target_exists |
|
11 |
|
|
12 |
use_local |
|
13 |
limit= query='SELECT DISTINCT acronym FROM herbaria.ih ORDER BY acronym' \ |
|
14 |
to_target pg_export |
|
15 |
} |
|
16 |
|
|
17 |
plant_fraction/make() # runtime: 1 hr ("10603 rows affected (53 min 44.68 sec)") |
|
18 |
{ |
|
19 |
echo_func; set_make_vars; mk_table_esc |
|
20 |
|
|
21 |
use_local_remote |
|
22 |
if remaking; then table=plant_fraction table_esc= mk_drop|mysql_ANSI; fi |
|
23 |
(benign_error=1 mysql <<'EOF' |
|
24 |
CREATE TABLE `plant_fraction` ( |
|
25 |
`institution_code` varchar(255) NOT NULL PRIMARY KEY, |
|
26 |
`plant_fraction` float NOT NULL, |
|
27 |
KEY `plant_fraction` (`plant_fraction`) |
|
28 |
) ENGINE=MyISAM DEFAULT CHARSET=utf8 |
|
29 |
EOF |
|
30 |
) || table=plant_fraction skip_table || return 0 |
|
31 |
mysql_ANSI <<EOF |
|
32 |
INSERT INTO "plant_fraction" |
|
33 |
SELECT |
|
34 |
institution_code |
|
35 |
, COUNT(NULLIF( |
|
36 |
family LIKE '%aceae%' OR family IN ('Compositae','Gramineae','Palmae','Guttiferae','Cruciferae','Labiatae','Umbelliferae','Leguminosae') |
|
37 |
, false))/COUNT(*) |
|
38 |
AS plant_fraction |
|
39 |
FROM ${limit:+(SELECT * FROM }$table_esc ${limit:+$(prefix= limit)) s} |
|
40 |
GROUP BY institution_code |
|
41 |
ORDER BY institution_code |
|
42 |
EOF |
|
43 |
} |
|
44 |
|
|
45 |
plant_fraction/seal() |
|
46 |
{ |
|
47 |
echo_func; set_make_vars |
|
48 |
|
|
49 |
use_local_remote |
|
50 |
table=plant_fraction table_esc= mysql_seal_table |
|
51 |
} |
|
52 |
|
|
53 |
plant_fraction_for_herbaria_filter/make() |
|
54 |
{ |
|
55 |
echo_func; set_make_vars; mk_table_esc |
|
56 |
|
|
57 |
plant_fraction/make |
|
58 |
use_local_remote |
|
59 |
if remaking; then |
|
60 |
table=plant_fraction_for_herbaria_filter table_esc= mk_drop|mysql_ANSI |
|
61 |
fi |
|
62 |
(benign_error=1 mysql <<'EOF' |
|
63 |
CREATE VIEW `plant_fraction_for_herbaria_filter` AS |
|
64 |
SELECT * FROM `plant_fraction` |
|
65 |
WHERE plant_fraction >= 0.8 |
|
66 |
EOF |
|
67 |
) || table=plant_fraction_for_herbaria_filter skip_table || return 0 |
|
68 |
} |
|
69 |
|
|
70 |
herbaria_filter/make() # runtime: ~0 ("0m2.235s"): CSV import, index scans only |
|
71 |
{ |
|
72 |
echo_func; set_make_vars |
|
73 |
|
|
74 |
use_local_remote |
|
75 |
local table="$target_stem" |
|
76 |
if remaking; then mk_drop|mysql_ANSI; fi |
|
77 |
|
|
78 |
herbaria_filter.ih.csv_/make |
|
79 |
plant_fraction_for_herbaria_filter/make |
|
80 |
|
|
81 |
(benign_error=1 mysql <<'EOF' |
|
82 |
CREATE TABLE `herbaria_filter` ( |
|
83 |
`institution_code` varchar(255) NOT NULL PRIMARY KEY |
|
84 |
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='contains all of IH.vegpath.org?&Index+Herbariorum' |
|
85 |
EOF |
|
86 |
) || skip_table || return 0 |
|
87 |
|
|
88 |
time ( |
|
89 |
mysql_ANSI <<EOF |
|
90 |
INSERT INTO "herbaria_filter" |
|
91 |
SELECT institution_code FROM plant_fraction_for_herbaria_filter |
|
92 |
EOF |
|
93 |
append=1 mysql_import <"$top_dir"/herbaria_filter.ih.csv_ |
|
94 |
) || return |
|
95 |
} |
|
96 |
|
|
97 |
herbaria_filter/seal() |
|
98 |
{ |
|
99 |
echo_func; set_make_vars |
|
100 |
|
|
101 |
use_local_remote |
|
102 |
table=herbaria_filter table_esc= mysql_seal_table |
|
103 |
} |
|
104 |
|
|
105 |
# table.tsv.gz/make() runtime: 35 min ("34m55.802s") |
|
106 |
|
|
107 |
func_override table.tsv/make__src_table_run |
|
108 |
table.tsv/make() # usage: [remake=1] [limit=...] .../run table.tsv/make |
|
109 |
{ |
|
110 |
echo_func; set_make_vars; check_target_exists |
|
111 |
|
|
112 |
herbaria_filter/make |
|
113 |
table="$full_table" filter=" |
|
114 |
deleted IS NULL |
|
115 |
AND institution_code IN (SELECT * FROM herbaria_filter)" \ |
|
116 |
self_make table.tsv/make__src_table_run |
|
117 |
} |
|
118 |
|
|
119 |
fi |
|
0 | 120 |
inputs/GBIF/raw_occurrence_record_plants/postprocess.sql | ||
---|---|---|
1 |
CREATE INDEX ON :table ("institution_code"); |
|
2 |
|
|
3 |
-- Remove institutions that we have direct data for |
|
4 |
DELETE FROM :table |
|
5 |
WHERE institution_code IN ( |
|
6 |
'ACAD' |
|
7 |
, 'ARIZ' |
|
8 |
, 'BRIT' |
|
9 |
, 'MO' |
|
10 |
, 'MT' |
|
11 |
, 'NY' |
|
12 |
, 'QFA' |
|
13 |
, 'TEX' |
|
14 |
, 'TRT' |
|
15 |
, 'TRTE' |
|
16 |
, 'U' |
|
17 |
, 'UBC' |
|
18 |
, 'WIN' |
|
19 |
) |
|
20 |
/* list obtained using the following on r9459: |
|
21 |
SELECT DISTINCT dataprovider |
|
22 |
FROM sourcelist |
|
23 |
JOIN provider_count ON provider_count.dataprovider = sourcelist.name |
|
24 |
WHERE source_id = source_by_shortname('GBIF') |
|
25 |
ORDER BY dataprovider |
|
26 |
*/ |
|
27 |
; |
inputs/GBIF/raw_occurrence_record_plants/header.csv | ||
---|---|---|
1 |
row_num,id,data_provider_id,data_resource_id,resource_access_point_id,institution_code,collection_code,catalogue_number,scientific_name,author,rank,kingdom,phylum,class,order_rank,family,genus,species,subspecies,latitude,longitude,lat_long_precision,max_altitude,min_altitude,altitude_precision,min_depth,max_depth,depth_precision,continent_ocean,country,state_province,county,collector_name,locality,year,month,day,basis_of_record,identifier_name,identification_date,unit_qualifier,created,modified,deleted |
inputs/GBIF/raw_occurrence_record_plants/map.csv | ||
---|---|---|
1 |
GBIF,VegCore,Filter,Comments |
|
2 |
row_num,raw_occurrence_record-row_num,, |
|
3 |
id,dataProviderRecordID,, |
|
4 |
data_provider_id,*data_provider_id,, |
|
5 |
data_resource_id,*data_resource_id,, |
|
6 |
resource_access_point_id,*resource_access_point_id,, |
|
7 |
institution_code,specimenHolderInstitutions,, |
|
8 |
collection_code,collection,, |
|
9 |
catalogue_number,accessionNumber,, |
|
10 |
scientific_name,scientificName,, |
|
11 |
author,authors,, |
|
12 |
rank,taxonRank,, |
|
13 |
kingdom,kingdom,, |
|
14 |
phylum,phylum,, |
|
15 |
class,class,, |
|
16 |
order_rank,order,, |
|
17 |
family,family,, |
|
18 |
genus,genus,, |
|
19 |
species,specificEpithet,, |
|
20 |
subspecies,subspecies,, |
|
21 |
latitude,decimalLatitude,, |
|
22 |
longitude,decimalLongitude,, |
|
23 |
lat_long_precision,coordinatePrecision_deg,,Tentative |
|
24 |
max_altitude,maximumElevationInMeters,, |
|
25 |
min_altitude,minimumElevationInMeters,, |
|
26 |
altitude_precision,*altitude_precision,, |
|
27 |
min_depth,minimumDepthInMeters,, |
|
28 |
max_depth,maximumDepthInMeters,, |
|
29 |
depth_precision,*depth_precision,, |
|
30 |
continent_ocean,continent,, |
|
31 |
country,country,, |
|
32 |
state_province,stateProvince,, |
|
33 |
county,county,, |
|
34 |
collector_name,recordedBy,, |
|
35 |
locality,locality,, |
|
36 |
year,year,, |
|
37 |
month,month,, |
|
38 |
day,day,, |
|
39 |
basis_of_record,basisOfRecord,, |
|
40 |
identifier_name,identifiedBy,, |
|
41 |
identification_date,dateIdentified,, |
|
42 |
unit_qualifier,*unit_qualifier,, |
|
43 |
created,*created,, |
|
44 |
modified,modified,, |
|
45 |
deleted,*deleted,, |
inputs/GBIF/raw_occurrence_record_plants/new_terms.csv | ||
---|---|---|
1 |
row_num,raw_occurrence_record-row_num,, |
|
2 |
catalogue_number,accessionNumber,, |
|
3 |
rank,taxonRank,, |
|
4 |
order_rank,order,, |
|
5 |
lat_long_precision,coordinatePrecision_deg,,Tentative |
|
6 |
max_altitude,maximumElevationInMeters,, |
|
7 |
min_altitude,minimumElevationInMeters,, |
|
8 |
min_depth,minimumDepthInMeters,, |
|
9 |
max_depth,maximumDepthInMeters,, |
|
10 |
collector_name,recordedBy,, |
|
11 |
identifier_name,identifiedBy,, |
|
12 |
identification_date,dateIdentified,, |
inputs/GBIF/raw_occurrence_record_plants/unmapped_terms.csv | ||
---|---|---|
1 |
raw_occurrence_record-row_num |
|
2 |
*data_provider_id |
|
3 |
*data_resource_id |
|
4 |
*resource_access_point_id |
|
5 |
authors |
|
6 |
coordinatePrecision_deg |
|
7 |
*altitude_precision |
|
8 |
*depth_precision |
|
9 |
year |
|
10 |
month |
|
11 |
day |
|
12 |
basisOfRecord |
|
13 |
*unit_qualifier |
|
14 |
*created |
|
15 |
modified |
|
16 |
*deleted |
inputs/GBIF/raw_occurrence_record_plants/VegBIEN.csv | ||
---|---|---|
1 |
GBIF,VegBIEN:/_setDefault:[source_id/source/shortname/_env:[name=source]]/path/_simplifyPath:[next=parent_id]/path,Comments |
|
2 |
catalogue_number,"/_if[@name=""if specimen""]/cond/_exists", |
|
3 |
id,"/_if[@name=""if specimen""]/cond/_exists", |
|
4 |
institution_code,"/_if[@name=""if specimen""]/else/source/shortname/_first/1", |
|
5 |
id,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/1", |
|
6 |
catalogue_number,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/cond/_exists", |
|
7 |
catalogue_number,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/then/_join/1/_if[@name=""if specimen""]/cond/_exists", |
|
8 |
id,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/then/_join/1/_if[@name=""if specimen""]/cond/_exists", |
|
9 |
institution_code,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/then/_join/1/_if[@name=""if specimen""]/then/_first/2", |
|
10 |
collection_code,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/then/_join/2/_join/1", |
|
11 |
catalogue_number,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/then/_join/3/_if[@name=""if indirect voucher""]/else", |
|
12 |
min_altitude,/location/elevation_m/_alt/2/_avg/1, |
|
13 |
max_altitude,/location/elevation_m/_alt/2/_avg/2, |
|
14 |
locality,/location/iscultivated/_or/2/_locationnarrative_is_cultivated/locationnarrative/_join/1, |
|
15 |
identification_date,"/location/locationevent/taxonoccurrence/_if[@name=""if has accepted name""]/then/taxondetermination:[determinationtype=accepted]/determinationdate/_alt/1/_dateRangeStart/value", |
|
16 |
catalogue_number,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/catalognumber_dwc/_if[@name=""if indirect voucher""]/else", |
|
17 |
collection_code,/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/collectioncode_dwc/_alt/1,Brad: Not sure if mapping correct. DwC element; code for entire collection; often same as InstitutionCode; Aaron: Combining with InstitutionCode to create collection name |
|
18 |
catalogue_number,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/institution_id/sourcelist/_if[@name=""if specimen""]/cond/_exists", |
|
19 |
id,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/institution_id/sourcelist/_if[@name=""if specimen""]/cond/_exists", |
|
20 |
institution_code,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/institution_id/sourcelist/_if[@name=""if specimen""]/then/name/_first/2", |
|
21 |
institution_code,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/institution_id/sourcelist/_if[@name=""if specimen""]/then/sourcename/name/_first/2/_split:[separator=""[,;] *""]/value", |
|
22 |
id,/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/sourceaccessioncode, |
|
23 |
scientific_name,"/location/locationevent/taxonoccurrence/authortaxoncode/_if[@name=""if in plot""]/then/_first/3/_first/2", |
|
24 |
collector_name,/location/locationevent/taxonoccurrence/collector_id/party/fullname, |
|
25 |
id,"/location/locationevent/taxonoccurrence/iscultivated/_alt/1/_if[@name=""if TaxonOccurrence""]/cond/_exists", |
|
26 |
id,/location/locationevent/taxonoccurrence/sourceaccessioncode/_first/3, |
|
27 |
identifier_name,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/party/fullname, |
|
28 |
rank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/_if[@name=""if has explicit parent""]/else/rank", |
|
29 |
subspecies,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=subspecies]/taxonepithet", |
|
30 |
species,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=species]/taxonepithet", |
|
31 |
genus,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=genus]/taxonepithet", |
|
32 |
family,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,genus,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=family]/taxonepithet", |
|
33 |
order_rank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,genus,family,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=order]/taxonepithet", |
|
34 |
class,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,genus,family,order,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=class]/taxonepithet", |
|
35 |
phylum,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,genus,family,order,class,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=phylum]/taxonepithet", |
|
36 |
kingdom,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,genus,family,order,class,phylum,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=kingdom]/taxonepithet", |
|
37 |
rank,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/rank, |
|
38 |
rank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonepithet/_if[@name=""if taxonomic name is epithet""]/cond/_alt:[2=true]/1/_taxonomic_name_is_epithet/rank", |
|
39 |
family,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/prefix/_taxon_family_require_std/family, |
|
40 |
scientific_name,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/value/_alt/1, |
|
41 |
genus,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/value/_alt/2/_join_words/1/_alt/2/_join_words/2, |
|
42 |
species,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/value/_alt/2/_join_words/1/_alt/2/_join_words/3/_join_words/1, |
|
43 |
subspecies,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/value/_alt/2/_join_words/1/_alt/2/_join_words/4, |
|
44 |
rank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/value/_alt/2/_join_words/1/_alt/2/_join_words/5/_join_words/1/_if[@name=""if has infraspecificEpithet""]/then", |
|
45 |
family,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/family, |
|
46 |
genus,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/genus, |
|
47 |
species,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/specific_epithet, |
|
48 |
scientific_name,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/taxonomicname, |
|
49 |
rank,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/verbatimrank, |
|
50 |
identification_date,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/determinationdate/_alt/1/_dateRangeStart/value, |
|
51 |
catalogue_number,"/location/locationevent/taxonoccurrence/voucher/*_id/specimenreplicate/catalognumber_dwc/_if[@name=""if indirect voucher""]/then", |
|
52 |
min_depth,/location/locationevent/waterdepth_m/_avg/1, |
|
53 |
max_depth,/location/locationevent/waterdepth_m/_avg/2, |
|
54 |
locality,/location/locationnarrative/_merge/1, |
|
55 |
latitude,"/location/locationplace/*_id/place/*_id/coordinates/latitude_deg/_nullIf:[null=0,type=float]/value", |
|
56 |
longitude,"/location/locationplace/*_id/place/*_id/coordinates/longitude_deg/_nullIf:[null=0,type=float]/value", |
|
57 |
continent_ocean,/location/locationplace/*_id/place/continent, |
|
58 |
country,/location/locationplace/*_id/place/country, |
|
59 |
county,/location/locationplace/*_id/place/county, |
|
60 |
latitude,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/latitude_deg/_nullIf:[null=0,type=float]/value",""".,"" sorts it with other coordinates mappings" |
|
61 |
longitude,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/longitude_deg/_nullIf:[null=0,type=float]/value",""".,"" sorts it with other coordinates mappings" |
|
62 |
country,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/country/_first/2, |
|
63 |
county,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/county/_first/2, |
|
64 |
state_province,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/stateprovince/_first/2, |
|
65 |
state_province,/location/locationplace/*_id/place/stateprovince, |
|
66 |
altitude_precision,,** No join mapping for *altitude_precision ** |
|
67 |
author,,** No join mapping for authors ** |
|
68 |
basis_of_record,,** No join mapping for basisOfRecord ** |
|
69 |
created,,** No join mapping for *created ** |
|
70 |
data_provider_id,,** No join mapping for *data_provider_id ** |
|
71 |
data_resource_id,,** No join mapping for *data_resource_id ** |
|
72 |
day,,** No join mapping for day ** |
|
73 |
deleted,,** No join mapping for *deleted ** |
|
74 |
depth_precision,,** No join mapping for *depth_precision ** |
|
75 |
lat_long_precision,,** No join mapping for coordinatePrecision_deg ** Tentative |
|
76 |
modified,,** No join mapping for modified ** |
|
77 |
month,,** No join mapping for month ** |
|
78 |
resource_access_point_id,,** No join mapping for *resource_access_point_id ** |
|
79 |
unit_qualifier,,** No join mapping for *unit_qualifier ** |
|
80 |
year,,** No join mapping for year ** |
inputs/GBIF/raw_occurrence_record_plants/test.xml.ref | ||
---|---|---|
1 |
Put template: |
|
2 |
<VegBIEN> |
|
3 |
<_setDefault id="-1"> |
|
4 |
<source_id><source><shortname>GBIF</shortname></source></source_id> |
|
5 |
<path> |
|
6 |
<_simplifyPath> |
|
7 |
<next>parent_id</next> |
|
8 |
<path> |
|
9 |
<location> |
|
10 |
<authorlocationcode> |
|
11 |
<_alt> |
|
12 |
<1>$id</1> |
|
13 |
<2> |
|
14 |
<_join> |
|
15 |
<1>$institution_code</1> |
|
16 |
<2>$collection_code</2> |
|
17 |
<3>$catalogue_number</3> |
|
18 |
</_join> |
|
19 |
</2> |
|
20 |
</_alt> |
|
21 |
</authorlocationcode> |
|
22 |
<elevation_m> |
|
23 |
<_avg> |
|
24 |
<1>$min_altitude</1> |
|
25 |
<2>$max_altitude</2> |
|
26 |
</_avg> |
|
27 |
</elevation_m> |
|
28 |
<iscultivated><_locationnarrative_is_cultivated><locationnarrative>$locality</locationnarrative></_locationnarrative_is_cultivated></iscultivated> |
|
29 |
<locationevent> |
|
30 |
<taxonoccurrence> |
|
31 |
<aggregateoccurrence> |
|
32 |
<plantobservation> |
|
33 |
<specimenreplicate> |
|
34 |
<catalognumber_dwc>$catalogue_number</catalognumber_dwc> |
|
35 |
<collectioncode_dwc>$collection_code</collectioncode_dwc> |
|
36 |
<institution_id> |
|
37 |
<sourcelist> |
|
38 |
<name>$institution_code</name> |
|
39 |
<sourcename> |
|
40 |
<name> |
|
41 |
<_split> |
|
42 |
<separator>[,;] *</separator> |
|
43 |
<value>$institution_code</value> |
|
44 |
</_split> |
|
45 |
</name> |
|
46 |
</sourcename> |
|
47 |
</sourcelist> |
|
48 |
</institution_id> |
|
49 |
<sourceaccessioncode>$id</sourceaccessioncode> |
|
50 |
</specimenreplicate> |
|
51 |
</plantobservation> |
|
52 |
</aggregateoccurrence> |
|
53 |
<collector_id><party><fullname>$collector_name</fullname></party></collector_id> |
|
54 |
<sourceaccessioncode>$id</sourceaccessioncode> |
|
55 |
<taxondetermination> |
|
56 |
<party_id><party><fullname>$identifier_name</fullname></party></party_id> |
|
57 |
<taxonverbatim_id> |
|
58 |
<taxonverbatim> |
|
59 |
<taxonlabel_id> |
|
60 |
<taxonlabel> |
|
61 |
<parent_id> |
|
62 |
<taxonlabel> |
|
63 |
<rank>$rank</rank> |
|
64 |
<parent_id> |
|
65 |
<taxonlabel> |
|
66 |
<rank>cultivar</rank> |
|
67 |
<parent_id> |
|
68 |
<taxonlabel> |
|
69 |
<rank>forma</rank> |
|
70 |
<parent_id> |
|
71 |
<taxonlabel> |
|
72 |
<rank>variety</rank> |
|
73 |
<parent_id> |
|
74 |
<taxonlabel> |
|
75 |
<rank>subspecies</rank> |
|
76 |
<taxonepithet>$subspecies</taxonepithet> |
|
77 |
<parent_id> |
|
78 |
<taxonlabel> |
|
79 |
<rank>species</rank> |
|
80 |
<taxonepithet>$species</taxonepithet> |
|
81 |
<parent_id> |
|
82 |
<taxonlabel> |
|
83 |
<rank>genus</rank> |
|
84 |
<taxonepithet>$genus</taxonepithet> |
|
85 |
<parent_id> |
|
86 |
<taxonlabel> |
|
87 |
<rank>family</rank> |
|
88 |
<taxonepithet>$family</taxonepithet> |
|
89 |
<parent_id> |
|
90 |
<taxonlabel> |
|
91 |
<rank>order</rank> |
|
92 |
<taxonepithet>$order_rank</taxonepithet> |
|
93 |
<parent_id> |
|
94 |
<taxonlabel> |
|
95 |
<rank>class</rank> |
|
96 |
<taxonepithet>$class</taxonepithet> |
|
97 |
<parent_id> |
|
98 |
<taxonlabel> |
|
99 |
<rank>phylum</rank> |
|
100 |
<taxonepithet>$phylum</taxonepithet> |
|
101 |
<parent_id> |
|
102 |
<taxonlabel> |
|
103 |
<rank>kingdom</rank> |
|
104 |
<taxonepithet>$kingdom</taxonepithet> |
|
105 |
</taxonlabel> |
|
106 |
</parent_id> |
|
107 |
</taxonlabel> |
|
108 |
</parent_id> |
|
109 |
</taxonlabel> |
|
110 |
</parent_id> |
|
111 |
</taxonlabel> |
|
112 |
</parent_id> |
|
113 |
</taxonlabel> |
|
114 |
</parent_id> |
|
115 |
</taxonlabel> |
|
116 |
</parent_id> |
|
117 |
</taxonlabel> |
|
118 |
</parent_id> |
|
119 |
</taxonlabel> |
|
120 |
</parent_id> |
|
121 |
</taxonlabel> |
|
122 |
</parent_id> |
|
123 |
</taxonlabel> |
|
124 |
</parent_id> |
|
125 |
</taxonlabel> |
|
126 |
</parent_id> |
|
127 |
</taxonlabel> |
|
128 |
</parent_id> |
|
129 |
<rank>$rank</rank> |
|
130 |
<taxonomicname> |
|
131 |
<_merge_prefix> |
|
132 |
<prefix><_taxon_family_require_std><family>$family</family></_taxon_family_require_std></prefix> |
|
133 |
<value> |
|
134 |
<_alt> |
|
135 |
<1>$scientific_name</1> |
|
136 |
<2> |
|
137 |
<_join_words> |
|
138 |
<2>$genus</2> |
|
139 |
<3>$species</3> |
|
140 |
<4>$subspecies</4> |
|
141 |
</_join_words> |
|
142 |
</2> |
|
143 |
</_alt> |
|
144 |
</value> |
|
145 |
</_merge_prefix> |
|
146 |
</taxonomicname> |
|
147 |
</taxonlabel> |
|
148 |
</taxonlabel_id> |
|
149 |
<family>$family</family> |
|
150 |
<genus>$genus</genus> |
|
151 |
<specific_epithet>$species</specific_epithet> |
|
152 |
<taxonomicname>$scientific_name</taxonomicname> |
|
153 |
<verbatimrank>$rank</verbatimrank> |
|
154 |
</taxonverbatim> |
|
155 |
</taxonverbatim_id> |
|
156 |
<determinationdate><_dateRangeStart><value>$identification_date</value></_dateRangeStart></determinationdate> |
|
157 |
</taxondetermination> |
|
158 |
</taxonoccurrence> |
|
159 |
<waterdepth_m> |
|
160 |
<_avg> |
|
161 |
<1>$min_depth</1> |
|
162 |
<2>$max_depth</2> |
|
163 |
</_avg> |
|
164 |
</waterdepth_m> |
|
165 |
</locationevent> |
|
166 |
<locationnarrative>$locality</locationnarrative> |
|
167 |
<locationplace> |
|
168 |
<place_id> |
|
169 |
<place> |
|
170 |
<coordinates_id> |
|
171 |
<coordinates> |
|
172 |
<latitude_deg> |
|
173 |
<_nullIf> |
|
174 |
<null>0</null> |
|
175 |
<type>float</type> |
|
176 |
<value>$latitude</value> |
|
177 |
</_nullIf> |
|
178 |
</latitude_deg> |
|
179 |
<longitude_deg> |
|
180 |
<_nullIf> |
|
181 |
<null>0</null> |
|
182 |
<type>float</type> |
|
183 |
<value>$longitude</value> |
|
184 |
</_nullIf> |
|
185 |
</longitude_deg> |
|
186 |
</coordinates> |
|
187 |
</coordinates_id> |
|
188 |
<continent>$continent_ocean</continent> |
|
189 |
<country>$country</country> |
|
190 |
<county>$county</county> |
|
191 |
<matched_place_id> |
|
192 |
<place> |
|
193 |
<source_id><source><shortname>geoscrub</shortname></source></source_id> |
|
194 |
<coordinates_id> |
|
195 |
<coordinates> |
|
196 |
<source_id><source><shortname>geoscrub</shortname></source></source_id> |
|
197 |
<latitude_deg> |
|
198 |
<_nullIf> |
|
199 |
<null>0</null> |
|
200 |
<type>float</type> |
|
201 |
<value>$latitude</value> |
|
202 |
</_nullIf> |
|
203 |
</latitude_deg> |
|
204 |
<longitude_deg> |
|
205 |
<_nullIf> |
|
206 |
<null>0</null> |
|
207 |
<type>float</type> |
|
208 |
<value>$longitude</value> |
|
209 |
</_nullIf> |
|
210 |
</longitude_deg> |
|
211 |
</coordinates> |
|
212 |
</coordinates_id> |
|
213 |
<country>$country</country> |
|
214 |
<county>$county</county> |
|
215 |
<stateprovince>$state_province</stateprovince> |
|
216 |
</place> |
|
217 |
</matched_place_id> |
|
218 |
<stateprovince>$state_province</stateprovince> |
|
219 |
</place> |
|
220 |
</place_id> |
|
221 |
</locationplace> |
|
222 |
</location> |
|
223 |
</path> |
|
224 |
</_simplifyPath> |
|
225 |
</path> |
|
226 |
</_setDefault> |
|
227 |
</VegBIEN> |
|
228 |
Inserted 0 new rows into database |
Also available in: Unified diff
inputs/GBIF/raw_occurrence_record/: renamed to raw_occurrence_record_plants because it's actually only the plants in raw_occurrence_record, not all of raw_occurrence_record. also, this will allow us to create a separate raw_occurrence_record_plants view whose name matches the folder and does not collide with the raw_occurrence_record table.