Project

General

Profile

« Previous | Next » 

Revision 9771

inputs/GBIF/raw_occurrence_record/: renamed to raw_occurrence_record_plants because it's actually only the plants in raw_occurrence_record, not all of raw_occurrence_record. also, this will allow us to create a separate raw_occurrence_record_plants view whose name matches the folder and does not collide with the raw_occurrence_record table.

View differences:

inputs/GBIF/raw_occurrence_record/new_terms.csv
1
row_num,raw_occurrence_record-row_num,,
2
catalogue_number,accessionNumber,,
3
rank,taxonRank,,
4
order_rank,order,,
5
lat_long_precision,coordinatePrecision_deg,,Tentative
6
max_altitude,maximumElevationInMeters,,
7
min_altitude,minimumElevationInMeters,,
8
min_depth,minimumDepthInMeters,,
9
max_depth,maximumDepthInMeters,,
10
collector_name,recordedBy,,
11
identifier_name,identifiedBy,,
12
identification_date,dateIdentified,,
inputs/GBIF/raw_occurrence_record/unmapped_terms.csv
1
raw_occurrence_record-row_num
2
*data_provider_id
3
*data_resource_id
4
*resource_access_point_id
5
authors
6
coordinatePrecision_deg
7
*altitude_precision
8
*depth_precision
9
year
10
month
11
day
12
basisOfRecord
13
*unit_qualifier
14
*created
15
modified
16
*deleted
inputs/GBIF/raw_occurrence_record/header.csv
1
row_num,id,data_provider_id,data_resource_id,resource_access_point_id,institution_code,collection_code,catalogue_number,scientific_name,author,rank,kingdom,phylum,class,order_rank,family,genus,species,subspecies,latitude,longitude,lat_long_precision,max_altitude,min_altitude,altitude_precision,min_depth,max_depth,depth_precision,continent_ocean,country,state_province,county,collector_name,locality,year,month,day,basis_of_record,identifier_name,identification_date,unit_qualifier,created,modified,deleted
inputs/GBIF/raw_occurrence_record/postprocess.sql
1
CREATE INDEX ON :table ("institution_code");
2

  
3
-- Remove institutions that we have direct data for
4
DELETE FROM :table
5
WHERE institution_code IN (
6
      'ACAD'
7
    , 'ARIZ'
8
    , 'BRIT'
9
    , 'MO'
10
    , 'MT'
11
    , 'NY'
12
    , 'QFA'
13
    , 'TEX'
14
    , 'TRT'
15
    , 'TRTE'
16
    , 'U'
17
    , 'UBC'
18
    , 'WIN'
19
)
20
/* list obtained using the following on r9459:
21
SELECT DISTINCT dataprovider
22
FROM sourcelist
23
JOIN provider_count ON provider_count.dataprovider = sourcelist.name
24
WHERE source_id = source_by_shortname('GBIF')
25
ORDER BY dataprovider
26
*/
27
;
inputs/GBIF/raw_occurrence_record/run
1
#!/bin/bash -e
2
. "$(dirname "${BASH_SOURCE[0]}")"/../table.run
3

  
4
if self_not_included; then
5

  
6
herbaria_filter.ih.csv_/make()
7
{
8
	echo_func; set_make_vars; check_target_exists
9
	
10
	use_local
11
	limit= query='SELECT DISTINCT acronym FROM herbaria.ih ORDER BY acronym' \
12
to_target pg_export
13
}
14

  
15
plant_fraction/make() # runtime: 1 hr ("10603 rows affected (53 min 44.68 sec)")
16
{
17
	echo_func; set_make_vars; mk_table_esc
18
	
19
	use_local_remote
20
	if remaking; then table=plant_fraction table_esc= mk_drop|mysql_ANSI; fi
21
	(benign_error=1 mysql <<'EOF'
22
CREATE TABLE `plant_fraction` (
23
 `institution_code` varchar(255) NOT NULL PRIMARY KEY,
24
 `plant_fraction` float NOT NULL,
25
 KEY `plant_fraction` (`plant_fraction`)
26
) ENGINE=MyISAM DEFAULT CHARSET=utf8
27
EOF
28
) || table=plant_fraction skip_table || return 0
29
	mysql_ANSI <<EOF
30
INSERT INTO "plant_fraction"
31
SELECT
32
  institution_code
33
, COUNT(NULLIF(
34
	family LIKE '%aceae%' OR family IN ('Compositae','Gramineae','Palmae','Guttiferae','Cruciferae','Labiatae','Umbelliferae','Leguminosae')
35
	, false))/COUNT(*)
36
	AS plant_fraction
37
FROM ${limit:+(SELECT * FROM }$table_esc ${limit:+$(prefix= limit)) s}
38
GROUP BY institution_code
39
ORDER BY institution_code
40
EOF
41
}
42

  
43
plant_fraction/seal()
44
{
45
	echo_func; set_make_vars
46
	
47
	use_local_remote
48
	table=plant_fraction table_esc= mysql_seal_table
49
}
50

  
51
plant_fraction_for_herbaria_filter/make()
52
{
53
	echo_func; set_make_vars; mk_table_esc
54
	
55
	plant_fraction/make
56
	use_local_remote
57
	if remaking; then
58
		table=plant_fraction_for_herbaria_filter table_esc= mk_drop|mysql_ANSI
59
	fi
60
	(benign_error=1 mysql <<'EOF'
61
CREATE VIEW `plant_fraction_for_herbaria_filter` AS
62
SELECT * FROM `plant_fraction`
63
WHERE plant_fraction >= 0.8
64
EOF
65
) || table=plant_fraction_for_herbaria_filter skip_table || return 0
66
}
67

  
68
herbaria_filter/make() # runtime: ~0 ("0m2.235s"): CSV import, index scans only
69
{
70
	echo_func; set_make_vars
71
	
72
	use_local_remote
73
	local table="$target_stem"
74
	if remaking; then mk_drop|mysql_ANSI; fi
75
	
76
	herbaria_filter.ih.csv_/make
77
	plant_fraction_for_herbaria_filter/make
78
	
79
	(benign_error=1 mysql <<'EOF'
80
CREATE TABLE `herbaria_filter` (
81
 `institution_code` varchar(255) NOT NULL PRIMARY KEY
82
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='contains all of IH.vegpath.org?&Index+Herbariorum'
83
EOF
84
) || skip_table || return 0
85
	
86
	time (
87
	mysql_ANSI <<EOF
88
INSERT INTO "herbaria_filter"
89
SELECT institution_code FROM plant_fraction_for_herbaria_filter
90
EOF
91
	append=1 mysql_import <"$top_dir"/herbaria_filter.ih.csv_
92
	) || return
93
}
94

  
95
herbaria_filter/seal()
96
{
97
	echo_func; set_make_vars
98
	
99
	use_local_remote
100
	table=herbaria_filter table_esc= mysql_seal_table
101
}
102

  
103
# table.tsv.gz/make() runtime: 35 min ("34m55.802s")
104

  
105
func_override table.tsv/make__src_table_run
106
table.tsv/make() # usage: [remake=1] [limit=...] .../run table.tsv/make
107
{
108
	echo_func; set_make_vars; check_target_exists
109
	
110
	herbaria_filter/make
111
	filter="
112
deleted IS NULL
113
AND institution_code IN (SELECT * FROM herbaria_filter)" \
114
self_make table.tsv/make__src_table_run
115
}
116

  
117
fi
118 0

  
inputs/GBIF/raw_occurrence_record/map.csv
1
GBIF,VegCore,Filter,Comments
2
row_num,raw_occurrence_record-row_num,,
3
id,dataProviderRecordID,,
4
data_provider_id,*data_provider_id,,
5
data_resource_id,*data_resource_id,,
6
resource_access_point_id,*resource_access_point_id,,
7
institution_code,specimenHolderInstitutions,,
8
collection_code,collection,,
9
catalogue_number,accessionNumber,,
10
scientific_name,scientificName,,
11
author,authors,,
12
rank,taxonRank,,
13
kingdom,kingdom,,
14
phylum,phylum,,
15
class,class,,
16
order_rank,order,,
17
family,family,,
18
genus,genus,,
19
species,specificEpithet,,
20
subspecies,subspecies,,
21
latitude,decimalLatitude,,
22
longitude,decimalLongitude,,
23
lat_long_precision,coordinatePrecision_deg,,Tentative
24
max_altitude,maximumElevationInMeters,,
25
min_altitude,minimumElevationInMeters,,
26
altitude_precision,*altitude_precision,,
27
min_depth,minimumDepthInMeters,,
28
max_depth,maximumDepthInMeters,,
29
depth_precision,*depth_precision,,
30
continent_ocean,continent,,
31
country,country,,
32
state_province,stateProvince,,
33
county,county,,
34
collector_name,recordedBy,,
35
locality,locality,,
36
year,year,,
37
month,month,,
38
day,day,,
39
basis_of_record,basisOfRecord,,
40
identifier_name,identifiedBy,,
41
identification_date,dateIdentified,,
42
unit_qualifier,*unit_qualifier,,
43
created,*created,,
44
modified,modified,,
45
deleted,*deleted,,
inputs/GBIF/raw_occurrence_record/test.xml.ref
1
Put template:
2
<VegBIEN>
3
    <_setDefault id="-1">
4
        <source_id><source><shortname>GBIF</shortname></source></source_id>
5
        <path>
6
            <_simplifyPath>
7
                <next>parent_id</next>
8
                <path>
9
                    <location>
10
                        <authorlocationcode>
11
                            <_alt>
12
                                <1>$id</1>
13
                                <2>
14
                                    <_join>
15
                                        <1>$institution_code</1>
16
                                        <2>$collection_code</2>
17
                                        <3>$catalogue_number</3>
18
                                    </_join>
19
                                </2>
20
                            </_alt>
21
                        </authorlocationcode>
22
                        <elevation_m>
23
                            <_avg>
24
                                <1>$min_altitude</1>
25
                                <2>$max_altitude</2>
26
                            </_avg>
27
                        </elevation_m>
28
                        <iscultivated><_locationnarrative_is_cultivated><locationnarrative>$locality</locationnarrative></_locationnarrative_is_cultivated></iscultivated>
29
                        <locationevent>
30
                            <taxonoccurrence>
31
                                <aggregateoccurrence>
32
                                    <plantobservation>
33
                                        <specimenreplicate>
34
                                            <catalognumber_dwc>$catalogue_number</catalognumber_dwc>
35
                                            <collectioncode_dwc>$collection_code</collectioncode_dwc>
36
                                            <institution_id>
37
                                                <sourcelist>
38
                                                    <name>$institution_code</name>
39
                                                    <sourcename>
40
                                                        <name>
41
                                                            <_split>
42
                                                                <separator>[,;] *</separator>
43
                                                                <value>$institution_code</value>
44
                                                            </_split>
45
                                                        </name>
46
                                                    </sourcename>
47
                                                </sourcelist>
48
                                            </institution_id>
49
                                            <sourceaccessioncode>$id</sourceaccessioncode>
50
                                        </specimenreplicate>
51
                                    </plantobservation>
52
                                </aggregateoccurrence>
53
                                <collector_id><party><fullname>$collector_name</fullname></party></collector_id>
54
                                <sourceaccessioncode>$id</sourceaccessioncode>
55
                                <taxondetermination>
56
                                    <party_id><party><fullname>$identifier_name</fullname></party></party_id>
57
                                    <taxonverbatim_id>
58
                                        <taxonverbatim>
59
                                            <taxonlabel_id>
60
                                                <taxonlabel>
61
                                                    <parent_id>
62
                                                        <taxonlabel>
63
                                                            <rank>$rank</rank>
64
                                                            <parent_id>
65
                                                                <taxonlabel>
66
                                                                    <rank>cultivar</rank>
67
                                                                    <parent_id>
68
                                                                        <taxonlabel>
69
                                                                            <rank>forma</rank>
70
                                                                            <parent_id>
71
                                                                                <taxonlabel>
72
                                                                                    <rank>variety</rank>
73
                                                                                    <parent_id>
74
                                                                                        <taxonlabel>
75
                                                                                            <rank>subspecies</rank>
76
                                                                                            <taxonepithet>$subspecies</taxonepithet>
77
                                                                                            <parent_id>
78
                                                                                                <taxonlabel>
79
                                                                                                    <rank>species</rank>
80
                                                                                                    <taxonepithet>$species</taxonepithet>
81
                                                                                                    <parent_id>
82
                                                                                                        <taxonlabel>
83
                                                                                                            <rank>genus</rank>
84
                                                                                                            <taxonepithet>$genus</taxonepithet>
85
                                                                                                            <parent_id>
86
                                                                                                                <taxonlabel>
87
                                                                                                                    <rank>family</rank>
88
                                                                                                                    <taxonepithet>$family</taxonepithet>
89
                                                                                                                    <parent_id>
90
                                                                                                                        <taxonlabel>
91
                                                                                                                            <rank>order</rank>
92
                                                                                                                            <taxonepithet>$order_rank</taxonepithet>
93
                                                                                                                            <parent_id>
94
                                                                                                                                <taxonlabel>
95
                                                                                                                                    <rank>class</rank>
96
                                                                                                                                    <taxonepithet>$class</taxonepithet>
97
                                                                                                                                    <parent_id>
98
                                                                                                                                        <taxonlabel>
99
                                                                                                                                            <rank>phylum</rank>
100
                                                                                                                                            <taxonepithet>$phylum</taxonepithet>
101
                                                                                                                                            <parent_id>
102
                                                                                                                                                <taxonlabel>
103
                                                                                                                                                    <rank>kingdom</rank>
104
                                                                                                                                                    <taxonepithet>$kingdom</taxonepithet>
105
                                                                                                                                                </taxonlabel>
106
                                                                                                                                            </parent_id>
107
                                                                                                                                        </taxonlabel>
108
                                                                                                                                    </parent_id>
109
                                                                                                                                </taxonlabel>
110
                                                                                                                            </parent_id>
111
                                                                                                                        </taxonlabel>
112
                                                                                                                    </parent_id>
113
                                                                                                                </taxonlabel>
114
                                                                                                            </parent_id>
115
                                                                                                        </taxonlabel>
116
                                                                                                    </parent_id>
117
                                                                                                </taxonlabel>
118
                                                                                            </parent_id>
119
                                                                                        </taxonlabel>
120
                                                                                    </parent_id>
121
                                                                                </taxonlabel>
122
                                                                            </parent_id>
123
                                                                        </taxonlabel>
124
                                                                    </parent_id>
125
                                                                </taxonlabel>
126
                                                            </parent_id>
127
                                                        </taxonlabel>
128
                                                    </parent_id>
129
                                                    <rank>$rank</rank>
130
                                                    <taxonomicname>
131
                                                        <_merge_prefix>
132
                                                            <prefix><_taxon_family_require_std><family>$family</family></_taxon_family_require_std></prefix>
133
                                                            <value>
134
                                                                <_alt>
135
                                                                    <1>$scientific_name</1>
136
                                                                    <2>
137
                                                                        <_join_words>
138
                                                                            <2>$genus</2>
139
                                                                            <3>$species</3>
140
                                                                            <4>$subspecies</4>
141
                                                                        </_join_words>
142
                                                                    </2>
143
                                                                </_alt>
144
                                                            </value>
145
                                                        </_merge_prefix>
146
                                                    </taxonomicname>
147
                                                </taxonlabel>
148
                                            </taxonlabel_id>
149
                                            <family>$family</family>
150
                                            <genus>$genus</genus>
151
                                            <specific_epithet>$species</specific_epithet>
152
                                            <taxonomicname>$scientific_name</taxonomicname>
153
                                            <verbatimrank>$rank</verbatimrank>
154
                                        </taxonverbatim>
155
                                    </taxonverbatim_id>
156
                                    <determinationdate><_dateRangeStart><value>$identification_date</value></_dateRangeStart></determinationdate>
157
                                </taxondetermination>
158
                            </taxonoccurrence>
159
                            <waterdepth_m>
160
                                <_avg>
161
                                    <1>$min_depth</1>
162
                                    <2>$max_depth</2>
163
                                </_avg>
164
                            </waterdepth_m>
165
                        </locationevent>
166
                        <locationnarrative>$locality</locationnarrative>
167
                        <locationplace>
168
                            <place_id>
169
                                <place>
170
                                    <coordinates_id>
171
                                        <coordinates>
172
                                            <latitude_deg>
173
                                                <_nullIf>
174
                                                    <null>0</null>
175
                                                    <type>float</type>
176
                                                    <value>$latitude</value>
177
                                                </_nullIf>
178
                                            </latitude_deg>
179
                                            <longitude_deg>
180
                                                <_nullIf>
181
                                                    <null>0</null>
182
                                                    <type>float</type>
183
                                                    <value>$longitude</value>
184
                                                </_nullIf>
185
                                            </longitude_deg>
186
                                        </coordinates>
187
                                    </coordinates_id>
188
                                    <continent>$continent_ocean</continent>
189
                                    <country>$country</country>
190
                                    <county>$county</county>
191
                                    <matched_place_id>
192
                                        <place>
193
                                            <source_id><source><shortname>geoscrub</shortname></source></source_id>
194
                                            <coordinates_id>
195
                                                <coordinates>
196
                                                    <source_id><source><shortname>geoscrub</shortname></source></source_id>
197
                                                    <latitude_deg>
198
                                                        <_nullIf>
199
                                                            <null>0</null>
200
                                                            <type>float</type>
201
                                                            <value>$latitude</value>
202
                                                        </_nullIf>
203
                                                    </latitude_deg>
204
                                                    <longitude_deg>
205
                                                        <_nullIf>
206
                                                            <null>0</null>
207
                                                            <type>float</type>
208
                                                            <value>$longitude</value>
209
                                                        </_nullIf>
210
                                                    </longitude_deg>
211
                                                </coordinates>
212
                                            </coordinates_id>
213
                                            <country>$country</country>
214
                                            <county>$county</county>
215
                                            <stateprovince>$state_province</stateprovince>
216
                                        </place>
217
                                    </matched_place_id>
218
                                    <stateprovince>$state_province</stateprovince>
219
                                </place>
220
                            </place_id>
221
                        </locationplace>
222
                    </location>
223
                </path>
224
            </_simplifyPath>
225
        </path>
226
    </_setDefault>
227
</VegBIEN>
228
Inserted 0 new rows into database
inputs/GBIF/raw_occurrence_record/VegBIEN.csv
1
GBIF,VegBIEN:/_setDefault:[source_id/source/shortname/_env:[name=source]]/path/_simplifyPath:[next=parent_id]/path,Comments
2
catalogue_number,"/_if[@name=""if specimen""]/cond/_exists",
3
id,"/_if[@name=""if specimen""]/cond/_exists",
4
institution_code,"/_if[@name=""if specimen""]/else/source/shortname/_first/1",
5
id,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/1",
6
catalogue_number,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/cond/_exists",
7
catalogue_number,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/then/_join/1/_if[@name=""if specimen""]/cond/_exists",
8
id,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/then/_join/1/_if[@name=""if specimen""]/cond/_exists",
9
institution_code,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/then/_join/1/_if[@name=""if specimen""]/then/_first/2",
10
collection_code,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/then/_join/2/_join/1",
11
catalogue_number,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/then/_join/3/_if[@name=""if indirect voucher""]/else",
12
min_altitude,/location/elevation_m/_alt/2/_avg/1,
13
max_altitude,/location/elevation_m/_alt/2/_avg/2,
14
locality,/location/iscultivated/_or/2/_locationnarrative_is_cultivated/locationnarrative/_join/1,
15
identification_date,"/location/locationevent/taxonoccurrence/_if[@name=""if has accepted name""]/then/taxondetermination:[determinationtype=accepted]/determinationdate/_alt/1/_dateRangeStart/value",
16
catalogue_number,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/catalognumber_dwc/_if[@name=""if indirect voucher""]/else",
17
collection_code,/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/collectioncode_dwc/_alt/1,Brad: Not sure if mapping correct. DwC element; code for entire collection; often same as InstitutionCode; Aaron: Combining with InstitutionCode to create collection name
18
catalogue_number,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/institution_id/sourcelist/_if[@name=""if specimen""]/cond/_exists",
19
id,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/institution_id/sourcelist/_if[@name=""if specimen""]/cond/_exists",
20
institution_code,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/institution_id/sourcelist/_if[@name=""if specimen""]/then/name/_first/2",
21
institution_code,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/institution_id/sourcelist/_if[@name=""if specimen""]/then/sourcename/name/_first/2/_split:[separator=""[,;] *""]/value",
22
id,/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/sourceaccessioncode,
23
scientific_name,"/location/locationevent/taxonoccurrence/authortaxoncode/_if[@name=""if in plot""]/then/_first/3/_first/2",
24
collector_name,/location/locationevent/taxonoccurrence/collector_id/party/fullname,
25
id,"/location/locationevent/taxonoccurrence/iscultivated/_alt/1/_if[@name=""if TaxonOccurrence""]/cond/_exists",
26
id,/location/locationevent/taxonoccurrence/sourceaccessioncode/_first/3,
27
identifier_name,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/party/fullname,
28
rank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/_if[@name=""if has explicit parent""]/else/rank",
29
subspecies,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=subspecies]/taxonepithet",
30
species,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=species]/taxonepithet",
31
genus,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=genus]/taxonepithet",
32
family,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,genus,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=family]/taxonepithet",
33
order_rank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,genus,family,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=order]/taxonepithet",
34
class,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,genus,family,order,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=class]/taxonepithet",
35
phylum,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,genus,family,order,class,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=phylum]/taxonepithet",
36
kingdom,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,genus,family,order,class,phylum,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=kingdom]/taxonepithet",
37
rank,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/rank,
38
rank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonepithet/_if[@name=""if taxonomic name is epithet""]/cond/_alt:[2=true]/1/_taxonomic_name_is_epithet/rank",
39
family,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/prefix/_taxon_family_require_std/family,
40
scientific_name,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/value/_alt/1,
41
genus,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/value/_alt/2/_join_words/1/_alt/2/_join_words/2,
42
species,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/value/_alt/2/_join_words/1/_alt/2/_join_words/3/_join_words/1,
43
subspecies,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/value/_alt/2/_join_words/1/_alt/2/_join_words/4,
44
rank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/value/_alt/2/_join_words/1/_alt/2/_join_words/5/_join_words/1/_if[@name=""if has infraspecificEpithet""]/then",
45
family,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/family,
46
genus,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/genus,
47
species,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/specific_epithet,
48
scientific_name,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/taxonomicname,
49
rank,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/verbatimrank,
50
identification_date,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/determinationdate/_alt/1/_dateRangeStart/value,
51
catalogue_number,"/location/locationevent/taxonoccurrence/voucher/*_id/specimenreplicate/catalognumber_dwc/_if[@name=""if indirect voucher""]/then",
52
min_depth,/location/locationevent/waterdepth_m/_avg/1,
53
max_depth,/location/locationevent/waterdepth_m/_avg/2,
54
locality,/location/locationnarrative/_merge/1,
55
latitude,"/location/locationplace/*_id/place/*_id/coordinates/latitude_deg/_nullIf:[null=0,type=float]/value",
56
longitude,"/location/locationplace/*_id/place/*_id/coordinates/longitude_deg/_nullIf:[null=0,type=float]/value",
57
continent_ocean,/location/locationplace/*_id/place/continent,
58
country,/location/locationplace/*_id/place/country,
59
county,/location/locationplace/*_id/place/county,
60
latitude,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/latitude_deg/_nullIf:[null=0,type=float]/value",""".,"" sorts it with other coordinates mappings"
61
longitude,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/longitude_deg/_nullIf:[null=0,type=float]/value",""".,"" sorts it with other coordinates mappings"
62
country,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/country/_first/2,
63
county,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/county/_first/2,
64
state_province,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/stateprovince/_first/2,
65
state_province,/location/locationplace/*_id/place/stateprovince,
66
altitude_precision,,** No join mapping for *altitude_precision ** 
67
author,,** No join mapping for authors ** 
68
basis_of_record,,** No join mapping for basisOfRecord ** 
69
created,,** No join mapping for *created ** 
70
data_provider_id,,** No join mapping for *data_provider_id ** 
71
data_resource_id,,** No join mapping for *data_resource_id ** 
72
day,,** No join mapping for day ** 
73
deleted,,** No join mapping for *deleted ** 
74
depth_precision,,** No join mapping for *depth_precision ** 
75
lat_long_precision,,** No join mapping for coordinatePrecision_deg ** Tentative
76
modified,,** No join mapping for modified ** 
77
month,,** No join mapping for month ** 
78
resource_access_point_id,,** No join mapping for *resource_access_point_id ** 
79
unit_qualifier,,** No join mapping for *unit_qualifier ** 
80
year,,** No join mapping for year ** 
inputs/GBIF/raw_occurrence_record_plants/run
1
#!/bin/bash -e
2
. "$(dirname "${BASH_SOURCE[0]}")"/../table.run
3

  
4
if self_not_included; then
5

  
6
full_table="${table%_plants}"
7

  
8
herbaria_filter.ih.csv_/make()
9
{
10
	echo_func; set_make_vars; check_target_exists
11
	
12
	use_local
13
	limit= query='SELECT DISTINCT acronym FROM herbaria.ih ORDER BY acronym' \
14
to_target pg_export
15
}
16

  
17
plant_fraction/make() # runtime: 1 hr ("10603 rows affected (53 min 44.68 sec)")
18
{
19
	echo_func; set_make_vars; mk_table_esc
20
	
21
	use_local_remote
22
	if remaking; then table=plant_fraction table_esc= mk_drop|mysql_ANSI; fi
23
	(benign_error=1 mysql <<'EOF'
24
CREATE TABLE `plant_fraction` (
25
 `institution_code` varchar(255) NOT NULL PRIMARY KEY,
26
 `plant_fraction` float NOT NULL,
27
 KEY `plant_fraction` (`plant_fraction`)
28
) ENGINE=MyISAM DEFAULT CHARSET=utf8
29
EOF
30
) || table=plant_fraction skip_table || return 0
31
	mysql_ANSI <<EOF
32
INSERT INTO "plant_fraction"
33
SELECT
34
  institution_code
35
, COUNT(NULLIF(
36
	family LIKE '%aceae%' OR family IN ('Compositae','Gramineae','Palmae','Guttiferae','Cruciferae','Labiatae','Umbelliferae','Leguminosae')
37
	, false))/COUNT(*)
38
	AS plant_fraction
39
FROM ${limit:+(SELECT * FROM }$table_esc ${limit:+$(prefix= limit)) s}
40
GROUP BY institution_code
41
ORDER BY institution_code
42
EOF
43
}
44

  
45
plant_fraction/seal()
46
{
47
	echo_func; set_make_vars
48
	
49
	use_local_remote
50
	table=plant_fraction table_esc= mysql_seal_table
51
}
52

  
53
plant_fraction_for_herbaria_filter/make()
54
{
55
	echo_func; set_make_vars; mk_table_esc
56
	
57
	plant_fraction/make
58
	use_local_remote
59
	if remaking; then
60
		table=plant_fraction_for_herbaria_filter table_esc= mk_drop|mysql_ANSI
61
	fi
62
	(benign_error=1 mysql <<'EOF'
63
CREATE VIEW `plant_fraction_for_herbaria_filter` AS
64
SELECT * FROM `plant_fraction`
65
WHERE plant_fraction >= 0.8
66
EOF
67
) || table=plant_fraction_for_herbaria_filter skip_table || return 0
68
}
69

  
70
herbaria_filter/make() # runtime: ~0 ("0m2.235s"): CSV import, index scans only
71
{
72
	echo_func; set_make_vars
73
	
74
	use_local_remote
75
	local table="$target_stem"
76
	if remaking; then mk_drop|mysql_ANSI; fi
77
	
78
	herbaria_filter.ih.csv_/make
79
	plant_fraction_for_herbaria_filter/make
80
	
81
	(benign_error=1 mysql <<'EOF'
82
CREATE TABLE `herbaria_filter` (
83
 `institution_code` varchar(255) NOT NULL PRIMARY KEY
84
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='contains all of IH.vegpath.org?&Index+Herbariorum'
85
EOF
86
) || skip_table || return 0
87
	
88
	time (
89
	mysql_ANSI <<EOF
90
INSERT INTO "herbaria_filter"
91
SELECT institution_code FROM plant_fraction_for_herbaria_filter
92
EOF
93
	append=1 mysql_import <"$top_dir"/herbaria_filter.ih.csv_
94
	) || return
95
}
96

  
97
herbaria_filter/seal()
98
{
99
	echo_func; set_make_vars
100
	
101
	use_local_remote
102
	table=herbaria_filter table_esc= mysql_seal_table
103
}
104

  
105
# table.tsv.gz/make() runtime: 35 min ("34m55.802s")
106

  
107
func_override table.tsv/make__src_table_run
108
table.tsv/make() # usage: [remake=1] [limit=...] .../run table.tsv/make
109
{
110
	echo_func; set_make_vars; check_target_exists
111
	
112
	herbaria_filter/make
113
	table="$full_table" filter="
114
deleted IS NULL
115
AND institution_code IN (SELECT * FROM herbaria_filter)" \
116
self_make table.tsv/make__src_table_run
117
}
118

  
119
fi
0 120

  
inputs/GBIF/raw_occurrence_record_plants/postprocess.sql
1
CREATE INDEX ON :table ("institution_code");
2

  
3
-- Remove institutions that we have direct data for
4
DELETE FROM :table
5
WHERE institution_code IN (
6
      'ACAD'
7
    , 'ARIZ'
8
    , 'BRIT'
9
    , 'MO'
10
    , 'MT'
11
    , 'NY'
12
    , 'QFA'
13
    , 'TEX'
14
    , 'TRT'
15
    , 'TRTE'
16
    , 'U'
17
    , 'UBC'
18
    , 'WIN'
19
)
20
/* list obtained using the following on r9459:
21
SELECT DISTINCT dataprovider
22
FROM sourcelist
23
JOIN provider_count ON provider_count.dataprovider = sourcelist.name
24
WHERE source_id = source_by_shortname('GBIF')
25
ORDER BY dataprovider
26
*/
27
;
inputs/GBIF/raw_occurrence_record_plants/header.csv
1
row_num,id,data_provider_id,data_resource_id,resource_access_point_id,institution_code,collection_code,catalogue_number,scientific_name,author,rank,kingdom,phylum,class,order_rank,family,genus,species,subspecies,latitude,longitude,lat_long_precision,max_altitude,min_altitude,altitude_precision,min_depth,max_depth,depth_precision,continent_ocean,country,state_province,county,collector_name,locality,year,month,day,basis_of_record,identifier_name,identification_date,unit_qualifier,created,modified,deleted
inputs/GBIF/raw_occurrence_record_plants/map.csv
1
GBIF,VegCore,Filter,Comments
2
row_num,raw_occurrence_record-row_num,,
3
id,dataProviderRecordID,,
4
data_provider_id,*data_provider_id,,
5
data_resource_id,*data_resource_id,,
6
resource_access_point_id,*resource_access_point_id,,
7
institution_code,specimenHolderInstitutions,,
8
collection_code,collection,,
9
catalogue_number,accessionNumber,,
10
scientific_name,scientificName,,
11
author,authors,,
12
rank,taxonRank,,
13
kingdom,kingdom,,
14
phylum,phylum,,
15
class,class,,
16
order_rank,order,,
17
family,family,,
18
genus,genus,,
19
species,specificEpithet,,
20
subspecies,subspecies,,
21
latitude,decimalLatitude,,
22
longitude,decimalLongitude,,
23
lat_long_precision,coordinatePrecision_deg,,Tentative
24
max_altitude,maximumElevationInMeters,,
25
min_altitude,minimumElevationInMeters,,
26
altitude_precision,*altitude_precision,,
27
min_depth,minimumDepthInMeters,,
28
max_depth,maximumDepthInMeters,,
29
depth_precision,*depth_precision,,
30
continent_ocean,continent,,
31
country,country,,
32
state_province,stateProvince,,
33
county,county,,
34
collector_name,recordedBy,,
35
locality,locality,,
36
year,year,,
37
month,month,,
38
day,day,,
39
basis_of_record,basisOfRecord,,
40
identifier_name,identifiedBy,,
41
identification_date,dateIdentified,,
42
unit_qualifier,*unit_qualifier,,
43
created,*created,,
44
modified,modified,,
45
deleted,*deleted,,
inputs/GBIF/raw_occurrence_record_plants/new_terms.csv
1
row_num,raw_occurrence_record-row_num,,
2
catalogue_number,accessionNumber,,
3
rank,taxonRank,,
4
order_rank,order,,
5
lat_long_precision,coordinatePrecision_deg,,Tentative
6
max_altitude,maximumElevationInMeters,,
7
min_altitude,minimumElevationInMeters,,
8
min_depth,minimumDepthInMeters,,
9
max_depth,maximumDepthInMeters,,
10
collector_name,recordedBy,,
11
identifier_name,identifiedBy,,
12
identification_date,dateIdentified,,
inputs/GBIF/raw_occurrence_record_plants/unmapped_terms.csv
1
raw_occurrence_record-row_num
2
*data_provider_id
3
*data_resource_id
4
*resource_access_point_id
5
authors
6
coordinatePrecision_deg
7
*altitude_precision
8
*depth_precision
9
year
10
month
11
day
12
basisOfRecord
13
*unit_qualifier
14
*created
15
modified
16
*deleted
inputs/GBIF/raw_occurrence_record_plants/VegBIEN.csv
1
GBIF,VegBIEN:/_setDefault:[source_id/source/shortname/_env:[name=source]]/path/_simplifyPath:[next=parent_id]/path,Comments
2
catalogue_number,"/_if[@name=""if specimen""]/cond/_exists",
3
id,"/_if[@name=""if specimen""]/cond/_exists",
4
institution_code,"/_if[@name=""if specimen""]/else/source/shortname/_first/1",
5
id,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/1",
6
catalogue_number,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/cond/_exists",
7
catalogue_number,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/then/_join/1/_if[@name=""if specimen""]/cond/_exists",
8
id,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/then/_join/1/_if[@name=""if specimen""]/cond/_exists",
9
institution_code,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/then/_join/1/_if[@name=""if specimen""]/then/_first/2",
10
collection_code,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/then/_join/2/_join/1",
11
catalogue_number,"/location/_if[@name=""if subplot""]/else/authorlocationcode/_first/3/_if[@name=""if plot""]/else/_alt/2/_if[@name=""if catalogNumber""]/then/_join/3/_if[@name=""if indirect voucher""]/else",
12
min_altitude,/location/elevation_m/_alt/2/_avg/1,
13
max_altitude,/location/elevation_m/_alt/2/_avg/2,
14
locality,/location/iscultivated/_or/2/_locationnarrative_is_cultivated/locationnarrative/_join/1,
15
identification_date,"/location/locationevent/taxonoccurrence/_if[@name=""if has accepted name""]/then/taxondetermination:[determinationtype=accepted]/determinationdate/_alt/1/_dateRangeStart/value",
16
catalogue_number,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/catalognumber_dwc/_if[@name=""if indirect voucher""]/else",
17
collection_code,/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/collectioncode_dwc/_alt/1,Brad: Not sure if mapping correct. DwC element; code for entire collection; often same as InstitutionCode; Aaron: Combining with InstitutionCode to create collection name
18
catalogue_number,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/institution_id/sourcelist/_if[@name=""if specimen""]/cond/_exists",
19
id,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/institution_id/sourcelist/_if[@name=""if specimen""]/cond/_exists",
20
institution_code,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/institution_id/sourcelist/_if[@name=""if specimen""]/then/name/_first/2",
21
institution_code,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/institution_id/sourcelist/_if[@name=""if specimen""]/then/sourcename/name/_first/2/_split:[separator=""[,;] *""]/value",
22
id,/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/sourceaccessioncode,
23
scientific_name,"/location/locationevent/taxonoccurrence/authortaxoncode/_if[@name=""if in plot""]/then/_first/3/_first/2",
24
collector_name,/location/locationevent/taxonoccurrence/collector_id/party/fullname,
25
id,"/location/locationevent/taxonoccurrence/iscultivated/_alt/1/_if[@name=""if TaxonOccurrence""]/cond/_exists",
26
id,/location/locationevent/taxonoccurrence/sourceaccessioncode/_first/3,
27
identifier_name,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/party/fullname,
28
rank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/_if[@name=""if has explicit parent""]/else/rank",
29
subspecies,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=subspecies]/taxonepithet",
30
species,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=species]/taxonepithet",
31
genus,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=genus]/taxonepithet",
32
family,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,genus,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=family]/taxonepithet",
33
order_rank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,genus,family,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=order]/taxonepithet",
34
class,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,genus,family,order,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=class]/taxonepithet",
35
phylum,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,genus,family,order,class,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=phylum]/taxonepithet",
36
kingdom,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,species,genus,family,order,class,phylum,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=kingdom]/taxonepithet",
37
rank,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/rank,
38
rank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonepithet/_if[@name=""if taxonomic name is epithet""]/cond/_alt:[2=true]/1/_taxonomic_name_is_epithet/rank",
39
family,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/prefix/_taxon_family_require_std/family,
40
scientific_name,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/value/_alt/1,
41
genus,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/value/_alt/2/_join_words/1/_alt/2/_join_words/2,
42
species,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/value/_alt/2/_join_words/1/_alt/2/_join_words/3/_join_words/1,
43
subspecies,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/value/_alt/2/_join_words/1/_alt/2/_join_words/4,
44
rank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/taxonomicname/_merge_prefix/value/_alt/2/_join_words/1/_alt/2/_join_words/5/_join_words/1/_if[@name=""if has infraspecificEpithet""]/then",
45
family,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/family,
46
genus,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/genus,
47
species,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/specific_epithet,
48
scientific_name,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/taxonomicname,
49
rank,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/verbatimrank,
50
identification_date,/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/determinationdate/_alt/1/_dateRangeStart/value,
51
catalogue_number,"/location/locationevent/taxonoccurrence/voucher/*_id/specimenreplicate/catalognumber_dwc/_if[@name=""if indirect voucher""]/then",
52
min_depth,/location/locationevent/waterdepth_m/_avg/1,
53
max_depth,/location/locationevent/waterdepth_m/_avg/2,
54
locality,/location/locationnarrative/_merge/1,
55
latitude,"/location/locationplace/*_id/place/*_id/coordinates/latitude_deg/_nullIf:[null=0,type=float]/value",
56
longitude,"/location/locationplace/*_id/place/*_id/coordinates/longitude_deg/_nullIf:[null=0,type=float]/value",
57
continent_ocean,/location/locationplace/*_id/place/continent,
58
country,/location/locationplace/*_id/place/country,
59
county,/location/locationplace/*_id/place/county,
60
latitude,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/latitude_deg/_nullIf:[null=0,type=float]/value",""".,"" sorts it with other coordinates mappings"
61
longitude,"/location/locationplace/*_id/place/matched_place_id/place:[.,source_id/source/shortname=geoscrub]/*_id/coordinates:[source_id/source/shortname=geoscrub]/_first/2/longitude_deg/_nullIf:[null=0,type=float]/value",""".,"" sorts it with other coordinates mappings"
62
country,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/country/_first/2,
63
county,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/county/_first/2,
64
state_province,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/stateprovince/_first/2,
65
state_province,/location/locationplace/*_id/place/stateprovince,
66
altitude_precision,,** No join mapping for *altitude_precision ** 
67
author,,** No join mapping for authors ** 
68
basis_of_record,,** No join mapping for basisOfRecord ** 
69
created,,** No join mapping for *created ** 
70
data_provider_id,,** No join mapping for *data_provider_id ** 
71
data_resource_id,,** No join mapping for *data_resource_id ** 
72
day,,** No join mapping for day ** 
73
deleted,,** No join mapping for *deleted ** 
74
depth_precision,,** No join mapping for *depth_precision ** 
75
lat_long_precision,,** No join mapping for coordinatePrecision_deg ** Tentative
76
modified,,** No join mapping for modified ** 
77
month,,** No join mapping for month ** 
78
resource_access_point_id,,** No join mapping for *resource_access_point_id ** 
79
unit_qualifier,,** No join mapping for *unit_qualifier ** 
80
year,,** No join mapping for year ** 
inputs/GBIF/raw_occurrence_record_plants/test.xml.ref
1
Put template:
2
<VegBIEN>
3
    <_setDefault id="-1">
4
        <source_id><source><shortname>GBIF</shortname></source></source_id>
5
        <path>
6
            <_simplifyPath>
7
                <next>parent_id</next>
8
                <path>
9
                    <location>
10
                        <authorlocationcode>
11
                            <_alt>
12
                                <1>$id</1>
13
                                <2>
14
                                    <_join>
15
                                        <1>$institution_code</1>
16
                                        <2>$collection_code</2>
17
                                        <3>$catalogue_number</3>
18
                                    </_join>
19
                                </2>
20
                            </_alt>
21
                        </authorlocationcode>
22
                        <elevation_m>
23
                            <_avg>
24
                                <1>$min_altitude</1>
25
                                <2>$max_altitude</2>
26
                            </_avg>
27
                        </elevation_m>
28
                        <iscultivated><_locationnarrative_is_cultivated><locationnarrative>$locality</locationnarrative></_locationnarrative_is_cultivated></iscultivated>
29
                        <locationevent>
30
                            <taxonoccurrence>
31
                                <aggregateoccurrence>
32
                                    <plantobservation>
33
                                        <specimenreplicate>
34
                                            <catalognumber_dwc>$catalogue_number</catalognumber_dwc>
35
                                            <collectioncode_dwc>$collection_code</collectioncode_dwc>
36
                                            <institution_id>
37
                                                <sourcelist>
38
                                                    <name>$institution_code</name>
39
                                                    <sourcename>
40
                                                        <name>
41
                                                            <_split>
42
                                                                <separator>[,;] *</separator>
43
                                                                <value>$institution_code</value>
44
                                                            </_split>
45
                                                        </name>
46
                                                    </sourcename>
47
                                                </sourcelist>
48
                                            </institution_id>
49
                                            <sourceaccessioncode>$id</sourceaccessioncode>
50
                                        </specimenreplicate>
51
                                    </plantobservation>
52
                                </aggregateoccurrence>
53
                                <collector_id><party><fullname>$collector_name</fullname></party></collector_id>
54
                                <sourceaccessioncode>$id</sourceaccessioncode>
55
                                <taxondetermination>
56
                                    <party_id><party><fullname>$identifier_name</fullname></party></party_id>
57
                                    <taxonverbatim_id>
58
                                        <taxonverbatim>
59
                                            <taxonlabel_id>
60
                                                <taxonlabel>
61
                                                    <parent_id>
62
                                                        <taxonlabel>
63
                                                            <rank>$rank</rank>
64
                                                            <parent_id>
65
                                                                <taxonlabel>
66
                                                                    <rank>cultivar</rank>
67
                                                                    <parent_id>
68
                                                                        <taxonlabel>
69
                                                                            <rank>forma</rank>
70
                                                                            <parent_id>
71
                                                                                <taxonlabel>
72
                                                                                    <rank>variety</rank>
73
                                                                                    <parent_id>
74
                                                                                        <taxonlabel>
75
                                                                                            <rank>subspecies</rank>
76
                                                                                            <taxonepithet>$subspecies</taxonepithet>
77
                                                                                            <parent_id>
78
                                                                                                <taxonlabel>
79
                                                                                                    <rank>species</rank>
80
                                                                                                    <taxonepithet>$species</taxonepithet>
81
                                                                                                    <parent_id>
82
                                                                                                        <taxonlabel>
83
                                                                                                            <rank>genus</rank>
84
                                                                                                            <taxonepithet>$genus</taxonepithet>
85
                                                                                                            <parent_id>
86
                                                                                                                <taxonlabel>
87
                                                                                                                    <rank>family</rank>
88
                                                                                                                    <taxonepithet>$family</taxonepithet>
89
                                                                                                                    <parent_id>
90
                                                                                                                        <taxonlabel>
91
                                                                                                                            <rank>order</rank>
92
                                                                                                                            <taxonepithet>$order_rank</taxonepithet>
93
                                                                                                                            <parent_id>
94
                                                                                                                                <taxonlabel>
95
                                                                                                                                    <rank>class</rank>
96
                                                                                                                                    <taxonepithet>$class</taxonepithet>
97
                                                                                                                                    <parent_id>
98
                                                                                                                                        <taxonlabel>
99
                                                                                                                                            <rank>phylum</rank>
100
                                                                                                                                            <taxonepithet>$phylum</taxonepithet>
101
                                                                                                                                            <parent_id>
102
                                                                                                                                                <taxonlabel>
103
                                                                                                                                                    <rank>kingdom</rank>
104
                                                                                                                                                    <taxonepithet>$kingdom</taxonepithet>
105
                                                                                                                                                </taxonlabel>
106
                                                                                                                                            </parent_id>
107
                                                                                                                                        </taxonlabel>
108
                                                                                                                                    </parent_id>
109
                                                                                                                                </taxonlabel>
110
                                                                                                                            </parent_id>
111
                                                                                                                        </taxonlabel>
112
                                                                                                                    </parent_id>
113
                                                                                                                </taxonlabel>
114
                                                                                                            </parent_id>
115
                                                                                                        </taxonlabel>
116
                                                                                                    </parent_id>
117
                                                                                                </taxonlabel>
118
                                                                                            </parent_id>
119
                                                                                        </taxonlabel>
120
                                                                                    </parent_id>
121
                                                                                </taxonlabel>
122
                                                                            </parent_id>
123
                                                                        </taxonlabel>
124
                                                                    </parent_id>
125
                                                                </taxonlabel>
126
                                                            </parent_id>
127
                                                        </taxonlabel>
128
                                                    </parent_id>
129
                                                    <rank>$rank</rank>
130
                                                    <taxonomicname>
131
                                                        <_merge_prefix>
132
                                                            <prefix><_taxon_family_require_std><family>$family</family></_taxon_family_require_std></prefix>
133
                                                            <value>
134
                                                                <_alt>
135
                                                                    <1>$scientific_name</1>
136
                                                                    <2>
137
                                                                        <_join_words>
138
                                                                            <2>$genus</2>
139
                                                                            <3>$species</3>
140
                                                                            <4>$subspecies</4>
141
                                                                        </_join_words>
142
                                                                    </2>
143
                                                                </_alt>
144
                                                            </value>
145
                                                        </_merge_prefix>
146
                                                    </taxonomicname>
147
                                                </taxonlabel>
148
                                            </taxonlabel_id>
149
                                            <family>$family</family>
150
                                            <genus>$genus</genus>
151
                                            <specific_epithet>$species</specific_epithet>
152
                                            <taxonomicname>$scientific_name</taxonomicname>
153
                                            <verbatimrank>$rank</verbatimrank>
154
                                        </taxonverbatim>
155
                                    </taxonverbatim_id>
156
                                    <determinationdate><_dateRangeStart><value>$identification_date</value></_dateRangeStart></determinationdate>
157
                                </taxondetermination>
158
                            </taxonoccurrence>
159
                            <waterdepth_m>
160
                                <_avg>
161
                                    <1>$min_depth</1>
162
                                    <2>$max_depth</2>
163
                                </_avg>
164
                            </waterdepth_m>
165
                        </locationevent>
166
                        <locationnarrative>$locality</locationnarrative>
167
                        <locationplace>
168
                            <place_id>
169
                                <place>
170
                                    <coordinates_id>
171
                                        <coordinates>
172
                                            <latitude_deg>
173
                                                <_nullIf>
174
                                                    <null>0</null>
175
                                                    <type>float</type>
176
                                                    <value>$latitude</value>
177
                                                </_nullIf>
178
                                            </latitude_deg>
179
                                            <longitude_deg>
180
                                                <_nullIf>
181
                                                    <null>0</null>
182
                                                    <type>float</type>
183
                                                    <value>$longitude</value>
184
                                                </_nullIf>
185
                                            </longitude_deg>
186
                                        </coordinates>
187
                                    </coordinates_id>
188
                                    <continent>$continent_ocean</continent>
189
                                    <country>$country</country>
190
                                    <county>$county</county>
191
                                    <matched_place_id>
192
                                        <place>
193
                                            <source_id><source><shortname>geoscrub</shortname></source></source_id>
194
                                            <coordinates_id>
195
                                                <coordinates>
196
                                                    <source_id><source><shortname>geoscrub</shortname></source></source_id>
197
                                                    <latitude_deg>
198
                                                        <_nullIf>
199
                                                            <null>0</null>
200
                                                            <type>float</type>
201
                                                            <value>$latitude</value>
202
                                                        </_nullIf>
203
                                                    </latitude_deg>
204
                                                    <longitude_deg>
205
                                                        <_nullIf>
206
                                                            <null>0</null>
207
                                                            <type>float</type>
208
                                                            <value>$longitude</value>
209
                                                        </_nullIf>
210
                                                    </longitude_deg>
211
                                                </coordinates>
212
                                            </coordinates_id>
213
                                            <country>$country</country>
214
                                            <county>$county</county>
215
                                            <stateprovince>$state_province</stateprovince>
216
                                        </place>
217
                                    </matched_place_id>
218
                                    <stateprovince>$state_province</stateprovince>
219
                                </place>
220
                            </place_id>
221
                        </locationplace>
222
                    </location>
223
                </path>
224
            </_simplifyPath>
225
        </path>
226
    </_setDefault>
227
</VegBIEN>
228
Inserted 0 new rows into database

Also available in: Unified diff