Revision 10604
Added by Aaron Marcuse-Kubitza over 11 years ago
inputs/UNCC/Specimen/postprocess.sql | ||
---|---|---|
1 |
SELECT util.search_path_append('util'); |
|
2 |
|
|
3 |
SELECT util.rename_cols(:table_str, $$ |
|
4 |
cultivated => *cultivated, |
|
5 |
flower => *flower, |
|
6 |
fruit => *fruit, |
|
7 |
$$::hstore); |
|
8 |
|
|
9 |
|
|
10 |
SELECT mk_derived_col((:table_str, 'specimenDonorInstitution'), |
|
11 |
-- UNCCD likely = UNCC database |
|
12 |
$$_map('UNCCD=>UNCC, *=>*', "herbarium")$$) |
|
13 |
; -- runtime: 1 s ("Time: 1057.222 ms") @starscream |
|
14 |
|
|
15 |
SELECT mk_derived_col((:table_str, 'cultivated__campus'), |
|
16 |
/* "this refers to occurrence on the UNCC campus, but I do not know the meaning |
|
17 |
of the codes. Presumably C = cultivated" |
|
18 |
(https://projects.nceas.ucsb.edu/nceas/attachments/download/723/UNCC-validate_rkp_bb_rkp.xlsx#Campus) |
|
19 |
*/ |
|
20 |
$$_map('*=>true', "campus")$$) |
|
21 |
; -- runtime: 0.5 s ("Time: 353.373 ms") @starscream |
|
22 |
|
|
23 |
SELECT mk_derived_col((:table_str, 'cultivated__main'), |
|
24 |
-- for our purposes, anything non-NULL should be assumed to be cultivated |
|
25 |
$$_map('*=>true', "*cultivated")$$) |
|
26 |
; -- runtime: 0.5 s ("Time: 403.916 ms") @starscream |
|
27 |
|
|
28 |
|
|
29 |
CREATE OR REPLACE FUNCTION map_reproductive_condition(value text) |
|
30 |
RETURNS text AS |
|
31 |
$BODY$ |
|
32 |
/* |
|
33 |
" |
|
34 |
Most are typos |
|
35 |
|
|
36 |
A = absent |
|
37 |
|
|
38 |
I = immature |
|
39 |
|
|
40 |
M = mature |
|
41 |
" (mailto:jim@habitatassessment.com?Habitat_Assessment.Jim_Matthews.to(uniola@email.unc.edu?Bob_Peet).2010-12-3:17:08.Re:_herbarium_phenology_codes_(fwd)) |
|
42 |
*/ |
|
43 |
SELECT util._map('A=>absent, I=>immature, M=>mature, *=>NULL', $1) |
|
44 |
$BODY$ |
|
45 |
LANGUAGE sql IMMUTABLE |
|
46 |
COST 100; |
|
47 |
|
|
48 |
SELECT mk_derived_col((:table_str, 'flower'), $$map_reproductive_condition("*flower")$$); -- runtime: 0.5 s ("Time: 635.054 ms") @starscream |
|
49 |
SELECT mk_derived_col((:table_str, 'fruit' ), $$map_reproductive_condition("*fruit" )$$); -- runtime: 0.5 s ("Time: 591.854 ms") @starscream |
inputs/UNCC/Specimen/map.csv | ||
---|---|---|
9 | 9 |
GROUP BY herbarium, accession |
10 | 10 |
HAVING count(*) > 1 |
11 | 11 |
-----" |
12 |
herbarium,specimenDonorInstitution,"/_map:[UNCCD=UNCC,*=*]/value","""The name or acronym in use by the institution that donated the collection"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/593/UNCC-validation-Peet.xlsx#institutionCode). UNCCD likely = UNCC database."
|
|
12 |
herbarium,specimenDonorInstitution_verbatim,,"""The name or acronym in use by the institution that donated the collection"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/593/UNCC-validation-Peet.xlsx#institutionCode)"
|
|
13 | 13 |
family,family,, |
14 | 14 |
genus,genus,, |
15 | 15 |
species,specificEpithet,, |
... | ... | |
29 | 29 |
country,country,, |
30 | 30 |
state,stateProvince,, |
31 | 31 |
county,county,, |
32 |
campus,cultivated,/_alt/2/_map:[*=true]/value,"""this refers to occurrence on the UNCC campus, but I do not know the meaning of the codes. Presumably C = cultivated"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/723/UNCC-validate_rkp_bb_rkp.xlsx#Campus)"
|
|
32 |
campus,cultivated__campus__verbatim,,"""this refers to occurrence on the UNCC campus, but I do not know the meaning of the codes. Presumably C = cultivated"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/723/UNCC-validate_rkp_bb_rkp.xlsx#Campus)"
|
|
33 | 33 |
leaves,*leaves,, |
34 |
flower,reproductiveCondition,"/_join/1/_map:[A=absent,I=immature,M=mature]/value",Translation is from e-mail conv between Jim Matthews at Habitat Assessment <jim@habitatassessment.com> and Bob Peet on 2010-12-2~3
|
|
35 |
fruit,reproductiveCondition,"/_join/2/_map:[A=absent,I=immature,M=mature]/value",Translation is from e-mail conv between Jim Matthews at Habitat Assessment <jim@habitatassessment.com> and Bob Peet on 2010-12-2~3
|
|
34 |
flower,flower_verbatim,,
|
|
35 |
fruit,fruit_verbatim,,
|
|
36 | 36 |
root,*root,, |
37 | 37 |
locality,locality,, |
38 | 38 |
habitat,habitat,, |
... | ... | |
43 | 43 |
loanto,occurrenceRemarks,"/_join/6/_label[label=""loaned to""]/value", |
44 | 44 |
inorout,occurrenceRemarks,"/_join/7/_label[label=""in or out""]/value", |
45 | 45 |
sheetno,associatedMedia,,? |
46 |
cultivated,cultivated,/_alt/1/_map:[*=true]/value,"For our purposes, anything non-NULL should be assumed to be cultivated"
|
|
46 |
cultivated,cultivated__main__verbatim,,
|
|
47 | 47 |
filler,*filler,,"Where used, same as family" |
48 | 48 |
:UNCC,specimenHolderInstitutions,, |
49 |
flower,reproductiveCondition,/_join/1, |
|
50 |
fruit,reproductiveCondition,/_join/2, |
|
51 |
cultivated__campus,cultivated,/_alt/2, |
|
52 |
cultivated__main,cultivated,/_alt/1, |
inputs/UNCC/Specimen/VegBIEN.csv | ||
---|---|---|
38 | 38 |
HAVING count(*) > 1 |
39 | 39 |
-----" |
40 | 40 |
collectno,"/location/iscultivated/_or/1/_if[@name=""if TaxonOccurrence""]/cond/_exists", |
41 |
cultivated,"/location/iscultivated/_or/1/_if[@name=""if TaxonOccurrence""]/else/_first/1/_alt/1/_map:[*=true]/value","For our purposes, anything non-NULL should be assumed to be cultivated"
|
|
42 |
campus,"/location/iscultivated/_or/1/_if[@name=""if TaxonOccurrence""]/else/_first/1/_alt/2/_map:[*=true]/value","""this refers to occurrence on the UNCC campus, but I do not know the meaning of the codes. Presumably C = cultivated"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/723/UNCC-validate_rkp_bb_rkp.xlsx#Campus)"
|
|
41 |
cultivated__main,"/location/iscultivated/_or/1/_if[@name=""if TaxonOccurrence""]/else/_first/1/_alt/1",
|
|
42 |
cultivated__campus,"/location/iscultivated/_or/1/_if[@name=""if TaxonOccurrence""]/else/_first/1/_alt/2",
|
|
43 | 43 |
locality,/location/iscultivated/_or/2/_locationnarrative_is_cultivated/locationnarrative/_join/1, |
44 | 44 |
habitat,/location/iscultivated/_or/2/_locationnarrative_is_cultivated/locationnarrative/_join/3, |
45 | 45 |
collday,"/location/locationevent/taxonoccurrence/aggregateoccurrence/collectiondate/_alt/2/_date/day/_nullIf:[null=0,type=float]/value", |
... | ... | |
52 | 52 |
loanto,"/location/locationevent/taxonoccurrence/aggregateoccurrence/notes/_join/1/_join/6/_label[label=""loaned to""]/value", |
53 | 53 |
inorout,"/location/locationevent/taxonoccurrence/aggregateoccurrence/notes/_join/1/_join/7/_label[label=""in or out""]/value", |
54 | 54 |
collectno,/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/authorplantcode/_first/2, |
55 |
flower,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/reproductivecondition/_join/1/_map:[A=absent,I=immature,M=mature]/value",Translation is from e-mail conv between Jim Matthews at Habitat Assessment <jim@habitatassessment.com> and Bob Peet on 2010-12-2~3
|
|
56 |
fruit,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/reproductivecondition/_join/2/_map:[A=absent,I=immature,M=mature]/value",Translation is from e-mail conv between Jim Matthews at Habitat Assessment <jim@habitatassessment.com> and Bob Peet on 2010-12-2~3
|
|
55 |
flower,/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/reproductivecondition/_join/1,
|
|
56 |
fruit,/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/reproductivecondition/_join/2,
|
|
57 | 57 |
accession,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/catalognumber_dwc/_if[@name=""if indirect voucher""]/else","Globally unique, although occasionally duplicated. [1] |
58 | 58 |
|
59 | 59 |
[1] Using the following query: |
... | ... | |
82 | 82 |
collector2,/location/locationevent/taxonoccurrence/collector_id/party/fullname/_join/3, |
83 | 83 |
collector3,/location/locationevent/taxonoccurrence/collector_id/party/fullname/_join/4, |
84 | 84 |
collectno,"/location/locationevent/taxonoccurrence/iscultivated/_alt/1/_if[@name=""if TaxonOccurrence""]/cond/_exists", |
85 |
cultivated,"/location/locationevent/taxonoccurrence/iscultivated/_alt/1/_if[@name=""if TaxonOccurrence""]/then/_first/1/_alt/1/_map:[*=true]/value","For our purposes, anything non-NULL should be assumed to be cultivated"
|
|
86 |
campus,"/location/locationevent/taxonoccurrence/iscultivated/_alt/1/_if[@name=""if TaxonOccurrence""]/then/_first/1/_alt/2/_map:[*=true]/value","""this refers to occurrence on the UNCC campus, but I do not know the meaning of the codes. Presumably C = cultivated"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/723/UNCC-validate_rkp_bb_rkp.xlsx#Campus)"
|
|
85 |
cultivated__main,"/location/locationevent/taxonoccurrence/iscultivated/_alt/1/_if[@name=""if TaxonOccurrence""]/then/_first/1/_alt/1",
|
|
86 |
cultivated__campus,"/location/locationevent/taxonoccurrence/iscultivated/_alt/1/_if[@name=""if TaxonOccurrence""]/then/_first/1/_alt/2",
|
|
87 | 87 |
usdaRank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/_if[@name=""if has explicit parent""]/else/rank/_alt/1", |
88 | 88 |
infrarank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/_if[@name=""if has explicit parent""]/else/rank/_alt/2", |
89 | 89 |
species,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=species]/taxonepithet", |
... | ... | |
128 | 128 |
county,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/county/_first/2, |
129 | 129 |
state,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/stateprovince/_first/2, |
130 | 130 |
state,/location/locationplace/*_id/place/stateprovince, |
131 |
campus,,"** No join mapping for cultivated__campus__verbatim ** ""this refers to occurrence on the UNCC campus, but I do not know the meaning of the codes. Presumably C = cultivated"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/723/UNCC-validate_rkp_bb_rkp.xlsx#Campus)" |
|
132 |
cultivated,,** No join mapping for cultivated__main__verbatim ** |
|
131 | 133 |
filler,,"** No join mapping for *filler ** Where used, same as family" |
132 |
herbarium,,"** No join mapping for specimenDonorInstitution/_map:[UNCCD=UNCC,*=*]/value ** ""The name or acronym in use by the institution that donated the collection"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/593/UNCC-validation-Peet.xlsx#institutionCode). UNCCD likely = UNCC database." |
|
134 |
flower,,** No join mapping for flower_verbatim ** |
|
135 |
fruit,,** No join mapping for fruit_verbatim ** |
|
136 |
herbarium,,"** No join mapping for specimenDonorInstitution_verbatim ** ""The name or acronym in use by the institution that donated the collection"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/593/UNCC-validation-Peet.xlsx#institutionCode)" |
|
133 | 137 |
leaves,,** No join mapping for *leaves ** |
134 | 138 |
root,,** No join mapping for *root ** |
135 | 139 |
row_num,,** No join mapping for *row_num ** |
inputs/UNCC/Specimen/test.xml.ref | ||
---|---|---|
75 | 75 |
<authorplantcode>$collectno</authorplantcode> |
76 | 76 |
<reproductivecondition> |
77 | 77 |
<_join> |
78 |
<1> |
|
79 |
<_map> |
|
80 |
<A>absent</A> |
|
81 |
<I>immature</I> |
|
82 |
<M>mature</M> |
|
83 |
<value>$flower</value> |
|
84 |
</_map> |
|
85 |
</1> |
|
86 |
<2> |
|
87 |
<_map> |
|
88 |
<A>absent</A> |
|
89 |
<I>immature</I> |
|
90 |
<M>mature</M> |
|
91 |
<value>$fruit</value> |
|
92 |
</_map> |
|
93 |
</2> |
|
78 |
<1>$flower</1> |
|
79 |
<2>$fruit</2> |
|
94 | 80 |
</_join> |
95 | 81 |
</reproductivecondition> |
96 | 82 |
<specimenreplicate> |
... | ... | |
126 | 112 |
</collector_id> |
127 | 113 |
<iscultivated> |
128 | 114 |
<_alt> |
129 |
<1> |
|
130 |
<_map> |
|
131 |
<*>true</*> |
|
132 |
<value>$cultivated</value> |
|
133 |
</_map> |
|
134 |
</1> |
|
135 |
<2> |
|
136 |
<_map> |
|
137 |
<*>true</*> |
|
138 |
<value>$campus</value> |
|
139 |
</_map> |
|
140 |
</2> |
|
115 |
<1>$cultivated__main</1> |
|
116 |
<2>$cultivated__campus</2> |
|
141 | 117 |
</_alt> |
142 | 118 |
</iscultivated> |
143 | 119 |
<taxondetermination> |
inputs/UNCC/Specimen/new_terms.csv | ||
---|---|---|
1 |
herbarium,specimenDonorInstitution,"/_map:[UNCCD=UNCC,*=*]/value","""The name or acronym in use by the institution that donated the collection"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/593/UNCC-validation-Peet.xlsx#institutionCode). UNCCD likely = UNCC database."
|
|
1 |
herbarium,specimenDonorInstitution_verbatim,,"""The name or acronym in use by the institution that donated the collection"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/593/UNCC-validation-Peet.xlsx#institutionCode)"
|
|
2 | 2 |
usdaRank,taxonRank,/_alt/1, |
3 | 3 |
infrarank,taxonRank,/_alt/2, |
4 | 4 |
infraname,infraspecificEpithet,, |
... | ... | |
11 | 11 |
collmonth,monthCollected,, |
12 | 12 |
collday,dayCollected,, |
13 | 13 |
collyear,yearCollected,, |
14 |
campus,cultivated,/_alt/2/_map:[*=true]/value,"""this refers to occurrence on the UNCC campus, but I do not know the meaning of the codes. Presumably C = cultivated"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/723/UNCC-validate_rkp_bb_rkp.xlsx#Campus)"
|
|
14 |
campus,cultivated__campus__verbatim,,"""this refers to occurrence on the UNCC campus, but I do not know the meaning of the codes. Presumably C = cultivated"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/723/UNCC-validate_rkp_bb_rkp.xlsx#Campus)"
|
|
15 | 15 |
comment1,occurrenceRemarks,/_join/1, |
16 | 16 |
comment2,occurrenceRemarks,/_join/2, |
17 | 17 |
comment3,occurrenceRemarks,/_join/3, |
... | ... | |
19 | 19 |
loanto,occurrenceRemarks,"/_join/6/_label[label=""loaned to""]/value", |
20 | 20 |
inorout,occurrenceRemarks,"/_join/7/_label[label=""in or out""]/value", |
21 | 21 |
sheetno,associatedMedia,,? |
22 |
cultivated__campus,cultivated,/_alt/2, |
|
23 |
cultivated__main,cultivated,/_alt/1, |
inputs/UNCC/Specimen/unmapped_terms.csv | ||
---|---|---|
1 | 1 |
*row_num |
2 |
specimenDonorInstitution |
|
2 |
specimenDonorInstitution_verbatim |
|
3 |
cultivated__campus__verbatim |
|
3 | 4 |
*leaves |
5 |
flower_verbatim |
|
6 |
fruit_verbatim |
|
4 | 7 |
*root |
5 | 8 |
associatedMedia |
9 |
cultivated__main__verbatim |
|
6 | 10 |
*filler |
Also available in: Unified diff
inputs/UNCC/Specimen/: translated single-column filters to postprocessing derived columns, using the steps at http://wiki.vegpath.org/Adding_new-style_import_to_a_datasource#Translating-filters-to-postprocessing-derived-columns