Project

General

Profile

« Previous | Next » 

Revision 10604

inputs/UNCC/Specimen/: translated single-column filters to postprocessing derived columns, using the steps at http://wiki.vegpath.org/Adding_new-style_import_to_a_datasource#Translating-filters-to-postprocessing-derived-columns

View differences:

inputs/UNCC/Specimen/postprocess.sql
1
SELECT util.search_path_append('util');
2

  
3
SELECT util.rename_cols(:table_str, $$
4
cultivated => *cultivated,
5
flower     => *flower,
6
fruit      => *fruit,
7
$$::hstore);
8

  
9

  
10
SELECT mk_derived_col((:table_str, 'specimenDonorInstitution'),
11
-- UNCCD likely = UNCC database
12
$$_map('UNCCD=>UNCC, *=>*', "herbarium")$$)
13
; -- runtime: 1 s ("Time: 1057.222 ms") @starscream
14

  
15
SELECT mk_derived_col((:table_str, 'cultivated__campus'),
16
/* "this refers to occurrence on the UNCC campus, but I do not know the meaning
17
of the codes. Presumably C = cultivated"
18
(https://projects.nceas.ucsb.edu/nceas/attachments/download/723/UNCC-validate_rkp_bb_rkp.xlsx#Campus)
19
*/
20
$$_map('*=>true', "campus")$$)
21
; -- runtime: 0.5 s ("Time: 353.373 ms") @starscream
22

  
23
SELECT mk_derived_col((:table_str, 'cultivated__main'),
24
-- for our purposes, anything non-NULL should be assumed to be cultivated
25
$$_map('*=>true', "*cultivated")$$)
26
; -- runtime: 0.5 s ("Time: 403.916 ms") @starscream
27

  
28

  
29
CREATE OR REPLACE FUNCTION map_reproductive_condition(value text)
30
  RETURNS text AS
31
$BODY$
32
/*
33
"
34
Most are typos
35

  
36
A = absent
37

  
38
I = immature
39

  
40
M = mature
41
" (mailto:jim@habitatassessment.com?Habitat_Assessment.Jim_Matthews.to(uniola@email.unc.edu?Bob_Peet).2010-12-3:17:08.Re:_herbarium_phenology_codes_(fwd))
42
*/
43
SELECT util._map('A=>absent, I=>immature, M=>mature, *=>NULL', $1)
44
$BODY$
45
  LANGUAGE sql IMMUTABLE
46
  COST 100;
47

  
48
SELECT mk_derived_col((:table_str, 'flower'), $$map_reproductive_condition("*flower")$$); -- runtime: 0.5 s ("Time: 635.054 ms") @starscream
49
SELECT mk_derived_col((:table_str, 'fruit' ), $$map_reproductive_condition("*fruit" )$$); -- runtime: 0.5 s ("Time: 591.854 ms") @starscream
inputs/UNCC/Specimen/map.csv
9 9
GROUP BY herbarium, accession
10 10
HAVING count(*) > 1
11 11
-----"
12
herbarium,specimenDonorInstitution,"/_map:[UNCCD=UNCC,*=*]/value","""The name or acronym in use by the institution that donated the collection"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/593/UNCC-validation-Peet.xlsx#institutionCode). UNCCD likely = UNCC database."
12
herbarium,specimenDonorInstitution_verbatim,,"""The name or acronym in use by the institution that donated the collection"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/593/UNCC-validation-Peet.xlsx#institutionCode)"
13 13
family,family,,
14 14
genus,genus,,
15 15
species,specificEpithet,,
......
29 29
country,country,,
30 30
state,stateProvince,,
31 31
county,county,,
32
campus,cultivated,/_alt/2/_map:[*=true]/value,"""this refers to occurrence on the UNCC campus, but I do not know the meaning of the codes. Presumably C = cultivated"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/723/UNCC-validate_rkp_bb_rkp.xlsx#Campus)"
32
campus,cultivated__campus__verbatim,,"""this refers to occurrence on the UNCC campus, but I do not know the meaning of the codes. Presumably C = cultivated"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/723/UNCC-validate_rkp_bb_rkp.xlsx#Campus)"
33 33
leaves,*leaves,,
34
flower,reproductiveCondition,"/_join/1/_map:[A=absent,I=immature,M=mature]/value",Translation is from e-mail conv between Jim Matthews at Habitat Assessment <jim@habitatassessment.com> and Bob Peet on 2010-12-2~3
35
fruit,reproductiveCondition,"/_join/2/_map:[A=absent,I=immature,M=mature]/value",Translation is from e-mail conv between Jim Matthews at Habitat Assessment <jim@habitatassessment.com> and Bob Peet on 2010-12-2~3
34
flower,flower_verbatim,,
35
fruit,fruit_verbatim,,
36 36
root,*root,,
37 37
locality,locality,,
38 38
habitat,habitat,,
......
43 43
loanto,occurrenceRemarks,"/_join/6/_label[label=""loaned to""]/value",
44 44
inorout,occurrenceRemarks,"/_join/7/_label[label=""in or out""]/value",
45 45
sheetno,associatedMedia,,?
46
cultivated,cultivated,/_alt/1/_map:[*=true]/value,"For our purposes, anything non-NULL should be assumed to be cultivated"
46
cultivated,cultivated__main__verbatim,,
47 47
filler,*filler,,"Where used, same as family"
48 48
:UNCC,specimenHolderInstitutions,,
49
flower,reproductiveCondition,/_join/1,
50
fruit,reproductiveCondition,/_join/2,
51
cultivated__campus,cultivated,/_alt/2,
52
cultivated__main,cultivated,/_alt/1,
inputs/UNCC/Specimen/VegBIEN.csv
38 38
HAVING count(*) > 1
39 39
-----"
40 40
collectno,"/location/iscultivated/_or/1/_if[@name=""if TaxonOccurrence""]/cond/_exists",
41
cultivated,"/location/iscultivated/_or/1/_if[@name=""if TaxonOccurrence""]/else/_first/1/_alt/1/_map:[*=true]/value","For our purposes, anything non-NULL should be assumed to be cultivated"
42
campus,"/location/iscultivated/_or/1/_if[@name=""if TaxonOccurrence""]/else/_first/1/_alt/2/_map:[*=true]/value","""this refers to occurrence on the UNCC campus, but I do not know the meaning of the codes. Presumably C = cultivated"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/723/UNCC-validate_rkp_bb_rkp.xlsx#Campus)"
41
cultivated__main,"/location/iscultivated/_or/1/_if[@name=""if TaxonOccurrence""]/else/_first/1/_alt/1",
42
cultivated__campus,"/location/iscultivated/_or/1/_if[@name=""if TaxonOccurrence""]/else/_first/1/_alt/2",
43 43
locality,/location/iscultivated/_or/2/_locationnarrative_is_cultivated/locationnarrative/_join/1,
44 44
habitat,/location/iscultivated/_or/2/_locationnarrative_is_cultivated/locationnarrative/_join/3,
45 45
collday,"/location/locationevent/taxonoccurrence/aggregateoccurrence/collectiondate/_alt/2/_date/day/_nullIf:[null=0,type=float]/value",
......
52 52
loanto,"/location/locationevent/taxonoccurrence/aggregateoccurrence/notes/_join/1/_join/6/_label[label=""loaned to""]/value",
53 53
inorout,"/location/locationevent/taxonoccurrence/aggregateoccurrence/notes/_join/1/_join/7/_label[label=""in or out""]/value",
54 54
collectno,/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/authorplantcode/_first/2,
55
flower,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/reproductivecondition/_join/1/_map:[A=absent,I=immature,M=mature]/value",Translation is from e-mail conv between Jim Matthews at Habitat Assessment <jim@habitatassessment.com> and Bob Peet on 2010-12-2~3
56
fruit,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/reproductivecondition/_join/2/_map:[A=absent,I=immature,M=mature]/value",Translation is from e-mail conv between Jim Matthews at Habitat Assessment <jim@habitatassessment.com> and Bob Peet on 2010-12-2~3
55
flower,/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/reproductivecondition/_join/1,
56
fruit,/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/reproductivecondition/_join/2,
57 57
accession,"/location/locationevent/taxonoccurrence/aggregateoccurrence/plantobservation/specimenreplicate/catalognumber_dwc/_if[@name=""if indirect voucher""]/else","Globally unique, although occasionally duplicated. [1]
58 58

  
59 59
[1] Using the following query:
......
82 82
collector2,/location/locationevent/taxonoccurrence/collector_id/party/fullname/_join/3,
83 83
collector3,/location/locationevent/taxonoccurrence/collector_id/party/fullname/_join/4,
84 84
collectno,"/location/locationevent/taxonoccurrence/iscultivated/_alt/1/_if[@name=""if TaxonOccurrence""]/cond/_exists",
85
cultivated,"/location/locationevent/taxonoccurrence/iscultivated/_alt/1/_if[@name=""if TaxonOccurrence""]/then/_first/1/_alt/1/_map:[*=true]/value","For our purposes, anything non-NULL should be assumed to be cultivated"
86
campus,"/location/locationevent/taxonoccurrence/iscultivated/_alt/1/_if[@name=""if TaxonOccurrence""]/then/_first/1/_alt/2/_map:[*=true]/value","""this refers to occurrence on the UNCC campus, but I do not know the meaning of the codes. Presumably C = cultivated"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/723/UNCC-validate_rkp_bb_rkp.xlsx#Campus)"
85
cultivated__main,"/location/locationevent/taxonoccurrence/iscultivated/_alt/1/_if[@name=""if TaxonOccurrence""]/then/_first/1/_alt/1",
86
cultivated__campus,"/location/locationevent/taxonoccurrence/iscultivated/_alt/1/_if[@name=""if TaxonOccurrence""]/then/_first/1/_alt/2",
87 87
usdaRank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/_if[@name=""if has explicit parent""]/else/rank/_alt/1",
88 88
infrarank,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/_if[@name=""if has explicit parent""]/else/rank/_alt/2",
89 89
species,"/location/locationevent/taxonoccurrence/taxondetermination[!isoriginal]/*_id/taxonverbatim/*_id/taxonlabel/parent_id/taxonlabel/parent_id/_forEach:[in:[cultivar,forma,variety,subspecies,],do=""taxonlabel[rank=_val]/parent_id""]/taxonlabel[rank=species]/taxonepithet",
......
128 128
county,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/county/_first/2,
129 129
state,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/stateprovince/_first/2,
130 130
state,/location/locationplace/*_id/place/stateprovince,
131
campus,,"** No join mapping for cultivated__campus__verbatim ** ""this refers to occurrence on the UNCC campus, but I do not know the meaning of the codes. Presumably C = cultivated"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/723/UNCC-validate_rkp_bb_rkp.xlsx#Campus)"
132
cultivated,,** No join mapping for cultivated__main__verbatim ** 
131 133
filler,,"** No join mapping for *filler ** Where used, same as family"
132
herbarium,,"** No join mapping for specimenDonorInstitution/_map:[UNCCD=UNCC,*=*]/value ** ""The name or acronym in use by the institution that donated the collection"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/593/UNCC-validation-Peet.xlsx#institutionCode). UNCCD likely = UNCC database."
134
flower,,** No join mapping for flower_verbatim ** 
135
fruit,,** No join mapping for fruit_verbatim ** 
136
herbarium,,"** No join mapping for specimenDonorInstitution_verbatim ** ""The name or acronym in use by the institution that donated the collection"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/593/UNCC-validation-Peet.xlsx#institutionCode)"
133 137
leaves,,** No join mapping for *leaves ** 
134 138
root,,** No join mapping for *root ** 
135 139
row_num,,** No join mapping for *row_num ** 
inputs/UNCC/Specimen/test.xml.ref
75 75
                                        <authorplantcode>$collectno</authorplantcode>
76 76
                                        <reproductivecondition>
77 77
                                            <_join>
78
                                                <1>
79
                                                    <_map>
80
                                                        <A>absent</A>
81
                                                        <I>immature</I>
82
                                                        <M>mature</M>
83
                                                        <value>$flower</value>
84
                                                    </_map>
85
                                                </1>
86
                                                <2>
87
                                                    <_map>
88
                                                        <A>absent</A>
89
                                                        <I>immature</I>
90
                                                        <M>mature</M>
91
                                                        <value>$fruit</value>
92
                                                    </_map>
93
                                                </2>
78
                                                <1>$flower</1>
79
                                                <2>$fruit</2>
94 80
                                            </_join>
95 81
                                        </reproductivecondition>
96 82
                                        <specimenreplicate>
......
126 112
                                </collector_id>
127 113
                                <iscultivated>
128 114
                                    <_alt>
129
                                        <1>
130
                                            <_map>
131
                                                <*>true</*>
132
                                                <value>$cultivated</value>
133
                                            </_map>
134
                                        </1>
135
                                        <2>
136
                                            <_map>
137
                                                <*>true</*>
138
                                                <value>$campus</value>
139
                                            </_map>
140
                                        </2>
115
                                        <1>$cultivated__main</1>
116
                                        <2>$cultivated__campus</2>
141 117
                                    </_alt>
142 118
                                </iscultivated>
143 119
                                <taxondetermination>
inputs/UNCC/Specimen/new_terms.csv
1
herbarium,specimenDonorInstitution,"/_map:[UNCCD=UNCC,*=*]/value","""The name or acronym in use by the institution that donated the collection"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/593/UNCC-validation-Peet.xlsx#institutionCode). UNCCD likely = UNCC database."
1
herbarium,specimenDonorInstitution_verbatim,,"""The name or acronym in use by the institution that donated the collection"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/593/UNCC-validation-Peet.xlsx#institutionCode)"
2 2
usdaRank,taxonRank,/_alt/1,
3 3
infrarank,taxonRank,/_alt/2,
4 4
infraname,infraspecificEpithet,,
......
11 11
collmonth,monthCollected,,
12 12
collday,dayCollected,,
13 13
collyear,yearCollected,,
14
campus,cultivated,/_alt/2/_map:[*=true]/value,"""this refers to occurrence on the UNCC campus, but I do not know the meaning of the codes. Presumably C = cultivated"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/723/UNCC-validate_rkp_bb_rkp.xlsx#Campus)"
14
campus,cultivated__campus__verbatim,,"""this refers to occurrence on the UNCC campus, but I do not know the meaning of the codes. Presumably C = cultivated"" (https://projects.nceas.ucsb.edu/nceas/attachments/download/723/UNCC-validate_rkp_bb_rkp.xlsx#Campus)"
15 15
comment1,occurrenceRemarks,/_join/1,
16 16
comment2,occurrenceRemarks,/_join/2,
17 17
comment3,occurrenceRemarks,/_join/3,
......
19 19
loanto,occurrenceRemarks,"/_join/6/_label[label=""loaned to""]/value",
20 20
inorout,occurrenceRemarks,"/_join/7/_label[label=""in or out""]/value",
21 21
sheetno,associatedMedia,,?
22
cultivated__campus,cultivated,/_alt/2,
23
cultivated__main,cultivated,/_alt/1,
inputs/UNCC/Specimen/unmapped_terms.csv
1 1
*row_num
2
specimenDonorInstitution
2
specimenDonorInstitution_verbatim
3
cultivated__campus__verbatim
3 4
*leaves
5
flower_verbatim
6
fruit_verbatim
4 7
*root
5 8
associatedMedia
9
cultivated__main__verbatim
6 10
*filler

Also available in: Unified diff