Project

General

Profile

1
CREATE TABLE download_settings
2
(
3
  "Time_submitted" timestamp with time zone NOT NULL,
4
  "E-mail" text,
5
  "Id" text,
6
  "Job type" text,
7
  "Contains Id" boolean,
8
  "Start time" text,
9
  "Finish time" text,
10
  "TNRS version" text,
11
  "Sources selected" text,
12
  "Match threshold" double precision,
13
  "Classification" text,
14
  "Allow partial matches?" boolean,
15
  "Sort by source" boolean,
16
  "Constrain by higher taxonomy" boolean,
17
  CONSTRAINT download_settings_pkey PRIMARY KEY ("Time_submitted" )
18
)
19
WITH (
20
  OIDS=FALSE
21
);
22
COMMENT ON TABLE download_settings
23
  IS 'stores data from http://tnrs.iplantcollaborative.org/TNRSapp.html > Submit List > results section > download_settings > settings.txt';
24

    
25

    
26
CREATE OR REPLACE FUNCTION score_ok(score double precision)
27
  RETURNS boolean AS
28
$BODY$
29
SELECT $1 >= 0.8
30
$BODY$
31
  LANGUAGE sql IMMUTABLE STRICT
32
  COST 100;
33

    
34
CREATE OR REPLACE FUNCTION family_is_homonym(family text)
35
  RETURNS boolean AS
36
$BODY$
37
SELECT EXISTS(SELECT * FROM "IRMNG".family_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
38
$BODY$
39
  LANGUAGE sql STABLE STRICT
40
  COST 100;
41

    
42
CREATE OR REPLACE FUNCTION genus_is_homonym(genus text)
43
  RETURNS boolean AS
44
$BODY$
45
SELECT EXISTS(SELECT * FROM "IRMNG".genus_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
46
$BODY$
47
  LANGUAGE sql STABLE STRICT
48
  COST 100;
49

    
50
/* IMPORTANT: when changing this table's schema, you must regenerate data.sql:
51
$ <this_file>/../test_taxonomic_names/test_scrub
52
you must also make the same changes on vegbiendev: e.g.
53
$ make backups/TNRS.backup-remake # runtime: 3 min ("real 2m44.277s")
54
SELECT util.set_col_types('"TNRS".tnrs', ARRAY[
55
  ('col', 'new_type')
56
]::util.col_cast[]);
57
runtime: 4 min ("242100 ms")
58
*/
59
CREATE TABLE tnrs
60
(
61
  "Time_submitted" timestamp with time zone NOT NULL DEFAULT now(),
62
  "Name_number" integer NOT NULL,
63
  "Name_submitted" text NOT NULL,
64
  "Overall_score" double precision,
65
  "Name_matched" text,
66
  "Name_matched_rank" text,
67
  "Name_score" double precision,
68
  "Name_matched_author" text,
69
  "Name_matched_url" text,
70
  "Author_matched" text,
71
  "Author_score" double precision,
72
  "Family_matched" text,
73
  "Family_score" double precision,
74
  "Name_matched_accepted_family" text,
75
  "Genus_matched" text,
76
  "Genus_score" double precision,
77
  "Specific_epithet_matched" text,
78
  "Specific_epithet_score" double precision,
79
  "Infraspecific_rank" text,
80
  "Infraspecific_epithet_matched" text,
81
  "Infraspecific_epithet_score" double precision,
82
  "Infraspecific_rank_2" text,
83
  "Infraspecific_epithet_2_matched" text,
84
  "Infraspecific_epithet_2_score" double precision,
85
  "Annotations" text,
86
  "Unmatched_terms" text,
87
  "Taxonomic_status" text,
88
  "Accepted_name" text,
89
  "Accepted_name_author" text,
90
  "Accepted_name_rank" text,
91
  "Accepted_name_url" text,
92
  "Accepted_name_species" text,
93
  "Accepted_name_family" text,
94
  "Selected" text,
95
  "Source" text,
96
  "Warnings" text,
97
  "Accepted_name_lsid" text,
98
  "Accepted_scientific_name" text,
99
  "Max_score" double precision,
100
  "Is_homonym" boolean,
101
  "Is_plant" boolean,
102
  CONSTRAINT tnrs_pkey PRIMARY KEY ("Time_submitted" , "Name_number" ),
103
  CONSTRAINT "tnrs_Name_submitted_key" UNIQUE ("Name_submitted" )
104
)
105
WITH (
106
  OIDS=FALSE
107
);
108

    
109
CREATE UNIQUE INDEX tnrs_score_ok
110
  ON tnrs
111
  USING btree
112
  ("Name_submitted" )
113
  WHERE score_ok("Max_score");
114

    
115
/* IMPORTANT: when changing this function, you must regenerate the derived cols:
116
UPDATE "TNRS".tnrs SET "Name_submitted" = "Name_submitted"
117
runtime: 25 min ("5221748 rows affected, 1524228 ms execution time")
118
VACUUM ANALYZE "TNRS".tnrs -- to remove previous rows
119
runtime: 1 min ("62350 ms")
120
*/
121
CREATE OR REPLACE FUNCTION tnrs_populate_fields()
122
  RETURNS trigger AS
123
$BODY$
124
DECLARE
125
    "Specific_epithet_is_plant" boolean :=
126
        (CASE
127
        WHEN   new."Infraspecific_epithet_matched"   IS NOT NULL
128
            OR new."Infraspecific_epithet_2_matched" IS NOT NULL
129
            OR new."Specific_epithet_score" >= 0.9 -- fuzzy match
130
            THEN true
131
        ELSE NULL -- ambiguous
132
        END);
133
    never_homonym boolean = COALESCE(new."Author_score" >= 0.6, false); -- author disambiguates
134
    family_is_homonym boolean = NOT never_homonym AND "TNRS".family_is_homonym(new."Family_matched");
135
    genus_is_homonym  boolean = NOT never_homonym AND "TNRS".genus_is_homonym( new."Genus_matched" );
136
BEGIN
137
    new."Accepted_scientific_name" = NULLIF(concat_ws(' '
138
        , NULLIF(NULLIF(new."Accepted_name_family", 'Unknown'), new."Accepted_name")
139
        , new."Accepted_name"
140
        , new."Accepted_name_author"
141
    ), '');
142
    new."Max_score" = GREATEST(
143
          new."Overall_score"
144
        , new."Family_score"
145
        , new."Genus_score"
146
        , new."Specific_epithet_score"
147
    );
148
    new."Is_homonym" = COALESCE(genus_is_homonym, family_is_homonym);
149
    new."Is_plant" = (CASE
150
        WHEN new."Family_score" = 1 AND NOT family_is_homonym -- exact match
151
            THEN true
152
        ELSE -- consider genus
153
            (CASE
154
            WHEN new."Genus_score" =  1    -- exact match
155
                THEN
156
                (CASE
157
                WHEN NOT genus_is_homonym THEN true
158
                ELSE "Specific_epithet_is_plant"
159
                END)
160
            WHEN new."Genus_score" >= 0.85 -- fuzzy match
161
                THEN "Specific_epithet_is_plant"
162
            ELSE NULL -- ambiguous
163
            END)
164
        END);
165
    
166
    RETURN new;
167
END;
168
$BODY$
169
  LANGUAGE plpgsql VOLATILE
170
  COST 100;
171

    
172
CREATE TRIGGER tnrs_populate_fields
173
  BEFORE INSERT OR UPDATE
174
  ON tnrs
175
  FOR EACH ROW
176
  EXECUTE PROCEDURE tnrs_populate_fields();
177

    
178

    
179
CREATE OR REPLACE VIEW "MatchedTaxon" AS
180
SELECT
181
  "Time_submitted" AS "*Name_matched.Time_submitted"
182
, "Name_submitted" AS "concatenatedScientificName"
183
, "Name_matched" AS "matchedTaxonName"
184
, "Name_matched_rank" AS "matchedTaxonRank"
185
, "Name_score" AS "*Name_matched.Name_score"
186
, "Name_matched_author" AS "matchedScientificNameAuthorship"
187
, "Name_matched_url" AS "matchedScientificNameID"
188
, "Author_score" AS "*Name_matched.Author_score"
189
, "Family_score" AS "matchedFamilyConfidence_fraction"
190
, COALESCE("Name_matched_accepted_family", "Accepted_name_family") AS "matchedFamily"
191
, "Genus_matched" AS "matchedGenus"
192
, "Genus_score" AS "matchedGenusConfidence_fraction"
193
, "Specific_epithet_matched" AS "matchedSpecificEpithet"
194
, "Specific_epithet_score" AS "matchedSpeciesConfidence_fraction"
195
, "Infraspecific_epithet_matched" AS "matchedInfraspecificEpithet"
196
, "Infraspecific_epithet_score" AS "*Name_matched.Infraspecific_epithet_score"
197
, "Annotations" AS "identificationQualifier"
198
, "Unmatched_terms" AS "morphospeciesSuffix"
199
, "Taxonomic_status" AS "taxonomicStatus"
200
, "Accepted_name" AS "acceptedTaxonName"
201
, "Accepted_name_author" AS "acceptedScientificNameAuthorship"
202
, "Accepted_name_rank" AS "acceptedTaxonRank"
203
, "Accepted_name_url" AS "acceptedScientificNameID"
204
, "Accepted_name_species" AS "*Name_matched.Accepted_name_species"
205
, "Accepted_name_family" AS "acceptedFamily"
206
, "Selected" AS "*Name_matched.Selected"
207
, "Source" AS "*Name_matched.Source"
208
, "Warnings" AS "*Name_matched.Warnings"
209
, "Accepted_name_lsid" AS "*Name_matched.Accepted_name_lsid"
210
, "Accepted_scientific_name" AS "acceptedScientificName"
211
, "Max_score" AS "matchedTaxonConfidence_fraction"
212
FROM tnrs
213
;
214

    
215
CREATE OR REPLACE VIEW "ValidMatchedTaxon" AS
216
SELECT *
217
FROM "MatchedTaxon"
218
WHERE score_ok("matchedTaxonConfidence_fraction")
219
;
220

    
221
CREATE OR REPLACE VIEW "AcceptedTaxon" AS
222
SELECT
223
  "Time_submitted" AS "*Accepted_name.Time_submitted"
224
, "Name_submitted" AS "acceptedScientificName"
225
, "Genus_matched" AS "acceptedGenus"
226
, "Specific_epithet_matched" AS "acceptedSpecificEpithet"
227
, "Infraspecific_epithet_matched" AS "acceptedInfraspecificEpithet"
228
, "Taxonomic_status" AS "acceptedTaxonomicStatus"
229
, "Selected" AS "*Accepted_name.Selected"
230
, "Source" AS "*Accepted_name.Source"
231
, "Warnings" AS "*Accepted_name.Warnings"
232
, "Accepted_name_lsid" AS "*Accepted_name.Accepted_name_lsid"
233
FROM tnrs
234
;
235

    
236
CREATE OR REPLACE VIEW "ScrubbedTaxon" AS
237
SELECT *
238
FROM "ValidMatchedTaxon"
239
NATURAL LEFT JOIN "AcceptedTaxon"
240
;
(7-7/8)