Project

General

Profile

1 10726 aaronmk
CREATE TABLE download_settings
2 10725 aaronmk
(
3
  "Time_submitted" timestamp with time zone NOT NULL,
4
  "E-mail" text,
5
  "Id" text,
6
  "Job type" text,
7
  "Contains Id" boolean,
8
  "Start time" text,
9
  "Finish time" text,
10
  "TNRS version" text,
11
  "Sources selected" text,
12
  "Match threshold" double precision,
13
  "Classification" text,
14
  "Allow partial matches?" boolean,
15
  "Sort by source" boolean,
16
  "Constrain by higher taxonomy" boolean,
17 10727 aaronmk
  CONSTRAINT "Download settings_pkey" PRIMARY KEY ("Time_submitted" )
18 10725 aaronmk
)
19
WITH (
20
  OIDS=FALSE
21
);
22 10726 aaronmk
COMMENT ON TABLE download_settings
23 10727 aaronmk
  IS 'stores data from http://tnrs.iplantcollaborative.org/TNRSapp.html > Submit List > results section > Download settings > settings.txt';
24 10725 aaronmk
25
26 7844 aaronmk
CREATE OR REPLACE FUNCTION score_ok(score double precision)
27
  RETURNS boolean AS
28
$BODY$
29
SELECT $1 >= 0.8
30
$BODY$
31 9984 aaronmk
  LANGUAGE sql IMMUTABLE STRICT
32 7844 aaronmk
  COST 100;
33
34 9985 aaronmk
CREATE OR REPLACE FUNCTION family_is_homonym(family text)
35
  RETURNS boolean AS
36
$BODY$
37
SELECT EXISTS(SELECT * FROM "IRMNG".family_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
38
$BODY$
39 9986 aaronmk
  LANGUAGE sql STABLE STRICT
40 9985 aaronmk
  COST 100;
41
42
CREATE OR REPLACE FUNCTION genus_is_homonym(genus text)
43
  RETURNS boolean AS
44
$BODY$
45
SELECT EXISTS(SELECT * FROM "IRMNG".genus_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
46
$BODY$
47 9986 aaronmk
  LANGUAGE sql STABLE STRICT
48 9985 aaronmk
  COST 100;
49
50 9756 aaronmk
/* IMPORTANT: when changing this table's schema, you must regenerate data.sql:
51
$ <this_file>/../test_taxonomic_names/test_scrub
52 9990 aaronmk
you must also make the same changes on vegbiendev: e.g.
53 9994 aaronmk
$ make backups/TNRS.backup-remake # runtime: 3 min ("real 2m44.277s")
54 9990 aaronmk
SELECT util.set_col_types('"TNRS".tnrs', ARRAY[
55
  ('col', 'new_type')
56
]::util.col_cast[]);
57
runtime: 4 min ("242100 ms")
58 9756 aaronmk
*/
59 5183 aaronmk
CREATE TABLE tnrs
60 5110 aaronmk
(
61 9905 aaronmk
  "Time_submitted" timestamp with time zone NOT NULL DEFAULT now(),
62 9908 aaronmk
  "Name_number" integer NOT NULL,
63 5110 aaronmk
  "Name_submitted" text NOT NULL,
64 9913 aaronmk
  "Overall_score" double precision,
65 5110 aaronmk
  "Name_matched" text,
66
  "Name_matched_rank" text,
67 9913 aaronmk
  "Name_score" double precision,
68 5110 aaronmk
  "Name_matched_author" text,
69
  "Name_matched_url" text,
70
  "Author_matched" text,
71 9913 aaronmk
  "Author_score" double precision,
72 5110 aaronmk
  "Family_matched" text,
73 9913 aaronmk
  "Family_score" double precision,
74 9529 aaronmk
  "Name_matched_accepted_family" text,
75 5110 aaronmk
  "Genus_matched" text,
76 9913 aaronmk
  "Genus_score" double precision,
77 5110 aaronmk
  "Specific_epithet_matched" text,
78 9913 aaronmk
  "Specific_epithet_score" double precision,
79 5110 aaronmk
  "Infraspecific_rank" text,
80
  "Infraspecific_epithet_matched" text,
81 9913 aaronmk
  "Infraspecific_epithet_score" double precision,
82 5110 aaronmk
  "Infraspecific_rank_2" text,
83
  "Infraspecific_epithet_2_matched" text,
84 9913 aaronmk
  "Infraspecific_epithet_2_score" double precision,
85 5110 aaronmk
  "Annotations" text,
86
  "Unmatched_terms" text,
87
  "Taxonomic_status" text,
88
  "Accepted_name" text,
89
  "Accepted_name_author" text,
90
  "Accepted_name_rank" text,
91
  "Accepted_name_url" text,
92 9762 aaronmk
  "Accepted_name_species" text,
93
  "Accepted_name_family" text,
94 5110 aaronmk
  "Selected" text,
95
  "Source" text,
96
  "Warnings" text,
97
  "Accepted_name_lsid" text,
98 7133 aaronmk
  "Accepted_scientific_name" text,
99 7293 aaronmk
  "Max_score" double precision,
100 9972 aaronmk
  "Is_homonym" boolean,
101 9763 aaronmk
  "Is_plant" boolean,
102 9907 aaronmk
  CONSTRAINT tnrs_pkey PRIMARY KEY ("Time_submitted" , "Name_number" ),
103 9906 aaronmk
  CONSTRAINT "tnrs_Name_submitted_key" UNIQUE ("Name_submitted" )
104 5110 aaronmk
)
105
WITH (
106
  OIDS=FALSE
107
);
108 7132 aaronmk
109 7844 aaronmk
CREATE UNIQUE INDEX tnrs_score_ok
110
  ON tnrs
111
  USING btree
112
  ("Name_submitted" )
113
  WHERE score_ok("Max_score");
114
115 9765 aaronmk
/* IMPORTANT: when changing this function, you must regenerate the derived cols:
116 9974 aaronmk
UPDATE "TNRS".tnrs SET "Name_submitted" = "Name_submitted"
117 10280 aaronmk
runtime: 25 min ("5221748 rows affected, 1524228 ms execution time")
118 9993 aaronmk
VACUUM ANALYZE "TNRS".tnrs -- to remove previous rows
119 10280 aaronmk
runtime: 1 min ("62350 ms")
120 9765 aaronmk
*/
121 9512 aaronmk
CREATE OR REPLACE FUNCTION tnrs_populate_fields()
122 7134 aaronmk
  RETURNS trigger AS
123
$BODY$
124 9763 aaronmk
DECLARE
125
    "Specific_epithet_is_plant" boolean :=
126
        (CASE
127
        WHEN   new."Infraspecific_epithet_matched"   IS NOT NULL
128
            OR new."Infraspecific_epithet_2_matched" IS NOT NULL
129 9914 aaronmk
            OR new."Specific_epithet_score" >= 0.9 -- fuzzy match
130 9763 aaronmk
            THEN true
131
        ELSE NULL -- ambiguous
132
        END);
133 9982 aaronmk
    never_homonym boolean = COALESCE(new."Author_score" >= 0.6, false); -- author disambiguates
134 9988 aaronmk
    family_is_homonym boolean = NOT never_homonym AND "TNRS".family_is_homonym(new."Family_matched");
135
    genus_is_homonym  boolean = NOT never_homonym AND "TNRS".genus_is_homonym( new."Genus_matched" );
136 7134 aaronmk
BEGIN
137 7848 aaronmk
    new."Accepted_scientific_name" = NULLIF(concat_ws(' '
138 9762 aaronmk
        , NULLIF(NULLIF(new."Accepted_name_family", 'Unknown'), new."Accepted_name")
139 7134 aaronmk
        , new."Accepted_name"
140
        , new."Accepted_name_author"
141 7848 aaronmk
    ), '');
142 7293 aaronmk
    new."Max_score" = GREATEST(
143 9914 aaronmk
          new."Overall_score"
144
        , new."Family_score"
145
        , new."Genus_score"
146
        , new."Specific_epithet_score"
147 7293 aaronmk
    );
148 9987 aaronmk
    new."Is_homonym" = COALESCE(genus_is_homonym, family_is_homonym);
149 9763 aaronmk
    new."Is_plant" = (CASE
150 9973 aaronmk
        WHEN new."Family_score" = 1 AND NOT family_is_homonym -- exact match
151
            THEN true
152
        ELSE -- consider genus
153 9763 aaronmk
            (CASE
154 9914 aaronmk
            WHEN new."Genus_score" =  1    -- exact match
155 9973 aaronmk
                THEN
156
                (CASE
157
                WHEN NOT genus_is_homonym THEN true
158
                ELSE "Specific_epithet_is_plant"
159
                END)
160 9914 aaronmk
            WHEN new."Genus_score" >= 0.85 -- fuzzy match
161 9763 aaronmk
                THEN "Specific_epithet_is_plant"
162
            ELSE NULL -- ambiguous
163
            END)
164
        END);
165 7134 aaronmk
166
    RETURN new;
167
END;
168
$BODY$
169
  LANGUAGE plpgsql VOLATILE
170
  COST 100;
171
172 9512 aaronmk
CREATE TRIGGER tnrs_populate_fields
173 7134 aaronmk
  BEFORE INSERT OR UPDATE
174
  ON tnrs
175
  FOR EACH ROW
176 9512 aaronmk
  EXECUTE PROCEDURE tnrs_populate_fields();
177 7251 aaronmk
178
179 9759 aaronmk
CREATE OR REPLACE VIEW "MatchedTaxon" AS
180 7823 aaronmk
SELECT
181
  "Time_submitted" AS "*Name_matched.Time_submitted"
182 7830 aaronmk
, "Name_submitted" AS "concatenatedScientificName"
183 7829 aaronmk
, "Name_matched" AS "matchedTaxonName"
184 7823 aaronmk
, "Name_matched_rank" AS "matchedTaxonRank"
185
, "Name_score" AS "*Name_matched.Name_score"
186
, "Name_matched_author" AS "matchedScientificNameAuthorship"
187
, "Name_matched_url" AS "matchedScientificNameID"
188
, "Author_score" AS "*Name_matched.Author_score"
189
, "Family_score" AS "matchedFamilyConfidence_fraction"
190 9762 aaronmk
, COALESCE("Name_matched_accepted_family", "Accepted_name_family") AS "matchedFamily"
191 7831 aaronmk
, "Genus_matched" AS "matchedGenus"
192 7823 aaronmk
, "Genus_score" AS "matchedGenusConfidence_fraction"
193
, "Specific_epithet_matched" AS "matchedSpecificEpithet"
194
, "Specific_epithet_score" AS "matchedSpeciesConfidence_fraction"
195
, "Infraspecific_epithet_matched" AS "matchedInfraspecificEpithet"
196
, "Infraspecific_epithet_score" AS "*Name_matched.Infraspecific_epithet_score"
197
, "Annotations" AS "identificationQualifier"
198
, "Unmatched_terms" AS "morphospeciesSuffix"
199 7833 aaronmk
, "Taxonomic_status" AS "taxonomicStatus"
200 7823 aaronmk
, "Accepted_name" AS "acceptedTaxonName"
201
, "Accepted_name_author" AS "acceptedScientificNameAuthorship"
202
, "Accepted_name_rank" AS "acceptedTaxonRank"
203
, "Accepted_name_url" AS "acceptedScientificNameID"
204 9762 aaronmk
, "Accepted_name_species" AS "*Name_matched.Accepted_name_species"
205
, "Accepted_name_family" AS "acceptedFamily"
206 7823 aaronmk
, "Selected" AS "*Name_matched.Selected"
207 7828 aaronmk
, "Source" AS "*Name_matched.Source"
208 7823 aaronmk
, "Warnings" AS "*Name_matched.Warnings"
209
, "Accepted_name_lsid" AS "*Name_matched.Accepted_name_lsid"
210
, "Accepted_scientific_name" AS "acceptedScientificName"
211
, "Max_score" AS "matchedTaxonConfidence_fraction"
212 9616 aaronmk
FROM tnrs
213
;
214 7823 aaronmk
215 9759 aaronmk
CREATE OR REPLACE VIEW "ValidMatchedTaxon" AS
216
SELECT *
217
FROM "MatchedTaxon"
218
WHERE score_ok("matchedTaxonConfidence_fraction")
219
;
220
221 7823 aaronmk
CREATE OR REPLACE VIEW "AcceptedTaxon" AS
222
SELECT
223
  "Time_submitted" AS "*Accepted_name.Time_submitted"
224
, "Name_submitted" AS "acceptedScientificName"
225
, "Genus_matched" AS "acceptedGenus"
226
, "Specific_epithet_matched" AS "acceptedSpecificEpithet"
227
, "Infraspecific_epithet_matched" AS "acceptedInfraspecificEpithet"
228
, "Taxonomic_status" AS "acceptedTaxonomicStatus"
229
, "Selected" AS "*Accepted_name.Selected"
230
, "Source" AS "*Accepted_name.Source"
231
, "Warnings" AS "*Accepted_name.Warnings"
232
, "Accepted_name_lsid" AS "*Accepted_name.Accepted_name_lsid"
233 9760 aaronmk
FROM tnrs
234
;
235 7823 aaronmk
236
CREATE OR REPLACE VIEW "ScrubbedTaxon" AS
237
SELECT *
238 9758 aaronmk
FROM "ValidMatchedTaxon"
239 7823 aaronmk
NATURAL LEFT JOIN "AcceptedTaxon"
240
;