Project

General

Profile

1 7844 aaronmk
CREATE OR REPLACE FUNCTION score_ok(score double precision)
2
  RETURNS boolean AS
3
$BODY$
4
SELECT $1 >= 0.8
5
$BODY$
6
  LANGUAGE sql VOLATILE
7
  COST 100;
8
9 9756 aaronmk
/* IMPORTANT: when changing this table's schema, you must regenerate data.sql:
10
$ <this_file>/../test_taxonomic_names/test_scrub
11
*/
12 5183 aaronmk
CREATE TABLE tnrs
13 5110 aaronmk
(
14 9905 aaronmk
  "Time_submitted" timestamp with time zone NOT NULL DEFAULT now(),
15 9908 aaronmk
  "Name_number" integer NOT NULL,
16 5110 aaronmk
  "Name_submitted" text NOT NULL,
17 9913 aaronmk
  "Overall_score" double precision,
18 5110 aaronmk
  "Name_matched" text,
19
  "Name_matched_rank" text,
20 9913 aaronmk
  "Name_score" double precision,
21 5110 aaronmk
  "Name_matched_author" text,
22
  "Name_matched_url" text,
23
  "Author_matched" text,
24 9913 aaronmk
  "Author_score" double precision,
25 5110 aaronmk
  "Family_matched" text,
26 9913 aaronmk
  "Family_score" double precision,
27 9529 aaronmk
  "Name_matched_accepted_family" text,
28 5110 aaronmk
  "Genus_matched" text,
29 9913 aaronmk
  "Genus_score" double precision,
30 5110 aaronmk
  "Specific_epithet_matched" text,
31 9913 aaronmk
  "Specific_epithet_score" double precision,
32 5110 aaronmk
  "Infraspecific_rank" text,
33
  "Infraspecific_epithet_matched" text,
34 9913 aaronmk
  "Infraspecific_epithet_score" double precision,
35 5110 aaronmk
  "Infraspecific_rank_2" text,
36
  "Infraspecific_epithet_2_matched" text,
37 9913 aaronmk
  "Infraspecific_epithet_2_score" double precision,
38 5110 aaronmk
  "Annotations" text,
39
  "Unmatched_terms" text,
40
  "Taxonomic_status" text,
41
  "Accepted_name" text,
42
  "Accepted_name_author" text,
43
  "Accepted_name_rank" text,
44
  "Accepted_name_url" text,
45 9762 aaronmk
  "Accepted_name_species" text,
46
  "Accepted_name_family" text,
47 5110 aaronmk
  "Selected" text,
48
  "Source" text,
49
  "Warnings" text,
50
  "Accepted_name_lsid" text,
51 7133 aaronmk
  "Accepted_scientific_name" text,
52 7293 aaronmk
  "Max_score" double precision,
53 9763 aaronmk
  "Is_plant" boolean,
54 9907 aaronmk
  CONSTRAINT tnrs_pkey PRIMARY KEY ("Time_submitted" , "Name_number" ),
55 9906 aaronmk
  CONSTRAINT "tnrs_Name_submitted_key" UNIQUE ("Name_submitted" )
56 5110 aaronmk
)
57
WITH (
58
  OIDS=FALSE
59
);
60 7132 aaronmk
61 7844 aaronmk
CREATE UNIQUE INDEX tnrs_score_ok
62
  ON tnrs
63
  USING btree
64
  ("Name_submitted" )
65
  WHERE score_ok("Max_score");
66
67 9765 aaronmk
/* IMPORTANT: when changing this function, you must regenerate the derived cols:
68
UPDATE tnrs SET "Name_submitted" = "Name_submitted"
69 9767 aaronmk
runtime: 17 min ("4992166 rows affected, 1019907 ms execution time")
70 9765 aaronmk
*/
71 9512 aaronmk
CREATE OR REPLACE FUNCTION tnrs_populate_fields()
72 7134 aaronmk
  RETURNS trigger AS
73
$BODY$
74 9763 aaronmk
DECLARE
75
    "Specific_epithet_is_plant" boolean :=
76
        (CASE
77
        WHEN   new."Infraspecific_epithet_matched"   IS NOT NULL
78
            OR new."Infraspecific_epithet_2_matched" IS NOT NULL
79 9914 aaronmk
            OR new."Specific_epithet_score" >= 0.9 -- fuzzy match
80 9763 aaronmk
            THEN true
81
        ELSE NULL -- ambiguous
82
        END);
83 7134 aaronmk
BEGIN
84 7848 aaronmk
    new."Accepted_scientific_name" = NULLIF(concat_ws(' '
85 9762 aaronmk
        , NULLIF(NULLIF(new."Accepted_name_family", 'Unknown'), new."Accepted_name")
86 7134 aaronmk
        , new."Accepted_name"
87
        , new."Accepted_name_author"
88 7848 aaronmk
    ), '');
89 7293 aaronmk
    new."Max_score" = GREATEST(
90 9914 aaronmk
          new."Overall_score"
91
        , new."Family_score"
92
        , new."Genus_score"
93
        , new."Specific_epithet_score"
94 7293 aaronmk
    );
95 9763 aaronmk
    new."Is_plant" = (CASE
96 9914 aaronmk
        WHEN new."Family_score" = 1 THEN true -- exact match
97 9763 aaronmk
        ELSE -- Family_matched IS NULL
98
            (CASE
99 9914 aaronmk
            WHEN new."Genus_score" =  1    -- exact match
100 9763 aaronmk
                THEN "Specific_epithet_is_plant"
101 9914 aaronmk
            WHEN new."Genus_score" >= 0.85 -- fuzzy match
102 9763 aaronmk
                THEN "Specific_epithet_is_plant"
103
            ELSE NULL -- ambiguous
104
            END)
105
        END);
106 7134 aaronmk
107
    RETURN new;
108
END;
109
$BODY$
110
  LANGUAGE plpgsql VOLATILE
111
  COST 100;
112
113 9512 aaronmk
CREATE TRIGGER tnrs_populate_fields
114 7134 aaronmk
  BEFORE INSERT OR UPDATE
115
  ON tnrs
116
  FOR EACH ROW
117 9512 aaronmk
  EXECUTE PROCEDURE tnrs_populate_fields();
118 7251 aaronmk
119
120 9759 aaronmk
CREATE OR REPLACE VIEW "MatchedTaxon" AS
121 7823 aaronmk
SELECT
122
  "Time_submitted" AS "*Name_matched.Time_submitted"
123 7830 aaronmk
, "Name_submitted" AS "concatenatedScientificName"
124 7829 aaronmk
, "Name_matched" AS "matchedTaxonName"
125 7823 aaronmk
, "Name_matched_rank" AS "matchedTaxonRank"
126
, "Name_score" AS "*Name_matched.Name_score"
127
, "Name_matched_author" AS "matchedScientificNameAuthorship"
128
, "Name_matched_url" AS "matchedScientificNameID"
129
, "Author_score" AS "*Name_matched.Author_score"
130
, "Family_score" AS "matchedFamilyConfidence_fraction"
131 9762 aaronmk
, COALESCE("Name_matched_accepted_family", "Accepted_name_family") AS "matchedFamily"
132 7831 aaronmk
, "Genus_matched" AS "matchedGenus"
133 7823 aaronmk
, "Genus_score" AS "matchedGenusConfidence_fraction"
134
, "Specific_epithet_matched" AS "matchedSpecificEpithet"
135
, "Specific_epithet_score" AS "matchedSpeciesConfidence_fraction"
136
, "Infraspecific_epithet_matched" AS "matchedInfraspecificEpithet"
137
, "Infraspecific_epithet_score" AS "*Name_matched.Infraspecific_epithet_score"
138
, "Annotations" AS "identificationQualifier"
139
, "Unmatched_terms" AS "morphospeciesSuffix"
140 7833 aaronmk
, "Taxonomic_status" AS "taxonomicStatus"
141 7823 aaronmk
, "Accepted_name" AS "acceptedTaxonName"
142
, "Accepted_name_author" AS "acceptedScientificNameAuthorship"
143
, "Accepted_name_rank" AS "acceptedTaxonRank"
144
, "Accepted_name_url" AS "acceptedScientificNameID"
145 9762 aaronmk
, "Accepted_name_species" AS "*Name_matched.Accepted_name_species"
146
, "Accepted_name_family" AS "acceptedFamily"
147 7823 aaronmk
, "Selected" AS "*Name_matched.Selected"
148 7828 aaronmk
, "Source" AS "*Name_matched.Source"
149 7823 aaronmk
, "Warnings" AS "*Name_matched.Warnings"
150
, "Accepted_name_lsid" AS "*Name_matched.Accepted_name_lsid"
151
, "Accepted_scientific_name" AS "acceptedScientificName"
152
, "Max_score" AS "matchedTaxonConfidence_fraction"
153 9616 aaronmk
FROM tnrs
154
;
155 7823 aaronmk
156 9759 aaronmk
CREATE OR REPLACE VIEW "ValidMatchedTaxon" AS
157
SELECT *
158
FROM "MatchedTaxon"
159
WHERE score_ok("matchedTaxonConfidence_fraction")
160
;
161
162 7823 aaronmk
CREATE OR REPLACE VIEW "AcceptedTaxon" AS
163
SELECT
164
  "Time_submitted" AS "*Accepted_name.Time_submitted"
165
, "Name_submitted" AS "acceptedScientificName"
166
, "Genus_matched" AS "acceptedGenus"
167
, "Specific_epithet_matched" AS "acceptedSpecificEpithet"
168
, "Infraspecific_epithet_matched" AS "acceptedInfraspecificEpithet"
169
, "Annotations" AS "*Accepted_name.Annotations"
170
, "Taxonomic_status" AS "acceptedTaxonomicStatus"
171
, "Selected" AS "*Accepted_name.Selected"
172
, "Source" AS "*Accepted_name.Source"
173
, "Warnings" AS "*Accepted_name.Warnings"
174
, "Accepted_name_lsid" AS "*Accepted_name.Accepted_name_lsid"
175 9760 aaronmk
FROM tnrs
176
;
177 7823 aaronmk
178
CREATE OR REPLACE VIEW "ScrubbedTaxon" AS
179
SELECT *
180 9758 aaronmk
FROM "ValidMatchedTaxon"
181 7823 aaronmk
NATURAL LEFT JOIN "AcceptedTaxon"
182
;