Project

General

Profile

1
-- Returns whether a match score reaches the acceptance threshold of 0.8.
--
-- Parameters:
--   score  the score to test (referenced positionally as $1 in the body)
-- Returns:
--   true when score >= 0.8, false when it is lower, and NULL when score is
--   NULL (the function is not STRICT; the comparison itself yields NULL).
--
-- The result depends only on the argument, so the function is declared
-- IMMUTABLE rather than VOLATILE. It is used as the predicate of the partial
-- index tnrs_score_ok and as a view filter, and PostgreSQL expects functions
-- used in index predicates to be immutable.
CREATE OR REPLACE FUNCTION score_ok(score double precision)
  RETURNS boolean AS
$BODY$
SELECT $1 >= 0.8
$BODY$
  LANGUAGE sql IMMUTABLE
  COST 100;
8

    
9
/* IMPORTANT: when changing this table's schema, you must regenerate data.sql:
$ <this_file>/../test_taxonomic_names/test_scrub
*/
-- One row per submitted name: "Name_submitted" is unique, and rows are keyed
-- by ("Time_submitted", "Name_number").
CREATE TABLE tnrs
(
  "Time_submitted"                  timestamp with time zone NOT NULL DEFAULT now(),
  "Name_number"                     integer                  NOT NULL,
  "Name_submitted"                  text                     NOT NULL,
  "Overall_score"                   double precision,
  "Name_matched"                    text,
  "Name_matched_rank"               text,
  "Name_score"                      double precision,
  "Name_matched_author"             text,
  "Name_matched_url"                text,
  "Author_matched"                  text,
  "Author_score"                    double precision,
  "Family_matched"                  text,
  "Family_score"                    double precision,
  "Name_matched_accepted_family"    text,
  "Genus_matched"                   text,
  "Genus_score"                     double precision,
  "Specific_epithet_matched"        text,
  "Specific_epithet_score"          double precision,
  "Infraspecific_rank"              text,
  "Infraspecific_epithet_matched"   text,
  "Infraspecific_epithet_score"     double precision,
  "Infraspecific_rank_2"            text,
  "Infraspecific_epithet_2_matched" text,
  "Infraspecific_epithet_2_score"   double precision,
  "Annotations"                     text,
  "Unmatched_terms"                 text,
  "Taxonomic_status"                text,
  "Accepted_name"                   text,
  "Accepted_name_author"            text,
  "Accepted_name_rank"              text,
  "Accepted_name_url"               text,
  "Accepted_name_species"           text,
  "Accepted_name_family"            text,
  "Selected"                        text,
  "Source"                          text,
  "Warnings"                        text,
  "Accepted_name_lsid"              text,
  -- The remaining four columns are derived: the tnrs_populate_fields()
  -- trigger function assigns them on every insert and update.
  "Accepted_scientific_name"        text,
  "Max_score"                       double precision,
  "Is_homonym"                      boolean,
  "Is_plant"                        boolean,
  CONSTRAINT tnrs_pkey PRIMARY KEY ("Time_submitted", "Name_number"),
  CONSTRAINT "tnrs_Name_submitted_key" UNIQUE ("Name_submitted")
)
WITH (
  OIDS = FALSE
);
61

    
62
-- Partial unique index on the submitted name, covering only the rows whose
-- "Max_score" satisfies score_ok().
CREATE UNIQUE INDEX tnrs_score_ok ON tnrs USING btree ("Name_submitted")
  WHERE score_ok("Max_score");
67

    
68
/* IMPORTANT: when changing this function, you must regenerate the derived cols:
UPDATE "TNRS".tnrs SET "Name_submitted" = "Name_submitted"
runtime: 16 min ("5220212 rows affected, 934611 ms execution time")
*/
-- Row trigger function for tnrs. Before the row is stored it assigns the
-- derived columns "Accepted_scientific_name", "Max_score", "Is_homonym" and
-- "Is_plant" from the other columns of the new row, then returns that row.
CREATE OR REPLACE FUNCTION tnrs_populate_fields()
  RETURNS trigger AS
$BODY$
DECLARE
    -- An author score of at least 0.6 is treated as disambiguating the name,
    -- in which case neither homonym flag below can be set.
    v_never_homonym boolean := COALESCE(new."Author_score" >= 0.6, false);
    -- The matched family/genus is listed as a homonym in the IRMNG tables.
    v_family_is_homonym boolean := NOT v_never_homonym AND EXISTS(
        SELECT * FROM "IRMNG".family_homonym_epithet
        WHERE "taxonNameOrEpithet" = new."Family_matched");
    v_genus_is_homonym boolean := NOT v_never_homonym AND EXISTS(
        SELECT * FROM "IRMNG".genus_homonym_epithet
        WHERE "taxonNameOrEpithet" = new."Genus_matched");
    -- true when an infraspecific epithet was matched or the specific epithet
    -- score is at least 0.9 (fuzzy match); otherwise NULL, i.e. ambiguous.
    v_epithet_is_plant boolean := (CASE
        WHEN new."Infraspecific_epithet_matched" IS NOT NULL
          OR new."Infraspecific_epithet_2_matched" IS NOT NULL
          OR new."Specific_epithet_score" >= 0.9
            THEN true
        END);
BEGIN
    -- Space-separated "family name author", skipping NULL parts. The family
    -- is left out when it is 'Unknown' or identical to the accepted name, and
    -- an empty result is stored as NULL.
    new."Accepted_scientific_name" := NULLIF(
        concat_ws(' ',
            NULLIF(NULLIF(new."Accepted_name_family", 'Unknown'), new."Accepted_name"),
            new."Accepted_name",
            new."Accepted_name_author"),
        '');

    new."Max_score" := GREATEST(
        new."Overall_score",
        new."Family_score",
        new."Genus_score",
        new."Specific_epithet_score");

    new."Is_homonym" := v_family_is_homonym OR v_genus_is_homonym;

    -- The first matching rule wins; NULL means ambiguous.
    new."Is_plant" := (CASE
        -- exact family match that is not a homonym
        WHEN new."Family_score" = 1 AND NOT v_family_is_homonym THEN true
        -- exact genus match that is not a homonym
        WHEN new."Genus_score" = 1 AND NOT v_genus_is_homonym THEN true
        -- homonymous exact genus match, or fuzzy genus match: defer to the epithet
        WHEN new."Genus_score" >= 0.85 THEN v_epithet_is_plant
        END);

    RETURN new;
END;
$BODY$
  LANGUAGE plpgsql VOLATILE
  COST 100;
122

    
123
-- Runs tnrs_populate_fields() for each row before it is inserted into or
-- updated in tnrs.
CREATE TRIGGER tnrs_populate_fields
  BEFORE INSERT OR UPDATE ON tnrs
  FOR EACH ROW EXECUTE PROCEDURE tnrs_populate_fields();
128

    
129

    
130
-- Exposes the tnrs columns under the output names listed below. The only
-- computed column is "matchedFamily", which falls back to
-- "Accepted_name_family" when "Name_matched_accepted_family" is NULL.
CREATE OR REPLACE VIEW "MatchedTaxon" AS
SELECT
    "Time_submitted"                AS "*Name_matched.Time_submitted",
    "Name_submitted"                AS "concatenatedScientificName",
    "Name_matched"                  AS "matchedTaxonName",
    "Name_matched_rank"             AS "matchedTaxonRank",
    "Name_score"                    AS "*Name_matched.Name_score",
    "Name_matched_author"           AS "matchedScientificNameAuthorship",
    "Name_matched_url"              AS "matchedScientificNameID",
    "Author_score"                  AS "*Name_matched.Author_score",
    "Family_score"                  AS "matchedFamilyConfidence_fraction",
    COALESCE("Name_matched_accepted_family", "Accepted_name_family")
                                    AS "matchedFamily",
    "Genus_matched"                 AS "matchedGenus",
    "Genus_score"                   AS "matchedGenusConfidence_fraction",
    "Specific_epithet_matched"      AS "matchedSpecificEpithet",
    "Specific_epithet_score"        AS "matchedSpeciesConfidence_fraction",
    "Infraspecific_epithet_matched" AS "matchedInfraspecificEpithet",
    "Infraspecific_epithet_score"   AS "*Name_matched.Infraspecific_epithet_score",
    "Annotations"                   AS "identificationQualifier",
    "Unmatched_terms"               AS "morphospeciesSuffix",
    "Taxonomic_status"              AS "taxonomicStatus",
    "Accepted_name"                 AS "acceptedTaxonName",
    "Accepted_name_author"          AS "acceptedScientificNameAuthorship",
    "Accepted_name_rank"            AS "acceptedTaxonRank",
    "Accepted_name_url"             AS "acceptedScientificNameID",
    "Accepted_name_species"         AS "*Name_matched.Accepted_name_species",
    "Accepted_name_family"          AS "acceptedFamily",
    "Selected"                      AS "*Name_matched.Selected",
    "Source"                        AS "*Name_matched.Source",
    "Warnings"                      AS "*Name_matched.Warnings",
    "Accepted_name_lsid"            AS "*Name_matched.Accepted_name_lsid",
    "Accepted_scientific_name"      AS "acceptedScientificName",
    "Max_score"                     AS "matchedTaxonConfidence_fraction"
FROM tnrs;
165

    
166
-- The rows of "MatchedTaxon" whose "matchedTaxonConfidence_fraction"
-- satisfies score_ok().
CREATE OR REPLACE VIEW "ValidMatchedTaxon" AS
    SELECT "MatchedTaxon".*
    FROM "MatchedTaxon"
    WHERE score_ok("MatchedTaxon"."matchedTaxonConfidence_fraction");
171

    
172
-- A second projection of tnrs in which "Name_submitted" is exposed as
-- "acceptedScientificName" and the matched genus/epithet columns are exposed
-- under accepted* names.
CREATE OR REPLACE VIEW "AcceptedTaxon" AS
SELECT
    "Time_submitted"                AS "*Accepted_name.Time_submitted",
    "Name_submitted"                AS "acceptedScientificName",
    "Genus_matched"                 AS "acceptedGenus",
    "Specific_epithet_matched"      AS "acceptedSpecificEpithet",
    "Infraspecific_epithet_matched" AS "acceptedInfraspecificEpithet",
    "Annotations"                   AS "*Accepted_name.Annotations",
    "Taxonomic_status"              AS "acceptedTaxonomicStatus",
    "Selected"                      AS "*Accepted_name.Selected",
    "Source"                        AS "*Accepted_name.Source",
    "Warnings"                      AS "*Accepted_name.Warnings",
    "Accepted_name_lsid"            AS "*Accepted_name.Accepted_name_lsid"
FROM tnrs;
187

    
188
-- Each valid match, extended with the "AcceptedTaxon" row (if any) that has
-- the same "acceptedScientificName"; unmatched rows get NULLs for the
-- "AcceptedTaxon" columns.
--
-- The join key is spelled out with USING instead of NATURAL.
-- "acceptedScientificName" is the only column name the two views share, so
-- the rows and the column order are the same as with NATURAL LEFT JOIN, but
-- the key can no longer change silently if either view later gains a column
-- with a name that also exists in the other.
CREATE OR REPLACE VIEW "ScrubbedTaxon" AS
SELECT *
FROM "ValidMatchedTaxon"
LEFT JOIN "AcceptedTaxon" USING ("acceptedScientificName")
;
(4-4/4)