Project

General

Profile

1
CREATE OR REPLACE FUNCTION score_ok(score double precision)
2
  RETURNS boolean AS
3
$BODY$
4
SELECT $1 >= 0.8
5
$BODY$
6
  LANGUAGE sql IMMUTABLE STRICT
7
  COST 100;
8

    
9
CREATE OR REPLACE FUNCTION family_is_homonym(family text)
10
  RETURNS boolean AS
11
$BODY$
12
SELECT EXISTS(SELECT * FROM "IRMNG".family_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
13
$BODY$
14
  LANGUAGE sql STABLE STRICT
15
  COST 100;
16

    
17
CREATE OR REPLACE FUNCTION genus_is_homonym(genus text)
18
  RETURNS boolean AS
19
$BODY$
20
SELECT EXISTS(SELECT * FROM "IRMNG".genus_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
21
$BODY$
22
  LANGUAGE sql STABLE STRICT
23
  COST 100;
24

    
25
/* IMPORTANT: when changing this table's schema, you must regenerate data.sql:
26
$ <this_file>/../test_taxonomic_names/test_scrub
27
*/
28
CREATE TABLE tnrs
29
(
30
  "Time_submitted" timestamp with time zone NOT NULL DEFAULT now(),
31
  "Name_number" integer NOT NULL,
32
  "Name_submitted" text NOT NULL,
33
  "Overall_score" double precision,
34
  "Name_matched" text,
35
  "Name_matched_rank" text,
36
  "Name_score" double precision,
37
  "Name_matched_author" text,
38
  "Name_matched_url" text,
39
  "Author_matched" text,
40
  "Author_score" double precision,
41
  "Family_matched" text,
42
  "Family_score" double precision,
43
  "Name_matched_accepted_family" text,
44
  "Genus_matched" text,
45
  "Genus_score" double precision,
46
  "Specific_epithet_matched" text,
47
  "Specific_epithet_score" double precision,
48
  "Infraspecific_rank" text,
49
  "Infraspecific_epithet_matched" text,
50
  "Infraspecific_epithet_score" double precision,
51
  "Infraspecific_rank_2" text,
52
  "Infraspecific_epithet_2_matched" text,
53
  "Infraspecific_epithet_2_score" double precision,
54
  "Annotations" text,
55
  "Unmatched_terms" text,
56
  "Taxonomic_status" text,
57
  "Accepted_name" text,
58
  "Accepted_name_author" text,
59
  "Accepted_name_rank" text,
60
  "Accepted_name_url" text,
61
  "Accepted_name_species" text,
62
  "Accepted_name_family" text,
63
  "Selected" text,
64
  "Source" text,
65
  "Warnings" text,
66
  "Accepted_name_lsid" text,
67
  "Accepted_scientific_name" text,
68
  "Max_score" double precision,
69
  "Is_homonym" boolean,
70
  "Is_plant" boolean,
71
  CONSTRAINT tnrs_pkey PRIMARY KEY ("Time_submitted" , "Name_number" ),
72
  CONSTRAINT "tnrs_Name_submitted_key" UNIQUE ("Name_submitted" )
73
)
74
WITH (
75
  OIDS=FALSE
76
);
77

    
78
CREATE UNIQUE INDEX tnrs_score_ok
79
  ON tnrs
80
  USING btree
81
  ("Name_submitted" )
82
  WHERE score_ok("Max_score");
83

    
84
/* IMPORTANT: when changing this function, you must regenerate the derived cols:
85
UPDATE "TNRS".tnrs SET "Name_submitted" = "Name_submitted"
86
runtime: 16 min ("5220212 rows affected, 979511 ms execution time")
87
*/
88
CREATE OR REPLACE FUNCTION tnrs_populate_fields()
89
  RETURNS trigger AS
90
$BODY$
91
DECLARE
92
    "Specific_epithet_is_plant" boolean :=
93
        (CASE
94
        WHEN   new."Infraspecific_epithet_matched"   IS NOT NULL
95
            OR new."Infraspecific_epithet_2_matched" IS NOT NULL
96
            OR new."Specific_epithet_score" >= 0.9 -- fuzzy match
97
            THEN true
98
        ELSE NULL -- ambiguous
99
        END);
100
    never_homonym boolean = COALESCE(new."Author_score" >= 0.6, false); -- author disambiguates
101
    family_is_homonym boolean = NOT never_homonym AND "TNRS".family_is_homonym(new."Family_matched");
102
    genus_is_homonym  boolean = NOT never_homonym AND "TNRS".genus_is_homonym( new."Genus_matched" );
103
BEGIN
104
    new."Accepted_scientific_name" = NULLIF(concat_ws(' '
105
        , NULLIF(NULLIF(new."Accepted_name_family", 'Unknown'), new."Accepted_name")
106
        , new."Accepted_name"
107
        , new."Accepted_name_author"
108
    ), '');
109
    new."Max_score" = GREATEST(
110
          new."Overall_score"
111
        , new."Family_score"
112
        , new."Genus_score"
113
        , new."Specific_epithet_score"
114
    );
115
    new."Is_homonym" = COALESCE(genus_is_homonym, family_is_homonym);
116
    new."Is_plant" = (CASE
117
        WHEN new."Family_score" = 1 AND NOT family_is_homonym -- exact match
118
            THEN true
119
        ELSE -- consider genus
120
            (CASE
121
            WHEN new."Genus_score" =  1    -- exact match
122
                THEN
123
                (CASE
124
                WHEN NOT genus_is_homonym THEN true
125
                ELSE "Specific_epithet_is_plant"
126
                END)
127
            WHEN new."Genus_score" >= 0.85 -- fuzzy match
128
                THEN "Specific_epithet_is_plant"
129
            ELSE NULL -- ambiguous
130
            END)
131
        END);
132
    
133
    RETURN new;
134
END;
135
$BODY$
136
  LANGUAGE plpgsql VOLATILE
137
  COST 100;
138

    
139
CREATE TRIGGER tnrs_populate_fields
140
  BEFORE INSERT OR UPDATE
141
  ON tnrs
142
  FOR EACH ROW
143
  EXECUTE PROCEDURE tnrs_populate_fields();
144

    
145

    
146
CREATE OR REPLACE VIEW "MatchedTaxon" AS
147
SELECT
148
  "Time_submitted" AS "*Name_matched.Time_submitted"
149
, "Name_submitted" AS "concatenatedScientificName"
150
, "Name_matched" AS "matchedTaxonName"
151
, "Name_matched_rank" AS "matchedTaxonRank"
152
, "Name_score" AS "*Name_matched.Name_score"
153
, "Name_matched_author" AS "matchedScientificNameAuthorship"
154
, "Name_matched_url" AS "matchedScientificNameID"
155
, "Author_score" AS "*Name_matched.Author_score"
156
, "Family_score" AS "matchedFamilyConfidence_fraction"
157
, COALESCE("Name_matched_accepted_family", "Accepted_name_family") AS "matchedFamily"
158
, "Genus_matched" AS "matchedGenus"
159
, "Genus_score" AS "matchedGenusConfidence_fraction"
160
, "Specific_epithet_matched" AS "matchedSpecificEpithet"
161
, "Specific_epithet_score" AS "matchedSpeciesConfidence_fraction"
162
, "Infraspecific_epithet_matched" AS "matchedInfraspecificEpithet"
163
, "Infraspecific_epithet_score" AS "*Name_matched.Infraspecific_epithet_score"
164
, "Annotations" AS "identificationQualifier"
165
, "Unmatched_terms" AS "morphospeciesSuffix"
166
, "Taxonomic_status" AS "taxonomicStatus"
167
, "Accepted_name" AS "acceptedTaxonName"
168
, "Accepted_name_author" AS "acceptedScientificNameAuthorship"
169
, "Accepted_name_rank" AS "acceptedTaxonRank"
170
, "Accepted_name_url" AS "acceptedScientificNameID"
171
, "Accepted_name_species" AS "*Name_matched.Accepted_name_species"
172
, "Accepted_name_family" AS "acceptedFamily"
173
, "Selected" AS "*Name_matched.Selected"
174
, "Source" AS "*Name_matched.Source"
175
, "Warnings" AS "*Name_matched.Warnings"
176
, "Accepted_name_lsid" AS "*Name_matched.Accepted_name_lsid"
177
, "Accepted_scientific_name" AS "acceptedScientificName"
178
, "Max_score" AS "matchedTaxonConfidence_fraction"
179
FROM tnrs
180
;
181

    
182
CREATE OR REPLACE VIEW "ValidMatchedTaxon" AS
183
SELECT *
184
FROM "MatchedTaxon"
185
WHERE score_ok("matchedTaxonConfidence_fraction")
186
;
187

    
188
CREATE OR REPLACE VIEW "AcceptedTaxon" AS
189
SELECT
190
  "Time_submitted" AS "*Accepted_name.Time_submitted"
191
, "Name_submitted" AS "acceptedScientificName"
192
, "Genus_matched" AS "acceptedGenus"
193
, "Specific_epithet_matched" AS "acceptedSpecificEpithet"
194
, "Infraspecific_epithet_matched" AS "acceptedInfraspecificEpithet"
195
, "Annotations" AS "*Accepted_name.Annotations"
196
, "Taxonomic_status" AS "acceptedTaxonomicStatus"
197
, "Selected" AS "*Accepted_name.Selected"
198
, "Source" AS "*Accepted_name.Source"
199
, "Warnings" AS "*Accepted_name.Warnings"
200
, "Accepted_name_lsid" AS "*Accepted_name.Accepted_name_lsid"
201
FROM tnrs
202
;
203

    
204
CREATE OR REPLACE VIEW "ScrubbedTaxon" AS
205
SELECT *
206
FROM "ValidMatchedTaxon"
207
NATURAL LEFT JOIN "AcceptedTaxon"
208
;
(4-4/4)