Project

General

Profile

1
CREATE TABLE batch
2
(
3
  id text NOT NULL,
4
  id_by_time text,
5
  time_submitted timestamp with time zone DEFAULT now(),
6
  CONSTRAINT batch_pkey PRIMARY KEY (id ),
7
  CONSTRAINT download_settings_id_by_time_key UNIQUE (id_by_time )
8
)
9
WITH (
10
  OIDS=FALSE
11
);
12

    
13

    
14
CREATE OR REPLACE FUNCTION batch__fill()
15
  RETURNS trigger AS
16
$BODY$
17
BEGIN
18
	new.id_by_time = new.time_submitted;
19
	new.id = COALESCE(new.id, new.id_by_time);
20
	RETURN new;
21
END;
22
$BODY$
23
  LANGUAGE plpgsql VOLATILE
24
  COST 100;
25

    
26
CREATE TRIGGER batch__fill
27
  BEFORE INSERT OR UPDATE
28
  ON batch
29
  FOR EACH ROW
30
  EXECUTE PROCEDURE batch__fill();
31

    
32
--
33

    
34
CREATE TABLE download_settings
35
(
36
  id text NOT NULL,
37
  "E-mail" text,
38
  "Id" text,
39
  "Job type" text,
40
  "Contains Id" boolean,
41
  "Start time" text,
42
  "Finish time" text,
43
  "TNRS version" text,
44
  "Sources selected" text,
45
  "Match threshold" double precision,
46
  "Classification" text,
47
  "Allow partial matches?" boolean,
48
  "Sort by source" boolean,
49
  "Constrain by higher taxonomy" boolean,
50
  CONSTRAINT download_settings_pkey PRIMARY KEY (id )
51
)
52
WITH (
53
  OIDS=FALSE
54
);
55
COMMENT ON TABLE download_settings
56
  IS 'stores data from http://tnrs.iplantcollaborative.org/TNRSapp.html > Submit List > results section > Download settings > settings.txt';
57

    
58
--
59

    
60
CREATE OR REPLACE FUNCTION score_ok(score double precision)
61
  RETURNS boolean AS
62
$BODY$
63
SELECT $1 >= 0.8
64
$BODY$
65
  LANGUAGE sql IMMUTABLE STRICT
66
  COST 100;
67

    
68
CREATE OR REPLACE FUNCTION family_is_homonym(family text)
69
  RETURNS boolean AS
70
$BODY$
71
SELECT EXISTS(SELECT * FROM "IRMNG".family_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
72
$BODY$
73
  LANGUAGE sql STABLE STRICT
74
  COST 100;
75

    
76
CREATE OR REPLACE FUNCTION genus_is_homonym(genus text)
77
  RETURNS boolean AS
78
$BODY$
79
SELECT EXISTS(SELECT * FROM "IRMNG".genus_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
80
$BODY$
81
  LANGUAGE sql STABLE STRICT
82
  COST 100;
83

    
84
/* IMPORTANT: when changing this table's schema, you must regenerate data.sql:
85
$ <this_file>/../test_taxonomic_names/test_scrub
86
you must also make the same changes on vegbiendev: e.g.
87
$ make backups/TNRS.backup-remake # runtime: 3 min ("real 2m44.277s")
88
SELECT util.set_col_types('"TNRS".tnrs', ARRAY[
89
  ('col', 'new_type')
90
]::util.col_cast[]);
91
runtime: 4 min ("242100 ms")
92
*/
93
CREATE TABLE tnrs
94
(
95
  "Time_submitted" timestamp with time zone NOT NULL DEFAULT now(),
96
  "Name_number" integer NOT NULL,
97
  "Name_submitted" text NOT NULL,
98
  "Overall_score" double precision,
99
  "Name_matched" text,
100
  "Name_matched_rank" text,
101
  "Name_score" double precision,
102
  "Name_matched_author" text,
103
  "Name_matched_url" text,
104
  "Author_matched" text,
105
  "Author_score" double precision,
106
  "Family_matched" text,
107
  "Family_score" double precision,
108
  "Name_matched_accepted_family" text,
109
  "Genus_matched" text,
110
  "Genus_score" double precision,
111
  "Specific_epithet_matched" text,
112
  "Specific_epithet_score" double precision,
113
  "Infraspecific_rank" text,
114
  "Infraspecific_epithet_matched" text,
115
  "Infraspecific_epithet_score" double precision,
116
  "Infraspecific_rank_2" text,
117
  "Infraspecific_epithet_2_matched" text,
118
  "Infraspecific_epithet_2_score" double precision,
119
  "Annotations" text,
120
  "Unmatched_terms" text,
121
  "Taxonomic_status" text,
122
  "Accepted_name" text,
123
  "Accepted_name_author" text,
124
  "Accepted_name_rank" text,
125
  "Accepted_name_url" text,
126
  "Accepted_name_species" text,
127
  "Accepted_name_family" text,
128
  "Selected" text,
129
  "Source" text,
130
  "Warnings" text,
131
  "Accepted_name_lsid" text,
132
  "Accepted_scientific_name" text,
133
  "Max_score" double precision,
134
  "Is_homonym" boolean,
135
  "Is_plant" boolean,
136
  CONSTRAINT tnrs_pkey PRIMARY KEY ("Time_submitted" , "Name_number" ),
137
  CONSTRAINT "tnrs_Name_submitted_key" UNIQUE ("Name_submitted" )
138
)
139
WITH (
140
  OIDS=FALSE
141
);
142

    
143
CREATE UNIQUE INDEX tnrs_score_ok
144
  ON tnrs
145
  USING btree
146
  ("Name_submitted" )
147
  WHERE score_ok("Max_score");
148

    
149
/* IMPORTANT: when changing this function, you must regenerate the derived cols:
150
UPDATE "TNRS".tnrs SET "Name_submitted" = "Name_submitted"
151
runtime: 25 min ("5221748 rows affected, 1524228 ms execution time")
152
VACUUM ANALYZE "TNRS".tnrs -- to remove previous rows
153
runtime: 1 min ("62350 ms")
154
*/
155
CREATE OR REPLACE FUNCTION tnrs_populate_fields()
156
  RETURNS trigger AS
157
$BODY$
158
DECLARE
159
    "Specific_epithet_is_plant" boolean :=
160
        (CASE
161
        WHEN   new."Infraspecific_epithet_matched"   IS NOT NULL
162
            OR new."Infraspecific_epithet_2_matched" IS NOT NULL
163
            OR new."Specific_epithet_score" >= 0.9 -- fuzzy match
164
            THEN true
165
        ELSE NULL -- ambiguous
166
        END);
167
    never_homonym boolean = COALESCE(new."Author_score" >= 0.6, false); -- author disambiguates
168
    family_is_homonym boolean = NOT never_homonym AND "TNRS".family_is_homonym(new."Family_matched");
169
    genus_is_homonym  boolean = NOT never_homonym AND "TNRS".genus_is_homonym( new."Genus_matched" );
170
BEGIN
171
    new."Accepted_scientific_name" = NULLIF(concat_ws(' '
172
        , NULLIF(NULLIF(new."Accepted_name_family", 'Unknown'), new."Accepted_name")
173
        , new."Accepted_name"
174
        , new."Accepted_name_author"
175
    ), '');
176
    new."Max_score" = GREATEST(
177
          new."Overall_score"
178
        , new."Family_score"
179
        , new."Genus_score"
180
        , new."Specific_epithet_score"
181
    );
182
    new."Is_homonym" = COALESCE(genus_is_homonym, family_is_homonym);
183
    new."Is_plant" = (CASE
184
        WHEN new."Family_score" = 1 AND NOT family_is_homonym -- exact match
185
            THEN true
186
        ELSE -- consider genus
187
            (CASE
188
            WHEN new."Genus_score" =  1    -- exact match
189
                THEN
190
                (CASE
191
                WHEN NOT genus_is_homonym THEN true
192
                ELSE "Specific_epithet_is_plant"
193
                END)
194
            WHEN new."Genus_score" >= 0.85 -- fuzzy match
195
                THEN "Specific_epithet_is_plant"
196
            ELSE NULL -- ambiguous
197
            END)
198
        END);
199
    
200
    RETURN new;
201
END;
202
$BODY$
203
  LANGUAGE plpgsql VOLATILE
204
  COST 100;
205

    
206
CREATE TRIGGER tnrs_populate_fields
207
  BEFORE INSERT OR UPDATE
208
  ON tnrs
209
  FOR EACH ROW
210
  EXECUTE PROCEDURE tnrs_populate_fields();
211

    
212
--
213

    
214
CREATE OR REPLACE VIEW "MatchedTaxon" AS
215
SELECT
216
  "Time_submitted" AS "*Name_matched.Time_submitted"
217
, "Name_submitted" AS "concatenatedScientificName"
218
, "Name_matched" AS "matchedTaxonName"
219
, "Name_matched_rank" AS "matchedTaxonRank"
220
, "Name_score" AS "*Name_matched.Name_score"
221
, "Name_matched_author" AS "matchedScientificNameAuthorship"
222
, "Name_matched_url" AS "matchedScientificNameID"
223
, "Author_score" AS "*Name_matched.Author_score"
224
, "Family_score" AS "matchedFamilyConfidence_fraction"
225
, COALESCE("Name_matched_accepted_family", "Accepted_name_family") AS "matchedFamily"
226
, "Genus_matched" AS "matchedGenus"
227
, "Genus_score" AS "matchedGenusConfidence_fraction"
228
, "Specific_epithet_matched" AS "matchedSpecificEpithet"
229
, "Specific_epithet_score" AS "matchedSpeciesConfidence_fraction"
230
, "Infraspecific_epithet_matched" AS "matchedInfraspecificEpithet"
231
, "Infraspecific_epithet_score" AS "*Name_matched.Infraspecific_epithet_score"
232
, "Annotations" AS "identificationQualifier"
233
, "Unmatched_terms" AS "morphospeciesSuffix"
234
, "Taxonomic_status" AS "taxonomicStatus"
235
, "Accepted_name" AS "acceptedTaxonName"
236
, "Accepted_name_author" AS "acceptedScientificNameAuthorship"
237
, "Accepted_name_rank" AS "acceptedTaxonRank"
238
, "Accepted_name_url" AS "acceptedScientificNameID"
239
, "Accepted_name_species" AS "*Name_matched.Accepted_name_species"
240
, "Accepted_name_family" AS "acceptedFamily"
241
, "Selected" AS "*Name_matched.Selected"
242
, "Source" AS "*Name_matched.Source"
243
, "Warnings" AS "*Name_matched.Warnings"
244
, "Accepted_name_lsid" AS "*Name_matched.Accepted_name_lsid"
245
, "Accepted_scientific_name" AS "acceptedScientificName"
246
, "Max_score" AS "matchedTaxonConfidence_fraction"
247
FROM tnrs
248
;
249

    
250
CREATE OR REPLACE VIEW "ValidMatchedTaxon" AS
251
SELECT *
252
FROM "MatchedTaxon"
253
WHERE score_ok("matchedTaxonConfidence_fraction")
254
;
255

    
256
CREATE OR REPLACE VIEW "AcceptedTaxon" AS
257
SELECT
258
  "Time_submitted" AS "*Accepted_name.Time_submitted"
259
, "Name_submitted" AS "acceptedScientificName"
260
, "Genus_matched" AS "acceptedGenus"
261
, "Specific_epithet_matched" AS "acceptedSpecificEpithet"
262
, "Infraspecific_epithet_matched" AS "acceptedInfraspecificEpithet"
263
, "Taxonomic_status" AS "acceptedTaxonomicStatus"
264
, "Selected" AS "*Accepted_name.Selected"
265
, "Source" AS "*Accepted_name.Source"
266
, "Warnings" AS "*Accepted_name.Warnings"
267
, "Accepted_name_lsid" AS "*Accepted_name.Accepted_name_lsid"
268
FROM tnrs
269
;
270

    
271
CREATE OR REPLACE VIEW "ScrubbedTaxon" AS
272
SELECT *
273
FROM "ValidMatchedTaxon"
274
NATURAL LEFT JOIN "AcceptedTaxon"
275
;
(7-7/8)