Project

General

Profile

1
CREATE TABLE client_version
2
(
3
  id text NOT NULL,
4
  global_rev integer NOT NULL,
5
  "/lib/tnrs.py rev" integer,
6
  "/bin/tnrs_db rev" integer,
7
  CONSTRAINT client_version_pkey PRIMARY KEY (id )
8
)
9
WITH (
10
  OIDS=FALSE
11
);
12

    
13
--
14

    
15
CREATE TABLE batch
16
(
17
  id text NOT NULL,
18
  id_by_time text,
19
  time_submitted timestamp with time zone DEFAULT now(),
20
  client_version text,
21
  CONSTRAINT batch_pkey PRIMARY KEY (id ),
22
  CONSTRAINT download_settings_id_by_time_key UNIQUE (id_by_time ),
23
  CONSTRAINT batch_client_version_fkey FOREIGN KEY (client_version)
24
      REFERENCES client_version (id) MATCH SIMPLE
25
      ON UPDATE CASCADE ON DELETE CASCADE
26
)
27
WITH (
28
  OIDS=FALSE
29
);
30

    
31

    
32
CREATE OR REPLACE FUNCTION batch__fill()
33
  RETURNS trigger AS
34
$BODY$
35
BEGIN
36
	new.id_by_time = new.time_submitted;
37
	new.id = COALESCE(new.id, new.id_by_time);
38
	RETURN new;
39
END;
40
$BODY$
41
  LANGUAGE plpgsql VOLATILE
42
  COST 100;
43

    
44
CREATE TRIGGER batch__fill
45
  BEFORE INSERT OR UPDATE
46
  ON batch
47
  FOR EACH ROW
48
  EXECUTE PROCEDURE batch__fill();
49

    
50
--
51

    
52
CREATE TABLE download_settings
53
(
54
  id text NOT NULL,
55
  "E-mail" text,
56
  "Id" text,
57
  "Job type" text,
58
  "Contains Id" boolean,
59
  "Start time" text,
60
  "Finish time" text,
61
  "TNRS version" text,
62
  "Sources selected" text,
63
  "Match threshold" double precision,
64
  "Classification" text,
65
  "Allow partial matches?" boolean,
66
  "Sort by source" boolean,
67
  "Constrain by higher taxonomy" boolean,
68
  CONSTRAINT download_settings_pkey PRIMARY KEY (id )
69
)
70
WITH (
71
  OIDS=FALSE
72
);
73
COMMENT ON TABLE download_settings
74
  IS 'stores data from http://tnrs.iplantcollaborative.org/TNRSapp.html > Submit List > results section > Download settings > settings.txt';
75

    
76
--
77

    
78
CREATE OR REPLACE FUNCTION score_ok(score double precision)
79
  RETURNS boolean AS
80
$BODY$
81
SELECT $1 >= 0.8
82
$BODY$
83
  LANGUAGE sql IMMUTABLE STRICT
84
  COST 100;
85

    
86
CREATE OR REPLACE FUNCTION family_is_homonym(family text)
87
  RETURNS boolean AS
88
$BODY$
89
SELECT EXISTS(SELECT * FROM "IRMNG".family_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
90
$BODY$
91
  LANGUAGE sql STABLE STRICT
92
  COST 100;
93

    
94
CREATE OR REPLACE FUNCTION genus_is_homonym(genus text)
95
  RETURNS boolean AS
96
$BODY$
97
SELECT EXISTS(SELECT * FROM "IRMNG".genus_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
98
$BODY$
99
  LANGUAGE sql STABLE STRICT
100
  COST 100;
101

    
102
/* IMPORTANT: when changing this table's schema, you must regenerate data.sql:
103
$ <this_file>/../test_taxonomic_names/test_scrub
104
you must also make the same changes on vegbiendev: e.g.
105
$ make backups/TNRS.backup-remake # runtime: 3 min ("real 2m44.277s")
106
SELECT util.set_col_types('"TNRS".tnrs', ARRAY[
107
  ('col', 'new_type')
108
]::util.col_cast[]);
109
runtime: 4 min ("242100 ms")
110
*/
111
CREATE TABLE tnrs
112
(
113
  "Time_submitted" timestamp with time zone NOT NULL DEFAULT now(),
114
  "Name_number" integer NOT NULL,
115
  "Name_submitted" text NOT NULL,
116
  "Overall_score" double precision,
117
  "Name_matched" text,
118
  "Name_matched_rank" text,
119
  "Name_score" double precision,
120
  "Name_matched_author" text,
121
  "Name_matched_url" text,
122
  "Author_matched" text,
123
  "Author_score" double precision,
124
  "Family_matched" text,
125
  "Family_score" double precision,
126
  "Name_matched_accepted_family" text,
127
  "Genus_matched" text,
128
  "Genus_score" double precision,
129
  "Specific_epithet_matched" text,
130
  "Specific_epithet_score" double precision,
131
  "Infraspecific_rank" text,
132
  "Infraspecific_epithet_matched" text,
133
  "Infraspecific_epithet_score" double precision,
134
  "Infraspecific_rank_2" text,
135
  "Infraspecific_epithet_2_matched" text,
136
  "Infraspecific_epithet_2_score" double precision,
137
  "Annotations" text,
138
  "Unmatched_terms" text,
139
  "Taxonomic_status" text,
140
  "Accepted_name" text,
141
  "Accepted_name_author" text,
142
  "Accepted_name_rank" text,
143
  "Accepted_name_url" text,
144
  "Accepted_name_species" text,
145
  "Accepted_name_family" text,
146
  "Selected" text,
147
  "Source" text,
148
  "Warnings" text,
149
  "Accepted_name_lsid" text,
150
  "Accepted_scientific_name" text,
151
  "Max_score" double precision,
152
  "Is_homonym" boolean,
153
  "Is_plant" boolean,
154
  CONSTRAINT tnrs_pkey PRIMARY KEY ("Time_submitted" , "Name_number" ),
155
  CONSTRAINT "tnrs_Name_submitted_key" UNIQUE ("Name_submitted" )
156
)
157
WITH (
158
  OIDS=FALSE
159
);
160

    
161
CREATE UNIQUE INDEX tnrs_score_ok
162
  ON tnrs
163
  USING btree
164
  ("Name_submitted" )
165
  WHERE score_ok("Max_score");
166

    
167
/* IMPORTANT: when changing this function, you must regenerate the derived cols:
168
UPDATE "TNRS".tnrs SET "Name_submitted" = "Name_submitted"
169
runtime: 25 min ("5221748 rows affected, 1524228 ms execution time")
170
VACUUM ANALYZE "TNRS".tnrs -- to remove previous rows
171
runtime: 1 min ("62350 ms")
172
*/
173
CREATE OR REPLACE FUNCTION tnrs_populate_fields()
174
  RETURNS trigger AS
175
$BODY$
176
DECLARE
177
    "Specific_epithet_is_plant" boolean :=
178
        (CASE
179
        WHEN   new."Infraspecific_epithet_matched"   IS NOT NULL
180
            OR new."Infraspecific_epithet_2_matched" IS NOT NULL
181
            OR new."Specific_epithet_score" >= 0.9 -- fuzzy match
182
            THEN true
183
        ELSE NULL -- ambiguous
184
        END);
185
    never_homonym boolean = COALESCE(new."Author_score" >= 0.6, false); -- author disambiguates
186
    family_is_homonym boolean = NOT never_homonym AND "TNRS".family_is_homonym(new."Family_matched");
187
    genus_is_homonym  boolean = NOT never_homonym AND "TNRS".genus_is_homonym( new."Genus_matched" );
188
BEGIN
189
    new."Accepted_scientific_name" = NULLIF(concat_ws(' '
190
        , NULLIF(NULLIF(new."Accepted_name_family", 'Unknown'), new."Accepted_name")
191
        , new."Accepted_name"
192
        , new."Accepted_name_author"
193
    ), '');
194
    new."Max_score" = GREATEST(
195
          new."Overall_score"
196
        , new."Family_score"
197
        , new."Genus_score"
198
        , new."Specific_epithet_score"
199
    );
200
    new."Is_homonym" = COALESCE(genus_is_homonym, family_is_homonym);
201
    new."Is_plant" = (CASE
202
        WHEN new."Family_score" = 1 AND NOT family_is_homonym -- exact match
203
            THEN true
204
        ELSE -- consider genus
205
            (CASE
206
            WHEN new."Genus_score" =  1    -- exact match
207
                THEN
208
                (CASE
209
                WHEN NOT genus_is_homonym THEN true
210
                ELSE "Specific_epithet_is_plant"
211
                END)
212
            WHEN new."Genus_score" >= 0.85 -- fuzzy match
213
                THEN "Specific_epithet_is_plant"
214
            ELSE NULL -- ambiguous
215
            END)
216
        END);
217
    
218
    RETURN new;
219
END;
220
$BODY$
221
  LANGUAGE plpgsql VOLATILE
222
  COST 100;
223

    
224
CREATE TRIGGER tnrs_populate_fields
225
  BEFORE INSERT OR UPDATE
226
  ON tnrs
227
  FOR EACH ROW
228
  EXECUTE PROCEDURE tnrs_populate_fields();
229

    
230
--
231

    
232
CREATE OR REPLACE VIEW "MatchedTaxon" AS
233
SELECT
234
  "Time_submitted" AS "*Name_matched.Time_submitted"
235
, "Name_submitted" AS "concatenatedScientificName"
236
, "Name_matched" AS "matchedTaxonName"
237
, "Name_matched_rank" AS "matchedTaxonRank"
238
, "Name_score" AS "*Name_matched.Name_score"
239
, "Name_matched_author" AS "matchedScientificNameAuthorship"
240
, "Name_matched_url" AS "matchedScientificNameID"
241
, "Author_score" AS "*Name_matched.Author_score"
242
, "Family_score" AS "matchedFamilyConfidence_fraction"
243
, COALESCE("Name_matched_accepted_family", "Accepted_name_family") AS "matchedFamily"
244
, "Genus_matched" AS "matchedGenus"
245
, "Genus_score" AS "matchedGenusConfidence_fraction"
246
, "Specific_epithet_matched" AS "matchedSpecificEpithet"
247
, "Specific_epithet_score" AS "matchedSpeciesConfidence_fraction"
248
, "Infraspecific_epithet_matched" AS "matchedInfraspecificEpithet"
249
, "Infraspecific_epithet_score" AS "*Name_matched.Infraspecific_epithet_score"
250
, "Annotations" AS "identificationQualifier"
251
, "Unmatched_terms" AS "morphospeciesSuffix"
252
, "Taxonomic_status" AS "taxonomicStatus"
253
, "Accepted_name" AS "acceptedTaxonName"
254
, "Accepted_name_author" AS "acceptedScientificNameAuthorship"
255
, "Accepted_name_rank" AS "acceptedTaxonRank"
256
, "Accepted_name_url" AS "acceptedScientificNameID"
257
, "Accepted_name_species" AS "*Name_matched.Accepted_name_species"
258
, "Accepted_name_family" AS "acceptedFamily"
259
, "Selected" AS "*Name_matched.Selected"
260
, "Source" AS "*Name_matched.Source"
261
, "Warnings" AS "*Name_matched.Warnings"
262
, "Accepted_name_lsid" AS "*Name_matched.Accepted_name_lsid"
263
, "Accepted_scientific_name" AS "acceptedScientificName"
264
, "Max_score" AS "matchedTaxonConfidence_fraction"
265
FROM tnrs
266
;
267

    
268
CREATE OR REPLACE VIEW "ValidMatchedTaxon" AS
269
SELECT *
270
FROM "MatchedTaxon"
271
WHERE score_ok("matchedTaxonConfidence_fraction")
272
;
273

    
274
CREATE OR REPLACE VIEW "AcceptedTaxon" AS
275
SELECT
276
  "Time_submitted" AS "*Accepted_name.Time_submitted"
277
, "Name_submitted" AS "acceptedScientificName"
278
, "Genus_matched" AS "acceptedGenus"
279
, "Specific_epithet_matched" AS "acceptedSpecificEpithet"
280
, "Infraspecific_epithet_matched" AS "acceptedInfraspecificEpithet"
281
, "Taxonomic_status" AS "acceptedTaxonomicStatus"
282
, "Selected" AS "*Accepted_name.Selected"
283
, "Source" AS "*Accepted_name.Source"
284
, "Warnings" AS "*Accepted_name.Warnings"
285
, "Accepted_name_lsid" AS "*Accepted_name.Accepted_name_lsid"
286
FROM tnrs
287
;
288

    
289
CREATE OR REPLACE VIEW "ScrubbedTaxon" AS
290
SELECT *
291
FROM "ValidMatchedTaxon"
292
NATURAL LEFT JOIN "AcceptedTaxon"
293
;
(7-7/8)