Project

General

Profile

1
CREATE TABLE client_version
2
(
3
  id text NOT NULL,
4
  global_rev integer NOT NULL, -- from `svn info .` > Last Changed Rev
5
  "/lib/tnrs.py rev" integer, -- from `svn info lib/tnrs.py` > Last Changed Rev
6
  "/bin/tnrs_db rev" integer, -- from `svn info bin/tnrs_db` > Last Changed Rev
7
  CONSTRAINT client_version_pkey PRIMARY KEY (id )
8
)
9
WITH (
10
  OIDS=FALSE
11
);
12
COMMENT ON TABLE client_version
13
  IS 'contains svn revisions';
14
COMMENT ON COLUMN client_version.global_rev IS 'from `svn info .` > Last Changed Rev';
15
COMMENT ON COLUMN client_version."/lib/tnrs.py rev" IS 'from `svn info lib/tnrs.py` > Last Changed Rev';
16
COMMENT ON COLUMN client_version."/bin/tnrs_db rev" IS 'from `svn info bin/tnrs_db` > Last Changed Rev';
17

    
18
--
19

    
20
CREATE TABLE batch
21
(
22
  id text NOT NULL,
23
  id_by_time text,
24
  time_submitted timestamp with time zone DEFAULT now(),
25
  client_version text,
26
  CONSTRAINT batch_pkey PRIMARY KEY (id ),
27
  CONSTRAINT batch_client_version_fkey FOREIGN KEY (client_version)
28
      REFERENCES client_version (id) MATCH SIMPLE
29
      ON UPDATE CASCADE ON DELETE CASCADE,
30
  CONSTRAINT batch_id_by_time_key UNIQUE (id_by_time )
31
)
32
WITH (
33
  OIDS=FALSE
34
);
35

    
36

    
37
CREATE OR REPLACE FUNCTION batch__fill()
38
  RETURNS trigger AS
39
$BODY$
40
BEGIN
41
	new.id_by_time = new.time_submitted;
42
	new.id = COALESCE(new.id, new.id_by_time);
43
	RETURN new;
44
END;
45
$BODY$
46
  LANGUAGE plpgsql VOLATILE
47
  COST 100;
48

    
49
CREATE TRIGGER batch__fill
50
  BEFORE INSERT OR UPDATE
51
  ON batch
52
  FOR EACH ROW
53
  EXECUTE PROCEDURE batch__fill();
54

    
55
--
56

    
57
CREATE TABLE batch_download_settings
58
(
59
  id text NOT NULL,
60
  "E-mail" text,
61
  "Id" text,
62
  "Job type" text,
63
  "Contains Id" boolean,
64
  "Start time" text,
65
  "Finish time" text,
66
  "TNRS version" text,
67
  "Sources selected" text,
68
  "Match threshold" double precision,
69
  "Classification" text,
70
  "Allow partial matches?" boolean,
71
  "Sort by source" boolean,
72
  "Constrain by higher taxonomy" boolean,
73
  CONSTRAINT batch_download_settings_pkey PRIMARY KEY (id ),
74
  CONSTRAINT batch_download_settings_id_fkey FOREIGN KEY (id)
75
      REFERENCES "TNRS".batch (id) MATCH SIMPLE
76
      ON UPDATE CASCADE ON DELETE CASCADE
77
)
78
WITH (
79
  OIDS=FALSE
80
);
81
COMMENT ON TABLE batch_download_settings
82
  IS 'stores data from http://tnrs.iplantcollaborative.org/TNRSapp.html > Submit List > results section > Download settings > settings.txt';
83

    
84
--
85

    
86
CREATE OR REPLACE FUNCTION score_ok(score double precision)
87
  RETURNS boolean AS
88
$BODY$
89
SELECT $1 >= 0.8
90
$BODY$
91
  LANGUAGE sql IMMUTABLE STRICT
92
  COST 100;
93

    
94
CREATE OR REPLACE FUNCTION family_is_homonym(family text)
95
  RETURNS boolean AS
96
$BODY$
97
SELECT EXISTS(SELECT * FROM "IRMNG".family_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
98
$BODY$
99
  LANGUAGE sql STABLE STRICT
100
  COST 100;
101

    
102
CREATE OR REPLACE FUNCTION genus_is_homonym(genus text)
103
  RETURNS boolean AS
104
$BODY$
105
SELECT EXISTS(SELECT * FROM "IRMNG".genus_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
106
$BODY$
107
  LANGUAGE sql STABLE STRICT
108
  COST 100;
109

    
110
/* IMPORTANT: when changing this table's schema, you must regenerate data.sql:
111
$ <this_file>/../test_taxonomic_names/test_scrub
112
you must also make the same changes on vegbiendev: e.g.
113
$ make backups/TNRS.backup-remake # runtime: 3 min ("real 2m44.277s")
114
SELECT util.set_col_types('"TNRS".tnrs', ARRAY[
115
  ('col', 'new_type')
116
]::util.col_cast[]);
117
runtime: 9 min ("531282 ms")
118
*/
119
CREATE TABLE tnrs
120
(
121
  batch text NOT NULL DEFAULT now(),
122
  "Name_number" integer NOT NULL,
123
  "Name_submitted" text NOT NULL,
124
  "Overall_score" double precision,
125
  "Name_matched" text,
126
  "Name_matched_rank" text,
127
  "Name_score" double precision,
128
  "Name_matched_author" text,
129
  "Name_matched_url" text,
130
  "Author_matched" text,
131
  "Author_score" double precision,
132
  "Family_matched" text,
133
  "Family_score" double precision,
134
  "Name_matched_accepted_family" text,
135
  "Genus_matched" text,
136
  "Genus_score" double precision,
137
  "Specific_epithet_matched" text,
138
  "Specific_epithet_score" double precision,
139
  "Infraspecific_rank" text,
140
  "Infraspecific_epithet_matched" text,
141
  "Infraspecific_epithet_score" double precision,
142
  "Infraspecific_rank_2" text,
143
  "Infraspecific_epithet_2_matched" text,
144
  "Infraspecific_epithet_2_score" double precision,
145
  "Annotations" text,
146
  "Unmatched_terms" text,
147
  "Taxonomic_status" text,
148
  "Accepted_name" text,
149
  "Accepted_name_author" text,
150
  "Accepted_name_rank" text,
151
  "Accepted_name_url" text,
152
  "Accepted_name_species" text,
153
  "Accepted_name_family" text,
154
  "Selected" text,
155
  "Source" text,
156
  "Warnings" text,
157
  "Accepted_name_lsid" text,
158
  "Accepted_scientific_name" text,
159
  "Max_score" double precision,
160
  "Is_homonym" boolean,
161
  "Is_plant" boolean,
162
  CONSTRAINT tnrs_pkey PRIMARY KEY (batch , "Name_number" ),
163
  CONSTRAINT tnrs_batch_fkey FOREIGN KEY (batch)
164
      REFERENCES batch (id) MATCH SIMPLE
165
      ON UPDATE CASCADE ON DELETE CASCADE,
166
  CONSTRAINT "tnrs_Name_submitted_key" UNIQUE ("Name_submitted" )
167
)
168
WITH (
169
  OIDS=FALSE
170
);
171

    
172
CREATE UNIQUE INDEX tnrs_score_ok
173
  ON tnrs
174
  USING btree
175
  ("Name_submitted" )
176
  WHERE score_ok("Max_score");
177

    
178
/* IMPORTANT: when changing this function, you must regenerate the derived cols:
179
UPDATE "TNRS".tnrs SET "Name_submitted" = "Name_submitted"
180
runtime: 25 min ("5221748 rows affected, 1524228 ms execution time")
181
VACUUM ANALYZE "TNRS".tnrs -- to remove previous rows
182
runtime: 1 min ("62350 ms")
183
*/
184
CREATE OR REPLACE FUNCTION tnrs_populate_fields()
185
  RETURNS trigger AS
186
$BODY$
187
DECLARE
188
    "Specific_epithet_is_plant" boolean :=
189
        (CASE
190
        WHEN   new."Infraspecific_epithet_matched"   IS NOT NULL
191
            OR new."Infraspecific_epithet_2_matched" IS NOT NULL
192
            OR new."Specific_epithet_score" >= 0.9 -- fuzzy match
193
            THEN true
194
        ELSE NULL -- ambiguous
195
        END);
196
    never_homonym boolean = COALESCE(new."Author_score" >= 0.6, false); -- author disambiguates
197
    family_is_homonym boolean = NOT never_homonym AND "TNRS".family_is_homonym(new."Family_matched");
198
    genus_is_homonym  boolean = NOT never_homonym AND "TNRS".genus_is_homonym( new."Genus_matched" );
199
BEGIN
200
    new."Accepted_scientific_name" = NULLIF(concat_ws(' '
201
        , NULLIF(NULLIF(new."Accepted_name_family", 'Unknown'), new."Accepted_name")
202
        , new."Accepted_name"
203
        , new."Accepted_name_author"
204
    ), '');
205
    new."Max_score" = GREATEST(
206
          new."Overall_score"
207
        , new."Family_score"
208
        , new."Genus_score"
209
        , new."Specific_epithet_score"
210
    );
211
    new."Is_homonym" = COALESCE(genus_is_homonym, family_is_homonym);
212
    new."Is_plant" = (CASE
213
        WHEN new."Family_score" = 1 AND NOT family_is_homonym -- exact match
214
            THEN true
215
        ELSE -- consider genus
216
            (CASE
217
            WHEN new."Genus_score" =  1    -- exact match
218
                THEN
219
                (CASE
220
                WHEN NOT genus_is_homonym THEN true
221
                ELSE "Specific_epithet_is_plant"
222
                END)
223
            WHEN new."Genus_score" >= 0.85 -- fuzzy match
224
                THEN "Specific_epithet_is_plant"
225
            ELSE NULL -- ambiguous
226
            END)
227
        END);
228
    
229
    RETURN new;
230
END;
231
$BODY$
232
  LANGUAGE plpgsql VOLATILE
233
  COST 100;
234

    
235
CREATE TRIGGER tnrs_populate_fields
236
  BEFORE INSERT OR UPDATE
237
  ON tnrs
238
  FOR EACH ROW
239
  EXECUTE PROCEDURE tnrs_populate_fields();
240

    
241
--
242

    
243
CREATE OR REPLACE VIEW "MatchedTaxon" AS
244
SELECT
245
  "batch" AS "*Name_matched.batch"
246
, "Name_submitted" AS "concatenatedScientificName"
247
, "Name_matched" AS "matchedTaxonName"
248
, "Name_matched_rank" AS "matchedTaxonRank"
249
, "Name_score" AS "*Name_matched.Name_score"
250
, "Name_matched_author" AS "matchedScientificNameAuthorship"
251
, "Name_matched_url" AS "matchedScientificNameID"
252
, "Author_score" AS "*Name_matched.Author_score"
253
, "Family_score" AS "matchedFamilyConfidence_fraction"
254
, COALESCE("Name_matched_accepted_family", "Accepted_name_family") AS "matchedFamily"
255
, "Genus_matched" AS "matchedGenus"
256
, "Genus_score" AS "matchedGenusConfidence_fraction"
257
, "Specific_epithet_matched" AS "matchedSpecificEpithet"
258
, "Specific_epithet_score" AS "matchedSpeciesConfidence_fraction"
259
, "Infraspecific_epithet_matched" AS "matchedInfraspecificEpithet"
260
, "Infraspecific_epithet_score" AS "*Name_matched.Infraspecific_epithet_score"
261
, "Annotations" AS "identificationQualifier"
262
, "Unmatched_terms" AS "morphospeciesSuffix"
263
, "Taxonomic_status" AS "taxonomicStatus"
264
, "Accepted_name" AS "acceptedTaxonName"
265
, "Accepted_name_author" AS "acceptedScientificNameAuthorship"
266
, "Accepted_name_rank" AS "acceptedTaxonRank"
267
, "Accepted_name_url" AS "acceptedScientificNameID"
268
, "Accepted_name_species" AS "*Name_matched.Accepted_name_species"
269
, "Accepted_name_family" AS "acceptedFamily"
270
, "Selected" AS "*Name_matched.Selected"
271
, "Source" AS "*Name_matched.Source"
272
, "Warnings" AS "*Name_matched.Warnings"
273
, "Accepted_name_lsid" AS "*Name_matched.Accepted_name_lsid"
274
, "Accepted_scientific_name" AS "acceptedScientificName"
275
, "Max_score" AS "matchedTaxonConfidence_fraction"
276
FROM tnrs
277
;
278

    
279
CREATE OR REPLACE VIEW "ValidMatchedTaxon" AS
280
SELECT *
281
FROM "MatchedTaxon"
282
WHERE score_ok("matchedTaxonConfidence_fraction")
283
;
284
COMMENT ON VIEW "ValidMatchedTaxon"
285
  IS 'to update, use * as the column list';
286

    
287
CREATE OR REPLACE VIEW "AcceptedTaxon" AS
288
SELECT
289
  "batch" AS "*Accepted_name.batch"
290
, "Name_submitted" AS "acceptedScientificName"
291
, "Genus_matched" AS "acceptedGenus"
292
, "Specific_epithet_matched" AS "acceptedSpecificEpithet"
293
, "Infraspecific_epithet_matched" AS "acceptedInfraspecificEpithet"
294
, "Taxonomic_status" AS "acceptedTaxonomicStatus"
295
, "Selected" AS "*Accepted_name.Selected"
296
, "Source" AS "*Accepted_name.Source"
297
, "Warnings" AS "*Accepted_name.Warnings"
298
, "Accepted_name_lsid" AS "*Accepted_name.Accepted_name_lsid"
299
FROM tnrs
300
;
301

    
302
CREATE OR REPLACE VIEW "ScrubbedTaxon" AS
303
SELECT *
304
FROM "ValidMatchedTaxon"
305
NATURAL LEFT JOIN "AcceptedTaxon"
306
;
307
COMMENT ON VIEW "ScrubbedTaxon"
308
  IS 'to update, use * as the column list';
(7-7/8)