Project

General

Profile

1
CREATE TABLE client_version
2
(
3
  id text NOT NULL,
4
  global_rev integer NOT NULL, -- from `svn info .` > Last Changed Rev
5
  "/lib/tnrs.py rev" integer, -- from `svn info lib/tnrs.py` > Last Changed Rev
6
  "/bin/tnrs_db rev" integer, -- from `svn info bin/tnrs_db` > Last Changed Rev
7
  CONSTRAINT client_version_pkey PRIMARY KEY (id )
8
)
9
WITH (
10
  OIDS=FALSE
11
);
12
COMMENT ON TABLE client_version
13
  IS 'contains svn revisions';
14
COMMENT ON COLUMN client_version.global_rev IS 'from `svn info .` > Last Changed Rev';
15
COMMENT ON COLUMN client_version."/lib/tnrs.py rev" IS 'from `svn info lib/tnrs.py` > Last Changed Rev';
16
COMMENT ON COLUMN client_version."/bin/tnrs_db rev" IS 'from `svn info bin/tnrs_db` > Last Changed Rev';
17

    
18
--
19

    
20
CREATE TABLE batch
21
(
22
  id text NOT NULL,
23
  id_by_time text,
24
  time_submitted timestamp with time zone DEFAULT now(),
25
  client_version text,
26
  CONSTRAINT batch_pkey PRIMARY KEY (id ),
27
  CONSTRAINT batch_client_version_fkey FOREIGN KEY (client_version)
28
      REFERENCES client_version (id) MATCH SIMPLE
29
      ON UPDATE CASCADE ON DELETE CASCADE,
30
  CONSTRAINT batch_id_by_time_key UNIQUE (id_by_time )
31
)
32
WITH (
33
  OIDS=FALSE
34
);
35

    
36

    
37
CREATE OR REPLACE FUNCTION batch__fill()
38
  RETURNS trigger AS
39
$BODY$
40
BEGIN
41
	new.id_by_time = new.time_submitted;
42
	new.id = COALESCE(new.id, new.id_by_time);
43
	RETURN new;
44
END;
45
$BODY$
46
  LANGUAGE plpgsql VOLATILE
47
  COST 100;
48

    
49
CREATE TRIGGER batch__fill
50
  BEFORE INSERT OR UPDATE
51
  ON batch
52
  FOR EACH ROW
53
  EXECUTE PROCEDURE batch__fill();
54

    
55
--
56

    
57
CREATE TABLE batch_download_settings
58
(
59
  id text NOT NULL,
60
  "E-mail" text,
61
  "Id" text,
62
  "Job type" text,
63
  "Contains Id" boolean,
64
  "Start time" text,
65
  "Finish time" text,
66
  "TNRS version" text,
67
  "Sources selected" text,
68
  "Match threshold" double precision,
69
  "Classification" text,
70
  "Allow partial matches?" boolean,
71
  "Sort by source" boolean,
72
  "Constrain by higher taxonomy" boolean,
73
  CONSTRAINT batch_download_settings_pkey PRIMARY KEY (id ),
74
  CONSTRAINT batch_download_settings_id_fkey FOREIGN KEY (id)
75
      REFERENCES "TNRS".batch (id) MATCH SIMPLE
76
      ON UPDATE CASCADE ON DELETE CASCADE
77
)
78
WITH (
79
  OIDS=FALSE
80
);
81
COMMENT ON TABLE batch_download_settings
82
  IS 'stores data from http://tnrs.iplantcollaborative.org/TNRSapp.html > Submit List > results section > Download settings > settings.txt';
83

    
84
--
85

    
86
CREATE OR REPLACE FUNCTION score_ok(score double precision)
87
  RETURNS boolean AS
88
$BODY$
89
SELECT $1 >= 0.8
90
$BODY$
91
  LANGUAGE sql IMMUTABLE STRICT
92
  COST 100;
93

    
94
CREATE OR REPLACE FUNCTION family_is_homonym(family text)
95
  RETURNS boolean AS
96
$BODY$
97
SELECT EXISTS(SELECT * FROM "IRMNG".family_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
98
$BODY$
99
  LANGUAGE sql STABLE STRICT
100
  COST 100;
101

    
102
CREATE OR REPLACE FUNCTION genus_is_homonym(genus text)
103
  RETURNS boolean AS
104
$BODY$
105
SELECT EXISTS(SELECT * FROM "IRMNG".genus_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
106
$BODY$
107
  LANGUAGE sql STABLE STRICT
108
  COST 100;
109

    
110
CREATE TABLE tnrs
111
(
112
  batch text NOT NULL DEFAULT now(),
113
  "Name_number" integer NOT NULL,
114
  "Name_submitted" text NOT NULL,
115
  "Overall_score" double precision,
116
  "Name_matched" text,
117
  "Name_matched_rank" text,
118
  "Name_score" double precision,
119
  "Name_matched_author" text,
120
  "Name_matched_url" text,
121
  "Author_matched" text,
122
  "Author_score" double precision,
123
  "Family_matched" text,
124
  "Family_score" double precision,
125
  "Name_matched_accepted_family" text,
126
  "Genus_matched" text,
127
  "Genus_score" double precision,
128
  "Specific_epithet_matched" text,
129
  "Specific_epithet_score" double precision,
130
  "Infraspecific_rank" text,
131
  "Infraspecific_epithet_matched" text,
132
  "Infraspecific_epithet_score" double precision,
133
  "Infraspecific_rank_2" text,
134
  "Infraspecific_epithet_2_matched" text,
135
  "Infraspecific_epithet_2_score" double precision,
136
  "Annotations" text,
137
  "Unmatched_terms" text,
138
  "Taxonomic_status" text,
139
  "Accepted_name" text,
140
  "Accepted_name_author" text,
141
  "Accepted_name_rank" text,
142
  "Accepted_name_url" text,
143
  "Accepted_name_species" text,
144
  "Accepted_name_family" text,
145
  "Selected" text,
146
  "Source" text,
147
  "Warnings" text,
148
  "Accepted_name_lsid" text,
149
  "Accepted_scientific_name" text,
150
  "Max_score" double precision,
151
  "Is_homonym" boolean,
152
  "Is_plant" boolean,
153
  CONSTRAINT tnrs_pkey PRIMARY KEY (batch , "Name_number" ),
154
  CONSTRAINT tnrs_batch_fkey FOREIGN KEY (batch)
155
      REFERENCES batch (id) MATCH SIMPLE
156
      ON UPDATE CASCADE ON DELETE CASCADE,
157
  CONSTRAINT "tnrs_Name_submitted_key" UNIQUE ("Name_submitted" )
158
)
159
WITH (
160
  OIDS=FALSE
161
);
162
COMMENT ON TABLE tnrs
163
  IS 'IMPORTANT: when changing this table''s schema, you must regenerate data.sql:
164
$ <this_file>/../test_taxonomic_names/test_scrub
165
you must also make the same changes on vegbiendev: e.g.
166
$ make backups/TNRS.backup-remake # runtime: 3 min ("real 2m44.277s")
167
SELECT util.set_col_types(''"TNRS".tnrs'', ARRAY[
168
  (''col'', ''new_type'')
169
]::util.col_cast[]);
170
runtime: 9 min ("531282 ms")';
171

    
172
CREATE UNIQUE INDEX tnrs_score_ok
173
  ON tnrs
174
  USING btree
175
  ("Name_submitted" )
176
  WHERE score_ok("Max_score");
177

    
178
CREATE OR REPLACE FUNCTION tnrs_populate_fields()
179
  RETURNS trigger AS
180
$BODY$
181
DECLARE
182
    "Specific_epithet_is_plant" boolean :=
183
        (CASE
184
        WHEN   new."Infraspecific_epithet_matched"   IS NOT NULL
185
            OR new."Infraspecific_epithet_2_matched" IS NOT NULL
186
            OR new."Specific_epithet_score" >= 0.9 -- fuzzy match
187
            THEN true
188
        ELSE NULL -- ambiguous
189
        END);
190
    never_homonym boolean = COALESCE(new."Author_score" >= 0.6, false); -- author disambiguates
191
    family_is_homonym boolean = NOT never_homonym AND "TNRS".family_is_homonym(new."Family_matched");
192
    genus_is_homonym  boolean = NOT never_homonym AND "TNRS".genus_is_homonym( new."Genus_matched" );
193
BEGIN
194
    new."Accepted_scientific_name" = NULLIF(concat_ws(' '
195
        , NULLIF(NULLIF(new."Accepted_name_family", 'Unknown'), new."Accepted_name")
196
        , new."Accepted_name"
197
        , new."Accepted_name_author"
198
    ), '');
199
    new."Max_score" = GREATEST(
200
          new."Overall_score"
201
        , new."Family_score"
202
        , new."Genus_score"
203
        , new."Specific_epithet_score"
204
    );
205
    new."Is_homonym" = COALESCE(genus_is_homonym, family_is_homonym);
206
    new."Is_plant" = (CASE
207
        WHEN new."Family_score" = 1 AND NOT family_is_homonym -- exact match
208
            THEN true
209
        ELSE -- consider genus
210
            (CASE
211
            WHEN new."Genus_score" =  1    -- exact match
212
                THEN
213
                (CASE
214
                WHEN NOT genus_is_homonym THEN true
215
                ELSE "Specific_epithet_is_plant"
216
                END)
217
            WHEN new."Genus_score" >= 0.85 -- fuzzy match
218
                THEN "Specific_epithet_is_plant"
219
            ELSE NULL -- ambiguous
220
            END)
221
        END);
222
    
223
    RETURN new;
224
END;
225
$BODY$
226
  LANGUAGE plpgsql VOLATILE
227
  COST 100;
228
COMMENT ON FUNCTION tnrs_populate_fields() IS 'IMPORTANT: when changing this function, you must regenerate the derived cols:
229
UPDATE "TNRS".tnrs SET "Name_submitted" = "Name_submitted"
230
runtime: 25 min ("5221748 rows affected, 1524228 ms execution time")
231
VACUUM ANALYZE "TNRS".tnrs -- to remove previous rows
232
runtime: 1 min ("62350 ms")';
233

    
234
CREATE TRIGGER tnrs_populate_fields
235
  BEFORE INSERT OR UPDATE
236
  ON tnrs
237
  FOR EACH ROW
238
  EXECUTE PROCEDURE tnrs_populate_fields();
239

    
240
--
241

    
242
CREATE OR REPLACE VIEW "MatchedTaxon" AS
243
SELECT
244
  "batch" AS "*Name_matched.batch"
245
, "Name_submitted" AS "concatenatedScientificName"
246
, "Name_matched" AS "matchedTaxonName"
247
, "Name_matched_rank" AS "matchedTaxonRank"
248
, "Name_score" AS "*Name_matched.Name_score"
249
, "Name_matched_author" AS "matchedScientificNameAuthorship"
250
, "Name_matched_url" AS "matchedScientificNameID"
251
, "Author_score" AS "*Name_matched.Author_score"
252
, "Family_score" AS "matchedFamilyConfidence_fraction"
253
, COALESCE("Name_matched_accepted_family", "Accepted_name_family") AS "matchedFamily"
254
, "Genus_matched" AS "matchedGenus"
255
, "Genus_score" AS "matchedGenusConfidence_fraction"
256
, "Specific_epithet_matched" AS "matchedSpecificEpithet"
257
, "Specific_epithet_score" AS "matchedSpeciesConfidence_fraction"
258
, "Infraspecific_epithet_matched" AS "matchedInfraspecificEpithet"
259
, "Infraspecific_epithet_score" AS "*Name_matched.Infraspecific_epithet_score"
260
, "Annotations" AS "identificationQualifier"
261
, "Unmatched_terms" AS "morphospeciesSuffix"
262
, "Taxonomic_status" AS "taxonomicStatus"
263
, "Accepted_name" AS "acceptedTaxonName"
264
, "Accepted_name_author" AS "acceptedScientificNameAuthorship"
265
, "Accepted_name_rank" AS "acceptedTaxonRank"
266
, "Accepted_name_url" AS "acceptedScientificNameID"
267
, "Accepted_name_species" AS "*Name_matched.Accepted_name_species"
268
, "Accepted_name_family" AS "acceptedFamily"
269
, "Selected" AS "*Name_matched.Selected"
270
, "Source" AS "*Name_matched.Source"
271
, "Warnings" AS "*Name_matched.Warnings"
272
, "Accepted_name_lsid" AS "*Name_matched.Accepted_name_lsid"
273
, "Accepted_scientific_name" AS "acceptedScientificName"
274
, "Max_score" AS "matchedTaxonConfidence_fraction"
275
FROM tnrs
276
;
277

    
278
CREATE OR REPLACE VIEW "ValidMatchedTaxon" AS
279
SELECT *
280
FROM "MatchedTaxon"
281
WHERE score_ok("matchedTaxonConfidence_fraction")
282
;
283
COMMENT ON VIEW "ValidMatchedTaxon"
284
  IS 'to update, use * as the column list';
285

    
286
CREATE OR REPLACE VIEW "AcceptedTaxon" AS
287
SELECT
288
  "batch" AS "*Accepted_name.batch"
289
, "Name_submitted" AS "acceptedScientificName"
290
, "Genus_matched" AS "acceptedGenus"
291
, "Specific_epithet_matched" AS "acceptedSpecificEpithet"
292
, "Infraspecific_epithet_matched" AS "acceptedInfraspecificEpithet"
293
, "Taxonomic_status" AS "acceptedTaxonomicStatus"
294
, "Selected" AS "*Accepted_name.Selected"
295
, "Source" AS "*Accepted_name.Source"
296
, "Warnings" AS "*Accepted_name.Warnings"
297
, "Accepted_name_lsid" AS "*Accepted_name.Accepted_name_lsid"
298
FROM tnrs
299
;
300

    
301
CREATE OR REPLACE VIEW "ScrubbedTaxon" AS
302
SELECT *
303
FROM "ValidMatchedTaxon"
304
NATURAL LEFT JOIN "AcceptedTaxon"
305
;
306
COMMENT ON VIEW "ScrubbedTaxon"
307
  IS 'to update, use * as the column list';
(7-7/8)