Project

General

Profile

1
CREATE TABLE client_version
2
(
3
  id text NOT NULL,
4
  global_rev integer NOT NULL, -- from `svn info .` > Last Changed Rev
5
  "/lib/tnrs.py rev" integer, -- from `svn info lib/tnrs.py` > Last Changed Rev
6
  "/bin/tnrs_db rev" integer, -- from `svn info bin/tnrs_db` > Last Changed Rev
7
  CONSTRAINT client_version_pkey PRIMARY KEY (id )
8
)
9
WITH (
10
  OIDS=FALSE
11
);
12
COMMENT ON TABLE client_version
13
  IS 'contains svn revisions';
14
COMMENT ON COLUMN client_version.global_rev IS 'from `svn info .` > Last Changed Rev';
15
COMMENT ON COLUMN client_version."/lib/tnrs.py rev" IS 'from `svn info lib/tnrs.py` > Last Changed Rev';
16
COMMENT ON COLUMN client_version."/bin/tnrs_db rev" IS 'from `svn info bin/tnrs_db` > Last Changed Rev';
17

    
18
--
19

    
20
CREATE TABLE batch
21
(
22
  id text NOT NULL,
23
  id_by_time text,
24
  time_submitted timestamp with time zone DEFAULT now(),
25
  client_version text,
26
  CONSTRAINT batch_pkey PRIMARY KEY (id ),
27
  CONSTRAINT download_settings_id_by_time_key UNIQUE (id_by_time ),
28
  CONSTRAINT batch_client_version_fkey FOREIGN KEY (client_version)
29
      REFERENCES client_version (id) MATCH SIMPLE
30
      ON UPDATE CASCADE ON DELETE CASCADE
31
)
32
WITH (
33
  OIDS=FALSE
34
);
35

    
36

    
37
CREATE OR REPLACE FUNCTION batch__fill()
38
  RETURNS trigger AS
39
$BODY$
40
BEGIN
41
	new.id_by_time = new.time_submitted;
42
	new.id = COALESCE(new.id, new.id_by_time);
43
	RETURN new;
44
END;
45
$BODY$
46
  LANGUAGE plpgsql VOLATILE
47
  COST 100;
48

    
49
CREATE TRIGGER batch__fill
50
  BEFORE INSERT OR UPDATE
51
  ON batch
52
  FOR EACH ROW
53
  EXECUTE PROCEDURE batch__fill();
54

    
55
--
56

    
57
CREATE TABLE download_settings
58
(
59
  id text NOT NULL,
60
  "E-mail" text,
61
  "Id" text,
62
  "Job type" text,
63
  "Contains Id" boolean,
64
  "Start time" text,
65
  "Finish time" text,
66
  "TNRS version" text,
67
  "Sources selected" text,
68
  "Match threshold" double precision,
69
  "Classification" text,
70
  "Allow partial matches?" boolean,
71
  "Sort by source" boolean,
72
  "Constrain by higher taxonomy" boolean,
73
  CONSTRAINT download_settings_pkey PRIMARY KEY (id )
74
)
75
WITH (
76
  OIDS=FALSE
77
);
78
COMMENT ON TABLE download_settings
79
  IS 'stores data from http://tnrs.iplantcollaborative.org/TNRSapp.html > Submit List > results section > Download settings > settings.txt';
80

    
81
--
82

    
83
CREATE OR REPLACE FUNCTION score_ok(score double precision)
84
  RETURNS boolean AS
85
$BODY$
86
SELECT $1 >= 0.8
87
$BODY$
88
  LANGUAGE sql IMMUTABLE STRICT
89
  COST 100;
90

    
91
CREATE OR REPLACE FUNCTION family_is_homonym(family text)
92
  RETURNS boolean AS
93
$BODY$
94
SELECT EXISTS(SELECT * FROM "IRMNG".family_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
95
$BODY$
96
  LANGUAGE sql STABLE STRICT
97
  COST 100;
98

    
99
CREATE OR REPLACE FUNCTION genus_is_homonym(genus text)
100
  RETURNS boolean AS
101
$BODY$
102
SELECT EXISTS(SELECT * FROM "IRMNG".genus_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
103
$BODY$
104
  LANGUAGE sql STABLE STRICT
105
  COST 100;
106

    
107
/* IMPORTANT: when changing this table's schema, you must regenerate data.sql:
108
$ <this_file>/../test_taxonomic_names/test_scrub
109
you must also make the same changes on vegbiendev: e.g.
110
$ make backups/TNRS.backup-remake # runtime: 3 min ("real 2m44.277s")
111
SELECT util.set_col_types('"TNRS".tnrs', ARRAY[
112
  ('col', 'new_type')
113
]::util.col_cast[]);
114
runtime: 4 min ("242100 ms")
115
*/
116
CREATE TABLE tnrs
117
(
118
  "Time_submitted" timestamp with time zone NOT NULL DEFAULT now(),
119
  "Name_number" integer NOT NULL,
120
  "Name_submitted" text NOT NULL,
121
  "Overall_score" double precision,
122
  "Name_matched" text,
123
  "Name_matched_rank" text,
124
  "Name_score" double precision,
125
  "Name_matched_author" text,
126
  "Name_matched_url" text,
127
  "Author_matched" text,
128
  "Author_score" double precision,
129
  "Family_matched" text,
130
  "Family_score" double precision,
131
  "Name_matched_accepted_family" text,
132
  "Genus_matched" text,
133
  "Genus_score" double precision,
134
  "Specific_epithet_matched" text,
135
  "Specific_epithet_score" double precision,
136
  "Infraspecific_rank" text,
137
  "Infraspecific_epithet_matched" text,
138
  "Infraspecific_epithet_score" double precision,
139
  "Infraspecific_rank_2" text,
140
  "Infraspecific_epithet_2_matched" text,
141
  "Infraspecific_epithet_2_score" double precision,
142
  "Annotations" text,
143
  "Unmatched_terms" text,
144
  "Taxonomic_status" text,
145
  "Accepted_name" text,
146
  "Accepted_name_author" text,
147
  "Accepted_name_rank" text,
148
  "Accepted_name_url" text,
149
  "Accepted_name_species" text,
150
  "Accepted_name_family" text,
151
  "Selected" text,
152
  "Source" text,
153
  "Warnings" text,
154
  "Accepted_name_lsid" text,
155
  "Accepted_scientific_name" text,
156
  "Max_score" double precision,
157
  "Is_homonym" boolean,
158
  "Is_plant" boolean,
159
  CONSTRAINT tnrs_pkey PRIMARY KEY ("Time_submitted" , "Name_number" ),
160
  CONSTRAINT "tnrs_Name_submitted_key" UNIQUE ("Name_submitted" )
161
)
162
WITH (
163
  OIDS=FALSE
164
);
165

    
166
CREATE UNIQUE INDEX tnrs_score_ok
167
  ON tnrs
168
  USING btree
169
  ("Name_submitted" )
170
  WHERE score_ok("Max_score");
171

    
172
/* IMPORTANT: when changing this function, you must regenerate the derived cols:
173
UPDATE "TNRS".tnrs SET "Name_submitted" = "Name_submitted"
174
runtime: 25 min ("5221748 rows affected, 1524228 ms execution time")
175
VACUUM ANALYZE "TNRS".tnrs -- to remove previous rows
176
runtime: 1 min ("62350 ms")
177
*/
178
CREATE OR REPLACE FUNCTION tnrs_populate_fields()
179
  RETURNS trigger AS
180
$BODY$
181
DECLARE
182
    "Specific_epithet_is_plant" boolean :=
183
        (CASE
184
        WHEN   new."Infraspecific_epithet_matched"   IS NOT NULL
185
            OR new."Infraspecific_epithet_2_matched" IS NOT NULL
186
            OR new."Specific_epithet_score" >= 0.9 -- fuzzy match
187
            THEN true
188
        ELSE NULL -- ambiguous
189
        END);
190
    never_homonym boolean = COALESCE(new."Author_score" >= 0.6, false); -- author disambiguates
191
    family_is_homonym boolean = NOT never_homonym AND "TNRS".family_is_homonym(new."Family_matched");
192
    genus_is_homonym  boolean = NOT never_homonym AND "TNRS".genus_is_homonym( new."Genus_matched" );
193
BEGIN
194
    new."Accepted_scientific_name" = NULLIF(concat_ws(' '
195
        , NULLIF(NULLIF(new."Accepted_name_family", 'Unknown'), new."Accepted_name")
196
        , new."Accepted_name"
197
        , new."Accepted_name_author"
198
    ), '');
199
    new."Max_score" = GREATEST(
200
          new."Overall_score"
201
        , new."Family_score"
202
        , new."Genus_score"
203
        , new."Specific_epithet_score"
204
    );
205
    new."Is_homonym" = COALESCE(genus_is_homonym, family_is_homonym);
206
    new."Is_plant" = (CASE
207
        WHEN new."Family_score" = 1 AND NOT family_is_homonym -- exact match
208
            THEN true
209
        ELSE -- consider genus
210
            (CASE
211
            WHEN new."Genus_score" =  1    -- exact match
212
                THEN
213
                (CASE
214
                WHEN NOT genus_is_homonym THEN true
215
                ELSE "Specific_epithet_is_plant"
216
                END)
217
            WHEN new."Genus_score" >= 0.85 -- fuzzy match
218
                THEN "Specific_epithet_is_plant"
219
            ELSE NULL -- ambiguous
220
            END)
221
        END);
222
    
223
    RETURN new;
224
END;
225
$BODY$
226
  LANGUAGE plpgsql VOLATILE
227
  COST 100;
228

    
229
CREATE TRIGGER tnrs_populate_fields
230
  BEFORE INSERT OR UPDATE
231
  ON tnrs
232
  FOR EACH ROW
233
  EXECUTE PROCEDURE tnrs_populate_fields();
234

    
235
--
236

    
237
CREATE OR REPLACE VIEW "MatchedTaxon" AS
238
SELECT
239
  "Time_submitted" AS "*Name_matched.Time_submitted"
240
, "Name_submitted" AS "concatenatedScientificName"
241
, "Name_matched" AS "matchedTaxonName"
242
, "Name_matched_rank" AS "matchedTaxonRank"
243
, "Name_score" AS "*Name_matched.Name_score"
244
, "Name_matched_author" AS "matchedScientificNameAuthorship"
245
, "Name_matched_url" AS "matchedScientificNameID"
246
, "Author_score" AS "*Name_matched.Author_score"
247
, "Family_score" AS "matchedFamilyConfidence_fraction"
248
, COALESCE("Name_matched_accepted_family", "Accepted_name_family") AS "matchedFamily"
249
, "Genus_matched" AS "matchedGenus"
250
, "Genus_score" AS "matchedGenusConfidence_fraction"
251
, "Specific_epithet_matched" AS "matchedSpecificEpithet"
252
, "Specific_epithet_score" AS "matchedSpeciesConfidence_fraction"
253
, "Infraspecific_epithet_matched" AS "matchedInfraspecificEpithet"
254
, "Infraspecific_epithet_score" AS "*Name_matched.Infraspecific_epithet_score"
255
, "Annotations" AS "identificationQualifier"
256
, "Unmatched_terms" AS "morphospeciesSuffix"
257
, "Taxonomic_status" AS "taxonomicStatus"
258
, "Accepted_name" AS "acceptedTaxonName"
259
, "Accepted_name_author" AS "acceptedScientificNameAuthorship"
260
, "Accepted_name_rank" AS "acceptedTaxonRank"
261
, "Accepted_name_url" AS "acceptedScientificNameID"
262
, "Accepted_name_species" AS "*Name_matched.Accepted_name_species"
263
, "Accepted_name_family" AS "acceptedFamily"
264
, "Selected" AS "*Name_matched.Selected"
265
, "Source" AS "*Name_matched.Source"
266
, "Warnings" AS "*Name_matched.Warnings"
267
, "Accepted_name_lsid" AS "*Name_matched.Accepted_name_lsid"
268
, "Accepted_scientific_name" AS "acceptedScientificName"
269
, "Max_score" AS "matchedTaxonConfidence_fraction"
270
FROM tnrs
271
;
272

    
273
CREATE OR REPLACE VIEW "ValidMatchedTaxon" AS
274
SELECT *
275
FROM "MatchedTaxon"
276
WHERE score_ok("matchedTaxonConfidence_fraction")
277
;
278

    
279
CREATE OR REPLACE VIEW "AcceptedTaxon" AS
280
SELECT
281
  "Time_submitted" AS "*Accepted_name.Time_submitted"
282
, "Name_submitted" AS "acceptedScientificName"
283
, "Genus_matched" AS "acceptedGenus"
284
, "Specific_epithet_matched" AS "acceptedSpecificEpithet"
285
, "Infraspecific_epithet_matched" AS "acceptedInfraspecificEpithet"
286
, "Taxonomic_status" AS "acceptedTaxonomicStatus"
287
, "Selected" AS "*Accepted_name.Selected"
288
, "Source" AS "*Accepted_name.Source"
289
, "Warnings" AS "*Accepted_name.Warnings"
290
, "Accepted_name_lsid" AS "*Accepted_name.Accepted_name_lsid"
291
FROM tnrs
292
;
293

    
294
CREATE OR REPLACE VIEW "ScrubbedTaxon" AS
295
SELECT *
296
FROM "ValidMatchedTaxon"
297
NATURAL LEFT JOIN "AcceptedTaxon"
298
;
(7-7/8)