Project

General

Profile

1 10737 aaronmk
CREATE TABLE client_version
2
(
3
  id text NOT NULL,
4 10738 aaronmk
  global_rev integer NOT NULL, -- from `svn info .` > Last Changed Rev
5
  "/lib/tnrs.py rev" integer, -- from `svn info lib/tnrs.py` > Last Changed Rev
6
  "/bin/tnrs_db rev" integer, -- from `svn info bin/tnrs_db` > Last Changed Rev
7 10737 aaronmk
  CONSTRAINT client_version_pkey PRIMARY KEY (id )
8
)
9
WITH (
10
  OIDS=FALSE
11
);
12 10738 aaronmk
COMMENT ON TABLE client_version
13
  IS 'contains svn revisions';
14
COMMENT ON COLUMN client_version.global_rev IS 'from `svn info .` > Last Changed Rev';
15
COMMENT ON COLUMN client_version."/lib/tnrs.py rev" IS 'from `svn info lib/tnrs.py` > Last Changed Rev';
16
COMMENT ON COLUMN client_version."/bin/tnrs_db rev" IS 'from `svn info bin/tnrs_db` > Last Changed Rev';
17 10737 aaronmk
18
--
19
20 10736 aaronmk
CREATE TABLE batch
21 10725 aaronmk
(
22 10728 aaronmk
  id text NOT NULL,
23
  id_by_time text,
24
  time_submitted timestamp with time zone DEFAULT now(),
25 10737 aaronmk
  client_version text,
26 10736 aaronmk
  CONSTRAINT batch_pkey PRIMARY KEY (id ),
27 10737 aaronmk
  CONSTRAINT batch_client_version_fkey FOREIGN KEY (client_version)
28
      REFERENCES client_version (id) MATCH SIMPLE
29 10741 aaronmk
      ON UPDATE CASCADE ON DELETE CASCADE,
30
  CONSTRAINT batch_id_by_time_key UNIQUE (id_by_time )
31 10725 aaronmk
)
32
WITH (
33
  OIDS=FALSE
34
);
35
36
37 10736 aaronmk
CREATE OR REPLACE FUNCTION batch__fill()
38 10728 aaronmk
  RETURNS trigger AS
39
$BODY$
40
BEGIN
41
	new.id_by_time = new.time_submitted;
42
	new.id = COALESCE(new.id, new.id_by_time);
43
	RETURN new;
44
END;
45
$BODY$
46
  LANGUAGE plpgsql VOLATILE
47
  COST 100;
48
49 10736 aaronmk
CREATE TRIGGER batch__fill
50 10728 aaronmk
  BEFORE INSERT OR UPDATE
51 10736 aaronmk
  ON batch
52 10728 aaronmk
  FOR EACH ROW
53 10736 aaronmk
  EXECUTE PROCEDURE batch__fill();
54 10728 aaronmk
55
--
56
57 10740 aaronmk
CREATE TABLE batch_download_settings
58 10736 aaronmk
(
59
  id text NOT NULL,
60
  "E-mail" text,
61
  "Id" text,
62
  "Job type" text,
63
  "Contains Id" boolean,
64
  "Start time" text,
65
  "Finish time" text,
66
  "TNRS version" text,
67
  "Sources selected" text,
68
  "Match threshold" double precision,
69
  "Classification" text,
70
  "Allow partial matches?" boolean,
71
  "Sort by source" boolean,
72
  "Constrain by higher taxonomy" boolean,
73 10740 aaronmk
  CONSTRAINT batch_download_settings_pkey PRIMARY KEY (id ),
74
  CONSTRAINT batch_download_settings_id_fkey FOREIGN KEY (id)
75 10739 aaronmk
      REFERENCES "TNRS".batch (id) MATCH SIMPLE
76
      ON UPDATE CASCADE ON DELETE CASCADE
77 10736 aaronmk
)
78
WITH (
79
  OIDS=FALSE
80
);
81 10740 aaronmk
COMMENT ON TABLE batch_download_settings
82 10736 aaronmk
  IS 'stores data from http://tnrs.iplantcollaborative.org/TNRSapp.html > Submit List > results section > Download settings > settings.txt';
83
84
--
85
86 7844 aaronmk
CREATE OR REPLACE FUNCTION score_ok(score double precision)
87
  RETURNS boolean AS
88
$BODY$
89
SELECT $1 >= 0.8
90
$BODY$
91 9984 aaronmk
  LANGUAGE sql IMMUTABLE STRICT
92 7844 aaronmk
  COST 100;
93
94 9985 aaronmk
CREATE OR REPLACE FUNCTION family_is_homonym(family text)
95
  RETURNS boolean AS
96
$BODY$
97
SELECT EXISTS(SELECT * FROM "IRMNG".family_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
98
$BODY$
99 9986 aaronmk
  LANGUAGE sql STABLE STRICT
100 9985 aaronmk
  COST 100;
101
102
CREATE OR REPLACE FUNCTION genus_is_homonym(genus text)
103
  RETURNS boolean AS
104
$BODY$
105
SELECT EXISTS(SELECT * FROM "IRMNG".genus_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
106
$BODY$
107 9986 aaronmk
  LANGUAGE sql STABLE STRICT
108 9985 aaronmk
  COST 100;
109
110 9756 aaronmk
/* IMPORTANT: when changing this table's schema, you must regenerate data.sql:
111
$ <this_file>/../test_taxonomic_names/test_scrub
112 9990 aaronmk
you must also make the same changes on vegbiendev: e.g.
113 9994 aaronmk
$ make backups/TNRS.backup-remake # runtime: 3 min ("real 2m44.277s")
114 9990 aaronmk
SELECT util.set_col_types('"TNRS".tnrs', ARRAY[
115
  ('col', 'new_type')
116
]::util.col_cast[]);
117 10747 aaronmk
runtime: 9 min ("531282 ms")
118 9756 aaronmk
*/
119 5183 aaronmk
CREATE TABLE tnrs
120 5110 aaronmk
(
121 10746 aaronmk
  batch text NOT NULL DEFAULT now(),
122 9908 aaronmk
  "Name_number" integer NOT NULL,
123 5110 aaronmk
  "Name_submitted" text NOT NULL,
124 9913 aaronmk
  "Overall_score" double precision,
125 5110 aaronmk
  "Name_matched" text,
126
  "Name_matched_rank" text,
127 9913 aaronmk
  "Name_score" double precision,
128 5110 aaronmk
  "Name_matched_author" text,
129
  "Name_matched_url" text,
130
  "Author_matched" text,
131 9913 aaronmk
  "Author_score" double precision,
132 5110 aaronmk
  "Family_matched" text,
133 9913 aaronmk
  "Family_score" double precision,
134 9529 aaronmk
  "Name_matched_accepted_family" text,
135 5110 aaronmk
  "Genus_matched" text,
136 9913 aaronmk
  "Genus_score" double precision,
137 5110 aaronmk
  "Specific_epithet_matched" text,
138 9913 aaronmk
  "Specific_epithet_score" double precision,
139 5110 aaronmk
  "Infraspecific_rank" text,
140
  "Infraspecific_epithet_matched" text,
141 9913 aaronmk
  "Infraspecific_epithet_score" double precision,
142 5110 aaronmk
  "Infraspecific_rank_2" text,
143
  "Infraspecific_epithet_2_matched" text,
144 9913 aaronmk
  "Infraspecific_epithet_2_score" double precision,
145 5110 aaronmk
  "Annotations" text,
146
  "Unmatched_terms" text,
147
  "Taxonomic_status" text,
148
  "Accepted_name" text,
149
  "Accepted_name_author" text,
150
  "Accepted_name_rank" text,
151
  "Accepted_name_url" text,
152 9762 aaronmk
  "Accepted_name_species" text,
153
  "Accepted_name_family" text,
154 5110 aaronmk
  "Selected" text,
155
  "Source" text,
156
  "Warnings" text,
157
  "Accepted_name_lsid" text,
158 7133 aaronmk
  "Accepted_scientific_name" text,
159 7293 aaronmk
  "Max_score" double precision,
160 9972 aaronmk
  "Is_homonym" boolean,
161 9763 aaronmk
  "Is_plant" boolean,
162 10746 aaronmk
  CONSTRAINT tnrs_pkey PRIMARY KEY (batch , "Name_number" ),
163
  CONSTRAINT tnrs_batch_fkey FOREIGN KEY (batch)
164
      REFERENCES batch (id) MATCH SIMPLE
165
      ON UPDATE CASCADE ON DELETE CASCADE,
166 9906 aaronmk
  CONSTRAINT "tnrs_Name_submitted_key" UNIQUE ("Name_submitted" )
167 5110 aaronmk
)
168
WITH (
169
  OIDS=FALSE
170
);
171 7132 aaronmk
172 7844 aaronmk
CREATE UNIQUE INDEX tnrs_score_ok
173
  ON tnrs
174
  USING btree
175
  ("Name_submitted" )
176
  WHERE score_ok("Max_score");
177
178 9765 aaronmk
/* IMPORTANT: when changing this function, you must regenerate the derived cols:
179 9974 aaronmk
UPDATE "TNRS".tnrs SET "Name_submitted" = "Name_submitted"
180 10280 aaronmk
runtime: 25 min ("5221748 rows affected, 1524228 ms execution time")
181 9993 aaronmk
VACUUM ANALYZE "TNRS".tnrs -- to remove previous rows
182 10280 aaronmk
runtime: 1 min ("62350 ms")
183 9765 aaronmk
*/
184 9512 aaronmk
CREATE OR REPLACE FUNCTION tnrs_populate_fields()
185 7134 aaronmk
  RETURNS trigger AS
186
$BODY$
187 9763 aaronmk
DECLARE
188
    "Specific_epithet_is_plant" boolean :=
189
        (CASE
190
        WHEN   new."Infraspecific_epithet_matched"   IS NOT NULL
191
            OR new."Infraspecific_epithet_2_matched" IS NOT NULL
192 9914 aaronmk
            OR new."Specific_epithet_score" >= 0.9 -- fuzzy match
193 9763 aaronmk
            THEN true
194
        ELSE NULL -- ambiguous
195
        END);
196 9982 aaronmk
    never_homonym boolean = COALESCE(new."Author_score" >= 0.6, false); -- author disambiguates
197 9988 aaronmk
    family_is_homonym boolean = NOT never_homonym AND "TNRS".family_is_homonym(new."Family_matched");
198
    genus_is_homonym  boolean = NOT never_homonym AND "TNRS".genus_is_homonym( new."Genus_matched" );
199 7134 aaronmk
BEGIN
200 7848 aaronmk
    new."Accepted_scientific_name" = NULLIF(concat_ws(' '
201 9762 aaronmk
        , NULLIF(NULLIF(new."Accepted_name_family", 'Unknown'), new."Accepted_name")
202 7134 aaronmk
        , new."Accepted_name"
203
        , new."Accepted_name_author"
204 7848 aaronmk
    ), '');
205 7293 aaronmk
    new."Max_score" = GREATEST(
206 9914 aaronmk
          new."Overall_score"
207
        , new."Family_score"
208
        , new."Genus_score"
209
        , new."Specific_epithet_score"
210 7293 aaronmk
    );
211 9987 aaronmk
    new."Is_homonym" = COALESCE(genus_is_homonym, family_is_homonym);
212 9763 aaronmk
    new."Is_plant" = (CASE
213 9973 aaronmk
        WHEN new."Family_score" = 1 AND NOT family_is_homonym -- exact match
214
            THEN true
215
        ELSE -- consider genus
216 9763 aaronmk
            (CASE
217 9914 aaronmk
            WHEN new."Genus_score" =  1    -- exact match
218 9973 aaronmk
                THEN
219
                (CASE
220
                WHEN NOT genus_is_homonym THEN true
221
                ELSE "Specific_epithet_is_plant"
222
                END)
223 9914 aaronmk
            WHEN new."Genus_score" >= 0.85 -- fuzzy match
224 9763 aaronmk
                THEN "Specific_epithet_is_plant"
225
            ELSE NULL -- ambiguous
226
            END)
227
        END);
228 7134 aaronmk
229
    RETURN new;
230
END;
231
$BODY$
232
  LANGUAGE plpgsql VOLATILE
233
  COST 100;
234
235 9512 aaronmk
CREATE TRIGGER tnrs_populate_fields
236 7134 aaronmk
  BEFORE INSERT OR UPDATE
237
  ON tnrs
238
  FOR EACH ROW
239 9512 aaronmk
  EXECUTE PROCEDURE tnrs_populate_fields();
240 7251 aaronmk
241 10728 aaronmk
--
242 7251 aaronmk
243 9759 aaronmk
CREATE OR REPLACE VIEW "MatchedTaxon" AS
244 7823 aaronmk
SELECT
245 10746 aaronmk
  "batch" AS "*Name_matched.batch"
246 7830 aaronmk
, "Name_submitted" AS "concatenatedScientificName"
247 7829 aaronmk
, "Name_matched" AS "matchedTaxonName"
248 7823 aaronmk
, "Name_matched_rank" AS "matchedTaxonRank"
249
, "Name_score" AS "*Name_matched.Name_score"
250
, "Name_matched_author" AS "matchedScientificNameAuthorship"
251
, "Name_matched_url" AS "matchedScientificNameID"
252
, "Author_score" AS "*Name_matched.Author_score"
253
, "Family_score" AS "matchedFamilyConfidence_fraction"
254 9762 aaronmk
, COALESCE("Name_matched_accepted_family", "Accepted_name_family") AS "matchedFamily"
255 7831 aaronmk
, "Genus_matched" AS "matchedGenus"
256 7823 aaronmk
, "Genus_score" AS "matchedGenusConfidence_fraction"
257
, "Specific_epithet_matched" AS "matchedSpecificEpithet"
258
, "Specific_epithet_score" AS "matchedSpeciesConfidence_fraction"
259
, "Infraspecific_epithet_matched" AS "matchedInfraspecificEpithet"
260
, "Infraspecific_epithet_score" AS "*Name_matched.Infraspecific_epithet_score"
261
, "Annotations" AS "identificationQualifier"
262
, "Unmatched_terms" AS "morphospeciesSuffix"
263 7833 aaronmk
, "Taxonomic_status" AS "taxonomicStatus"
264 7823 aaronmk
, "Accepted_name" AS "acceptedTaxonName"
265
, "Accepted_name_author" AS "acceptedScientificNameAuthorship"
266
, "Accepted_name_rank" AS "acceptedTaxonRank"
267
, "Accepted_name_url" AS "acceptedScientificNameID"
268 9762 aaronmk
, "Accepted_name_species" AS "*Name_matched.Accepted_name_species"
269
, "Accepted_name_family" AS "acceptedFamily"
270 7823 aaronmk
, "Selected" AS "*Name_matched.Selected"
271 7828 aaronmk
, "Source" AS "*Name_matched.Source"
272 7823 aaronmk
, "Warnings" AS "*Name_matched.Warnings"
273
, "Accepted_name_lsid" AS "*Name_matched.Accepted_name_lsid"
274
, "Accepted_scientific_name" AS "acceptedScientificName"
275
, "Max_score" AS "matchedTaxonConfidence_fraction"
276 9616 aaronmk
FROM tnrs
277
;
278 7823 aaronmk
279 9759 aaronmk
CREATE OR REPLACE VIEW "ValidMatchedTaxon" AS
280
SELECT *
281
FROM "MatchedTaxon"
282
WHERE score_ok("matchedTaxonConfidence_fraction")
283
;
284
285 7823 aaronmk
CREATE OR REPLACE VIEW "AcceptedTaxon" AS
286
SELECT
287 10746 aaronmk
  "batch" AS "*Accepted_name.batch"
288 7823 aaronmk
, "Name_submitted" AS "acceptedScientificName"
289
, "Genus_matched" AS "acceptedGenus"
290
, "Specific_epithet_matched" AS "acceptedSpecificEpithet"
291
, "Infraspecific_epithet_matched" AS "acceptedInfraspecificEpithet"
292
, "Taxonomic_status" AS "acceptedTaxonomicStatus"
293
, "Selected" AS "*Accepted_name.Selected"
294
, "Source" AS "*Accepted_name.Source"
295
, "Warnings" AS "*Accepted_name.Warnings"
296
, "Accepted_name_lsid" AS "*Accepted_name.Accepted_name_lsid"
297 9760 aaronmk
FROM tnrs
298
;
299 7823 aaronmk
300
CREATE OR REPLACE VIEW "ScrubbedTaxon" AS
301
SELECT *
302 9758 aaronmk
FROM "ValidMatchedTaxon"
303 7823 aaronmk
NATURAL LEFT JOIN "AcceptedTaxon"
304
;