Project

General

Profile

1 10737 aaronmk
CREATE TABLE client_version
2
(
3
  id text NOT NULL,
4 10738 aaronmk
  global_rev integer NOT NULL, -- from `svn info .` > Last Changed Rev
5
  "/lib/tnrs.py rev" integer, -- from `svn info lib/tnrs.py` > Last Changed Rev
6
  "/bin/tnrs_db rev" integer, -- from `svn info bin/tnrs_db` > Last Changed Rev
7 10737 aaronmk
  CONSTRAINT client_version_pkey PRIMARY KEY (id )
8
)
9
WITH (
10
  OIDS=FALSE
11
);
12 10738 aaronmk
COMMENT ON TABLE client_version
13
  IS 'contains svn revisions';
14
COMMENT ON COLUMN client_version.global_rev IS 'from `svn info .` > Last Changed Rev';
15
COMMENT ON COLUMN client_version."/lib/tnrs.py rev" IS 'from `svn info lib/tnrs.py` > Last Changed Rev';
16
COMMENT ON COLUMN client_version."/bin/tnrs_db rev" IS 'from `svn info bin/tnrs_db` > Last Changed Rev';
17 10737 aaronmk
18
--
19
20 10736 aaronmk
CREATE TABLE batch
21 10725 aaronmk
(
22 10728 aaronmk
  id text NOT NULL,
23
  id_by_time text,
24
  time_submitted timestamp with time zone DEFAULT now(),
25 10737 aaronmk
  client_version text,
26 10736 aaronmk
  CONSTRAINT batch_pkey PRIMARY KEY (id ),
27 10737 aaronmk
  CONSTRAINT download_settings_id_by_time_key UNIQUE (id_by_time ),
28
  CONSTRAINT batch_client_version_fkey FOREIGN KEY (client_version)
29
      REFERENCES client_version (id) MATCH SIMPLE
30
      ON UPDATE CASCADE ON DELETE CASCADE
31 10725 aaronmk
)
32
WITH (
33
  OIDS=FALSE
34
);
35
36
37 10736 aaronmk
CREATE OR REPLACE FUNCTION batch__fill()
38 10728 aaronmk
  RETURNS trigger AS
39
$BODY$
40
BEGIN
41
	new.id_by_time = new.time_submitted;
42
	new.id = COALESCE(new.id, new.id_by_time);
43
	RETURN new;
44
END;
45
$BODY$
46
  LANGUAGE plpgsql VOLATILE
47
  COST 100;
48
49 10736 aaronmk
CREATE TRIGGER batch__fill
50 10728 aaronmk
  BEFORE INSERT OR UPDATE
51 10736 aaronmk
  ON batch
52 10728 aaronmk
  FOR EACH ROW
53 10736 aaronmk
  EXECUTE PROCEDURE batch__fill();
54 10728 aaronmk
55
--
56
57 10736 aaronmk
CREATE TABLE download_settings
58
(
59
  id text NOT NULL,
60
  "E-mail" text,
61
  "Id" text,
62
  "Job type" text,
63
  "Contains Id" boolean,
64
  "Start time" text,
65
  "Finish time" text,
66
  "TNRS version" text,
67
  "Sources selected" text,
68
  "Match threshold" double precision,
69
  "Classification" text,
70
  "Allow partial matches?" boolean,
71
  "Sort by source" boolean,
72
  "Constrain by higher taxonomy" boolean,
73
  CONSTRAINT download_settings_pkey PRIMARY KEY (id )
74
)
75
WITH (
76
  OIDS=FALSE
77
);
78
COMMENT ON TABLE download_settings
79
  IS 'stores data from http://tnrs.iplantcollaborative.org/TNRSapp.html > Submit List > results section > Download settings > settings.txt';
80
81
--
82
83 7844 aaronmk
CREATE OR REPLACE FUNCTION score_ok(score double precision)
84
  RETURNS boolean AS
85
$BODY$
86
SELECT $1 >= 0.8
87
$BODY$
88 9984 aaronmk
  LANGUAGE sql IMMUTABLE STRICT
89 7844 aaronmk
  COST 100;
90
91 9985 aaronmk
CREATE OR REPLACE FUNCTION family_is_homonym(family text)
92
  RETURNS boolean AS
93
$BODY$
94
SELECT EXISTS(SELECT * FROM "IRMNG".family_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
95
$BODY$
96 9986 aaronmk
  LANGUAGE sql STABLE STRICT
97 9985 aaronmk
  COST 100;
98
99
CREATE OR REPLACE FUNCTION genus_is_homonym(genus text)
100
  RETURNS boolean AS
101
$BODY$
102
SELECT EXISTS(SELECT * FROM "IRMNG".genus_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
103
$BODY$
104 9986 aaronmk
  LANGUAGE sql STABLE STRICT
105 9985 aaronmk
  COST 100;
106
107 9756 aaronmk
/* IMPORTANT: when changing this table's schema, you must regenerate data.sql:
108
$ <this_file>/../test_taxonomic_names/test_scrub
109 9990 aaronmk
you must also make the same changes on vegbiendev: e.g.
110 9994 aaronmk
$ make backups/TNRS.backup-remake # runtime: 3 min ("real 2m44.277s")
111 9990 aaronmk
SELECT util.set_col_types('"TNRS".tnrs', ARRAY[
112
  ('col', 'new_type')
113
]::util.col_cast[]);
114
runtime: 4 min ("242100 ms")
115 9756 aaronmk
*/
116 5183 aaronmk
CREATE TABLE tnrs
117 5110 aaronmk
(
118 9905 aaronmk
  "Time_submitted" timestamp with time zone NOT NULL DEFAULT now(),
119 9908 aaronmk
  "Name_number" integer NOT NULL,
120 5110 aaronmk
  "Name_submitted" text NOT NULL,
121 9913 aaronmk
  "Overall_score" double precision,
122 5110 aaronmk
  "Name_matched" text,
123
  "Name_matched_rank" text,
124 9913 aaronmk
  "Name_score" double precision,
125 5110 aaronmk
  "Name_matched_author" text,
126
  "Name_matched_url" text,
127
  "Author_matched" text,
128 9913 aaronmk
  "Author_score" double precision,
129 5110 aaronmk
  "Family_matched" text,
130 9913 aaronmk
  "Family_score" double precision,
131 9529 aaronmk
  "Name_matched_accepted_family" text,
132 5110 aaronmk
  "Genus_matched" text,
133 9913 aaronmk
  "Genus_score" double precision,
134 5110 aaronmk
  "Specific_epithet_matched" text,
135 9913 aaronmk
  "Specific_epithet_score" double precision,
136 5110 aaronmk
  "Infraspecific_rank" text,
137
  "Infraspecific_epithet_matched" text,
138 9913 aaronmk
  "Infraspecific_epithet_score" double precision,
139 5110 aaronmk
  "Infraspecific_rank_2" text,
140
  "Infraspecific_epithet_2_matched" text,
141 9913 aaronmk
  "Infraspecific_epithet_2_score" double precision,
142 5110 aaronmk
  "Annotations" text,
143
  "Unmatched_terms" text,
144
  "Taxonomic_status" text,
145
  "Accepted_name" text,
146
  "Accepted_name_author" text,
147
  "Accepted_name_rank" text,
148
  "Accepted_name_url" text,
149 9762 aaronmk
  "Accepted_name_species" text,
150
  "Accepted_name_family" text,
151 5110 aaronmk
  "Selected" text,
152
  "Source" text,
153
  "Warnings" text,
154
  "Accepted_name_lsid" text,
155 7133 aaronmk
  "Accepted_scientific_name" text,
156 7293 aaronmk
  "Max_score" double precision,
157 9972 aaronmk
  "Is_homonym" boolean,
158 9763 aaronmk
  "Is_plant" boolean,
159 9907 aaronmk
  CONSTRAINT tnrs_pkey PRIMARY KEY ("Time_submitted" , "Name_number" ),
160 9906 aaronmk
  CONSTRAINT "tnrs_Name_submitted_key" UNIQUE ("Name_submitted" )
161 5110 aaronmk
)
162
WITH (
163
  OIDS=FALSE
164
);
165 7132 aaronmk
166 7844 aaronmk
CREATE UNIQUE INDEX tnrs_score_ok
167
  ON tnrs
168
  USING btree
169
  ("Name_submitted" )
170
  WHERE score_ok("Max_score");
171
172 9765 aaronmk
/* IMPORTANT: when changing this function, you must regenerate the derived cols:
173 9974 aaronmk
UPDATE "TNRS".tnrs SET "Name_submitted" = "Name_submitted"
174 10280 aaronmk
runtime: 25 min ("5221748 rows affected, 1524228 ms execution time")
175 9993 aaronmk
VACUUM ANALYZE "TNRS".tnrs -- to remove previous rows
176 10280 aaronmk
runtime: 1 min ("62350 ms")
177 9765 aaronmk
*/
178 9512 aaronmk
CREATE OR REPLACE FUNCTION tnrs_populate_fields()
179 7134 aaronmk
  RETURNS trigger AS
180
$BODY$
181 9763 aaronmk
DECLARE
182
    "Specific_epithet_is_plant" boolean :=
183
        (CASE
184
        WHEN   new."Infraspecific_epithet_matched"   IS NOT NULL
185
            OR new."Infraspecific_epithet_2_matched" IS NOT NULL
186 9914 aaronmk
            OR new."Specific_epithet_score" >= 0.9 -- fuzzy match
187 9763 aaronmk
            THEN true
188
        ELSE NULL -- ambiguous
189
        END);
190 9982 aaronmk
    never_homonym boolean = COALESCE(new."Author_score" >= 0.6, false); -- author disambiguates
191 9988 aaronmk
    family_is_homonym boolean = NOT never_homonym AND "TNRS".family_is_homonym(new."Family_matched");
192
    genus_is_homonym  boolean = NOT never_homonym AND "TNRS".genus_is_homonym( new."Genus_matched" );
193 7134 aaronmk
BEGIN
194 7848 aaronmk
    new."Accepted_scientific_name" = NULLIF(concat_ws(' '
195 9762 aaronmk
        , NULLIF(NULLIF(new."Accepted_name_family", 'Unknown'), new."Accepted_name")
196 7134 aaronmk
        , new."Accepted_name"
197
        , new."Accepted_name_author"
198 7848 aaronmk
    ), '');
199 7293 aaronmk
    new."Max_score" = GREATEST(
200 9914 aaronmk
          new."Overall_score"
201
        , new."Family_score"
202
        , new."Genus_score"
203
        , new."Specific_epithet_score"
204 7293 aaronmk
    );
205 9987 aaronmk
    new."Is_homonym" = COALESCE(genus_is_homonym, family_is_homonym);
206 9763 aaronmk
    new."Is_plant" = (CASE
207 9973 aaronmk
        WHEN new."Family_score" = 1 AND NOT family_is_homonym -- exact match
208
            THEN true
209
        ELSE -- consider genus
210 9763 aaronmk
            (CASE
211 9914 aaronmk
            WHEN new."Genus_score" =  1    -- exact match
212 9973 aaronmk
                THEN
213
                (CASE
214
                WHEN NOT genus_is_homonym THEN true
215
                ELSE "Specific_epithet_is_plant"
216
                END)
217 9914 aaronmk
            WHEN new."Genus_score" >= 0.85 -- fuzzy match
218 9763 aaronmk
                THEN "Specific_epithet_is_plant"
219
            ELSE NULL -- ambiguous
220
            END)
221
        END);
222 7134 aaronmk
223
    RETURN new;
224
END;
225
$BODY$
226
  LANGUAGE plpgsql VOLATILE
227
  COST 100;
228
229 9512 aaronmk
CREATE TRIGGER tnrs_populate_fields
230 7134 aaronmk
  BEFORE INSERT OR UPDATE
231
  ON tnrs
232
  FOR EACH ROW
233 9512 aaronmk
  EXECUTE PROCEDURE tnrs_populate_fields();
234 7251 aaronmk
235 10728 aaronmk
--
236 7251 aaronmk
237 9759 aaronmk
CREATE OR REPLACE VIEW "MatchedTaxon" AS
238 7823 aaronmk
SELECT
239
  "Time_submitted" AS "*Name_matched.Time_submitted"
240 7830 aaronmk
, "Name_submitted" AS "concatenatedScientificName"
241 7829 aaronmk
, "Name_matched" AS "matchedTaxonName"
242 7823 aaronmk
, "Name_matched_rank" AS "matchedTaxonRank"
243
, "Name_score" AS "*Name_matched.Name_score"
244
, "Name_matched_author" AS "matchedScientificNameAuthorship"
245
, "Name_matched_url" AS "matchedScientificNameID"
246
, "Author_score" AS "*Name_matched.Author_score"
247
, "Family_score" AS "matchedFamilyConfidence_fraction"
248 9762 aaronmk
, COALESCE("Name_matched_accepted_family", "Accepted_name_family") AS "matchedFamily"
249 7831 aaronmk
, "Genus_matched" AS "matchedGenus"
250 7823 aaronmk
, "Genus_score" AS "matchedGenusConfidence_fraction"
251
, "Specific_epithet_matched" AS "matchedSpecificEpithet"
252
, "Specific_epithet_score" AS "matchedSpeciesConfidence_fraction"
253
, "Infraspecific_epithet_matched" AS "matchedInfraspecificEpithet"
254
, "Infraspecific_epithet_score" AS "*Name_matched.Infraspecific_epithet_score"
255
, "Annotations" AS "identificationQualifier"
256
, "Unmatched_terms" AS "morphospeciesSuffix"
257 7833 aaronmk
, "Taxonomic_status" AS "taxonomicStatus"
258 7823 aaronmk
, "Accepted_name" AS "acceptedTaxonName"
259
, "Accepted_name_author" AS "acceptedScientificNameAuthorship"
260
, "Accepted_name_rank" AS "acceptedTaxonRank"
261
, "Accepted_name_url" AS "acceptedScientificNameID"
262 9762 aaronmk
, "Accepted_name_species" AS "*Name_matched.Accepted_name_species"
263
, "Accepted_name_family" AS "acceptedFamily"
264 7823 aaronmk
, "Selected" AS "*Name_matched.Selected"
265 7828 aaronmk
, "Source" AS "*Name_matched.Source"
266 7823 aaronmk
, "Warnings" AS "*Name_matched.Warnings"
267
, "Accepted_name_lsid" AS "*Name_matched.Accepted_name_lsid"
268
, "Accepted_scientific_name" AS "acceptedScientificName"
269
, "Max_score" AS "matchedTaxonConfidence_fraction"
270 9616 aaronmk
FROM tnrs
271
;
272 7823 aaronmk
273 9759 aaronmk
CREATE OR REPLACE VIEW "ValidMatchedTaxon" AS
274
SELECT *
275
FROM "MatchedTaxon"
276
WHERE score_ok("matchedTaxonConfidence_fraction")
277
;
278
279 7823 aaronmk
CREATE OR REPLACE VIEW "AcceptedTaxon" AS
280
SELECT
281
  "Time_submitted" AS "*Accepted_name.Time_submitted"
282
, "Name_submitted" AS "acceptedScientificName"
283
, "Genus_matched" AS "acceptedGenus"
284
, "Specific_epithet_matched" AS "acceptedSpecificEpithet"
285
, "Infraspecific_epithet_matched" AS "acceptedInfraspecificEpithet"
286
, "Taxonomic_status" AS "acceptedTaxonomicStatus"
287
, "Selected" AS "*Accepted_name.Selected"
288
, "Source" AS "*Accepted_name.Source"
289
, "Warnings" AS "*Accepted_name.Warnings"
290
, "Accepted_name_lsid" AS "*Accepted_name.Accepted_name_lsid"
291 9760 aaronmk
FROM tnrs
292
;
293 7823 aaronmk
294
CREATE OR REPLACE VIEW "ScrubbedTaxon" AS
295
SELECT *
296 9758 aaronmk
FROM "ValidMatchedTaxon"
297 7823 aaronmk
NATURAL LEFT JOIN "AcceptedTaxon"
298
;