Project

General

Profile

1 10737 aaronmk
CREATE TABLE client_version
2
(
3
  id text NOT NULL,
4 10738 aaronmk
  global_rev integer NOT NULL, -- from `svn info .` > Last Changed Rev
5
  "/lib/tnrs.py rev" integer, -- from `svn info lib/tnrs.py` > Last Changed Rev
6
  "/bin/tnrs_db rev" integer, -- from `svn info bin/tnrs_db` > Last Changed Rev
7 10737 aaronmk
  CONSTRAINT client_version_pkey PRIMARY KEY (id )
8
)
9
WITH (
10
  OIDS=FALSE
11
);
12 10738 aaronmk
COMMENT ON TABLE client_version
13
  IS 'contains svn revisions';
14
COMMENT ON COLUMN client_version.global_rev IS 'from `svn info .` > Last Changed Rev';
15
COMMENT ON COLUMN client_version."/lib/tnrs.py rev" IS 'from `svn info lib/tnrs.py` > Last Changed Rev';
16
COMMENT ON COLUMN client_version."/bin/tnrs_db rev" IS 'from `svn info bin/tnrs_db` > Last Changed Rev';
17 10737 aaronmk
18
--
19
20 10736 aaronmk
CREATE TABLE batch
21 10725 aaronmk
(
22 10728 aaronmk
  id text NOT NULL,
23
  id_by_time text,
24
  time_submitted timestamp with time zone DEFAULT now(),
25 10737 aaronmk
  client_version text,
26 10736 aaronmk
  CONSTRAINT batch_pkey PRIMARY KEY (id ),
27 10737 aaronmk
  CONSTRAINT batch_client_version_fkey FOREIGN KEY (client_version)
28
      REFERENCES client_version (id) MATCH SIMPLE
29 10741 aaronmk
      ON UPDATE CASCADE ON DELETE CASCADE,
30
  CONSTRAINT batch_id_by_time_key UNIQUE (id_by_time )
31 10725 aaronmk
)
32
WITH (
33
  OIDS=FALSE
34
);
35
36
37 10736 aaronmk
CREATE OR REPLACE FUNCTION batch__fill()
38 10728 aaronmk
  RETURNS trigger AS
39
$BODY$
40
BEGIN
41
	new.id_by_time = new.time_submitted;
42
	new.id = COALESCE(new.id, new.id_by_time);
43
	RETURN new;
44
END;
45
$BODY$
46
  LANGUAGE plpgsql VOLATILE
47
  COST 100;
48
49 10736 aaronmk
CREATE TRIGGER batch__fill
50 10728 aaronmk
  BEFORE INSERT OR UPDATE
51 10736 aaronmk
  ON batch
52 10728 aaronmk
  FOR EACH ROW
53 10736 aaronmk
  EXECUTE PROCEDURE batch__fill();
54 10728 aaronmk
55
--
56
57 10740 aaronmk
CREATE TABLE batch_download_settings
58 10736 aaronmk
(
59
  id text NOT NULL,
60
  "E-mail" text,
61
  "Id" text,
62
  "Job type" text,
63
  "Contains Id" boolean,
64
  "Start time" text,
65
  "Finish time" text,
66
  "TNRS version" text,
67
  "Sources selected" text,
68
  "Match threshold" double precision,
69
  "Classification" text,
70
  "Allow partial matches?" boolean,
71
  "Sort by source" boolean,
72
  "Constrain by higher taxonomy" boolean,
73 10740 aaronmk
  CONSTRAINT batch_download_settings_pkey PRIMARY KEY (id ),
74
  CONSTRAINT batch_download_settings_id_fkey FOREIGN KEY (id)
75 10739 aaronmk
      REFERENCES "TNRS".batch (id) MATCH SIMPLE
76
      ON UPDATE CASCADE ON DELETE CASCADE
77 10736 aaronmk
)
78
WITH (
79
  OIDS=FALSE
80
);
81 10740 aaronmk
COMMENT ON TABLE batch_download_settings
82 10736 aaronmk
  IS 'stores data from http://tnrs.iplantcollaborative.org/TNRSapp.html > Submit List > results section > Download settings > settings.txt';
83
84
--
85
86 7844 aaronmk
CREATE OR REPLACE FUNCTION score_ok(score double precision)
87
  RETURNS boolean AS
88
$BODY$
89
SELECT $1 >= 0.8
90
$BODY$
91 9984 aaronmk
  LANGUAGE sql IMMUTABLE STRICT
92 7844 aaronmk
  COST 100;
93
94 9985 aaronmk
CREATE OR REPLACE FUNCTION family_is_homonym(family text)
95
  RETURNS boolean AS
96
$BODY$
97
SELECT EXISTS(SELECT * FROM "IRMNG".family_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
98
$BODY$
99 9986 aaronmk
  LANGUAGE sql STABLE STRICT
100 9985 aaronmk
  COST 100;
101
102
CREATE OR REPLACE FUNCTION genus_is_homonym(genus text)
103
  RETURNS boolean AS
104
$BODY$
105
SELECT EXISTS(SELECT * FROM "IRMNG".genus_homonym_epithet WHERE "taxonNameOrEpithet" = $1)
106
$BODY$
107 9986 aaronmk
  LANGUAGE sql STABLE STRICT
108 9985 aaronmk
  COST 100;
109
110 5183 aaronmk
CREATE TABLE tnrs
111 5110 aaronmk
(
112 10746 aaronmk
  batch text NOT NULL DEFAULT now(),
113 9908 aaronmk
  "Name_number" integer NOT NULL,
114 5110 aaronmk
  "Name_submitted" text NOT NULL,
115 9913 aaronmk
  "Overall_score" double precision,
116 5110 aaronmk
  "Name_matched" text,
117
  "Name_matched_rank" text,
118 9913 aaronmk
  "Name_score" double precision,
119 5110 aaronmk
  "Name_matched_author" text,
120
  "Name_matched_url" text,
121
  "Author_matched" text,
122 9913 aaronmk
  "Author_score" double precision,
123 5110 aaronmk
  "Family_matched" text,
124 9913 aaronmk
  "Family_score" double precision,
125 9529 aaronmk
  "Name_matched_accepted_family" text,
126 5110 aaronmk
  "Genus_matched" text,
127 9913 aaronmk
  "Genus_score" double precision,
128 5110 aaronmk
  "Specific_epithet_matched" text,
129 9913 aaronmk
  "Specific_epithet_score" double precision,
130 5110 aaronmk
  "Infraspecific_rank" text,
131
  "Infraspecific_epithet_matched" text,
132 9913 aaronmk
  "Infraspecific_epithet_score" double precision,
133 5110 aaronmk
  "Infraspecific_rank_2" text,
134
  "Infraspecific_epithet_2_matched" text,
135 9913 aaronmk
  "Infraspecific_epithet_2_score" double precision,
136 5110 aaronmk
  "Annotations" text,
137
  "Unmatched_terms" text,
138
  "Taxonomic_status" text,
139
  "Accepted_name" text,
140
  "Accepted_name_author" text,
141
  "Accepted_name_rank" text,
142
  "Accepted_name_url" text,
143 9762 aaronmk
  "Accepted_name_species" text,
144
  "Accepted_name_family" text,
145 5110 aaronmk
  "Selected" text,
146
  "Source" text,
147
  "Warnings" text,
148
  "Accepted_name_lsid" text,
149 7133 aaronmk
  "Accepted_scientific_name" text,
150 7293 aaronmk
  "Max_score" double precision,
151 9972 aaronmk
  "Is_homonym" boolean,
152 9763 aaronmk
  "Is_plant" boolean,
153 10746 aaronmk
  CONSTRAINT tnrs_pkey PRIMARY KEY (batch , "Name_number" ),
154
  CONSTRAINT tnrs_batch_fkey FOREIGN KEY (batch)
155
      REFERENCES batch (id) MATCH SIMPLE
156
      ON UPDATE CASCADE ON DELETE CASCADE,
157 9906 aaronmk
  CONSTRAINT "tnrs_Name_submitted_key" UNIQUE ("Name_submitted" )
158 5110 aaronmk
)
159
WITH (
160
  OIDS=FALSE
161
);
162 10754 aaronmk
COMMENT ON TABLE tnrs
163
  IS 'IMPORTANT: when changing this table''s schema, you must regenerate data.sql:
164
$ <this_file>/../test_taxonomic_names/test_scrub
165
you must also make the same changes on vegbiendev: e.g.
166
$ make backups/TNRS.backup-remake # runtime: 3 min ("real 2m44.277s")
167
SELECT util.set_col_types(''"TNRS".tnrs'', ARRAY[
168
  (''col'', ''new_type'')
169
]::util.col_cast[]);
170
runtime: 9 min ("531282 ms")';
171 7132 aaronmk
172 7844 aaronmk
CREATE UNIQUE INDEX tnrs_score_ok
173
  ON tnrs
174
  USING btree
175
  ("Name_submitted" )
176
  WHERE score_ok("Max_score");
177
178 9512 aaronmk
CREATE OR REPLACE FUNCTION tnrs_populate_fields()
179 7134 aaronmk
  RETURNS trigger AS
180
$BODY$
181 9763 aaronmk
DECLARE
182
    "Specific_epithet_is_plant" boolean :=
183
        (CASE
184
        WHEN   new."Infraspecific_epithet_matched"   IS NOT NULL
185
            OR new."Infraspecific_epithet_2_matched" IS NOT NULL
186 9914 aaronmk
            OR new."Specific_epithet_score" >= 0.9 -- fuzzy match
187 9763 aaronmk
            THEN true
188
        ELSE NULL -- ambiguous
189
        END);
190 9982 aaronmk
    never_homonym boolean = COALESCE(new."Author_score" >= 0.6, false); -- author disambiguates
191 9988 aaronmk
    family_is_homonym boolean = NOT never_homonym AND "TNRS".family_is_homonym(new."Family_matched");
192
    genus_is_homonym  boolean = NOT never_homonym AND "TNRS".genus_is_homonym( new."Genus_matched" );
193 7134 aaronmk
BEGIN
194 7848 aaronmk
    new."Accepted_scientific_name" = NULLIF(concat_ws(' '
195 9762 aaronmk
        , NULLIF(NULLIF(new."Accepted_name_family", 'Unknown'), new."Accepted_name")
196 7134 aaronmk
        , new."Accepted_name"
197
        , new."Accepted_name_author"
198 7848 aaronmk
    ), '');
199 7293 aaronmk
    new."Max_score" = GREATEST(
200 9914 aaronmk
          new."Overall_score"
201
        , new."Family_score"
202
        , new."Genus_score"
203
        , new."Specific_epithet_score"
204 7293 aaronmk
    );
205 9987 aaronmk
    new."Is_homonym" = COALESCE(genus_is_homonym, family_is_homonym);
206 9763 aaronmk
    new."Is_plant" = (CASE
207 9973 aaronmk
        WHEN new."Family_score" = 1 AND NOT family_is_homonym -- exact match
208
            THEN true
209
        ELSE -- consider genus
210 9763 aaronmk
            (CASE
211 9914 aaronmk
            WHEN new."Genus_score" =  1    -- exact match
212 9973 aaronmk
                THEN
213
                (CASE
214
                WHEN NOT genus_is_homonym THEN true
215
                ELSE "Specific_epithet_is_plant"
216
                END)
217 9914 aaronmk
            WHEN new."Genus_score" >= 0.85 -- fuzzy match
218 9763 aaronmk
                THEN "Specific_epithet_is_plant"
219
            ELSE NULL -- ambiguous
220
            END)
221
        END);
222 7134 aaronmk
223
    RETURN new;
224
END;
225
$BODY$
226
  LANGUAGE plpgsql VOLATILE
227
  COST 100;
228 10754 aaronmk
COMMENT ON FUNCTION tnrs_populate_fields() IS 'IMPORTANT: when changing this function, you must regenerate the derived cols:
229
UPDATE "TNRS".tnrs SET "Name_submitted" = "Name_submitted"
230
runtime: 25 min ("5221748 rows affected, 1524228 ms execution time")
231
VACUUM ANALYZE "TNRS".tnrs -- to remove previous rows
232
runtime: 1 min ("62350 ms")';
233 7134 aaronmk
234 9512 aaronmk
CREATE TRIGGER tnrs_populate_fields
235 7134 aaronmk
  BEFORE INSERT OR UPDATE
236
  ON tnrs
237
  FOR EACH ROW
238 9512 aaronmk
  EXECUTE PROCEDURE tnrs_populate_fields();
239 7251 aaronmk
240 10728 aaronmk
--
241 7251 aaronmk
242 9759 aaronmk
CREATE OR REPLACE VIEW "MatchedTaxon" AS
243 7823 aaronmk
SELECT
244 10746 aaronmk
  "batch" AS "*Name_matched.batch"
245 7830 aaronmk
, "Name_submitted" AS "concatenatedScientificName"
246 7829 aaronmk
, "Name_matched" AS "matchedTaxonName"
247 7823 aaronmk
, "Name_matched_rank" AS "matchedTaxonRank"
248
, "Name_score" AS "*Name_matched.Name_score"
249
, "Name_matched_author" AS "matchedScientificNameAuthorship"
250
, "Name_matched_url" AS "matchedScientificNameID"
251
, "Author_score" AS "*Name_matched.Author_score"
252
, "Family_score" AS "matchedFamilyConfidence_fraction"
253 9762 aaronmk
, COALESCE("Name_matched_accepted_family", "Accepted_name_family") AS "matchedFamily"
254 7831 aaronmk
, "Genus_matched" AS "matchedGenus"
255 7823 aaronmk
, "Genus_score" AS "matchedGenusConfidence_fraction"
256
, "Specific_epithet_matched" AS "matchedSpecificEpithet"
257
, "Specific_epithet_score" AS "matchedSpeciesConfidence_fraction"
258
, "Infraspecific_epithet_matched" AS "matchedInfraspecificEpithet"
259
, "Infraspecific_epithet_score" AS "*Name_matched.Infraspecific_epithet_score"
260
, "Annotations" AS "identificationQualifier"
261
, "Unmatched_terms" AS "morphospeciesSuffix"
262 7833 aaronmk
, "Taxonomic_status" AS "taxonomicStatus"
263 7823 aaronmk
, "Accepted_name" AS "acceptedTaxonName"
264
, "Accepted_name_author" AS "acceptedScientificNameAuthorship"
265
, "Accepted_name_rank" AS "acceptedTaxonRank"
266
, "Accepted_name_url" AS "acceptedScientificNameID"
267 9762 aaronmk
, "Accepted_name_species" AS "*Name_matched.Accepted_name_species"
268
, "Accepted_name_family" AS "acceptedFamily"
269 7823 aaronmk
, "Selected" AS "*Name_matched.Selected"
270 7828 aaronmk
, "Source" AS "*Name_matched.Source"
271 7823 aaronmk
, "Warnings" AS "*Name_matched.Warnings"
272
, "Accepted_name_lsid" AS "*Name_matched.Accepted_name_lsid"
273
, "Accepted_scientific_name" AS "acceptedScientificName"
274
, "Max_score" AS "matchedTaxonConfidence_fraction"
275 9616 aaronmk
FROM tnrs
276
;
277 7823 aaronmk
278 9759 aaronmk
CREATE OR REPLACE VIEW "ValidMatchedTaxon" AS
279
SELECT *
280
FROM "MatchedTaxon"
281
WHERE score_ok("matchedTaxonConfidence_fraction")
282
;
283 10753 aaronmk
COMMENT ON VIEW "ValidMatchedTaxon"
284
  IS 'to update, use * as the column list';
285 9759 aaronmk
286 7823 aaronmk
CREATE OR REPLACE VIEW "AcceptedTaxon" AS
287
SELECT
288 10746 aaronmk
  "batch" AS "*Accepted_name.batch"
289 7823 aaronmk
, "Name_submitted" AS "acceptedScientificName"
290
, "Genus_matched" AS "acceptedGenus"
291
, "Specific_epithet_matched" AS "acceptedSpecificEpithet"
292
, "Infraspecific_epithet_matched" AS "acceptedInfraspecificEpithet"
293
, "Taxonomic_status" AS "acceptedTaxonomicStatus"
294
, "Selected" AS "*Accepted_name.Selected"
295
, "Source" AS "*Accepted_name.Source"
296
, "Warnings" AS "*Accepted_name.Warnings"
297
, "Accepted_name_lsid" AS "*Accepted_name.Accepted_name_lsid"
298 9760 aaronmk
FROM tnrs
299
;
300 7823 aaronmk
301
CREATE OR REPLACE VIEW "ScrubbedTaxon" AS
302
SELECT *
303 9758 aaronmk
FROM "ValidMatchedTaxon"
304 7823 aaronmk
NATURAL LEFT JOIN "AcceptedTaxon"
305
;
306 10753 aaronmk
COMMENT ON VIEW "ScrubbedTaxon"
307
  IS 'to update, use * as the column list';