CREATE TABLE client_version ( id text NOT NULL, global_rev integer NOT NULL, -- from `svn info .` > Last Changed Rev "/lib/tnrs.py rev" integer, -- from `svn info lib/tnrs.py` > Last Changed Rev "/bin/tnrs_db rev" integer, -- from `svn info bin/tnrs_db` > Last Changed Rev CONSTRAINT client_version_pkey PRIMARY KEY (id ) ) WITH ( OIDS=FALSE ); COMMENT ON TABLE client_version IS 'contains svn revisions'; COMMENT ON COLUMN client_version.global_rev IS 'from `svn info .` > Last Changed Rev'; COMMENT ON COLUMN client_version."/lib/tnrs.py rev" IS 'from `svn info lib/tnrs.py` > Last Changed Rev'; COMMENT ON COLUMN client_version."/bin/tnrs_db rev" IS 'from `svn info bin/tnrs_db` > Last Changed Rev'; -- CREATE TABLE batch ( id text NOT NULL, id_by_time text, time_submitted timestamp with time zone DEFAULT now(), client_version text, CONSTRAINT batch_pkey PRIMARY KEY (id ), CONSTRAINT batch_client_version_fkey FOREIGN KEY (client_version) REFERENCES client_version (id) MATCH SIMPLE ON UPDATE CASCADE ON DELETE CASCADE, CONSTRAINT batch_id_by_time_key UNIQUE (id_by_time ) ) WITH ( OIDS=FALSE ); CREATE OR REPLACE FUNCTION batch__fill() RETURNS trigger AS $BODY$ BEGIN new.id_by_time = new.time_submitted; new.id = COALESCE(new.id, new.id_by_time); RETURN new; END; $BODY$ LANGUAGE plpgsql VOLATILE COST 100; CREATE TRIGGER batch__fill BEFORE INSERT OR UPDATE ON batch FOR EACH ROW EXECUTE PROCEDURE batch__fill(); -- CREATE TABLE batch_download_settings ( id text NOT NULL, "E-mail" text, "Id" text, "Job type" text, "Contains Id" boolean, "Start time" text, "Finish time" text, "TNRS version" text, "Sources selected" text, "Match threshold" double precision, "Classification" text, "Allow partial matches?" boolean, "Sort by source" boolean, "Constrain by higher taxonomy" boolean, CONSTRAINT batch_download_settings_pkey PRIMARY KEY (id ), CONSTRAINT batch_download_settings_id_fkey FOREIGN KEY (id) REFERENCES "TNRS".batch (id) MATCH SIMPLE ON UPDATE CASCADE ON DELETE CASCADE ) WITH ( OIDS=FALSE ); COMMENT ON TABLE batch_download_settings IS 'stores data from http://tnrs.iplantcollaborative.org/TNRSapp.html > Submit List > results section > Download settings > settings.txt'; -- CREATE OR REPLACE FUNCTION score_ok(score double precision) RETURNS boolean AS $BODY$ SELECT $1 >= 0.8 $BODY$ LANGUAGE sql IMMUTABLE STRICT COST 100; CREATE OR REPLACE FUNCTION family_is_homonym(family text) RETURNS boolean AS $BODY$ SELECT EXISTS(SELECT * FROM "IRMNG".family_homonym_epithet WHERE "taxonNameOrEpithet" = $1) $BODY$ LANGUAGE sql STABLE STRICT COST 100; CREATE OR REPLACE FUNCTION genus_is_homonym(genus text) RETURNS boolean AS $BODY$ SELECT EXISTS(SELECT * FROM "IRMNG".genus_homonym_epithet WHERE "taxonNameOrEpithet" = $1) $BODY$ LANGUAGE sql STABLE STRICT COST 100; /* IMPORTANT: when changing this table's schema, you must regenerate data.sql: $ /../test_taxonomic_names/test_scrub you must also make the same changes on vegbiendev: e.g. $ make backups/TNRS.backup-remake # runtime: 3 min ("real 2m44.277s") SELECT util.set_col_types('"TNRS".tnrs', ARRAY[ ('col', 'new_type') ]::util.col_cast[]); runtime: 9 min ("531282 ms") */ CREATE TABLE tnrs ( batch text NOT NULL DEFAULT now(), "Name_number" integer NOT NULL, "Name_submitted" text NOT NULL, "Overall_score" double precision, "Name_matched" text, "Name_matched_rank" text, "Name_score" double precision, "Name_matched_author" text, "Name_matched_url" text, "Author_matched" text, "Author_score" double precision, "Family_matched" text, "Family_score" double precision, "Name_matched_accepted_family" text, "Genus_matched" text, "Genus_score" double precision, "Specific_epithet_matched" text, "Specific_epithet_score" double precision, "Infraspecific_rank" text, "Infraspecific_epithet_matched" text, "Infraspecific_epithet_score" double precision, "Infraspecific_rank_2" text, "Infraspecific_epithet_2_matched" text, "Infraspecific_epithet_2_score" double precision, "Annotations" text, "Unmatched_terms" text, "Taxonomic_status" text, "Accepted_name" text, "Accepted_name_author" text, "Accepted_name_rank" text, "Accepted_name_url" text, "Accepted_name_species" text, "Accepted_name_family" text, "Selected" text, "Source" text, "Warnings" text, "Accepted_name_lsid" text, "Accepted_scientific_name" text, "Max_score" double precision, "Is_homonym" boolean, "Is_plant" boolean, CONSTRAINT tnrs_pkey PRIMARY KEY (batch , "Name_number" ), CONSTRAINT tnrs_batch_fkey FOREIGN KEY (batch) REFERENCES batch (id) MATCH SIMPLE ON UPDATE CASCADE ON DELETE CASCADE, CONSTRAINT "tnrs_Name_submitted_key" UNIQUE ("Name_submitted" ) ) WITH ( OIDS=FALSE ); CREATE UNIQUE INDEX tnrs_score_ok ON tnrs USING btree ("Name_submitted" ) WHERE score_ok("Max_score"); /* IMPORTANT: when changing this function, you must regenerate the derived cols: UPDATE "TNRS".tnrs SET "Name_submitted" = "Name_submitted" runtime: 25 min ("5221748 rows affected, 1524228 ms execution time") VACUUM ANALYZE "TNRS".tnrs -- to remove previous rows runtime: 1 min ("62350 ms") */ CREATE OR REPLACE FUNCTION tnrs_populate_fields() RETURNS trigger AS $BODY$ DECLARE "Specific_epithet_is_plant" boolean := (CASE WHEN new."Infraspecific_epithet_matched" IS NOT NULL OR new."Infraspecific_epithet_2_matched" IS NOT NULL OR new."Specific_epithet_score" >= 0.9 -- fuzzy match THEN true ELSE NULL -- ambiguous END); never_homonym boolean = COALESCE(new."Author_score" >= 0.6, false); -- author disambiguates family_is_homonym boolean = NOT never_homonym AND "TNRS".family_is_homonym(new."Family_matched"); genus_is_homonym boolean = NOT never_homonym AND "TNRS".genus_is_homonym( new."Genus_matched" ); BEGIN new."Accepted_scientific_name" = NULLIF(concat_ws(' ' , NULLIF(NULLIF(new."Accepted_name_family", 'Unknown'), new."Accepted_name") , new."Accepted_name" , new."Accepted_name_author" ), ''); new."Max_score" = GREATEST( new."Overall_score" , new."Family_score" , new."Genus_score" , new."Specific_epithet_score" ); new."Is_homonym" = COALESCE(genus_is_homonym, family_is_homonym); new."Is_plant" = (CASE WHEN new."Family_score" = 1 AND NOT family_is_homonym -- exact match THEN true ELSE -- consider genus (CASE WHEN new."Genus_score" = 1 -- exact match THEN (CASE WHEN NOT genus_is_homonym THEN true ELSE "Specific_epithet_is_plant" END) WHEN new."Genus_score" >= 0.85 -- fuzzy match THEN "Specific_epithet_is_plant" ELSE NULL -- ambiguous END) END); RETURN new; END; $BODY$ LANGUAGE plpgsql VOLATILE COST 100; CREATE TRIGGER tnrs_populate_fields BEFORE INSERT OR UPDATE ON tnrs FOR EACH ROW EXECUTE PROCEDURE tnrs_populate_fields(); -- CREATE OR REPLACE VIEW "MatchedTaxon" AS SELECT "batch" AS "*Name_matched.batch" , "Name_submitted" AS "concatenatedScientificName" , "Name_matched" AS "matchedTaxonName" , "Name_matched_rank" AS "matchedTaxonRank" , "Name_score" AS "*Name_matched.Name_score" , "Name_matched_author" AS "matchedScientificNameAuthorship" , "Name_matched_url" AS "matchedScientificNameID" , "Author_score" AS "*Name_matched.Author_score" , "Family_score" AS "matchedFamilyConfidence_fraction" , COALESCE("Name_matched_accepted_family", "Accepted_name_family") AS "matchedFamily" , "Genus_matched" AS "matchedGenus" , "Genus_score" AS "matchedGenusConfidence_fraction" , "Specific_epithet_matched" AS "matchedSpecificEpithet" , "Specific_epithet_score" AS "matchedSpeciesConfidence_fraction" , "Infraspecific_epithet_matched" AS "matchedInfraspecificEpithet" , "Infraspecific_epithet_score" AS "*Name_matched.Infraspecific_epithet_score" , "Annotations" AS "identificationQualifier" , "Unmatched_terms" AS "morphospeciesSuffix" , "Taxonomic_status" AS "taxonomicStatus" , "Accepted_name" AS "acceptedTaxonName" , "Accepted_name_author" AS "acceptedScientificNameAuthorship" , "Accepted_name_rank" AS "acceptedTaxonRank" , "Accepted_name_url" AS "acceptedScientificNameID" , "Accepted_name_species" AS "*Name_matched.Accepted_name_species" , "Accepted_name_family" AS "acceptedFamily" , "Selected" AS "*Name_matched.Selected" , "Source" AS "*Name_matched.Source" , "Warnings" AS "*Name_matched.Warnings" , "Accepted_name_lsid" AS "*Name_matched.Accepted_name_lsid" , "Accepted_scientific_name" AS "acceptedScientificName" , "Max_score" AS "matchedTaxonConfidence_fraction" FROM tnrs ; CREATE OR REPLACE VIEW "ValidMatchedTaxon" AS SELECT * FROM "MatchedTaxon" WHERE score_ok("matchedTaxonConfidence_fraction") ; CREATE OR REPLACE VIEW "AcceptedTaxon" AS SELECT "batch" AS "*Accepted_name.batch" , "Name_submitted" AS "acceptedScientificName" , "Genus_matched" AS "acceptedGenus" , "Specific_epithet_matched" AS "acceptedSpecificEpithet" , "Infraspecific_epithet_matched" AS "acceptedInfraspecificEpithet" , "Taxonomic_status" AS "acceptedTaxonomicStatus" , "Selected" AS "*Accepted_name.Selected" , "Source" AS "*Accepted_name.Source" , "Warnings" AS "*Accepted_name.Warnings" , "Accepted_name_lsid" AS "*Accepted_name.Accepted_name_lsid" FROM tnrs ; CREATE OR REPLACE VIEW "ScrubbedTaxon" AS SELECT * FROM "ValidMatchedTaxon" NATURAL LEFT JOIN "AcceptedTaxon" ;