CREATE OR REPLACE FUNCTION score_ok(score double precision) RETURNS boolean AS $BODY$ SELECT $1 >= 0.8 $BODY$ LANGUAGE sql IMMUTABLE STRICT COST 100; CREATE OR REPLACE FUNCTION family_is_homonym(family text) RETURNS boolean AS $BODY$ SELECT EXISTS(SELECT * FROM "IRMNG".family_homonym_epithet WHERE "taxonNameOrEpithet" = $1) $BODY$ LANGUAGE sql STABLE STRICT COST 100; CREATE OR REPLACE FUNCTION genus_is_homonym(genus text) RETURNS boolean AS $BODY$ SELECT EXISTS(SELECT * FROM "IRMNG".genus_homonym_epithet WHERE "taxonNameOrEpithet" = $1) $BODY$ LANGUAGE sql STABLE STRICT COST 100; /* IMPORTANT: when changing this table's schema, you must regenerate data.sql: $ /../test_taxonomic_names/test_scrub you must also make the same changes on vegbiendev: e.g. $ make backups/TNRS.backup-remake # runtime: 3 min ("real 2m44.277s") SELECT util.set_col_types('"TNRS".tnrs', ARRAY[ ('col', 'new_type') ]::util.col_cast[]); runtime: 4 min ("242100 ms") */ CREATE TABLE tnrs ( "Time_submitted" timestamp with time zone NOT NULL DEFAULT now(), "Name_number" integer NOT NULL, "Name_submitted" text NOT NULL, "Overall_score" double precision, "Name_matched" text, "Name_matched_rank" text, "Name_score" double precision, "Name_matched_author" text, "Name_matched_url" text, "Author_matched" text, "Author_score" double precision, "Family_matched" text, "Family_score" double precision, "Name_matched_accepted_family" text, "Genus_matched" text, "Genus_score" double precision, "Specific_epithet_matched" text, "Specific_epithet_score" double precision, "Infraspecific_rank" text, "Infraspecific_epithet_matched" text, "Infraspecific_epithet_score" double precision, "Infraspecific_rank_2" text, "Infraspecific_epithet_2_matched" text, "Infraspecific_epithet_2_score" double precision, "Annotations" text, "Unmatched_terms" text, "Taxonomic_status" text, "Accepted_name" text, "Accepted_name_author" text, "Accepted_name_rank" text, "Accepted_name_url" text, "Accepted_name_species" text, "Accepted_name_family" text, "Selected" text, "Source" text, "Warnings" text, "Accepted_name_lsid" text, "Accepted_scientific_name" text, "Max_score" double precision, "Is_homonym" boolean, "Is_plant" boolean, CONSTRAINT tnrs_pkey PRIMARY KEY ("Time_submitted" , "Name_number" ), CONSTRAINT "tnrs_Name_submitted_key" UNIQUE ("Name_submitted" ) ) WITH ( OIDS=FALSE ); CREATE UNIQUE INDEX tnrs_score_ok ON tnrs USING btree ("Name_submitted" ) WHERE score_ok("Max_score"); /* IMPORTANT: when changing this function, you must regenerate the derived cols: UPDATE "TNRS".tnrs SET "Name_submitted" = "Name_submitted" runtime: 16 min ("5052509 rows affected, 980692 ms execution time") VACUUM ANALYZE "TNRS".tnrs -- to remove previous rows runtime: 50 s ("48264 ms") */ CREATE OR REPLACE FUNCTION tnrs_populate_fields() RETURNS trigger AS $BODY$ DECLARE "Specific_epithet_is_plant" boolean := (CASE WHEN new."Infraspecific_epithet_matched" IS NOT NULL OR new."Infraspecific_epithet_2_matched" IS NOT NULL OR new."Specific_epithet_score" >= 0.9 -- fuzzy match THEN true ELSE NULL -- ambiguous END); never_homonym boolean = COALESCE(new."Author_score" >= 0.6, false); -- author disambiguates family_is_homonym boolean = NOT never_homonym AND "TNRS".family_is_homonym(new."Family_matched"); genus_is_homonym boolean = NOT never_homonym AND "TNRS".genus_is_homonym( new."Genus_matched" ); BEGIN new."Accepted_scientific_name" = NULLIF(concat_ws(' ' , NULLIF(NULLIF(new."Accepted_name_family", 'Unknown'), new."Accepted_name") , new."Accepted_name" , new."Accepted_name_author" ), ''); new."Max_score" = GREATEST( new."Overall_score" , new."Family_score" , new."Genus_score" , new."Specific_epithet_score" ); new."Is_homonym" = COALESCE(genus_is_homonym, family_is_homonym); new."Is_plant" = (CASE WHEN new."Family_score" = 1 AND NOT family_is_homonym -- exact match THEN true ELSE -- consider genus (CASE WHEN new."Genus_score" = 1 -- exact match THEN (CASE WHEN NOT genus_is_homonym THEN true ELSE "Specific_epithet_is_plant" END) WHEN new."Genus_score" >= 0.85 -- fuzzy match THEN "Specific_epithet_is_plant" ELSE NULL -- ambiguous END) END); RETURN new; END; $BODY$ LANGUAGE plpgsql VOLATILE COST 100; CREATE TRIGGER tnrs_populate_fields BEFORE INSERT OR UPDATE ON tnrs FOR EACH ROW EXECUTE PROCEDURE tnrs_populate_fields(); CREATE OR REPLACE VIEW "MatchedTaxon" AS SELECT "Time_submitted" AS "*Name_matched.Time_submitted" , "Name_submitted" AS "concatenatedScientificName" , "Name_matched" AS "matchedTaxonName" , "Name_matched_rank" AS "matchedTaxonRank" , "Name_score" AS "*Name_matched.Name_score" , "Name_matched_author" AS "matchedScientificNameAuthorship" , "Name_matched_url" AS "matchedScientificNameID" , "Author_score" AS "*Name_matched.Author_score" , "Family_score" AS "matchedFamilyConfidence_fraction" , COALESCE("Name_matched_accepted_family", "Accepted_name_family") AS "matchedFamily" , "Genus_matched" AS "matchedGenus" , "Genus_score" AS "matchedGenusConfidence_fraction" , "Specific_epithet_matched" AS "matchedSpecificEpithet" , "Specific_epithet_score" AS "matchedSpeciesConfidence_fraction" , "Infraspecific_epithet_matched" AS "matchedInfraspecificEpithet" , "Infraspecific_epithet_score" AS "*Name_matched.Infraspecific_epithet_score" , "Annotations" AS "identificationQualifier" , "Unmatched_terms" AS "morphospeciesSuffix" , "Taxonomic_status" AS "taxonomicStatus" , "Accepted_name" AS "acceptedTaxonName" , "Accepted_name_author" AS "acceptedScientificNameAuthorship" , "Accepted_name_rank" AS "acceptedTaxonRank" , "Accepted_name_url" AS "acceptedScientificNameID" , "Accepted_name_species" AS "*Name_matched.Accepted_name_species" , "Accepted_name_family" AS "acceptedFamily" , "Selected" AS "*Name_matched.Selected" , "Source" AS "*Name_matched.Source" , "Warnings" AS "*Name_matched.Warnings" , "Accepted_name_lsid" AS "*Name_matched.Accepted_name_lsid" , "Accepted_scientific_name" AS "acceptedScientificName" , "Max_score" AS "matchedTaxonConfidence_fraction" FROM tnrs ; CREATE OR REPLACE VIEW "ValidMatchedTaxon" AS SELECT * FROM "MatchedTaxon" WHERE score_ok("matchedTaxonConfidence_fraction") ; CREATE OR REPLACE VIEW "AcceptedTaxon" AS SELECT "Time_submitted" AS "*Accepted_name.Time_submitted" , "Name_submitted" AS "acceptedScientificName" , "Genus_matched" AS "acceptedGenus" , "Specific_epithet_matched" AS "acceptedSpecificEpithet" , "Infraspecific_epithet_matched" AS "acceptedInfraspecificEpithet" , "Annotations" AS "*Accepted_name.Annotations" , "Taxonomic_status" AS "acceptedTaxonomicStatus" , "Selected" AS "*Accepted_name.Selected" , "Source" AS "*Accepted_name.Source" , "Warnings" AS "*Accepted_name.Warnings" , "Accepted_name_lsid" AS "*Accepted_name.Accepted_name_lsid" FROM tnrs ; CREATE OR REPLACE VIEW "ScrubbedTaxon" AS SELECT * FROM "ValidMatchedTaxon" NATURAL LEFT JOIN "AcceptedTaxon" ;