Revision 13871
Added by Aaron Marcuse-Kubitza over 10 years ago
trunk/inputs/.TNRS/schema.sql | ||
---|---|---|
113 | 113 |
CREATE FUNCTION taxon_match__fill() RETURNS trigger |
114 | 114 |
LANGUAGE plpgsql |
115 | 115 |
AS $$ |
116 |
DECLARE |
|
117 |
"Specific_epithet_is_plant" boolean := |
|
118 |
(CASE |
|
119 |
WHEN new."Infraspecific_epithet_matched" IS NOT NULL |
|
120 |
OR new."Infraspecific_epithet_2_matched" IS NOT NULL |
|
121 |
OR new."Specific_epithet_score" >= 0.9 -- fuzzy match |
|
122 |
THEN true |
|
123 |
ELSE NULL -- ambiguous |
|
124 |
END); |
|
125 |
never_homonym boolean = COALESCE(new."Author_score" >= 0.6, false); |
|
126 |
-- author disambiguates |
|
127 |
family_is_homonym boolean = NOT never_homonym |
|
128 |
AND "TNRS".family_is_homonym(new."Family_matched"); |
|
129 |
genus_is_homonym boolean = NOT never_homonym |
|
130 |
AND "TNRS".genus_is_homonym(new."Genus_matched"); |
|
131 |
matched_taxon_name_with_author text = NULLIF(concat_ws(' ' |
|
132 |
, NULLIF(NULLIF(new."Name_matched_accepted_family", 'Unknown'), |
|
133 |
new."Name_matched") |
|
134 |
, NULLIF(new."Name_matched", 'No suitable matches found.') |
|
135 |
, new."Name_matched_author" |
|
136 |
), ''); |
|
137 |
accepted_taxon_name_with_author text = NULLIF(concat_ws(' ' |
|
138 |
, NULLIF(NULLIF(new."Accepted_name_family", 'Unknown'), |
|
139 |
new."Accepted_name") |
|
140 |
, new."Accepted_name" |
|
141 |
, new."Accepted_name_author" |
|
142 |
), ''); |
|
143 | 116 |
BEGIN |
144 |
/* exclude homonyms because these are not valid matches (i.e. TNRS provides |
|
145 |
a name, but the name is not meaningful because it is not unambiguous). */ |
|
146 |
new.is_valid_match = new."Taxonomic_status" != 'Invalid' |
|
147 |
AND COALESCE(CASE |
|
148 |
WHEN new."Family_score" = 1 AND NOT family_is_homonym -- exact match |
|
149 |
THEN true |
|
150 |
ELSE -- consider genus |
|
117 |
DECLARE |
|
118 |
"Specific_epithet_is_plant" boolean := |
|
151 | 119 |
(CASE |
152 |
WHEN new."Genus_score" = 1 -- exact match |
|
153 |
THEN |
|
120 |
WHEN new."Infraspecific_epithet_matched" IS NOT NULL |
|
121 |
OR new."Infraspecific_epithet_2_matched" IS NOT NULL |
|
122 |
OR new."Specific_epithet_score" >= 0.9 -- fuzzy match |
|
123 |
THEN true |
|
124 |
ELSE NULL -- ambiguous |
|
125 |
END); |
|
126 |
never_homonym boolean = COALESCE(new."Author_score" >= 0.6, false); |
|
127 |
-- author disambiguates |
|
128 |
family_is_homonym boolean = NOT never_homonym |
|
129 |
AND "TNRS".family_is_homonym(new."Family_matched"); |
|
130 |
genus_is_homonym boolean = NOT never_homonym |
|
131 |
AND "TNRS".genus_is_homonym(new."Genus_matched"); |
|
132 |
BEGIN |
|
133 |
/* exclude homonyms because these are not valid matches (TNRS provides a |
|
134 |
name, but the name is not meaningful because it is not unambiguous) */ |
|
135 |
new.is_valid_match = new."Taxonomic_status" != 'Invalid' |
|
136 |
AND COALESCE(CASE |
|
137 |
WHEN new."Family_score" = 1 AND NOT family_is_homonym -- exact match |
|
138 |
THEN true |
|
139 |
ELSE -- consider genus |
|
154 | 140 |
(CASE |
155 |
WHEN NOT genus_is_homonym THEN true |
|
156 |
ELSE "Specific_epithet_is_plant" |
|
141 |
WHEN new."Genus_score" = 1 -- exact match |
|
142 |
THEN |
|
143 |
(CASE |
|
144 |
WHEN NOT genus_is_homonym THEN true |
|
145 |
ELSE "Specific_epithet_is_plant" |
|
146 |
END) |
|
147 |
WHEN new."Genus_score" >= 0.85 -- fuzzy match |
|
148 |
THEN "Specific_epithet_is_plant" |
|
149 |
ELSE NULL -- ambiguous |
|
157 | 150 |
END) |
158 |
WHEN new."Genus_score" >= 0.85 -- fuzzy match |
|
159 |
THEN "Specific_epithet_is_plant" |
|
160 |
ELSE NULL -- ambiguous |
|
161 |
END) |
|
162 |
END, false); |
|
163 |
new.scrubbed_unique_taxon_name = COALESCE( |
|
164 |
accepted_taxon_name_with_author, matched_taxon_name_with_author); |
|
151 |
END, false); |
|
152 |
END; |
|
165 | 153 |
|
154 |
DECLARE |
|
155 |
matched_taxon_name_with_author text = NULLIF(concat_ws(' ' |
|
156 |
, NULLIF(NULLIF(new."Name_matched_accepted_family", 'Unknown'), |
|
157 |
new."Name_matched") |
|
158 |
, NULLIF(new."Name_matched", 'No suitable matches found.') |
|
159 |
, new."Name_matched_author" |
|
160 |
), ''); |
|
161 |
accepted_taxon_name_with_author text = NULLIF(concat_ws(' ' |
|
162 |
, NULLIF(NULLIF(new."Accepted_name_family", 'Unknown'), |
|
163 |
new."Accepted_name") |
|
164 |
, new."Accepted_name" |
|
165 |
, new."Accepted_name_author" |
|
166 |
), ''); |
|
167 |
BEGIN |
|
168 |
new.scrubbed_unique_taxon_name = COALESCE( |
|
169 |
accepted_taxon_name_with_author, matched_taxon_name_with_author); |
|
170 |
END; |
|
171 |
|
|
166 | 172 |
RETURN new; |
167 | 173 |
END; |
168 | 174 |
$$; |
trunk/schemas/vegbien.sql | ||
---|---|---|
19062 | 19062 |
CREATE FUNCTION taxon_match__fill() RETURNS trigger |
19063 | 19063 |
LANGUAGE plpgsql |
19064 | 19064 |
AS $$ |
19065 |
DECLARE |
|
19066 |
"Specific_epithet_is_plant" boolean := |
|
19067 |
(CASE |
|
19068 |
WHEN new."Infraspecific_epithet_matched" IS NOT NULL |
|
19069 |
OR new."Infraspecific_epithet_2_matched" IS NOT NULL |
|
19070 |
OR new."Specific_epithet_score" >= 0.9 -- fuzzy match |
|
19071 |
THEN true |
|
19072 |
ELSE NULL -- ambiguous |
|
19073 |
END); |
|
19074 |
never_homonym boolean = COALESCE(new."Author_score" >= 0.6, false); |
|
19075 |
-- author disambiguates |
|
19076 |
family_is_homonym boolean = NOT never_homonym |
|
19077 |
AND "TNRS".family_is_homonym(new."Family_matched"); |
|
19078 |
genus_is_homonym boolean = NOT never_homonym |
|
19079 |
AND "TNRS".genus_is_homonym(new."Genus_matched"); |
|
19080 |
matched_taxon_name_with_author text = NULLIF(concat_ws(' ' |
|
19081 |
, NULLIF(NULLIF(new."Name_matched_accepted_family", 'Unknown'), |
|
19082 |
new."Name_matched") |
|
19083 |
, NULLIF(new."Name_matched", 'No suitable matches found.') |
|
19084 |
, new."Name_matched_author" |
|
19085 |
), ''); |
|
19086 |
accepted_taxon_name_with_author text = NULLIF(concat_ws(' ' |
|
19087 |
, NULLIF(NULLIF(new."Accepted_name_family", 'Unknown'), |
|
19088 |
new."Accepted_name") |
|
19089 |
, new."Accepted_name" |
|
19090 |
, new."Accepted_name_author" |
|
19091 |
), ''); |
|
19092 | 19065 |
BEGIN |
19093 |
/* exclude homonyms because these are not valid matches (i.e. TNRS provides |
|
19094 |
a name, but the name is not meaningful because it is not unambiguous). */ |
|
19095 |
new.is_valid_match = new."Taxonomic_status" != 'Invalid' |
|
19096 |
AND COALESCE(CASE |
|
19097 |
WHEN new."Family_score" = 1 AND NOT family_is_homonym -- exact match |
|
19098 |
THEN true |
|
19099 |
ELSE -- consider genus |
|
19066 |
DECLARE |
|
19067 |
"Specific_epithet_is_plant" boolean := |
|
19100 | 19068 |
(CASE |
19101 |
WHEN new."Genus_score" = 1 -- exact match |
|
19102 |
THEN |
|
19069 |
WHEN new."Infraspecific_epithet_matched" IS NOT NULL |
|
19070 |
OR new."Infraspecific_epithet_2_matched" IS NOT NULL |
|
19071 |
OR new."Specific_epithet_score" >= 0.9 -- fuzzy match |
|
19072 |
THEN true |
|
19073 |
ELSE NULL -- ambiguous |
|
19074 |
END); |
|
19075 |
never_homonym boolean = COALESCE(new."Author_score" >= 0.6, false); |
|
19076 |
-- author disambiguates |
|
19077 |
family_is_homonym boolean = NOT never_homonym |
|
19078 |
AND "TNRS".family_is_homonym(new."Family_matched"); |
|
19079 |
genus_is_homonym boolean = NOT never_homonym |
|
19080 |
AND "TNRS".genus_is_homonym(new."Genus_matched"); |
|
19081 |
BEGIN |
|
19082 |
/* exclude homonyms because these are not valid matches (TNRS provides a |
|
19083 |
name, but the name is not meaningful because it is not unambiguous) */ |
|
19084 |
new.is_valid_match = new."Taxonomic_status" != 'Invalid' |
|
19085 |
AND COALESCE(CASE |
|
19086 |
WHEN new."Family_score" = 1 AND NOT family_is_homonym -- exact match |
|
19087 |
THEN true |
|
19088 |
ELSE -- consider genus |
|
19103 | 19089 |
(CASE |
19104 |
WHEN NOT genus_is_homonym THEN true |
|
19105 |
ELSE "Specific_epithet_is_plant" |
|
19090 |
WHEN new."Genus_score" = 1 -- exact match |
|
19091 |
THEN |
|
19092 |
(CASE |
|
19093 |
WHEN NOT genus_is_homonym THEN true |
|
19094 |
ELSE "Specific_epithet_is_plant" |
|
19095 |
END) |
|
19096 |
WHEN new."Genus_score" >= 0.85 -- fuzzy match |
|
19097 |
THEN "Specific_epithet_is_plant" |
|
19098 |
ELSE NULL -- ambiguous |
|
19106 | 19099 |
END) |
19107 |
WHEN new."Genus_score" >= 0.85 -- fuzzy match |
|
19108 |
THEN "Specific_epithet_is_plant" |
|
19109 |
ELSE NULL -- ambiguous |
|
19110 |
END) |
|
19111 |
END, false); |
|
19112 |
new.scrubbed_unique_taxon_name = COALESCE( |
|
19113 |
accepted_taxon_name_with_author, matched_taxon_name_with_author); |
|
19100 |
END, false); |
|
19101 |
END; |
|
19114 | 19102 |
|
19103 |
DECLARE |
|
19104 |
matched_taxon_name_with_author text = NULLIF(concat_ws(' ' |
|
19105 |
, NULLIF(NULLIF(new."Name_matched_accepted_family", 'Unknown'), |
|
19106 |
new."Name_matched") |
|
19107 |
, NULLIF(new."Name_matched", 'No suitable matches found.') |
|
19108 |
, new."Name_matched_author" |
|
19109 |
), ''); |
|
19110 |
accepted_taxon_name_with_author text = NULLIF(concat_ws(' ' |
|
19111 |
, NULLIF(NULLIF(new."Accepted_name_family", 'Unknown'), |
|
19112 |
new."Accepted_name") |
|
19113 |
, new."Accepted_name" |
|
19114 |
, new."Accepted_name_author" |
|
19115 |
), ''); |
|
19116 |
BEGIN |
|
19117 |
new.scrubbed_unique_taxon_name = COALESCE( |
|
19118 |
accepted_taxon_name_with_author, matched_taxon_name_with_author); |
|
19119 |
END; |
|
19120 |
|
|
19115 | 19121 |
RETURN new; |
19116 | 19122 |
END; |
19117 | 19123 |
$$; |
Also available in: Unified diff
inputs/.TNRS/schema.sql: taxon_match__fill(): split into separate DECLARE blocks for each field for clarity