-- ------------------------------------------------------------------------- -- Quantitative validation queries on the BIEN database -- -- Applies to any datasource with specimens -- -- DB: vegbien -- Host: vegbiendev.nceas.ucsb.edu -- ------------------------------------------------------------------------- \set datasource '''NY''' SET search_path TO public_validations, public; SET enable_seqscan = off; SET join_collapse_limit = 1; -- turn it off -- _specimens_01_count_of_total_records_specimens_in_source_db SELECT count(*) AS "totalSpecimenRecords" FROM taxonoccurrence WHERE (taxonoccurrence.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)); -- _specimens_02_count_of_unique_verbatim_families SELECT count(DISTINCT taxonverbatim.family) AS families FROM taxonverbatim WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.family IS NOT NULL)); -- _specimens_03_list_of_verbatim_families SELECT DISTINCT taxonverbatim.family FROM taxonverbatim WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.family IS NOT NULL)); -- _specimens_04_count_of_species_binomials SELECT count(DISTINCT concat_ws(' '::text, taxonverbatim.genus, taxonverbatim.specific_epithet)) AS species_binomials FROM taxonverbatim WHERE (((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.genus IS NOT NULL)) AND (taxonverbatim.specific_epithet IS NOT NULL)); -- _specimens_05_list_of_species_binomials SELECT DISTINCT concat_ws(' '::text, taxonverbatim.genus, taxonverbatim.specific_epithet) AS species_binomial FROM taxonverbatim WHERE (((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.genus IS NOT NULL)) AND (taxonverbatim.specific_epithet IS NOT NULL)); -- _specimens_06_count_of_unique_verb_subsp_taxa_with_author SELECT count(DISTINCT taxonlabel.taxonomicname) AS taxon_names_with_author FROM (taxonverbatim JOIN taxonlabel USING (taxonlabel_id)) WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.subspecies IS NOT NULL)); -- _specimens_07_list_of_verbatim_subspecific_taxa_with_author SELECT DISTINCT taxonlabel.taxonomicname AS taxon_name_with_author FROM (taxonverbatim JOIN taxonlabel USING (taxonlabel_id)) WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.subspecies IS NOT NULL)); -- _specimens_08_count_of_unique_verbatim_author_taxa_with_genus SELECT count(DISTINCT taxonlabel.taxonomicname) AS taxon_names_with_author FROM (taxonverbatim JOIN taxonlabel USING (taxonlabel_id)) WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.genus IS NOT NULL)); -- _specimens_09_list_of_unique_verbatim_author_taxa_with_genus SELECT DISTINCT taxonlabel.taxonomicname AS taxon_name_with_author FROM (taxonverbatim JOIN taxonlabel USING (taxonlabel_id)) WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.genus IS NOT NULL)); -- _specimens_10_count_number_of_records_by_institution SELECT ( SELECT sourcelist.name FROM sourcelist WHERE (sourcelist.sourcelist_id = specimenreplicate.duplicate_institutions_sourcelist_id)) AS specimen_duplicate_institutions, count(*) AS records FROM specimenreplicate WHERE (specimenreplicate.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) GROUP BY specimenreplicate.duplicate_institutions_sourcelist_id; -- _specimens_11_list_of_three_standard_political_divisions SELECT DISTINCT place.country, place.stateprovince AS "stateProvince", place.county FROM place WHERE (place.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)); /* Note: character set issues may cause mis-matches. This query is a good way to reveal character set issues, either in source db or in BIEN */ -- _specimens_12_distinct_collector_name_collect_num_date_w_count SELECT ( SELECT party.fullname FROM party WHERE (party.party_id = taxonoccurrence.collector_id)) AS "collectorName", specimenreplicate.collectionnumber AS "collectionNumber", (aggregateoccurrence.collectiondate)::text AS "dateCollected", count(*) AS "specimenRecords" FROM (((taxonoccurrence JOIN aggregateoccurrence USING (taxonoccurrence_id)) JOIN plantobservation USING (aggregateoccurrence_id)) JOIN specimenreplicate USING (plantobservation_id)) WHERE (taxonoccurrence.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) GROUP BY ( SELECT party.fullname FROM party WHERE (party.party_id = taxonoccurrence.collector_id)), specimenreplicate.collectionnumber, (aggregateoccurrence.collectiondate)::text ORDER BY ( SELECT party.fullname FROM party WHERE (party.party_id = taxonoccurrence.collector_id)), specimenreplicate.collectionnumber, (aggregateoccurrence.collectiondate)::text; -- _specimens_13_count_of_all_verbatim_and_decimal_lat_long SELECT ( SELECT count(DISTINCT ROW(coordinates.latitude_deg, coordinates.longitude_deg, coordinates.verbatimlatitude, coordinates.verbatimlongitude, coordinates.coordsaccuracy_m)) AS count FROM coordinates WHERE ((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (((coordinates.verbatimlatitude IS NOT NULL) OR (coordinates.latitude_deg IS NOT NULL)) OR ((coordinates.verbatimlongitude IS NOT NULL) OR (coordinates.longitude_deg IS NOT NULL))))) AS "allLatLongs", ( SELECT count(DISTINCT ROW(coordinates.latitude_deg, coordinates.longitude_deg, coordinates.verbatimlatitude, coordinates.verbatimlongitude, coordinates.coordsaccuracy_m)) AS count FROM coordinates WHERE (((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (coordinates.latitude_deg IS NOT NULL)) AND (coordinates.longitude_deg IS NOT NULL))) AS "decimalLatLongs"; -- _specimens_13a_list_of_all_verbatim_lat_long SELECT DISTINCT COALESCE(coordinates.verbatimlatitude, (coordinates.latitude_deg)::text) AS "verbatimLatitude", COALESCE(coordinates.verbatimlatitude, (coordinates.longitude_deg)::text) AS "verbatimLongitude" FROM coordinates WHERE ((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (((coordinates.verbatimlatitude IS NOT NULL) OR (coordinates.latitude_deg IS NOT NULL)) OR ((coordinates.verbatimlongitude IS NOT NULL) OR (coordinates.longitude_deg IS NOT NULL)))); -- _specimens_13b_list_of_all_decimal_lat_long SELECT DISTINCT coordinates.latitude_deg AS "decimalLatitude", coordinates.longitude_deg AS "decimalLongitude" FROM coordinates WHERE ((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND ((coordinates.latitude_deg IS NOT NULL) OR (coordinates.longitude_deg IS NOT NULL))); -- _specimens_14_count_of_all_invalid_verbatim_lat_long SELECT ( SELECT count(*) AS count FROM coordinates WHERE ((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND ((coordinates.latitude_deg > (90)::double precision) OR (coordinates.latitude_deg < ((-90))::double precision)))) AS "badLats", ( SELECT count(*) AS count FROM coordinates WHERE ((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND ((coordinates.longitude_deg > (180)::double precision) OR (coordinates.longitude_deg < ((-180))::double precision)))) AS "badLongs"; -- _specimens_15_list_distinct_locality_descriptions SELECT DISTINCT location.locationnarrative AS "localityDescription" FROM location WHERE ((location.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (location.locationnarrative IS NOT NULL)); -- _specimens_16_list_distinct_specimen_descriptions SELECT DISTINCT aggregateoccurrence.notes AS "specimenDescription" FROM aggregateoccurrence WHERE ((aggregateoccurrence.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (aggregateoccurrence.notes IS NOT NULL));