1 |
12055
|
aaronmk
|
-- -------------------------------------------------------------------------
|
2 |
12063
|
aaronmk
|
-- Quantitative validation queries on the BIEN database
|
3 |
12895
|
aaronmk
|
--
|
4 |
12063
|
aaronmk
|
-- Applies to any datasource with specimens
|
5 |
12895
|
aaronmk
|
--
|
6 |
12063
|
aaronmk
|
-- DB: vegbien
|
7 |
|
|
-- Host: vegbiendev.nceas.ucsb.edu
|
8 |
12055
|
aaronmk
|
-- -------------------------------------------------------------------------
|
9 |
|
|
|
10 |
12931
|
aaronmk
|
\set datasource '''NY'''
|
11 |
|
|
|
12 |
12223
|
aaronmk
|
SET search_path TO public_validations, public;
|
13 |
12068
|
aaronmk
|
|
14 |
12931
|
aaronmk
|
SET enable_seqscan = off;
|
15 |
13061
|
aaronmk
|
SET join_collapse_limit = 1; -- turn it off
|
16 |
12931
|
aaronmk
|
|
17 |
13064
|
aaronmk
|
-- _specimens_01_count_of_total_records_specimens_in_source_db
|
18 |
13080
|
aaronmk
|
SELECT count(*) AS "totalSpecimenRecords"
|
19 |
13083
|
aaronmk
|
FROM taxonoccurrence
|
20 |
|
|
WHERE (taxonoccurrence.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname));
|
21 |
12055
|
aaronmk
|
|
22 |
13064
|
aaronmk
|
-- _specimens_02_count_of_unique_verbatim_families
|
23 |
13080
|
aaronmk
|
SELECT count(DISTINCT taxonverbatim.family) AS families
|
24 |
13083
|
aaronmk
|
FROM taxonverbatim
|
25 |
|
|
WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.family IS NOT NULL));
|
26 |
12055
|
aaronmk
|
|
27 |
13064
|
aaronmk
|
-- _specimens_03_list_of_verbatim_families
|
28 |
13080
|
aaronmk
|
SELECT DISTINCT taxonverbatim.family
|
29 |
13083
|
aaronmk
|
FROM taxonverbatim
|
30 |
|
|
WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.family IS NOT NULL));
|
31 |
12055
|
aaronmk
|
|
32 |
13064
|
aaronmk
|
-- _specimens_04_count_of_species_binomials
|
33 |
13080
|
aaronmk
|
SELECT count(DISTINCT concat_ws(' '::text, taxonverbatim.genus, taxonverbatim.specific_epithet)) AS species_binomials
|
34 |
13083
|
aaronmk
|
FROM taxonverbatim
|
35 |
|
|
WHERE (((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.genus IS NOT NULL)) AND (taxonverbatim.specific_epithet IS NOT NULL));
|
36 |
12055
|
aaronmk
|
|
37 |
13064
|
aaronmk
|
-- _specimens_05_list_of_species_binomials
|
38 |
13080
|
aaronmk
|
SELECT DISTINCT concat_ws(' '::text, taxonverbatim.genus, taxonverbatim.specific_epithet) AS species_binomial
|
39 |
13083
|
aaronmk
|
FROM taxonverbatim
|
40 |
|
|
WHERE (((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.genus IS NOT NULL)) AND (taxonverbatim.specific_epithet IS NOT NULL));
|
41 |
12055
|
aaronmk
|
|
42 |
13064
|
aaronmk
|
-- _specimens_06_count_of_unique_verb_subsp_taxa_with_author
|
43 |
13080
|
aaronmk
|
SELECT count(DISTINCT taxonlabel.taxonomicname) AS taxon_names_with_author
|
44 |
13121
|
aaronmk
|
FROM (taxonverbatim
|
45 |
|
|
JOIN taxonlabel USING (taxonlabel_id))
|
46 |
|
|
WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.subspecies IS NOT NULL));
|
47 |
12055
|
aaronmk
|
|
48 |
13064
|
aaronmk
|
-- _specimens_07_list_of_verbatim_subspecific_taxa_with_author
|
49 |
13080
|
aaronmk
|
SELECT DISTINCT taxonlabel.taxonomicname AS taxon_name_with_author
|
50 |
13121
|
aaronmk
|
FROM (taxonverbatim
|
51 |
|
|
JOIN taxonlabel USING (taxonlabel_id))
|
52 |
|
|
WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.subspecies IS NOT NULL));
|
53 |
12055
|
aaronmk
|
|
54 |
13064
|
aaronmk
|
-- _specimens_08_count_of_unique_verbatim_author_taxa_with_genus
|
55 |
13080
|
aaronmk
|
SELECT count(DISTINCT taxonlabel.taxonomicname) AS taxon_names_with_author
|
56 |
13083
|
aaronmk
|
FROM (taxonverbatim
|
57 |
|
|
JOIN taxonlabel USING (taxonlabel_id))
|
58 |
|
|
WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.genus IS NOT NULL));
|
59 |
12055
|
aaronmk
|
|
60 |
13064
|
aaronmk
|
-- _specimens_09_list_of_unique_verbatim_author_taxa_with_genus
|
61 |
13080
|
aaronmk
|
SELECT DISTINCT taxonlabel.taxonomicname AS taxon_name_with_author
|
62 |
13083
|
aaronmk
|
FROM (taxonverbatim
|
63 |
|
|
JOIN taxonlabel USING (taxonlabel_id))
|
64 |
|
|
WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.genus IS NOT NULL));
|
65 |
12055
|
aaronmk
|
|
66 |
13064
|
aaronmk
|
-- _specimens_10_count_number_of_records_by_institution
|
67 |
13082
|
aaronmk
|
SELECT ( SELECT sourcelist.name
|
68 |
13083
|
aaronmk
|
FROM sourcelist
|
69 |
13082
|
aaronmk
|
WHERE (sourcelist.sourcelist_id = specimenreplicate.duplicate_institutions_sourcelist_id)) AS specimen_duplicate_institutions,
|
70 |
13080
|
aaronmk
|
count(*) AS records
|
71 |
13083
|
aaronmk
|
FROM specimenreplicate
|
72 |
|
|
WHERE (specimenreplicate.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname))
|
73 |
13080
|
aaronmk
|
GROUP BY specimenreplicate.duplicate_institutions_sourcelist_id;
|
74 |
12055
|
aaronmk
|
|
75 |
13064
|
aaronmk
|
-- _specimens_11_list_of_three_standard_political_divisions
|
76 |
13080
|
aaronmk
|
SELECT DISTINCT place.country,
|
77 |
13101
|
aaronmk
|
place.stateprovince AS "stateProvince",
|
78 |
13080
|
aaronmk
|
place.county
|
79 |
13083
|
aaronmk
|
FROM place
|
80 |
|
|
WHERE (place.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname));
|
81 |
13064
|
aaronmk
|
/*
|
82 |
12906
|
aaronmk
|
Note: character set issues may cause mis-matches. This query is a good way to reveal character set issues, either in source db or in BIEN
|
83 |
13064
|
aaronmk
|
*/
|
84 |
12055
|
aaronmk
|
|
85 |
13064
|
aaronmk
|
-- _specimens_12_distinct_collector_name_collect_num_date_w_count
|
86 |
13080
|
aaronmk
|
SELECT ( SELECT party.fullname
|
87 |
13083
|
aaronmk
|
FROM party
|
88 |
13080
|
aaronmk
|
WHERE (party.party_id = taxonoccurrence.collector_id)) AS "collectorName",
|
89 |
|
|
specimenreplicate.collectionnumber AS "collectionNumber",
|
90 |
13087
|
aaronmk
|
(aggregateoccurrence.collectiondate)::text AS "dateCollected",
|
91 |
13080
|
aaronmk
|
count(*) AS "specimenRecords"
|
92 |
13083
|
aaronmk
|
FROM (((taxonoccurrence
|
93 |
|
|
JOIN aggregateoccurrence USING (taxonoccurrence_id))
|
94 |
|
|
JOIN plantobservation USING (aggregateoccurrence_id))
|
95 |
|
|
JOIN specimenreplicate USING (plantobservation_id))
|
96 |
|
|
WHERE (taxonoccurrence.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname))
|
97 |
13080
|
aaronmk
|
GROUP BY ( SELECT party.fullname
|
98 |
13083
|
aaronmk
|
FROM party
|
99 |
13101
|
aaronmk
|
WHERE (party.party_id = taxonoccurrence.collector_id)), specimenreplicate.collectionnumber, (aggregateoccurrence.collectiondate)::text
|
100 |
13080
|
aaronmk
|
ORDER BY ( SELECT party.fullname
|
101 |
13083
|
aaronmk
|
FROM party
|
102 |
13101
|
aaronmk
|
WHERE (party.party_id = taxonoccurrence.collector_id)), specimenreplicate.collectionnumber, (aggregateoccurrence.collectiondate)::text;
|
103 |
12055
|
aaronmk
|
|
104 |
13064
|
aaronmk
|
-- _specimens_13_count_of_all_verbatim_and_decimal_lat_long
|
105 |
13160
|
aaronmk
|
SELECT ( SELECT count(DISTINCT ROW(coordinates.latitude_deg, coordinates.longitude_deg, coordinates.verbatimlatitude, coordinates.verbatimlongitude, coordinates.coordsaccuracy_m)) AS count
|
106 |
13083
|
aaronmk
|
FROM coordinates
|
107 |
13160
|
aaronmk
|
WHERE ((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (((coordinates.verbatimlatitude IS NOT NULL) OR (coordinates.latitude_deg IS NOT NULL)) OR ((coordinates.verbatimlongitude IS NOT NULL) OR (coordinates.longitude_deg IS NOT NULL))))) AS "allLatLongs",
|
108 |
|
|
( SELECT count(DISTINCT ROW(coordinates.latitude_deg, coordinates.longitude_deg, coordinates.verbatimlatitude, coordinates.verbatimlongitude, coordinates.coordsaccuracy_m)) AS count
|
109 |
13083
|
aaronmk
|
FROM coordinates
|
110 |
13160
|
aaronmk
|
WHERE (((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (coordinates.latitude_deg IS NOT NULL)) AND (coordinates.longitude_deg IS NOT NULL))) AS "decimalLatLongs";
|
111 |
12055
|
aaronmk
|
|
112 |
13160
|
aaronmk
|
-- _specimens_13a_list_of_all_verbatim_lat_long
|
113 |
|
|
SELECT DISTINCT COALESCE(coordinates.verbatimlatitude, (coordinates.latitude_deg)::text) AS "verbatimLatitude",
|
114 |
|
|
COALESCE(coordinates.verbatimlatitude, (coordinates.longitude_deg)::text) AS "verbatimLongitude"
|
115 |
|
|
FROM coordinates
|
116 |
|
|
WHERE ((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (((coordinates.verbatimlatitude IS NOT NULL) OR (coordinates.latitude_deg IS NOT NULL)) OR ((coordinates.verbatimlongitude IS NOT NULL) OR (coordinates.longitude_deg IS NOT NULL))));
|
117 |
|
|
|
118 |
|
|
-- _specimens_13b_list_of_all_decimal_lat_long
|
119 |
|
|
SELECT DISTINCT coordinates.latitude_deg AS "decimalLatitude",
|
120 |
|
|
coordinates.longitude_deg AS "decimalLongitude"
|
121 |
|
|
FROM coordinates
|
122 |
|
|
WHERE ((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND ((coordinates.latitude_deg IS NOT NULL) OR (coordinates.longitude_deg IS NOT NULL)));
|
123 |
|
|
|
124 |
13064
|
aaronmk
|
-- _specimens_14_count_of_all_invalid_verbatim_lat_long
|
125 |
13080
|
aaronmk
|
SELECT ( SELECT count(*) AS count
|
126 |
13083
|
aaronmk
|
FROM coordinates
|
127 |
|
|
WHERE ((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND ((coordinates.latitude_deg > (90)::double precision) OR (coordinates.latitude_deg < ((-90))::double precision)))) AS "badLats",
|
128 |
13080
|
aaronmk
|
( SELECT count(*) AS count
|
129 |
13083
|
aaronmk
|
FROM coordinates
|
130 |
|
|
WHERE ((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND ((coordinates.longitude_deg > (180)::double precision) OR (coordinates.longitude_deg < ((-180))::double precision)))) AS "badLongs";
|
131 |
12055
|
aaronmk
|
|
132 |
13064
|
aaronmk
|
-- _specimens_15_list_distinct_locality_descriptions
|
133 |
13080
|
aaronmk
|
SELECT DISTINCT location.locationnarrative AS "localityDescription"
|
134 |
13083
|
aaronmk
|
FROM location
|
135 |
|
|
WHERE ((location.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (location.locationnarrative IS NOT NULL));
|
136 |
12055
|
aaronmk
|
|
137 |
13064
|
aaronmk
|
-- _specimens_16_list_distinct_specimen_descriptions
|
138 |
13080
|
aaronmk
|
SELECT DISTINCT aggregateoccurrence.notes AS "specimenDescription"
|
139 |
13083
|
aaronmk
|
FROM aggregateoccurrence
|
140 |
|
|
WHERE ((aggregateoccurrence.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (aggregateoccurrence.notes IS NOT NULL));
|