1
|
-- -------------------------------------------------------------------------
|
2
|
-- Quantitative validation queries on the BIEN database
|
3
|
--
|
4
|
-- Applies to any datasource with specimens
|
5
|
--
|
6
|
-- DB: vegbien
|
7
|
-- Host: vegbiendev.nceas.ucsb.edu
|
8
|
-- -------------------------------------------------------------------------
|
9
|
|
10
|
\set datasource '''NY'''
|
11
|
|
12
|
SET search_path TO public_validations, public;
|
13
|
|
14
|
SET enable_seqscan = off;
|
15
|
SET join_collapse_limit = 1; -- turn it off
|
16
|
|
17
|
-- _specimens_01_count_of_total_records_specimens_in_source_db
|
18
|
SELECT count(*) AS "totalSpecimenRecords"
|
19
|
FROM taxonoccurrence
|
20
|
WHERE (taxonoccurrence.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname));
|
21
|
|
22
|
-- _specimens_02_count_of_unique_verbatim_families
|
23
|
SELECT count(DISTINCT taxonverbatim.family) AS families
|
24
|
FROM taxonverbatim
|
25
|
WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.family IS NOT NULL));
|
26
|
|
27
|
-- _specimens_03_list_of_verbatim_families
|
28
|
SELECT DISTINCT taxonverbatim.family
|
29
|
FROM taxonverbatim
|
30
|
WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.family IS NOT NULL));
|
31
|
|
32
|
-- _specimens_04_count_of_species_binomials
|
33
|
SELECT count(DISTINCT concat_ws(' '::text, taxonverbatim.genus, taxonverbatim.specific_epithet)) AS species_binomials
|
34
|
FROM taxonverbatim
|
35
|
WHERE (((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.genus IS NOT NULL)) AND (taxonverbatim.specific_epithet IS NOT NULL));
|
36
|
|
37
|
-- _specimens_05_list_of_species_binomials
|
38
|
SELECT DISTINCT concat_ws(' '::text, taxonverbatim.genus, taxonverbatim.specific_epithet) AS species_binomial
|
39
|
FROM taxonverbatim
|
40
|
WHERE (((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.genus IS NOT NULL)) AND (taxonverbatim.specific_epithet IS NOT NULL));
|
41
|
|
42
|
-- _specimens_06_count_of_unique_verb_subsp_taxa_with_author
|
43
|
SELECT count(DISTINCT taxonlabel.taxonomicname) AS taxon_names_with_author
|
44
|
FROM (taxonverbatim
|
45
|
JOIN taxonlabel USING (taxonlabel_id))
|
46
|
WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.subspecies IS NOT NULL));
|
47
|
|
48
|
-- _specimens_07_list_of_verbatim_subspecific_taxa_with_author
|
49
|
SELECT DISTINCT taxonlabel.taxonomicname AS taxon_name_with_author
|
50
|
FROM (taxonverbatim
|
51
|
JOIN taxonlabel USING (taxonlabel_id))
|
52
|
WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.subspecies IS NOT NULL));
|
53
|
|
54
|
-- _specimens_08_count_of_unique_verbatim_author_taxa_with_genus
|
55
|
SELECT count(DISTINCT taxonlabel.taxonomicname) AS taxon_names_with_author
|
56
|
FROM (taxonverbatim
|
57
|
JOIN taxonlabel USING (taxonlabel_id))
|
58
|
WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.genus IS NOT NULL));
|
59
|
|
60
|
-- _specimens_09_list_of_unique_verbatim_author_taxa_with_genus
|
61
|
SELECT DISTINCT taxonlabel.taxonomicname AS taxon_name_with_author
|
62
|
FROM (taxonverbatim
|
63
|
JOIN taxonlabel USING (taxonlabel_id))
|
64
|
WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.genus IS NOT NULL));
|
65
|
|
66
|
-- _specimens_10_count_number_of_records_by_institution
|
67
|
SELECT ( SELECT sourcelist.name
|
68
|
FROM sourcelist
|
69
|
WHERE (sourcelist.sourcelist_id = specimenreplicate.duplicate_institutions_sourcelist_id)) AS specimen_duplicate_institutions,
|
70
|
count(*) AS records
|
71
|
FROM specimenreplicate
|
72
|
WHERE (specimenreplicate.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname))
|
73
|
GROUP BY specimenreplicate.duplicate_institutions_sourcelist_id;
|
74
|
|
75
|
-- _specimens_11_list_of_three_standard_political_divisions
|
76
|
SELECT DISTINCT place.country,
|
77
|
place.stateprovince AS "stateProvince",
|
78
|
place.county
|
79
|
FROM place
|
80
|
WHERE (place.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname));
|
81
|
/*
|
82
|
Note: character set issues may cause mis-matches. This query is a good way to reveal character set issues, either in source db or in BIEN
|
83
|
*/
|
84
|
|
85
|
-- _specimens_12_distinct_collector_name_collect_num_date_w_count
|
86
|
SELECT ( SELECT party.fullname
|
87
|
FROM party
|
88
|
WHERE (party.party_id = taxonoccurrence.collector_id)) AS "collectorName",
|
89
|
specimenreplicate.collectionnumber AS "collectionNumber",
|
90
|
(aggregateoccurrence.collectiondate)::text AS "dateCollected",
|
91
|
count(*) AS "specimenRecords"
|
92
|
FROM (((taxonoccurrence
|
93
|
JOIN aggregateoccurrence USING (taxonoccurrence_id))
|
94
|
JOIN plantobservation USING (aggregateoccurrence_id))
|
95
|
JOIN specimenreplicate USING (plantobservation_id))
|
96
|
WHERE (taxonoccurrence.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname))
|
97
|
GROUP BY ( SELECT party.fullname
|
98
|
FROM party
|
99
|
WHERE (party.party_id = taxonoccurrence.collector_id)), specimenreplicate.collectionnumber, (aggregateoccurrence.collectiondate)::text
|
100
|
ORDER BY ( SELECT party.fullname
|
101
|
FROM party
|
102
|
WHERE (party.party_id = taxonoccurrence.collector_id)), specimenreplicate.collectionnumber, (aggregateoccurrence.collectiondate)::text;
|
103
|
|
104
|
-- _specimens_13_count_of_all_verbatim_and_decimal_lat_long
|
105
|
SELECT ( SELECT count(DISTINCT ROW(coordinates.latitude_deg, coordinates.longitude_deg, coordinates.verbatimlatitude, coordinates.verbatimlongitude, coordinates.coordsaccuracy_m)) AS count
|
106
|
FROM coordinates
|
107
|
WHERE ((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (((coordinates.verbatimlatitude IS NOT NULL) OR (coordinates.latitude_deg IS NOT NULL)) OR ((coordinates.verbatimlongitude IS NOT NULL) OR (coordinates.longitude_deg IS NOT NULL))))) AS "allLatLongs",
|
108
|
( SELECT count(DISTINCT ROW(coordinates.latitude_deg, coordinates.longitude_deg, coordinates.verbatimlatitude, coordinates.verbatimlongitude, coordinates.coordsaccuracy_m)) AS count
|
109
|
FROM coordinates
|
110
|
WHERE (((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (coordinates.latitude_deg IS NOT NULL)) AND (coordinates.longitude_deg IS NOT NULL))) AS "decimalLatLongs";
|
111
|
|
112
|
-- _specimens_13a_list_of_all_verbatim_lat_long
|
113
|
SELECT DISTINCT COALESCE(coordinates.verbatimlatitude, (coordinates.latitude_deg)::text) AS "verbatimLatitude",
|
114
|
COALESCE(coordinates.verbatimlatitude, (coordinates.longitude_deg)::text) AS "verbatimLongitude"
|
115
|
FROM coordinates
|
116
|
WHERE ((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (((coordinates.verbatimlatitude IS NOT NULL) OR (coordinates.latitude_deg IS NOT NULL)) OR ((coordinates.verbatimlongitude IS NOT NULL) OR (coordinates.longitude_deg IS NOT NULL))));
|
117
|
|
118
|
-- _specimens_13b_list_of_all_decimal_lat_long
|
119
|
SELECT DISTINCT coordinates.latitude_deg AS "decimalLatitude",
|
120
|
coordinates.longitude_deg AS "decimalLongitude"
|
121
|
FROM coordinates
|
122
|
WHERE ((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND ((coordinates.latitude_deg IS NOT NULL) OR (coordinates.longitude_deg IS NOT NULL)));
|
123
|
|
124
|
-- _specimens_14_count_of_all_invalid_verbatim_lat_long
|
125
|
SELECT ( SELECT count(*) AS count
|
126
|
FROM coordinates
|
127
|
WHERE ((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND ((coordinates.latitude_deg > (90)::double precision) OR (coordinates.latitude_deg < ((-90))::double precision)))) AS "badLats",
|
128
|
( SELECT count(*) AS count
|
129
|
FROM coordinates
|
130
|
WHERE ((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND ((coordinates.longitude_deg > (180)::double precision) OR (coordinates.longitude_deg < ((-180))::double precision)))) AS "badLongs";
|
131
|
|
132
|
-- _specimens_15_list_distinct_locality_descriptions
|
133
|
SELECT DISTINCT location.locationnarrative AS "localityDescription"
|
134
|
FROM location
|
135
|
WHERE ((location.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (location.locationnarrative IS NOT NULL));
|
136
|
|
137
|
-- _specimens_16_list_distinct_specimen_descriptions
|
138
|
SELECT DISTINCT aggregateoccurrence.notes AS "specimenDescription"
|
139
|
FROM aggregateoccurrence
|
140
|
WHERE ((aggregateoccurrence.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (aggregateoccurrence.notes IS NOT NULL));
|