Project

General

Profile

1 12055 aaronmk
-- -------------------------------------------------------------------------
2 12063 aaronmk
-- Quantitative validation queries on the BIEN database
3 12895 aaronmk
--
4 12063 aaronmk
-- Applies to any datasource with specimens
5 12895 aaronmk
--
6 12063 aaronmk
-- DB: vegbien
7
-- Host: vegbiendev.nceas.ucsb.edu
8 12055 aaronmk
-- -------------------------------------------------------------------------
9
10 12931 aaronmk
\set datasource '''NY'''
11
12 12223 aaronmk
SET search_path TO public_validations, public;
13 12068 aaronmk
14 12931 aaronmk
SET enable_seqscan = off;
15 13061 aaronmk
SET join_collapse_limit = 1; -- turn it off
16 12931 aaronmk
17 13064 aaronmk
-- _specimens_01_count_of_total_records_specimens_in_source_db
18 13080 aaronmk
 SELECT count(*) AS "totalSpecimenRecords"
19 13083 aaronmk
   FROM taxonoccurrence
20
  WHERE (taxonoccurrence.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname));
21 12055 aaronmk
22 13064 aaronmk
-- _specimens_02_count_of_unique_verbatim_families
23 13080 aaronmk
 SELECT count(DISTINCT taxonverbatim.family) AS families
24 13083 aaronmk
   FROM taxonverbatim
25
  WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.family IS NOT NULL));
26 12055 aaronmk
27 13064 aaronmk
-- _specimens_03_list_of_verbatim_families
28 13080 aaronmk
 SELECT DISTINCT taxonverbatim.family
29 13083 aaronmk
   FROM taxonverbatim
30
  WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.family IS NOT NULL));
31 12055 aaronmk
32 13064 aaronmk
-- _specimens_04_count_of_species_binomials
33 13080 aaronmk
 SELECT count(DISTINCT concat_ws(' '::text, taxonverbatim.genus, taxonverbatim.specific_epithet)) AS species_binomials
34 13083 aaronmk
   FROM taxonverbatim
35
  WHERE (((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.genus IS NOT NULL)) AND (taxonverbatim.specific_epithet IS NOT NULL));
36 12055 aaronmk
37 13064 aaronmk
-- _specimens_05_list_of_species_binomials
38 13080 aaronmk
 SELECT DISTINCT concat_ws(' '::text, taxonverbatim.genus, taxonverbatim.specific_epithet) AS species_binomial
39 13083 aaronmk
   FROM taxonverbatim
40
  WHERE (((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.genus IS NOT NULL)) AND (taxonverbatim.specific_epithet IS NOT NULL));
41 12055 aaronmk
42 13064 aaronmk
-- _specimens_06_count_of_unique_verb_subsp_taxa_with_author
43 13080 aaronmk
 SELECT count(DISTINCT taxonlabel.taxonomicname) AS taxon_names_with_author
44 13121 aaronmk
   FROM (taxonverbatim
45
   JOIN taxonlabel USING (taxonlabel_id))
46
  WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.subspecies IS NOT NULL));
47 12055 aaronmk
48 13064 aaronmk
-- _specimens_07_list_of_verbatim_subspecific_taxa_with_author
49 13080 aaronmk
 SELECT DISTINCT taxonlabel.taxonomicname AS taxon_name_with_author
50 13121 aaronmk
   FROM (taxonverbatim
51
   JOIN taxonlabel USING (taxonlabel_id))
52
  WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.subspecies IS NOT NULL));
53 12055 aaronmk
54 13064 aaronmk
-- _specimens_08_count_of_unique_verbatim_author_taxa_with_genus
55 13080 aaronmk
 SELECT count(DISTINCT taxonlabel.taxonomicname) AS taxon_names_with_author
56 13083 aaronmk
   FROM (taxonverbatim
57
   JOIN taxonlabel USING (taxonlabel_id))
58
  WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.genus IS NOT NULL));
59 12055 aaronmk
60 13064 aaronmk
-- _specimens_09_list_of_unique_verbatim_author_taxa_with_genus
61 13080 aaronmk
 SELECT DISTINCT taxonlabel.taxonomicname AS taxon_name_with_author
62 13083 aaronmk
   FROM (taxonverbatim
63
   JOIN taxonlabel USING (taxonlabel_id))
64
  WHERE ((taxonverbatim.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (taxonverbatim.genus IS NOT NULL));
65 12055 aaronmk
66 13064 aaronmk
-- _specimens_10_count_number_of_records_by_institution
67 13082 aaronmk
 SELECT ( SELECT sourcelist.name
68 13083 aaronmk
           FROM sourcelist
69 13082 aaronmk
          WHERE (sourcelist.sourcelist_id = specimenreplicate.duplicate_institutions_sourcelist_id)) AS specimen_duplicate_institutions,
70 13080 aaronmk
    count(*) AS records
71 13083 aaronmk
   FROM specimenreplicate
72
  WHERE (specimenreplicate.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname))
73 13080 aaronmk
  GROUP BY specimenreplicate.duplicate_institutions_sourcelist_id;
74 12055 aaronmk
75 13064 aaronmk
-- _specimens_11_list_of_three_standard_political_divisions
76 13080 aaronmk
 SELECT DISTINCT place.country,
77 13101 aaronmk
    place.stateprovince AS "stateProvince",
78 13080 aaronmk
    place.county
79 13083 aaronmk
   FROM place
80
  WHERE (place.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname));
81 13064 aaronmk
/*
82 12906 aaronmk
Note: character set issues may cause mis-matches. This query is a good way to reveal character set issues, either in source db or in BIEN
83 13064 aaronmk
*/
84 12055 aaronmk
85 13064 aaronmk
-- _specimens_12_distinct_collector_name_collect_num_date_w_count
86 13080 aaronmk
 SELECT ( SELECT party.fullname
87 13083 aaronmk
           FROM party
88 13080 aaronmk
          WHERE (party.party_id = taxonoccurrence.collector_id)) AS "collectorName",
89
    specimenreplicate.collectionnumber AS "collectionNumber",
90 13087 aaronmk
    (aggregateoccurrence.collectiondate)::text AS "dateCollected",
91 13080 aaronmk
    count(*) AS "specimenRecords"
92 13083 aaronmk
   FROM (((taxonoccurrence
93
   JOIN aggregateoccurrence USING (taxonoccurrence_id))
94
   JOIN plantobservation USING (aggregateoccurrence_id))
95
   JOIN specimenreplicate USING (plantobservation_id))
96
  WHERE (taxonoccurrence.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname))
97 13080 aaronmk
  GROUP BY ( SELECT party.fullname
98 13083 aaronmk
   FROM party
99 13101 aaronmk
  WHERE (party.party_id = taxonoccurrence.collector_id)), specimenreplicate.collectionnumber, (aggregateoccurrence.collectiondate)::text
100 13080 aaronmk
  ORDER BY ( SELECT party.fullname
101 13083 aaronmk
   FROM party
102 13101 aaronmk
  WHERE (party.party_id = taxonoccurrence.collector_id)), specimenreplicate.collectionnumber, (aggregateoccurrence.collectiondate)::text;
103 12055 aaronmk
104 13064 aaronmk
-- _specimens_13_count_of_all_verbatim_and_decimal_lat_long
105 13160 aaronmk
 SELECT ( SELECT count(DISTINCT ROW(coordinates.latitude_deg, coordinates.longitude_deg, coordinates.verbatimlatitude, coordinates.verbatimlongitude, coordinates.coordsaccuracy_m)) AS count
106 13083 aaronmk
           FROM coordinates
107 13160 aaronmk
          WHERE ((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (((coordinates.verbatimlatitude IS NOT NULL) OR (coordinates.latitude_deg IS NOT NULL)) OR ((coordinates.verbatimlongitude IS NOT NULL) OR (coordinates.longitude_deg IS NOT NULL))))) AS "allLatLongs",
108
    ( SELECT count(DISTINCT ROW(coordinates.latitude_deg, coordinates.longitude_deg, coordinates.verbatimlatitude, coordinates.verbatimlongitude, coordinates.coordsaccuracy_m)) AS count
109 13083 aaronmk
           FROM coordinates
110 13160 aaronmk
          WHERE (((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (coordinates.latitude_deg IS NOT NULL)) AND (coordinates.longitude_deg IS NOT NULL))) AS "decimalLatLongs";
111 12055 aaronmk
112 13160 aaronmk
-- _specimens_13a_list_of_all_verbatim_lat_long
113
 SELECT DISTINCT COALESCE(coordinates.verbatimlatitude, (coordinates.latitude_deg)::text) AS "verbatimLatitude",
114
    COALESCE(coordinates.verbatimlatitude, (coordinates.longitude_deg)::text) AS "verbatimLongitude"
115
   FROM coordinates
116
  WHERE ((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (((coordinates.verbatimlatitude IS NOT NULL) OR (coordinates.latitude_deg IS NOT NULL)) OR ((coordinates.verbatimlongitude IS NOT NULL) OR (coordinates.longitude_deg IS NOT NULL))));
117
118
-- _specimens_13b_list_of_all_decimal_lat_long
119
 SELECT DISTINCT coordinates.latitude_deg AS "decimalLatitude",
120
    coordinates.longitude_deg AS "decimalLongitude"
121
   FROM coordinates
122
  WHERE ((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND ((coordinates.latitude_deg IS NOT NULL) OR (coordinates.longitude_deg IS NOT NULL)));
123
124 13064 aaronmk
-- _specimens_14_count_of_all_invalid_verbatim_lat_long
125 13080 aaronmk
 SELECT ( SELECT count(*) AS count
126 13083 aaronmk
           FROM coordinates
127
          WHERE ((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND ((coordinates.latitude_deg > (90)::double precision) OR (coordinates.latitude_deg < ((-90))::double precision)))) AS "badLats",
128 13080 aaronmk
    ( SELECT count(*) AS count
129 13083 aaronmk
           FROM coordinates
130
          WHERE ((coordinates.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND ((coordinates.longitude_deg > (180)::double precision) OR (coordinates.longitude_deg < ((-180))::double precision)))) AS "badLongs";
131 12055 aaronmk
132 13064 aaronmk
-- _specimens_15_list_distinct_locality_descriptions
133 13080 aaronmk
 SELECT DISTINCT location.locationnarrative AS "localityDescription"
134 13083 aaronmk
   FROM location
135
  WHERE ((location.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (location.locationnarrative IS NOT NULL));
136 12055 aaronmk
137 13064 aaronmk
-- _specimens_16_list_distinct_specimen_descriptions
138 13080 aaronmk
 SELECT DISTINCT aggregateoccurrence.notes AS "specimenDescription"
139 13083 aaronmk
   FROM aggregateoccurrence
140
  WHERE ((aggregateoccurrence.source_id = ( SELECT source_by_shortname((:datasource)::text) AS source_by_shortname)) AND (aggregateoccurrence.notes IS NOT NULL));