Project

General

Profile

1
-- -------------------------------------------------------------------------
2
--  Quantitative validation queries against NYBG source db
3
--
4
-- Queries staging table vegbien."NY"."Ecatalog_all" @vegbiendev.nceas.ucsb.edu
5
-- -------------------------------------------------------------------------
6

    
7
SET search_path TO "NY";
8

    
9
CREATE VIEW _specimens_01_count_of_total_records_specimens_in_source_db AS
10
 SELECT count(*) AS "totalSpecimenRecords"
11
   FROM "Ecatalog_all";
12
COMMENT ON VIEW _specimens_01_count_of_total_records_specimens_in_source_db IS '
13
Check: should return 1 row
14
';
15

    
16
CREATE VIEW _specimens_02_count_of_unique_verbatim_families AS
17
 SELECT count(DISTINCT "Ecatalog_all".family) AS families
18
   FROM "Ecatalog_all"
19
  WHERE ("Ecatalog_all".family IS NOT NULL);
20
COMMENT ON VIEW _specimens_02_count_of_unique_verbatim_families IS '
21
Check: should return 1 row
22
';
23

    
24
CREATE VIEW _specimens_03_list_of_verbatim_families AS
25
 SELECT DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet") AS "specificEpithet"
26
   FROM "Ecatalog_all"
27
  WHERE (("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL));
28
COMMENT ON VIEW _specimens_03_list_of_verbatim_families IS '
29
Check: should return same number of rows
30
';
31

    
32
CREATE VIEW _specimens_04_count_of_unique_verbatim_species_without_author AS
33
 SELECT count(DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet")) AS subspecies
34
   FROM "Ecatalog_all"
35
  WHERE ((("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL)) AND ("Ecatalog_all".subspecies IS NOT NULL));
36
COMMENT ON VIEW _specimens_04_count_of_unique_verbatim_species_without_author IS '
37
Check: should return 1 row
38
';
39

    
40
CREATE VIEW _specimens_05_list_of_verbatim_species_excluding_author AS
41
 SELECT DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet") AS subspecies
42
   FROM "Ecatalog_all"
43
  WHERE ((("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL)) AND ("Ecatalog_all".subspecies IS NOT NULL));
44
COMMENT ON VIEW _specimens_05_list_of_verbatim_species_excluding_author IS '
45
Check: should return 3335 rows
46
';
47

    
48
CREATE VIEW _specimens_06_count_of_unique_verb_subsp_taxa_without_author AS
49
 SELECT count(DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet", "Ecatalog_all".subspecies)) AS "specificEpithet"
50
   FROM "Ecatalog_all"
51
  WHERE (("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL));
52
COMMENT ON VIEW _specimens_06_count_of_unique_verb_subsp_taxa_without_author IS '
53
Check: should return 1 row
54
';
55

    
56
CREATE VIEW _specimens_07_list_of_verbatim_subspecific_taxa_without_author AS
57
 SELECT DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet") AS "specificEpithet"
58
   FROM "Ecatalog_all"
59
  WHERE (("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL));
60
COMMENT ON VIEW _specimens_07_list_of_verbatim_subspecific_taxa_without_author IS '
61
Check: should return 40145 rows
62
';
63

    
64
CREATE VIEW _specimens_08_count_of_unique_verbatim_author_taxa_with_genus AS
65
 SELECT count(DISTINCT btrim(concat_ws(' '::text, btrim(concat_ws(' '::text, "Ecatalog_all".genus, ifnull("Ecatalog_all"."specificEpithet", ''::text), ifnull("Ecatalog_all".subspecies, ''::text))), btrim(ifnull("Ecatalog_all"."scientificNameAuthorship", ''::text))))) AS "fullScientificNames"
66
   FROM "Ecatalog_all"
67
  WHERE ("Ecatalog_all".genus IS NOT NULL);
68
COMMENT ON VIEW _specimens_08_count_of_unique_verbatim_author_taxa_with_genus IS '
69
Check: should return 1 row
70
';
71

    
72
CREATE VIEW _specimens_09_list_of_unique_verbatim_author_taxa_with_genus AS
73
 SELECT DISTINCT btrim(concat_ws(' '::text, btrim(concat_ws(' '::text, "Ecatalog_all".genus, ifnull("Ecatalog_all"."specificEpithet", ''::text), ifnull("Ecatalog_all".subspecies, ''::text))), btrim(ifnull("Ecatalog_all"."scientificNameAuthorship", ''::text)))) AS "fullScientificName"
74
   FROM "Ecatalog_all"
75
  WHERE ("Ecatalog_all".genus IS NOT NULL);
76
COMMENT ON VIEW _specimens_09_list_of_unique_verbatim_author_taxa_with_genus IS '
77
Check: should return 45997 rows
78
';
79

    
80
CREATE VIEW _specimens_10_count_number_of_records_by_institution AS
81
 SELECT "Ecatalog_all".specimen_duplicate_institutions, 
82
    count(*) AS records
83
   FROM "Ecatalog_all"
84
  GROUP BY "Ecatalog_all".specimen_duplicate_institutions;
85
COMMENT ON VIEW _specimens_10_count_number_of_records_by_institution IS '
86
Note: Majority should be from ''NY''; these are standard herbarium acronyms
87

    
88
Check: should return 6 rows
89
';
90

    
91
CREATE VIEW _specimens_11_list_of_three_standard_political_divisions AS
92
 SELECT DISTINCT "Ecatalog_all".country, 
93
    "Ecatalog_all"."stateProvince", 
94
    "Ecatalog_all".county
95
   FROM "Ecatalog_all";
96
COMMENT ON VIEW _specimens_11_list_of_three_standard_political_divisions IS '
97
Note: character set issues may cause mis-matches. This query is a good way to reveal character set issues, either in source db or in BIEN
98

    
99
Check: should return 5232 rows
100
';
101

    
102
CREATE VIEW _specimens_12_distinct_collector_name_collect_num_date_w_count AS
103
 SELECT ifnull("Ecatalog_all"."recordedBy", ''::text) AS "collectorName", 
104
    ifnull("Ecatalog_all"."collectorNumber", ''::text) AS "collectionNumber", 
105
    concat_ws('-'::text, "Ecatalog_all"."yearCollected", "Ecatalog_all"."monthCollected", "Ecatalog_all"."dayCollected") AS "dateCollected", 
106
    count(*) AS "specimenRecords"
107
   FROM "Ecatalog_all"
108
  GROUP BY "Ecatalog_all"."recordedBy", "Ecatalog_all"."collectorNumber", concat_ws('-'::text, "Ecatalog_all"."yearCollected", "Ecatalog_all"."monthCollected", "Ecatalog_all"."dayCollected")
109
  ORDER BY "Ecatalog_all"."recordedBy", "Ecatalog_all"."collectorNumber", concat_ws('-'::text, "Ecatalog_all"."yearCollected", "Ecatalog_all"."monthCollected", "Ecatalog_all"."dayCollected");
110
COMMENT ON VIEW _specimens_12_distinct_collector_name_collect_num_date_w_count IS '
111
Check: should return 309396 rows
112
';
113

    
114
CREATE VIEW _specimens_13_count_of_all_verbatim_and_decimal_lat_long AS
115
 SELECT ( SELECT count(*) AS count
116
           FROM "Ecatalog_all"
117
          WHERE ("Ecatalog_all"."decimalLatitude" IS NOT NULL)) AS "allLats", 
118
    ( SELECT count(*) AS count
119
           FROM "Ecatalog_all"
120
          WHERE ("Ecatalog_all"."decimalLatitude" ~ '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'::text)) AS "decimalLats", 
121
    ( SELECT count(*) AS count
122
           FROM "Ecatalog_all"
123
          WHERE ("Ecatalog_all"."decimalLongitude" IS NOT NULL)) AS "allLongs", 
124
    ( SELECT count(*) AS count
125
           FROM "Ecatalog_all"
126
          WHERE ("Ecatalog_all"."decimalLongitude" ~ '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'::text)) AS "decimalLongs";
127
COMMENT ON VIEW _specimens_13_count_of_all_verbatim_and_decimal_lat_long IS '
128
Check: should return 1 row
129
';
130

    
131
CREATE VIEW _specimens_14_count_of_all_invalid_verbatim_lat_long AS
132
 SELECT ( SELECT count(*) AS count
133
           FROM "Ecatalog_all"
134
          WHERE ((("Ecatalog_all"."decimalLatitude")::double precision > (90)::double precision) OR (("Ecatalog_all"."decimalLatitude")::double precision < ((-90))::double precision))) AS "badLats", 
135
    ( SELECT count(*) AS count
136
           FROM "Ecatalog_all"
137
          WHERE ((("Ecatalog_all"."decimalLongitude")::double precision > (180)::double precision) OR (("Ecatalog_all"."decimalLongitude")::double precision < ((-180))::double precision))) AS "badLongs";
138
COMMENT ON VIEW _specimens_14_count_of_all_invalid_verbatim_lat_long IS '
139
Check: should return 1 row
140
';
141

    
142
CREATE VIEW _specimens_15_list_distinct_locality_descriptions AS
143
 SELECT DISTINCT "Ecatalog_all".locality__main AS "localityDescription"
144
   FROM "Ecatalog_all"
145
  WHERE ("Ecatalog_all".locality__main IS NOT NULL);
146
COMMENT ON VIEW _specimens_15_list_distinct_locality_descriptions IS '
147
Check: should return 125592 records
148
';
149

    
150
CREATE VIEW _specimens_16_list_distinct_specimen_descriptions AS
151
 SELECT "Ecatalog_all"."specimenDescription"
152
   FROM "Ecatalog_all"
153
  WHERE ("Ecatalog_all"."specimenDescription" IS NOT NULL);
154
COMMENT ON VIEW _specimens_16_list_distinct_specimen_descriptions IS '
155
Note: specimens descriptions in nybg extract is in column "specimenDescription"
156

    
157
Check: should return 158460 records
158
';
    (1-1/1)