1
|
-- -------------------------------------------------------------------------
|
2
|
-- Quantitative validation queries against NYBG source db
|
3
|
--
|
4
|
-- Queries staging table vegbien."NY"."Ecatalog_all" @vegbiendev.nceas.ucsb.edu
|
5
|
-- -------------------------------------------------------------------------
|
6
|
|
7
|
SET search_path TO "NY";
|
8
|
|
9
|
CREATE VIEW _specimens_01_count_of_total_records_specimens_in_source_db AS
|
10
|
SELECT count(*) AS "totalSpecimenRecords"
|
11
|
FROM "Ecatalog_all";
|
12
|
COMMENT ON VIEW _specimens_01_count_of_total_records_specimens_in_source_db IS '
|
13
|
Check: should return 1 row
|
14
|
';
|
15
|
|
16
|
CREATE VIEW _specimens_02_count_of_unique_verbatim_families AS
|
17
|
SELECT count(DISTINCT "Ecatalog_all".family) AS families
|
18
|
FROM "Ecatalog_all"
|
19
|
WHERE ("Ecatalog_all".family IS NOT NULL);
|
20
|
COMMENT ON VIEW _specimens_02_count_of_unique_verbatim_families IS '
|
21
|
Check: should return 1 row
|
22
|
';
|
23
|
|
24
|
CREATE VIEW _specimens_03_list_of_verbatim_families AS
|
25
|
SELECT DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet") AS "specificEpithet"
|
26
|
FROM "Ecatalog_all"
|
27
|
WHERE (("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL));
|
28
|
COMMENT ON VIEW _specimens_03_list_of_verbatim_families IS '
|
29
|
Check: should return same number of rows
|
30
|
';
|
31
|
|
32
|
CREATE VIEW _specimens_04_count_of_unique_verbatim_species_without_author AS
|
33
|
SELECT count(DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet")) AS subspecies
|
34
|
FROM "Ecatalog_all"
|
35
|
WHERE ((("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL)) AND ("Ecatalog_all".subspecies IS NOT NULL));
|
36
|
COMMENT ON VIEW _specimens_04_count_of_unique_verbatim_species_without_author IS '
|
37
|
Check: should return 1 row
|
38
|
';
|
39
|
|
40
|
CREATE VIEW _specimens_05_list_of_verbatim_species_excluding_author AS
|
41
|
SELECT DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet") AS subspecies
|
42
|
FROM "Ecatalog_all"
|
43
|
WHERE ((("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL)) AND ("Ecatalog_all".subspecies IS NOT NULL));
|
44
|
COMMENT ON VIEW _specimens_05_list_of_verbatim_species_excluding_author IS '
|
45
|
Check: should return 3335 rows
|
46
|
';
|
47
|
|
48
|
CREATE VIEW _specimens_06_count_of_unique_verb_subsp_taxa_without_author AS
|
49
|
SELECT count(DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet", "Ecatalog_all".subspecies)) AS "specificEpithet"
|
50
|
FROM "Ecatalog_all"
|
51
|
WHERE (("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL));
|
52
|
COMMENT ON VIEW _specimens_06_count_of_unique_verb_subsp_taxa_without_author IS '
|
53
|
Check: should return 1 row
|
54
|
';
|
55
|
|
56
|
CREATE VIEW _specimens_07_list_of_verbatim_subspecific_taxa_without_author AS
|
57
|
SELECT DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet") AS "specificEpithet"
|
58
|
FROM "Ecatalog_all"
|
59
|
WHERE (("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL));
|
60
|
COMMENT ON VIEW _specimens_07_list_of_verbatim_subspecific_taxa_without_author IS '
|
61
|
Check: should return 40145 rows
|
62
|
';
|
63
|
|
64
|
CREATE VIEW _specimens_08_count_of_unique_verbatim_author_taxa_with_genus AS
|
65
|
SELECT count(DISTINCT btrim(concat_ws(' '::text, btrim(concat_ws(' '::text, "Ecatalog_all".genus, ifnull("Ecatalog_all"."specificEpithet", ''::text), ifnull("Ecatalog_all".subspecies, ''::text))), btrim(ifnull("Ecatalog_all"."scientificNameAuthorship", ''::text))))) AS "fullScientificNames"
|
66
|
FROM "Ecatalog_all"
|
67
|
WHERE ("Ecatalog_all".genus IS NOT NULL);
|
68
|
COMMENT ON VIEW _specimens_08_count_of_unique_verbatim_author_taxa_with_genus IS '
|
69
|
Check: should return 1 row
|
70
|
';
|
71
|
|
72
|
CREATE VIEW _specimens_09_list_of_unique_verbatim_author_taxa_with_genus AS
|
73
|
SELECT DISTINCT btrim(concat_ws(' '::text, btrim(concat_ws(' '::text, "Ecatalog_all".genus, ifnull("Ecatalog_all"."specificEpithet", ''::text), ifnull("Ecatalog_all".subspecies, ''::text))), btrim(ifnull("Ecatalog_all"."scientificNameAuthorship", ''::text)))) AS "fullScientificName"
|
74
|
FROM "Ecatalog_all"
|
75
|
WHERE ("Ecatalog_all".genus IS NOT NULL);
|
76
|
COMMENT ON VIEW _specimens_09_list_of_unique_verbatim_author_taxa_with_genus IS '
|
77
|
Check: should return 45997 rows
|
78
|
';
|
79
|
|
80
|
CREATE VIEW _specimens_10_count_number_of_records_by_institution AS
|
81
|
SELECT "Ecatalog_all".specimen_duplicate_institutions,
|
82
|
count(*) AS records
|
83
|
FROM "Ecatalog_all"
|
84
|
GROUP BY "Ecatalog_all".specimen_duplicate_institutions;
|
85
|
COMMENT ON VIEW _specimens_10_count_number_of_records_by_institution IS '
|
86
|
Note: Majority should be from ''NY''; these are standard herbarium acronyms
|
87
|
|
88
|
Check: should return 6 rows
|
89
|
';
|
90
|
|
91
|
CREATE VIEW _specimens_11_list_of_three_standard_political_divisions AS
|
92
|
SELECT DISTINCT "Ecatalog_all".country,
|
93
|
"Ecatalog_all"."stateProvince",
|
94
|
"Ecatalog_all".county
|
95
|
FROM "Ecatalog_all";
|
96
|
COMMENT ON VIEW _specimens_11_list_of_three_standard_political_divisions IS '
|
97
|
Note: character set issues may cause mis-matches. This query is a good way to reveal character set issues, either in source db or in BIEN
|
98
|
|
99
|
Check: should return 5232 rows
|
100
|
';
|
101
|
|
102
|
CREATE VIEW _specimens_12_distinct_collector_name_collect_num_date_w_count AS
|
103
|
SELECT ifnull("Ecatalog_all"."recordedBy", ''::text) AS "collectorName",
|
104
|
ifnull("Ecatalog_all"."collectorNumber", ''::text) AS "collectionNumber",
|
105
|
concat_ws('-'::text, "Ecatalog_all"."yearCollected", "Ecatalog_all"."monthCollected", "Ecatalog_all"."dayCollected") AS "dateCollected",
|
106
|
count(*) AS "specimenRecords"
|
107
|
FROM "Ecatalog_all"
|
108
|
GROUP BY "Ecatalog_all"."recordedBy", "Ecatalog_all"."collectorNumber", concat_ws('-'::text, "Ecatalog_all"."yearCollected", "Ecatalog_all"."monthCollected", "Ecatalog_all"."dayCollected")
|
109
|
ORDER BY "Ecatalog_all"."recordedBy", "Ecatalog_all"."collectorNumber", concat_ws('-'::text, "Ecatalog_all"."yearCollected", "Ecatalog_all"."monthCollected", "Ecatalog_all"."dayCollected");
|
110
|
COMMENT ON VIEW _specimens_12_distinct_collector_name_collect_num_date_w_count IS '
|
111
|
Check: should return 309396 rows
|
112
|
';
|
113
|
|
114
|
CREATE VIEW _specimens_13_count_of_all_verbatim_and_decimal_lat_long AS
|
115
|
SELECT ( SELECT count(*) AS count
|
116
|
FROM "Ecatalog_all"
|
117
|
WHERE ("Ecatalog_all"."decimalLatitude" IS NOT NULL)) AS "allLats",
|
118
|
( SELECT count(*) AS count
|
119
|
FROM "Ecatalog_all"
|
120
|
WHERE ("Ecatalog_all"."decimalLatitude" ~ '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'::text)) AS "decimalLats",
|
121
|
( SELECT count(*) AS count
|
122
|
FROM "Ecatalog_all"
|
123
|
WHERE ("Ecatalog_all"."decimalLongitude" IS NOT NULL)) AS "allLongs",
|
124
|
( SELECT count(*) AS count
|
125
|
FROM "Ecatalog_all"
|
126
|
WHERE ("Ecatalog_all"."decimalLongitude" ~ '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'::text)) AS "decimalLongs";
|
127
|
COMMENT ON VIEW _specimens_13_count_of_all_verbatim_and_decimal_lat_long IS '
|
128
|
Check: should return 1 row
|
129
|
';
|
130
|
|
131
|
CREATE VIEW _specimens_14_count_of_all_invalid_verbatim_lat_long AS
|
132
|
SELECT ( SELECT count(*) AS count
|
133
|
FROM "Ecatalog_all"
|
134
|
WHERE ((("Ecatalog_all"."decimalLatitude")::double precision > (90)::double precision) OR (("Ecatalog_all"."decimalLatitude")::double precision < ((-90))::double precision))) AS "badLats",
|
135
|
( SELECT count(*) AS count
|
136
|
FROM "Ecatalog_all"
|
137
|
WHERE ((("Ecatalog_all"."decimalLongitude")::double precision > (180)::double precision) OR (("Ecatalog_all"."decimalLongitude")::double precision < ((-180))::double precision))) AS "badLongs";
|
138
|
COMMENT ON VIEW _specimens_14_count_of_all_invalid_verbatim_lat_long IS '
|
139
|
Check: should return 1 row
|
140
|
';
|
141
|
|
142
|
CREATE VIEW _specimens_15_list_distinct_locality_descriptions AS
|
143
|
SELECT DISTINCT "Ecatalog_all".locality__main AS "localityDescription"
|
144
|
FROM "Ecatalog_all"
|
145
|
WHERE ("Ecatalog_all".locality__main IS NOT NULL);
|
146
|
COMMENT ON VIEW _specimens_15_list_distinct_locality_descriptions IS '
|
147
|
Check: should return 125592 records
|
148
|
';
|
149
|
|
150
|
CREATE VIEW _specimens_16_list_distinct_specimen_descriptions AS
|
151
|
SELECT "Ecatalog_all"."specimenDescription"
|
152
|
FROM "Ecatalog_all"
|
153
|
WHERE ("Ecatalog_all"."specimenDescription" IS NOT NULL);
|
154
|
COMMENT ON VIEW _specimens_16_list_distinct_specimen_descriptions IS '
|
155
|
Note: specimens descriptions in nybg extract is in column "specimenDescription"
|
156
|
|
157
|
Check: should return 158460 records
|
158
|
';
|