Revision 12904
Added by Aaron Marcuse-Kubitza almost 11 years ago
trunk/validation/aggregating/specimens/NY/qualitative_validations_source_db_NYBG.VegCore.sql | ||
---|---|---|
8 | 8 |
|
9 | 9 |
-- ------------------------------- |
10 | 10 |
-- 1. Count of total records (specimens) in source db |
11 |
-- Check: should return 1 row |
|
12 | 11 |
-- ------------------------------- |
13 | 12 |
CREATE OR REPLACE VIEW _specimens_01_count_of_total_records_specimens_in_source_db AS |
14 | 13 |
SELECT COUNT(*) "totalSpecimenRecords" |
15 | 14 |
FROM "Ecatalog_all"; |
15 |
/* |
|
16 |
Check: should return 1 row |
|
17 |
*/ |
|
16 | 18 |
|
17 | 19 |
-- ------------------------------- |
18 | 20 |
-- 2. Count of unique (verbatim) non-null families |
19 |
-- Check: should return 1 row |
|
20 | 21 |
-- ------------------------------- |
21 | 22 |
CREATE OR REPLACE VIEW _specimens_02_count_of_unique_verbatim_families AS |
22 | 23 |
SELECT COUNT(DISTINCT family) AS families FROM "Ecatalog_all" WHERE family IS NOT NULL; |
24 |
/* |
|
25 |
Check: should return 1 row |
|
26 |
*/ |
|
23 | 27 |
|
24 | 28 |
-- ------------------------------- |
25 | 29 |
-- 3. List of verbatim families |
26 |
-- Check: should return same number of rows |
|
27 | 30 |
-- ------------------------------- |
28 | 31 |
CREATE OR REPLACE VIEW _specimens_03_list_of_verbatim_families AS |
29 | 32 |
SELECT DISTINCT CONCAT_WS(' ', genus, "specificEpithet") AS "specificEpithet" |
30 | 33 |
FROM "Ecatalog_all" |
31 | 34 |
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL; |
35 |
/* |
|
36 |
Check: should return same number of rows |
|
37 |
*/ |
|
32 | 38 |
|
33 | 39 |
-- ------------------------------- |
34 | 40 |
-- 4. Count of unique (verbatim) non-null species, without author |
35 |
-- Check: should return 1 row |
|
36 | 41 |
-- ------------------------------- |
37 | 42 |
CREATE OR REPLACE VIEW _specimens_04_count_of_unique_verbatim_species_without_author AS |
38 | 43 |
SELECT COUNT(DISTINCT CONCAT_WS(' ', genus, "specificEpithet")) AS subspecies FROM "Ecatalog_all" |
39 | 44 |
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL AND subspecies IS NOT NULL; |
45 |
/* |
|
46 |
Check: should return 1 row |
|
47 |
*/ |
|
40 | 48 |
|
41 | 49 |
-- ------------------------------- |
42 | 50 |
-- 5. List of verbatim species, excluding author |
43 |
-- Check: should return 3335 rows |
|
44 | 51 |
-- ------------------------------- |
45 | 52 |
CREATE OR REPLACE VIEW _specimens_05_list_of_verbatim_species_excluding_author AS |
46 | 53 |
SELECT DISTINCT CONCAT_WS(' ', genus, "specificEpithet") AS subspecies FROM "Ecatalog_all" |
47 | 54 |
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL AND subspecies IS NOT NULL; |
55 |
/* |
|
56 |
Check: should return 3335 rows |
|
57 |
*/ |
|
48 | 58 |
|
49 | 59 |
-- ------------------------------- |
50 | 60 |
-- 6. Count of unique (verbatim) non-null subspecific taxa, without author |
51 |
-- Check: should return 1 row |
|
52 | 61 |
-- ------------------------------- |
53 | 62 |
CREATE OR REPLACE VIEW _specimens_06_count_of_unique_verbatim_subsp_taxa_without_author AS |
54 | 63 |
SELECT COUNT(DISTINCT CONCAT_WS(' ', genus, "specificEpithet", subspecies)) AS "specificEpithet" FROM "Ecatalog_all" |
55 | 64 |
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL; |
65 |
/* |
|
66 |
Check: should return 1 row |
|
67 |
*/ |
|
56 | 68 |
|
57 | 69 |
-- ------------------------------- |
58 | 70 |
-- 7. List of verbatim subspecific taxa, without author |
59 |
-- Check: should return 40145 rows |
|
60 | 71 |
-- ------------------------------- |
61 | 72 |
CREATE OR REPLACE VIEW _specimens_07_list_of_verbatim_subspecific_taxa_without_author AS |
62 | 73 |
SELECT DISTINCT CONCAT_WS(' ', genus, "specificEpithet") AS "specificEpithet" |
63 | 74 |
FROM "Ecatalog_all" |
64 | 75 |
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL; |
76 |
/* |
|
77 |
Check: should return 40145 rows |
|
78 |
*/ |
|
65 | 79 |
|
66 | 80 |
-- ------------------------------- |
67 | 81 |
-- 8. Count of unique (verbatim) taxa including author, for all taxa identified at least to genus |
68 |
-- Check: should return 1 row |
|
69 | 82 |
-- ------------------------------- |
70 | 83 |
CREATE OR REPLACE VIEW _specimens_08_count_of_unique_verbatim_author_taxa_with_genus AS |
71 | 84 |
SELECT COUNT(DISTINCT TRIM(CONCAT_WS(' ', |
... | ... | |
74 | 87 |
))) AS "fullScientificNames" |
75 | 88 |
FROM "Ecatalog_all" |
76 | 89 |
WHERE genus IS NOT NULL; |
90 |
/* |
|
91 |
Check: should return 1 row |
|
92 |
*/ |
|
77 | 93 |
|
78 | 94 |
-- ------------------------------- |
79 | 95 |
-- 9. List of unique (verbatim) taxa including author, for all taxa identified at least to genus |
80 |
-- Check: should return 45997 rows |
|
81 | 96 |
-- ------------------------------- |
82 | 97 |
CREATE OR REPLACE VIEW _specimens_09_list_of_unique_verbatim_author_taxa_with_genus AS |
83 | 98 |
SELECT DISTINCT TRIM(CONCAT_WS(' ', |
... | ... | |
86 | 101 |
)) AS "fullScientificName" |
87 | 102 |
FROM "Ecatalog_all" |
88 | 103 |
WHERE genus IS NOT NULL; |
104 |
/* |
|
105 |
Check: should return 45997 rows |
|
106 |
*/ |
|
89 | 107 |
|
90 | 108 |
-- ------------------------------- |
91 | 109 |
-- 10. Count number of records by institution |
92 |
-- Check: should return 6 rows |
|
93 | 110 |
-- Note: Majority should be from 'NY'; these are standard herbarium acronyms |
94 | 111 |
-- ------------------------------- |
95 | 112 |
CREATE OR REPLACE VIEW _specimens_10_count_number_of_records_by_institution AS |
96 | 113 |
SELECT specimen_duplicate_institutions, COUNT(*) AS records |
97 | 114 |
FROM "Ecatalog_all" |
98 | 115 |
GROUP BY specimen_duplicate_institutions; |
116 |
/* |
|
117 |
Check: should return 6 rows |
|
118 |
*/ |
|
99 | 119 |
|
100 | 120 |
-- ------------------------------- |
101 | 121 |
-- 11. List of three standard political divisions |
102 |
-- Check: should return 5232 rows |
|
103 | 122 |
-- Note: character set issues may cause mis-matches. This query is a good way to reveal |
104 | 123 |
-- character set issues, either in source db or in BIEN |
105 | 124 |
-- ------------------------------- |
106 | 125 |
CREATE OR REPLACE VIEW _specimens_11_list_of_three_standard_political_divisions AS |
107 | 126 |
SELECT DISTINCT country, "stateProvince", county |
108 | 127 |
FROM "Ecatalog_all"; |
128 |
/* |
|
129 |
Check: should return 5232 rows |
|
130 |
*/ |
|
109 | 131 |
|
110 | 132 |
-- ------------------------------- |
111 | 133 |
-- 12. Check distinct Collector names + collection numbers + collection dates, |
112 | 134 |
-- plus total records |
113 |
-- Check: should return 309396 rows |
|
114 | 135 |
-- ------------------------------- |
115 | 136 |
CREATE OR REPLACE VIEW _specimens_12_distinct_collector_name_collect_num_date_w_count AS |
116 | 137 |
SELECT IFNULL(recordedBy,'') AS "collectorName", IFNULL("collectorNumber",'') AS "collectionNumber", |
... | ... | |
118 | 139 |
FROM "Ecatalog_all" |
119 | 140 |
GROUP BY recordedBy, "collectorNumber", "CollectedDate" |
120 | 141 |
ORDER BY recordedBy, "collectorNumber", "CollectedDate"; |
142 |
/* |
|
143 |
Check: should return 309396 rows |
|
144 |
*/ |
|
121 | 145 |
|
122 | 146 |
-- ------------------------------- |
123 | 147 |
-- 13. Count of all verbatim Latitude and Longitude values, as well as all |
124 | 148 |
-- latitude and longitude values that are decimals numbers |
125 |
-- Check: should return 1 row |
|
126 | 149 |
-- ------------------------------- |
127 | 150 |
CREATE OR REPLACE VIEW _specimens_13_count_of_all_verbatim_and_decimal_lat_long AS |
128 | 151 |
SELECT |
... | ... | |
146 | 169 |
FROM "Ecatalog_all" |
147 | 170 |
WHERE "decimalLongitude" REGEXP '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$' |
148 | 171 |
) AS "decimalLongs"; |
172 |
/* |
|
173 |
Check: should return 1 row |
|
174 |
*/ |
|
149 | 175 |
|
150 | 176 |
-- ------------------------------- |
151 | 177 |
-- 14. Count of all verbatim Latitude and Longitude values that are not valid values |
152 | 178 |
-- of decimal latitude or decimal longitude |
153 |
-- Check: should return 1 row |
|
154 | 179 |
-- ------------------------------- |
155 | 180 |
CREATE OR REPLACE VIEW _specimens_14_count_of_all_invalid_verbatim_lat_long AS |
156 | 181 |
SELECT |
... | ... | |
164 | 189 |
FROM "Ecatalog_all" |
165 | 190 |
WHERE "decimalLongitude">180 OR "decimalLongitude"<-180 |
166 | 191 |
) AS "badLongs"; |
192 |
/* |
|
193 |
Check: should return 1 row |
|
194 |
*/ |
|
167 | 195 |
|
168 | 196 |
-- ------------------------------- |
169 | 197 |
-- 15. List distinct non-null locality descriptions |
170 |
-- Check: should return 125592 records |
|
171 | 198 |
-- ------------------------------- |
172 | 199 |
CREATE OR REPLACE VIEW _specimens_15_list_distinct_locality_descriptions AS |
173 | 200 |
SELECT DISTINCT locality__main AS "localityDescription" |
174 | 201 |
FROM "Ecatalog_all" |
175 | 202 |
WHERE locality__main IS NOT NULL; |
203 |
/* |
|
204 |
Check: should return 125592 records |
|
205 |
*/ |
|
176 | 206 |
|
177 | 207 |
-- ------------------------------- |
178 | 208 |
-- 16. List distinct non-null specimen descriptions |
179 |
-- Check: should return 158460 records |
|
180 | 209 |
-- Note: specimens descriptions in nybg extract is in column PlantFungDescription |
181 | 210 |
-- ------------------------------- |
182 | 211 |
CREATE OR REPLACE VIEW _specimens_16_list_distinct_specimen_descriptions AS |
183 | 212 |
SELECT "specimenDescription" AS "specimenDescription" |
184 | 213 |
FROM "Ecatalog_all" |
185 | 214 |
WHERE "specimenDescription" IS NOT NULL; |
215 |
/* |
|
216 |
Check: should return 158460 records |
|
217 |
*/ |
Also available in: Unified diff
validation/aggregating/specimens/NY/qualitative_validations_source_db_NYBG.VegCore.sql: moved "Check" comments to after the query, using the steps at wiki.vegpath.org/Aggregating_validations_refactoring#translate-to-Postgres