Revision 12902
Added by Aaron Marcuse-Kubitza almost 11 years ago
trunk/validation/aggregating/specimens/NY/qualitative_validations_source_db_NYBG.VegCore.sql | ||
---|---|---|
10 | 10 |
-- 1. Count of total records (specimens) in source db |
11 | 11 |
-- Check: should return 1 row |
12 | 12 |
-- ------------------------------- |
13 |
CREATE OR REPLACE VIEW _specimens_01_count_of_total_records_specimens_in_source_db AS |
|
13 | 14 |
SELECT COUNT(*) "totalSpecimenRecords" |
14 | 15 |
FROM "Ecatalog_all"; |
15 | 16 |
|
... | ... | |
17 | 18 |
-- 2. Count of unique (verbatim) non-null families |
18 | 19 |
-- Check: should return 1 row |
19 | 20 |
-- ------------------------------- |
21 |
CREATE OR REPLACE VIEW _specimens_02_count_of_unique_verbatim_families AS |
|
20 | 22 |
SELECT COUNT(DISTINCT family) AS families FROM "Ecatalog_all" WHERE family IS NOT NULL; |
21 | 23 |
|
22 | 24 |
-- ------------------------------- |
23 | 25 |
-- 3. List of verbatim families |
24 | 26 |
-- Check: should return same number of rows |
25 | 27 |
-- ------------------------------- |
28 |
CREATE OR REPLACE VIEW _specimens_03_list_of_verbatim_families AS |
|
26 | 29 |
SELECT DISTINCT CONCAT_WS(' ', genus, "specificEpithet") AS "specificEpithet" |
27 | 30 |
FROM "Ecatalog_all" |
28 | 31 |
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL; |
... | ... | |
31 | 34 |
-- 4. Count of unique (verbatim) non-null species, without author |
32 | 35 |
-- Check: should return 1 row |
33 | 36 |
-- ------------------------------- |
37 |
CREATE OR REPLACE VIEW _specimens_04_count_of_unique_verbatim_species_without_author AS |
|
34 | 38 |
SELECT COUNT(DISTINCT CONCAT_WS(' ', genus, "specificEpithet")) AS subspecies FROM "Ecatalog_all" |
35 | 39 |
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL AND subspecies IS NOT NULL; |
36 | 40 |
|
... | ... | |
38 | 42 |
-- 5. List of verbatim species, excluding author |
39 | 43 |
-- Check: should return 3335 rows |
40 | 44 |
-- ------------------------------- |
45 |
CREATE OR REPLACE VIEW _specimens_05_list_of_verbatim_species_excluding_author AS |
|
41 | 46 |
SELECT DISTINCT CONCAT_WS(' ', genus, "specificEpithet") AS subspecies FROM "Ecatalog_all" |
42 | 47 |
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL AND subspecies IS NOT NULL; |
43 | 48 |
|
... | ... | |
45 | 50 |
-- 6. Count of unique (verbatim) non-null subspecific taxa, without author |
46 | 51 |
-- Check: should return 1 row |
47 | 52 |
-- ------------------------------- |
53 |
CREATE OR REPLACE VIEW _specimens_06_count_of_unique_verbatim_subsp_taxa_without_author AS |
|
48 | 54 |
SELECT COUNT(DISTINCT CONCAT_WS(' ', genus, "specificEpithet", subspecies)) AS "specificEpithet" FROM "Ecatalog_all" |
49 | 55 |
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL; |
50 | 56 |
|
... | ... | |
52 | 58 |
-- 7. List of verbatim subspecific taxa, without author |
53 | 59 |
-- Check: should return 40145 rows |
54 | 60 |
-- ------------------------------- |
61 |
CREATE OR REPLACE VIEW _specimens_07_list_of_verbatim_subspecific_taxa_without_author AS |
|
55 | 62 |
SELECT DISTINCT CONCAT_WS(' ', genus, "specificEpithet") AS "specificEpithet" |
56 | 63 |
FROM "Ecatalog_all" |
57 | 64 |
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL; |
... | ... | |
60 | 67 |
-- 8. Count of unique (verbatim) taxa including author, for all taxa identified at least to genus |
61 | 68 |
-- Check: should return 1 row |
62 | 69 |
-- ------------------------------- |
70 |
CREATE OR REPLACE VIEW _specimens_08_count_of_unique_verbatim_author_taxa_with_genus AS |
|
63 | 71 |
SELECT COUNT(DISTINCT TRIM(CONCAT_WS(' ', |
64 | 72 |
TRIM(CONCAT_WS(' ', genus, IFNULL("specificEpithet",''), IFNULL(subspecies,''))), |
65 | 73 |
TRIM(IFNULL("scientificNameAuthorship",'')) |
... | ... | |
71 | 79 |
-- 9. List of unique (verbatim) taxa including author, for all taxa identified at least to genus |
72 | 80 |
-- Check: should return 45997 rows |
73 | 81 |
-- ------------------------------- |
82 |
CREATE OR REPLACE VIEW _specimens_09_list_of_unique_verbatim_author_taxa_with_genus AS |
|
74 | 83 |
SELECT DISTINCT TRIM(CONCAT_WS(' ', |
75 | 84 |
TRIM(CONCAT_WS(' ', genus, IFNULL("specificEpithet",''), IFNULL(subspecies,''))), |
76 | 85 |
TRIM(IFNULL("scientificNameAuthorship",'')) |
... | ... | |
83 | 92 |
-- Check: should return 6 rows |
84 | 93 |
-- Note: Majority should be from 'NY'; these are standard herbarium acronyms |
85 | 94 |
-- ------------------------------- |
95 |
CREATE OR REPLACE VIEW _specimens_10_count_number_of_records_by_institution AS |
|
86 | 96 |
SELECT specimen_duplicate_institutions, COUNT(*) AS records |
87 | 97 |
FROM "Ecatalog_all" |
88 | 98 |
GROUP BY specimen_duplicate_institutions; |
... | ... | |
93 | 103 |
-- Note: character set issues may cause mis-matches. This query is a good way to reveal |
94 | 104 |
-- character set issues, either in source db or in BIEN |
95 | 105 |
-- ------------------------------- |
106 |
CREATE OR REPLACE VIEW _specimens_11_list_of_three_standard_political_divisions AS |
|
96 | 107 |
SELECT DISTINCT country, "stateProvince", county |
97 | 108 |
FROM "Ecatalog_all"; |
98 | 109 |
|
... | ... | |
101 | 112 |
-- plus total records |
102 | 113 |
-- Check: should return 309396 rows |
103 | 114 |
-- ------------------------------- |
115 |
CREATE OR REPLACE VIEW _specimens_12_distinct_collector_name_collect_num_date_w_count AS |
|
104 | 116 |
SELECT IFNULL(recordedBy,'') AS "collectorName", IFNULL("collectorNumber",'') AS "collectionNumber", |
105 | 117 |
IFNULL("CollectedDate",'') AS "dataCollected", COUNT(*) AS "specimenRecords" |
106 | 118 |
FROM "Ecatalog_all" |
... | ... | |
112 | 124 |
-- latitude and longitude values that are decimals numbers |
113 | 125 |
-- Check: should return 1 row |
114 | 126 |
-- ------------------------------- |
127 |
CREATE OR REPLACE VIEW _specimens_13_count_of_all_verbatim_and_decimal_lat_long AS |
|
115 | 128 |
SELECT |
116 | 129 |
( |
117 | 130 |
SELECT COUNT(*) |
... | ... | |
139 | 152 |
-- of decimal latitude or decimal longitude |
140 | 153 |
-- Check: should return 1 row |
141 | 154 |
-- ------------------------------- |
155 |
CREATE OR REPLACE VIEW _specimens_14_count_of_all_invalid_verbatim_lat_long AS |
|
142 | 156 |
SELECT |
143 | 157 |
( |
144 | 158 |
SELECT COUNT(*) |
... | ... | |
155 | 169 |
-- 15. List distinct non-null locality descriptions |
156 | 170 |
-- Check: should return 125592 records |
157 | 171 |
-- ------------------------------- |
172 |
CREATE OR REPLACE VIEW _specimens_15_list_distinct_locality_descriptions AS |
|
158 | 173 |
SELECT DISTINCT locality__main AS "localityDescription" |
159 | 174 |
FROM "Ecatalog_all" |
160 | 175 |
WHERE locality__main IS NOT NULL; |
... | ... | |
164 | 179 |
-- Check: should return 158460 records |
165 | 180 |
-- Note: specimens descriptions in nybg extract is in column PlantFungDescription |
166 | 181 |
-- ------------------------------- |
182 |
CREATE OR REPLACE VIEW _specimens_16_list_distinct_specimen_descriptions AS |
|
167 | 183 |
SELECT "specimenDescription" AS "specimenDescription" |
168 | 184 |
FROM "Ecatalog_all" |
169 | 185 |
WHERE "specimenDescription" IS NOT NULL; |
Also available in: Unified diff
validation/aggregating/specimens/NY/qualitative_validations_source_db_NYBG.VegCore.sql: prepended CREATE VIEW, using the steps at wiki.vegpath.org/Aggregating_validations_refactoring#prepend-CREATE-VIEW and the same abbreviations as the output queries (validation/aggregating/specimens/qualitative_validations_specimens.sql)