Project

General

Profile

« Previous | Next » 

Revision 12904

validation/aggregating/specimens/NY/qualitative_validations_source_db_NYBG.VegCore.sql: moved "Check" comments to after the query, using the steps at wiki.vegpath.org/Aggregating_validations_refactoring#translate-to-Postgres

View differences:

trunk/validation/aggregating/specimens/NY/qualitative_validations_source_db_NYBG.VegCore.sql
8 8

  
9 9
-- -------------------------------
10 10
-- 1. Count of total records (specimens) in source db
11
-- Check: should return 1 row
12 11
-- -------------------------------
13 12
CREATE OR REPLACE VIEW _specimens_01_count_of_total_records_specimens_in_source_db AS
14 13
SELECT COUNT(*) "totalSpecimenRecords"
15 14
FROM "Ecatalog_all";
15
/*
16
Check: should return 1 row
17
*/
16 18

  
17 19
-- -------------------------------
18 20
-- 2. Count of unique (verbatim) non-null families
19
-- Check: should return 1 row
20 21
-- -------------------------------
21 22
CREATE OR REPLACE VIEW _specimens_02_count_of_unique_verbatim_families AS
22 23
SELECT COUNT(DISTINCT family) AS families FROM "Ecatalog_all" WHERE family IS NOT NULL;
24
/*
25
Check: should return 1 row
26
*/
23 27

  
24 28
-- -------------------------------
25 29
-- 3. List of verbatim families
26
-- Check: should return same number of rows
27 30
-- -------------------------------
28 31
CREATE OR REPLACE VIEW _specimens_03_list_of_verbatim_families AS
29 32
SELECT DISTINCT CONCAT_WS(' ', genus, "specificEpithet") AS "specificEpithet"
30 33
FROM "Ecatalog_all"
31 34
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL;
35
/*
36
Check: should return same number of rows
37
*/
32 38

  
33 39
-- -------------------------------
34 40
-- 4. Count of unique (verbatim) non-null species, without author
35
-- Check: should return 1 row
36 41
-- -------------------------------
37 42
CREATE OR REPLACE VIEW _specimens_04_count_of_unique_verbatim_species_without_author AS
38 43
SELECT COUNT(DISTINCT CONCAT_WS(' ', genus, "specificEpithet")) AS subspecies FROM "Ecatalog_all"
39 44
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL AND subspecies IS NOT NULL;
45
/*
46
Check: should return 1 row
47
*/
40 48

  
41 49
-- -------------------------------
42 50
-- 5. List of verbatim species, excluding author
43
-- Check: should return 3335 rows
44 51
-- -------------------------------
45 52
CREATE OR REPLACE VIEW _specimens_05_list_of_verbatim_species_excluding_author AS
46 53
SELECT DISTINCT CONCAT_WS(' ', genus, "specificEpithet") AS subspecies FROM "Ecatalog_all"
47 54
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL AND subspecies IS NOT NULL;
55
/*
56
Check: should return 3335 rows
57
*/
48 58

  
49 59
-- -------------------------------
50 60
-- 6. Count of unique (verbatim) non-null subspecific taxa, without author
51
-- Check: should return 1 row
52 61
-- -------------------------------
53 62
CREATE OR REPLACE VIEW _specimens_06_count_of_unique_verbatim_subsp_taxa_without_author AS
54 63
SELECT COUNT(DISTINCT CONCAT_WS(' ', genus, "specificEpithet", subspecies)) AS "specificEpithet" FROM "Ecatalog_all"
55 64
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL;
65
/*
66
Check: should return 1 row
67
*/
56 68

  
57 69
-- -------------------------------
58 70
-- 7. List of verbatim subspecific taxa, without author
59
-- Check: should return 40145 rows
60 71
-- -------------------------------
61 72
CREATE OR REPLACE VIEW _specimens_07_list_of_verbatim_subspecific_taxa_without_author AS
62 73
SELECT DISTINCT CONCAT_WS(' ', genus, "specificEpithet") AS "specificEpithet"
63 74
FROM "Ecatalog_all"
64 75
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL;
76
/*
77
Check: should return 40145 rows
78
*/
65 79

  
66 80
-- -------------------------------
67 81
-- 8. Count of unique (verbatim) taxa including author, for all taxa identified at least to genus
68
-- Check: should return 1 row
69 82
-- -------------------------------
70 83
CREATE OR REPLACE VIEW _specimens_08_count_of_unique_verbatim_author_taxa_with_genus AS
71 84
SELECT COUNT(DISTINCT TRIM(CONCAT_WS(' ',
......
74 87
))) AS "fullScientificNames"
75 88
FROM "Ecatalog_all"
76 89
WHERE genus IS NOT NULL;
90
/*
91
Check: should return 1 row
92
*/
77 93

  
78 94
-- -------------------------------
79 95
-- 9. List of unique (verbatim) taxa including author, for all taxa identified at least to genus
80
-- Check: should return 45997 rows
81 96
-- -------------------------------
82 97
CREATE OR REPLACE VIEW _specimens_09_list_of_unique_verbatim_author_taxa_with_genus AS
83 98
SELECT DISTINCT TRIM(CONCAT_WS(' ',
......
86 101
)) AS "fullScientificName"
87 102
FROM "Ecatalog_all"
88 103
WHERE genus IS NOT NULL;
104
/*
105
Check: should return 45997 rows
106
*/
89 107

  
90 108
-- -------------------------------
91 109
-- 10. Count number of records by institution
92
-- Check: should return 6 rows
93 110
-- Note: Majority should be from 'NY'; these are standard herbarium acronyms
94 111
-- -------------------------------
95 112
CREATE OR REPLACE VIEW _specimens_10_count_number_of_records_by_institution AS
96 113
SELECT specimen_duplicate_institutions, COUNT(*) AS records
97 114
FROM "Ecatalog_all"
98 115
GROUP BY specimen_duplicate_institutions;
116
/*
117
Check: should return 6 rows
118
*/
99 119

  
100 120
-- -------------------------------
101 121
-- 11. List of three standard political divisions
102
-- Check: should return 5232 rows
103 122
-- Note: character set issues may cause mis-matches. This query is a good way to reveal
104 123
-- character set issues, either in source db or in BIEN
105 124
-- -------------------------------
106 125
CREATE OR REPLACE VIEW _specimens_11_list_of_three_standard_political_divisions AS
107 126
SELECT DISTINCT country, "stateProvince", county
108 127
FROM "Ecatalog_all";
128
/*
129
Check: should return 5232 rows
130
*/
109 131

  
110 132
-- -------------------------------
111 133
-- 12. Check distinct Collector names + collection numbers + collection dates,
112 134
-- plus total records
113
-- Check: should return 309396 rows
114 135
-- -------------------------------
115 136
CREATE OR REPLACE VIEW _specimens_12_distinct_collector_name_collect_num_date_w_count AS
116 137
SELECT IFNULL(recordedBy,'') AS "collectorName", IFNULL("collectorNumber",'') AS "collectionNumber",
......
118 139
FROM "Ecatalog_all"
119 140
GROUP BY recordedBy, "collectorNumber", "CollectedDate"
120 141
ORDER BY recordedBy, "collectorNumber", "CollectedDate";
142
/*
143
Check: should return 309396 rows
144
*/
121 145

  
122 146
-- -------------------------------
123 147
-- 13. Count of all  verbatim Latitude and Longitude values, as well as all
124 148
-- latitude and longitude values that are decimals numbers
125
-- Check: should return 1 row
126 149
-- -------------------------------
127 150
CREATE OR REPLACE VIEW _specimens_13_count_of_all_verbatim_and_decimal_lat_long AS
128 151
SELECT
......
146 169
FROM "Ecatalog_all"
147 170
WHERE "decimalLongitude" REGEXP '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'
148 171
) AS "decimalLongs";
172
/*
173
Check: should return 1 row
174
*/
149 175

  
150 176
-- -------------------------------
151 177
-- 14. Count of all verbatim Latitude and Longitude values that are not valid values
152 178
-- of decimal latitude or decimal longitude
153
-- Check: should return 1 row
154 179
-- -------------------------------
155 180
CREATE OR REPLACE VIEW _specimens_14_count_of_all_invalid_verbatim_lat_long AS
156 181
SELECT
......
164 189
FROM "Ecatalog_all"
165 190
WHERE "decimalLongitude">180 OR "decimalLongitude"<-180
166 191
) AS "badLongs";
192
/*
193
Check: should return 1 row
194
*/
167 195

  
168 196
-- -------------------------------
169 197
-- 15. List distinct non-null locality descriptions
170
-- Check: should return 125592 records
171 198
-- -------------------------------
172 199
CREATE OR REPLACE VIEW _specimens_15_list_distinct_locality_descriptions AS
173 200
SELECT DISTINCT locality__main AS "localityDescription"
174 201
FROM "Ecatalog_all"
175 202
WHERE locality__main IS NOT NULL;
203
/*
204
Check: should return 125592 records
205
*/
176 206

  
177 207
-- -------------------------------
178 208
-- 16. List distinct non-null specimen descriptions
179
-- Check: should return 158460 records
180 209
-- Note: specimens descriptions in nybg extract is in column PlantFungDescription
181 210
-- -------------------------------
182 211
CREATE OR REPLACE VIEW _specimens_16_list_distinct_specimen_descriptions AS
183 212
SELECT "specimenDescription" AS "specimenDescription"
184 213
FROM "Ecatalog_all"
185 214
WHERE "specimenDescription" IS NOT NULL;
215
/*
216
Check: should return 158460 records
217
*/

Also available in: Unified diff