Revision 12925
Added by Aaron Marcuse-Kubitza over 10 years ago
trunk/validation/aggregating/specimens/NY/qualitative_validations_source_db_NYBG.VegCore.sql | ||
---|---|---|
6 | 6 |
|
7 | 7 |
SET search_path TO "NY"; |
8 | 8 |
|
9 |
CREATE VIEW _specimens_01_count_of_total_records_specimens_in_source_db AS
|
|
9 |
-- _specimens_01_count_of_total_records_specimens_in_source_db
|
|
10 | 10 |
SELECT count(*) AS "totalSpecimenRecords" |
11 | 11 |
FROM "Ecatalog_all"; |
12 |
COMMENT ON VIEW _specimens_01_count_of_total_records_specimens_in_source_db IS '
|
|
12 |
/*
|
|
13 | 13 |
Check: should return 1 row |
14 |
';
|
|
14 |
*/
|
|
15 | 15 |
|
16 |
CREATE VIEW _specimens_02_count_of_unique_verbatim_families AS
|
|
16 |
-- _specimens_02_count_of_unique_verbatim_families
|
|
17 | 17 |
SELECT count(DISTINCT "Ecatalog_all".family) AS families |
18 | 18 |
FROM "Ecatalog_all" |
19 | 19 |
WHERE ("Ecatalog_all".family IS NOT NULL); |
20 |
COMMENT ON VIEW _specimens_02_count_of_unique_verbatim_families IS '
|
|
20 |
/*
|
|
21 | 21 |
Check: should return 1 row |
22 |
';
|
|
22 |
*/
|
|
23 | 23 |
|
24 |
CREATE VIEW _specimens_03_list_of_verbatim_families AS
|
|
24 |
-- _specimens_03_list_of_verbatim_families
|
|
25 | 25 |
SELECT DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet") AS "specificEpithet" |
26 | 26 |
FROM "Ecatalog_all" |
27 | 27 |
WHERE (("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL)); |
28 |
COMMENT ON VIEW _specimens_03_list_of_verbatim_families IS '
|
|
28 |
/*
|
|
29 | 29 |
Check: should return same number of rows |
30 |
';
|
|
30 |
*/
|
|
31 | 31 |
|
32 |
CREATE VIEW _specimens_04_count_of_unique_verbatim_species_without_author AS
|
|
32 |
-- _specimens_04_count_of_unique_verbatim_species_without_author
|
|
33 | 33 |
SELECT count(DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet")) AS subspecies |
34 | 34 |
FROM "Ecatalog_all" |
35 | 35 |
WHERE ((("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL)) AND ("Ecatalog_all".subspecies IS NOT NULL)); |
36 |
COMMENT ON VIEW _specimens_04_count_of_unique_verbatim_species_without_author IS '
|
|
36 |
/*
|
|
37 | 37 |
Check: should return 1 row |
38 |
';
|
|
38 |
*/
|
|
39 | 39 |
|
40 |
CREATE VIEW _specimens_05_list_of_verbatim_species_excluding_author AS
|
|
40 |
-- _specimens_05_list_of_verbatim_species_excluding_author
|
|
41 | 41 |
SELECT DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet") AS subspecies |
42 | 42 |
FROM "Ecatalog_all" |
43 | 43 |
WHERE ((("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL)) AND ("Ecatalog_all".subspecies IS NOT NULL)); |
44 |
COMMENT ON VIEW _specimens_05_list_of_verbatim_species_excluding_author IS '
|
|
44 |
/*
|
|
45 | 45 |
Check: should return 3335 rows |
46 |
';
|
|
46 |
*/
|
|
47 | 47 |
|
48 |
CREATE VIEW _specimens_06_count_of_unique_verb_subsp_taxa_without_author AS
|
|
48 |
-- _specimens_06_count_of_unique_verb_subsp_taxa_without_author
|
|
49 | 49 |
SELECT count(DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet", "Ecatalog_all".subspecies)) AS "specificEpithet" |
50 | 50 |
FROM "Ecatalog_all" |
51 | 51 |
WHERE (("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL)); |
52 |
COMMENT ON VIEW _specimens_06_count_of_unique_verb_subsp_taxa_without_author IS '
|
|
52 |
/*
|
|
53 | 53 |
Check: should return 1 row |
54 |
';
|
|
54 |
*/
|
|
55 | 55 |
|
56 |
CREATE VIEW _specimens_07_list_of_verbatim_subspecific_taxa_without_author AS
|
|
56 |
-- _specimens_07_list_of_verbatim_subspecific_taxa_without_author
|
|
57 | 57 |
SELECT DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet") AS "specificEpithet" |
58 | 58 |
FROM "Ecatalog_all" |
59 | 59 |
WHERE (("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL)); |
60 |
COMMENT ON VIEW _specimens_07_list_of_verbatim_subspecific_taxa_without_author IS '
|
|
60 |
/*
|
|
61 | 61 |
Check: should return 40145 rows |
62 |
';
|
|
62 |
*/
|
|
63 | 63 |
|
64 |
CREATE VIEW _specimens_08_count_of_unique_verbatim_author_taxa_with_genus AS
|
|
64 |
-- _specimens_08_count_of_unique_verbatim_author_taxa_with_genus
|
|
65 | 65 |
SELECT count(DISTINCT btrim(concat_ws(' '::text, btrim(concat_ws(' '::text, "Ecatalog_all".genus, ifnull("Ecatalog_all"."specificEpithet", ''::text), ifnull("Ecatalog_all".subspecies, ''::text))), btrim(ifnull("Ecatalog_all"."scientificNameAuthorship", ''::text))))) AS "fullScientificNames" |
66 | 66 |
FROM "Ecatalog_all" |
67 | 67 |
WHERE ("Ecatalog_all".genus IS NOT NULL); |
68 |
COMMENT ON VIEW _specimens_08_count_of_unique_verbatim_author_taxa_with_genus IS '
|
|
68 |
/*
|
|
69 | 69 |
Check: should return 1 row |
70 |
';
|
|
70 |
*/
|
|
71 | 71 |
|
72 |
CREATE VIEW _specimens_09_list_of_unique_verbatim_author_taxa_with_genus AS
|
|
72 |
-- _specimens_09_list_of_unique_verbatim_author_taxa_with_genus
|
|
73 | 73 |
SELECT DISTINCT btrim(concat_ws(' '::text, btrim(concat_ws(' '::text, "Ecatalog_all".genus, ifnull("Ecatalog_all"."specificEpithet", ''::text), ifnull("Ecatalog_all".subspecies, ''::text))), btrim(ifnull("Ecatalog_all"."scientificNameAuthorship", ''::text)))) AS "fullScientificName" |
74 | 74 |
FROM "Ecatalog_all" |
75 | 75 |
WHERE ("Ecatalog_all".genus IS NOT NULL); |
76 |
COMMENT ON VIEW _specimens_09_list_of_unique_verbatim_author_taxa_with_genus IS '
|
|
76 |
/*
|
|
77 | 77 |
Check: should return 45997 rows |
78 |
';
|
|
78 |
*/
|
|
79 | 79 |
|
80 |
CREATE VIEW _specimens_10_count_number_of_records_by_institution AS
|
|
80 |
-- _specimens_10_count_number_of_records_by_institution
|
|
81 | 81 |
SELECT "Ecatalog_all".specimen_duplicate_institutions, |
82 | 82 |
count(*) AS records |
83 | 83 |
FROM "Ecatalog_all" |
84 | 84 |
GROUP BY "Ecatalog_all".specimen_duplicate_institutions; |
85 |
COMMENT ON VIEW _specimens_10_count_number_of_records_by_institution IS '
|
|
85 |
/*
|
|
86 | 86 |
Note: Majority should be from ''NY''; these are standard herbarium acronyms |
87 | 87 |
|
88 | 88 |
Check: should return 6 rows |
89 |
';
|
|
89 |
*/
|
|
90 | 90 |
|
91 |
CREATE VIEW _specimens_11_list_of_three_standard_political_divisions AS
|
|
91 |
-- _specimens_11_list_of_three_standard_political_divisions
|
|
92 | 92 |
SELECT DISTINCT "Ecatalog_all".country, |
93 | 93 |
"Ecatalog_all"."stateProvince", |
94 | 94 |
"Ecatalog_all".county |
95 | 95 |
FROM "Ecatalog_all"; |
96 |
COMMENT ON VIEW _specimens_11_list_of_three_standard_political_divisions IS '
|
|
96 |
/*
|
|
97 | 97 |
Note: character set issues may cause mis-matches. This query is a good way to reveal character set issues, either in source db or in BIEN |
98 | 98 |
|
99 | 99 |
Check: should return 5232 rows |
100 |
';
|
|
100 |
*/
|
|
101 | 101 |
|
102 |
CREATE VIEW _specimens_12_distinct_collector_name_collect_num_date_w_count AS
|
|
102 |
-- _specimens_12_distinct_collector_name_collect_num_date_w_count
|
|
103 | 103 |
SELECT ifnull("Ecatalog_all"."recordedBy", ''::text) AS "collectorName", |
104 | 104 |
ifnull("Ecatalog_all"."collectorNumber", ''::text) AS "collectionNumber", |
105 | 105 |
concat_ws('-'::text, "Ecatalog_all"."yearCollected", "Ecatalog_all"."monthCollected", "Ecatalog_all"."dayCollected") AS "dateCollected", |
... | ... | |
107 | 107 |
FROM "Ecatalog_all" |
108 | 108 |
GROUP BY "Ecatalog_all"."recordedBy", "Ecatalog_all"."collectorNumber", concat_ws('-'::text, "Ecatalog_all"."yearCollected", "Ecatalog_all"."monthCollected", "Ecatalog_all"."dayCollected") |
109 | 109 |
ORDER BY "Ecatalog_all"."recordedBy", "Ecatalog_all"."collectorNumber", concat_ws('-'::text, "Ecatalog_all"."yearCollected", "Ecatalog_all"."monthCollected", "Ecatalog_all"."dayCollected"); |
110 |
COMMENT ON VIEW _specimens_12_distinct_collector_name_collect_num_date_w_count IS '
|
|
110 |
/*
|
|
111 | 111 |
Check: should return 309396 rows |
112 |
';
|
|
112 |
*/
|
|
113 | 113 |
|
114 |
CREATE VIEW _specimens_13_count_of_all_verbatim_and_decimal_lat_long AS
|
|
114 |
-- _specimens_13_count_of_all_verbatim_and_decimal_lat_long
|
|
115 | 115 |
SELECT ( SELECT count(*) AS count |
116 | 116 |
FROM "Ecatalog_all" |
117 | 117 |
WHERE ("Ecatalog_all"."decimalLatitude" IS NOT NULL)) AS "allLats", |
... | ... | |
124 | 124 |
( SELECT count(*) AS count |
125 | 125 |
FROM "Ecatalog_all" |
126 | 126 |
WHERE ("Ecatalog_all"."decimalLongitude" ~ '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'::text)) AS "decimalLongs"; |
127 |
COMMENT ON VIEW _specimens_13_count_of_all_verbatim_and_decimal_lat_long IS '
|
|
127 |
/*
|
|
128 | 128 |
Check: should return 1 row |
129 |
';
|
|
129 |
*/
|
|
130 | 130 |
|
131 |
CREATE VIEW _specimens_14_count_of_all_invalid_verbatim_lat_long AS
|
|
131 |
-- _specimens_14_count_of_all_invalid_verbatim_lat_long
|
|
132 | 132 |
SELECT ( SELECT count(*) AS count |
133 | 133 |
FROM "Ecatalog_all" |
134 | 134 |
WHERE ((("Ecatalog_all"."decimalLatitude")::double precision > (90)::double precision) OR (("Ecatalog_all"."decimalLatitude")::double precision < ((-90))::double precision))) AS "badLats", |
135 | 135 |
( SELECT count(*) AS count |
136 | 136 |
FROM "Ecatalog_all" |
137 | 137 |
WHERE ((("Ecatalog_all"."decimalLongitude")::double precision > (180)::double precision) OR (("Ecatalog_all"."decimalLongitude")::double precision < ((-180))::double precision))) AS "badLongs"; |
138 |
COMMENT ON VIEW _specimens_14_count_of_all_invalid_verbatim_lat_long IS '
|
|
138 |
/*
|
|
139 | 139 |
Check: should return 1 row |
140 |
';
|
|
140 |
*/
|
|
141 | 141 |
|
142 |
CREATE VIEW _specimens_15_list_distinct_locality_descriptions AS
|
|
142 |
-- _specimens_15_list_distinct_locality_descriptions
|
|
143 | 143 |
SELECT DISTINCT "Ecatalog_all".locality__main AS "localityDescription" |
144 | 144 |
FROM "Ecatalog_all" |
145 | 145 |
WHERE ("Ecatalog_all".locality__main IS NOT NULL); |
146 |
COMMENT ON VIEW _specimens_15_list_distinct_locality_descriptions IS '
|
|
146 |
/*
|
|
147 | 147 |
Check: should return 125592 records |
148 |
';
|
|
148 |
*/
|
|
149 | 149 |
|
150 |
CREATE VIEW _specimens_16_list_distinct_specimen_descriptions AS
|
|
150 |
-- _specimens_16_list_distinct_specimen_descriptions
|
|
151 | 151 |
SELECT "Ecatalog_all"."specimenDescription" |
152 | 152 |
FROM "Ecatalog_all" |
153 | 153 |
WHERE ("Ecatalog_all"."specimenDescription" IS NOT NULL); |
154 |
COMMENT ON VIEW _specimens_16_list_distinct_specimen_descriptions IS '
|
|
154 |
/*
|
|
155 | 155 |
Note: specimens descriptions in nybg extract is in column "specimenDescription" |
156 | 156 |
|
157 | 157 |
Check: should return 158460 records |
158 |
'; |
|
158 |
*/ |
Also available in: Unified diff
validation/aggregating/specimens/NY/qualitative_validations_source_db_NYBG.VegCore.sql: removed DDL statements so that running the query file does not alter the database, using the steps at wiki.vegpath.org/Aggregating_validations_refactoring#remove-DDL-statements