Revision 12924
Added by Aaron Marcuse-Kubitza over 10 years ago
trunk/validation/aggregating/specimens/NY/qualitative_validations_source_db_NYBG.VegCore.sql | ||
---|---|---|
7 | 7 |
SET search_path TO "NY"; |
8 | 8 |
|
9 | 9 |
CREATE VIEW _specimens_01_count_of_total_records_specimens_in_source_db AS |
10 |
SELECT COUNT(*) "totalSpecimenRecords"
|
|
11 |
FROM "Ecatalog_all"; |
|
10 |
SELECT count(*) AS "totalSpecimenRecords"
|
|
11 |
FROM "Ecatalog_all";
|
|
12 | 12 |
COMMENT ON VIEW _specimens_01_count_of_total_records_specimens_in_source_db IS ' |
13 | 13 |
Check: should return 1 row |
14 | 14 |
'; |
15 | 15 |
|
16 | 16 |
CREATE VIEW _specimens_02_count_of_unique_verbatim_families AS |
17 |
SELECT COUNT(DISTINCT family) AS families FROM "Ecatalog_all" WHERE family IS NOT NULL; |
|
17 |
SELECT count(DISTINCT "Ecatalog_all".family) AS families |
|
18 |
FROM "Ecatalog_all" |
|
19 |
WHERE ("Ecatalog_all".family IS NOT NULL); |
|
18 | 20 |
COMMENT ON VIEW _specimens_02_count_of_unique_verbatim_families IS ' |
19 | 21 |
Check: should return 1 row |
20 | 22 |
'; |
21 | 23 |
|
22 | 24 |
CREATE VIEW _specimens_03_list_of_verbatim_families AS |
23 |
SELECT DISTINCT CONCAT_WS(' ', genus, "specificEpithet") AS "specificEpithet"
|
|
24 |
FROM "Ecatalog_all" |
|
25 |
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL;
|
|
25 |
SELECT DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet") AS "specificEpithet"
|
|
26 |
FROM "Ecatalog_all"
|
|
27 |
WHERE (("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL));
|
|
26 | 28 |
COMMENT ON VIEW _specimens_03_list_of_verbatim_families IS ' |
27 | 29 |
Check: should return same number of rows |
28 | 30 |
'; |
29 | 31 |
|
30 | 32 |
CREATE VIEW _specimens_04_count_of_unique_verbatim_species_without_author AS |
31 |
SELECT COUNT(DISTINCT CONCAT_WS(' ', genus, "specificEpithet")) AS subspecies FROM "Ecatalog_all" |
|
32 |
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL AND subspecies IS NOT NULL; |
|
33 |
SELECT count(DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet")) AS subspecies |
|
34 |
FROM "Ecatalog_all" |
|
35 |
WHERE ((("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL)) AND ("Ecatalog_all".subspecies IS NOT NULL)); |
|
33 | 36 |
COMMENT ON VIEW _specimens_04_count_of_unique_verbatim_species_without_author IS ' |
34 | 37 |
Check: should return 1 row |
35 | 38 |
'; |
36 | 39 |
|
37 | 40 |
CREATE VIEW _specimens_05_list_of_verbatim_species_excluding_author AS |
38 |
SELECT DISTINCT CONCAT_WS(' ', genus, "specificEpithet") AS subspecies FROM "Ecatalog_all" |
|
39 |
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL AND subspecies IS NOT NULL; |
|
41 |
SELECT DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet") AS subspecies |
|
42 |
FROM "Ecatalog_all" |
|
43 |
WHERE ((("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL)) AND ("Ecatalog_all".subspecies IS NOT NULL)); |
|
40 | 44 |
COMMENT ON VIEW _specimens_05_list_of_verbatim_species_excluding_author IS ' |
41 | 45 |
Check: should return 3335 rows |
42 | 46 |
'; |
43 | 47 |
|
44 | 48 |
CREATE VIEW _specimens_06_count_of_unique_verb_subsp_taxa_without_author AS |
45 |
SELECT COUNT(DISTINCT CONCAT_WS(' ', genus, "specificEpithet", subspecies)) AS "specificEpithet" FROM "Ecatalog_all" |
|
46 |
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL; |
|
49 |
SELECT count(DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet", "Ecatalog_all".subspecies)) AS "specificEpithet" |
|
50 |
FROM "Ecatalog_all" |
|
51 |
WHERE (("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL)); |
|
47 | 52 |
COMMENT ON VIEW _specimens_06_count_of_unique_verb_subsp_taxa_without_author IS ' |
48 | 53 |
Check: should return 1 row |
49 | 54 |
'; |
50 | 55 |
|
51 | 56 |
CREATE VIEW _specimens_07_list_of_verbatim_subspecific_taxa_without_author AS |
52 |
SELECT DISTINCT CONCAT_WS(' ', genus, "specificEpithet") AS "specificEpithet"
|
|
53 |
FROM "Ecatalog_all" |
|
54 |
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL;
|
|
57 |
SELECT DISTINCT concat_ws(' '::text, "Ecatalog_all".genus, "Ecatalog_all"."specificEpithet") AS "specificEpithet"
|
|
58 |
FROM "Ecatalog_all"
|
|
59 |
WHERE (("Ecatalog_all".genus IS NOT NULL) AND ("Ecatalog_all"."specificEpithet" IS NOT NULL));
|
|
55 | 60 |
COMMENT ON VIEW _specimens_07_list_of_verbatim_subspecific_taxa_without_author IS ' |
56 | 61 |
Check: should return 40145 rows |
57 | 62 |
'; |
58 | 63 |
|
59 | 64 |
CREATE VIEW _specimens_08_count_of_unique_verbatim_author_taxa_with_genus AS |
60 |
SELECT COUNT(DISTINCT TRIM(CONCAT_WS(' ', |
|
61 |
TRIM(CONCAT_WS(' ', genus, IFNULL("specificEpithet",''), IFNULL(subspecies,''))), |
|
62 |
TRIM(IFNULL("scientificNameAuthorship",'')) |
|
63 |
))) AS "fullScientificNames" |
|
64 |
FROM "Ecatalog_all" |
|
65 |
WHERE genus IS NOT NULL; |
|
65 |
SELECT count(DISTINCT btrim(concat_ws(' '::text, btrim(concat_ws(' '::text, "Ecatalog_all".genus, ifnull("Ecatalog_all"."specificEpithet", ''::text), ifnull("Ecatalog_all".subspecies, ''::text))), btrim(ifnull("Ecatalog_all"."scientificNameAuthorship", ''::text))))) AS "fullScientificNames" |
|
66 |
FROM "Ecatalog_all" |
|
67 |
WHERE ("Ecatalog_all".genus IS NOT NULL); |
|
66 | 68 |
COMMENT ON VIEW _specimens_08_count_of_unique_verbatim_author_taxa_with_genus IS ' |
67 | 69 |
Check: should return 1 row |
68 | 70 |
'; |
69 | 71 |
|
70 | 72 |
CREATE VIEW _specimens_09_list_of_unique_verbatim_author_taxa_with_genus AS |
71 |
SELECT DISTINCT TRIM(CONCAT_WS(' ', |
|
72 |
TRIM(CONCAT_WS(' ', genus, IFNULL("specificEpithet",''), IFNULL(subspecies,''))), |
|
73 |
TRIM(IFNULL("scientificNameAuthorship",'')) |
|
74 |
)) AS "fullScientificName" |
|
75 |
FROM "Ecatalog_all" |
|
76 |
WHERE genus IS NOT NULL; |
|
73 |
SELECT DISTINCT btrim(concat_ws(' '::text, btrim(concat_ws(' '::text, "Ecatalog_all".genus, ifnull("Ecatalog_all"."specificEpithet", ''::text), ifnull("Ecatalog_all".subspecies, ''::text))), btrim(ifnull("Ecatalog_all"."scientificNameAuthorship", ''::text)))) AS "fullScientificName" |
|
74 |
FROM "Ecatalog_all" |
|
75 |
WHERE ("Ecatalog_all".genus IS NOT NULL); |
|
77 | 76 |
COMMENT ON VIEW _specimens_09_list_of_unique_verbatim_author_taxa_with_genus IS ' |
78 | 77 |
Check: should return 45997 rows |
79 | 78 |
'; |
80 | 79 |
|
81 | 80 |
CREATE VIEW _specimens_10_count_number_of_records_by_institution AS |
82 |
SELECT specimen_duplicate_institutions, COUNT(*) AS records |
|
83 |
FROM "Ecatalog_all" |
|
84 |
GROUP BY specimen_duplicate_institutions; |
|
81 |
SELECT "Ecatalog_all".specimen_duplicate_institutions, |
|
82 |
count(*) AS records |
|
83 |
FROM "Ecatalog_all" |
|
84 |
GROUP BY "Ecatalog_all".specimen_duplicate_institutions; |
|
85 | 85 |
COMMENT ON VIEW _specimens_10_count_number_of_records_by_institution IS ' |
86 | 86 |
Note: Majority should be from ''NY''; these are standard herbarium acronyms |
87 | 87 |
|
... | ... | |
89 | 89 |
'; |
90 | 90 |
|
91 | 91 |
CREATE VIEW _specimens_11_list_of_three_standard_political_divisions AS |
92 |
SELECT DISTINCT country, "stateProvince", county |
|
93 |
FROM "Ecatalog_all"; |
|
92 |
SELECT DISTINCT "Ecatalog_all".country, |
|
93 |
"Ecatalog_all"."stateProvince", |
|
94 |
"Ecatalog_all".county |
|
95 |
FROM "Ecatalog_all"; |
|
94 | 96 |
COMMENT ON VIEW _specimens_11_list_of_three_standard_political_divisions IS ' |
95 | 97 |
Note: character set issues may cause mis-matches. This query is a good way to reveal character set issues, either in source db or in BIEN |
96 | 98 |
|
... | ... | |
98 | 100 |
'; |
99 | 101 |
|
100 | 102 |
CREATE VIEW _specimens_12_distinct_collector_name_collect_num_date_w_count AS |
101 |
SELECT IFNULL("recordedBy",'') AS "collectorName", IFNULL("collectorNumber",'') AS "collectionNumber", |
|
102 |
CONCAT_WS('-', "yearCollected", "monthCollected", "dayCollected") AS "dateCollected", COUNT(*) AS "specimenRecords" |
|
103 |
FROM "Ecatalog_all" |
|
104 |
GROUP BY "recordedBy", "collectorNumber", "dateCollected" |
|
105 |
ORDER BY "recordedBy", "collectorNumber", "dateCollected"; |
|
103 |
SELECT ifnull("Ecatalog_all"."recordedBy", ''::text) AS "collectorName", |
|
104 |
ifnull("Ecatalog_all"."collectorNumber", ''::text) AS "collectionNumber", |
|
105 |
concat_ws('-'::text, "Ecatalog_all"."yearCollected", "Ecatalog_all"."monthCollected", "Ecatalog_all"."dayCollected") AS "dateCollected", |
|
106 |
count(*) AS "specimenRecords" |
|
107 |
FROM "Ecatalog_all" |
|
108 |
GROUP BY "Ecatalog_all"."recordedBy", "Ecatalog_all"."collectorNumber", concat_ws('-'::text, "Ecatalog_all"."yearCollected", "Ecatalog_all"."monthCollected", "Ecatalog_all"."dayCollected") |
|
109 |
ORDER BY "Ecatalog_all"."recordedBy", "Ecatalog_all"."collectorNumber", concat_ws('-'::text, "Ecatalog_all"."yearCollected", "Ecatalog_all"."monthCollected", "Ecatalog_all"."dayCollected"); |
|
106 | 110 |
COMMENT ON VIEW _specimens_12_distinct_collector_name_collect_num_date_w_count IS ' |
107 | 111 |
Check: should return 309396 rows |
108 | 112 |
'; |
109 | 113 |
|
110 | 114 |
CREATE VIEW _specimens_13_count_of_all_verbatim_and_decimal_lat_long AS |
111 |
SELECT |
|
112 |
( |
|
113 |
SELECT COUNT(*) |
|
114 |
FROM "Ecatalog_all" |
|
115 |
WHERE "decimalLatitude" IS NOT NULL |
|
116 |
) AS "allLats", |
|
117 |
( |
|
118 |
SELECT COUNT(*) |
|
119 |
FROM "Ecatalog_all" |
|
120 |
WHERE "decimalLatitude" ~ '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$' |
|
121 |
) AS "decimalLats", |
|
122 |
( |
|
123 |
SELECT COUNT(*) |
|
124 |
FROM "Ecatalog_all" |
|
125 |
WHERE "decimalLongitude" IS NOT NULL |
|
126 |
) AS "allLongs", |
|
127 |
( |
|
128 |
SELECT COUNT(*) |
|
129 |
FROM "Ecatalog_all" |
|
130 |
WHERE "decimalLongitude" ~ '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$' |
|
131 |
) AS "decimalLongs"; |
|
115 |
SELECT ( SELECT count(*) AS count |
|
116 |
FROM "Ecatalog_all" |
|
117 |
WHERE ("Ecatalog_all"."decimalLatitude" IS NOT NULL)) AS "allLats", |
|
118 |
( SELECT count(*) AS count |
|
119 |
FROM "Ecatalog_all" |
|
120 |
WHERE ("Ecatalog_all"."decimalLatitude" ~ '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'::text)) AS "decimalLats", |
|
121 |
( SELECT count(*) AS count |
|
122 |
FROM "Ecatalog_all" |
|
123 |
WHERE ("Ecatalog_all"."decimalLongitude" IS NOT NULL)) AS "allLongs", |
|
124 |
( SELECT count(*) AS count |
|
125 |
FROM "Ecatalog_all" |
|
126 |
WHERE ("Ecatalog_all"."decimalLongitude" ~ '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'::text)) AS "decimalLongs"; |
|
132 | 127 |
COMMENT ON VIEW _specimens_13_count_of_all_verbatim_and_decimal_lat_long IS ' |
133 | 128 |
Check: should return 1 row |
134 | 129 |
'; |
135 | 130 |
|
136 | 131 |
CREATE VIEW _specimens_14_count_of_all_invalid_verbatim_lat_long AS |
137 |
SELECT |
|
138 |
( |
|
139 |
SELECT COUNT(*) |
|
140 |
FROM "Ecatalog_all" |
|
141 |
WHERE "decimalLatitude"::double precision>90 OR "decimalLatitude"::double precision<-90 |
|
142 |
) AS "badLats", |
|
143 |
( |
|
144 |
SELECT COUNT(*) |
|
145 |
FROM "Ecatalog_all" |
|
146 |
WHERE "decimalLongitude"::double precision>180 OR "decimalLongitude"::double precision<-180 |
|
147 |
) AS "badLongs"; |
|
132 |
SELECT ( SELECT count(*) AS count |
|
133 |
FROM "Ecatalog_all" |
|
134 |
WHERE ((("Ecatalog_all"."decimalLatitude")::double precision > (90)::double precision) OR (("Ecatalog_all"."decimalLatitude")::double precision < ((-90))::double precision))) AS "badLats", |
|
135 |
( SELECT count(*) AS count |
|
136 |
FROM "Ecatalog_all" |
|
137 |
WHERE ((("Ecatalog_all"."decimalLongitude")::double precision > (180)::double precision) OR (("Ecatalog_all"."decimalLongitude")::double precision < ((-180))::double precision))) AS "badLongs"; |
|
148 | 138 |
COMMENT ON VIEW _specimens_14_count_of_all_invalid_verbatim_lat_long IS ' |
149 | 139 |
Check: should return 1 row |
150 | 140 |
'; |
151 | 141 |
|
152 | 142 |
CREATE VIEW _specimens_15_list_distinct_locality_descriptions AS |
153 |
SELECT DISTINCT locality__main AS "localityDescription"
|
|
154 |
FROM "Ecatalog_all" |
|
155 |
WHERE locality__main IS NOT NULL;
|
|
143 |
SELECT DISTINCT "Ecatalog_all".locality__main AS "localityDescription"
|
|
144 |
FROM "Ecatalog_all"
|
|
145 |
WHERE ("Ecatalog_all".locality__main IS NOT NULL);
|
|
156 | 146 |
COMMENT ON VIEW _specimens_15_list_distinct_locality_descriptions IS ' |
157 | 147 |
Check: should return 125592 records |
158 | 148 |
'; |
159 | 149 |
|
160 | 150 |
CREATE VIEW _specimens_16_list_distinct_specimen_descriptions AS |
161 |
SELECT "specimenDescription" AS "specimenDescription"
|
|
162 |
FROM "Ecatalog_all" |
|
163 |
WHERE "specimenDescription" IS NOT NULL;
|
|
151 |
SELECT "Ecatalog_all"."specimenDescription"
|
|
152 |
FROM "Ecatalog_all"
|
|
153 |
WHERE ("Ecatalog_all"."specimenDescription" IS NOT NULL);
|
|
164 | 154 |
COMMENT ON VIEW _specimens_16_list_distinct_specimen_descriptions IS ' |
165 | 155 |
Note: specimens descriptions in nybg extract is in column "specimenDescription" |
166 | 156 |
|
Also available in: Unified diff
validation/aggregating/specimens/NY/qualitative_validations_source_db_NYBG.VegCore.sql: updated to DB, which pg_dump-formats the views