Revision 12900
Added by Aaron Marcuse-Kubitza almost 11 years ago
trunk/validation/aggregating/specimens/NY/qualitative_validations_source_db_NYBG.VegCore.sql | ||
---|---|---|
10 | 10 |
-- 1. Count of total records (specimens) in source db |
11 | 11 |
-- Check: full join against equivalent query on BIEN3 db should return 1 row |
12 | 12 |
-- ------------------------------- |
13 |
SELECT COUNT(*) totalSpecimenRecords
|
|
13 |
SELECT COUNT(*) "totalSpecimenRecords"
|
|
14 | 14 |
FROM "Ecatalog_all"; |
15 | 15 |
|
16 | 16 |
-- ------------------------------- |
... | ... | |
23 | 23 |
-- 3. List of verbatim families |
24 | 24 |
-- Check: Full inner join to equivalent query on BIEN3 db should return same number of rows |
25 | 25 |
-- ------------------------------- |
26 |
SELECT DISTINCT CONCAT_WS(' ', genus, specificEpithet) AS specificEpithet
|
|
26 |
SELECT DISTINCT CONCAT_WS(' ', genus, "specificEpithet") AS "specificEpithet"
|
|
27 | 27 |
FROM "Ecatalog_all" |
28 |
WHERE genus IS NOT NULL AND specificEpithet IS NOT NULL;
|
|
28 |
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL;
|
|
29 | 29 |
|
30 | 30 |
-- ------------------------------- |
31 | 31 |
-- 4. Count of unique (verbatim) non-null species, without author |
32 | 32 |
-- Check: Full inner join to equivalent query on BIEN3 db should return 1 row |
33 | 33 |
-- ------------------------------- |
34 |
SELECT COUNT(DISTINCT CONCAT_WS(' ', genus, specificEpithet)) AS subspecies FROM "Ecatalog_all"
|
|
35 |
WHERE genus IS NOT NULL AND specificEpithet IS NOT NULL AND subspecies IS NOT NULL;
|
|
34 |
SELECT COUNT(DISTINCT CONCAT_WS(' ', genus, "specificEpithet")) AS subspecies FROM "Ecatalog_all"
|
|
35 |
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL AND subspecies IS NOT NULL;
|
|
36 | 36 |
|
37 | 37 |
-- ------------------------------- |
38 | 38 |
-- 5. List of verbatim species, excluding author |
39 | 39 |
-- Check: Full inner join to equivalent query on BIEN3 db should return 3335 rows |
40 | 40 |
-- ------------------------------- |
41 |
SELECT DISTINCT CONCAT_WS(' ', genus, specificEpithet) AS subspecies FROM "Ecatalog_all"
|
|
42 |
WHERE genus IS NOT NULL AND specificEpithet IS NOT NULL AND subspecies IS NOT NULL;
|
|
41 |
SELECT DISTINCT CONCAT_WS(' ', genus, "specificEpithet") AS subspecies FROM "Ecatalog_all"
|
|
42 |
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL AND subspecies IS NOT NULL;
|
|
43 | 43 |
|
44 | 44 |
-- ------------------------------- |
45 | 45 |
-- 6. Count of unique (verbatim) non-null subspecific taxa, without author |
46 | 46 |
-- Check: Full inner join to equivalent query on BIEN3 db should return 1 row |
47 | 47 |
-- ------------------------------- |
48 |
SELECT COUNT(DISTINCT CONCAT_WS(' ', genus, specificEpithet, subspecies)) AS specificEpithet FROM "Ecatalog_all"
|
|
49 |
WHERE genus IS NOT NULL AND specificEpithet IS NOT NULL;
|
|
48 |
SELECT COUNT(DISTINCT CONCAT_WS(' ', genus, "specificEpithet", subspecies)) AS "specificEpithet" FROM "Ecatalog_all"
|
|
49 |
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL;
|
|
50 | 50 |
|
51 | 51 |
-- ------------------------------- |
52 | 52 |
-- 7. List of verbatim subspecific taxa, without author |
53 | 53 |
-- Check: Full inner join to equivalent query on BIEN3 db should return 40145 rows |
54 | 54 |
-- ------------------------------- |
55 |
SELECT DISTINCT CONCAT_WS(' ', genus, specificEpithet) AS specificEpithet
|
|
55 |
SELECT DISTINCT CONCAT_WS(' ', genus, "specificEpithet") AS "specificEpithet"
|
|
56 | 56 |
FROM "Ecatalog_all" |
57 |
WHERE genus IS NOT NULL AND specificEpithet IS NOT NULL;
|
|
57 |
WHERE genus IS NOT NULL AND "specificEpithet" IS NOT NULL;
|
|
58 | 58 |
|
59 | 59 |
-- ------------------------------- |
60 | 60 |
-- 8. Count of unique (verbatim) taxa including author, for all taxa identified at least to genus |
61 | 61 |
-- Check: Full inner join to equivalent query on BIEN3 db should return 1 row |
62 | 62 |
-- ------------------------------- |
63 | 63 |
SELECT COUNT(DISTINCT TRIM(CONCAT_WS(' ', |
64 |
TRIM(CONCAT_WS(' ', genus, IFNULL(specificEpithet,''), IFNULL(subspecies,''))),
|
|
65 |
TRIM(IFNULL(scientificNameAuthorship,''))
|
|
66 |
))) AS fullScientificNames
|
|
64 |
TRIM(CONCAT_WS(' ', genus, IFNULL("specificEpithet",''), IFNULL(subspecies,''))),
|
|
65 |
TRIM(IFNULL("scientificNameAuthorship",''))
|
|
66 |
))) AS "fullScientificNames"
|
|
67 | 67 |
FROM "Ecatalog_all" |
68 | 68 |
WHERE genus IS NOT NULL; |
69 | 69 |
|
... | ... | |
72 | 72 |
-- Check: Full inner join to equivalent query on BIEN3 db should return 45997 rows |
73 | 73 |
-- ------------------------------- |
74 | 74 |
SELECT DISTINCT TRIM(CONCAT_WS(' ', |
75 |
TRIM(CONCAT_WS(' ', genus, IFNULL(specificEpithet,''), IFNULL(subspecies,''))),
|
|
76 |
TRIM(IFNULL(scientificNameAuthorship,''))
|
|
77 |
)) AS fullScientificName
|
|
75 |
TRIM(CONCAT_WS(' ', genus, IFNULL("specificEpithet",''), IFNULL(subspecies,''))),
|
|
76 |
TRIM(IFNULL("scientificNameAuthorship",''))
|
|
77 |
)) AS "fullScientificName"
|
|
78 | 78 |
FROM "Ecatalog_all" |
79 | 79 |
WHERE genus IS NOT NULL; |
80 | 80 |
|
... | ... | |
93 | 93 |
-- Note: character set issues may cause mis-matches. This query is a good way to reveal |
94 | 94 |
-- character set issues, either in source db or in BIEN |
95 | 95 |
-- ------------------------------- |
96 |
SELECT DISTINCT country, stateProvince, county
|
|
96 |
SELECT DISTINCT country, "stateProvince", county
|
|
97 | 97 |
FROM "Ecatalog_all"; |
98 | 98 |
|
99 | 99 |
-- ------------------------------- |
... | ... | |
101 | 101 |
-- plus total records |
102 | 102 |
-- Check: Full inner join to equivalent query on BIEN3 db should return 309396 rows |
103 | 103 |
-- ------------------------------- |
104 |
SELECT IFNULL(recordedBy,'') AS collectorName, IFNULL(collectorNumber,'') AS collectionNumber,
|
|
105 |
IFNULL(CollectedDate,'') AS dataCollected, COUNT(*) AS specimenRecords
|
|
104 |
SELECT IFNULL(recordedBy,'') AS "collectorName", IFNULL("collectorNumber",'') AS "collectionNumber",
|
|
105 |
IFNULL("CollectedDate",'') AS "dataCollected", COUNT(*) AS "specimenRecords"
|
|
106 | 106 |
FROM "Ecatalog_all" |
107 |
GROUP BY recordedBy, collectorNumber, CollectedDate
|
|
108 |
ORDER BY recordedBy, collectorNumber, CollectedDate;
|
|
107 |
GROUP BY recordedBy, "collectorNumber", "CollectedDate"
|
|
108 |
ORDER BY recordedBy, "collectorNumber", "CollectedDate";
|
|
109 | 109 |
|
110 | 110 |
-- ------------------------------- |
111 | 111 |
-- 13. Count of all verbatim Latitude and Longitude values, as well as all |
... | ... | |
116 | 116 |
( |
117 | 117 |
SELECT COUNT(*) |
118 | 118 |
FROM "Ecatalog_all" |
119 |
WHERE decimalLatitude IS NOT NULL
|
|
120 |
) AS allLats,
|
|
119 |
WHERE "decimalLatitude" IS NOT NULL
|
|
120 |
) AS "allLats",
|
|
121 | 121 |
( |
122 | 122 |
SELECT COUNT(*) |
123 | 123 |
FROM "Ecatalog_all" |
124 |
WHERE decimalLatitude REGEXP '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'
|
|
125 |
) AS decimalLats,
|
|
124 |
WHERE "decimalLatitude" REGEXP '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'
|
|
125 |
) AS "decimalLats",
|
|
126 | 126 |
( |
127 | 127 |
SELECT COUNT(*) |
128 | 128 |
FROM "Ecatalog_all" |
129 |
WHERE decimalLongitude IS NOT NULL
|
|
130 |
) AS allLongs,
|
|
129 |
WHERE "decimalLongitude" IS NOT NULL
|
|
130 |
) AS "allLongs",
|
|
131 | 131 |
( |
132 | 132 |
SELECT COUNT(*) |
133 | 133 |
FROM "Ecatalog_all" |
134 |
WHERE decimalLongitude REGEXP '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'
|
|
135 |
) AS decimalLongs;
|
|
134 |
WHERE "decimalLongitude" REGEXP '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'
|
|
135 |
) AS "decimalLongs";
|
|
136 | 136 |
|
137 | 137 |
-- ------------------------------- |
138 | 138 |
-- 14. Count of all verbatim Latitude and Longitude values that are not valid values |
... | ... | |
143 | 143 |
( |
144 | 144 |
SELECT COUNT(*) |
145 | 145 |
FROM "Ecatalog_all" |
146 |
WHERE decimalLatitude>90 OR decimalLatitude<-90
|
|
147 |
) AS badLats,
|
|
146 |
WHERE "decimalLatitude">90 OR "decimalLatitude"<-90
|
|
147 |
) AS "badLats",
|
|
148 | 148 |
( |
149 | 149 |
SELECT COUNT(*) |
150 | 150 |
FROM "Ecatalog_all" |
151 |
WHERE decimalLongitude>180 OR decimalLongitude<-180
|
|
152 |
) AS badLongs;
|
|
151 |
WHERE "decimalLongitude">180 OR "decimalLongitude"<-180
|
|
152 |
) AS "badLongs";
|
|
153 | 153 |
|
154 | 154 |
-- ------------------------------- |
155 | 155 |
-- 15. List distinct non-null locality descriptions |
156 | 156 |
-- Check: full join to equivalent query against BIEN3 should return 125592 records |
157 | 157 |
-- ------------------------------- |
158 |
SELECT DISTINCT locality__main AS localityDescription
|
|
158 |
SELECT DISTINCT locality__main AS "localityDescription"
|
|
159 | 159 |
FROM "Ecatalog_all" |
160 | 160 |
WHERE locality__main IS NOT NULL; |
161 | 161 |
|
... | ... | |
164 | 164 |
-- Check: full join to equivalent query against BIEN3 should return 158460 records |
165 | 165 |
-- Note: specimens descriptions in nybg extract is in column PlantFungDescription |
166 | 166 |
-- ------------------------------- |
167 |
SELECT specimenDescription AS specimenDescription
|
|
167 |
SELECT "specimenDescription" AS "specimenDescription"
|
|
168 | 168 |
FROM "Ecatalog_all" |
169 |
WHERE specimenDescription IS NOT NULL; |
|
169 |
WHERE "specimenDescription" IS NOT NULL; |
Also available in: Unified diff
validation/aggregating/specimens/NY/qualitative_validations_source_db_NYBG.VegCore.sql: enclosed mixed-case identifiers in "" using the steps at wiki.vegpath.org/Aggregating_validations_refactoring#translate-to-Postgres