Revision 4403
Added by Aaron Marcuse-Kubitza over 12 years ago
inputs/NY/verify/specimens.ref.sql | ||
---|---|---|
1 | 1 |
SELECT '# specimen replicates' AS ___; |
2 |
SELECT count(*) AS count FROM nybg_raw;
|
|
2 |
SELECT count(*) AS count FROM "Specimen";
|
|
3 | 3 |
|
4 | 4 |
SELECT '# families' AS ___; |
5 |
SELECT count(DISTINCT Family) AS count FROM nybg_raw;
|
|
5 |
SELECT count(DISTINCT "Family") AS count FROM "Specimen";
|
|
6 | 6 |
|
7 | 7 |
SELECT '# genera' AS ___; |
8 |
SELECT count(DISTINCT Genus) AS count FROM nybg_raw;
|
|
8 |
SELECT count(DISTINCT "Genus") AS count FROM "Specimen";
|
|
9 | 9 |
|
10 | 10 |
SELECT '# species' AS ___; |
11 |
SELECT count(DISTINCT Genus, Species) AS count FROM nybg_raw;
|
|
11 |
SELECT count(DISTINCT "Genus"||':'||"Species") AS count FROM "Specimen";
|
|
12 | 12 |
|
13 | 13 |
SELECT '# binomials' AS ___; |
14 |
SELECT count(DISTINCT ScientificName) AS count FROM nybg_raw;
|
|
14 |
SELECT count(DISTINCT "ScientificName") AS count FROM "Specimen";
|
|
15 | 15 |
|
16 | 16 |
SELECT 'collection codes' AS ___; |
17 |
SELECT DISTINCT CollectionCode AS collectioncode_dwc FROM nybg_raw;
|
|
17 |
SELECT DISTINCT "CollectionCode" AS collectioncode_dwc FROM "Specimen";
|
|
18 | 18 |
|
19 | 19 |
SELECT '# catalog numbers' AS ___; |
20 |
SELECT count(DISTINCT CatalogNumber) AS count FROM nybg_raw;
|
|
20 |
SELECT count(DISTINCT "CatalogNumber") AS count FROM "Specimen";
|
|
21 | 21 |
|
22 | 22 |
SELECT 'duplicate catalog numbers' AS ___; |
23 |
SELECT CatalogNumber AS catalognumber_dwc, count(*) AS count
|
|
24 |
FROM nybg_raw
|
|
25 |
GROUP BY CatalogNumber
|
|
26 |
HAVING count > 1 |
|
23 |
SELECT "CatalogNumber" AS catalognumber_dwc, count(*) AS count
|
|
24 |
FROM "Specimen"
|
|
25 |
GROUP BY "CatalogNumber"
|
|
26 |
HAVING count(*) > 1
|
|
27 | 27 |
; |
28 | 28 |
|
29 | 29 |
SELECT '# counties' AS ___; |
30 |
SELECT count(DISTINCT Country, StateProvince, County) AS count FROM nybg_raw;
|
|
30 |
SELECT count(DISTINCT "Country"||':'||"StateProvince"||':'||"County") AS count FROM "Specimen";
|
|
31 | 31 |
|
32 | 32 |
/* |
33 |
SELECT COUNT(*) as totalRecords, COUNT(DISTINCT CatalogNumber) as uniqueCatalogNumbers
|
|
34 |
FROM nybg_raw;
|
|
33 |
SELECT COUNT(*) as totalRecords, COUNT(DISTINCT "CatalogNumber") as uniqueCatalogNumbers
|
|
34 |
FROM "Specimen";
|
|
35 | 35 |
|
36 | 36 |
SELECT CatalogNumber, COUNT(*) AS records |
37 |
FROM nybg_raw
|
|
37 |
FROM "Specimen"
|
|
38 | 38 |
GROUP BY CatalogNumber |
39 | 39 |
HAVING records>1; |
40 | 40 |
|
41 | 41 |
SELECT b.CatalogNumber, DateLastModified, ScientificName, Collector, FieldNumber, |
42 | 42 |
CollectedDate |
43 |
FROM nybg_raw AS a JOIN (
|
|
43 |
FROM "Specimen" AS a JOIN (
|
|
44 | 44 |
SELECT CatalogNumber, COUNT(*) AS records |
45 |
FROM nybg_raw
|
|
45 |
FROM "Specimen"
|
|
46 | 46 |
WHERE CatalogNumber IS NOT NULL |
47 | 47 |
GROUP BY CatalogNumber |
48 | 48 |
HAVING records>1 |
... | ... | |
52 | 52 |
|
53 | 53 |
SELECT DateLastModified, ScientificName, Collector, FieldNumber, |
54 | 54 |
CollectedDate |
55 |
FROM nybg_raw
|
|
55 |
FROM "Specimen"
|
|
56 | 56 |
WHERE CatalogNumber IS NULL |
57 | 57 |
ORDER BY CatalogNumber ASC, DateLastModified DESC |
58 | 58 |
LIMIT 12; |
... | ... | |
60 | 60 |
SELECT |
61 | 61 |
( |
62 | 62 |
SELECT COUNT(*) |
63 |
FROM nybg_raw
|
|
63 |
FROM "Specimen"
|
|
64 | 64 |
WHERE Latitude IS NOT NULL |
65 | 65 |
) AS allLats, |
66 | 66 |
( |
67 | 67 |
SELECT COUNT(*) |
68 |
FROM nybg_raw
|
|
68 |
FROM "Specimen"
|
|
69 | 69 |
WHERE Latitude REGEXP '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$' |
70 | 70 |
) AS decimalLats, |
71 | 71 |
( |
72 | 72 |
SELECT COUNT(*) |
73 |
FROM nybg_raw
|
|
73 |
FROM "Specimen"
|
|
74 | 74 |
WHERE Longitude IS NOT NULL |
75 | 75 |
) AS allLongs, |
76 | 76 |
( |
77 | 77 |
SELECT COUNT(*) |
78 |
FROM nybg_raw
|
|
78 |
FROM "Specimen"
|
|
79 | 79 |
WHERE Longitude REGEXP '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$' |
80 | 80 |
) AS decimalLongs; |
81 | 81 |
|
82 | 82 |
SELECT Latitude, Longitude |
83 |
FROM nybg_raw
|
|
83 |
FROM "Specimen"
|
|
84 | 84 |
WHERE Latitude IS NOT NULL AND Longitude IS NOT NULL |
85 | 85 |
LIMIT 10; |
86 | 86 |
|
87 | 87 |
SELECT |
88 | 88 |
( |
89 | 89 |
SELECT COUNT(*) |
90 |
FROM nybg_raw
|
|
90 |
FROM "Specimen"
|
|
91 | 91 |
WHERE Latitude>90 OR Latitude<-90 |
92 | 92 |
) AS badLats, |
93 | 93 |
( |
94 | 94 |
SELECT COUNT(*) |
95 |
FROM nybg_raw
|
|
95 |
FROM "Specimen"
|
|
96 | 96 |
WHERE Longitude>180 OR Longitude<-180 |
97 | 97 |
) AS badLongs; |
98 | 98 |
|
99 | 99 |
SELECT DayCollected, MonthCollected, YearCollected |
100 |
FROM nybg_raw
|
|
100 |
FROM "Specimen"
|
|
101 | 101 |
WHERE DayCollected IS NOT NULL OR MonthCollected IS NOT NULL OR YearCollected IS NOT NULL |
102 | 102 |
LIMIT 10; |
103 | 103 |
|
104 | 104 |
SELECT |
105 | 105 |
( |
106 | 106 |
SELECT COUNT(*) |
107 |
FROM nybg_raw
|
|
107 |
FROM "Specimen"
|
|
108 | 108 |
WHERE NOT(DayCollected REGEXP '^-?[0-9]+$') |
109 | 109 |
OR NOT(MonthCollected REGEXP '^-?[0-9]+$') |
110 | 110 |
OR NOT(YearCollected REGEXP '^-?[0-9]+$') |
111 | 111 |
) AS badDateCollected, |
112 | 112 |
( |
113 | 113 |
SELECT COUNT(*) |
114 |
FROM nybg_raw
|
|
114 |
FROM "Specimen"
|
|
115 | 115 |
WHERE NOT(DayIdentified REGEXP '^-?[0-9]+$') |
116 | 116 |
OR NOT(MonthIdentified REGEXP '^-?[0-9]+$') |
117 | 117 |
OR NOT(YearIdentified REGEXP '^-?[0-9]+$') |
118 | 118 |
) AS badDateIdentified; |
119 | 119 |
|
120 | 120 |
SELECT Locality |
121 |
FROM nybg_raw
|
|
121 |
FROM "Specimen"
|
|
122 | 122 |
WHERE Locality IS NOT NULL |
123 | 123 |
LIMIT 10; |
124 | 124 |
|
125 | 125 |
SELECT PlantFungusDescription |
126 |
FROM nybg_raw
|
|
126 |
FROM "Specimen"
|
|
127 | 127 |
WHERE PlantFungusDescription IS NOT NULL |
128 | 128 |
LIMIT 10; |
129 | 129 |
|
130 | 130 |
SELECT Habitat, Vegetation |
131 |
FROM nybg_raw
|
|
131 |
FROM "Specimen"
|
|
132 | 132 |
WHERE Habitat IS NOT NULL OR Vegetation IS NOT NULL |
133 | 133 |
LIMIT 10; |
134 | 134 |
*/ |
Also available in: Unified diff
inputs/NY/verify/specimens.ref.sql: Retrofitted to use PostgreSQL instead of MySQL syntax, since this now runs on the PostgreSQL staging tables