1
|
SELECT '# specimen replicates' AS ___;
|
2
|
SELECT count(*) AS count FROM nybg_raw;
|
3
|
|
4
|
SELECT '# families' AS ___;
|
5
|
SELECT count(DISTINCT Family) AS count FROM nybg_raw;
|
6
|
|
7
|
SELECT '# genera' AS ___;
|
8
|
SELECT count(DISTINCT Genus) AS count FROM nybg_raw;
|
9
|
|
10
|
SELECT '# species' AS ___;
|
11
|
SELECT count(DISTINCT Genus, Species) AS count FROM nybg_raw;
|
12
|
|
13
|
SELECT '# binomials' AS ___;
|
14
|
SELECT count(DISTINCT ScientificName) AS count FROM nybg_raw;
|
15
|
|
16
|
SELECT 'collection codes' AS ___;
|
17
|
SELECT DISTINCT CollectionCode AS collectioncode_dwc FROM nybg_raw;
|
18
|
|
19
|
SELECT '# catalog numbers' AS ___;
|
20
|
SELECT count(DISTINCT CatalogNumber) AS count FROM nybg_raw;
|
21
|
|
22
|
SELECT 'duplicate catalog numbers' AS ___;
|
23
|
SELECT CatalogNumber AS catalognumber_dwc, count(*) AS count
|
24
|
FROM nybg_raw
|
25
|
GROUP BY CatalogNumber
|
26
|
HAVING count > 1
|
27
|
;
|
28
|
|
29
|
SELECT '# counties' AS ___;
|
30
|
SELECT count(DISTINCT Country, StateProvince, County) AS count FROM nybg_raw;
|
31
|
|
32
|
/*
|
33
|
SELECT COUNT(*) as totalRecords, COUNT(DISTINCT CatalogNumber) as uniqueCatalogNumbers
|
34
|
FROM nybg_raw;
|
35
|
|
36
|
SELECT CatalogNumber, COUNT(*) AS records
|
37
|
FROM nybg_raw
|
38
|
GROUP BY CatalogNumber
|
39
|
HAVING records>1;
|
40
|
|
41
|
SELECT b.CatalogNumber, DateLastModified, ScientificName, Collector, FieldNumber,
|
42
|
CollectedDate
|
43
|
FROM nybg_raw AS a JOIN (
|
44
|
SELECT CatalogNumber, COUNT(*) AS records
|
45
|
FROM nybg_raw
|
46
|
WHERE CatalogNumber IS NOT NULL
|
47
|
GROUP BY CatalogNumber
|
48
|
HAVING records>1
|
49
|
) AS b
|
50
|
ON a.CatalogNumber=b.CatalogNumber
|
51
|
ORDER BY b.CatalogNumber ASC, DateLastModified DESC;
|
52
|
|
53
|
SELECT DateLastModified, ScientificName, Collector, FieldNumber,
|
54
|
CollectedDate
|
55
|
FROM nybg_raw
|
56
|
WHERE CatalogNumber IS NULL
|
57
|
ORDER BY CatalogNumber ASC, DateLastModified DESC
|
58
|
LIMIT 12;
|
59
|
|
60
|
SELECT
|
61
|
(
|
62
|
SELECT COUNT(*)
|
63
|
FROM nybg_raw
|
64
|
WHERE Latitude IS NOT NULL
|
65
|
) AS allLats,
|
66
|
(
|
67
|
SELECT COUNT(*)
|
68
|
FROM nybg_raw
|
69
|
WHERE Latitude REGEXP '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'
|
70
|
) AS decimalLats,
|
71
|
(
|
72
|
SELECT COUNT(*)
|
73
|
FROM nybg_raw
|
74
|
WHERE Longitude IS NOT NULL
|
75
|
) AS allLongs,
|
76
|
(
|
77
|
SELECT COUNT(*)
|
78
|
FROM nybg_raw
|
79
|
WHERE Longitude REGEXP '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'
|
80
|
) AS decimalLongs;
|
81
|
|
82
|
SELECT Latitude, Longitude
|
83
|
FROM nybg_raw
|
84
|
WHERE Latitude IS NOT NULL AND Longitude IS NOT NULL
|
85
|
LIMIT 10;
|
86
|
|
87
|
SELECT
|
88
|
(
|
89
|
SELECT COUNT(*)
|
90
|
FROM nybg_raw
|
91
|
WHERE Latitude>90 OR Latitude<-90
|
92
|
) AS badLats,
|
93
|
(
|
94
|
SELECT COUNT(*)
|
95
|
FROM nybg_raw
|
96
|
WHERE Longitude>180 OR Longitude<-180
|
97
|
) AS badLongs;
|
98
|
|
99
|
SELECT DayCollected, MonthCollected, YearCollected
|
100
|
FROM nybg_raw
|
101
|
WHERE DayCollected IS NOT NULL OR MonthCollected IS NOT NULL OR YearCollected IS NOT NULL
|
102
|
LIMIT 10;
|
103
|
|
104
|
SELECT
|
105
|
(
|
106
|
SELECT COUNT(*)
|
107
|
FROM nybg_raw
|
108
|
WHERE NOT(DayCollected REGEXP '^-?[0-9]+$')
|
109
|
OR NOT(MonthCollected REGEXP '^-?[0-9]+$')
|
110
|
OR NOT(YearCollected REGEXP '^-?[0-9]+$')
|
111
|
) AS badDateCollected,
|
112
|
(
|
113
|
SELECT COUNT(*)
|
114
|
FROM nybg_raw
|
115
|
WHERE NOT(DayIdentified REGEXP '^-?[0-9]+$')
|
116
|
OR NOT(MonthIdentified REGEXP '^-?[0-9]+$')
|
117
|
OR NOT(YearIdentified REGEXP '^-?[0-9]+$')
|
118
|
) AS badDateIdentified;
|
119
|
|
120
|
SELECT Locality
|
121
|
FROM nybg_raw
|
122
|
WHERE Locality IS NOT NULL
|
123
|
LIMIT 10;
|
124
|
|
125
|
SELECT PlantFungusDescription
|
126
|
FROM nybg_raw
|
127
|
WHERE PlantFungusDescription IS NOT NULL
|
128
|
LIMIT 10;
|
129
|
|
130
|
SELECT Habitat, Vegetation
|
131
|
FROM nybg_raw
|
132
|
WHERE Habitat IS NOT NULL OR Vegetation IS NOT NULL
|
133
|
LIMIT 10;
|
134
|
*/
|