Project

General

Profile

1
SELECT '# specimen replicates' AS ___;
2
SELECT count(*) AS count FROM nybg_raw;
3

    
4
SELECT '# families' AS ___;
5
SELECT count(DISTINCT Family) AS count FROM nybg_raw;
6

    
7
SELECT '# genera' AS ___;
8
SELECT count(DISTINCT Genus) AS count FROM nybg_raw;
9

    
10
SELECT '# species' AS ___;
11
SELECT count(DISTINCT Genus, Species) AS count FROM nybg_raw;
12

    
13
SELECT '# binomials' AS ___;
14
SELECT count(DISTINCT ScientificName) AS count FROM nybg_raw;
15

    
16
SELECT 'collection codes' AS ___;
17
SELECT DISTINCT CollectionCode AS collectioncode_dwc FROM nybg_raw;
18

    
19
SELECT '# catalog numbers' AS ___;
20
SELECT count(DISTINCT CatalogNumber) AS count FROM nybg_raw;
21

    
22
SELECT 'duplicate catalog numbers' AS ___;
23
SELECT CatalogNumber AS catalognumber_dwc, count(*) AS count
24
FROM nybg_raw
25
GROUP BY CatalogNumber
26
HAVING count > 1
27
;
28

    
29
SELECT '# counties' AS ___;
30
SELECT count(DISTINCT Country, StateProvince, County) AS count FROM nybg_raw;
31

    
32
/*
33
SELECT COUNT(*) as totalRecords, COUNT(DISTINCT CatalogNumber) as uniqueCatalogNumbers
34
FROM nybg_raw;
35

    
36
SELECT CatalogNumber, COUNT(*) AS records
37
FROM nybg_raw
38
GROUP BY CatalogNumber
39
HAVING records>1;
40

    
41
SELECT b.CatalogNumber, DateLastModified, ScientificName, Collector, FieldNumber, 
42
CollectedDate
43
FROM nybg_raw AS a JOIN (
44
SELECT CatalogNumber, COUNT(*) AS records
45
FROM nybg_raw
46
WHERE CatalogNumber IS NOT NULL
47
GROUP BY CatalogNumber
48
HAVING records>1
49
) AS b
50
ON a.CatalogNumber=b.CatalogNumber
51
ORDER BY b.CatalogNumber ASC, DateLastModified DESC;
52

    
53
SELECT DateLastModified, ScientificName, Collector, FieldNumber, 
54
CollectedDate
55
FROM nybg_raw 
56
WHERE CatalogNumber IS NULL
57
ORDER BY CatalogNumber ASC, DateLastModified DESC
58
LIMIT 12;
59

    
60
SELECT 
61
(
62
SELECT COUNT(*)
63
FROM nybg_raw
64
WHERE Latitude IS NOT NULL
65
) AS allLats,
66
(
67
SELECT COUNT(*)
68
FROM nybg_raw
69
WHERE Latitude REGEXP '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'
70
) AS decimalLats,
71
(
72
SELECT COUNT(*)
73
FROM nybg_raw
74
WHERE Longitude IS NOT NULL
75
) AS allLongs,
76
(
77
SELECT COUNT(*)
78
FROM nybg_raw
79
WHERE Longitude REGEXP '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'
80
) AS decimalLongs;
81

    
82
SELECT Latitude, Longitude 
83
FROM nybg_raw
84
WHERE  Latitude IS NOT NULL AND  Longitude IS NOT NULL
85
LIMIT 10;
86

    
87
SELECT 
88
(
89
SELECT COUNT(*)
90
FROM nybg_raw
91
WHERE Latitude>90 OR Latitude<-90
92
) AS badLats,
93
(
94
SELECT COUNT(*)
95
FROM nybg_raw
96
WHERE Longitude>180 OR Longitude<-180
97
) AS badLongs;
98

    
99
SELECT DayCollected, MonthCollected, YearCollected 
100
FROM nybg_raw
101
WHERE DayCollected IS NOT NULL OR MonthCollected IS NOT NULL OR YearCollected IS NOT NULL
102
LIMIT 10;
103

    
104
SELECT
105
(
106
SELECT COUNT(*)
107
FROM nybg_raw
108
WHERE NOT(DayCollected REGEXP '^-?[0-9]+$')
109
OR NOT(MonthCollected REGEXP '^-?[0-9]+$')
110
OR NOT(YearCollected REGEXP '^-?[0-9]+$')
111
) AS badDateCollected,
112
(
113
SELECT COUNT(*)
114
FROM nybg_raw
115
WHERE NOT(DayIdentified REGEXP '^-?[0-9]+$')
116
OR NOT(MonthIdentified REGEXP '^-?[0-9]+$')
117
OR NOT(YearIdentified REGEXP '^-?[0-9]+$')
118
) AS badDateIdentified;
119

    
120
SELECT Locality
121
FROM nybg_raw
122
WHERE Locality IS NOT NULL
123
LIMIT 10;
124

    
125
SELECT PlantFungusDescription
126
FROM nybg_raw
127
WHERE PlantFungusDescription IS NOT NULL
128
LIMIT 10;
129

    
130
SELECT Habitat, Vegetation
131
FROM nybg_raw
132
WHERE Habitat IS NOT NULL OR Vegetation IS NOT NULL
133
LIMIT 10;
134
*/
(2-2/2)