Project

General

Profile

« Previous | Next » 

Revision 1244

inputs/NYBG: Added verify/specimens.ref.sql

View differences:

inputs/NYBG/verify/specimens.ref.sql
1
SELECT '# specimen replicates' AS ___;
2
SELECT count(*) AS count FROM nybg_raw;
3

  
4
SELECT '# families' AS ___;
5
SELECT count(DISTINCT Family) AS count FROM nybg_raw;
6

  
7
/*
8
SELECT 
9
(
10
SELECT COUNT(DISTINCT Family) FROM nybg_raw WHERE Family IS NOT NULL
11
) AS families, 
12
(
13
SELECT COUNT(DISTINCT Genus) FROM nybg_raw WHERE Genus IS NOT NULL
14
) AS genera,
15
(
16
SELECT COUNT(DISTINCT Genus, Species) FROM nybg_raw 
17
WHERE Genus IS NOT NULL AND Species IS NOT NULL
18
) AS species;
19

  
20
SELECT DISTINCT CONCAT_WS(' ', Genus, Species) AS Species
21
FROM nybg_raw 
22
WHERE Genus IS NOT NULL AND Species IS NOT NULL;
23

  
24
SELECT DISTINCT Genus AS Genera
25
FROM nybg_raw 
26
WHERE Genus IS NOT NULL;
27

  
28
SELECT DISTINCT Family as Families
29
FROM nybg_raw 
30
WHERE Family IS NOT NULL;
31

  
32
SELECT ScientificName, Genus, Species, Subspecies, ScientificNameAuthor
33
FROM nybg_raw
34
LIMIT 25;
35

  
36
SELECT ScientificName, Genus, Species, Subspecies, ScientificNameAuthor
37
FROM nybg_raw
38
WHERE Subspecies IS NOT NULL
39
LIMIT 15;
40

  
41
SELECT InstitutionCode, COUNT(*) AS records
42
FROM nybg_raw
43
GROUP BY InstitutionCode;
44

  
45
SELECT DISTINCT CollectionCode 
46
FROM nybg_raw;
47

  
48
SELECT DISTINCT Country, StateProvince, County 
49
FROM nybg_raw
50
LIMIT 25;
51

  
52
SELECT COUNT(*) as totalRecords, COUNT(DISTINCT CatalogNumber) as uniqueCatalogNumbers
53
FROM nybg_raw;
54

  
55
SELECT CatalogNumber, COUNT(*) AS records
56
FROM nybg_raw
57
GROUP BY CatalogNumber
58
HAVING records>1;
59

  
60
SELECT b.CatalogNumber, DateLastModified, ScientificName, Collector, FieldNumber, 
61
CollectedDate
62
FROM nybg_raw AS a JOIN (
63
SELECT CatalogNumber, COUNT(*) AS records
64
FROM nybg_raw
65
WHERE CatalogNumber IS NOT NULL
66
GROUP BY CatalogNumber
67
HAVING records>1
68
) AS b
69
ON a.CatalogNumber=b.CatalogNumber
70
ORDER BY b.CatalogNumber ASC, DateLastModified DESC;
71

  
72
SELECT DateLastModified, ScientificName, Collector, FieldNumber, 
73
CollectedDate
74
FROM nybg_raw 
75
WHERE CatalogNumber IS NULL
76
ORDER BY CatalogNumber ASC, DateLastModified DESC
77
LIMIT 12;
78

  
79
SELECT 
80
(
81
SELECT COUNT(*)
82
FROM nybg_raw
83
WHERE Latitude IS NOT NULL
84
) AS allLats,
85
(
86
SELECT COUNT(*)
87
FROM nybg_raw
88
WHERE Latitude REGEXP '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'
89
) AS decimalLats,
90
(
91
SELECT COUNT(*)
92
FROM nybg_raw
93
WHERE Longitude IS NOT NULL
94
) AS allLongs,
95
(
96
SELECT COUNT(*)
97
FROM nybg_raw
98
WHERE Longitude REGEXP '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$'
99
) AS decimalLongs;
100

  
101
SELECT Latitude, Longitude 
102
FROM nybg_raw
103
WHERE  Latitude IS NOT NULL AND  Longitude IS NOT NULL
104
LIMIT 10;
105

  
106
SELECT 
107
(
108
SELECT COUNT(*)
109
FROM nybg_raw
110
WHERE Latitude>90 OR Latitude<-90
111
) AS badLats,
112
(
113
SELECT COUNT(*)
114
FROM nybg_raw
115
WHERE Longitude>180 OR Longitude<-180
116
) AS badLongs;
117

  
118
SELECT DayCollected, MonthCollected, YearCollected 
119
FROM nybg_raw
120
WHERE DayCollected IS NOT NULL OR MonthCollected IS NOT NULL OR YearCollected IS NOT NULL
121
LIMIT 10;
122

  
123
SELECT
124
(
125
SELECT COUNT(*)
126
FROM nybg_raw
127
WHERE NOT(DayCollected REGEXP '^-?[0-9]+$')
128
OR NOT(MonthCollected REGEXP '^-?[0-9]+$')
129
OR NOT(YearCollected REGEXP '^-?[0-9]+$')
130
) AS badDateCollected,
131
(
132
SELECT COUNT(*)
133
FROM nybg_raw
134
WHERE NOT(DayIdentified REGEXP '^-?[0-9]+$')
135
OR NOT(MonthIdentified REGEXP '^-?[0-9]+$')
136
OR NOT(YearIdentified REGEXP '^-?[0-9]+$')
137
) AS badDateIdentified;
138

  
139
SELECT Locality
140
FROM nybg_raw
141
WHERE Locality IS NOT NULL
142
LIMIT 10;
143

  
144
SELECT PlantFungusDescription
145
FROM nybg_raw
146
WHERE PlantFungusDescription IS NOT NULL
147
LIMIT 10;
148

  
149
SELECT Habitat, Vegetation
150
FROM nybg_raw
151
WHERE Habitat IS NOT NULL OR Vegetation IS NOT NULL
152
LIMIT 10;
153
*/
inputs/NYBG/verify/specimens.ref
1
___
2
# specimen replicates
3
count
4
328040
5
___
6
# families
7
count
8
741

Also available in: Unified diff