SELECT '# specimen replicates' AS ___; SELECT count(*) AS count FROM nybg_raw; SELECT '# families' AS ___; SELECT count(DISTINCT Family) AS count FROM nybg_raw; SELECT '# genera' AS ___; SELECT count(DISTINCT Genus) AS count FROM nybg_raw; SELECT '# species' AS ___; SELECT count(DISTINCT Genus, Species) AS count FROM nybg_raw; SELECT '# binomials' AS ___; SELECT count(DISTINCT ScientificName) AS count FROM nybg_raw; SELECT 'collection codes' AS ___; SELECT DISTINCT CollectionCode AS collectioncode_dwc FROM nybg_raw; SELECT '# catalog numbers' AS ___; SELECT count(DISTINCT CatalogNumber) AS count FROM nybg_raw; SELECT 'duplicate catalog numbers' AS ___; SELECT CatalogNumber AS catalognumber_dwc, count(*) AS count FROM nybg_raw GROUP BY CatalogNumber HAVING count > 1 ; SELECT '# counties' AS ___; SELECT count(DISTINCT Country, StateProvince, County) AS count FROM nybg_raw; /* SELECT COUNT(*) as totalRecords, COUNT(DISTINCT CatalogNumber) as uniqueCatalogNumbers FROM nybg_raw; SELECT CatalogNumber, COUNT(*) AS records FROM nybg_raw GROUP BY CatalogNumber HAVING records>1; SELECT b.CatalogNumber, DateLastModified, ScientificName, Collector, FieldNumber, CollectedDate FROM nybg_raw AS a JOIN ( SELECT CatalogNumber, COUNT(*) AS records FROM nybg_raw WHERE CatalogNumber IS NOT NULL GROUP BY CatalogNumber HAVING records>1 ) AS b ON a.CatalogNumber=b.CatalogNumber ORDER BY b.CatalogNumber ASC, DateLastModified DESC; SELECT DateLastModified, ScientificName, Collector, FieldNumber, CollectedDate FROM nybg_raw WHERE CatalogNumber IS NULL ORDER BY CatalogNumber ASC, DateLastModified DESC LIMIT 12; SELECT ( SELECT COUNT(*) FROM nybg_raw WHERE Latitude IS NOT NULL ) AS allLats, ( SELECT COUNT(*) FROM nybg_raw WHERE Latitude REGEXP '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$' ) AS decimalLats, ( SELECT COUNT(*) FROM nybg_raw WHERE Longitude IS NOT NULL ) AS allLongs, ( SELECT COUNT(*) FROM nybg_raw WHERE Longitude REGEXP '^(-|\\+){0,1}([0-9]+\\.[0-9]*|[0-9]*\\.[0-9]+|[0-9]+)$' ) AS decimalLongs; SELECT Latitude, Longitude FROM nybg_raw WHERE Latitude IS NOT NULL AND Longitude IS NOT NULL LIMIT 10; SELECT ( SELECT COUNT(*) FROM nybg_raw WHERE Latitude>90 OR Latitude<-90 ) AS badLats, ( SELECT COUNT(*) FROM nybg_raw WHERE Longitude>180 OR Longitude<-180 ) AS badLongs; SELECT DayCollected, MonthCollected, YearCollected FROM nybg_raw WHERE DayCollected IS NOT NULL OR MonthCollected IS NOT NULL OR YearCollected IS NOT NULL LIMIT 10; SELECT ( SELECT COUNT(*) FROM nybg_raw WHERE NOT(DayCollected REGEXP '^-?[0-9]+$') OR NOT(MonthCollected REGEXP '^-?[0-9]+$') OR NOT(YearCollected REGEXP '^-?[0-9]+$') ) AS badDateCollected, ( SELECT COUNT(*) FROM nybg_raw WHERE NOT(DayIdentified REGEXP '^-?[0-9]+$') OR NOT(MonthIdentified REGEXP '^-?[0-9]+$') OR NOT(YearIdentified REGEXP '^-?[0-9]+$') ) AS badDateIdentified; SELECT Locality FROM nybg_raw WHERE Locality IS NOT NULL LIMIT 10; SELECT PlantFungusDescription FROM nybg_raw WHERE PlantFungusDescription IS NOT NULL LIMIT 10; SELECT Habitat, Vegetation FROM nybg_raw WHERE Habitat IS NOT NULL OR Vegetation IS NOT NULL LIMIT 10; */