Project

General

Profile

« Previous | Next » 

Revision 12895

validation/aggregating/specimens/**.sql: removed trailing whitespace, using the steps at wiki.vegpath.org/Aggregating_validations_refactoring#translate-to-Postgres

View differences:

trunk/validation/aggregating/specimens/NY/qualitative_validations_source_db_NYBG.VegCore.sql
1 1
-- -------------------------------------------------------------------------
2 2
--  Quantitative validation queries against NYBG source db
3
-- 
3
--
4 4
-- Queries table `nybg_raw` in mysql db `bien2_staging` on nimoy.nceas.ucsb.edu
5
-- Verbatim import of NYSpecimenDataAmericas.csv, a DwC format dump of 
5
-- Verbatim import of NYSpecimenDataAmericas.csv, a DwC format dump of
6 6
-- specimen records from NYBG specimen database, except for the three
7
-- columns key, IdentifiedDate and CollectedDate, which where added and 
8
-- populated by Brad after importing raw data. 
7
-- columns key, IdentifiedDate and CollectedDate, which where added and
8
-- populated by Brad after importing raw data.
9 9
-- -------------------------------------------------------------------------
10 10

  
11 11
-- -------------------------------
......
26 26
-- Check: Full inner join to equivalent query on BIEN3 db should return same number of rows
27 27
-- -------------------------------
28 28
SELECT DISTINCT CONCAT_WS(' ', Genus, Species) AS Species
29
FROM nybg_raw 
29
FROM nybg_raw
30 30
WHERE Genus IS NOT NULL AND Species IS NOT NULL;
31 31

  
32 32
-- -------------------------------
33 33
-- 4. Count of unique (verbatim) non-null species, without author
34 34
-- Check: Full inner join to equivalent query on BIEN3 db should return 1 row
35 35
-- -------------------------------
36
SELECT COUNT(DISTINCT CONCAT_WS(' ', Genus, Species)) AS subspecies FROM nybg_raw 
36
SELECT COUNT(DISTINCT CONCAT_WS(' ', Genus, Species)) AS subspecies FROM nybg_raw
37 37
WHERE Genus IS NOT NULL AND Species IS NOT NULL AND Subspecies IS NOT NULL;
38 38

  
39 39
-- -------------------------------
40 40
-- 5. List of verbatim species, excluding author
41 41
-- Check: Full inner join to equivalent query on BIEN3 db should return 3335 rows
42 42
-- -------------------------------
43
SELECT DISTINCT CONCAT_WS(' ', Genus, Species) AS subspecies FROM nybg_raw 
43
SELECT DISTINCT CONCAT_WS(' ', Genus, Species) AS subspecies FROM nybg_raw
44 44
WHERE Genus IS NOT NULL AND Species IS NOT NULL AND Subspecies IS NOT NULL;
45 45

  
46 46
-- -------------------------------
47 47
-- 6. Count of unique (verbatim) non-null subspecific taxa, without author
48 48
-- Check: Full inner join to equivalent query on BIEN3 db should return 1 row
49 49
-- -------------------------------
50
SELECT COUNT(DISTINCT CONCAT_WS(' ', Genus, Species, Subspecies)) AS Species FROM nybg_raw 
50
SELECT COUNT(DISTINCT CONCAT_WS(' ', Genus, Species, Subspecies)) AS Species FROM nybg_raw
51 51
WHERE Genus IS NOT NULL AND Species IS NOT NULL;
52 52

  
53 53
-- -------------------------------
......
55 55
-- Check: Full inner join to equivalent query on BIEN3 db should return 40145 rows
56 56
-- -------------------------------
57 57
SELECT DISTINCT CONCAT_WS(' ', Genus, Species) AS Species
58
FROM nybg_raw 
58
FROM nybg_raw
59 59
WHERE Genus IS NOT NULL AND Species IS NOT NULL;
60 60

  
61 61
-- -------------------------------
......
63 63
-- Check: Full inner join to equivalent query on BIEN3 db should return 1 row
64 64
-- -------------------------------
65 65
SELECT COUNT(DISTINCT TRIM(CONCAT_WS(' ',
66
TRIM(CONCAT_WS(' ', Genus, IFNULL(Species,''), IFNULL(Subspecies,''))), 
66
TRIM(CONCAT_WS(' ', Genus, IFNULL(Species,''), IFNULL(Subspecies,''))),
67 67
TRIM(IFNULL(ScientificNameAuthor,''))
68 68
))) AS fullScientificNames
69
FROM nybg_raw 
69
FROM nybg_raw
70 70
WHERE Genus IS NOT NULL;
71 71

  
72 72
-- -------------------------------
......
74 74
-- Check: Full inner join to equivalent query on BIEN3 db should return 45997 rows
75 75
-- -------------------------------
76 76
SELECT DISTINCT TRIM(CONCAT_WS(' ',
77
TRIM(CONCAT_WS(' ', Genus, IFNULL(Species,''), IFNULL(Subspecies,''))), 
77
TRIM(CONCAT_WS(' ', Genus, IFNULL(Species,''), IFNULL(Subspecies,''))),
78 78
TRIM(IFNULL(ScientificNameAuthor,''))
79 79
)) AS fullScientificName
80
FROM nybg_raw 
80
FROM nybg_raw
81 81
WHERE Genus IS NOT NULL;
82 82

  
83 83
-- -------------------------------
......
95 95
-- Note: character set issues may cause mis-matches. This query is a good way to reveal
96 96
-- character set issues, either in source db or in BIEN
97 97
-- -------------------------------
98
SELECT DISTINCT Country, StateProvince, County 
99
FROM nybg_raw; 
98
SELECT DISTINCT Country, StateProvince, County
99
FROM nybg_raw;
100 100

  
101 101
-- -------------------------------
102
-- 12. Check distinct Collector names + collection numbers + collection dates, 
102
-- 12. Check distinct Collector names + collection numbers + collection dates,
103 103
-- plus total records
104 104
-- Check: Full inner join to equivalent query on BIEN3 db should return 309396 rows
105 105
-- -------------------------------
106
SELECT IFNULL(Collector,'') AS collectorName, IFNULL(FieldNumber,'') AS collectionNumber, 
106
SELECT IFNULL(Collector,'') AS collectorName, IFNULL(FieldNumber,'') AS collectionNumber,
107 107
IFNULL(CollectedDate,'') AS dataCollected, COUNT(*) AS specimenRecords
108 108
FROM nybg_raw
109 109
GROUP BY Collector, FieldNumber, CollectedDate
110 110
ORDER BY Collector, FieldNumber, CollectedDate;
111 111

  
112 112
-- -------------------------------
113
-- 13. Count of all  verbatim Latitude and Longitude values, as well as all 
113
-- 13. Count of all  verbatim Latitude and Longitude values, as well as all
114 114
-- latitude and longitude values that are decimals numbers
115 115
-- Check: full join to equivalent query against BIEN3 should return 1 row
116 116
-- -------------------------------
117
SELECT 
117
SELECT
118 118
(
119 119
SELECT COUNT(*)
120 120
FROM nybg_raw
......
141 141
-- of decimal latitude or decimal longitude
142 142
-- Check: full join to equivalent query against BIEN3 should return 1 row
143 143
-- -------------------------------
144
SELECT 
144
SELECT
145 145
(
146 146
SELECT COUNT(*)
147 147
FROM nybg_raw
trunk/validation/aggregating/specimens/qualitative_validations_specimens.sql
1 1
-- -------------------------------------------------------------------------
2 2
-- Quantitative validation queries on the BIEN database
3
-- 
3
--
4 4
-- Applies to any datasource with specimens
5
-- 
5
--
6 6
-- DB: vegbien
7 7
-- Host: vegbiendev.nceas.ucsb.edu
8 8
-- -------------------------------------------------------------------------
......
89 89
;
90 90

  
91 91
-- -------------------------------
92
-- 12. Check distinct Collector names + collection numbers + collection dates, 
92
-- 12. Check distinct Collector names + collection numbers + collection dates,
93 93
-- plus total records
94 94
-- Check: should return 309396 rows
95 95
-- -------------------------------
......
97 97
;
98 98

  
99 99
-- -------------------------------
100
-- 13. Count of all  verbatim Latitude and Longitude values, as well as all 
100
-- 13. Count of all  verbatim Latitude and Longitude values, as well as all
101 101
-- latitude and longitude values that are decimals numbers
102 102
-- Check: should return 1 row
103 103
-- -------------------------------

Also available in: Unified diff