Project

General

Profile

« Previous | Next » 

Revision 12380

added validation/aggregating/traits/BIEN2_traits/_archive/bien3_validations_traits_complete.sql from Brad's e-mail

View differences:

trunk/validation/aggregating/traits/BIEN2_traits/_archive/bien3_validations_traits_complete.sql
1
-- ----------------------------------------------------------------------
2
-- Complete quantitative validations for BIEN3 trait data
3
-- 
4
-- Compares results of queries on original and normalized data.
5
-- Each query returns pass or fail
6
-- 
7
-- Prepare by: Brad
8
-- Date created: 23 Feb. 2014
9
-- ----------------------------------------------------------------------
10

  
11
-- ------------------
12
-- 1. Count records
13
-- ------------------
14
SELECT CASE innerjoin.recordsmatched
15
WHEN 1 THEN 'pass' ELSE 'fail' END
16
AS query1
17
FROM
18
(
19
SELECT COUNT(*) AS recordsmatched FROM
20
(
21
SELECT COUNT(*) AS totalrecords
22
FROM bien2_traits."TraitObservation"
23
WHERE "taxonName" IS NOT NULL AND "taxonName"<>'' 
24
AND "measurementName" IS NOT NULL AND "measurementName"<>''
25
) AS orig
26
INNER JOIN
27
(
28
SELECT COUNT(*) AS totalrecords
29
FROM public.trait
30
) AS bien
31
ON orig.totalrecords=bien.totalrecords
32
) AS innerjoin
33
;
34

  
35
-- ------------------
36
-- 2. Count trait names
37
-- ------------------
38
SELECT CASE innerjoin.recordsmatched
39
WHEN 1 THEN 'pass' ELSE 'fail' END
40
AS query2
41
FROM
42
(
43
SELECT COUNT(*) AS recordsmatched FROM
44
(
45
SELECT COUNT(DISTINCT "measurementName") AS totalrecords
46
FROM bien2_traits."TraitObservation"
47
WHERE "taxonName" IS NOT NULL AND "taxonName"<>'' 
48
AND "measurementName" IS NOT NULL AND "measurementName"<>''
49
) AS orig
50
INNER JOIN
51
(
52
SELECT COUNT(DISTINCT name) AS totalrecords
53
FROM public.trait
54
) AS bien
55
ON orig.totalrecords=bien.totalrecords
56
) AS innerjoin
57
;
58

  
59
-- ------------------
60
-- 3. List trait names
61
-- ------------------
62
SELECT CASE leftjoin.unmatched+rightjoin.unmatched
63
WHEN 0 THEN 'pass' ELSE 'fail' END
64
AS query3
65
FROM
66
(
67
SELECT COUNT(*) AS unmatched FROM
68
(
69
SELECT DISTINCT "measurementName" AS values
70
FROM bien2_traits."TraitObservation"
71
WHERE "taxonName" IS NOT NULL AND "taxonName"<>'' 
72
AND "measurementName" IS NOT NULL AND "measurementName"<>''
73
ORDER BY values
74
) AS original
75
LEFT JOIN
76
(
77
SELECT DISTINCT name AS values
78
FROM public.trait
79
ORDER BY values
80
) AS normalized
81
ON original.values=normalized.values
82
WHERE normalized.values IS NULL
83
) AS leftjoin
84
,
85
(
86
SELECT COUNT(*) AS unmatched FROM
87
(
88
SELECT DISTINCT "measurementName" AS values
89
FROM bien2_traits."TraitObservation"
90
WHERE "taxonName" IS NOT NULL AND "taxonName"<>'' 
91
AND "measurementName" IS NOT NULL AND "measurementName"<>''
92
ORDER BY values
93
) AS original
94
RIGHT JOIN
95
(
96
SELECT DISTINCT name AS values
97
FROM public.trait
98
ORDER BY values
99
) AS normalized
100
ON original.values=normalized.values
101
WHERE original.values IS NULL
102
) AS rightjoin
103
;
104

  
105
-- ------------------
106
-- 4. Count records per trait
107
-- ------------------
108
SELECT CASE leftjoin.unmatched+rightjoin.unmatched
109
WHEN 0 THEN 'pass' ELSE 'fail' END
110
AS query4
111
FROM
112
(
113
SELECT COUNT(*) AS unmatched FROM
114
(
115
SELECT "measurementName" AS value, COUNT(*) AS countofvalue
116
FROM bien2_traits."TraitObservation"
117
WHERE "taxonName" IS NOT NULL AND "taxonName"<>'' 
118
GROUP BY value
119
ORDER BY value
120
) AS original
121
LEFT JOIN
122
(
123
SELECT name AS value, COUNT(*) AS countofvalue
124
FROM public.trait
125
GROUP BY name
126
ORDER BY name
127
) AS normalized
128
ON original.value=normalized.value AND original.countofvalue=normalized.countofvalue
129
WHERE normalized.value IS NULL
130
) AS leftjoin
131
,
132
(
133
SELECT COUNT(*) AS unmatched FROM
134
(
135
SELECT "measurementName" AS value, COUNT(*) AS countofvalue
136
FROM bien2_traits."TraitObservation"
137
WHERE "taxonName" IS NOT NULL AND "taxonName"<>'' 
138
GROUP BY value
139
ORDER BY value
140
) AS original
141
RIGHT JOIN
142
(
143
SELECT name AS value, COUNT(*) AS countofvalue
144
FROM public.trait
145
GROUP BY name
146
ORDER BY name
147
) AS normalized
148
ON original.value=normalized.value AND original.countofvalue=normalized.countofvalue
149
WHERE original.value IS NULL
150
) AS rightjoin
151
;
152

  
153
-- ------------------
154
-- 5. Count taxa
155
-- ------------------
156
SELECT CASE innerjoin.recordsmatched
157
WHEN 1 THEN 'pass' ELSE 'fail' END
158
AS query1
159
FROM
160
(
161
SELECT COUNT(*) AS recordsmatched FROM
162
(
163
SELECT COUNT(DISTINCT 
164
TRIM(CONCAT_WS(' ',IFNULL("taxonName",''),IFNULL("scientificNameAuthorship",'')))) 
165
AS totalrecords
166
FROM bien2_traits."TraitObservation"
167
WHERE "taxonName" IS NOT NULL AND "taxonName"<>'' 
168
AND "measurementName" IS NOT NULL AND "measurementName"<>''
169
) AS original
170
INNER JOIN
171
(
172
SELECT COUNT(DISTINCT (taxonname, author)) AS totalrecords
173
FROM public.taxonverbatim t JOIN public.source s
174
ON t.source_id=s.source_id
175
WHERE s.shortname='bien2_traits'
176
) AS normalized
177
ON original.totalrecords=normalized.totalrecords
178
) AS innerjoin
179
;
180

  
181
-- ------------------
182
-- 6. List distinct taxa
183
-- ------------------
184
SELECT CASE leftjoin.unmatched+rightjoin.unmatched
185
WHEN 0 THEN 'pass' ELSE 'fail' END
186
AS query3
187
FROM
188
(
189
SELECT COUNT(*) AS unmatched FROM
190
(
191
SELECT DISTINCT 
192
TRIM(CONCAT_WS(' ',IFNULL("taxonName",''),IFNULL("scientificNameAuthorship",'')))
193
AS values
194
FROM bien2_traits."TraitObservation"
195
WHERE "taxonName" IS NOT NULL AND "taxonName"<>'' 
196
AND "measurementName" IS NOT NULL AND "measurementName"<>''
197
ORDER BY values
198
) AS original
199
LEFT JOIN
200
(
201
SELECT DISTINCT concat_ws(' ', taxonname, author) AS values
202
FROM public.taxonverbatim t JOIN public.source s
203
ON t.source_id=s.source_id
204
WHERE s.shortname='bien2_traits'
205
) AS normalized
206
ON original.values=normalized.values
207
WHERE normalized.values IS NULL
208
) AS leftjoin
209
,
210
(
211
SELECT COUNT(*) AS unmatched FROM
212
(
213
SELECT DISTINCT 
214
TRIM(CONCAT_WS(' ',IFNULL("taxonName",''),IFNULL("scientificNameAuthorship",'')))
215
AS values
216
FROM bien2_traits."TraitObservation"
217
WHERE "taxonName" IS NOT NULL AND "taxonName"<>'' 
218
AND "measurementName" IS NOT NULL AND "measurementName"<>''
219
ORDER BY values
220
) AS original
221
RIGHT JOIN
222
(
223
SELECT DISTINCT concat_ws(' ', taxonname, author) AS values
224
FROM public.taxonverbatim t JOIN public.source s
225
ON t.source_id=s.source_id
226
WHERE s.shortname='bien2_traits'
227
) AS normalized
228
ON original.values=normalized.values
229
WHERE original.values IS NULL
230
) AS rightjoin
231
;
232

  
233
-- ------------------
234
-- 7. Trait, value and units 
235
-- ------------------
236
SELECT CASE leftjoin.unmatched+rightjoin.unmatched
237
WHEN 0 THEN 'pass' ELSE 'fail' END
238
AS query4
239
FROM
240
(
241
SELECT COUNT(*) AS unmatched FROM
242
(
243
SELECT "measurementName" AS col1, "measurementValue" AS  col2, "measurementUnit" AS col3
244
FROM bien2_traits."TraitObservation"
245
WHERE "measurementName" IS NOT NULL AND "measurementName"<>''
246
AND "measurementValue" IS NOT NULL AND "measurementValue"<>''
247
ORDER BY "measurementName", "measurementValue", "measurementUnit"
248
) AS original
249
LEFT JOIN
250
(
251
SELECT name AS col1, value AS col2, units AS col3
252
FROM public.trait
253
WHERE name IS NOT NULL AND name<>''
254
AND value IS NOT NULL AND value<>'' 
255
ORDER BY trait, value, units
256
) AS normalized
257
ON original.col1=normalized.col1 
258
AND original.col2=normalized.col2
259
AND original.col3=normalized.col3
260
WHERE normalized.col1 IS NULL
261
) AS leftjoin
262
,
263
(
264
SELECT COUNT(*) AS unmatched FROM
265
(
266
SELECT "measurementName" AS col1, "measurementValue" AS col2, "measurementUnit" AS col3
267
FROM bien2_traits."TraitObservation"
268
WHERE "measurementName" IS NOT NULL AND "measurementName"<>''
269
AND "measurementValue" IS NOT NULL AND "measurementValue"<>''
270
ORDER BY "measurementName", "measurementValue", "measurementUnit"
271
) AS original
272
RIGHT JOIN
273
(
274
SELECT name AS col1, value AS col2, units AS col3
275
FROM public.trait
276
WHERE name IS NOT NULL AND name<>''
277
AND value IS NOT NULL AND value<>'' 
278
ORDER BY trait, value, units
279
) AS normalized
280
ON original.col1=normalized.col1 
281
AND original.col2=normalized.col2
282
AND original.col3=normalized.col3
283
WHERE original.col1 IS NULL
284
) AS rightjoin
285
;
286

  
287
-- ------------------
288
-- 8. Taxon, trait and value for first 5000 records 
289
-- ------------------
290
SELECT CASE leftjoin.unmatched+rightjoin.unmatched
291
WHEN 0 THEN 'pass' ELSE 'fail' END
292
AS query4
293
FROM
294
(
295
SELECT COUNT(*) AS unmatched FROM
296
(
297
SELECT 
298
TRIM(CONCAT_WS(' ',IFNULL("taxonName",''),IFNULL("scientificNameAuthorship",'')))
299
AS col1,
300
"measurementName" AS col2, 
301
"measurementValue" AS col3
302
FROM bien2_traits."TraitObservation"
303
WHERE "taxonName" IS NOT NULL AND "taxonName"<>'' 
304
AND "measurementName" IS NOT NULL AND "measurementName"<>''
305
AND "measurementValue" IS NOT NULL AND "measurementValue"<>''
306
ORDER BY "taxonName","measurementName", "measurementValue"
307
) AS original
308
LEFT JOIN
309
(
310
SELECT concat_ws(' ', taxonname, author) AS col1, name AS col2, value AS col3
311
FROM public.source s JOIN public.taxonverbatim tv 
312
ON s.source_id=tv.source_id
313
JOIN public.taxondetermination td 
314
ON tv.taxonverbatim_id=td.taxonverbatim_id
315
JOIN public.trait t
316
ON td.taxonoccurrence_id=t.taxonoccurrence_id
317
WHERE s.shortname='bien2_traits' AND td.iscurrent
318
AND name IS NOT NULL AND name<>''
319
AND value IS NOT NULL AND value<>'' 
320
ORDER BY col1, col2, col3
321
) AS normalized
322
ON original.col1=normalized.col1 
323
AND original.col2=normalized.col2
324
AND original.col3=normalized.col3
325
WHERE normalized.col1 IS NULL
326
) AS leftjoin
327
,
328
(
329
SELECT COUNT(*) AS unmatched FROM
330
(
331
SELECT 
332
TRIM(CONCAT_WS(' ',IFNULL("taxonName",''),IFNULL("scientificNameAuthorship",'')))
333
AS col1,
334
"measurementName" AS col2, 
335
"measurementValue" AS col3
336
FROM bien2_traits."TraitObservation"
337
WHERE "taxonName" IS NOT NULL AND "taxonName"<>'' 
338
AND "measurementName" IS NOT NULL AND "measurementName"<>''
339
AND "measurementValue" IS NOT NULL AND "measurementValue"<>''
340
ORDER BY "taxonName","measurementName", "measurementValue"
341
) AS original
342
RIGHT JOIN
343
(
344
SELECT concat_ws(' ', taxonname, author) AS col1, name AS col2, value AS col3
345
FROM public.source s JOIN public.taxonverbatim tv 
346
ON s.source_id=tv.source_id
347
JOIN public.taxondetermination td 
348
ON tv.taxonverbatim_id=td.taxonverbatim_id
349
JOIN public.trait t
350
ON td.taxonoccurrence_id=t.taxonoccurrence_id
351
WHERE s.shortname='bien2_traits' AND td.iscurrent
352
AND name IS NOT NULL AND name<>''
353
AND value IS NOT NULL AND value<>'' 
354
ORDER BY col1, col2, col3
355
) AS normalized
356
ON original.col1=normalized.col1 
357
AND original.col2=normalized.col2
358
AND original.col3=normalized.col3
359
WHERE original.col1 IS NULL
360
) AS rightjoin
361
;
trunk/validation/aggregating/traits/BIEN2_traits/_archive/bien3_validations_traits_complete.sql.url
1
mailto:bboyle@email.arizona.edu?Brad_Boyle.2014-2-23-13:56.Re:+[Bien-db]+traits+aggregating+validations+all+passed!.(bien3_validations_traits_complete.sql)

Also available in: Unified diff