1
|
SELECT util.search_path_append('util');
|
2
|
|
3
|
SELECT create_if_not_exists($$CREATE INDEX raw_occurrence_record_plants_institution_code ON raw_occurrence_record_plants ("specimenHolderInstitutions")$$);
|
4
|
|
5
|
-- Remove institutions that we have direct data for
|
6
|
-- rerun time: ~0 ("Time: 4.924 ms") thanks to index
|
7
|
DELETE FROM :table
|
8
|
WHERE "specimenHolderInstitutions" IN (
|
9
|
'ACAD'
|
10
|
, 'ARIZ'
|
11
|
, 'BRIT'
|
12
|
, 'MO'
|
13
|
, 'MT'
|
14
|
, 'NY'
|
15
|
, 'QFA'
|
16
|
, 'TEX'
|
17
|
, 'TRT'
|
18
|
, 'TRTE'
|
19
|
, 'U'
|
20
|
, 'UBC'
|
21
|
, 'WIN'
|
22
|
)
|
23
|
/* there are 4.5 million duplicates as of r9855 [1]
|
24
|
|
25
|
[1] 59,998,354 rows before [2] - 55,417,646 rows after [3] = 4,580,708
|
26
|
|
27
|
[2] 59998354
|
28
|
SELECT "raw_occurrence_record-row_num"
|
29
|
FROM "GBIF".raw_occurrence_record_plants
|
30
|
ORDER BY "raw_occurrence_record-row_num" DESC
|
31
|
LIMIT 1
|
32
|
Total query runtime: 19436 ms.
|
33
|
|
34
|
[3] 55417646
|
35
|
SELECT COUNT(*) FROM "GBIF".raw_occurrence_record_plants
|
36
|
Total query runtime: 23 ms.
|
37
|
*/
|
38
|
/* list obtained using the following on r9459:
|
39
|
SELECT DISTINCT dataprovider
|
40
|
FROM sourcelist
|
41
|
JOIN provider_count ON provider_count.dataprovider = sourcelist.name
|
42
|
WHERE source_id = source_by_shortname('GBIF')
|
43
|
ORDER BY dataprovider
|
44
|
*/
|
45
|
;
|