Project

General

Profile

1
SELECT util.search_path_append('util');
2

    
3
SELECT create_if_not_exists($$CREATE INDEX raw_occurrence_record_plants_institution_code ON raw_occurrence_record_plants ("specimenHolderInstitutions")$$);
4

    
5
-- Remove institutions that we have direct data for
6
-- rerun time: ~0 ("Time: 4.924 ms") thanks to index
7
DELETE FROM :table
8
WHERE "specimenHolderInstitutions" IN (
9
      'ACAD'
10
    , 'ARIZ'
11
    , 'BRIT'
12
    , 'MO'
13
    , 'MT'
14
    , 'NY'
15
    , 'QFA'
16
    , 'TEX'
17
    , 'TRT'
18
    , 'TRTE'
19
    , 'U'
20
    , 'UBC'
21
    , 'WIN'
22
)
23
/* there are 4.5 million duplicates [1]
24

    
25
[1] 59,998,354 rows before [2] - 55,417,646 rows after [3] = 4,580,708
26

    
27
[2] 59998354
28
SELECT "raw_occurrence_record-row_num"
29
FROM "GBIF".raw_occurrence_record_plants
30
ORDER BY "raw_occurrence_record-row_num" DESC
31
LIMIT 1
32
Total query runtime: 19436 ms.
33

    
34
[3] 55417646
35
SELECT COUNT(*) FROM "GBIF".raw_occurrence_record_plants
36
Total query runtime: 23 ms.
37
*/
38
/* list obtained using the following on r9459:
39
SELECT DISTINCT dataprovider
40
FROM sourcelist
41
JOIN provider_count ON provider_count.dataprovider = sourcelist.name
42
WHERE source_id = source_by_shortname('GBIF')
43
ORDER BY dataprovider
44
*/
45
;
(6-6/9)