Project

General

Profile

1
SELECT util.search_path_append('util');
2

    
3
SELECT util.rename_cols(:table_str, $$
4
country  => country_verbatim,
5
state    => stateProvince_verbatim,
6
county   => county_verbatim,
7
locality => locality_verbatim,
8
long_deg => longitude_deg_verbatim,
9
long_min => longitude_min_verbatim,
10
long_sec => longitude_sec_verbatim,
11
lat_deg  => latitude_deg_verbatim,
12
lat_min  => latitude_min_verbatim,
13
lat_sec  => latitude_sec_verbatim,
14
habitat  => habitat_verbatim,
15
$$::hstore);
16

    
17
SELECT create_if_not_exists($$CREATE INDEX "Specimen.acronym" ON $$||:table_str||$$ (acronym)$$);
18

    
19
-- remove frameshifted rows
20
-- rerun time: 6 s ("Time: 5537.211 ms") thanks to index
21
DELETE FROM :table WHERE acronym IS NULL;
22
DELETE FROM :table WHERE coll_year !~ '^(?:1[7-9]|20)\d{2}$';
23
DELETE FROM :table WHERE country_verbatim ~ '\d';
24
DELETE FROM :table WHERE longitude_deg_verbatim ~ '[[:alpha:]]' AND longitude_deg_verbatim NOT IN ('RESTRINGIDO');
25
DELETE FROM :table WHERE longitude_min_verbatim !~ '^\d*$';
26
DELETE FROM :table WHERE longitude_sec_verbatim !~ '^\d*$';
27
DELETE FROM :table WHERE latitude_min_verbatim  !~ '^\d*$';
28
DELETE FROM :table WHERE latitude_sec_verbatim  !~ '^\d*$';
29

    
30
-- Remove institutions that we have direct data for
31
DELETE FROM :table
32
WHERE acronym IN (
33
    -- Comments are from e-mail from Brad Boyle on 2013-1-16
34
    'MO' -- "all MO records in REMIB are also available from MO's own website"
35
    --, 'ARIZ' -- Some REMIB ARIZ specimens not yet in ARIZ itself
36
    --, 'NY' -- Some REMIB NY specimens not yet in NY itself
37
    , 'TEX'
38
)
39
/* list obtained using the following on r9459:
40
SELECT DISTINCT dataprovider
41
FROM sourcelist
42
JOIN provider_count ON provider_count.dataprovider = sourcelist.name
43
WHERE source_id = source_by_shortname('REMIB')
44
ORDER BY dataprovider
45
*/
46
;
47

    
48

    
49
CREATE OR REPLACE FUNCTION map_nulls__text(value text)
50
  RETURNS text AS
51
$BODY$
52
-- "ND = no disponible = not available" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)
53
SELECT util.map_nulls('{ND,NA}', $1)
54
$BODY$
55
  LANGUAGE sql IMMUTABLE STRICT
56
  COST 100;
57

    
58
SELECT mk_derived_col((:table_str, 'country'      ), $$map_nulls__text("country_verbatim"      )$$); -- runtime: 3   min ("Time: 183171.010 ms")
59
SELECT mk_derived_col((:table_str, 'stateProvince'), $$map_nulls__text("stateProvince_verbatim")$$); -- runtime: 2.5 min ("Time: 159935.791 ms")
60
SELECT mk_derived_col((:table_str, 'county'       ), $$map_nulls__text("county_verbatim"       )$$); -- runtime: 2.5 min ("Time: 151076.487 ms")
61
SELECT mk_derived_col((:table_str, 'locality'     ), $$map_nulls__text("locality_verbatim"     )$$); -- runtime: 2.5 min ("Time: 155188.486 ms")
62
SELECT mk_derived_col((:table_str, 'habitat'      ), $$map_nulls__text("habitat_verbatim"      )$$); -- runtime: 2   min ("Time: 131904.068 ms")
63

    
64

    
65
CREATE OR REPLACE FUNCTION map_nulls__coord__longitude(value text)
66
  RETURNS double precision AS
67
$BODY$
68
-- TODO: sometimes also +-99, but not when min/sec are valid
69
SELECT util.map_nulls('{-999,999,1000}',
70
       util.map_nulls('{RESTRINGIDO}', $1)::double precision)
71
$BODY$
72
  LANGUAGE sql IMMUTABLE STRICT
73
  COST 100;
74

    
75
CREATE OR REPLACE FUNCTION map_nulls__coord__other(value text)
76
  RETURNS double precision AS
77
$BODY$
78
-- also map nulls that are valid longitudes
79
SELECT util.map_nulls('{-99,99}', map_nulls__coord__longitude($1))
80
$BODY$
81
  LANGUAGE sql IMMUTABLE STRICT
82
  COST 100;
83

    
84
SELECT mk_derived_col((:table_str, 'longitude_deg'), $$map_nulls__coord__longitude("longitude_deg_verbatim")$$); -- runtime: 6   min ("Time: 367070.877 ms")
85
SELECT mk_derived_col((:table_str, 'longitude_min'), $$map_nulls__coord__other    ("longitude_min_verbatim")$$);
86
SELECT mk_derived_col((:table_str, 'longitude_sec'), $$map_nulls__coord__other    ("longitude_sec_verbatim")$$); -- runtime: 8   min ("Time: 473198.465 ms")
87
SELECT mk_derived_col((:table_str, 'latitude_deg' ), $$map_nulls__coord__other    ("latitude_deg_verbatim" )$$); -- runtime: 8.5 min ("Time: 508835.119 ms")
88
SELECT mk_derived_col((:table_str, 'latitude_min' ), $$map_nulls__coord__other    ("latitude_min_verbatim" )$$); -- runtime: 8   min ("Time: 484806.499 ms")
89
SELECT mk_derived_col((:table_str, 'latitude_sec' ), $$map_nulls__coord__other    ("latitude_sec_verbatim" )$$); -- runtime: 8.5 min ("Time: 505171.027 ms")
(7-7/9)