1
|
SELECT util.search_path_append('util');
|
2
|
|
3
|
SELECT util.rename_cols(:table_str, $$
|
4
|
country => country_verbatim,
|
5
|
state => stateProvince_verbatim,
|
6
|
county => county_verbatim,
|
7
|
locality => locality_verbatim,
|
8
|
long_deg => longitude_deg_verbatim,
|
9
|
long_min => longitude_min_verbatim,
|
10
|
long_sec => longitude_sec_verbatim,
|
11
|
lat_deg => latitude_deg_verbatim,
|
12
|
lat_min => latitude_min_verbatim,
|
13
|
lat_sec => latitude_sec_verbatim,
|
14
|
habitat => habitat_verbatim,
|
15
|
$$::hstore);
|
16
|
|
17
|
SELECT create_if_not_exists($$CREATE INDEX "Specimen.acronym" ON $$||:table_str||$$ (acronym)$$);
|
18
|
|
19
|
-- remove frameshifted rows
|
20
|
DELETE FROM :table WHERE acronym IS NULL;
|
21
|
DELETE FROM :table WHERE coll_year !~ '^(?:1[7-9]|20)\d{2}$';
|
22
|
DELETE FROM :table WHERE country_verbatim ~ '\d';
|
23
|
DELETE FROM :table WHERE longitude_deg_verbatim ~ '[[:alpha:]]' AND longitude_deg_verbatim NOT IN ('RESTRINGIDO');
|
24
|
DELETE FROM :table WHERE longitude_min_verbatim !~ '^\d*$';
|
25
|
DELETE FROM :table WHERE longitude_sec_verbatim !~ '^\d*$';
|
26
|
DELETE FROM :table WHERE latitude_min_verbatim !~ '^\d*$';
|
27
|
DELETE FROM :table WHERE latitude_sec_verbatim !~ '^\d*$';
|
28
|
|
29
|
-- Remove institutions that we have direct data for
|
30
|
DELETE FROM :table
|
31
|
WHERE acronym IN (
|
32
|
-- Comments are from e-mail from Brad Boyle on 2013-1-16
|
33
|
'MO' -- "all MO records in REMIB are also available from MO's own website"
|
34
|
--, 'ARIZ' -- Some REMIB ARIZ specimens not yet in ARIZ itself
|
35
|
--, 'NY' -- Some REMIB NY specimens not yet in NY itself
|
36
|
, 'TEX'
|
37
|
)
|
38
|
/* list obtained using the following on r9459:
|
39
|
SELECT DISTINCT dataprovider
|
40
|
FROM sourcelist
|
41
|
JOIN provider_count ON provider_count.dataprovider = sourcelist.name
|
42
|
WHERE source_id = source_by_shortname('REMIB')
|
43
|
ORDER BY dataprovider
|
44
|
*/
|
45
|
;
|
46
|
|
47
|
|
48
|
CREATE OR REPLACE FUNCTION map_nulls__text(value text)
|
49
|
RETURNS text AS
|
50
|
$BODY$
|
51
|
-- "ND = no disponible = not available" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)
|
52
|
SELECT util.map_nulls('{ND,NA}', $1)
|
53
|
$BODY$
|
54
|
LANGUAGE sql IMMUTABLE STRICT
|
55
|
COST 100;
|
56
|
|
57
|
SELECT mk_derived_col((:table_str, 'country' ), $$map_nulls__text("country_verbatim" )$$); -- runtime: 3 min ("Time: 183171.010 ms")
|
58
|
SELECT mk_derived_col((:table_str, 'stateProvince'), $$map_nulls__text("stateProvince_verbatim")$$); -- runtime: 2.5 min ("Time: 159935.791 ms")
|
59
|
SELECT mk_derived_col((:table_str, 'county' ), $$map_nulls__text("county_verbatim" )$$); -- runtime: 2.5 min ("Time: 151076.487 ms")
|
60
|
SELECT mk_derived_col((:table_str, 'locality' ), $$map_nulls__text("locality_verbatim" )$$); -- runtime: 2.5 min ("Time: 155188.486 ms")
|
61
|
SELECT mk_derived_col((:table_str, 'habitat' ), $$map_nulls__text("habitat_verbatim" )$$); -- runtime: 2 min ("Time: 131904.068 ms")
|
62
|
|
63
|
|
64
|
CREATE OR REPLACE FUNCTION map_nulls__coord__longitude(value text)
|
65
|
RETURNS double precision AS
|
66
|
$BODY$
|
67
|
-- TODO: sometimes also +-99, but not when min/sec are valid
|
68
|
SELECT util.map_nulls('{-999,999,1000}',
|
69
|
util.map_nulls('{RESTRINGIDO}', $1)::double precision)
|
70
|
$BODY$
|
71
|
LANGUAGE sql IMMUTABLE STRICT
|
72
|
COST 100;
|
73
|
|
74
|
CREATE OR REPLACE FUNCTION map_nulls__coord__other(value text)
|
75
|
RETURNS double precision AS
|
76
|
$BODY$
|
77
|
-- also map nulls that are valid longitudes
|
78
|
SELECT util.map_nulls('{-99,99}', map_nulls__coord__longitude($1))
|
79
|
$BODY$
|
80
|
LANGUAGE sql IMMUTABLE STRICT
|
81
|
COST 100;
|
82
|
|
83
|
SELECT mk_derived_col((:table_str, 'longitude_deg'), $$map_nulls__coord__longitude("longitude_deg_verbatim")$$); -- runtime: 6 min ("Time: 367070.877 ms")
|
84
|
SELECT mk_derived_col((:table_str, 'longitude_min'), $$map_nulls__coord__other ("longitude_min_verbatim")$$);
|
85
|
SELECT mk_derived_col((:table_str, 'longitude_sec'), $$map_nulls__coord__other ("longitude_sec_verbatim")$$); -- runtime: 8 min ("Time: 473198.465 ms")
|
86
|
SELECT mk_derived_col((:table_str, 'latitude_deg' ), $$map_nulls__coord__other ("latitude_deg_verbatim" )$$); -- runtime: 8.5 min ("Time: 508835.119 ms")
|
87
|
SELECT mk_derived_col((:table_str, 'latitude_min' ), $$map_nulls__coord__other ("latitude_min_verbatim" )$$); -- runtime: 8 min ("Time: 484806.499 ms")
|
88
|
SELECT mk_derived_col((:table_str, 'latitude_sec' ), $$map_nulls__coord__other ("latitude_sec_verbatim" )$$); -- runtime: 8.5 min ("Time: 505171.027 ms")
|