Project

General

Profile

1 10245 aaronmk
SELECT util.search_path_append('util');
2 7249 aaronmk
3 10339 aaronmk
SELECT util.rename_cols(:table_str, $$
4
country  => country_verbatim,
5
state    => stateProvince_verbatim,
6
county   => county_verbatim,
7
locality => locality_verbatim,
8
long_deg => longitude_deg_verbatim,
9
long_min => longitude_min_verbatim,
10
long_sec => longitude_sec_verbatim,
11
lat_deg  => latitude_deg_verbatim,
12
lat_min  => latitude_min_verbatim,
13
lat_sec  => latitude_sec_verbatim,
14
habitat  => habitat_verbatim,
15
$$::hstore);
16
17 10334 aaronmk
SELECT create_if_not_exists($$CREATE INDEX "Specimen.acronym" ON $$||:table_str||$$ (acronym)$$);
18 10330 aaronmk
19
-- remove frameshifted rows
20 10333 aaronmk
DELETE FROM :table WHERE acronym IS NULL;
21 10336 aaronmk
DELETE FROM :table WHERE coll_year !~ '^(?:1[7-9]|20)\d{2}$';
22 10339 aaronmk
DELETE FROM :table WHERE country_verbatim ~ '\d';
23
DELETE FROM :table WHERE longitude_deg_verbatim ~ '[[:alpha:]]' AND longitude_deg_verbatim NOT IN ('RESTRINGIDO');
24
DELETE FROM :table WHERE longitude_min_verbatim !~ '^\d*$';
25
DELETE FROM :table WHERE longitude_sec_verbatim !~ '^\d*$';
26
DELETE FROM :table WHERE latitude_min_verbatim  !~ '^\d*$';
27
DELETE FROM :table WHERE latitude_sec_verbatim  !~ '^\d*$';
28 10330 aaronmk
29 7249 aaronmk
-- Remove institutions that we have direct data for
30
DELETE FROM :table
31
WHERE acronym IN (
32 7250 aaronmk
    -- Comments are from e-mail from Brad Boyle on 2013-1-16
33
    'MO' -- "all MO records in REMIB are also available from MO's own website"
34
    --, 'ARIZ' -- Some REMIB ARIZ specimens not yet in ARIZ itself
35
    --, 'NY' -- Some REMIB NY specimens not yet in NY itself
36 9502 aaronmk
    , 'TEX'
37 7249 aaronmk
)
38 9502 aaronmk
/* list obtained using the following on r9459:
39 9501 aaronmk
SELECT DISTINCT dataprovider
40 7249 aaronmk
FROM sourcelist
41 9501 aaronmk
JOIN provider_count ON provider_count.dataprovider = sourcelist.name
42 7249 aaronmk
WHERE source_id = source_by_shortname('REMIB')
43 9501 aaronmk
ORDER BY dataprovider
44 7249 aaronmk
*/
45
;
46 10339 aaronmk
47
48
CREATE OR REPLACE FUNCTION map_nulls__text(value text)
49
  RETURNS text AS
50
$BODY$
51
-- "ND = no disponible = not available" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)
52
SELECT util.map_nulls('{ND,NA}', $1)
53
$BODY$
54
  LANGUAGE sql IMMUTABLE STRICT
55
  COST 100;
56
57
SELECT mk_derived_col((:table_str, 'country'      ), $$map_nulls__text("country_verbatim"      )$$); -- runtime: 3   min ("Time: 183171.010 ms")
58
SELECT mk_derived_col((:table_str, 'stateProvince'), $$map_nulls__text("stateProvince_verbatim")$$); -- runtime: 2.5 min ("Time: 159935.791 ms")
59
SELECT mk_derived_col((:table_str, 'county'       ), $$map_nulls__text("county_verbatim"       )$$); -- runtime: 2.5 min ("Time: 151076.487 ms")
60
SELECT mk_derived_col((:table_str, 'locality'     ), $$map_nulls__text("locality_verbatim"     )$$); -- runtime: 2.5 min ("Time: 155188.486 ms")
61
SELECT mk_derived_col((:table_str, 'habitat'      ), $$map_nulls__text("habitat_verbatim"      )$$); -- runtime: 2   min ("Time: 131904.068 ms")
62
63
64
CREATE OR REPLACE FUNCTION map_nulls__coord__longitude(value text)
65
  RETURNS double precision AS
66
$BODY$
67
-- TODO: sometimes also +-99, but not when min/sec are valid
68
SELECT util.map_nulls('{-999,999,1000}',
69
       util.map_nulls('{RESTRINGIDO}', $1)::double precision)
70
$BODY$
71
  LANGUAGE sql IMMUTABLE STRICT
72
  COST 100;
73
74
CREATE OR REPLACE FUNCTION map_nulls__coord__other(value text)
75
  RETURNS double precision AS
76
$BODY$
77
-- also map nulls that are valid longitudes
78
SELECT util.map_nulls('{-99,99}', map_nulls__coord__longitude($1))
79
$BODY$
80
  LANGUAGE sql IMMUTABLE STRICT
81
  COST 100;
82
83
SELECT mk_derived_col((:table_str, 'longitude_deg'), $$map_nulls__coord__longitude("longitude_deg_verbatim")$$); -- runtime: 6   min ("Time: 367070.877 ms")
84
SELECT mk_derived_col((:table_str, 'longitude_min'), $$map_nulls__coord__other    ("longitude_min_verbatim")$$);
85
SELECT mk_derived_col((:table_str, 'longitude_sec'), $$map_nulls__coord__other    ("longitude_sec_verbatim")$$); -- runtime: 8   min ("Time: 473198.465 ms")
86
SELECT mk_derived_col((:table_str, 'latitude_deg' ), $$map_nulls__coord__other    ("latitude_deg_verbatim" )$$); -- runtime: 8.5 min ("Time: 508835.119 ms")
87
SELECT mk_derived_col((:table_str, 'latitude_min' ), $$map_nulls__coord__other    ("latitude_min_verbatim" )$$); -- runtime: 8   min ("Time: 484806.499 ms")
88
SELECT mk_derived_col((:table_str, 'latitude_sec' ), $$map_nulls__coord__other    ("latitude_sec_verbatim" )$$); -- runtime: 8.5 min ("Time: 505171.027 ms")