Project

General

Profile

-- Setup. :table and :table_str are psql variables supplied by the caller
-- (presumably the same table, as an identifier and as a string literal
-- respectively -- confirm against the invoking script).

-- Presumably appends the util schema to the search_path so its functions can
-- be called unqualified -- confirm against util.search_path_append().
SELECT util.search_path_append('util');

-- Index the holding-institution column, which the cleanup DELETEs in this
-- script filter on.
SELECT create_if_not_exists($$CREATE INDEX "Specimen.specimenHolderInstitutions" ON $$||:table_str||$$ ("specimenHolderInstitutions")$$);

-- remove frameshifted rows
-- Each statement deletes rows in which a column holds text that does not fit
-- the pattern expected for that column. Note that ~ and !~ yield NULL (not
-- true) for NULL input, so rows whose tested column is NULL are kept by the
-- regex-based statements.

-- institution is missing altogether
DELETE FROM :table WHERE "specimenHolderInstitutions" IS NULL;
-- year is not exactly four digits in the range 1700-2099
DELETE FROM :table WHERE "yearCollected" !~ '^(?:1[7-9]|20)\d{2}$';
-- country contains a digit
DELETE FROM :table WHERE country_verbatim ~ '\d';
-- longitude degrees contain a letter, other than the RESTRINGIDO placeholder
-- (which is mapped to NULL later by map_nulls__coord__longitude())
DELETE FROM :table WHERE longitude_deg_verbatim ~ '[[:alpha:]]' AND longitude_deg_verbatim NOT IN ('RESTRINGIDO');
-- minutes/seconds must be empty or consist of digits only
DELETE FROM :table WHERE longitude_min_verbatim !~ '^\d*$';
DELETE FROM :table WHERE longitude_sec_verbatim !~ '^\d*$';
DELETE FROM :table WHERE latitude_min_verbatim  !~ '^\d*$';
DELETE FROM :table WHERE latitude_sec_verbatim  !~ '^\d*$';

-- Remove institutions that we have direct data for
-- Institutions that are commented out in the list are deliberately kept; the
-- reason is given next to each one.
DELETE FROM :table
WHERE "specimenHolderInstitutions" IN (
    -- Comments are from e-mail from Brad Boyle on 2013-1-16
    'MO' -- "all MO records in REMIB are also available from MO's own website"
    --, 'ARIZ' -- Some REMIB ARIZ specimens not yet in ARIZ itself
    --, 'NY' -- Some REMIB NY specimens not yet in NY itself
    , 'TEX'
)
/* list obtained using the following on r9459:
SELECT DISTINCT dataprovider
FROM sourcelist
JOIN provider_count ON provider_count.dataprovider = sourcelist.name
WHERE source_id = (SELECT source_by_shortname('REMIB'))
ORDER BY dataprovider
*/
;


-- map_nulls() derived cols
-- runtime: 7.5 min ("real 7m27.083s") @vegbiendev

-- map_nulls__text(value): passes a verbatim text value through
-- util.map_nulls() with the placeholder list {ND,NA}.
-- Presumably returns NULL when the value equals one of the placeholders and
-- the value itself otherwise -- confirm against util.map_nulls().
CREATE OR REPLACE FUNCTION map_nulls__text(value text)
  RETURNS text AS
$BODY$
-- "ND = no disponible = not available" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)
SELECT util.map_nulls('{ND,NA}', $1)
$BODY$
  LANGUAGE sql IMMUTABLE
  COST 100;

-- Text columns: each call passes mk_derived_col() a (table, column) pair and
-- an expression applying map_nulls__text() to the matching *_verbatim column.
-- Presumably this creates/populates the named column from that expression --
-- confirm against mk_derived_col().
SELECT mk_derived_col((:table_str, 'country'      ), $$map_nulls__text("country_verbatim"      )$$); -- runtime: 35 s ("Time: 35690.797 ms") @vegbiendev
SELECT mk_derived_col((:table_str, 'stateProvince'), $$map_nulls__text("stateProvince_verbatim")$$); -- runtime: 35 s ("Time: 36074.430 ms") @vegbiendev
SELECT mk_derived_col((:table_str, 'county'       ), $$map_nulls__text("county_verbatim"       )$$); -- runtime: 35 s ("Time: 36096.911 ms") @vegbiendev
SELECT mk_derived_col((:table_str, 'locality'     ), $$map_nulls__text("locality_verbatim"     )$$); -- runtime: 35 s ("Time: 36076.364 ms") @vegbiendev
SELECT mk_derived_col((:table_str, 'habitat'      ), $$map_nulls__text("habitat_verbatim"      )$$); -- runtime: 35 s ("Time: 35481.828 ms") @vegbiendev


-- map_nulls__coord__longitude(value): converts a verbatim coordinate part to
-- double precision in two steps:
--   1. the text placeholder RESTRINGIDO is run through util.map_nulls()
--      first, because it could not be cast to a number;
--   2. the result is cast to double precision and run through
--      util.map_nulls() again with the numeric placeholders -999, 999, 1000.
-- Presumably util.map_nulls() returns NULL for listed values -- confirm.
-- -99/99 are intentionally NOT in the list here; see
-- map_nulls__coord__other() for the variant that includes them.
CREATE OR REPLACE FUNCTION map_nulls__coord__longitude(value text)
  RETURNS double precision AS
$BODY$
-- TODO: sometimes also +-99, but not when min/sec are valid
SELECT util.map_nulls('{-999,999,1000}',
       util.map_nulls('{RESTRINGIDO}', $1)::double precision)
$BODY$
  LANGUAGE sql IMMUTABLE
  COST 100;

-- map_nulls__coord__other(value): same conversion as
-- map_nulls__coord__longitude(), with -99 and 99 additionally passed to
-- util.map_nulls() as placeholders. Intended for coordinate parts other than
-- longitude degrees.
CREATE OR REPLACE FUNCTION map_nulls__coord__other(value text)
  RETURNS double precision AS
$BODY$
-- also map nulls that are valid longitudes
SELECT util.map_nulls('{-99,99}', map_nulls__coord__longitude($1))
$BODY$
  LANGUAGE sql IMMUTABLE
  COST 100;

-- Coordinate columns: only longitude_deg uses the longitude variant, which
-- does not treat -99/99 as placeholders; every other part uses
-- map_nulls__coord__other(), which does.
SELECT mk_derived_col((:table_str, 'longitude_deg'), $$map_nulls__coord__longitude("longitude_deg_verbatim")$$); -- runtime: 40 s ("Time: 39417.099 ms") @vegbiendev
SELECT mk_derived_col((:table_str, 'longitude_min'), $$map_nulls__coord__other    ("longitude_min_verbatim")$$); -- runtime: 40 s ("Time: 41929.772 ms") @vegbiendev
SELECT mk_derived_col((:table_str, 'longitude_sec'), $$map_nulls__coord__other    ("longitude_sec_verbatim")$$); -- runtime: 40 s ("Time: 42106.205 ms") @vegbiendev
SELECT mk_derived_col((:table_str, 'latitude_deg' ), $$map_nulls__coord__other    ("latitude_deg_verbatim" )$$); -- runtime: 40 s ("Time: 42187.294 ms") @vegbiendev
SELECT mk_derived_col((:table_str, 'latitude_min' ), $$map_nulls__coord__other    ("latitude_min_verbatim" )$$); -- runtime: 40 s ("Time: 43220.851 ms") @vegbiendev
SELECT mk_derived_col((:table_str, 'latitude_sec' ), $$map_nulls__coord__other    ("latitude_sec_verbatim" )$$); -- runtime: 40 s ("Time: 42267.566 ms") @vegbiendev