Project

General

Profile

« Previous | Next » 

Revision 10339

inputs/REMIB/Specimen/: translated single-column filters to postprocessing derived columns, using the steps at wiki.vegpath.org/Switching_to_new-style_import#stage-I-source-specific > "translate single-column filters to postprocessing derived columns". null-mapping filters now use wrappers around new util.map_nulls(). note that the verbatim columns input to the filters need to be renamed to avoid name collisions with their filtered columns, which must be VegCore terms for new-style import.

View differences:

postprocess.sql
1 1
SELECT util.search_path_append('util');
2 2

  
3
SELECT util.rename_cols(:table_str, $$
4
country  => country_verbatim,
5
state    => stateProvince_verbatim,
6
county   => county_verbatim,
7
locality => locality_verbatim,
8
long_deg => longitude_deg_verbatim,
9
long_min => longitude_min_verbatim,
10
long_sec => longitude_sec_verbatim,
11
lat_deg  => latitude_deg_verbatim,
12
lat_min  => latitude_min_verbatim,
13
lat_sec  => latitude_sec_verbatim,
14
habitat  => habitat_verbatim,
15
$$::hstore);
16

  
3 17
SELECT create_if_not_exists($$CREATE INDEX "Specimen.acronym" ON $$||:table_str||$$ (acronym)$$);
4 18

  
5 19
-- remove frameshifted rows
6 20
-- rerun time: 6 s ("Time: 5537.211 ms") thanks to index
7 21
DELETE FROM :table WHERE acronym IS NULL;
8 22
DELETE FROM :table WHERE coll_year !~ '^(?:1[7-9]|20)\d{2}$';
9
DELETE FROM :table WHERE country ~ '\d';
10
DELETE FROM :table WHERE long_deg ~ '[[:alpha:]]' AND long_deg NOT IN ('RESTRINGIDO');
11
DELETE FROM :table WHERE long_min !~ '^\d*$';
12
DELETE FROM :table WHERE long_sec !~ '^\d*$';
13
DELETE FROM :table WHERE lat_min  !~ '^\d*$';
14
DELETE FROM :table WHERE lat_sec  !~ '^\d*$';
23
DELETE FROM :table WHERE country_verbatim ~ '\d';
24
DELETE FROM :table WHERE longitude_deg_verbatim ~ '[[:alpha:]]' AND longitude_deg_verbatim NOT IN ('RESTRINGIDO');
25
DELETE FROM :table WHERE longitude_min_verbatim !~ '^\d*$';
26
DELETE FROM :table WHERE longitude_sec_verbatim !~ '^\d*$';
27
DELETE FROM :table WHERE latitude_min_verbatim  !~ '^\d*$';
28
DELETE FROM :table WHERE latitude_sec_verbatim  !~ '^\d*$';
15 29

  
16 30
-- Remove institutions that we have direct data for
17 31
DELETE FROM :table
......
30 44
ORDER BY dataprovider
31 45
*/
32 46
;
47

  
48

  
49
CREATE OR REPLACE FUNCTION map_nulls__text(value text)
50
  RETURNS text AS
51
$BODY$
52
-- "ND = no disponible = not available" (https://projects.nceas.ucsb.edu/nceas/projects/bien/wiki/Spot-checking#REMIB)
53
SELECT util.map_nulls('{ND,NA}', $1)
54
$BODY$
55
  LANGUAGE sql IMMUTABLE STRICT
56
  COST 100;
57

  
58
SELECT mk_derived_col((:table_str, 'country'      ), $$map_nulls__text("country_verbatim"      )$$); -- runtime: 3   min ("Time: 183171.010 ms")
59
SELECT mk_derived_col((:table_str, 'stateProvince'), $$map_nulls__text("stateProvince_verbatim")$$); -- runtime: 2.5 min ("Time: 159935.791 ms")
60
SELECT mk_derived_col((:table_str, 'county'       ), $$map_nulls__text("county_verbatim"       )$$); -- runtime: 2.5 min ("Time: 151076.487 ms")
61
SELECT mk_derived_col((:table_str, 'locality'     ), $$map_nulls__text("locality_verbatim"     )$$); -- runtime: 2.5 min ("Time: 155188.486 ms")
62
SELECT mk_derived_col((:table_str, 'habitat'      ), $$map_nulls__text("habitat_verbatim"      )$$); -- runtime: 2   min ("Time: 131904.068 ms")
63

  
64

  
65
CREATE OR REPLACE FUNCTION map_nulls__coord__longitude(value text)
66
  RETURNS double precision AS
67
$BODY$
68
-- TODO: sometimes also +-99, but not when min/sec are valid
69
SELECT util.map_nulls('{-999,999,1000}',
70
       util.map_nulls('{RESTRINGIDO}', $1)::double precision)
71
$BODY$
72
  LANGUAGE sql IMMUTABLE STRICT
73
  COST 100;
74

  
75
CREATE OR REPLACE FUNCTION map_nulls__coord__other(value text)
76
  RETURNS double precision AS
77
$BODY$
78
-- also map nulls that are valid longitudes
79
SELECT util.map_nulls('{-99,99}', map_nulls__coord__longitude($1))
80
$BODY$
81
  LANGUAGE sql IMMUTABLE STRICT
82
  COST 100;
83

  
84
SELECT mk_derived_col((:table_str, 'longitude_deg'), $$map_nulls__coord__longitude("longitude_deg_verbatim")$$); -- runtime: 6   min ("Time: 367070.877 ms")
85
SELECT mk_derived_col((:table_str, 'longitude_min'), $$map_nulls__coord__other    ("longitude_min_verbatim")$$);
86
SELECT mk_derived_col((:table_str, 'longitude_sec'), $$map_nulls__coord__other    ("longitude_sec_verbatim")$$); -- runtime: 8   min ("Time: 473198.465 ms")
87
SELECT mk_derived_col((:table_str, 'latitude_deg' ), $$map_nulls__coord__other    ("latitude_deg_verbatim" )$$); -- runtime: 8.5 min ("Time: 508835.119 ms")
88
SELECT mk_derived_col((:table_str, 'latitude_min' ), $$map_nulls__coord__other    ("latitude_min_verbatim" )$$); -- runtime: 8   min ("Time: 484806.499 ms")
89
SELECT mk_derived_col((:table_str, 'latitude_sec' ), $$map_nulls__coord__other    ("latitude_sec_verbatim" )$$); -- runtime: 8.5 min ("Time: 505171.027 ms")

Also available in: Unified diff