Project

General

Profile

1 9920 aaronmk
SELECT util.search_path_append('util');
2
3 11369 aaronmk
-- runtime: 30 s ("36510.730 ms") @starscream
4 9921 aaronmk
SELECT set_col_types(:table_str, ARRAY[
5 10390 aaronmk
  ('decimalLatitude'                , 'double precision')
6
, ('decimalLongitude'               , 'double precision')
7
, ('latLongDomainValid_verbatim'    , 'integer')
8
, ('latLongInCountry_verbatim'      , 'integer')
9
, ('latLongInStateProvince_verbatim', 'integer')
10
, ('latLongInCounty_verbatim'       , 'integer')
11 9921 aaronmk
]::col_cast[]);
12
13 11369 aaronmk
ALTER TABLE :table ALTER COLUMN "decimalLatitude"  SET NOT NULL; -- runtime: 5 s ("5248.843") @starscream
14
ALTER TABLE :table ALTER COLUMN "decimalLongitude" SET NOT NULL; -- runtime: 1 s ("1064.650") @starscream
15 6658 aaronmk
16 11388 aaronmk
SELECT create_if_not_exists($$CREATE UNIQUE INDEX $$||:table_str||%'__unique'||$$ ON $$||:table_str||$$ (
17
(array["decimalLatitude"]), (array["decimalLongitude"]), (array[country]), (array["stateProvince"]), (array[county])
18
)$$); -- runtime: 2 min ("106693.873") @starscream
19 10389 aaronmk
20
21
-- map_*() derived cols
22 11369 aaronmk
-- runtime: 6 min @starscream
23 10389 aaronmk
24
CREATE OR REPLACE FUNCTION map_coord_validity(value integer)
25
  RETURNS boolean AS
26
$BODY$
27
/*
28
"For latlonvalidity:
29
-1: Latitude and/or longitude is null
30
 0: Coordinate is not a valid geographic location
31
 1: Coordinate is a valid geographic location
32
" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)
33
*/
34
SELECT util._map('-1=>NULL, 0=>false, 1=>true', $1::text)::boolean
35
$BODY$
36
  LANGUAGE sql IMMUTABLE
37
  COST 100;
38
39 11369 aaronmk
SELECT mk_derived_col((:table_str, 'latLongDomainValid'), $$map_coord_validity("latLongDomainValid_verbatim")$$); -- runtime: 1.5 min ("Time: 92913.814 ms") @starscream
40 10389 aaronmk
41
CREATE OR REPLACE FUNCTION map_geovalidity(value integer)
42
  RETURNS boolean AS
43
$BODY$
44
/*
45
"For countryvalidity/stateprovincevalidity/countyvalidity:
46
-1: Name is null at this or some higher level
47
 0: Complete name provided, but couldn't be scrubbed to GADM
48
 1: Point is >5km from putative GADM polygon
49
 2: Point is <=5km from putative GADM polygon, but still outside it
50
 3: Point is in (or on border of) putative GADM polygon
51
" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)
52 14556 aaronmk
53
0=>false: unscrubbable names should be geo*in*valid, according to Brad:
54
 "I recommend that for Canada and the US, we do NOT mark as geovalid any record
55
 which cannot be validated at least to the second-level political division"
56
 (mailto:bboyle@email.arizona.edu?Brad_Boyle.2014-8-19-17:50PT.Re:_[Bien-db]_NSR_batch_run)
57 10389 aaronmk
*/
58 14556 aaronmk
SELECT util._map('-1=>NULL, 0=>false, 1=>false, 2=>true, 3=>true', $1::text)::boolean
59 10389 aaronmk
$BODY$
60
  LANGUAGE sql IMMUTABLE
61
  COST 100;
62
63 11369 aaronmk
SELECT mk_derived_col((:table_str, 'latLongInCountry'      ), $$map_geovalidity("latLongInCountry_verbatim"      )$$); -- runtime: 1.5 min ("Time: 95967.016 ms") @starscream
64
SELECT mk_derived_col((:table_str, 'latLongInStateProvince'), $$map_geovalidity("latLongInStateProvince_verbatim")$$); -- runtime: 1.5 min ("Time: 95003.576 ms") @starscream
65
SELECT mk_derived_col((:table_str, 'latLongInCounty'       ), $$map_geovalidity("latLongInCounty_verbatim"       )$$); -- runtime: 1.5 min ("Time: 94405.944 ms") @starscream
66 11375 aaronmk
67
SELECT mk_derived_col((:table_str, 'geovalid'),
68
$$_and("latLongDomainValid", "latLongInCountry", "latLongInStateProvince", "latLongInCounty")$$
69
); -- runtime: 3 min ("Time: 173062.394 ms") @starscream