Revision 10389
Added by Aaron Marcuse-Kubitza over 11 years ago
inputs/.geoscrub/geoscrub_output/postprocess.sql | ||
---|---|---|
1 | 1 |
SELECT util.search_path_append('util'); |
2 | 2 |
|
3 |
-- runtime: 25 s ("real 0m21.770s")
|
|
3 |
-- runtime: 40 s ("real 0m21.770s" + "Time: 17304.769 ms") @starscream
|
|
4 | 4 |
SELECT set_col_types(:table_str, ARRAY[ |
5 |
('decimallatitude', 'double precision') |
|
6 |
, ('decimallongitude', 'double precision') |
|
5 |
('decimallatitude' , 'double precision') |
|
6 |
, ('decimallongitude' , 'double precision') |
|
7 |
, ('latlonvalidity' , 'integer') |
|
8 |
, ('countryvalidity' , 'integer') |
|
9 |
, ('stateprovincevalidity', 'integer') |
|
10 |
, ('countyvalidity' , 'integer') |
|
7 | 11 |
]::col_cast[]); |
8 | 12 |
|
9 | 13 |
ALTER TABLE :table ALTER COLUMN decimallatitude SET NOT NULL; |
... | ... | |
11 | 15 |
|
12 | 16 |
-- runtime: 55 s ("real 0m53.677s") |
13 | 17 |
SELECT create_if_not_exists($$CREATE INDEX geoscrub_output_input ON geoscrub_output (decimallatitude, decimallongitude, country, stateprovince, county)$$); |
18 |
|
|
19 |
|
|
20 |
-- map_*() derived cols |
|
21 |
-- runtime: 2 min @starscream |
|
22 |
|
|
23 |
CREATE OR REPLACE FUNCTION map_coord_validity(value integer) |
|
24 |
RETURNS boolean AS |
|
25 |
$BODY$ |
|
26 |
/* |
|
27 |
"For latlonvalidity: |
|
28 |
-1: Latitude and/or longitude is null |
|
29 |
0: Coordinate is not a valid geographic location |
|
30 |
1: Coordinate is a valid geographic location |
|
31 |
" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT) |
|
32 |
*/ |
|
33 |
SELECT util._map('-1=>NULL, 0=>false, 1=>true', $1::text)::boolean |
|
34 |
$BODY$ |
|
35 |
LANGUAGE sql IMMUTABLE |
|
36 |
COST 100; |
|
37 |
|
|
38 |
SELECT mk_derived_col((:table_str, 'latLongDomainValid'), $$map_coord_validity("latlonvalidity")$$); -- runtime: 25 s ("Time: 26708.932 ms") @starscream |
|
39 |
|
|
40 |
CREATE OR REPLACE FUNCTION map_geovalidity(value integer) |
|
41 |
RETURNS boolean AS |
|
42 |
$BODY$ |
|
43 |
/* |
|
44 |
"For countryvalidity/stateprovincevalidity/countyvalidity: |
|
45 |
-1: Name is null at this or some higher level |
|
46 |
0: Complete name provided, but couldn't be scrubbed to GADM |
|
47 |
1: Point is >5km from putative GADM polygon |
|
48 |
2: Point is <=5km from putative GADM polygon, but still outside it |
|
49 |
3: Point is in (or on border of) putative GADM polygon |
|
50 |
" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT) |
|
51 |
*/ |
|
52 |
SELECT util._map('-1=>NULL, 0=>NULL, 1=>false, 2=>true, 3=>true', $1::text)::boolean |
|
53 |
$BODY$ |
|
54 |
LANGUAGE sql IMMUTABLE |
|
55 |
COST 100; |
|
56 |
|
|
57 |
SELECT mk_derived_col((:table_str, 'latLongInCountry' ), $$map_geovalidity("countryvalidity" )$$); -- runtime: 25 s ("Time: 27884.129 ms") @starscream |
|
58 |
SELECT mk_derived_col((:table_str, 'latLongInStateProvince'), $$map_geovalidity("stateprovincevalidity")$$); -- runtime: 25 s ("Time: 27228.239 ms") @starscream |
|
59 |
SELECT mk_derived_col((:table_str, 'latLongInCounty' ), $$map_geovalidity("countyvalidity" )$$); -- runtime: 25 s ("Time: 27337.520 ms") @starscream |
inputs/.geoscrub/geoscrub_output/map.csv | ||
---|---|---|
8 | 8 |
countrystd,acceptedCountry,, |
9 | 9 |
stateprovincestd,acceptedStateProvince,, |
10 | 10 |
countystd,acceptedCounty,, |
11 |
latlonvalidity,latLongDomainValid,"/_map:[""-1""=,0=false,1=true]/value","""For latlonvalidity: |
|
12 |
-1: Latitude and/or longitude is null |
|
13 |
0: Coordinate is not a valid geographic location |
|
14 |
1: Coordinate is a valid geographic location |
|
15 |
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)" |
|
16 |
countryvalidity,latLongInCountry,"/_map:[./{""-1"",0}=,1=false,./{2,3}=true]/value","""For countryvalidity/stateprovincevalidity/countyvalidity: |
|
17 |
-1: Name is null at this or some higher level |
|
18 |
0: Complete name provided, but couldn't be scrubbed to GADM |
|
19 |
1: Point is >5km from putative GADM polygon |
|
20 |
2: Point is <=5km from putative GADM polygon, but still outside it |
|
21 |
3: Point is in (or on border of) putative GADM polygon |
|
22 |
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)" |
|
23 |
stateprovincevalidity,latLongInStateProvince,"/_map:[./{""-1"",0}=,1=false,./{2,3}=true]/value","""For countryvalidity/stateprovincevalidity/countyvalidity: |
|
24 |
-1: Name is null at this or some higher level |
|
25 |
0: Complete name provided, but couldn't be scrubbed to GADM |
|
26 |
1: Point is >5km from putative GADM polygon |
|
27 |
2: Point is <=5km from putative GADM polygon, but still outside it |
|
28 |
3: Point is in (or on border of) putative GADM polygon |
|
29 |
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)" |
|
30 |
countyvalidity,latLongInCounty,"/_map:[./{""-1"",0}=,1=false,./{2,3}=true]/value","""For countryvalidity/stateprovincevalidity/countyvalidity: |
|
31 |
-1: Name is null at this or some higher level |
|
32 |
0: Complete name provided, but couldn't be scrubbed to GADM |
|
33 |
1: Point is >5km from putative GADM polygon |
|
34 |
2: Point is <=5km from putative GADM polygon, but still outside it |
|
35 |
3: Point is in (or on border of) putative GADM polygon |
|
36 |
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)" |
|
11 |
latlonvalidity,latLongDomainValid_verbatim,, |
|
12 |
countryvalidity,latLongInCountry_verbatim,, |
|
13 |
stateprovincevalidity,latLongInStateProvince_verbatim,, |
|
14 |
countyvalidity,latLongInCounty_verbatim,, |
|
15 |
latLongDomainValid,latLongDomainValid,, |
|
16 |
latLongInCountry,latLongInCountry,, |
|
17 |
latLongInStateProvince,latLongInStateProvince,, |
|
18 |
latLongInCounty,latLongInCounty,, |
inputs/.geoscrub/geoscrub_output/VegBIEN.csv | ||
---|---|---|
10 | 10 |
stateprovincestd,"/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub,matched_place_id=0]/stateprovince/_first/1", |
11 | 11 |
country,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/country/_first/2, |
12 | 12 |
county,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/county/_first/2, |
13 |
latlonvalidity,"/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/geovalid/_alt:[2=false]/1/_first/2/_and/1/_map:[""-1""=,0=false,1=true]/value","""For latlonvalidity: |
|
14 |
-1: Latitude and/or longitude is null |
|
15 |
0: Coordinate is not a valid geographic location |
|
16 |
1: Coordinate is a valid geographic location |
|
17 |
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)" |
|
18 |
countryvalidity,"/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/geovalid/_alt:[2=false]/1/_first/2/_and/2/_map:[./{""-1"",0}=,1=false,./{2,3}=true]/value","""For countryvalidity/stateprovincevalidity/countyvalidity: |
|
19 |
-1: Name is null at this or some higher level |
|
20 |
0: Complete name provided, but couldn't be scrubbed to GADM |
|
21 |
1: Point is >5km from putative GADM polygon |
|
22 |
2: Point is <=5km from putative GADM polygon, but still outside it |
|
23 |
3: Point is in (or on border of) putative GADM polygon |
|
24 |
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)" |
|
25 |
stateprovincevalidity,"/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/geovalid/_alt:[2=false]/1/_first/2/_and/3/_map:[./{""-1"",0}=,1=false,./{2,3}=true]/value","""For countryvalidity/stateprovincevalidity/countyvalidity: |
|
26 |
-1: Name is null at this or some higher level |
|
27 |
0: Complete name provided, but couldn't be scrubbed to GADM |
|
28 |
1: Point is >5km from putative GADM polygon |
|
29 |
2: Point is <=5km from putative GADM polygon, but still outside it |
|
30 |
3: Point is in (or on border of) putative GADM polygon |
|
31 |
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)" |
|
32 |
countyvalidity,"/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/geovalid/_alt:[2=false]/1/_first/2/_and/4/_map:[./{""-1"",0}=,1=false,./{2,3}=true]/value","""For countryvalidity/stateprovincevalidity/countyvalidity: |
|
33 |
-1: Name is null at this or some higher level |
|
34 |
0: Complete name provided, but couldn't be scrubbed to GADM |
|
35 |
1: Point is >5km from putative GADM polygon |
|
36 |
2: Point is <=5km from putative GADM polygon, but still outside it |
|
37 |
3: Point is in (or on border of) putative GADM polygon |
|
38 |
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)" |
|
13 |
latLongDomainValid,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/geovalid/_alt:[2=false]/1/_first/2/_and/1, |
|
14 |
latLongInCountry,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/geovalid/_alt:[2=false]/1/_first/2/_and/2, |
|
15 |
latLongInStateProvince,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/geovalid/_alt:[2=false]/1/_first/2/_and/3, |
|
16 |
latLongInCounty,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/geovalid/_alt:[2=false]/1/_first/2/_and/4, |
|
39 | 17 |
stateprovince,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/stateprovince/_first/2, |
40 | 18 |
stateprovince,/location/locationplace/*_id/place/stateprovince, |
19 |
countryvalidity,,** No join mapping for latLongInCountry_verbatim ** |
|
20 |
countyvalidity,,** No join mapping for latLongInCounty_verbatim ** |
|
21 |
latlonvalidity,,** No join mapping for latLongDomainValid_verbatim ** |
|
41 | 22 |
row_num,,** No join mapping for *row_num ** |
23 |
stateprovincevalidity,,** No join mapping for latLongInStateProvince_verbatim ** |
inputs/.geoscrub/geoscrub_output/test.xml.ref | ||
---|---|---|
61 | 61 |
<2>false</2> |
62 | 62 |
<1> |
63 | 63 |
<_and> |
64 |
<1> |
|
65 |
<_map> |
|
66 |
<-1></-1> |
|
67 |
<0>false</0> |
|
68 |
<1>true</1> |
|
69 |
<value>$latlonvalidity</value> |
|
70 |
</_map> |
|
71 |
</1> |
|
72 |
<2> |
|
73 |
<_map> |
|
74 |
<-1></-1> |
|
75 |
<0></0> |
|
76 |
<1>false</1> |
|
77 |
<2>true</2> |
|
78 |
<3>true</3> |
|
79 |
<value>$countryvalidity</value> |
|
80 |
</_map> |
|
81 |
</2> |
|
82 |
<3> |
|
83 |
<_map> |
|
84 |
<-1></-1> |
|
85 |
<0></0> |
|
86 |
<1>false</1> |
|
87 |
<2>true</2> |
|
88 |
<3>true</3> |
|
89 |
<value>$stateprovincevalidity</value> |
|
90 |
</_map> |
|
91 |
</3> |
|
92 |
<4> |
|
93 |
<_map> |
|
94 |
<-1></-1> |
|
95 |
<0></0> |
|
96 |
<1>false</1> |
|
97 |
<2>true</2> |
|
98 |
<3>true</3> |
|
99 |
<value>$countyvalidity</value> |
|
100 |
</_map> |
|
101 |
</4> |
|
64 |
<1>$latLongDomainValid</1> |
|
65 |
<2>$latLongInCountry</2> |
|
66 |
<3>$latLongInStateProvince</3> |
|
67 |
<4>$latLongInCounty</4> |
|
102 | 68 |
</_and> |
103 | 69 |
</1> |
104 | 70 |
</_alt> |
inputs/.geoscrub/geoscrub_output/new_terms.csv | ||
---|---|---|
1 | 1 |
countrystd,acceptedCountry,, |
2 | 2 |
stateprovincestd,acceptedStateProvince,, |
3 | 3 |
countystd,acceptedCounty,, |
4 |
latlonvalidity,latLongDomainValid,"/_map:[""-1""=,0=false,1=true]/value","""For latlonvalidity: |
|
5 |
-1: Latitude and/or longitude is null |
|
6 |
0: Coordinate is not a valid geographic location |
|
7 |
1: Coordinate is a valid geographic location |
|
8 |
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)" |
|
9 |
countryvalidity,latLongInCountry,"/_map:[./{""-1"",0}=,1=false,./{2,3}=true]/value","""For countryvalidity/stateprovincevalidity/countyvalidity: |
|
10 |
-1: Name is null at this or some higher level |
|
11 |
0: Complete name provided, but couldn't be scrubbed to GADM |
|
12 |
1: Point is >5km from putative GADM polygon |
|
13 |
2: Point is <=5km from putative GADM polygon, but still outside it |
|
14 |
3: Point is in (or on border of) putative GADM polygon |
|
15 |
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)" |
|
16 |
stateprovincevalidity,latLongInStateProvince,"/_map:[./{""-1"",0}=,1=false,./{2,3}=true]/value","""For countryvalidity/stateprovincevalidity/countyvalidity: |
|
17 |
-1: Name is null at this or some higher level |
|
18 |
0: Complete name provided, but couldn't be scrubbed to GADM |
|
19 |
1: Point is >5km from putative GADM polygon |
|
20 |
2: Point is <=5km from putative GADM polygon, but still outside it |
|
21 |
3: Point is in (or on border of) putative GADM polygon |
|
22 |
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)" |
|
23 |
countyvalidity,latLongInCounty,"/_map:[./{""-1"",0}=,1=false,./{2,3}=true]/value","""For countryvalidity/stateprovincevalidity/countyvalidity: |
|
24 |
-1: Name is null at this or some higher level |
|
25 |
0: Complete name provided, but couldn't be scrubbed to GADM |
|
26 |
1: Point is >5km from putative GADM polygon |
|
27 |
2: Point is <=5km from putative GADM polygon, but still outside it |
|
28 |
3: Point is in (or on border of) putative GADM polygon |
|
29 |
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)" |
|
4 |
latlonvalidity,latLongDomainValid_verbatim,, |
|
5 |
countryvalidity,latLongInCountry_verbatim,, |
|
6 |
stateprovincevalidity,latLongInStateProvince_verbatim,, |
|
7 |
countyvalidity,latLongInCounty_verbatim,, |
inputs/.geoscrub/geoscrub_output/unmapped_terms.csv | ||
---|---|---|
1 | 1 |
*row_num |
2 |
latLongDomainValid_verbatim |
|
3 |
latLongInCountry_verbatim |
|
4 |
latLongInStateProvince_verbatim |
|
5 |
latLongInCounty_verbatim |
Also available in: Unified diff
inputs/.geoscrub/geoscrub_output/: translated single-column filters to postprocessing derived columns, using the steps at wiki.vegpath.org/Adding_new-style_import_to_a_datasource#Translating-filters-to-postprocessing-derived-columns