Project

General

Profile

« Previous | Next » 

Revision 10389

inputs/.geoscrub/geoscrub_output/: translated single-column filters to postprocessing derived columns, using the steps at wiki.vegpath.org/Adding_new-style_import_to_a_datasource#Translating-filters-to-postprocessing-derived-columns

View differences:

inputs/.geoscrub/geoscrub_output/postprocess.sql
1 1
SELECT util.search_path_append('util');
2 2

  
3
-- runtime: 25 s ("real 0m21.770s")
3
-- runtime: 40 s ("real 0m21.770s" + "Time: 17304.769 ms") @starscream
4 4
SELECT set_col_types(:table_str, ARRAY[
5
  ('decimallatitude', 'double precision')
6
, ('decimallongitude', 'double precision')
5
  ('decimallatitude'      , 'double precision')
6
, ('decimallongitude'     , 'double precision')
7
, ('latlonvalidity'       , 'integer')
8
, ('countryvalidity'      , 'integer')
9
, ('stateprovincevalidity', 'integer')
10
, ('countyvalidity'       , 'integer')
7 11
]::col_cast[]);
8 12

  
9 13
ALTER TABLE :table ALTER COLUMN decimallatitude SET NOT NULL;
......
11 15

  
12 16
-- runtime: 55 s ("real 0m53.677s")
13 17
SELECT create_if_not_exists($$CREATE INDEX geoscrub_output_input ON geoscrub_output (decimallatitude, decimallongitude, country, stateprovince, county)$$);
18

  
19

  
20
-- map_*() derived cols
21
-- runtime: 2 min @starscream
22

  
23
CREATE OR REPLACE FUNCTION map_coord_validity(value integer)
24
  RETURNS boolean AS
25
$BODY$
26
/*
27
"For latlonvalidity:
28
-1: Latitude and/or longitude is null
29
 0: Coordinate is not a valid geographic location
30
 1: Coordinate is a valid geographic location
31
" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)
32
*/
33
SELECT util._map('-1=>NULL, 0=>false, 1=>true', $1::text)::boolean
34
$BODY$
35
  LANGUAGE sql IMMUTABLE
36
  COST 100;
37

  
38
SELECT mk_derived_col((:table_str, 'latLongDomainValid'), $$map_coord_validity("latlonvalidity")$$); -- runtime: 25 s ("Time: 26708.932 ms") @starscream
39

  
40
CREATE OR REPLACE FUNCTION map_geovalidity(value integer)
41
  RETURNS boolean AS
42
$BODY$
43
/*
44
"For countryvalidity/stateprovincevalidity/countyvalidity:
45
-1: Name is null at this or some higher level
46
 0: Complete name provided, but couldn't be scrubbed to GADM
47
 1: Point is >5km from putative GADM polygon
48
 2: Point is <=5km from putative GADM polygon, but still outside it
49
 3: Point is in (or on border of) putative GADM polygon
50
" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)
51
*/
52
SELECT util._map('-1=>NULL, 0=>NULL, 1=>false, 2=>true, 3=>true', $1::text)::boolean
53
$BODY$
54
  LANGUAGE sql IMMUTABLE
55
  COST 100;
56

  
57
SELECT mk_derived_col((:table_str, 'latLongInCountry'      ), $$map_geovalidity("countryvalidity"      )$$); -- runtime: 25 s ("Time: 27884.129 ms") @starscream
58
SELECT mk_derived_col((:table_str, 'latLongInStateProvince'), $$map_geovalidity("stateprovincevalidity")$$); -- runtime: 25 s ("Time: 27228.239 ms") @starscream
59
SELECT mk_derived_col((:table_str, 'latLongInCounty'       ), $$map_geovalidity("countyvalidity"       )$$); -- runtime: 25 s ("Time: 27337.520 ms") @starscream
inputs/.geoscrub/geoscrub_output/map.csv
8 8
countrystd,acceptedCountry,,
9 9
stateprovincestd,acceptedStateProvince,,
10 10
countystd,acceptedCounty,,
11
latlonvalidity,latLongDomainValid,"/_map:[""-1""=,0=false,1=true]/value","""For latlonvalidity:
12
-1: Latitude and/or longitude is null
13
 0: Coordinate is not a valid geographic location
14
 1: Coordinate is a valid geographic location
15
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)"
16
countryvalidity,latLongInCountry,"/_map:[./{""-1"",0}=,1=false,./{2,3}=true]/value","""For countryvalidity/stateprovincevalidity/countyvalidity:
17
-1: Name is null at this or some higher level
18
 0: Complete name provided, but couldn't be scrubbed to GADM
19
 1: Point is >5km from putative GADM polygon
20
 2: Point is <=5km from putative GADM polygon, but still outside it
21
 3: Point is in (or on border of) putative GADM polygon
22
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)"
23
stateprovincevalidity,latLongInStateProvince,"/_map:[./{""-1"",0}=,1=false,./{2,3}=true]/value","""For countryvalidity/stateprovincevalidity/countyvalidity:
24
-1: Name is null at this or some higher level
25
 0: Complete name provided, but couldn't be scrubbed to GADM
26
 1: Point is >5km from putative GADM polygon
27
 2: Point is <=5km from putative GADM polygon, but still outside it
28
 3: Point is in (or on border of) putative GADM polygon
29
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)"
30
countyvalidity,latLongInCounty,"/_map:[./{""-1"",0}=,1=false,./{2,3}=true]/value","""For countryvalidity/stateprovincevalidity/countyvalidity:
31
-1: Name is null at this or some higher level
32
 0: Complete name provided, but couldn't be scrubbed to GADM
33
 1: Point is >5km from putative GADM polygon
34
 2: Point is <=5km from putative GADM polygon, but still outside it
35
 3: Point is in (or on border of) putative GADM polygon
36
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)"
11
latlonvalidity,latLongDomainValid_verbatim,,
12
countryvalidity,latLongInCountry_verbatim,,
13
stateprovincevalidity,latLongInStateProvince_verbatim,,
14
countyvalidity,latLongInCounty_verbatim,,
15
latLongDomainValid,latLongDomainValid,,
16
latLongInCountry,latLongInCountry,,
17
latLongInStateProvince,latLongInStateProvince,,
18
latLongInCounty,latLongInCounty,,
inputs/.geoscrub/geoscrub_output/VegBIEN.csv
10 10
stateprovincestd,"/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub,matched_place_id=0]/stateprovince/_first/1",
11 11
country,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/country/_first/2,
12 12
county,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/county/_first/2,
13
latlonvalidity,"/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/geovalid/_alt:[2=false]/1/_first/2/_and/1/_map:[""-1""=,0=false,1=true]/value","""For latlonvalidity:
14
-1: Latitude and/or longitude is null
15
 0: Coordinate is not a valid geographic location
16
 1: Coordinate is a valid geographic location
17
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)"
18
countryvalidity,"/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/geovalid/_alt:[2=false]/1/_first/2/_and/2/_map:[./{""-1"",0}=,1=false,./{2,3}=true]/value","""For countryvalidity/stateprovincevalidity/countyvalidity:
19
-1: Name is null at this or some higher level
20
 0: Complete name provided, but couldn't be scrubbed to GADM
21
 1: Point is >5km from putative GADM polygon
22
 2: Point is <=5km from putative GADM polygon, but still outside it
23
 3: Point is in (or on border of) putative GADM polygon
24
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)"
25
stateprovincevalidity,"/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/geovalid/_alt:[2=false]/1/_first/2/_and/3/_map:[./{""-1"",0}=,1=false,./{2,3}=true]/value","""For countryvalidity/stateprovincevalidity/countyvalidity:
26
-1: Name is null at this or some higher level
27
 0: Complete name provided, but couldn't be scrubbed to GADM
28
 1: Point is >5km from putative GADM polygon
29
 2: Point is <=5km from putative GADM polygon, but still outside it
30
 3: Point is in (or on border of) putative GADM polygon
31
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)"
32
countyvalidity,"/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/geovalid/_alt:[2=false]/1/_first/2/_and/4/_map:[./{""-1"",0}=,1=false,./{2,3}=true]/value","""For countryvalidity/stateprovincevalidity/countyvalidity:
33
-1: Name is null at this or some higher level
34
 0: Complete name provided, but couldn't be scrubbed to GADM
35
 1: Point is >5km from putative GADM polygon
36
 2: Point is <=5km from putative GADM polygon, but still outside it
37
 3: Point is in (or on border of) putative GADM polygon
38
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)"
13
latLongDomainValid,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/geovalid/_alt:[2=false]/1/_first/2/_and/1,
14
latLongInCountry,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/geovalid/_alt:[2=false]/1/_first/2/_and/2,
15
latLongInStateProvince,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/geovalid/_alt:[2=false]/1/_first/2/_and/3,
16
latLongInCounty,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/geovalid/_alt:[2=false]/1/_first/2/_and/4,
39 17
stateprovince,/location/locationplace/*_id/place/matched_place_id/place:[source_id/source/shortname=geoscrub]/stateprovince/_first/2,
40 18
stateprovince,/location/locationplace/*_id/place/stateprovince,
19
countryvalidity,,** No join mapping for latLongInCountry_verbatim ** 
20
countyvalidity,,** No join mapping for latLongInCounty_verbatim ** 
21
latlonvalidity,,** No join mapping for latLongDomainValid_verbatim ** 
41 22
row_num,,** No join mapping for *row_num ** 
23
stateprovincevalidity,,** No join mapping for latLongInStateProvince_verbatim ** 
inputs/.geoscrub/geoscrub_output/test.xml.ref
61 61
                                                    <2>false</2>
62 62
                                                    <1>
63 63
                                                        <_and>
64
                                                            <1>
65
                                                                <_map>
66
                                                                    <-1></-1>
67
                                                                    <0>false</0>
68
                                                                    <1>true</1>
69
                                                                    <value>$latlonvalidity</value>
70
                                                                </_map>
71
                                                            </1>
72
                                                            <2>
73
                                                                <_map>
74
                                                                    <-1></-1>
75
                                                                    <0></0>
76
                                                                    <1>false</1>
77
                                                                    <2>true</2>
78
                                                                    <3>true</3>
79
                                                                    <value>$countryvalidity</value>
80
                                                                </_map>
81
                                                            </2>
82
                                                            <3>
83
                                                                <_map>
84
                                                                    <-1></-1>
85
                                                                    <0></0>
86
                                                                    <1>false</1>
87
                                                                    <2>true</2>
88
                                                                    <3>true</3>
89
                                                                    <value>$stateprovincevalidity</value>
90
                                                                </_map>
91
                                                            </3>
92
                                                            <4>
93
                                                                <_map>
94
                                                                    <-1></-1>
95
                                                                    <0></0>
96
                                                                    <1>false</1>
97
                                                                    <2>true</2>
98
                                                                    <3>true</3>
99
                                                                    <value>$countyvalidity</value>
100
                                                                </_map>
101
                                                            </4>
64
                                                            <1>$latLongDomainValid</1>
65
                                                            <2>$latLongInCountry</2>
66
                                                            <3>$latLongInStateProvince</3>
67
                                                            <4>$latLongInCounty</4>
102 68
                                                        </_and>
103 69
                                                    </1>
104 70
                                                </_alt>
inputs/.geoscrub/geoscrub_output/new_terms.csv
1 1
countrystd,acceptedCountry,,
2 2
stateprovincestd,acceptedStateProvince,,
3 3
countystd,acceptedCounty,,
4
latlonvalidity,latLongDomainValid,"/_map:[""-1""=,0=false,1=true]/value","""For latlonvalidity:
5
-1: Latitude and/or longitude is null
6
 0: Coordinate is not a valid geographic location
7
 1: Coordinate is a valid geographic location
8
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)"
9
countryvalidity,latLongInCountry,"/_map:[./{""-1"",0}=,1=false,./{2,3}=true]/value","""For countryvalidity/stateprovincevalidity/countyvalidity:
10
-1: Name is null at this or some higher level
11
 0: Complete name provided, but couldn't be scrubbed to GADM
12
 1: Point is >5km from putative GADM polygon
13
 2: Point is <=5km from putative GADM polygon, but still outside it
14
 3: Point is in (or on border of) putative GADM polygon
15
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)"
16
stateprovincevalidity,latLongInStateProvince,"/_map:[./{""-1"",0}=,1=false,./{2,3}=true]/value","""For countryvalidity/stateprovincevalidity/countyvalidity:
17
-1: Name is null at this or some higher level
18
 0: Complete name provided, but couldn't be scrubbed to GADM
19
 1: Point is >5km from putative GADM polygon
20
 2: Point is <=5km from putative GADM polygon, but still outside it
21
 3: Point is in (or on border of) putative GADM polygon
22
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)"
23
countyvalidity,latLongInCounty,"/_map:[./{""-1"",0}=,1=false,./{2,3}=true]/value","""For countryvalidity/stateprovincevalidity/countyvalidity:
24
-1: Name is null at this or some higher level
25
 0: Complete name provided, but couldn't be scrubbed to GADM
26
 1: Point is >5km from putative GADM polygon
27
 2: Point is <=5km from putative GADM polygon, but still outside it
28
 3: Point is in (or on border of) putative GADM polygon
29
"" (http://vegpath.org/fs/inputs/.geoscrub/_src/README.TXT)"
4
latlonvalidity,latLongDomainValid_verbatim,,
5
countryvalidity,latLongInCountry_verbatim,,
6
stateprovincevalidity,latLongInStateProvince_verbatim,,
7
countyvalidity,latLongInCounty_verbatim,,
inputs/.geoscrub/geoscrub_output/unmapped_terms.csv
1 1
*row_num
2
latLongDomainValid_verbatim
3
latLongInCountry_verbatim
4
latLongInStateProvince_verbatim
5
latLongInCounty_verbatim

Also available in: Unified diff