Revision 11494
Added by Paul Sarando about 11 years ago
derived/biengeo/geonames-to-gadm.sql | ||
---|---|---|
1 |
/* |
|
2 |
SQL statements to generate tables mapping geonames features at each |
|
3 |
relevant administative level (country, state/province, county/parish) |
|
4 |
to GADM2 features. |
|
5 |
|
|
6 |
Todo: |
|
7 |
* Be tolerant to presence/absence of administrative qualifiers in |
|
8 |
names? (Already doing this mildly for County/Co/Co.) |
|
9 |
- Municipality |
|
10 |
- Barrio |
|
11 |
- District |
|
12 |
|
|
13 |
* Get rid of rudundancy |
|
14 |
* Try matching against GADM2 varname_* columns? |
|
15 |
|
|
16 |
Jim Regetz |
|
17 |
NCEAS |
|
18 |
Created Nov 2012 |
|
19 |
*/ |
|
20 |
|
|
21 |
----------------------- |
|
22 |
-- Level 0 (Country) -- |
|
23 |
----------------------- |
|
24 |
|
|
25 |
-- map gadm2 level0 to geonames countries |
|
26 |
CREATE TABLE gadm_country_lookup ( |
|
27 |
countryid integer primary key references geonames (geonameid), |
|
28 |
name_0 text |
|
29 |
); |
|
30 |
INSERT INTO gadm_country_lookup (countryid) |
|
31 |
SELECT DISTINCT geonameid |
|
32 |
FROM countries |
|
33 |
WHERE geonameid IS NOT NULL; |
|
34 |
-- INSERT 0 250 |
|
35 |
-- Time: 20.770 ms |
|
36 |
UPDATE gadm_country_lookup lu |
|
37 |
SET name_0 = gadm.name_0 |
|
38 |
FROM (SELECT DISTINCT iso, name_0 FROM gadm2) gadm, |
|
39 |
countries c |
|
40 |
WHERE lu.countryid = c.geonameid |
|
41 |
AND c.iso3 = gadm.iso; |
|
42 |
-- UPDATE 248 |
|
43 |
-- Time: 3562.601 ms |
|
44 |
|
|
45 |
CREATE INDEX "gadm_country_lookup_name_0_idx" ON gadm_country_lookup (name_0); |
|
46 |
|
|
47 |
------------------------------ |
|
48 |
-- Level 1 (State/Province) -- |
|
49 |
------------------------------ |
|
50 |
|
|
51 |
-- map gadm2 level1 to geonames ADM1 |
|
52 |
CREATE TABLE gadm_stateprovince_lookup ( |
|
53 |
stateprovinceid integer primary key references geonames (geonameid), |
|
54 |
name_0 text, |
|
55 |
name_1 text |
|
56 |
); |
|
57 |
INSERT INTO gadm_stateprovince_lookup (stateprovinceid) |
|
58 |
SELECT DISTINCT geonameid |
|
59 |
FROM geonames |
|
60 |
WHERE featurecode='ADM1'; |
|
61 |
-- INSERT 0 3841 |
|
62 |
-- Time: 7085.635 ms |
|
63 |
|
|
64 |
|
|
65 |
-- try matching against alternatenames table |
|
66 |
UPDATE gadm_stateprovince_lookup gs |
|
67 |
SET name_0 = gadm.name_0, |
|
68 |
name_1 = gadm.name_1 |
|
69 |
FROM (SELECT DISTINCT iso, name_0, name_1 FROM gadm2) gadm, |
|
70 |
alternatenames a, |
|
71 |
geonames g, |
|
72 |
hierarchy h, |
|
73 |
gadm_country_lookup gc |
|
74 |
WHERE lower(gadm.name_1)=lower(alternatename) |
|
75 |
AND gs.stateprovinceid = a.geonameid |
|
76 |
AND a.geonameid = g.geonameid |
|
77 |
AND gs.stateprovinceid = h.childid |
|
78 |
AND h.parentid = gc.countryid |
|
79 |
AND gc.name_0 = gadm.name_0 |
|
80 |
AND g.featurecode='ADM1' |
|
81 |
AND gs.name_1 IS NULL; |
|
82 |
-- UPDATE 2145 |
|
83 |
-- Time: 3453.568 ms |
|
84 |
|
|
85 |
-- try matching against geonames (names and alternatenames) |
|
86 |
UPDATE gadm_stateprovince_lookup gs |
|
87 |
SET name_0 = gadm.name_0, |
|
88 |
name_1 = gadm.name_1 |
|
89 |
FROM (SELECT DISTINCT iso, name_0, name_1 FROM gadm2) gadm, |
|
90 |
geonames g, |
|
91 |
hierarchy h, |
|
92 |
gadm_country_lookup gc |
|
93 |
WHERE (lower(gadm.name_1)=lower(g.name) |
|
94 |
OR lower(gadm.name_1) = |
|
95 |
ANY (string_to_array(lower(g.alternatenames), ','))) |
|
96 |
AND gs.stateprovinceid = h.childid |
|
97 |
AND h.parentid = gc.countryid |
|
98 |
AND gc.name_0 = gadm.name_0 |
|
99 |
AND gs.stateprovinceid = g.geonameid |
|
100 |
AND g.featurecode='ADM1' |
|
101 |
AND gs.name_1 IS NULL; |
|
102 |
-- UPDATE 319 |
|
103 |
-- Time: 857.885 ms |
|
104 |
|
|
105 |
|
|
106 |
-- now again but against our manual mapping |
|
107 |
UPDATE gadm_stateprovince_lookup gs |
|
108 |
SET name_0 = gadm.name_0, |
|
109 |
name_1 = gadm.name_1 |
|
110 |
FROM (SELECT DISTINCT iso, name_0, name_1 FROM gadm2) gadm, |
|
111 |
alt_stateprovince asp, |
|
112 |
geonames g, |
|
113 |
countries c |
|
114 |
WHERE gadm.name_1=asp.alternatename |
|
115 |
AND gadm.iso=c.iso3 |
|
116 |
AND asp.country=c.country |
|
117 |
AND gs.stateprovinceid = g.geonameid |
|
118 |
AND g.countrycode=c.iso |
|
119 |
AND g.name=asp.stateprovince |
|
120 |
AND gs.name_0 IS NULL; |
|
121 |
-- UPDATE 29 |
|
122 |
-- Time: 777.376 ms |
|
123 |
|
|
124 |
---------------------- |
|
125 |
-- Level 2 (County) -- |
|
126 |
---------------------- |
|
127 |
|
|
128 |
-- map gadm2 level2 to geonames ADM2 |
|
129 |
CREATE TABLE gadm_county_lookup ( |
|
130 |
countyid integer primary key references geonames (geonameid), |
|
131 |
name_0 text, |
|
132 |
name_1 text, |
|
133 |
name_2 text |
|
134 |
); |
|
135 |
INSERT INTO gadm_county_lookup (countyid) |
|
136 |
SELECT DISTINCT geonameid |
|
137 |
FROM geonames |
|
138 |
WHERE featurecode='ADM2'; |
|
139 |
-- INSERT 0 32374 |
|
140 |
-- Time: 13177.539 ms |
|
141 |
|
|
142 |
-- try matching against geonames (names and alternatenames) |
|
143 |
UPDATE gadm_county_lookup gcl |
|
144 |
SET name_0 = gadm.name_0, |
|
145 |
name_1 = gadm.name_1, |
|
146 |
name_2 = gadm.name_2 |
|
147 |
FROM (SELECT DISTINCT name_0, name_1, name_2 FROM gadm2) gadm, |
|
148 |
geonames g, |
|
149 |
hierarchy h, |
|
150 |
gadm_stateprovince_lookup gsl |
|
151 |
WHERE (lower(gadm.name_2)=lower(g.name) |
|
152 |
OR lower(gadm.name_2) = |
|
153 |
ANY (string_to_array(lower(g.alternatenames), ','))) |
|
154 |
AND gcl.countyid = g.geonameid |
|
155 |
AND gcl.countyid = h.childid |
|
156 |
AND h.parentid = gsl.stateprovinceid |
|
157 |
AND gsl.name_0 = gadm.name_0 |
|
158 |
AND gsl.name_1 = gadm.name_1 |
|
159 |
AND g.featurecode='ADM2'; |
|
160 |
-- UPDATE 12352 |
|
161 |
-- Time: 27390.357 ms |
|
162 |
|
|
163 |
-- try matching against alternatenames table |
|
164 |
UPDATE gadm_county_lookup gcl |
|
165 |
SET name_0 = gadm.name_0, |
|
166 |
name_1 = gadm.name_1, |
|
167 |
name_2 = gadm.name_2 |
|
168 |
FROM (SELECT DISTINCT name_0, name_1, name_2 FROM gadm2) gadm, |
|
169 |
alternatenames a, |
|
170 |
hierarchy h, |
|
171 |
geonames g, |
|
172 |
gadm_stateprovince_lookup gsl |
|
173 |
WHERE lower(gadm.name_2)=lower(a.alternatename) |
|
174 |
AND gcl.countyid = a.geonameid |
|
175 |
AND a.geonameid = g.geonameid |
|
176 |
AND gcl.countyid = h.childid |
|
177 |
AND h.parentid = gsl.stateprovinceid |
|
178 |
AND gsl.name_0 = gadm.name_0 |
|
179 |
AND gsl.name_1 = gadm.name_1 |
|
180 |
AND g.featurecode='ADM2' |
|
181 |
AND gcl.name_2 IS NULL; |
|
182 |
-- UPDATE 0 |
|
183 |
-- Time: 6340.441 ms |
|
184 |
|
|
185 |
-- map geonames '/Foo/ County' to gadm2 '/Foo/' |
|
186 |
-- todo: other mappings like this??? |
|
187 |
UPDATE gadm_county_lookup gcl |
|
188 |
SET name_0 = gadm.name_0, |
|
189 |
name_1 = gadm.name_1, |
|
190 |
name_2 = gadm.name_2 |
|
191 |
FROM (SELECT DISTINCT name_0, name_1, name_2 FROM gadm2) gadm, |
|
192 |
geonames g, |
|
193 |
hierarchy h, |
|
194 |
gadm_stateprovince_lookup gsl |
|
195 |
WHERE lower(gadm.name_2||' County')=lower(g.name) |
|
196 |
AND gcl.countyid = g.geonameid |
|
197 |
AND gcl.countyid = h.childid |
|
198 |
AND h.parentid = gsl.stateprovinceid |
|
199 |
AND gsl.name_0 = gadm.name_0 |
|
200 |
AND gsl.name_1 = gadm.name_1 |
|
201 |
AND g.featurecode='ADM2' |
|
202 |
AND gcl.name_2 IS NULL; |
|
203 |
-- UPDATE 3000 |
|
204 |
-- Time: 22248.393 ms |
|
205 |
|
|
206 |
|
|
207 |
/* |
|
208 |
-- another way to create gadm_stateprovince_lookup, though this won't |
|
209 |
-- pick up multiple matches from geonames (good or bad??) |
|
210 |
CREATE TABLE gadm_stateprovince_lookup AS |
|
211 |
SELECT DISTINCT name_0, name_1 |
|
212 |
FROM gadm2; |
|
213 |
ALTER TABLE gadm_stateprovince_lookup ADD COLUMN stateprovinceid integer; |
|
214 |
UPDATE gadm_stateprovince_lookup gs |
|
215 |
SET stateprovinceid = g.geonameid |
|
216 |
FROM gadm_country_lookup gc, |
|
217 |
alternatenames a, |
|
218 |
geonames g, |
|
219 |
countries c |
|
220 |
WHERE gs.name_0=gc.name_0 |
|
221 |
AND gs.name_1=a.alternatename |
|
222 |
AND a.geonameid=g.geonameid |
|
223 |
AND g.countrycode=c.iso |
|
224 |
AND gc.countryid=c.geonameid |
|
225 |
AND g.featurecode='ADM1'; |
|
226 |
-- UPDATE 2137 |
|
227 |
-- Time: 600.345 ms |
|
228 |
*/ |
|
229 |
|
|
230 |
|
|
231 |
|
|
232 |
/* |
|
233 |
-- these geonameids match multiple gadm2 state/provinces, and that's bad |
|
234 |
-- because we don't know which one to use for geovalidation |
|
235 |
select stateprovinceid, name_0, array_agg(name_1) as name_1 |
|
236 |
from gadm_stateprovince_lookup |
|
237 |
group by stateprovinceid, name_0 |
|
238 |
having count(*)>1; |
|
239 |
|
|
240 |
stateprovinceid | name_0 | name_1 |
|
241 |
-----------------+----------+-------------------------------------- |
|
242 |
3653890 | Ecuador | {Orellana,Orellana} |
|
243 |
453751 | Bulgaria | {Razgrad,Ruse} |
|
244 |
1831095 | Cambodia | {"Phnom Penh",Kândal} |
|
245 |
1506272 | Russia | {Altay,Gorno-Altay} |
|
246 |
128222 | Iran | {Kermanshah,Kordestan} |
|
247 |
3457415 | Brazil | {"Mato Grosso do Sul","Mato Grosso"} |
|
248 |
170652 | Syria | {Damascus,"Rif Dimashq"} |
|
249 |
(7 rows) |
|
250 |
|
|
251 |
-- for now, manually clean up after these ambiguous cases |
|
252 |
-- ... this countryid should only apply to Mato Grosso do Sul |
|
253 |
DELETE FROM gadm_stateprovince_lookup |
|
254 |
WHERE name_0 = 'Brazil' |
|
255 |
AND name_1 = 'Mato Grosso' |
|
256 |
AND stateprovinceid = '3457415'; |
|
257 |
*/ |
|
258 |
|
|
259 |
/* |
|
260 |
-- these gadm2 state/provinces match multiple geonameids, but i'm pretty sure |
|
261 |
-- we don't really care in this direction |
|
262 |
select iso, name_1, count(*) |
|
263 |
from (select distinct iso, name_1 from gadm2) gadm2 |
|
264 |
left join ( |
|
265 |
select distinct gadm2.iso, |
|
266 |
name_1, |
|
267 |
alternatenames.geonameid, |
|
268 |
geonames.name |
|
269 |
from (select distinct iso, name_1 from gadm2) gadm2 |
|
270 |
join alternatenames on name_1=alternatename |
|
271 |
join geonames using (geonameid) |
|
272 |
join countries on countrycode=countries.iso |
|
273 |
where featurecode='ADM1' |
|
274 |
and gadm2.iso=countries.iso3) foo |
|
275 |
using (iso, name_1) group by iso, name_1 having count(*)>1; |
|
276 |
|
|
277 |
iso | name_1 | count |
|
278 |
-----+-------------+------- |
|
279 |
AZE | Yevlax | 2 |
|
280 |
BGR | Ruse | 2 |
|
281 |
BMU | Hamilton | 2 |
|
282 |
BRA | Mato Grosso | 2 |
|
283 |
KAZ | Almaty | 2 |
|
284 |
KHM | Phnom Penh | 2 |
|
285 |
RUS | Altay | 2 |
|
286 |
RUS | Moskva | 2 |
|
287 |
(8 rows) |
|
288 |
*/ |
|
289 |
|
derived/biengeo/geonames-to-gadm.country.sql | ||
---|---|---|
1 |
/* |
|
2 |
SQL statements to generate tables mapping geonames features at each |
|
3 |
relevant administative level (country, state/province, county/parish) |
|
4 |
to GADM2 features. |
|
5 |
|
|
6 |
Todo: |
|
7 |
* Be tolerant to presence/absence of administrative qualifiers in |
|
8 |
names? (Already doing this mildly for County/Co/Co.) |
|
9 |
- Municipality |
|
10 |
- Barrio |
|
11 |
- District |
|
12 |
|
|
13 |
* Get rid of rudundancy |
|
14 |
* Try matching against GADM2 varname_* columns? |
|
15 |
|
|
16 |
Jim Regetz |
|
17 |
NCEAS |
|
18 |
Created Nov 2012 |
|
19 |
*/ |
|
20 |
|
|
21 |
----------------------- |
|
22 |
-- Level 0 (Country) -- |
|
23 |
----------------------- |
|
24 |
BEGIN; |
|
25 |
|
|
26 |
TRUNCATE gadm_country_lookup; |
|
27 |
|
|
28 |
DROP INDEX IF EXISTS "gadm_country_lookup_name_0_idx"; |
|
29 |
|
|
30 |
-- map gadm2 level0 to geonames countries |
|
31 |
INSERT INTO gadm_country_lookup (countryid) |
|
32 |
SELECT DISTINCT geonameid |
|
33 |
FROM countries |
|
34 |
WHERE geonameid IS NOT NULL; |
|
35 |
-- INSERT 0 250 |
|
36 |
-- Time: 20.770 ms |
|
37 |
-- EXPLAIN ANALYZE |
|
38 |
UPDATE gadm_country_lookup lu |
|
39 |
SET name_0 = gadm.name_0 |
|
40 |
FROM (SELECT DISTINCT iso, name_0 FROM gadm2) gadm, |
|
41 |
countries c |
|
42 |
WHERE lu.countryid = c.geonameid |
|
43 |
AND c.iso3 = gadm.iso; |
|
44 |
-- UPDATE 248 |
|
45 |
-- Time: 3562.601 ms |
|
46 |
|
|
47 |
CREATE INDEX "gadm_country_lookup_name_0_idx" ON gadm_country_lookup (name_0); |
|
48 |
|
|
49 |
COMMIT; |
|
50 |
|
derived/biengeo/geonames-to-gadm.county.sql | ||
---|---|---|
1 |
/* |
|
2 |
SQL statements to generate tables mapping geonames features at each |
|
3 |
relevant administative level (country, state/province, county/parish) |
|
4 |
to GADM2 features. |
|
5 |
|
|
6 |
Todo: |
|
7 |
* Be tolerant to presence/absence of administrative qualifiers in |
|
8 |
names? (Already doing this mildly for County/Co/Co.) |
|
9 |
- Municipality |
|
10 |
- Barrio |
|
11 |
- District |
|
12 |
|
|
13 |
* Get rid of rudundancy |
|
14 |
* Try matching against GADM2 varname_* columns? |
|
15 |
|
|
16 |
Jim Regetz |
|
17 |
NCEAS |
|
18 |
Created Nov 2012 |
|
19 |
*/ |
|
20 |
|
|
21 |
---------------------- |
|
22 |
-- Level 2 (County) -- |
|
23 |
---------------------- |
|
24 |
BEGIN; |
|
25 |
|
|
26 |
TRUNCATE gadm_county_lookup; |
|
27 |
|
|
28 |
-- map gadm2 level2 to geonames ADM2 |
|
29 |
INSERT INTO gadm_county_lookup (countyid) |
|
30 |
SELECT DISTINCT geonameid |
|
31 |
FROM geonames |
|
32 |
WHERE featurecode='ADM2'; |
|
33 |
-- INSERT 0 32374 |
|
34 |
-- Time: 13177.539 ms |
|
35 |
|
|
36 |
-- try matching against geonames (names and alternatenames) |
|
37 |
UPDATE gadm_county_lookup gcl |
|
38 |
SET name_0 = gadm.name_0, |
|
39 |
name_1 = gadm.name_1, |
|
40 |
name_2 = gadm.name_2 |
|
41 |
FROM (SELECT DISTINCT name_0, name_1, name_2 FROM gadm2) gadm, |
|
42 |
geonames g, |
|
43 |
hierarchy h, |
|
44 |
gadm_stateprovince_lookup gsl |
|
45 |
WHERE (lower(gadm.name_2)=lower(g.name) |
|
46 |
OR lower(gadm.name_2) = |
|
47 |
ANY (string_to_array(lower(g.alternatenames), ','))) |
|
48 |
AND gcl.countyid = g.geonameid |
|
49 |
AND gcl.countyid = h.childid |
|
50 |
AND h.parentid = gsl.stateprovinceid |
|
51 |
AND gsl.name_0 = gadm.name_0 |
|
52 |
AND gsl.name_1 = gadm.name_1 |
|
53 |
AND g.featurecode='ADM2'; |
|
54 |
-- UPDATE 12352 |
|
55 |
-- Time: 27390.357 ms |
|
56 |
|
|
57 |
-- try matching against alternatenames table |
|
58 |
UPDATE gadm_county_lookup gcl |
|
59 |
SET name_0 = gadm.name_0, |
|
60 |
name_1 = gadm.name_1, |
|
61 |
name_2 = gadm.name_2 |
|
62 |
FROM (SELECT DISTINCT name_0, name_1, name_2 FROM gadm2) gadm, |
|
63 |
alternatenames a, |
|
64 |
hierarchy h, |
|
65 |
geonames g, |
|
66 |
gadm_stateprovince_lookup gsl |
|
67 |
WHERE lower(gadm.name_2)=lower(a.alternatename) |
|
68 |
AND gcl.countyid = a.geonameid |
|
69 |
AND a.geonameid = g.geonameid |
|
70 |
AND gcl.countyid = h.childid |
|
71 |
AND h.parentid = gsl.stateprovinceid |
|
72 |
AND gsl.name_0 = gadm.name_0 |
|
73 |
AND gsl.name_1 = gadm.name_1 |
|
74 |
AND g.featurecode='ADM2' |
|
75 |
AND gcl.name_2 IS NULL; |
|
76 |
-- UPDATE 0 |
|
77 |
-- Time: 6340.441 ms |
|
78 |
|
|
79 |
-- map geonames '/Foo/ County' to gadm2 '/Foo/' |
|
80 |
-- todo: other mappings like this??? |
|
81 |
UPDATE gadm_county_lookup gcl |
|
82 |
SET name_0 = gadm.name_0, |
|
83 |
name_1 = gadm.name_1, |
|
84 |
name_2 = gadm.name_2 |
|
85 |
FROM (SELECT DISTINCT name_0, name_1, name_2 FROM gadm2) gadm, |
|
86 |
geonames g, |
|
87 |
hierarchy h, |
|
88 |
gadm_stateprovince_lookup gsl |
|
89 |
WHERE lower(gadm.name_2||' County')=lower(g.name) |
|
90 |
AND gcl.countyid = g.geonameid |
|
91 |
AND gcl.countyid = h.childid |
|
92 |
AND h.parentid = gsl.stateprovinceid |
|
93 |
AND gsl.name_0 = gadm.name_0 |
|
94 |
AND gsl.name_1 = gadm.name_1 |
|
95 |
AND g.featurecode='ADM2' |
|
96 |
AND gcl.name_2 IS NULL; |
|
97 |
-- UPDATE 3000 |
|
98 |
-- Time: 22248.393 ms |
|
99 |
|
|
100 |
COMMIT; |
|
101 |
|
derived/biengeo/geonames-to-gadm.stateprovince.sql | ||
---|---|---|
1 |
/* |
|
2 |
SQL statements to generate tables mapping geonames features at each |
|
3 |
relevant administative level (country, state/province, county/parish) |
|
4 |
to GADM2 features. |
|
5 |
|
|
6 |
Todo: |
|
7 |
* Be tolerant to presence/absence of administrative qualifiers in |
|
8 |
names? (Already doing this mildly for County/Co/Co.) |
|
9 |
- Municipality |
|
10 |
- Barrio |
|
11 |
- District |
|
12 |
|
|
13 |
* Get rid of rudundancy |
|
14 |
* Try matching against GADM2 varname_* columns? |
|
15 |
|
|
16 |
Jim Regetz |
|
17 |
NCEAS |
|
18 |
Created Nov 2012 |
|
19 |
*/ |
|
20 |
|
|
21 |
------------------------------ |
|
22 |
-- Level 1 (State/Province) -- |
|
23 |
------------------------------ |
|
24 |
BEGIN; |
|
25 |
|
|
26 |
TRUNCATE gadm_stateprovince_lookup; |
|
27 |
|
|
28 |
-- map gadm2 level1 to geonames ADM1 |
|
29 |
INSERT INTO gadm_stateprovince_lookup (stateprovinceid) |
|
30 |
SELECT DISTINCT geonameid |
|
31 |
FROM geonames |
|
32 |
WHERE featurecode='ADM1'; |
|
33 |
-- INSERT 0 3841 |
|
34 |
-- Time: 7085.635 ms |
|
35 |
|
|
36 |
|
|
37 |
-- try matching against alternatenames table |
|
38 |
UPDATE gadm_stateprovince_lookup gs |
|
39 |
SET name_0 = gadm.name_0, |
|
40 |
name_1 = gadm.name_1 |
|
41 |
FROM (SELECT DISTINCT iso, name_0, name_1 FROM gadm2) gadm, |
|
42 |
alternatenames a, |
|
43 |
geonames g, |
|
44 |
hierarchy h, |
|
45 |
gadm_country_lookup gc |
|
46 |
WHERE lower(gadm.name_1)=lower(alternatename) |
|
47 |
AND gs.stateprovinceid = a.geonameid |
|
48 |
AND a.geonameid = g.geonameid |
|
49 |
AND gs.stateprovinceid = h.childid |
|
50 |
AND h.parentid = gc.countryid |
|
51 |
AND gc.name_0 = gadm.name_0 |
|
52 |
AND g.featurecode='ADM1' |
|
53 |
AND gs.name_1 IS NULL; |
|
54 |
-- UPDATE 2145 |
|
55 |
-- Time: 3453.568 ms |
|
56 |
|
|
57 |
-- try matching against geonames (names and alternatenames) |
|
58 |
UPDATE gadm_stateprovince_lookup gs |
|
59 |
SET name_0 = gadm.name_0, |
|
60 |
name_1 = gadm.name_1 |
|
61 |
FROM (SELECT DISTINCT iso, name_0, name_1 FROM gadm2) gadm, |
|
62 |
geonames g, |
|
63 |
hierarchy h, |
|
64 |
gadm_country_lookup gc |
|
65 |
WHERE (lower(gadm.name_1)=lower(g.name) |
|
66 |
OR lower(gadm.name_1) = |
|
67 |
ANY (string_to_array(lower(g.alternatenames), ','))) |
|
68 |
AND gs.stateprovinceid = h.childid |
|
69 |
AND h.parentid = gc.countryid |
|
70 |
AND gc.name_0 = gadm.name_0 |
|
71 |
AND gs.stateprovinceid = g.geonameid |
|
72 |
AND g.featurecode='ADM1' |
|
73 |
AND gs.name_1 IS NULL; |
|
74 |
-- UPDATE 319 |
|
75 |
-- Time: 857.885 ms |
|
76 |
|
|
77 |
|
|
78 |
-- now again but against our manual mapping |
|
79 |
UPDATE gadm_stateprovince_lookup gs |
|
80 |
SET name_0 = gadm.name_0, |
|
81 |
name_1 = gadm.name_1 |
|
82 |
FROM (SELECT DISTINCT iso, name_0, name_1 FROM gadm2) gadm, |
|
83 |
alt_stateprovince asp, |
|
84 |
geonames g, |
|
85 |
countries c |
|
86 |
WHERE gadm.name_1=asp.alternatename |
|
87 |
AND gadm.iso=c.iso3 |
|
88 |
AND asp.country=c.country |
|
89 |
AND gs.stateprovinceid = g.geonameid |
|
90 |
AND g.countrycode=c.iso |
|
91 |
AND g.name=asp.stateprovince |
|
92 |
AND gs.name_0 IS NULL; |
|
93 |
-- UPDATE 29 |
|
94 |
-- Time: 777.376 ms |
|
95 |
|
|
96 |
COMMIT; |
|
97 |
|
|
98 |
/* |
|
99 |
-- these geonameids match multiple gadm2 state/provinces, and that's bad |
|
100 |
-- because we don't know which one to use for geovalidation |
|
101 |
select stateprovinceid, name_0, array_agg(name_1) as name_1 |
|
102 |
from gadm_stateprovince_lookup |
|
103 |
group by stateprovinceid, name_0 |
|
104 |
having count(*)>1; |
|
105 |
|
|
106 |
stateprovinceid | name_0 | name_1 |
|
107 |
-----------------+----------+-------------------------------------- |
|
108 |
3653890 | Ecuador | {Orellana,Orellana} |
|
109 |
453751 | Bulgaria | {Razgrad,Ruse} |
|
110 |
1831095 | Cambodia | {"Phnom Penh",Kândal} |
|
111 |
1506272 | Russia | {Altay,Gorno-Altay} |
|
112 |
128222 | Iran | {Kermanshah,Kordestan} |
|
113 |
3457415 | Brazil | {"Mato Grosso do Sul","Mato Grosso"} |
|
114 |
170652 | Syria | {Damascus,"Rif Dimashq"} |
|
115 |
(7 rows) |
|
116 |
|
|
117 |
-- for now, manually clean up after these ambiguous cases |
|
118 |
-- ... this countryid should only apply to Mato Grosso do Sul |
|
119 |
DELETE FROM gadm_stateprovince_lookup |
|
120 |
WHERE name_0 = 'Brazil' |
|
121 |
AND name_1 = 'Mato Grosso' |
|
122 |
AND stateprovinceid = '3457415'; |
|
123 |
*/ |
|
124 |
|
|
125 |
/* |
|
126 |
-- these gadm2 state/provinces match multiple geonameids, but i'm pretty sure |
|
127 |
-- we don't really care in this direction |
|
128 |
select iso, name_1, count(*) |
|
129 |
from (select distinct iso, name_1 from gadm2) gadm2 |
|
130 |
left join ( |
|
131 |
select distinct gadm2.iso, |
|
132 |
name_1, |
|
133 |
alternatenames.geonameid, |
|
134 |
geonames.name |
|
135 |
from (select distinct iso, name_1 from gadm2) gadm2 |
|
136 |
join alternatenames on name_1=alternatename |
|
137 |
join geonames using (geonameid) |
|
138 |
join countries on countrycode=countries.iso |
|
139 |
where featurecode='ADM1' |
|
140 |
and gadm2.iso=countries.iso3) foo |
|
141 |
using (iso, name_1) group by iso, name_1 having count(*)>1; |
|
142 |
|
|
143 |
iso | name_1 | count |
|
144 |
-----+-------------+------- |
|
145 |
AZE | Yevlax | 2 |
|
146 |
BGR | Ruse | 2 |
|
147 |
BMU | Hamilton | 2 |
|
148 |
BRA | Mato Grosso | 2 |
|
149 |
KAZ | Almaty | 2 |
|
150 |
KHM | Phnom Penh | 2 |
|
151 |
RUS | Altay | 2 |
|
152 |
RUS | Moskva | 2 |
|
153 |
(8 rows) |
|
154 |
*/ |
|
155 |
|
Also available in: Unified diff
Split up geonames-to-gadm.sql into 3 scripts.
Each script only operates on one table within a transaction.
These scripts now assume the tables have already been created (by
install scripts added in a previous commit), and each starts out by
truncating the table it will update with new data.