Revision 11443
Added by Paul Sarando about 11 years ago
geonames.sql | ||
---|---|---|
39 | 39 |
Created Nov 2012 |
40 | 40 |
*/ |
41 | 41 |
|
42 |
-- define custom function to pass characters back through latin-1 |
|
43 |
-- encoding to revert errors; in principle this could introduce new |
|
44 |
-- errors, but it seems to do more good than harm |
|
45 |
CREATE OR REPLACE FUNCTION u2L2u(text) RETURNS text AS $$ |
|
46 |
DECLARE |
|
47 |
string text; |
|
48 |
BEGIN |
|
49 |
string := |
|
50 |
convert_from(convert($1::bytea, 'UTF8', 'LATIN1'), 'UTF8'); |
|
51 |
RETURN string; |
|
52 |
EXCEPTION |
|
53 |
WHEN data_exception THEN |
|
54 |
RETURN $1; |
|
55 |
END; |
|
56 |
$$ LANGUAGE plpgsql; |
|
42 |
BEGIN; |
|
57 | 43 |
|
58 | 44 |
------------------- |
59 | 45 |
-- scrub country -- |
60 | 46 |
------------------- |
61 | 47 |
|
62 |
CREATE TABLE vcountry AS
|
|
48 |
INSERT INTO vcountry
|
|
63 | 49 |
SELECT DISTINCT country |
64 | 50 |
FROM vegbien_geoscrub |
65 | 51 |
WHERE country IS NOT NULL; |
66 | 52 |
-- SELECT 439 |
67 | 53 |
-- Time: 862.413 ms |
68 |
ALTER TABLE vcountry ADD COLUMN countryutf8 text; |
|
69 |
ALTER TABLE vcountry ADD COLUMN countryid integer; |
|
70 | 54 |
|
71 | 55 |
-- try to clean up character representations |
72 | 56 |
UPDATE vcountry |
... | ... | |
156 | 140 |
-- Removes: Congo, CONGO |
157 | 141 |
|
158 | 142 |
-- merge in GADM country names (our standard names) |
159 |
ALTER TABLE vcountry ADD COLUMN countrystd text; |
|
160 | 143 |
UPDATE vcountry vc |
161 | 144 |
SET countrystd = name_0 |
162 | 145 |
FROM gadm_country_lookup gadm0 |
... | ... | |
169 | 152 |
-- scrub stateprovince -- |
170 | 153 |
------------------------- |
171 | 154 |
|
172 |
CREATE TABLE vstate AS
|
|
155 |
INSERT INTO vstate
|
|
173 | 156 |
SELECT DISTINCT countryid, stateprovince |
174 | 157 |
FROM vegbien_geoscrub join vcountry using (country) |
175 | 158 |
WHERE countryid IS NOT NULL |
176 | 159 |
AND stateprovince IS NOT NULL; |
177 | 160 |
-- SELECT 3312 |
178 | 161 |
-- Time: 4164.948 ms |
179 |
ALTER TABLE vstate ADD COLUMN stateprovinceutf8 text; |
|
180 |
ALTER TABLE vstate ADD COLUMN stateprovinceid integer; |
|
181 | 162 |
|
182 | 163 |
-- try to clean up character representations |
183 | 164 |
UPDATE vstate |
... | ... | |
294 | 275 |
-- todo: the same stuff done for country to deal with Congo-type case |
295 | 276 |
|
296 | 277 |
-- merge in GADM stateprovince names (our standard names) |
297 |
ALTER TABLE vstate ADD COLUMN stateprovincestd text; |
|
298 | 278 |
UPDATE vstate vs |
299 | 279 |
SET stateprovincestd = name_1 |
300 | 280 |
FROM gadm_stateprovince_lookup gadm1 |
... | ... | |
352 | 332 |
-- scrub county -- |
353 | 333 |
------------------ |
354 | 334 |
|
355 |
CREATE TABLE vcounty AS
|
|
335 |
INSERT INTO vcounty
|
|
356 | 336 |
SELECT DISTINCT countryid, stateprovinceid, county |
357 | 337 |
FROM vegbien_geoscrub |
358 | 338 |
JOIN vcountry USING (country) |
... | ... | |
362 | 342 |
AND county IS NOT NULL; |
363 | 343 |
-- SELECT 18715 |
364 | 344 |
-- Time: 3590.725 ms |
365 |
ALTER TABLE vcounty ADD COLUMN countyutf8 text; |
|
366 |
ALTER TABLE vcounty ADD COLUMN countyid integer; |
|
367 | 345 |
|
368 | 346 |
-- try to clean up character representations |
369 | 347 |
UPDATE vcounty |
... | ... | |
413 | 391 |
-- UPDATE 0 |
414 | 392 |
-- Time: 11255.926 ms |
415 | 393 |
|
416 |
ALTER TABLE vcounty ADD COLUMN countystd text; |
|
417 |
|
|
418 | 394 |
-- merge in GADM county names (our standard names) |
419 | 395 |
-- for cases where we have geonameid |
420 | 396 |
UPDATE vcounty vs |
... | ... | |
440 | 416 |
-- UPDATE 5027 |
441 | 417 |
-- Time: 1410.377 ms |
442 | 418 |
|
443 |
----------------------------- |
|
444 |
-- put everything together -- |
|
445 |
----------------------------- |
|
419 |
COMMIT; |
|
446 | 420 |
|
447 |
-- reconstitute the whole dang thang |
|
448 |
|
|
449 |
-- new approach |
|
450 |
CREATE TABLE geoscrub AS |
|
451 |
SELECT decimallatitude, decimallongitude, |
|
452 |
country, stateprovince, county, |
|
453 |
countryid, stateprovinceid, countyid, |
|
454 |
countrystd, stateprovincestd, countystd |
|
455 |
FROM vegbien_geoscrub v |
|
456 |
LEFT JOIN vcountry USING (country) |
|
457 |
LEFT JOIN vstate USING (countryid, stateprovince) |
|
458 |
LEFT JOIN vcounty USING (countryid, stateprovinceid, county); |
|
459 |
-- SELECT 1707970 |
|
460 |
-- Time: 26172.154 ms |
|
461 |
|
|
462 |
-- try to squeeze out a few more direct stateprovince mappings to gadm2 |
|
463 |
UPDATE geoscrub |
|
464 |
SET stateprovincestd = name_1 |
|
465 |
FROM (SELECT DISTINCT name_0, name_1 FROM gadm2) gadm |
|
466 |
WHERE country IS NOT NULL |
|
467 |
AND stateprovince IS NOT NULL |
|
468 |
AND countrystd = name_0 |
|
469 |
AND stateprovincestd IS NULL |
|
470 |
AND stateprovince = name_1; |
|
471 |
-- UPDATE 0 |
|
472 |
-- Time: 3630.973 ms |
|
473 |
|
|
474 |
-- try to squeeze out a few more direct county mappings to gadm2 |
|
475 |
UPDATE geoscrub |
|
476 |
SET countystd = name_2 |
|
477 |
FROM (SELECT DISTINCT name_0, name_1, name_2 FROM gadm2) gadm |
|
478 |
WHERE country IS NOT NULL |
|
479 |
AND stateprovince IS NOT NULL |
|
480 |
AND county IS NOT NULL |
|
481 |
AND countrystd = name_0 |
|
482 |
AND stateprovincestd = name_1 |
|
483 |
AND countystd IS NULL |
|
484 |
AND county = name_2; |
|
485 |
-- UPDATE 69 |
|
486 |
-- Time: 3982.715 ms |
Also available in: Unified diff
Initial checkin of geoscrub install SQL files.
Added install.*.sql files that will do initial table creation for all
required tables.
Added a truncate.vegbien_geoscrub.sql script that will clear tables related to
data downloaded in load-geoscrub-input.sh.
Moved all SQL statements that modify the final geoscrub table into the
geovalidate.sql script.
Modified the geonames.sql and geovalidate.sql scripts to assume all
tables have already been created by the install scripts.