Project

General

Profile

« Previous | Next » 

Revision 11443

Initial checkin of geoscrub install SQL files.

Added install.*.sql files that will do initial table creation for all
required tables.
Added a truncate.vegbien_geoscrub.sql script that will clear tables related to
data downloaded in load-geoscrub-input.sh.
Moved all SQL statements that modify the final geoscrub table into the
geovalidate.sql script.
Modified the geonames.sql and geovalidate.sql scripts to assume all
tables have already been created by the install scripts.

View differences:

geonames.sql
39 39
  Created Nov 2012
40 40
*/
41 41

  
42
-- define custom function to pass characters back through latin-1
43
-- encoding to revert errors; in principle this could introduce new
44
-- errors, but it seems to do more good than harm
45
CREATE OR REPLACE FUNCTION u2L2u(text) RETURNS text AS $$
46
DECLARE
47
    string text;
48
BEGIN
49
  string :=
50
    convert_from(convert($1::bytea, 'UTF8', 'LATIN1'), 'UTF8');
51
  RETURN string;
52
EXCEPTION
53
  WHEN data_exception THEN
54
    RETURN $1;
55
END;
56
$$ LANGUAGE plpgsql;
42
BEGIN;
57 43

  
58 44
-------------------
59 45
-- scrub country --
60 46
-------------------
61 47

  
62
CREATE TABLE vcountry AS
48
INSERT INTO vcountry
63 49
  SELECT DISTINCT country
64 50
    FROM vegbien_geoscrub
65 51
    WHERE country IS NOT NULL;
66 52
-- SELECT 439
67 53
-- Time: 862.413 ms
68
ALTER TABLE vcountry ADD COLUMN countryutf8 text;
69
ALTER TABLE vcountry ADD COLUMN countryid integer;
70 54

  
71 55
-- try to clean up character representations
72 56
UPDATE vcountry
......
156 140
-- Removes: Congo, CONGO
157 141

  
158 142
-- merge in GADM country names (our standard names)
159
ALTER TABLE vcountry ADD COLUMN countrystd text;
160 143
UPDATE vcountry vc
161 144
  SET countrystd = name_0
162 145
  FROM gadm_country_lookup gadm0
......
169 152
-- scrub stateprovince --
170 153
-------------------------
171 154

  
172
CREATE TABLE vstate AS
155
INSERT INTO vstate
173 156
  SELECT DISTINCT countryid, stateprovince
174 157
  FROM vegbien_geoscrub join vcountry using (country)
175 158
  WHERE countryid IS NOT NULL
176 159
    AND stateprovince IS NOT NULL;
177 160
-- SELECT 3312
178 161
-- Time: 4164.948 ms
179
ALTER TABLE vstate ADD COLUMN stateprovinceutf8 text;
180
ALTER TABLE vstate ADD COLUMN stateprovinceid integer;
181 162

  
182 163
-- try to clean up character representations
183 164
UPDATE vstate
......
294 275
-- todo: the same stuff done for country to deal with Congo-type case
295 276

  
296 277
-- merge in GADM stateprovince names (our standard names)
297
ALTER TABLE vstate ADD COLUMN stateprovincestd text;
298 278
UPDATE vstate vs
299 279
  SET stateprovincestd = name_1
300 280
  FROM gadm_stateprovince_lookup gadm1
......
352 332
-- scrub county --
353 333
------------------
354 334

  
355
CREATE TABLE vcounty AS
335
INSERT INTO vcounty
356 336
  SELECT DISTINCT countryid, stateprovinceid, county
357 337
  FROM vegbien_geoscrub
358 338
  JOIN vcountry USING (country)
......
362 342
    AND county IS NOT NULL;
363 343
-- SELECT 18715
364 344
-- Time: 3590.725 ms
365
ALTER TABLE vcounty ADD COLUMN countyutf8 text;
366
ALTER TABLE vcounty ADD COLUMN countyid integer;
367 345

  
368 346
-- try to clean up character representations
369 347
UPDATE vcounty
......
413 391
-- UPDATE 0
414 392
-- Time: 11255.926 ms
415 393

  
416
ALTER TABLE vcounty ADD COLUMN countystd text;
417

  
418 394
-- merge in GADM county names (our standard names)
419 395
-- for cases where we have geonameid
420 396
UPDATE vcounty vs
......
440 416
-- UPDATE 5027
441 417
-- Time: 1410.377 ms
442 418

  
443
-----------------------------
444
-- put everything together --
445
-----------------------------
419
COMMIT;
446 420

  
447
-- reconstitute the whole dang thang
448

  
449
-- new approach
450
CREATE TABLE geoscrub AS
451
SELECT decimallatitude, decimallongitude,
452
       country, stateprovince, county,
453
       countryid, stateprovinceid, countyid,
454
       countrystd, stateprovincestd, countystd
455
  FROM vegbien_geoscrub v
456
  LEFT JOIN vcountry USING (country)
457
  LEFT JOIN vstate USING (countryid, stateprovince)
458
  LEFT JOIN vcounty USING (countryid, stateprovinceid, county);
459
-- SELECT 1707970
460
-- Time: 26172.154 ms
461

  
462
-- try to squeeze out a few more direct stateprovince mappings to gadm2
463
UPDATE geoscrub
464
  SET stateprovincestd = name_1
465
  FROM (SELECT DISTINCT name_0, name_1 FROM gadm2) gadm
466
  WHERE country IS NOT NULL
467
    AND stateprovince IS NOT NULL
468
    AND countrystd = name_0
469
    AND stateprovincestd IS NULL
470
    AND stateprovince = name_1;
471
-- UPDATE 0
472
-- Time: 3630.973 ms
473

  
474
-- try to squeeze out a few more direct county mappings to gadm2
475
UPDATE geoscrub
476
  SET countystd = name_2
477
  FROM (SELECT DISTINCT name_0, name_1, name_2 FROM gadm2) gadm
478
  WHERE country IS NOT NULL
479
    AND stateprovince IS NOT NULL
480
    AND county IS NOT NULL
481
    AND countrystd = name_0
482
    AND stateprovincestd = name_1
483
    AND countystd IS NULL
484
    AND county = name_2;
485
-- UPDATE 69
486
-- Time: 3982.715 ms

Also available in: Unified diff