Project

General

Profile

1 10707 aaronmk
/*
2
  SQL statements to generate tables mapping geonames features at each
3
  relevant administative level (country, state/province, county/parish)
4
  to GADM2 features.
5
6
  Todo:
7
  * Be tolerant to presence/absence of administrative qualifiers in
8
    names? (Already doing this mildly for County/Co/Co.)
9
    - Municipality
10
    - Barrio
11
    - District
12
13
  * Get rid of rudundancy
14
  * Try matching against GADM2 varname_* columns?
15
16
  Jim Regetz
17
  NCEAS
18
  Created Nov 2012
19
*/
20
21
------------------------------
22
-- Level 1 (State/Province) --
23
------------------------------
24 11494 psarando
BEGIN;
25 10707 aaronmk
26 11494 psarando
TRUNCATE gadm_stateprovince_lookup;
27
28 10707 aaronmk
-- map gadm2 level1 to geonames ADM1
29
INSERT INTO gadm_stateprovince_lookup (stateprovinceid)
30
  SELECT DISTINCT geonameid
31
    FROM geonames
32
    WHERE featurecode='ADM1';
33
-- INSERT 0 3841
34
-- Time: 7085.635 ms
35
36
37
-- try matching against alternatenames table
38
UPDATE gadm_stateprovince_lookup gs
39
  SET name_0 = gadm.name_0,
40
      name_1 = gadm.name_1
41
  FROM (SELECT DISTINCT iso, name_0, name_1 FROM gadm2) gadm,
42
       alternatenames a,
43
       geonames g,
44
       hierarchy h,
45
       gadm_country_lookup gc
46
  WHERE lower(gadm.name_1)=lower(alternatename)
47
    AND gs.stateprovinceid = a.geonameid
48
    AND a.geonameid = g.geonameid
49
    AND gs.stateprovinceid = h.childid
50
    AND h.parentid = gc.countryid
51
    AND gc.name_0 = gadm.name_0
52
    AND g.featurecode='ADM1'
53
    AND gs.name_1 IS NULL;
54
-- UPDATE 2145
55
-- Time: 3453.568 ms
56
57
-- try matching against geonames (names and alternatenames)
58
UPDATE gadm_stateprovince_lookup gs
59
  SET name_0 = gadm.name_0,
60
      name_1 = gadm.name_1
61
  FROM (SELECT DISTINCT iso, name_0, name_1 FROM gadm2) gadm,
62
       geonames g,
63
       hierarchy h,
64
       gadm_country_lookup gc
65
  WHERE (lower(gadm.name_1)=lower(g.name)
66
         OR lower(gadm.name_1) =
67
            ANY (string_to_array(lower(g.alternatenames), ',')))
68
    AND gs.stateprovinceid = h.childid
69
    AND h.parentid = gc.countryid
70
    AND gc.name_0 = gadm.name_0
71
    AND gs.stateprovinceid = g.geonameid
72
    AND g.featurecode='ADM1'
73
    AND gs.name_1 IS NULL;
74
-- UPDATE 319
75
-- Time: 857.885 ms
76
77
78
-- now again but against our manual mapping
79
UPDATE gadm_stateprovince_lookup gs
80
  SET name_0 = gadm.name_0,
81
      name_1 = gadm.name_1
82
  FROM (SELECT DISTINCT iso, name_0, name_1 FROM gadm2) gadm,
83
       alt_stateprovince asp,
84
       geonames g,
85
       countries c
86
  WHERE gadm.name_1=asp.alternatename
87
    AND gadm.iso=c.iso3
88
    AND asp.country=c.country
89
    AND gs.stateprovinceid = g.geonameid
90
    AND g.countrycode=c.iso
91
    AND g.name=asp.stateprovince
92
    AND gs.name_0 IS NULL;
93
-- UPDATE 29
94
-- Time: 777.376 ms
95
96 11494 psarando
COMMIT;
97 10707 aaronmk
98
/*
99
-- these geonameids match multiple gadm2 state/provinces, and that's bad
100
-- because we don't know which one to use for geovalidation
101
select stateprovinceid, name_0, array_agg(name_1) as name_1
102
  from gadm_stateprovince_lookup
103
  group by stateprovinceid, name_0
104
  having count(*)>1;
105
106
 stateprovinceid |  name_0  |                name_1
107
-----------------+----------+--------------------------------------
108
         3653890 | Ecuador  | {Orellana,Orellana}
109
          453751 | Bulgaria | {Razgrad,Ruse}
110
         1831095 | Cambodia | {"Phnom Penh",Kândal}
111
         1506272 | Russia   | {Altay,Gorno-Altay}
112
          128222 | Iran     | {Kermanshah,Kordestan}
113
         3457415 | Brazil   | {"Mato Grosso do Sul","Mato Grosso"}
114
          170652 | Syria    | {Damascus,"Rif Dimashq"}
115
(7 rows)
116
117
-- for now, manually clean up after these ambiguous cases
118
-- ... this countryid should only apply to Mato Grosso do Sul
119
DELETE FROM gadm_stateprovince_lookup
120
  WHERE name_0 = 'Brazil'
121
    AND name_1 = 'Mato Grosso'
122
    AND stateprovinceid = '3457415';
123
*/
124
125
/*
126
-- these gadm2 state/provinces match multiple geonameids, but i'm pretty sure
127
-- we don't really care in this direction
128
select iso, name_1, count(*)
129
  from (select distinct iso, name_1 from gadm2) gadm2
130
  left join (
131
      select distinct gadm2.iso,
132
             name_1,
133
             alternatenames.geonameid,
134
             geonames.name
135
        from (select distinct iso, name_1 from gadm2) gadm2
136
        join alternatenames on name_1=alternatename
137
        join geonames using (geonameid)
138
        join countries on countrycode=countries.iso
139
        where featurecode='ADM1'
140
          and gadm2.iso=countries.iso3) foo
141
    using (iso, name_1) group by iso, name_1 having count(*)>1;
142
143
 iso |   name_1    | count
144
-----+-------------+-------
145
 AZE | Yevlax      |     2
146
 BGR | Ruse        |     2
147
 BMU | Hamilton    |     2
148
 BRA | Mato Grosso |     2
149
 KAZ | Almaty      |     2
150
 KHM | Phnom Penh  |     2
151
 RUS | Altay       |     2
152
 RUS | Moskva      |     2
153
(8 rows)
154
*/