Project

General

Profile

1 11495 psarando
#!/bin/bash
2
3
# Script to geoscrub-enable a postgres database, developed in the
4
# context of BIEN3 geoscrubbing but with intentions of generality.
5
#
6
# Basic workflow:
7
#  1. Import table dumps obtained from geonames.org
8
#  2. Add additional custom mapping info, including:
9
#     a. Madagascar provinces used in GADM2 but not geonames
10
#     b. extra country name mappings/fixes specific to BIEN3 data
11
#     c. extra state/prov name mappings/fixes specific to BIEN3 data
12
#     d. custom state/prov name mappings binding GADM2 to geonames
13
#
14
# Jim Regetz
15
# NCEAS
16
# Created Nov 2012
17
#
18
# refactored and reorganized by
19
# Paul Sarando
20
# iPlant Collaborative
21
# Updated Oct 2013
22
23
DB_NAME="geoscrub"
24
DB_USER="bien"
25
DB_HOST="localhost"
26
SCRIPT_DIR="$(dirname $0)"
27
GEONAMES_DUMP_URL="http://download.geonames.org/export/dump"
28
ALL_COUNTRIES_ZIP=allCountries.zip
29
ALL_COUNTRIES_TXT=allCountries.txt
30
ALTERNATE_NAMES_ZIP=alternateNames.zip
31
ALTERNATE_NAMES_TXT=alternateNames.txt
32
COUNTRY_INFO_TXT=countryInfo.txt
33
HIERARCHY_ZIP=hierarchy.zip
34
HIERARCHY_TXT=hierarchy.txt
35
36
function run_sql_cmd {
37
    local SQL_CMD=$1
38
39
    psql -e -U "$DB_USER" -h "$DB_HOST" -d "$DB_NAME" -c "$SQL_CMD"
40
    if [[ $? != 0 ]]; then
41
        echo "Error while executing SQL command."
42
        exit 1
43
    fi
44
}
45
46
function run_sql_script {
47
    local SCRIPT=$1
48
49
    psql --set ON_ERROR_STOP=1 -U "$DB_USER" -h "$DB_HOST" -d "$DB_NAME" < "$SCRIPT"
50
    if [[ $? != 0 ]]; then
51
        echo "Error while executing SQL script ${SCRIPT}"
52
        exit 1
53
    fi
54
}
55
56
echo "Updating geonames.org tables..."
57
58
# Check for data downloaded into geonames under the current directory.
59
DATADIR="$(dirname $0)/geonames"
60
if [[ ! -d "${DATADIR}" ]]; then
61
    echo "making directory ${DATADIR}"
62
    mkdir -p "${DATADIR}"
63
64
    if [[ $? != 0 ]]; then
65
        echo "Could not create directory ${DATADIR}"
66
        exit 1
67
    fi
68
fi
69
70
run_sql_script "${SCRIPT_DIR}/truncate.geonames.sql"
71
72
#
73
# load geonames.org geonames
74
#
75
76
if [[ ! -r "${DATADIR}/${ALL_COUNTRIES_TXT}" ]]; then
77
    pushd "${DATADIR}"
78
    wget "${GEONAMES_DUMP_URL}/${ALL_COUNTRIES_ZIP}"
79
80
    if [[ $? != 0 ]]; then
81
        echo "Could not download ${ALL_COUNTRIES_ZIP}"
82
        exit 1
83
    fi
84
    unzip "$ALL_COUNTRIES_ZIP"
85
    rm -f "$ALL_COUNTRIES_ZIP"
86
    popd
87
fi
88
89
# import data
90
run_sql_cmd "\COPY geonames FROM '${DATADIR}/${ALL_COUNTRIES_TXT}' DELIMITER E'\t' NULL ''"
91
92
#
93
# load geonames.org alternate names
94
#
95
96
if [[ ! -r "${DATADIR}/${ALTERNATE_NAMES_TXT}" ]]; then
97
    pushd "${DATADIR}"
98
    wget "${GEONAMES_DUMP_URL}/${ALTERNATE_NAMES_ZIP}"
99
100
    if [[ $? != 0 ]]; then
101
        echo "Could not download ${ALTERNATE_NAMES_ZIP}"
102
        exit 1
103
    fi
104
    unzip "$ALTERNATE_NAMES_ZIP"
105
    rm -f "$ALTERNATE_NAMES_ZIP"
106
    popd
107
fi
108
109
# import data
110
run_sql_cmd "\COPY alternateNames FROM '${DATADIR}/${ALTERNATE_NAMES_TXT}' DELIMITER E'\t' NULL ''"
111
run_sql_script "${SCRIPT_DIR}/update.alternateNames.sql"
112
113
#
114
# load geonames.org country info
115
#
116
117
if [[ ! -r "${DATADIR}/${COUNTRY_INFO_TXT}" ]]; then
118
    pushd "${DATADIR}"
119
    wget "${GEONAMES_DUMP_URL}/${COUNTRY_INFO_TXT}"
120
121
    if [[ $? != 0 ]]; then
122
        echo "Could not download ${COUNTRY_INFO_TXT}"
123
        exit 1
124
    fi
125
    popd
126
fi
127
128
# import geonames country table
129
grep -ve '^#' "${DATADIR}/${COUNTRY_INFO_TXT}" | \
130
    psql -U "$DB_USER" -h "$DB_HOST" -d "$DB_NAME" -c \
131
    "\COPY countries FROM STDIN DELIMITER E'\t' NULL ''"
132
if [[ $? != 0 ]]; then
133
    echo "Could not copy countries table data."
134
    exit 1
135
fi
136
run_sql_script "${SCRIPT_DIR}/update.alt_country.sql"
137
run_sql_script "${SCRIPT_DIR}/update.alt_stateprovince.sql"
138
139
#
140
# load geonames.org hierarchy info
141
#
142
143
if [[ ! -r "${DATADIR}/${HIERARCHY_TXT}" ]]; then
144
    pushd "${DATADIR}"
145
    wget "${GEONAMES_DUMP_URL}/${HIERARCHY_ZIP}"
146
147
    if [[ $? != 0 ]]; then
148
        echo "Could not download ${HIERARCHY_ZIP}"
149
        exit 1
150
    fi
151
    unzip "$HIERARCHY_ZIP"
152
    rm -f "$HIERARCHY_ZIP"
153
    popd
154
fi
155
156
# import geonames country table
157
run_sql_cmd "\COPY hierarchy FROM '${DATADIR}/${HIERARCHY_TXT}' DELIMITER E'\t' NULL ''"
158
run_sql_script "${SCRIPT_DIR}/update.hierarchy.sql"
159
160
# augment geonames with some additional places we need, and update
161
# hierarchy table accordingly
162
run_sql_script "${SCRIPT_DIR}/update.geonames.sql"
163
164
echo "Update geonames.org tables successfully completed."