Revision 11496
Added by Paul Sarando about 11 years ago
derived/biengeo/geovalidate.sh | ||
---|---|---|
1 |
# Bash script to create a new postgis database and prep it with GADM |
|
2 |
# data for geovalidation purposes. |
|
3 |
# |
|
4 |
# Basic workflow: |
|
5 |
# 1. Create postgis-enabled db |
|
6 |
# 2. Load gadm2 data into database |
|
7 |
# a. create polygon geometry upon import (loader does this) |
|
8 |
# b. create simplified polygon geometry after import |
|
9 |
# c. create polygon geography after import |
|
10 |
# d. create indexes |
|
11 |
# e. cluster data in geom index order |
|
12 |
# |
|
13 |
# For postgis installation see: |
|
14 |
# http://trac.osgeo.org/postgis/wiki/UsersWikiPostGIS20Ubuntu1204 |
|
15 |
# |
|
16 |
# todo: |
|
17 |
# * better handle Antarctica problem so it can be included instead of |
|
18 |
# just dropped? probably not a huge deal for plants... |
|
19 |
# |
|
20 |
# Jim Regetz |
|
21 |
# NCEAS |
|
22 |
# Created November 2012 |
|
23 |
|
|
24 |
PGROOT="/usr/share/postgresql/9.1/contrib/postgis-2.0" |
|
25 |
|
|
26 |
# create postgis template if one doesn't already exist |
|
27 |
sudo -u postgres createdb template_postgis |
|
28 |
sudo -u postgres psql -d template_postgis -f $PGROOT/postgis.sql |
|
29 |
sudo -u postgres psql -d template_postgis -f $PGROOT/spatial_ref_sys.sql |
|
30 |
sudo -u postgres psql -d template_postgis -f $PGROOT/postgis_comments.sql |
|
31 |
|
|
32 |
# create postgis db from template |
|
33 |
createdb geoscrub -T template_postgis |
|
34 |
|
|
35 |
# |
|
36 |
# assemble input data |
|
37 |
# |
|
38 |
|
|
39 |
# get GADM data (320MB zip file) and unzip |
|
40 |
wget http://www.gadm.org/data2/gadm_v2_shp.zip |
|
41 |
unzip gadm_v2_shp.zip |
|
42 |
|
|
43 |
# |
|
44 |
# create and populate gadm2 table |
|
45 |
# |
|
46 |
|
|
47 |
# load gadm2 data (took 4.7 minutes on willow, 26-Oct-2012) |
|
48 |
shp2pgsql -s 4326 -W latin1 gadm2 gadm2 geoscrub | psql -d geoscrub |
|
49 |
|
|
50 |
# create indexes |
|
51 |
psql -c 'CREATE INDEX "gadm2_iso_idx" ON gadm2 (iso)' geoscrub |
|
52 |
psql -c 'CREATE INDEX "gadm2_geom_gist" ON gadm2 USING GIST (geom)' geoscrub |
|
53 |
psql -c 'CREATE INDEX "gadm2_name_0_idx" ON gadm2 (name_0)' geoscrub |
|
54 |
psql -c 'CREATE INDEX "gadm2_name_1_idx" ON gadm2 (name_1)' geoscrub |
|
55 |
|
|
56 |
|
|
57 |
# physically reorder tuples on disk in country name index order |
|
58 |
# nb: i _think_ this helped, but am not 100% sure |
|
59 |
psql -c 'CLUSTER gadm2 USING gadm2_name_0_idx' geoscrub |
|
60 |
|
|
61 |
# create simplified country polygons |
|
62 |
psql -c \ |
|
63 |
"SELECT AddGeometryColumn('public', 'gadm2', 'simple_geom', 4326, 'MULTIPOLYGON', 2 ); |
|
64 |
UPDATE gadm2 SET simple_geom = (SELECT st_simplify(geom, 0.001))" geoscrub |
|
65 |
psql -c \ |
|
66 |
'CREATE INDEX "gadm2_simple_geom_gist" ON gadm2 USING gist (simple_geom)' geoscrub |
|
67 |
# ... and create geography versions ... |
|
68 |
# note workaround to exclude Antarctica because it has an invalid |
|
69 |
# western extent of -180.000015258789 |
|
70 |
psql -c \ |
|
71 |
"ALTER TABLE gadm2 ADD COLUMN simple_geog geography; |
|
72 |
UPDATE gadm2 SET simple_geog = simple_geom::geography WHERE name_0<>'Antarctica'" geoscrub |
|
73 |
psql -c \ |
|
74 |
'CREATE INDEX "gadm2_simple_geog_gist" ON gadm2 USING gist (simple_geog)' geoscrub |
derived/biengeo/install.sh | ||
---|---|---|
1 |
#!/bin/bash |
|
2 |
|
|
3 |
# Bash script to create a new postgis database and prep it with GADM |
|
4 |
# data for geovalidation purposes. |
|
5 |
# |
|
6 |
# Basic workflow: |
|
7 |
# 1. Create postgis-enabled template db |
|
8 |
# 2. Create postgis-enabled db from the template |
|
9 |
# 3. Create required tables in postgis-enabled db |
|
10 |
# 4. Load gadm2 and geonames.org data into database |
|
11 |
# |
|
12 |
# For postgis installation see: |
|
13 |
# http://trac.osgeo.org/postgis/wiki/UsersWikiPostGIS20Ubuntu1204 |
|
14 |
# |
|
15 |
# Jim Regetz |
|
16 |
# NCEAS |
|
17 |
# Created November 2012 |
|
18 |
# |
|
19 |
# refactored and reorganized by |
|
20 |
# Paul Sarando |
|
21 |
# iPlant Collaborative |
|
22 |
# Updated Oct 2013 |
|
23 |
|
|
24 |
DB_NAME="geoscrub" |
|
25 |
DB_USER="bien" |
|
26 |
DB_HOST="localhost" |
|
27 |
SCRIPT_DIR="$(dirname $0)" |
|
28 |
|
|
29 |
function run_sudo_sql_cmd { |
|
30 |
local SQL_CMD="$1" |
|
31 |
|
|
32 |
sudo -u postgres psql -e -h "$DB_HOST" -d "$DB_NAME" -c "$SQL_CMD" |
|
33 |
if [[ $? != 0 ]]; then |
|
34 |
echo "Error while executing SQL command." |
|
35 |
echo -n "You may drop the ${DB_NAME} database" |
|
36 |
echo " and attempt to run this $0 script again." |
|
37 |
exit 1 |
|
38 |
fi |
|
39 |
} |
|
40 |
|
|
41 |
function run_sql_script { |
|
42 |
local SCRIPT="$1" |
|
43 |
|
|
44 |
psql -e --set ON_ERROR_STOP=1 -U "$DB_USER" -h "$DB_HOST" -d "$DB_NAME" < "$SCRIPT" |
|
45 |
if [[ $? != 0 ]]; then |
|
46 |
echo "Error while executing SQL script ${SCRIPT}" |
|
47 |
exit 1 |
|
48 |
fi |
|
49 |
} |
|
50 |
|
|
51 |
# create postgis template if one doesn't already exist |
|
52 |
echo "Setting up template_postgis database..." |
|
53 |
sudo -u postgres createdb template_postgis |
|
54 |
if [[ $? == 0 ]]; then |
|
55 |
# successfully created template_postgis, which means it didn't already exist |
|
56 |
sudo -u postgres psql -e -d template_postgis < "${SCRIPT_DIR}"/update.template_postgis.sql |
|
57 |
if [[ $? != 0 ]]; then |
|
58 |
echo "Could not create postgis extesion on template_postgis database." |
|
59 |
exit 1 |
|
60 |
fi |
|
61 |
fi |
|
62 |
|
|
63 |
# create postgis db from template |
|
64 |
echo "Creating the ${DB_NAME} database..." |
|
65 |
createdb -U "$DB_USER" -h "$DB_HOST" "$DB_NAME" -T template_postgis |
|
66 |
if [[ $? != 0 ]]; then |
|
67 |
echo "Could not create ${DB_NAME} database." |
|
68 |
exit 1 |
|
69 |
fi |
|
70 |
|
|
71 |
run_sudo_sql_cmd "ALTER TABLE spatial_ref_sys OWNER TO ${DB_USER}" |
|
72 |
run_sudo_sql_cmd "ALTER VIEW geography_columns OWNER TO ${DB_USER}" |
|
73 |
run_sudo_sql_cmd "ALTER VIEW geometry_columns OWNER TO ${DB_USER}" |
|
74 |
|
|
75 |
echo "Setting up ${DB_NAME} database tables..." |
|
76 |
run_sql_script "${SCRIPT_DIR}"/install.geonames.sql |
|
77 |
run_sql_script "${SCRIPT_DIR}"/install.geonames-to-gadm.sql |
|
78 |
run_sql_script "${SCRIPT_DIR}"/install.geoscrub.sql |
|
79 |
|
|
80 |
echo "Install successfully completed." |
|
81 |
|
|
0 | 82 |
derived/biengeo/update_validation_data.sh | ||
---|---|---|
1 |
#!/bin/bash |
|
2 |
|
|
3 |
# Bash script to create a new postgis database and prep it with GADM |
|
4 |
# data for geovalidation purposes. |
|
5 |
# |
|
6 |
# Basic workflow: |
|
7 |
# 1. Load gadm2 data into database |
|
8 |
# 2. Load geonames data into database |
|
9 |
# 3. Update geonames to gadm2 data tables |
|
10 |
# |
|
11 |
# Paul Sarando |
|
12 |
# iPlant Collaborative |
|
13 |
# Oct 2013 |
|
14 |
|
|
15 |
DB_NAME="geoscrub" |
|
16 |
DB_USER="bien" |
|
17 |
DB_HOST="localhost" |
|
18 |
SCRIPT_DIR="$(dirname $0)" |
|
19 |
|
|
20 |
function run_sql_script { |
|
21 |
local SCRIPT=$1 |
|
22 |
|
|
23 |
psql -e -U "$DB_USER" -h "$DB_HOST" -d "$DB_NAME" --set ON_ERROR_STOP=1 < "$SCRIPT" |
|
24 |
if [[ $? != 0 ]]; then |
|
25 |
echo "Error while executing SQL script ${SCRIPT}" |
|
26 |
exit 1 |
|
27 |
fi |
|
28 |
} |
|
29 |
|
|
30 |
echo "Updating geoscrub validation data." |
|
31 |
|
|
32 |
"${SCRIPT_DIR}"/update_gadm_data.sh |
|
33 |
if [[ $? != 0 ]]; then |
|
34 |
echo "Could not update ${DB_NAME} database with GADM data." |
|
35 |
exit 1 |
|
36 |
fi |
|
37 |
|
|
38 |
"${SCRIPT_DIR}"/update_geonames_data.sh |
|
39 |
if [[ $? != 0 ]]; then |
|
40 |
echo "Could not update ${DB_NAME} database with geonames.org data." |
|
41 |
exit 1 |
|
42 |
fi |
|
43 |
|
|
44 |
run_sql_script "${SCRIPT_DIR}/geonames-to-gadm.country.sql" |
|
45 |
run_sql_script "${SCRIPT_DIR}/geonames-to-gadm.stateprovince.sql" |
|
46 |
run_sql_script "${SCRIPT_DIR}/geonames-to-gadm.county.sql" |
|
47 |
|
|
48 |
echo "Validation data successfully updated." |
|
49 |
|
|
0 | 50 |
derived/biengeo/update.template_postgis.sql | ||
---|---|---|
1 |
BEGIN; |
|
2 |
|
|
3 |
CREATE EXTENSION IF NOT EXISTS postgis; |
|
4 |
UPDATE pg_database SET datistemplate = true WHERE datname = 'template_postgis'; |
|
5 |
|
|
6 |
COMMIT; |
|
7 |
|
derived/biengeo/update.gadm2.sql | ||
---|---|---|
1 |
BEGIN; |
|
2 |
|
|
3 |
-- create indexes |
|
4 |
CREATE INDEX "gadm2_iso_idx" ON gadm2 (iso); |
|
5 |
CREATE INDEX "gadm2_geom_gist" ON gadm2 USING GIST (geom); |
|
6 |
CREATE INDEX "gadm2_name_0_idx" ON gadm2 (name_0); |
|
7 |
CREATE INDEX "gadm2_name_1_idx" ON gadm2 (name_1); |
|
8 |
|
|
9 |
-- physically reorder tuples on disk in country name index order |
|
10 |
-- nb: i _think_ this helped, but am not 100% sure |
|
11 |
CLUSTER gadm2 USING gadm2_name_0_idx; |
|
12 |
|
|
13 |
-- create simplified country polygons |
|
14 |
SELECT AddGeometryColumn('public', 'gadm2', 'simple_geom', 4326, 'MULTIPOLYGON', 2 ); |
|
15 |
UPDATE gadm2 SET simple_geom = (SELECT st_simplify(geom, 0.001)); |
|
16 |
CREATE INDEX "gadm2_simple_geom_gist" ON gadm2 USING gist (simple_geom); |
|
17 |
-- ... and create geography versions ... |
|
18 |
-- note workaround to exclude Antarctica because it has an invalid |
|
19 |
-- western extent of -180.000015258789 |
|
20 |
ALTER TABLE gadm2 ADD COLUMN simple_geog geography; |
|
21 |
UPDATE gadm2 SET simple_geog = simple_geom::geography WHERE name_0<>'Antarctica'; |
|
22 |
CREATE INDEX "gadm2_simple_geog_gist" ON gadm2 USING gist (simple_geog); |
|
23 |
|
|
24 |
COMMIT; |
|
25 |
|
derived/biengeo/update_gadm_data.sh | ||
---|---|---|
1 |
#!/bin/bash |
|
2 |
|
|
3 |
# Bash script to create a new postgis database and prep it with GADM |
|
4 |
# data for geovalidation purposes. |
|
5 |
# |
|
6 |
# Basic workflow: |
|
7 |
# 1. create polygon geometry upon import (loader does this) |
|
8 |
# 2. create simplified polygon geometry after import |
|
9 |
# 3. create polygon geography after import |
|
10 |
# 4. create indexes |
|
11 |
# 5. cluster data in geom index order |
|
12 |
# |
|
13 |
# todo: |
|
14 |
# * better handle Antarctica problem so it can be included instead of |
|
15 |
# just dropped? probably not a huge deal for plants... |
|
16 |
# |
|
17 |
# Jim Regetz |
|
18 |
# NCEAS |
|
19 |
# Created November 2012 |
|
20 |
# |
|
21 |
# refactored and reorganized by |
|
22 |
# Paul Sarando |
|
23 |
# iPlant Collaborative |
|
24 |
# Updated Oct 2013 |
|
25 |
|
|
26 |
DB_NAME="geoscrub" |
|
27 |
DB_USER="bien" |
|
28 |
DB_HOST="localhost" |
|
29 |
SCRIPT_DIR="$(dirname $0)" |
|
30 |
|
|
31 |
# GADM data originally available at http://www.gadm.org/data2/gadm_v2_shp.zip |
|
32 |
# gadm.org now links this file from biogeo.ucdavis.edu. |
|
33 |
GADM_DATA_URL="http://biogeo.ucdavis.edu/data/gadm2/gadm_v2_shp.zip" |
|
34 |
GADM_DATA_DIR="${SCRIPT_DIR}/gadm_v2_shp" |
|
35 |
|
|
36 |
# |
|
37 |
# assemble input data |
|
38 |
# |
|
39 |
|
|
40 |
echo "Updating gadm2 tables from GADM data in ${GADM_DATA_DIR}" |
|
41 |
echo -n "Note, to force data to download again from ${GADM_DATA_URL}," |
|
42 |
echo " delete the directory ${GADM_DATA_DIR} before running this script." |
|
43 |
|
|
44 |
# Check for GADM data (320MB zip file) unziped in data directory. |
|
45 |
if [[ ! -d "$GADM_DATA_DIR" ]]; then |
|
46 |
echo "making directory ${GADM_DATA_DIR}" |
|
47 |
mkdir -p "$GADM_DATA_DIR" |
|
48 |
|
|
49 |
if [[ $? != 0 ]]; then |
|
50 |
echo "Could not create directory ${GADM_DATA_DIR}" |
|
51 |
exit 1 |
|
52 |
fi |
|
53 |
|
|
54 |
pushd "$GADM_DATA_DIR" |
|
55 |
wget -O gadm_v2_shp.zip "$GADM_DATA_URL" |
|
56 |
unzip gadm_v2_shp.zip |
|
57 |
rm -f gadm_v2_shp.zip |
|
58 |
popd |
|
59 |
fi |
|
60 |
|
|
61 |
# |
|
62 |
# create and populate gadm2 table |
|
63 |
# |
|
64 |
|
|
65 |
psql -e -U "$DB_USER" -h "$DB_HOST" -d "$DB_NAME" -c "DROP TABLE IF EXISTS gadm2" |
|
66 |
if [[ $? != 0 ]]; then |
|
67 |
echo "Could not drop GADM2 table in ${DB_NAME} database." |
|
68 |
exit 1 |
|
69 |
fi |
|
70 |
|
|
71 |
echo "Creating gadm2 table with shp2pgsql from ${GADM_DATA_DIR} data." |
|
72 |
|
|
73 |
pushd "$GADM_DATA_DIR" |
|
74 |
|
|
75 |
# load gadm2 data (took 4.7 minutes on willow, 26-Oct-2012) |
|
76 |
shp2pgsql -s 4326 -W latin1 gadm2 gadm2 "$DB_NAME" | \ |
|
77 |
psql --set ON_ERROR_STOP=1 -U "$DB_USER" -h "$DB_HOST" -d "$DB_NAME" |
|
78 |
if [[ $? != 0 ]]; then |
|
79 |
popd |
|
80 |
echo "Could not load GADM2 data into ${DB_NAME} database." |
|
81 |
exit 1 |
|
82 |
fi |
|
83 |
|
|
84 |
popd |
|
85 |
|
|
86 |
# create indexes and additional columns |
|
87 |
psql -e -U "$DB_USER" -h "$DB_HOST" -d "$DB_NAME" --set ON_ERROR_STOP=1 < "${SCRIPT_DIR}"/update.gadm2.sql |
|
88 |
if [[ $? != 0 ]]; then |
|
89 |
echo "Could not update GADM2 data and indexes in ${DB_NAME} database." |
|
90 |
exit 1 |
|
91 |
fi |
|
92 |
|
|
93 |
echo "Update gadm2 tables successfully completed." |
|
94 |
|
|
0 | 95 |
Also available in: Unified diff
Split geovalidate.sh into install and update scripts.
Split geovalidate.sh into install.sh and update_gadm_data.sh scripts.
The install.sh script creates the databse and uses the install sql
scripts to create all required tables.
The update_gadm_data.sh script downloads the GADM data and creates the
gadm2 table from the shape files.
Added an update_validation_data.sh script that calls the
update_gadm_data.sh, update_geonames_data.sh, and geonames-to-gadm SQL
scripts.