Project

General

Profile

« Previous | Next » 

Revision 11496

Split geovalidate.sh into install and update scripts.

Split geovalidate.sh into install.sh and update_gadm_data.sh scripts.
The install.sh script creates the databse and uses the install sql
scripts to create all required tables.
The update_gadm_data.sh script downloads the GADM data and creates the
gadm2 table from the shape files.
Added an update_validation_data.sh script that calls the
update_gadm_data.sh, update_geonames_data.sh, and geonames-to-gadm SQL
scripts.

View differences:

derived/biengeo/geovalidate.sh
1
# Bash script to create a new postgis database and prep it with GADM
2
# data for geovalidation purposes.
3
#
4
# Basic workflow:
5
#  1. Create postgis-enabled db
6
#  2. Load gadm2 data into database
7
#     a. create polygon geometry upon import (loader does this)
8
#     b. create simplified polygon geometry after import
9
#     c. create polygon geography after import
10
#     d. create indexes
11
#     e. cluster data in geom index order
12
# 
13
# For postgis installation see:
14
#   http://trac.osgeo.org/postgis/wiki/UsersWikiPostGIS20Ubuntu1204
15
#
16
# todo:
17
# * better handle Antarctica problem so it can be included instead of
18
#   just dropped? probably not a huge deal for plants...
19
#
20
# Jim Regetz
21
# NCEAS
22
# Created November 2012
23

  
24
PGROOT="/usr/share/postgresql/9.1/contrib/postgis-2.0"
25

  
26
# create postgis template if one doesn't already exist
27
sudo -u postgres createdb template_postgis
28
sudo -u postgres psql -d template_postgis -f $PGROOT/postgis.sql
29
sudo -u postgres psql -d template_postgis -f $PGROOT/spatial_ref_sys.sql
30
sudo -u postgres psql -d template_postgis -f $PGROOT/postgis_comments.sql
31

  
32
# create postgis db from template
33
createdb geoscrub -T template_postgis
34

  
35
#
36
# assemble input data
37
#
38

  
39
# get GADM data (320MB zip file) and unzip
40
wget http://www.gadm.org/data2/gadm_v2_shp.zip
41
unzip gadm_v2_shp.zip
42

  
43
#
44
# create and populate gadm2 table
45
#
46

  
47
# load gadm2 data (took 4.7 minutes on willow, 26-Oct-2012)
48
shp2pgsql -s 4326 -W latin1 gadm2 gadm2 geoscrub | psql -d geoscrub
49

  
50
# create indexes
51
psql -c 'CREATE INDEX "gadm2_iso_idx" ON gadm2 (iso)' geoscrub
52
psql -c 'CREATE INDEX "gadm2_geom_gist" ON gadm2 USING GIST (geom)' geoscrub
53
psql -c 'CREATE INDEX "gadm2_name_0_idx" ON gadm2 (name_0)' geoscrub
54
psql -c 'CREATE INDEX "gadm2_name_1_idx" ON gadm2 (name_1)' geoscrub
55

  
56

  
57
# physically reorder tuples on disk in country name index order
58
# nb: i _think_ this helped, but am not 100% sure
59
psql -c 'CLUSTER gadm2 USING gadm2_name_0_idx' geoscrub
60

  
61
# create simplified country polygons
62
psql -c \
63
  "SELECT AddGeometryColumn('public', 'gadm2', 'simple_geom', 4326, 'MULTIPOLYGON', 2 );
64
   UPDATE gadm2 SET simple_geom = (SELECT st_simplify(geom, 0.001))" geoscrub
65
psql -c \
66
  'CREATE INDEX "gadm2_simple_geom_gist" ON gadm2 USING gist (simple_geom)' geoscrub
67
# ... and create geography versions ...
68
# note workaround to exclude Antarctica because it has an invalid
69
# western extent of -180.000015258789
70
psql -c \
71
  "ALTER TABLE gadm2 ADD COLUMN simple_geog geography;
72
   UPDATE gadm2 SET simple_geog = simple_geom::geography WHERE name_0<>'Antarctica'" geoscrub
73
psql -c \
74
  'CREATE INDEX "gadm2_simple_geog_gist" ON gadm2 USING gist (simple_geog)' geoscrub
derived/biengeo/install.sh
1
#!/bin/bash
2

  
3
# Bash script to create a new postgis database and prep it with GADM
4
# data for geovalidation purposes.
5
#
6
# Basic workflow:
7
#  1. Create postgis-enabled template db
8
#  2. Create postgis-enabled db from the template
9
#  3. Create required tables in postgis-enabled db
10
#  4. Load gadm2 and geonames.org data into database
11
#
12
# For postgis installation see:
13
#   http://trac.osgeo.org/postgis/wiki/UsersWikiPostGIS20Ubuntu1204
14
#
15
# Jim Regetz
16
# NCEAS
17
# Created November 2012
18
#
19
# refactored and reorganized by
20
# Paul Sarando
21
# iPlant Collaborative
22
# Updated Oct 2013
23

  
24
DB_NAME="geoscrub"
25
DB_USER="bien"
26
DB_HOST="localhost"
27
SCRIPT_DIR="$(dirname $0)"
28

  
29
function run_sudo_sql_cmd {
30
    local SQL_CMD="$1"
31

  
32
    sudo -u postgres psql -e -h "$DB_HOST" -d "$DB_NAME" -c "$SQL_CMD"
33
    if [[ $? != 0 ]]; then
34
        echo "Error while executing SQL command."
35
        echo -n "You may drop the ${DB_NAME} database"
36
        echo " and attempt to run this $0 script again."
37
        exit 1
38
    fi
39
}
40

  
41
function run_sql_script {
42
    local SCRIPT="$1"
43

  
44
    psql -e --set ON_ERROR_STOP=1 -U "$DB_USER" -h "$DB_HOST" -d "$DB_NAME" < "$SCRIPT"
45
    if [[ $? != 0 ]]; then
46
        echo "Error while executing SQL script ${SCRIPT}"
47
        exit 1
48
    fi
49
}
50

  
51
# create postgis template if one doesn't already exist
52
echo "Setting up template_postgis database..."
53
sudo -u postgres createdb template_postgis
54
if [[ $? == 0 ]]; then
55
    # successfully created template_postgis, which means it didn't already exist
56
    sudo -u postgres psql -e -d template_postgis < "${SCRIPT_DIR}"/update.template_postgis.sql
57
    if [[ $? != 0 ]]; then
58
        echo "Could not create postgis extesion on template_postgis database."
59
        exit 1
60
    fi
61
fi
62

  
63
# create postgis db from template
64
echo "Creating the ${DB_NAME} database..."
65
createdb -U "$DB_USER" -h "$DB_HOST" "$DB_NAME" -T template_postgis
66
if [[ $? != 0 ]]; then
67
    echo "Could not create ${DB_NAME} database."
68
    exit 1
69
fi
70

  
71
run_sudo_sql_cmd "ALTER TABLE spatial_ref_sys OWNER TO ${DB_USER}"
72
run_sudo_sql_cmd "ALTER VIEW geography_columns OWNER TO ${DB_USER}"
73
run_sudo_sql_cmd "ALTER VIEW geometry_columns OWNER TO ${DB_USER}"
74

  
75
echo "Setting up ${DB_NAME} database tables..."
76
run_sql_script "${SCRIPT_DIR}"/install.geonames.sql
77
run_sql_script "${SCRIPT_DIR}"/install.geonames-to-gadm.sql
78
run_sql_script "${SCRIPT_DIR}"/install.geoscrub.sql
79

  
80
echo "Install successfully completed."
81

  
0 82

  
derived/biengeo/update_validation_data.sh
1
#!/bin/bash
2

  
3
# Bash script to create a new postgis database and prep it with GADM
4
# data for geovalidation purposes.
5
#
6
# Basic workflow:
7
#  1. Load gadm2 data into database
8
#  2. Load geonames data into database
9
#  3. Update geonames to gadm2 data tables
10
# 
11
# Paul Sarando
12
# iPlant Collaborative
13
# Oct 2013
14

  
15
DB_NAME="geoscrub"
16
DB_USER="bien"
17
DB_HOST="localhost"
18
SCRIPT_DIR="$(dirname $0)"
19

  
20
function run_sql_script {
21
    local SCRIPT=$1
22

  
23
    psql -e -U "$DB_USER" -h "$DB_HOST" -d "$DB_NAME" --set ON_ERROR_STOP=1 < "$SCRIPT"
24
    if [[ $? != 0 ]]; then
25
        echo "Error while executing SQL script ${SCRIPT}"
26
        exit 1
27
    fi
28
}
29

  
30
echo "Updating geoscrub validation data."
31

  
32
"${SCRIPT_DIR}"/update_gadm_data.sh
33
if [[ $? != 0 ]]; then
34
    echo "Could not update ${DB_NAME} database with GADM data."
35
    exit 1
36
fi
37

  
38
"${SCRIPT_DIR}"/update_geonames_data.sh
39
if [[ $? != 0 ]]; then
40
    echo "Could not update ${DB_NAME} database with geonames.org data."
41
    exit 1
42
fi
43

  
44
run_sql_script "${SCRIPT_DIR}/geonames-to-gadm.country.sql"
45
run_sql_script "${SCRIPT_DIR}/geonames-to-gadm.stateprovince.sql"
46
run_sql_script "${SCRIPT_DIR}/geonames-to-gadm.county.sql"
47

  
48
echo "Validation data successfully updated."
49

  
0 50

  
derived/biengeo/update.template_postgis.sql
1
BEGIN;
2

  
3
CREATE EXTENSION IF NOT EXISTS postgis;
4
UPDATE pg_database SET datistemplate = true WHERE datname = 'template_postgis';
5

  
6
COMMIT;
7

  
derived/biengeo/update.gadm2.sql
1
BEGIN;
2

  
3
-- create indexes
4
CREATE INDEX "gadm2_iso_idx" ON gadm2 (iso);
5
CREATE INDEX "gadm2_geom_gist" ON gadm2 USING GIST (geom);
6
CREATE INDEX "gadm2_name_0_idx" ON gadm2 (name_0);
7
CREATE INDEX "gadm2_name_1_idx" ON gadm2 (name_1);
8

  
9
-- physically reorder tuples on disk in country name index order
10
-- nb: i _think_ this helped, but am not 100% sure
11
CLUSTER gadm2 USING gadm2_name_0_idx;
12

  
13
-- create simplified country polygons
14
SELECT AddGeometryColumn('public', 'gadm2', 'simple_geom', 4326, 'MULTIPOLYGON', 2 );
15
UPDATE gadm2 SET simple_geom = (SELECT st_simplify(geom, 0.001));
16
CREATE INDEX "gadm2_simple_geom_gist" ON gadm2 USING gist (simple_geom);
17
-- ... and create geography versions ...
18
-- note workaround to exclude Antarctica because it has an invalid
19
-- western extent of -180.000015258789
20
ALTER TABLE gadm2 ADD COLUMN simple_geog geography;
21
UPDATE gadm2 SET simple_geog = simple_geom::geography WHERE name_0<>'Antarctica';
22
CREATE INDEX "gadm2_simple_geog_gist" ON gadm2 USING gist (simple_geog);
23

  
24
COMMIT;
25

  
derived/biengeo/update_gadm_data.sh
1
#!/bin/bash
2

  
3
# Bash script to create a new postgis database and prep it with GADM
4
# data for geovalidation purposes.
5
#
6
# Basic workflow:
7
#  1. create polygon geometry upon import (loader does this)
8
#  2. create simplified polygon geometry after import
9
#  3. create polygon geography after import
10
#  4. create indexes
11
#  5. cluster data in geom index order
12
# 
13
# todo:
14
# * better handle Antarctica problem so it can be included instead of
15
#   just dropped? probably not a huge deal for plants...
16
#
17
# Jim Regetz
18
# NCEAS
19
# Created November 2012
20
#
21
# refactored and reorganized by
22
# Paul Sarando
23
# iPlant Collaborative
24
# Updated Oct 2013
25

  
26
DB_NAME="geoscrub"
27
DB_USER="bien"
28
DB_HOST="localhost"
29
SCRIPT_DIR="$(dirname $0)"
30

  
31
# GADM data originally available at http://www.gadm.org/data2/gadm_v2_shp.zip
32
# gadm.org now links this file from biogeo.ucdavis.edu.
33
GADM_DATA_URL="http://biogeo.ucdavis.edu/data/gadm2/gadm_v2_shp.zip"
34
GADM_DATA_DIR="${SCRIPT_DIR}/gadm_v2_shp"
35

  
36
#
37
# assemble input data
38
#
39

  
40
echo "Updating gadm2 tables from GADM data in ${GADM_DATA_DIR}"
41
echo -n "Note, to force data to download again from ${GADM_DATA_URL},"
42
echo " delete the directory ${GADM_DATA_DIR} before running this script."
43

  
44
# Check for GADM data (320MB zip file) unziped in data directory.
45
if [[ ! -d "$GADM_DATA_DIR" ]]; then
46
    echo "making directory ${GADM_DATA_DIR}"
47
    mkdir -p "$GADM_DATA_DIR"
48

  
49
    if [[ $? != 0 ]]; then
50
        echo "Could not create directory ${GADM_DATA_DIR}"
51
        exit 1
52
    fi
53

  
54
    pushd "$GADM_DATA_DIR"
55
    wget -O gadm_v2_shp.zip "$GADM_DATA_URL"
56
    unzip gadm_v2_shp.zip
57
    rm -f gadm_v2_shp.zip
58
    popd
59
fi
60

  
61
#
62
# create and populate gadm2 table
63
#
64

  
65
psql -e -U "$DB_USER" -h "$DB_HOST" -d "$DB_NAME" -c "DROP TABLE IF EXISTS gadm2"
66
if [[ $? != 0 ]]; then
67
    echo "Could not drop GADM2 table in ${DB_NAME} database."
68
    exit 1
69
fi
70

  
71
echo "Creating gadm2 table with shp2pgsql from ${GADM_DATA_DIR} data."
72

  
73
pushd "$GADM_DATA_DIR"
74

  
75
# load gadm2 data (took 4.7 minutes on willow, 26-Oct-2012)
76
shp2pgsql -s 4326 -W latin1 gadm2 gadm2 "$DB_NAME" | \
77
    psql --set ON_ERROR_STOP=1 -U "$DB_USER" -h "$DB_HOST" -d "$DB_NAME"
78
if [[ $? != 0 ]]; then
79
    popd
80
    echo "Could not load GADM2 data into ${DB_NAME} database."
81
    exit 1
82
fi
83

  
84
popd
85

  
86
# create indexes and additional columns
87
psql -e -U "$DB_USER" -h "$DB_HOST" -d "$DB_NAME" --set ON_ERROR_STOP=1 < "${SCRIPT_DIR}"/update.gadm2.sql
88
if [[ $? != 0 ]]; then
89
    echo "Could not update GADM2 data and indexes in ${DB_NAME} database."
90
    exit 1
91
fi
92

  
93
echo "Update gadm2 tables successfully completed."
94

  
0 95

  

Also available in: Unified diff