Project

General

Profile

1
#!/bin/bash

# Bash script to create a new PostGIS database and prep it with GADM
# data for geovalidation purposes.
#
# Basic workflow:
#  1. create polygon geometry upon import (loader does this)
#  2. create simplified polygon geometry after import
#  3. create polygon geography after import
#  4. create indexes
#  5. cluster data in geom index order
#
# todo:
# * better handle Antarctica problem so it can be included instead of
#   just dropped? probably not a huge deal for plants...
#
# Jim Regetz
# NCEAS
# Created November 2012
#
# refactored and reorganized by
# Paul Sarando
# iPlant Collaborative
# Updated Oct 2013
25

    
26
# Default connection settings; overridden by the -d/--dbname, -U/--username
# and -h/--host command-line options.
DB_NAME="geoscrub"
DB_USER="bien"
# Extra host arguments handed to psql ("-h HOST"); empty leaves psql's
# default connection target in place.
DB_HOST_OPT=""
# Directory holding this script; update.gadm2.sql is read from here.
# "$0" is quoted so a script path containing whitespace is not split.
SCRIPT_DIR="$(dirname "$0")"

# GADM data originally available at http://www.gadm.org/data2/gadm_v2_shp.zip
# gadm.org now links this file from biogeo.ucdavis.edu.
GADM_DATA_URL="http://biogeo.ucdavis.edu/data/gadm2/gadm_v2_shp.zip"
# Directory the GADM shapefiles are unpacked into (overridden by -g/--gadm-data).
GADM_DATA_DIR="${HOME}/gadm_v2_shp"
35

    
36
# Print the command-line help and exit with status 1.
#
# Called for -? / --help and after every option-parsing error.
# Outputs: the complete help text on stderr (nothing on stdout, so the
#          help never mixes with regular script output).
# Returns: never returns; terminates the script with exit status 1.
function usage {
    {
        echo "Usage: $0 [OPTIONS]"
        echo "Valid Options:"
        echo "-d, --dbname=DBNAME      database name psql commands will connect to"
        echo "-h, --host=HOSTNAME      database server host or socket directory"
        echo "-U, --username=USERNAME  database user name"
        echo ""
        echo "Input Data Options:"
        # The long form is only accepted as --gadm-data=DIR, so show it that way.
        echo "-g, --gadm-data=DIR      GADM data directory (default: ${HOME}/gadm_v2_shp)"
        echo "                         Delete this directory, or the GADM data in it,"
        echo "                         to re-download the data."
    } >&2
    exit 1
}
49

    
50
# Consume the command-line options one at a time, overriding the defaults.
# Short options take their value from the next word; long options carry it
# after the "=" sign. Anything unrecognised prints the help and exits.
while (( $# > 0 )); do
    case "$1" in
        -\? | --help)
            usage
            ;;
        -h | -U | -d | -g)
            # All short options need a non-empty value in $2.
            if [[ -z $2 ]]; then
                echo "Option $1 requires an argument." >&2
                usage
            fi
            case "$1" in
                # Stored as one "-h HOST" string; it is expanded unquoted in
                # the psql calls so it splits into two words.
                -h) DB_HOST_OPT="-h $2" ;;
                -U) DB_USER="$2" ;;
                -d) DB_NAME="$2" ;;
                -g) GADM_DATA_DIR="$2" ;;
            esac
            shift 2
            ;;
        --host=*)
            DB_HOST_OPT="-h ${1#*=}"
            shift
            ;;
        --username=*)
            DB_USER="${1#*=}"
            shift
            ;;
        --dbname=*)
            DB_NAME="${1#*=}"
            shift
            ;;
        --gadm-data=*)
            GADM_DATA_DIR="${1#*=}"
            shift
            ;;
        *)
            echo "Invalid option: $1" >&2
            usage
            ;;
    esac
done
109

    
110
#
# assemble input data
#
113

    
114
echo "Updating gadm2 tables from GADM data in ${GADM_DATA_DIR}"
echo -n "Note, to force data to download again from ${GADM_DATA_URL},"
echo " delete the directory ${GADM_DATA_DIR} before running this script."

# Check for GADM data (320MB zip file) unzipped in data directory.
# Only the existence of the directory is tested: an existing directory is
# used as-is, a missing one is created and filled from GADM_DATA_URL.
if [[ ! -d "$GADM_DATA_DIR" ]]; then
    echo "making directory ${GADM_DATA_DIR}"
    mkdir -p "$GADM_DATA_DIR"

    if [[ $? != 0 ]]; then
        echo "Could not create directory ${GADM_DATA_DIR}"
        exit 1
    fi

    # Without this check a failed pushd would download into the current
    # working directory instead of the data directory.
    if ! pushd "$GADM_DATA_DIR"; then
        echo "Could not change to directory ${GADM_DATA_DIR}"
        exit 1
    fi

    fetch_ok=0
    if wget -O gadm_v2_shp.zip "$GADM_DATA_URL"; then
        unzip gadm_v2_shp.zip
        # Info-ZIP unzip exits with 1 for warnings where extraction still
        # completed, so only statuses above 1 count as a failure.
        if [[ $? -le 1 ]]; then
            fetch_ok=1
        fi
    fi
    rm -f gadm_v2_shp.zip
    popd

    if [[ $fetch_ok != 1 ]]; then
        echo "Could not download and unzip GADM data from ${GADM_DATA_URL}"
        # The directory was created by this run; remove it again so the next
        # run retries the download instead of finding an empty directory.
        rm -rf -- "${GADM_DATA_DIR:?}"
        exit 1
    fi
fi
134

    
135
#
# create and populate gadm2 table
#
138

    
139
# Start from a clean slate: remove any gadm2 table left in the database.
# DB_HOST_OPT is deliberately unquoted so "-h HOST" splits into two words
# (and disappears entirely when it is empty).
psql -e -U "$DB_USER" $DB_HOST_OPT -d "$DB_NAME" -c "DROP TABLE IF EXISTS gadm2" || {
    echo "Could not drop GADM2 table in ${DB_NAME} database."
    exit 1
}
144

    
145
echo "Creating gadm2 table with shp2pgsql from ${GADM_DATA_DIR} data."

# shp2pgsql is run from inside the data directory so the shapefile can be
# referenced by its bare name (gadm2). Abort if the directory is unusable
# rather than running the loader from the wrong place.
if ! pushd "$GADM_DATA_DIR"; then
    echo "Could not change to directory ${GADM_DATA_DIR}."
    exit 1
fi

# load gadm2 data (took 4.7 minutes on willow, 26-Oct-2012)
# DB_HOST_OPT is deliberately unquoted so "-h HOST" splits into two words.
shp2pgsql -s 4326 -W latin1 gadm2 gadm2 "$DB_NAME" | \
    psql --set ON_ERROR_STOP=1 -U "$DB_USER" $DB_HOST_OPT -d "$DB_NAME"
# $? would only report psql, which succeeds on empty or truncated input;
# keep the status of both pipeline stages so a shp2pgsql failure is caught.
load_status=("${PIPESTATUS[@]}")

popd

if [[ ${load_status[0]} != 0 || ${load_status[1]} != 0 ]]; then
    echo "Could not load GADM2 data into ${DB_NAME} database."
    exit 1
fi
159

    
160
# create indexes and additional columns
161
# Post-process the freshly loaded table by feeding update.gadm2.sql (kept
# next to this script) to psql; ON_ERROR_STOP makes psql stop and return a
# non-zero status at the first failing statement.
psql -e -U "$DB_USER" $DB_HOST_OPT -d "$DB_NAME" --set ON_ERROR_STOP=1 \
    < "${SCRIPT_DIR}/update.gadm2.sql" || {
    echo "Could not update GADM2 data and indexes in ${DB_NAME} database."
    exit 1
}

echo "Update gadm2 tables successfully completed."
168

    
(24-24/26)