#!/bin/bash

# Bash script to load geoscrub input data into a postgis database and
# scrub it with the geonames and geovalidate SQL scripts.
#
# Basic workflow:
#  1. Load geoscrub input data into database
#  2. Scrub geoscrub input data with the geonames.sql script
#  3. Scrub geoscrub input data with the geovalidate.sql script
#
# Paul Sarando
# iPlant Collaborative
# Oct 2013

# Default connection settings; the command-line options parsed further down
# in this script overwrite them.
DB_NAME="geoscrub"
DB_USER="bien"

# Either empty or "-h HOST". It is expanded unquoted where it is used so that
# it splits into two words, or into nothing at all when empty.
DB_HOST_OPT=""

# Directory part of the path this script was invoked with; the helper script
# and the SQL files are looked up relative to it. "$0" is quoted so that a
# path containing whitespace or glob characters is passed to dirname intact.
SCRIPT_DIR="$(dirname -- "$0")"

# Print the command-line help to stderr and terminate the script.
#
# Outputs: help text on stderr (nothing on stdout).
# Exits:   always with status 1, including when help was requested explicitly,
#          so every caller can treat a call to usage as "stop here".
function usage {
    # Unquoted here-document delimiter: $0 and ${HOME} are expanded.
    cat >&2 <<EOF
Usage: $0 [OPTIONS]
Valid Options:
-d, --dbname=DBNAME      database name psql commands will connect to
-h, --host=HOSTNAME      database server host or socket directory
-U, --username=USERNAME  database user name
-?, --help               show this help message and exit
Input Data Options:
-i, --geoscrub-input=DIR Geoscrub input directory (default: ${HOME}/geoscrub_input)
                         Delete this directory, or the input CSV in it,
                         to re-download the data.
EOF
    exit 1
}

# Abort through usage unless an option was given a non-empty value.
#
# Arguments: $1 - the option as typed ("-h", or a whole "--host=VALUE" word);
#                 only the part before "=" is shown in the message
#            $2 - the value supplied for that option (may be empty or unset)
function require_option_value {
    if [[ -z $2 ]]; then
        echo "Option ${1%%=*} requires an argument." >&2
        usage
    fi
}

# Parse the script's command-line options.
#
# Globals written: DB_HOST_OPT, DB_USER, DB_NAME, GEOSCRUB_INPUT_OPT
# Arguments:       the script's positional parameters ("$@")
# Any unknown option, or an option without a value, ends the script via usage.
function parse_args {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -\? | --help)
                usage
                ;;
            -h)
                require_option_value "$1" "$2"
                DB_HOST_OPT="-h $2"
                shift 2
                ;;
            --host=*)
                # An empty value would leave a bare "-h" that swallows the
                # next argument once DB_HOST_OPT is expanded unquoted.
                require_option_value "$1" "${1#*=}"
                DB_HOST_OPT="-h ${1#*=}"
                shift
                ;;
            -U)
                require_option_value "$1" "$2"
                DB_USER="$2"
                shift 2
                ;;
            --username=*)
                require_option_value "$1" "${1#*=}"
                DB_USER="${1#*=}"
                shift
                ;;
            -d)
                require_option_value "$1" "$2"
                DB_NAME="$2"
                shift 2
                ;;
            --dbname=*)
                require_option_value "$1" "${1#*=}"
                DB_NAME="${1#*=}"
                shift
                ;;
            -i)
                require_option_value "$1" "$2"
                GEOSCRUB_INPUT_OPT="-i $2"
                shift 2
                ;;
            --geoscrub-input=*)
                require_option_value "$1" "${1#*=}"
                # Kept in its "--geoscrub-input=DIR" form and handed on as is.
                GEOSCRUB_INPUT_OPT="$1"
                shift
                ;;
            *)
                echo "Invalid option: $1" >&2
                usage
                ;;
        esac
    done
}

# Start from a known-empty value so a GEOSCRUB_INPUT_OPT inherited from the
# environment cannot end up on the loader's command line.
GEOSCRUB_INPUT_OPT=""
parse_args "$@"

# Run one SQL script against the configured database and stop the whole
# script if it fails.
#
# Globals read: DB_USER, DB_HOST_OPT, DB_NAME
# Arguments:    $1 - path of the SQL file; it is fed to psql on stdin
# Exits:        with status 1 (after a message on stderr) when psql returns
#               a non-zero status.
function run_sql_script {
    local SCRIPT=$1

    # DB_HOST_OPT is intentionally unquoted: it is either empty (expands to
    # no argument at all) or "-h HOST" (splits into two arguments).
    # ON_ERROR_STOP=1 is set so psql stops on an SQL error and reports it
    # through its exit status.
    # shellcheck disable=SC2086
    if ! psql -e -U "$DB_USER" $DB_HOST_OPT -d "$DB_NAME" --set ON_ERROR_STOP=1 < "$SCRIPT"; then
        echo "Error while executing SQL script ${SCRIPT}" >&2
        exit 1
    fi
}
102

    
103
"${SCRIPT_DIR}"/load-geoscrub-input.sh -U "$DB_USER" $DB_HOST_OPT -d "$DB_NAME" $GEOSCRUB_INPUT_OPT
104
if [[ $? != 0 ]]; then
105
    echo "Could not load ${DB_NAME} database with geonames.org data."
106
    exit 1
107
fi
108

    
109
echo "Scrubbing input with geonames data..."
110
run_sql_script "${SCRIPT_DIR}/geonames.sql"
111

    
112
echo "Scrubbing input with geovalidate data..."
113
run_sql_script "${SCRIPT_DIR}/geovalidate.sql"
114

    
115
echo "Input successfully scrubbed."
116
echo "Scrubbed input available in the geoscrub table of the ${DB_NAME} database."
117

    
(8-8/26)