Project

General

Profile

1
#!/bin/bash
2

    
3
# Bash script to create a new postgis database and prep it with GADM
4
# data for geovalidation purposes.
5
#
6
# Basic workflow:
7
#  1. Load geoscrub input data into database
8
#  2. Scrub geoscrub input data with the geonames.sql script
9
#  3. Scrub geoscrub input data with the geovalidate.sql script
10
# 
11
# Paul Sarando
12
# iPlant Collaborative
13
# Oct 2013
14

    
15
DB_NAME="geoscrub"
16
DB_USER="bien"
17
DB_HOST_OPT=""
18
SCRIPT_DIR="$(dirname $0)"
19

    
20
function usage {
21
    echo "Usage: $0 [OPTIONS]" >&2
22
    echo "Valid Options:" >&2
23
    echo "-d, --dbname=DBNAME      database name psql commands will connect to" >&2
24
    echo "-h, --host=HOSTNAME      database server host or socket directory" >&2
25
    echo "-U, --username=USERNAME  database user name" >&2
26
    echo "Geoscrub Data Options:" >&2
27
    echo "-i, --geoscrub-input     Geoscrub input directory (default: ${HOME}/geoscrub_input)" >&2
28
    echo "                         Delete this directory, or the input CSV in it," >&2
29
    echo "                         to re-download the data." >&2
30
    echo "-o, --output-file        Optional file where geoscrubbed CSV should be saved." >&2
31
    echo "                         If this option is given, then the final geoscrub table" >&2
32
    echo "                         will be dumped to the specified file in CSV format." >&2
33
    echo "                         No output file is saved if this option is omitted." >&2
34
    exit 1;
35
}
36

    
37
while [[ $# -gt 0  ]]; do
38
    case "$1" in
39
        -\? | --help)
40
            usage
41
            ;;
42
        -h)
43
            if [[ -z $2  ]];  then
44
                echo "Option $1 requires an argument." >&2
45
                usage
46
            fi
47
            DB_HOST_OPT="-h $2"
48
            shift 2
49
            ;;
50
        --host=*)
51
            DB_HOST_OPT="-h ${1#*=}"
52
            shift
53
            ;;
54
        -U)
55
            if [[ -z $2  ]];  then
56
                echo "Option $1 requires an argument." >&2
57
                usage
58
            fi
59
            DB_USER="$2"
60
            shift 2
61
            ;;
62
        --username=*)
63
            DB_USER="${1#*=}"
64
            shift
65
            ;;
66
        -d)
67
            if [[ -z $2  ]];  then
68
                echo "Option $1 requires an argument." >&2
69
                usage
70
            fi
71
            DB_NAME="$2"
72
            shift 2
73
            ;;
74
        --dbname=*)
75
            DB_NAME="${1#*=}"
76
            shift
77
            ;;
78
        -i)
79
            if [[ -z $2  ]];  then
80
                echo "Option $1 requires an argument." >&2
81
                usage
82
            fi
83
            GEOSCRUB_INPUT_OPT="-i $2"
84
            shift 2
85
            ;;
86
        --geoscrub-input=*)
87
            GEOSCRUB_INPUT_OPT="$1"
88
            shift
89
            ;;
90
        -o)
91
            if [[ -z $2  ]];  then
92
                echo "Option $1 requires an argument." >&2
93
                exit 1;
94
            fi
95
            OUTPUT_FILE="$2"
96
            shift 2
97
            ;;
98
        --output-file=*)
99
            OUTPUT_FILE="${1#*=}"
100
            shift
101
            ;;
102
        *)
103
            echo "Invalid option: $1" >&2
104
            usage
105
            ;;
106
    esac
107
done
108

    
109
function run_sql_script {
110
    local SCRIPT=$1
111

    
112
    psql -e -U "$DB_USER" $DB_HOST_OPT -d "$DB_NAME" --set ON_ERROR_STOP=1 < "$SCRIPT"
113
    if [[ $? != 0 ]]; then
114
        echo "Error while executing SQL script ${SCRIPT}"
115
        exit 1
116
    fi
117
}
118

    
119
"${SCRIPT_DIR}"/load-geoscrub-input.sh -U "$DB_USER" $DB_HOST_OPT -d "$DB_NAME" $GEOSCRUB_INPUT_OPT
120
if [[ $? != 0 ]]; then
121
    echo "Could not load ${DB_NAME} database with geonames.org data."
122
    exit 1
123
fi
124

    
125
echo "Scrubbing input with geonames data..."
126
run_sql_script "${SCRIPT_DIR}/geonames.sql"
127

    
128
echo "Scrubbing input with geovalidate data..."
129
run_sql_script "${SCRIPT_DIR}/geovalidate.sql"
130

    
131
echo "Input successfully scrubbed."
132
if [[ -n $OUTPUT_FILE ]]; then
133
    echo "Saving geoscrubbed data as a CSV file to ${OUTPUT_FILE}..."
134

    
135
    run_sql_script "${SCRIPT_DIR}/output.geoscrub.sql" > "$OUTPUT_FILE"
136
    if [[ ! -w $OUTPUT_FILE ]]; then
137
        echo "Could not save geoscrub table contents to ${OUTPUT_FILE}"
138
        exit 1
139
    fi
140
else
141
    echo "Scrubbed input available in the geoscrub table of the ${DB_NAME} database."
142
fi
143

    
(8-8/27)