#!/bin/bash

# Bash script to download geoscrub_input table dump (created by AMK) from the
# vegbien database, and load it into the geoscrub database (i.e., the
# postgis database prepped with geonames.org data, GADM2 data, and
# associated mapping tables).
#
# Won't be necessary if we end up injecting all of the geoscrubbing and
# geovalidation functionality directly into vegbien itself. And if we
# end up implementing this stuff as a standalone service instead, we'd
# need to rethink (and generalize) how the input data is handled. But
# for now, this should at least serve as a placeholder that could be
# tweaked manually to load any arbitrary geoscrub input data table.
#
# Jim Regetz
# NCEAS
# Created Nov 2012
#
# Paul Sarando
# iPlant Collaborative
# Updated Oct 2013
22

    
23
# Note: to force the data to be downloaded again from DATA_URL, delete DATAFILE
# before running this script.
25

    
26
# Default settings. The database name, user, host and DATADIR can be
# overridden by the command-line options parsed later in this script.
DB_NAME="geoscrub"
DB_USER="bien"
# Extra psql host arguments ("-h HOST"); empty means no -h is passed to psql.
DB_HOST_OPT=""
# Directory holding this script; the SQL file fed to psql is looked up here.
# "$0" is quoted so a script path containing whitespace reaches dirname intact,
# and "--" keeps a path starting with "-" from being read as an option.
SCRIPT_DIR="$(dirname -- "$0")"
DATA_URL="http://fs.vegpath.org/exports/geoscrub_input.csv"
DATADIR="${HOME}/geoscrub_input"
# Keep a DATAFILE value already set in the environment; otherwise use the default.
: "${DATAFILE:=geoscrub-corpus.csv}"
33

    
34
# Print the command-line help to stderr and terminate the script.
# Always exits with status 1 (also when reached through -? / --help), so it
# serves as the exit path for invalid invocations as well.
function usage {
    # One printf call emits every help line; each argument becomes one line.
    printf '%s\n' \
        "Usage: $0 [OPTIONS]" \
        "Valid Options:" \
        "-d, --dbname=DBNAME      database name psql commands will connect to" \
        "-h, --host=HOSTNAME      database server host or socket directory" \
        "-U, --username=USERNAME  database user name" \
        "-i, --geoscrub-input=DIR Geoscrub input directory (default: ${HOME}/geoscrub_input)" \
        "                         Delete this directory, or the input CSV in it," \
        "                         to re-download the data." \
        "-?, --help               show this help and exit" \
        >&2
    exit 1
}
45

    
46
# Abort through usage unless an option was given a non-empty value.
#   $1 - option name as typed by the user (used in the error message)
#   $2 - value supplied for that option (may be empty or missing)
function require_value {
    if [[ -z $2 ]]; then
        echo "Option $1 requires an argument." >&2
        usage
    fi
}

# Parse command-line options. Short options take their value from the next
# argument; long options take it after "=". Both forms reject an empty value:
# an empty --host= would otherwise produce a bare "-h" that makes psql consume
# the following option as the host name.
while [[ $# -gt 0 ]]; do
    case "$1" in
        -\? | --help)
            usage
            ;;
        -h)
            require_value "$1" "$2"
            DB_HOST_OPT="-h $2"
            shift 2
            ;;
        --host=*)
            require_value "--host" "${1#*=}"
            DB_HOST_OPT="-h ${1#*=}"
            shift
            ;;
        -U)
            require_value "$1" "$2"
            DB_USER="$2"
            shift 2
            ;;
        --username=*)
            require_value "--username" "${1#*=}"
            DB_USER="${1#*=}"
            shift
            ;;
        -d)
            require_value "$1" "$2"
            DB_NAME="$2"
            shift 2
            ;;
        --dbname=*)
            require_value "--dbname" "${1#*=}"
            DB_NAME="${1#*=}"
            shift
            ;;
        -i)
            require_value "$1" "$2"
            DATADIR="$2"
            shift 2
            ;;
        --geoscrub-input=*)
            require_value "--geoscrub-input" "${1#*=}"
            DATADIR="${1#*=}"
            shift
            ;;
        *)
            echo "Invalid option: $1" >&2
            usage
            ;;
    esac
done
105

    
106
# Create the input directory (and any missing parents) when it is absent;
# give up with status 1 if it cannot be created.
if ! [[ -d "$DATADIR" ]]; then
    echo "making directory ${DATADIR}"
    mkdir -p "$DATADIR" || {
        echo "Could not create directory ${DATADIR}"
        exit 1
    }
fi
115

    
116
# Fetch the input CSV only when no readable copy is already in DATADIR.
if [[ ! -r "${DATADIR}/${DATAFILE}" ]]; then
    # download distinct records from vegbien
    #
    # wget -O truncates its output file before any data arrives, so a failed
    # or interrupted transfer written straight to DATAFILE would leave a
    # readable (empty or partial) file that the check above accepts on the
    # next run. Download to a temporary name and rename only on success.
    download_tmp="${DATADIR}/${DATAFILE}.part"
    if ! wget -O "$download_tmp" "$DATA_URL" \
            || ! mv -f -- "$download_tmp" "${DATADIR}/${DATAFILE}"; then
        rm -f -- "$download_tmp"
        echo "Could not download input to ${DATADIR}/${DATAFILE}"
        exit 1
    fi
fi
125

    
126
echo "Loading vegbien data from ${DATADIR}/${DATAFILE}"

# Clear previous data by feeding truncate.vegbien_geoscrub.sql (located next
# to this script) to psql. DB_HOST_OPT is deliberately left unquoted: it is
# either empty or "-h HOST" and has to split into separate psql arguments.
if ! psql -e -U "$DB_USER" $DB_HOST_OPT -d "$DB_NAME" --set ON_ERROR_STOP=1 \
        < "${SCRIPT_DIR}/truncate.vegbien_geoscrub.sql"; then
    echo "Could not clear data from vegbien_geoscrub tables."
    exit 1
fi
134

    
135
# load vegbien_geoscrub table with input data
echo "Copying vegbien_geoscrub from ${DATADIR}/${DATAFILE}"
# Check the result like every other step in this script, so a failed load is
# reported explicitly instead of ending the script without any message.
# DB_HOST_OPT is deliberately left unquoted: it is either empty or "-h HOST"
# and has to split into separate psql arguments.
if ! psql -U "$DB_USER" $DB_HOST_OPT -d "$DB_NAME" -c "\COPY vegbien_geoscrub FROM '${DATADIR}/${DATAFILE}' WITH CSV HEADER"; then
    echo "Could not load ${DATADIR}/${DATAFILE} into vegbien_geoscrub."
    exit 1
fi
138

    
(10-10/27)