Project

General

Profile

1 11442 psarando
#!/bin/bash

# Bash script to download geoscrub_input table dump (created by AMK) from the
# vegbien database, and load it into the geoscrub database (i.e., the
# postgis database prepped with geonames.org data, GADM2 data, and
# associated mapping tables).
#
# Won't be necessary if we end up injecting all of the geoscrubbing and
# geovalidation functionality directly into vegbien itself. And if we
# end up implementing this stuff as a standalone service instead, we'd
# need to rethink (and generalize) how the input data is handled. But
# for now, this should at least serve as a placeholder that could be
# tweaked manually to load any arbitrary geoscrub input data table.
#
# Jim Regetz
# NCEAS
# Created Nov 2012

# Note: to force the data to be downloaded again from DATA_URL, make sure
# DATAFILE is deleted before running this script.
21 11443 psarando
22 11450 psarando
# Connection settings passed to every psql invocation below.
DB_USER="bien"
DB_HOST="vegbiendev"

# Directory holding this script; the input directory and the SQL helper file
# are resolved relative to it. "$0" is quoted (and preceded by --) so that a
# script path containing whitespace or a leading dash is handled correctly.
SCRIPT_DIR=$(dirname -- "$0")

# Remote CSV export (no header row) and the local path it is cached at.
DATA_URL="http://fs.vegpath.org/exports/geoscrub_input.no_header.cols=country,stateProvince,county,decimalLatitude,decimalLongitude.csv"
DATADIR="${SCRIPT_DIR}/input"
DATAFILE="${DATADIR}/geoscrub-corpus.csv"
28 10707 aaronmk
29 11442 psarando
# Make sure the input directory exists before anything is written into it.
# Nothing happens when DATADIR is already a directory; otherwise it is
# created (including missing parents) and the script aborts if that fails.
[[ -d "$DATADIR" ]] || {
    echo "making directory ${DATADIR}"
    if ! mkdir -p "$DATADIR"; then
        echo "Could not create directory ${DATADIR}"
        exit 1
    fi
}
38
39
# Fetch the input CSV unless a readable copy is already cached at DATAFILE.
#
# wget -O creates its output file even when the transfer fails or is
# interrupted. Writing straight to DATAFILE would therefore leave an empty or
# truncated file behind, and the next run would skip the download and load
# that bad file. To avoid this, download to a temporary name next to DATAFILE
# and rename it into place only once wget has succeeded.
if [[ ! -r "$DATAFILE" ]]; then
    # download distinct records from vegbien
    partial_file="${DATAFILE}.part"

    if ! wget -O "$partial_file" "$DATA_URL" \
            || ! mv -f -- "$partial_file" "$DATAFILE"; then
        # Drop whatever was written so a later run starts from scratch.
        rm -f -- "$partial_file"
        echo "Could not download input to ${DATAFILE}"
        exit 1
    fi
fi
48
49 11443 psarando
echo "Loading vegbien data from ${DATAFILE}"

# Clear previously loaded data by feeding the truncate script that sits next
# to this script into psql on stdin. ON_ERROR_STOP=1 makes psql stop at the
# first failing statement and report it through its exit status; -e echoes
# the statements as they are sent.
if ! psql -e -U "$DB_USER" -h "$DB_HOST" -d geoscrub --set ON_ERROR_STOP=1 \
        < "${SCRIPT_DIR}/truncate.vegbien_geoscrub.sql"; then
    echo "Could not clear data from vegbien_geoscrub tables."
    exit 1
fi
57
58 10707 aaronmk
# Load the cached CSV into the vegbien_geoscrub table of the geoscrub
# database using psql's \COPY meta-command. Like every other step in this
# script, a failure is reported with a message and ends the script with
# status 1.
# NOTE: DATAFILE is interpolated inside single quotes in the \COPY command,
# so its path must not contain a single quote.
if ! psql -U "$DB_USER" -h "$DB_HOST" -c "\COPY vegbien_geoscrub FROM '${DATAFILE}' WITH CSV" geoscrub; then
    echo "Could not load ${DATAFILE} into vegbien_geoscrub."
    exit 1
fi