Project

General

Profile

1
#!/bin/bash
2

    
3
# Bash script to download the geoscrub_input table dump (created by AMK) from
# the vegbien database, and load it into the geoscrub database (i.e., the
# PostGIS database prepped with geonames.org data, GADM2 data, and
# associated mapping tables).
#
# This script won't be necessary if we end up injecting all of the
# geoscrubbing and geovalidation functionality directly into vegbien itself.
# And if we end up implementing this stuff as a standalone service instead,
# we'd need to rethink (and generalize) how the input data is handled. But
# for now, this should at least serve as a placeholder that could be
# tweaked manually to load any arbitrary geoscrub input data table.
#
# Jim Regetz
# NCEAS
# Created Nov 2012
#
# Paul Sarando
# iPlant Collaborative
# Updated Oct 2013
22

    
23
# Note: to force the data to be downloaded again from DATA_URL, make sure
# DATAFILE is deleted before running this script.
25

    
26
# Connection defaults; the command-line options parsed below override them.
DB_NAME="geoscrub"
DB_USER="bien"
# Extra psql host arguments ("-h HOST"); empty means no -h is passed to psql.
DB_HOST_OPT=""
# Directory holding this script. $0 is quoted so that a script path containing
# spaces reaches dirname as one argument instead of being word-split.
SCRIPT_DIR="$(dirname "$0")"
# Source of the input data and the local file it is cached in.
DATA_URL="http://fs.vegpath.org/exports/geoscrub_input.no_header.cols=country,stateProvince,county,decimalLatitude,decimalLongitude.csv"
DATADIR="${SCRIPT_DIR}/input"
DATAFILE="${DATADIR}/geoscrub-corpus.csv"
33

    
34
# Write the option summary to stderr, then terminate the script with status 1.
usage() {
    printf '%s\n' \
        "Usage: $0 [OPTIONS]" \
        "Valid Options:" \
        "-d, --dbname=DBNAME      database name psql commands will connect to" \
        "-h, --host=HOSTNAME      database server host or socket directory" \
        "-U, --username=USERNAME  database user name" >&2
    exit 1
}
42

    
43
# Parse the command-line options, overriding the connection defaults.
#
# Short options (-h, -U, -d) take their value from the following argument;
# long options use the --name=VALUE form. An empty value is rejected in both
# forms. In particular an empty --host= would otherwise produce "-h " which,
# expanded unquoted on the psql command line, becomes a bare -h that swallows
# the next psql argument as its host name.
# Any unrecognised argument prints the usage text and exits.
while [[ $# -gt 0 ]]; do
    case "$1" in
        -\? | --help)
            usage
            ;;
        -h | -U | -d)
            if [[ -z $2 ]]; then
                echo "Option $1 requires an argument." >&2
                usage
            fi
            case "$1" in
                -h) DB_HOST_OPT="-h $2" ;;
                -U) DB_USER="$2" ;;
                -d) DB_NAME="$2" ;;
            esac
            shift 2
            ;;
        --host=* | --username=* | --dbname=*)
            # ${1#*=} is the text after the first '=', ${1%%=*} the option name.
            if [[ -z ${1#*=} ]]; then
                echo "Option ${1%%=*} requires an argument." >&2
                usage
            fi
            case "$1" in
                --host=*) DB_HOST_OPT="-h ${1#*=}" ;;
                --username=*) DB_USER="${1#*=}" ;;
                --dbname=*) DB_NAME="${1#*=}" ;;
            esac
            shift
            ;;
        *)
            echo "Invalid option: $1" >&2
            usage
            ;;
    esac
done
90

    
91
# Make sure the input directory exists; abort if it cannot be created.
[[ -d "$DATADIR" ]] || {
    echo "making directory ${DATADIR}"
    mkdir -p "$DATADIR" || {
        echo "Could not create directory ${DATADIR}"
        exit 1
    }
}
100

    
101
# Fetch the input data only when no readable copy is already cached.
if [[ ! -r "$DATAFILE" ]]; then
    # download distinct records from vegbien
    #
    # wget -O creates its output file before any data arrives, so a failed
    # download straight into DATAFILE would leave an empty or partial file
    # that the readability test above accepts on the next run. Download to a
    # temporary name and rename it only once wget has succeeded.
    DATAFILE_PART="${DATAFILE}.part"
    if ! wget -O "$DATAFILE_PART" "$DATA_URL"; then
        rm -f -- "$DATAFILE_PART"
        echo "Could not download input to ${DATAFILE}"
        exit 1
    fi

    if ! mv -f -- "$DATAFILE_PART" "$DATAFILE"; then
        rm -f -- "$DATAFILE_PART"
        echo "Could not download input to ${DATAFILE}"
        exit 1
    fi
fi
110

    
111
echo "Loading vegbien data from ${DATAFILE}"

# Clear out previously loaded data by feeding the truncate script to psql on
# stdin. DB_HOST_OPT is deliberately left unquoted: it is either empty or
# "-h HOST" and must split into separate psql arguments.
if ! psql -e -U "$DB_USER" $DB_HOST_OPT -d "$DB_NAME" --set ON_ERROR_STOP=1 \
        < "${SCRIPT_DIR}/truncate.vegbien_geoscrub.sql"; then
    echo "Could not clear data from vegbien_geoscrub tables."
    exit 1
fi
119

    
120
# load vegbien_geoscrub table with input data
#
# \COPY reads DATAFILE on the client side. The result is checked explicitly so
# that a failed load is reported and ends the script with status 1.
# DB_HOST_OPT is deliberately left unquoted: it is either empty or "-h HOST"
# and must split into separate psql arguments.
echo "Copying vegbien_geoscrub from ${DATAFILE}"
if ! psql -U "$DB_USER" $DB_HOST_OPT -d "$DB_NAME" --set ON_ERROR_STOP=1 \
        -c "\COPY vegbien_geoscrub FROM '${DATAFILE}' WITH CSV"; then
    echo "Could not load vegbien_geoscrub from ${DATAFILE}"
    exit 1
fi
123

    
(10-10/26)