#!/bin/bash

# Bash script to download geoscrub_input table dump (created by AMK) from the
# vegbien database, and load it into the geoscrub database (i.e., the
# postgis database prepped with geonames.org data, GADM2 data, and
# associated mapping tables).
#
# Won't be necessary if we end up injecting all of the geoscrubbing and
# geovalidation functionality directly into vegbien itself. And if we
# end up implementing this stuff as a standalone service instead, we'd
# need to rethink (and generalize) how the input data is handled. But
# for now, this should at least serve as a placeholder that could be
# tweaked manually to load any arbitrary geoscrub input data table.
#
# Jim Regetz
# NCEAS
# Created Nov 2012

    
# Note: to force the data to be downloaded again from DATA_URL, make sure
# DATAFILE is deleted before running this script.
DATA_URL="http://fs.vegpath.org/exports/geoscrub_input.no_header.cols=country,stateProvince,county,decimalLatitude,decimalLongitude.csv"
# The input directory sits next to this script. $0 is quoted so that a script
# path containing whitespace or glob characters is passed to dirname as a
# single argument; `--` keeps a leading dash from being read as an option.
DATADIR="$(dirname -- "$0")/input"
DATAFILE="${DATADIR}/geoscrub-corpus.csv"

    
# Create the input directory if it does not exist yet. The exit status of
# mkdir is tested directly in the `if`, so no intervening command can
# overwrite $? before it is examined.
if [[ ! -d "$DATADIR" ]]; then
    echo "making directory ${DATADIR}"
    if ! mkdir -p -- "$DATADIR"; then
        # Diagnostics go to stderr so they are not mixed into normal output.
        echo "Could not create directory ${DATADIR}" >&2
        exit 1
    fi
fi

    
# Download the input only when no readable local copy is present.
if [[ ! -r "$DATAFILE" ]]; then
    # download distinct records from vegbien
    #
    # Fetch into a temporary name and rename only after wget succeeds.
    # wget -O creates/truncates its output file before the transfer starts,
    # so a failed download written straight to DATAFILE would leave an empty
    # or partial file behind and the readability test above would then skip
    # the download on every later run.
    datafile_tmp="${DATAFILE}.part"
    if ! wget -O "$datafile_tmp" "$DATA_URL" \
        || ! mv -f -- "$datafile_tmp" "$DATAFILE"; then
        rm -f -- "$datafile_tmp"
        echo "Could not download input to ${DATAFILE}" >&2
        exit 1
    fi
fi

    
# generate table
#
# The statement is a single-quoted literal, so the shell performs no
# expansion inside the SQL. Stop if psql reports a failure rather than going
# on to load data into a table whose state is unknown.
if ! psql -c \
   'CREATE TABLE vegbien_geoscrub (
        country text,
        stateProvince text,
        county text,
        decimalLatitude double precision,
        decimalLongitude double precision
    )' geoscrub
then
    echo "Could not create table vegbien_geoscrub" >&2
    exit 1
fi

    
# load
#
# Stream the CSV to the server through psql's standard input instead of
# naming the file in the SQL. With COPY ... FROM 'file' the file is opened by
# the database server process, so a relative DATAFILE would be resolved
# against the server's working directory rather than this script's. Reading
# from STDIN also keeps the path out of the SQL text entirely.
if ! psql -c "COPY vegbien_geoscrub FROM STDIN WITH CSV" geoscrub < "$DATAFILE"; then
    echo "Could not load ${DATAFILE} into vegbien_geoscrub" >&2
    exit 1
fi