Revision 11442
Added by Paul Sarando about 11 years ago
load-geoscrub-input.sh | ||
---|---|---|
1 |
# Bash script to dump geoscrub_input table (created by AMK) from the |
|
1 |
#!/bin/bash |
|
2 |
|
|
3 |
# Bash script to download geoscrub_input table dump (created by AMK) from the |
|
2 | 4 |
# vegbien database, and load it into the geoscrub database (i.e., the |
3 | 5 |
# postgis database prepped with geonames.org data, GADM2 data, and |
4 | 6 |
# associated mapping tables). |
... | ... | |
14 | 16 |
# NCEAS |
15 | 17 |
# Created Nov 2012 |
16 | 18 |
|
17 |
# dump distinct records from vegbien
|
|
18 |
psql --host vegbiendev -U bien vegbien \
|
|
19 |
-c 'COPY "public.2012-11-04-07-34-10.r5984".geoscrub_input
|
|
20 |
TO STDOUT
|
|
21 |
WITH CSV' > geoscrub-corpus.csv
|
|
19 |
# Note, to force data to download from DATA_URL, ensure the DATAFILE is deleted
|
|
20 |
# before running this script.
|
|
21 |
DATA_URL="http://fs.vegpath.org/exports/geoscrub_input.no_header.cols=country,stateProvince,county,decimalLatitude,decimalLongitude.csv"
|
|
22 |
DATADIR="$(dirname $0)/input"
|
|
23 |
DATAFILE="${DATADIR}/geoscrub-corpus.csv"
|
|
22 | 24 |
|
25 |
if [[ ! -d "$DATADIR" ]]; then |
|
26 |
echo "making directory ${DATADIR}" |
|
27 |
mkdir -p "$DATADIR" |
|
28 |
|
|
29 |
if [[ $? != 0 ]]; then |
|
30 |
echo "Could not create directory ${DATADIR}" |
|
31 |
exit 1 |
|
32 |
fi |
|
33 |
fi |
|
34 |
|
|
35 |
if [[ ! -r "$DATAFILE" ]]; then |
|
36 |
# download distinct records from vegbien |
|
37 |
wget -O "$DATAFILE" "$DATA_URL" |
|
38 |
|
|
39 |
if [[ $? != 0 ]]; then |
|
40 |
echo "Could not download input to ${DATAFILE}" |
|
41 |
exit 1 |
|
42 |
fi |
|
43 |
fi |
|
44 |
|
|
23 | 45 |
# generate table |
24 | 46 |
psql -c \ |
25 | 47 |
'CREATE TABLE vegbien_geoscrub ( |
... | ... | |
31 | 53 |
)' geoscrub |
32 | 54 |
|
33 | 55 |
# load |
34 |
psql -c "COPY vegbien_geoscrub FROM '$DATADIR/geoscrub-corpus.csv' WITH CSV" geoscrub
|
|
56 |
psql -c "COPY vegbien_geoscrub FROM '${DATAFILE}' WITH CSV" geoscrub
|
|
35 | 57 |
|
36 |
|
|
37 | 58 |
Also available in: Unified diff
Update load-geoscrub-input.sh to download from URL.
Removed the logic that dumped input data directly from the vegbien database;
the script now downloads the input from a URL provided by AMK instead.
Also updated this script to download the file into an input data
directory, rather than just into the current working directory.