Revision 11561
Added by Paul Sarando about 11 years ago
derived/biengeo/geoscrub.sh | ||
---|---|---|
23 | 23 |
echo "-d, --dbname=DBNAME database name psql commands will connect to" >&2 |
24 | 24 |
echo "-h, --host=HOSTNAME database server host or socket directory" >&2 |
25 | 25 |
echo "-U, --username=USERNAME database user name" >&2 |
26 |
echo "Input Data Options:" >&2 |
|
27 |
echo "-i, --geoscrub-input Geoscrub input directory (default: ${HOME}/geoscrub_input)" >&2 |
|
28 |
echo " Delete this directory, or the input CSV in it," >&2 |
|
29 |
echo " to re-download the data." >&2 |
|
26 | 30 |
exit 1; |
27 | 31 |
} |
28 | 32 |
|
... | ... | |
67 | 71 |
DB_NAME="${1#*=}" |
68 | 72 |
shift |
69 | 73 |
;; |
74 |
-i) |
|
75 |
if [[ -z $2 ]]; then |
|
76 |
echo "Option $1 requires an argument." >&2 |
|
77 |
usage |
|
78 |
fi |
|
79 |
GEOSCRUB_INPUT_OPT="-i $2" |
|
80 |
shift 2 |
|
81 |
;; |
|
82 |
--geoscrub-input=*) |
|
83 |
GEOSCRUB_INPUT_OPT="$1" |
|
84 |
shift |
|
85 |
;; |
|
70 | 86 |
*) |
71 | 87 |
echo "Invalid option: $1" >&2 |
72 | 88 |
usage |
... | ... | |
84 | 100 |
fi |
85 | 101 |
} |
86 | 102 |
|
87 |
"${SCRIPT_DIR}"/load-geoscrub-input.sh -U "$DB_USER" $DB_HOST_OPT -d "$DB_NAME" |
|
103 |
"${SCRIPT_DIR}"/load-geoscrub-input.sh -U "$DB_USER" $DB_HOST_OPT -d "$DB_NAME" $GEOSCRUB_INPUT_OPT
|
|
88 | 104 |
if [[ $? != 0 ]]; then |
89 | 105 |
echo "Could not load ${DB_NAME} database with geonames.org data." |
90 | 106 |
exit 1 |
derived/biengeo/load-geoscrub-input.sh | ||
---|---|---|
28 | 28 |
DB_HOST_OPT="" |
29 | 29 |
SCRIPT_DIR="$(dirname $0)" |
30 | 30 |
DATA_URL="http://fs.vegpath.org/exports/geoscrub_input.no_header.cols=country,stateProvince,county,decimalLatitude,decimalLongitude.csv" |
31 |
DATADIR="${SCRIPT_DIR}/input"
|
|
32 |
DATAFILE="${DATADIR}/geoscrub-corpus.csv"
|
|
31 |
DATADIR="${HOME}/geoscrub_input"
|
|
32 |
DATAFILE="geoscrub-corpus.csv" |
|
33 | 33 |
|
34 | 34 |
function usage { |
35 | 35 |
echo "Usage: $0 [OPTIONS]" >&2 |
... | ... | |
37 | 37 |
echo "-d, --dbname=DBNAME database name psql commands will connect to" >&2 |
38 | 38 |
echo "-h, --host=HOSTNAME database server host or socket directory" >&2 |
39 | 39 |
echo "-U, --username=USERNAME database user name" >&2 |
40 |
echo "-i, --geoscrub-input Geoscrub input directory (default: ${HOME}/geoscrub_input)" >&2 |
|
41 |
echo " Delete this directory, or the input CSV in it," >&2 |
|
42 |
echo " to re-download the data." >&2 |
|
40 | 43 |
exit 1; |
41 | 44 |
} |
42 | 45 |
|
... | ... | |
81 | 84 |
DB_NAME="${1#*=}" |
82 | 85 |
shift |
83 | 86 |
;; |
87 |
-i) |
|
88 |
if [[ -z $2 ]]; then |
|
89 |
echo "Option $1 requires an argument." >&2 |
|
90 |
usage |
|
91 |
fi |
|
92 |
DATADIR="$2" |
|
93 |
shift 2 |
|
94 |
;; |
|
95 |
--geoscrub-input=*) |
|
96 |
DATADIR="${1#*=}" |
|
97 |
shift |
|
98 |
;; |
|
84 | 99 |
*) |
85 | 100 |
echo "Invalid option: $1" >&2 |
86 | 101 |
usage |
... | ... | |
98 | 113 |
fi |
99 | 114 |
fi |
100 | 115 |
|
101 |
if [[ ! -r "$DATAFILE" ]]; then
|
|
116 |
if [[ ! -r "${DATADIR}/${DATAFILE}" ]]; then
|
|
102 | 117 |
# download distinct records from vegbien |
103 |
wget -O "$DATAFILE" "$DATA_URL"
|
|
118 |
wget -O "${DATADIR}/${DATAFILE}" "$DATA_URL"
|
|
104 | 119 |
|
105 | 120 |
if [[ $? != 0 ]]; then |
106 |
echo "Could not download input to ${DATAFILE}" |
|
121 |
echo "Could not download input to ${DATADIR}/${DATAFILE}"
|
|
107 | 122 |
exit 1 |
108 | 123 |
fi |
109 | 124 |
fi |
110 | 125 |
|
111 |
echo "Loading vegbien data from ${DATAFILE}" |
|
126 |
echo "Loading vegbien data from ${DATADIR}/${DATAFILE}"
|
|
112 | 127 |
|
113 | 128 |
# clear previous data |
114 | 129 |
psql -e -U "$DB_USER" $DB_HOST_OPT -d "$DB_NAME" --set ON_ERROR_STOP=1 < "${SCRIPT_DIR}/truncate.vegbien_geoscrub.sql" |
... | ... | |
118 | 133 |
fi |
119 | 134 |
|
120 | 135 |
# load vegbien_geoscrub table with input data |
121 |
echo "Copying vegbien_geoscrub from ${DATAFILE}" |
|
122 |
psql -U "$DB_USER" $DB_HOST_OPT -d "$DB_NAME" -c "\COPY vegbien_geoscrub FROM '${DATAFILE}' WITH CSV" |
|
136 |
echo "Copying vegbien_geoscrub from ${DATADIR}/${DATAFILE}"
|
|
137 |
psql -U "$DB_USER" $DB_HOST_OPT -d "$DB_NAME" -c "\COPY vegbien_geoscrub FROM '${DATADIR}/${DATAFILE}' WITH CSV"
|
|
123 | 138 |
|
derived/biengeo/update_validation_data.sh | ||
---|---|---|
23 | 23 |
echo "-d, --dbname=DBNAME database name psql commands will connect to" >&2 |
24 | 24 |
echo "-h, --host=HOSTNAME database server host or socket directory" >&2 |
25 | 25 |
echo "-U, --username=USERNAME database user name" >&2 |
26 |
echo "" |
|
26 | 27 |
echo "Update Options:" >&2 |
27 | 28 |
echo "-G, --gadm-only update only GADM data" >&2 |
28 | 29 |
echo "-N, --geonames-only update only geonames.org data" >&2 |
29 | 30 |
echo "-M, --geonames-to-gadm-only update niether GADM nor geonames.org data," >&2 |
30 | 31 |
echo " but geonames-to-gadm mappings are always updated." >&2 |
32 |
echo "" |
|
33 |
echo "Input Data Options:" >&2 |
|
34 |
echo "-g, --gadm-data GADM data directory (default: ${HOME}/gadm_v2_shp)" >&2 |
|
35 |
echo " Delete this directory, or the GADM data in it," >&2 |
|
36 |
echo " to re-download the data." >&2 |
|
37 |
echo "-m, --geonames-data Geonames.org data directory (default: ${HOME}/geonames)" >&2 |
|
38 |
echo " Delete this directory, or the geonames.org data in it," >&2 |
|
39 |
echo " to re-download the data." >&2 |
|
31 | 40 |
exit 1; |
32 | 41 |
} |
33 | 42 |
|
... | ... | |
85 | 94 |
NO_GEONAMES=TRUE |
86 | 95 |
shift |
87 | 96 |
;; |
97 |
-g) |
|
98 |
if [[ -z $2 ]]; then |
|
99 |
echo "Option $1 requires an argument." >&2 |
|
100 |
usage |
|
101 |
fi |
|
102 |
GADM_DATA_DIR_OPT="-g $2" |
|
103 |
shift 2 |
|
104 |
;; |
|
105 |
--gadm-data=*) |
|
106 |
GADM_DATA_DIR_OPT="$1" |
|
107 |
shift |
|
108 |
;; |
|
109 |
-m) |
|
110 |
if [[ -z $2 ]]; then |
|
111 |
echo "Option $1 requires an argument." >&2 |
|
112 |
usage |
|
113 |
fi |
|
114 |
GEONAMES_DATA_DIR_OPT="-m $2" |
|
115 |
shift 2 |
|
116 |
;; |
|
117 |
--geonames-data=*) |
|
118 |
GEONAMES_DATA_DIR_OPT="$1" |
|
119 |
shift |
|
120 |
;; |
|
88 | 121 |
*) |
89 | 122 |
echo "Invalid option: $1" >&2 |
90 | 123 |
usage |
... | ... | |
105 | 138 |
echo "Updating geoscrub validation data." |
106 | 139 |
|
107 | 140 |
if [[ -z $NO_GADM ]]; then |
108 |
"${SCRIPT_DIR}"/update_gadm_data.sh -U "$DB_USER" $DB_HOST_OPT -d "$DB_NAME" |
|
141 |
"${SCRIPT_DIR}"/update_gadm_data.sh -U "$DB_USER" $DB_HOST_OPT -d "$DB_NAME" $GADM_DATA_DIR_OPT
|
|
109 | 142 |
if [[ $? != 0 ]]; then |
110 | 143 |
echo "Could not update ${DB_NAME} database with GADM data." |
111 | 144 |
exit 1 |
... | ... | |
113 | 146 |
fi |
114 | 147 |
|
115 | 148 |
if [[ -z $NO_GEONAMES ]]; then |
116 |
"${SCRIPT_DIR}"/update_geonames_data.sh -U "$DB_USER" $DB_HOST_OPT -d "$DB_NAME" |
|
149 |
"${SCRIPT_DIR}"/update_geonames_data.sh -U "$DB_USER" $DB_HOST_OPT -d "$DB_NAME" $GEONAMES_DATA_DIR_OPT
|
|
117 | 150 |
if [[ $? != 0 ]]; then |
118 | 151 |
echo "Could not update ${DB_NAME} database with geonames.org data." |
119 | 152 |
exit 1 |
derived/biengeo/update_geonames_data.sh | ||
---|---|---|
24 | 24 |
DB_USER="bien" |
25 | 25 |
DB_HOST_OPT="" |
26 | 26 |
SCRIPT_DIR="$(dirname $0)" |
27 |
DATADIR="${HOME}/geonames" |
|
27 | 28 |
GEONAMES_DUMP_URL="http://download.geonames.org/export/dump" |
28 | 29 |
ALL_COUNTRIES_ZIP=allCountries.zip |
29 | 30 |
ALL_COUNTRIES_TXT=allCountries.txt |
... | ... | |
39 | 40 |
echo "-d, --dbname=DBNAME database name psql commands will connect to" >&2 |
40 | 41 |
echo "-h, --host=HOSTNAME database server host or socket directory" >&2 |
41 | 42 |
echo "-U, --username=USERNAME database user name" >&2 |
43 |
echo "" |
|
44 |
echo "Input Data Options:" >&2 |
|
45 |
echo "-m, --geonames-data Geonames.org data directory (default: ${HOME}/geonames)" >&2 |
|
46 |
echo " Delete this directory, or the geonames.org data in it," >&2 |
|
47 |
echo " to re-download the data." >&2 |
|
42 | 48 |
exit 1; |
43 | 49 |
} |
44 | 50 |
|
... | ... | |
83 | 89 |
DB_NAME="${1#*=}" |
84 | 90 |
shift |
85 | 91 |
;; |
92 |
-m) |
|
93 |
if [[ -z $2 ]]; then |
|
94 |
echo "Option $1 requires an argument." >&2 |
|
95 |
usage |
|
96 |
fi |
|
97 |
DATADIR="$2" |
|
98 |
shift 2 |
|
99 |
;; |
|
100 |
--geonames-data=*) |
|
101 |
DATADIR="${1#*=}" |
|
102 |
shift |
|
103 |
;; |
|
86 | 104 |
*) |
87 | 105 |
echo "Invalid option: $1" >&2 |
88 | 106 |
usage |
... | ... | |
113 | 131 |
echo "Updating geonames.org tables..." |
114 | 132 |
|
115 | 133 |
# Check for data already downloaded into the geonames data directory (DATADIR, default ${HOME}/geonames, overridable with -m/--geonames-data). |
116 |
DATADIR="$(dirname $0)/geonames" |
|
117 | 134 |
if [[ ! -d "${DATADIR}" ]]; then |
118 | 135 |
echo "making directory ${DATADIR}" |
119 | 136 |
mkdir -p "${DATADIR}" |
derived/biengeo/update_gadm_data.sh | ||
---|---|---|
31 | 31 |
# GADM data originally available at http://www.gadm.org/data2/gadm_v2_shp.zip |
32 | 32 |
# gadm.org now links this file from biogeo.ucdavis.edu. |
33 | 33 |
GADM_DATA_URL="http://biogeo.ucdavis.edu/data/gadm2/gadm_v2_shp.zip" |
34 |
GADM_DATA_DIR="${SCRIPT_DIR}/gadm_v2_shp"
|
|
34 |
GADM_DATA_DIR="${HOME}/gadm_v2_shp"
|
|
35 | 35 |
|
36 | 36 |
function usage { |
37 | 37 |
echo "Usage: $0 [OPTIONS]" >&2 |
... | ... | |
39 | 39 |
echo "-d, --dbname=DBNAME database name psql commands will connect to" >&2 |
40 | 40 |
echo "-h, --host=HOSTNAME database server host or socket directory" >&2 |
41 | 41 |
echo "-U, --username=USERNAME database user name" >&2 |
42 |
echo "" |
|
43 |
echo "Input Data Options:" >&2 |
|
44 |
echo "-g, --gadm-data GADM data directory (default: ${HOME}/gadm_v2_shp)" >&2 |
|
45 |
echo " Delete this directory, or the GADM data in it," >&2 |
|
46 |
echo " to re-download the data." >&2 |
|
42 | 47 |
exit 1; |
43 | 48 |
} |
44 | 49 |
|
... | ... | |
83 | 88 |
DB_NAME="${1#*=}" |
84 | 89 |
shift |
85 | 90 |
;; |
91 |
-g) |
|
92 |
if [[ -z $2 ]]; then |
|
93 |
echo "Option $1 requires an argument." >&2 |
|
94 |
usage |
|
95 |
fi |
|
96 |
GADM_DATA_DIR="$2" |
|
97 |
shift 2 |
|
98 |
;; |
|
99 |
--gadm-data=*) |
|
100 |
GADM_DATA_DIR="${1#*=}" |
|
101 |
shift |
|
102 |
;; |
|
86 | 103 |
*) |
87 | 104 |
echo "Invalid option: $1" >&2 |
88 | 105 |
usage |
Also available in: Unified diff
Added biengeo script options for data directories.
Added GADM and geonames.org data dir options to
update_validation_data.sh scripts.
Added geoscrub input data dir option to geoscrub.sh scripts.