Revision 11563
Added by Paul Sarando about 11 years ago
derived/biengeo/geoscrub.sh | ||
---|---|---|
23 | 23 |
echo "-d, --dbname=DBNAME database name psql commands will connect to" >&2 |
24 | 24 |
echo "-h, --host=HOSTNAME database server host or socket directory" >&2 |
25 | 25 |
echo "-U, --username=USERNAME database user name" >&2 |
26 |
echo "Input Data Options:" >&2
|
|
26 |
echo "Geoscrub Data Options:" >&2
|
|
27 | 27 |
echo "-i, --geoscrub-input Geoscrub input directory (default: ${HOME}/geoscrub_input)" >&2 |
28 | 28 |
echo " Delete this directory, or the input CSV in it," >&2 |
29 | 29 |
echo " to re-download the data." >&2 |
30 |
echo "-o, --output-file Optional file where geoscrubbed CSV should be saved." >&2 |
|
31 |
echo " If this option is given, then the final geoscrub table" >&2 |
|
32 |
echo " will be dumped to the specified file in CSV format." >&2 |
|
33 |
echo " No output file is saved if this option is omitted." >&2 |
|
30 | 34 |
exit 1; |
31 | 35 |
} |
32 | 36 |
|
... | ... | |
83 | 87 |
GEOSCRUB_INPUT_OPT="$1" |
84 | 88 |
shift |
85 | 89 |
;; |
90 |
-o) |
|
91 |
if [[ -z $2 ]]; then |
|
92 |
echo "Option $1 requires an argument." >&2 |
|
93 |
exit 1; |
|
94 |
fi |
|
95 |
OUTPUT_FILE="$2" |
|
96 |
shift 2 |
|
97 |
;; |
|
98 |
--output-file=*) |
|
99 |
OUTPUT_FILE="${1#*=}" |
|
100 |
shift |
|
101 |
;; |
|
86 | 102 |
*) |
87 | 103 |
echo "Invalid option: $1" >&2 |
88 | 104 |
usage |
... | ... | |
113 | 129 |
run_sql_script "${SCRIPT_DIR}/geovalidate.sql" |
114 | 130 |
|
115 | 131 |
echo "Input successfully scrubbed." |
116 |
echo "Scrubbed input available in the geoscrub table of the ${DB_NAME} database." |
|
132 |
if [[ -n $OUTPUT_FILE ]]; then |
|
133 |
echo "Saving geoscrubbed data as a CSV file to ${OUTPUT_FILE}..." |
|
117 | 134 |
|
135 |
run_sql_script "${SCRIPT_DIR}/output.geoscrub.sql" > "$OUTPUT_FILE" |
|
136 |
if [[ ! -w $OUTPUT_FILE ]]; then |
|
137 |
echo "Could not save geoscrub table contents to ${OUTPUT_FILE}" |
|
138 |
exit 1 |
|
139 |
fi |
|
140 |
else |
|
141 |
echo "Scrubbed input available in the geoscrub table of the ${DB_NAME} database." |
|
142 |
fi |
|
143 |
|
derived/biengeo/output.geoscrub.sql | ||
---|---|---|
1 |
\COPY (SELECT decimallatitude, decimallongitude, country, stateprovince, county, countrystd, stateprovincestd, countystd, latlonvalidity, countryvalidity, stateprovincevalidity, countyvalidity FROM geoscrub) TO STDOUT WITH CSV; |
|
2 |
|
derived/biengeo/README.txt | ||
---|---|---|
64 | 64 |
WARNING: deletes any previous geoscrubbing results! |
65 | 65 |
runtime: ~5.5 h |
66 | 66 |
cd <svn_biengeo_root> |
67 |
3. geoscrub.sh [--geoscrub-input=input_dir] |
|
67 |
3. geoscrub.sh [--geoscrub-input=input_dir] [--output-file=geoscrub-output.csv]
|
|
68 | 68 |
- runs the following scripts in order to load and scrub vegbien input data: |
69 | 69 |
* load-geoscrub-input.sh |
70 | 70 |
- dumps geoscrub_input from vegbien and loads it into the geoscrub db |
... | ... | |
75 | 75 |
runtime: 5.5 h |
76 | 76 |
- contains (postgis-extended) SQL statements that score the validity |
77 | 77 |
of GADM2-scrubbed names against given point coordinates |
78 |
- If the --output-file (or -o) option is given, then the final geoscrub table |
|
79 |
will be dumped to the specified output file in CSV format. |
|
78 | 80 |
|
79 | 81 |
[Also see comments embedded in specific scripts in this directory.] |
80 | 82 |
|
Also available in: Unified diff
Added an output CSV file option to geoscrub.sh.