Project

General

Profile

« Previous | Next » 

Revision 11563

Added an output CSV file option to geoscrub.sh.

View differences:

derived/biengeo/geoscrub.sh
23 23
    echo "-d, --dbname=DBNAME      database name psql commands will connect to" >&2
24 24
    echo "-h, --host=HOSTNAME      database server host or socket directory" >&2
25 25
    echo "-U, --username=USERNAME  database user name" >&2
26
    echo "Input Data Options:" >&2
26
    echo "Geoscrub Data Options:" >&2
27 27
    echo "-i, --geoscrub-input     Geoscrub input directory (default: ${HOME}/geoscrub_input)" >&2
28 28
    echo "                         Delete this directory, or the input CSV in it," >&2
29 29
    echo "                         to re-download the data." >&2
30
    echo "-o, --output-file        Optional file where geoscrubbed CSV should be saved." >&2
31
    echo "                         If this option is given, then the final geoscrub table" >&2
32
    echo "                         will be dumped to the specified file in CSV format." >&2
33
    echo "                         No output file is saved if this option is omitted." >&2
30 34
    exit 1;
31 35
}
32 36

  
......
83 87
            GEOSCRUB_INPUT_OPT="$1"
84 88
            shift
85 89
            ;;
90
        -o)
91
            if [[ -z $2  ]];  then
92
                echo "Option $1 requires an argument." >&2
93
                exit 1;
94
            fi
95
            OUTPUT_FILE="$2"
96
            shift 2
97
            ;;
98
        --output-file=*)
99
            OUTPUT_FILE="${1#*=}"
100
            shift
101
            ;;
86 102
        *)
87 103
            echo "Invalid option: $1" >&2
88 104
            usage
......
113 129
run_sql_script "${SCRIPT_DIR}/geovalidate.sql"
114 130

  
115 131
echo "Input successfully scrubbed."
116
echo "Scrubbed input available in the geoscrub table of the ${DB_NAME} database."
132
if [[ -n $OUTPUT_FILE ]]; then
133
    echo "Saving geoscrubbed data as a CSV file to ${OUTPUT_FILE}..."
117 134

  
135
    run_sql_script "${SCRIPT_DIR}/output.geoscrub.sql" > "$OUTPUT_FILE"
136
    if [[ ! -w $OUTPUT_FILE ]]; then
137
        echo "Could not save geoscrub table contents to ${OUTPUT_FILE}"
138
        exit 1
139
    fi
140
else
141
    echo "Scrubbed input available in the geoscrub table of the ${DB_NAME} database."
142
fi
143

  
derived/biengeo/output.geoscrub.sql
1
\COPY (SELECT decimallatitude, decimallongitude, country, stateprovince, county, countrystd, stateprovincestd, countystd, latlonvalidity, countryvalidity, stateprovincevalidity, countyvalidity FROM geoscrub) TO STDOUT WITH CSV;
2

  
derived/biengeo/README.txt
64 64
WARNING: deletes any previous geoscrubbing results!
65 65
runtime: ~5.5 h
66 66
cd <svn_biengeo_root>
67
3. geoscrub.sh [--geoscrub-input=input_dir]
67
3. geoscrub.sh [--geoscrub-input=input_dir] [--output-file=geoscrub-output.csv]
68 68
   - runs the following scripts in order to load and scrub vegbien input data:
69 69
   * load-geoscrub-input.sh
70 70
     - dumps geoscrub_input from vegbien and loads it into the geoscrub db
......
75 75
     runtime: 5.5 h
76 76
     - contains (postgis-extended) SQL statements that score the validity
77 77
       of GADM2-scrubbed names against given point coordinates
78
   - If the --output-file (or -o) option is given, then the final geoscrub table
79
     will be dumped to the specified output file in CSV format.
78 80

  
79 81
[Also see comments embedded in specific scripts in this directory.]
80 82

  

Also available in: Unified diff