Project

General

Profile

« Previous | Next » 

Revision 9b850015

Added by Jim Regetz over 12 years ago

updated directory paths and related comments

View differences:

climate/extra/ghcn-to-psql.R
1 1
# R script for batch parsing and loading GHCN daily station files
2
# (*.dly) into a PostgreSQL database. Script will process all such files
3
# in the current working directory.
2
# (*.dly) into a PostgreSQL database. The appropriate GHCN files are
3
# assumed to have been downloaded to the location specified by
4
# 'ghcndir', with the daily files themselves in a "ghcnd_all"
5
# subdirectory exactly as generated by unpacking "ghcnd_all.tar.gz";
6
# note that for the purposes of storage efficiency, we're not currently
7
# keeping these uncompressed files on disk, so this tarball would need
8
# to be unpacked again if for some reason this script needs to be
9
# re-run.
4 10
#
5 11
# As currently written, the script assumes that the 'ghcn' database
6 12
# already exists locally but has no tables, and that it can be accessed
......
20 26
#-------------#
21 27

  
22 28
# location of ghcn daily data (on atlas)
23
datadir <- "/home/layers/data/climate/ghcn/ghcnd_all"
29
ghcndir <- "/home/layers/data/climate/ghcn/v2.92-upd-2012052822"
24 30
# output file
25 31
logfile <- "~/ghcn-psql-load.log"
26 32

  
......
115 121
        )"))
116 122

  
117 123
# process and insert daily data
118
dailies <- list.files(datadir, pattern="*.dly")
124
dailies <- list.files(file.path(ghcndir, "ghcnd_all"), pattern="*.dly")
119 125
for (file in dailies) {
120 126
    cat(date(), "\t", file=logfile, append=TRUE)
121
    if (loadAsCSV(file.path(datadir, file), VARS)) {
127
    if (loadAsCSV(file.path(ghcndir, "ghcnd_all", file), VARS)) {
122 128
        wideToLong(db)
123 129
        dbGetQuery(db, 'delete from ghcn_wide')
124 130
    }
......
135 141
#BATCH.SIZE <- 10
136 142
#counter <- 1
137 143
#for (file in dailies) {
138
#    loadAsCSV(file.path(datadir, file), VARS)
144
#    loadAsCSV(file.path(ghcndir, "ghcnd_all", file), VARS)
139 145
#    if (counter %% BATCH.SIZE == 0) {
140 146
#        wideToLong(db)
141 147
#        dbGetQuery(db, 'delete from ghcn_wide')

Also available in: Unified diff