Revision 9b850015
Added by Jim Regetz over 12 years ago
climate/extra/ghcn-to-psql.R | ||
---|---|---|
1 | 1 |
# R script for batch parsing and loading GHCN daily station files |
2 |
# (*.dly) into a PostgreSQL database. Script will process all such files |
|
3 |
# in the current working directory. |
|
2 |
# (*.dly) into a PostgreSQL database. The appropriate GHCN files are |
|
3 |
# assumed to have been downloaded to the location specified by |
|
4 |
# 'ghcndir', with the daily files themselves in a "ghcnd_all" |
|
5 |
# subdirectory exactly as generated by unpacking "ghcnd_all.tar.gz"; |
|
6 |
# note that for the purposes of storage efficiency, we're not currently |
|
7 |
# keeping these uncompressed files on disk, so this tarball would need |
|
8 |
# to be unpacked again if for some reason this script needs to be |
|
9 |
# re-run. |
|
4 | 10 |
# |
5 | 11 |
# As currently written, the script assumes that the 'ghcn' database |
6 | 12 |
# already exists locally but has no tables, and that it can be accessed |
... | ... | |
20 | 26 |
#-------------# |
21 | 27 |
|
22 | 28 |
# location of ghcn daily data (on atlas) |
23 |
datadir <- "/home/layers/data/climate/ghcn/ghcnd_all"
|
|
29 |
ghcndir <- "/home/layers/data/climate/ghcn/v2.92-upd-2012052822"
|
|
24 | 30 |
# output file |
25 | 31 |
logfile <- "~/ghcn-psql-load.log" |
26 | 32 |
|
... | ... | |
115 | 121 |
)")) |
116 | 122 |
|
117 | 123 |
# process and insert daily data |
118 |
dailies <- list.files(datadir, pattern="*.dly")
|
|
124 |
dailies <- list.files(file.path(ghcndir, "ghcnd_all"), pattern="*.dly")
|
|
119 | 125 |
for (file in dailies) { |
120 | 126 |
cat(date(), "\t", file=logfile, append=TRUE) |
121 |
if (loadAsCSV(file.path(datadir, file), VARS)) {
|
|
127 |
if (loadAsCSV(file.path(ghcndir, "ghcnd_all", file), VARS)) {
|
|
122 | 128 |
wideToLong(db) |
123 | 129 |
dbGetQuery(db, 'delete from ghcn_wide') |
124 | 130 |
} |
... | ... | |
135 | 141 |
#BATCH.SIZE <- 10 |
136 | 142 |
#counter <- 1 |
137 | 143 |
#for (file in dailies) { |
138 |
# loadAsCSV(file.path(datadir, file), VARS)
|
|
144 |
# loadAsCSV(file.path(ghcndir, "ghcnd_all", file), VARS)
|
|
139 | 145 |
# if (counter %% BATCH.SIZE == 0) { |
140 | 146 |
# wideToLong(db) |
141 | 147 |
# dbGetQuery(db, 'delete from ghcn_wide') |
Also available in: Unified diff
Commit message: updated directory paths and related comments