Revision 669c150d
Added by Jim Regetz over 12 years ago
- ID 669c150d42a488207ab6516307ba7abf56bcff0f
climate/extra/ghcn-to-sqlite.R | ||
---|---|---|
56 | 56 |
DLY.COLS <- c("character", "integer", "integer", "character", |
57 | 57 |
rep(c("numeric", "character", "character", "character"), times=31)) |
58 | 58 |
|
59 |
loadAsCSV <- function(dly) { |
|
59 |
# shell out to OS to leverage grep/awk/tr for faster initial parsing and |
|
60 |
# filtering of data; if no data records are read in, this function |
|
61 |
# returns NULL |
|
62 |
loadAsCSV <- function(dly, patt=NULL) { |
|
60 | 63 |
awk <- paste( |
61 | 64 |
"awk -v FIELDWIDTHS='", |
62 | 65 |
paste(c(11, 4, 2, 4, rep(c(5,1,1,1), times=31)), collapse=" "), |
63 | 66 |
"' -v OFS=',' '{ $1=$1 \"\"; print }'", sep="") |
64 | 67 |
tr <- "tr -d ' '" |
65 |
system(paste(awk, dly, "|", tr), intern=TRUE) |
|
66 |
read.csv(textConnection(system(paste(awk, dly, "|", tr), |
|
67 |
intern=TRUE)), header=FALSE, colClasses=DLY.COLS) |
|
68 |
if (is.null(patt)) { |
|
69 |
cmd <- paste(awk, dly, "|", tr) |
|
70 |
} else { |
|
71 |
patt <- shQuote(paste(patt, collapse="\\|")) |
|
72 |
cmd <- paste("grep -e", patt, dly, "|", awk, "|", tr) |
|
73 |
} |
|
74 |
csv <- system(cmd, intern=TRUE) |
|
75 |
if (length(csv)>0) { |
|
76 |
read.csv(textConnection(csv), header=FALSE, colClasses=DLY.COLS) |
|
77 |
} else { |
|
78 |
NULL |
|
79 |
} |
|
68 | 80 |
} |
69 | 81 |
|
70 | 82 |
dailies <- list.files(pattern="*.dly") |
71 | 83 |
for (file in dailies) { |
72 |
x <- loadAsCSV(file) |
|
73 |
long <- reshape(x, direction="long", |
|
74 |
varying=matrix(5:ncol(x), nrow=4)) |
|
75 |
ghcn_bulk_insert(db, sql, long) |
|
84 |
x <- loadAsCSV(file, c("TMAX", "TMIN")) |
|
85 |
if (is.null(x)) { |
|
86 |
message("no rows imported for ", file) |
|
87 |
} else { |
|
88 |
long <- reshape(x, direction="long", |
|
89 |
varying=matrix(5:ncol(x), nrow=4)) |
|
90 |
ghcn_bulk_insert(db, sql, long) |
|
91 |
} |
|
76 | 92 |
} |
Also available in: Unified diff
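Added grep pre-filtering of rows in the *.dly files: loadAsCSV now accepts an optional pattern argument (patt) and returns NULL when no data records are read, and the import loop reports files for which no rows were imported.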