Revision a7663038
Added by Jim Regetz almost 13 years ago
- ID a76630389fd88dcc15e652a6d2dba6c571522716
climate/extra/ghcn-to-sqlite.R | ||
---|---|---|
46 | 46 |
} |
47 | 47 |
|
48 | 48 |
# shell out to OS to leverage grep/awk/tr for faster initial parsing and |
49 |
# filtering of data; if no data records are read in, this function
|
|
50 |
# returns NULL |
|
49 |
# filtering of data into a temp file; if filtering yields no data
|
|
50 |
# records, this function returns NULL
|
|
51 | 51 |
loadAsCSV <- function(dly, patt=NULL) { |
52 |
tmpfile <- tempfile() |
|
52 | 53 |
awk <- paste( |
53 | 54 |
"awk -v FIELDWIDTHS='", |
54 | 55 |
paste(c(11, 4, 2, 4, rep(c(5,1,1,1), times=31)), collapse=" "), |
... | ... | |
60 | 61 |
patt <- shQuote(paste(patt, collapse="\\|")) |
61 | 62 |
cmd <- paste("grep -e", patt, dly, "|", awk, "|", tr) |
62 | 63 |
} |
63 |
csv <- system(cmd, intern=TRUE) |
|
64 |
if (length(csv)>0) { |
|
65 |
read.csv(textConnection(csv), header=FALSE, colClasses=DLY.COLS) |
|
64 |
cmd <- paste(cmd, tmpfile, sep=" > ") |
|
65 |
# execute command and read from tmpfile if successful |
|
66 |
if (system(cmd)==0 & 0<file.info(tmpfile)$size) { |
|
67 |
out <- read.csv(tmpfile, header=FALSE, colClasses=DLY.COLS) |
|
68 |
file.remove(tmpfile) |
|
66 | 69 |
} else { |
67 |
NULL |
|
70 |
out <- NULL
|
|
68 | 71 |
} |
72 |
return(out) |
|
69 | 73 |
} |
70 | 74 |
|
71 |
# split data columnwise by day, then recombine into long format; note |
|
72 |
# that the indexing here is hard-coded to work for the *.dly files, and |
|
73 |
# simply assumes that they are all consistent |
|
75 |
# function to convert the wide-form (days across columns) GHCN data into |
|
76 |
# long form (unique row for each day*element); note that all indexing |
|
77 |
# here is hard-coded to work for the *.dly files, and simply assumes |
|
78 |
# that they are all consistent |
|
74 | 79 |
wideToLong <- function(dat, days) { |
75 |
daily.data <- lapply(seq_along(days), function(i) { |
|
76 |
dat <- data.frame(dat[1:4], day=i, dat[days[[i]]]) |
|
77 |
dat$srcrowid <- seq(nrow(dat)) |
|
78 |
names(dat) <- 1:ncol(dat) |
|
79 |
dat |
|
80 |
}) |
|
81 |
do.call("rbind", daily.data) |
|
80 |
# convert id vars to long form, relying on R to recycle the first |
|
81 |
# four to match the length of the fifth (slightly faster than doing |
|
82 |
# this manually) |
|
83 |
out <- data.frame( |
|
84 |
dat[1:4], |
|
85 |
V5=rep(1:31, each=nrow(dat)) |
|
86 |
) |
|
87 |
# now combine and fill in the daily values/flags |
|
88 |
for (i in 1:4) { |
|
89 |
cols <- sapply(days, function(x) x[[i]]) |
|
90 |
out[[5+i]] <- as.vector(as.matrix(dat[, cols])) |
|
91 |
} |
|
92 |
# add original row id |
|
93 |
out$id <- 1:nrow(dat) |
|
94 |
out |
|
82 | 95 |
} |
83 | 96 |
|
84 | 97 |
|
Also available in: Unified diff
further optimized functions in R for some minor extra speedup