/ - Diff - Environment and organisms - NCEAS Projects

« Previous | Next »

Revision a7663038

Added by Jim Regetz almost 13 years ago

ID a76630389fd88dcc15e652a6d2dba6c571522716

further optimized functions in R for some minor extra speedup

+    }
     # shell out to OS to leverage grep/awk/tr for faster initial parsing and
     # filtering of data; if no data records are read in, this function
     # returns NULL
     # filtering of data into a temp file; if filtering yields no data
     # records, this function returns NULL
     loadAsCSV <- function(dly, patt=NULL) {
         tmpfile <- tempfile()
         awk <- paste(
             "awk -v FIELDWIDTHS='",
             paste(c(11, 4, 2, 4, rep(c(5,1,1,1), times=31)), collapse=" "),
-...
             patt <- shQuote(paste(patt, collapse="\\|"))
             cmd <- paste("grep -e", patt, dly, "|", awk, "|", tr)
+        }
         csv <- system(cmd, intern=TRUE)
         if (length(csv)>0) {
             read.csv(textConnection(csv), header=FALSE, colClasses=DLY.COLS)
         cmd <- paste(cmd, tmpfile, sep=" > ")
         # execute command and read from tmpfile if successful
         if (system(cmd)==0 & 0<file.info(tmpfile)$size) {
             out <- read.csv(tmpfile, header=FALSE, colClasses=DLY.COLS)
             file.remove(tmpfile)
         } else {
             NULL
             out <- NULL
+        }
         return(out)
+    }
     # split data columnwise by day, then recombine into long format; note
     # that the indexing here is hard-coded to work for the *.dly files, and
     # simply assumes that they are all consistent
     # function to convert the wide-form (days across columns) GHCN data into
     # long form (unique row for each day*element); note that all indexing
     # here is hard-coded to work for the *.dly files, and simply assumes
     # that they are all consistent
     # Reshape a wide table (one set of columns per day) into long form.
     #
     # Arguments:
     #   dat  - data frame; its first four columns (dat[1:4]) are carried
     #          through as id variables
     #   days - list with one element per day; each element indexes columns
     #          of `dat` (used as dat[days[[i]]]) and must have at least four
     #          entries, since x[[1]]..x[[4]] are read below
     #
     # NOTE(review): this span was captured from a rendered diff and appears
     # to hold two implementations back to back -- an lapply/rbind body
     # followed by a vectorised body -- and lines starting with `+` look like
     # leftover diff markers rather than R code. Confirm against the
     # repository before running any of it.
     wideToLong <- function(dat, days) {
         # first body: build one data frame per day holding the four id
         # columns, the day index, and that day's columns, then stack them
         daily.data <- lapply(seq_along(days), function(i) {
             dat <- data.frame(dat[1:4], day=i, dat[days[[i]]])
             # remember which input row each output row came from
             dat$srcrowid <- seq(nrow(dat))
             # positional names so that every per-day frame has identical
             # column names when passed to rbind
             names(dat) <- 1:ncol(dat)
             dat
             })
         do.call("rbind", daily.data)
         # second body starts here
         # convert id vars to long form, relying on R to recycle the first
         # four to match the length of the fifth (slightly faster than doing
         # this manually)
         # NOTE(review): 1:31 is hard-coded; this presumably assumes
         # length(days) == 31 -- verify against the caller
         out <- data.frame(
             dat[1:4],
             V5=rep(1:31, each=nrow(dat))
+        )
         # now combine and fill in the daily values/flags
         for (i in 1:4) {
             # i-th column index from every day's entry
             cols <- sapply(days, function(x) x[[i]])
             # flatten the selected columns one after another (all rows of
             # the first day, then the second, ...), matching the order of V5
             out[[5+i]] <- as.vector(as.matrix(dat[, cols]))
+        }
         # add original row id
         # NOTE(review): this vector is shorter than `out` and presumably
         # gets recycled once per day; 1:nrow(dat) yields c(1, 0) when `dat`
         # has no rows, so seq_len(nrow(dat)) would be safer -- confirm
         out$id <- 1:nrow(dat)
         out
+    }

Also available in: Unified diff

Project

General

Profile

Revision a7663038

Added by Jim Regetz almost 13 years ago