Project

General

Profile

« Previous | Next » 

Revision a7663038

Added by Jim Regetz over 12 years ago

  • ID a76630389fd88dcc15e652a6d2dba6c571522716

further optimized functions in R for some minor extra speedup

View differences:

climate/extra/ghcn-to-sqlite.R
46 46
}
47 47

  
48 48
# shell out to OS to leverage grep/awk/tr for faster initial parsing and
49
# filtering of data; if no data records are read in, this function
50
# returns NULL
49
# filtering of data into a temp file; if filtering yields no data
50
# records, this function returns NULL
51 51
loadAsCSV <- function(dly, patt=NULL) {
52
    tmpfile <- tempfile()
52 53
    awk <- paste(
53 54
        "awk -v FIELDWIDTHS='",
54 55
        paste(c(11, 4, 2, 4, rep(c(5,1,1,1), times=31)), collapse=" "),
......
60 61
        patt <- shQuote(paste(patt, collapse="\\|"))
61 62
        cmd <- paste("grep -e", patt, dly, "|", awk, "|", tr)
62 63
    }
63
    csv <- system(cmd, intern=TRUE)
64
    if (length(csv)>0) {
65
        read.csv(textConnection(csv), header=FALSE, colClasses=DLY.COLS)
64
    cmd <- paste(cmd, tmpfile, sep=" > ")
65
    # execute command and read from tmpfile if successful
66
    if (system(cmd)==0 & 0<file.info(tmpfile)$size) {
67
        out <- read.csv(tmpfile, header=FALSE, colClasses=DLY.COLS)
68
        file.remove(tmpfile)
66 69
    } else {
67
        NULL
70
        out <- NULL
68 71
    }
72
    return(out)
69 73
}
70 74

  
71
# split data columnwise by day, then recombine into long format; note
72
# that the indexing here is hard-coded to work for the *.dly files, and
73
# simply assumes that they are all consistent
75
# function to convert the wide-form (days across columns) GHCN data into
76
# long form (unique row for each day*element); note that all indexing
77
# here is hard-coded to work for the *.dly files, and simply assumes
78
# that they are all consistent
74 79
wideToLong <- function(dat, days) {
75
    daily.data <- lapply(seq_along(days), function(i) {
76
        dat <- data.frame(dat[1:4], day=i, dat[days[[i]]])
77
        dat$srcrowid <- seq(nrow(dat))
78
        names(dat) <- 1:ncol(dat)
79
        dat
80
        })
81
    do.call("rbind", daily.data)
80
    # convert id vars to long form, relying on R to recycle the first
81
    # four to match the length of the fifth (slightly faster than doing
82
    # this manually)
83
    out <- data.frame(
84
        dat[1:4],
85
        V5=rep(1:31, each=nrow(dat))
86
    )
87
    # now combine and fill in the daily values/flags
88
    for (i in 1:4) {
89
        cols <- sapply(days, function(x) x[[i]])
90
        out[[5+i]] <- as.vector(as.matrix(dat[, cols]))
91
    }
92
    # add original row id
93
    out$id <- 1:nrow(dat)
94
    out
82 95
}
83 96

  
84 97

  

Also available in: Unified diff