Project

General

Profile

« Previous | Next » 

Revision 2d08ed05

Added by Jim Regetz over 12 years ago

  • ID 2d08ed050cb0a6ee6b2c09c1c39ac2e57c3f4cd0

replaced read.fortran with faster system call to awk/tr

View differences:

climate/extra/ghcn-to-sqlite.R
53 53
sql <- paste("insert into ghcn values (", params.clist, ")", sep="")
54 54

  
55 55
# process and insert daily data
56
# Column classes handed to read.csv (as colClasses) by loadAsCSV: one
# entry per fixed-width field of a .dly record -- 4 leading fields
# followed by 31 groups of (numeric, character, character, character),
# i.e. 4 + 31 * 4 = 128 classes in total.
DLY.COLS <- c("character", "integer", "integer", "character",
57
    rep(c("numeric", "character", "character", "character"), times=31))
58

  
59
# Read one fixed-width .dly file into a data frame.
#
# The file is converted to comma-separated text by an external
# `awk | tr` shell pipeline, and that text is then parsed with read.csv
# using the column classes in DLY.COLS.
#
# Args:
#   dly: path to a single .dly file.
#
# Returns:
#   A data frame (no header, default V1..Vn column names) with one row
#   per input line and one column per fixed-width field: 4 leading
#   fields followed by 31 groups of 4.
#
# NOTE: FIELDWIDTHS is a GNU awk extension, so the `awk` found on the
# PATH must be gawk for the field splitting to work.
loadAsCSV <- function(dly) {
    # Field widths: 11, 4, 2, 4, then 31 repetitions of (5, 1, 1, 1).
    widths <- paste(c(11, 4, 2, 4, rep(c(5,1,1,1), times=31)),
        collapse=" ")
    # Assigning to $1 makes awk rebuild the record with OFS between the
    # fixed-width fields, turning each line into a CSV row.
    awk <- paste("awk -v FIELDWIDTHS='", widths,
        "' -v OFS=',' '{ $1=$1 \"\"; print }'", sep="")
    # Strip the blank padding left inside the fixed-width fields.
    tr <- "tr -d ' '"
    # Run the pipeline exactly once (it used to be executed twice, with
    # the first result thrown away). The path is shell-quoted so file
    # names containing spaces or metacharacters are passed through intact.
    csv.lines <- system(paste(awk, shQuote(dly), "|", tr), intern=TRUE)
    # read.csv does not close a connection that is already open, so
    # close it ourselves to avoid leaking one connection per file.
    con <- textConnection(csv.lines)
    on.exit(close(con))
    read.csv(con, header=FALSE, colClasses=DLY.COLS)
}
69

  
56 70
dailies <- list.files(pattern="*.dly")
57 71
for (file in dailies) {
58
    x <- read.fortran(file,
59
        format=c("A11","I4","I2","A4",rep(c("F5","3A1"),times=31)))
72
    x <- loadAsCSV(file)
60 73
    long <- reshape(x, direction="long",
61 74
        varying=matrix(5:ncol(x), nrow=4))
62 75
    ghcn_bulk_insert(db, sql, long)

Also available in: Unified diff