Project

General

Profile

« Previous | Next » 

Revision 669c150d

Added by Jim Regetz over 12 years ago

  • ID 669c150d42a488207ab6516307ba7abf56bcff0f

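Added optional grep pre-filtering of rows in the *.dly files: only TMAX and TMIN records are now loaded, and files with no matching rows are reported and skipped.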

View differences:

climate/extra/ghcn-to-sqlite.R
56 56
DLY.COLS <- c("character", "integer", "integer", "character",
57 57
    rep(c("numeric", "character", "character", "character"), times=31))
58 58

  
59
loadAsCSV <- function(dly) {
59
# shell out to OS to leverage grep/awk/tr for faster initial parsing and
60
# filtering of data; if no data records are read in, this function
61
# returns NULL
62
loadAsCSV <- function(dly, patt=NULL) {
60 63
    awk <- paste(
61 64
        "awk -v FIELDWIDTHS='",
62 65
        paste(c(11, 4, 2, 4, rep(c(5,1,1,1), times=31)), collapse=" "),
63 66
        "' -v OFS=',' '{ $1=$1 \"\"; print }'", sep="")
64 67
    tr <- "tr -d ' '"
65
    system(paste(awk, dly, "|", tr), intern=TRUE)
66
    read.csv(textConnection(system(paste(awk, dly, "|", tr),
67
        intern=TRUE)), header=FALSE, colClasses=DLY.COLS)
68
    if (is.null(patt)) {
69
        cmd <- paste(awk, dly, "|", tr)
70
    } else {
71
        patt <- shQuote(paste(patt, collapse="\\|"))
72
        cmd <- paste("grep -e", patt, dly, "|", awk, "|", tr)
73
    }
74
    csv <- system(cmd, intern=TRUE)
75
    if (length(csv)>0) {
76
        read.csv(textConnection(csv), header=FALSE, colClasses=DLY.COLS)
77
    } else {
78
        NULL
79
    }
68 80
}
69 81

  
70 82
dailies <- list.files(pattern="*.dly")
71 83
for (file in dailies) {
72
    x <- loadAsCSV(file)
73
    long <- reshape(x, direction="long",
74
        varying=matrix(5:ncol(x), nrow=4))
75
    ghcn_bulk_insert(db, sql, long)
84
    x <- loadAsCSV(file, c("TMAX", "TMIN"))
85
    if (is.null(x)) {
86
        message("no rows imported for ", file)
87
    } else {
88
        long <- reshape(x, direction="long",
89
            varying=matrix(5:ncol(x), nrow=4))
90
        ghcn_bulk_insert(db, sql, long)
91
    }
76 92
}

Also available in: Unified diff