Project

General

Profile

Download (1.68 KB) Statistics
| Branch: | Revision:
1
# R script for batch parsing and loading GHCN daily station files
2
# (*.dly) into a SQLite database
3
#
4
# As written, the script is intended to create and populate the database
5
# from scratch, reporting an error if the script already exists. In
6
# principle though, the code that processes and loads a given *.dly file
7
# could be run on its own to load additional data into an already
8
# existing database file.
9
#
10
# Jim Regetz
11
# NCEAS
12
# Created on 09-May-2012
13

    
14
require(RSQLite)
15

    
16
db.path <- "ghcn_all.db"
17

    
18
# define bulk insert helper function (adapted from RSQLite
19
# documentation)
20
ghcn_bulk_insert <- function(db, sql, dat) {
21
    dbBeginTransaction(db)
22
    dbGetPreparedQuery(db, sql, bind.data = dat)
23
    dbCommit(db)
24
    dbGetQuery(db, "select count(*) from ghcn")[[1]]
25
}
26

    
27
# establish database connection
28
con <- dbDriver("SQLite")
29
if (file.exists(db.path)) {
30
    stop("database already exists at ", db.path)
31
}
32
db <- dbConnect(con, dbname=db.path)
33

    
34
# create main ghcn table
35
sql <- "
36
    CREATE TABLE ghcn (
37
        id text,
38
        year int,
39
        month int,
40
        element text,
41
        day int,
42
        value int,
43
        mflag text,
44
        qflag text,
45
        sflag text,
46
        srcrowid int)
47
"
48
dbGetQuery(db, sql)
49

    
50
# prepare sql insert statement
51
params.clist <- paste(rep("?", length(dbListFields(db, "ghcn"))),
52
    collapse=", ")
53
sql <- paste("insert into ghcn values (", params.clist, ")", sep="")
54

    
55
# process and insert daily data
56
dailies <- list.files(pattern="*.dly")
57
for (file in dailies) {
58
    x <- read.fortran(file,
59
        format=c("A11","I4","I2","A4",rep(c("F5","3A1"),times=31)))
60
    long <- reshape(x, direction="long",
61
        varying=matrix(5:ncol(x), nrow=4))
62
    ghcn_bulk_insert(db, sql, long)
63
}
(2-2/2)