Revision dd85f6d3
Added by Jim Regetz almost 13 years ago
- ID dd85f6d3ced4302fec1ad2a3f0fed981e11ae389
climate/extra/ghcn-to-sqlite.R | ||
---|---|---|
1 |
# R script for batch parsing and loading GHCN daily station files |
|
2 |
# (*.dly) into a SQLite database |
|
3 |
# |
|
4 |
# As written, the script is intended to create and populate the database |
|
5 |
# from scratch, reporting an error if the script already exists. In |
|
6 |
# principle though, the code that processes and loads a given *.dly file |
|
7 |
# could be run on its own to load additional data into an already |
|
8 |
# existing database file. |
|
9 |
# |
|
10 |
# Jim Regetz |
|
11 |
# NCEAS |
|
12 |
# Created on 09-May-2012 |
|
13 |
|
|
14 |
require(RSQLite) |
|
15 |
|
|
16 |
db.path <- "ghcn_all.db" |
|
17 |
|
|
18 |
# define bulk insert helper function (adapted from RSQLite |
|
19 |
# documentation) |
|
20 |
ghcn_bulk_insert <- function(db, sql, dat) { |
|
21 |
dbBeginTransaction(db) |
|
22 |
dbGetPreparedQuery(db, sql, bind.data = dat) |
|
23 |
dbCommit(db) |
|
24 |
dbGetQuery(db, "select count(*) from ghcn")[[1]] |
|
25 |
} |
|
26 |
|
|
27 |
# establish database connection |
|
28 |
con <- dbDriver("SQLite") |
|
29 |
if (file.exists(db.path)) { |
|
30 |
stop("database already exists at ", db.path) |
|
31 |
} |
|
32 |
db <- dbConnect(con, dbname=db.path) |
|
33 |
|
|
34 |
# create main ghcn table |
|
35 |
sql <- " |
|
36 |
CREATE TABLE ghcn ( |
|
37 |
id text, |
|
38 |
year int, |
|
39 |
month int, |
|
40 |
element text, |
|
41 |
day int, |
|
42 |
value int, |
|
43 |
mflag text, |
|
44 |
qflag text, |
|
45 |
sflag text, |
|
46 |
srcrowid int) |
|
47 |
" |
|
48 |
dbGetQuery(db, sql) |
|
49 |
|
|
50 |
# prepare sql insert statement |
|
51 |
params.clist <- paste(rep("?", length(dbListFields(db, "ghcn"))), |
|
52 |
collapse=", ") |
|
53 |
sql <- paste("insert into ghcn values (", params.clist, ")", sep="") |
|
54 |
|
|
55 |
# process and insert daily data |
|
56 |
dailies <- list.files(pattern="*.dly") |
|
57 |
for (file in dailies) { |
|
58 |
x <- read.fortran(file, |
|
59 |
format=c("A11","I4","I2","A4",rep(c("F5","3A1"),times=31))) |
|
60 |
long <- reshape(x, direction="long", |
|
61 |
varying=matrix(5:ncol(x), nrow=4)) |
|
62 |
ghcn_bulk_insert(db, sql, long) |
|
63 |
} |
Also available in: Unified diff
added initial R script to load GHCN data into a SQLite DB