1 |
bc59c6bb
|
Benoit Parmentier
|
####################Interpolation of Tmax for 10 dates.#####################
|
2 |
d7b0ef36
|
Benoit Parmentier
|
#This script interpolates station values for the Oregon case study. This program loads the station data from a csv file
|
3 |
7f643c83
|
Benoit Parmentier
|
#and perform two types of regression: multiple linear model and general additive model (GAM). Note that this program:
|
4 |
e2d673e4
|
Benoit Parmentier
|
#1)assumes that the csv file is in the current working
|
5 |
|
|
#2)extract relevant variables from raster images before performing the regressions.
|
6 |
7f643c83
|
Benoit Parmentier
|
#The user must provide the list of raster images in a textile.
|
7 |
2a89fa5f
|
Benoit Parmentier
|
#Script created by Benoit Parmentier on March 3, 2012.
|
8 |
89c6eee2
|
Benoit Parmentier
|
|
9 |
e2d673e4
|
Benoit Parmentier
|
###Loading r library and packages # loading the raster package
|
10 |
92c36e33
|
Benoit Parmentier
|
library(gtools) # loading ...
|
11 |
89c6eee2
|
Benoit Parmentier
|
library(mgcv)
|
12 |
|
|
|
13 |
|
|
###Parameters and arguments
|
14 |
4180b123
|
Benoit Parmentier
|
#infile1<-"ghcn_or_b_02122012_OR83M.csv"
|
15 |
|
|
infile1<-"ghcn_or_tmax_b_03032012_OR83M.csv"
|
16 |
3f649df0
|
Benoit Parmentier
|
path<-"C:/Data/Benoit/NCEAS/window_Oregon_data"
|
17 |
e2d673e4
|
Benoit Parmentier
|
setwd(path)
|
18 |
2a89fa5f
|
Benoit Parmentier
|
#infile2<-"dates_interpolation_03012012.txt" # list of 10 dates for the regression
|
19 |
25a68ae3
|
Benoit Parmentier
|
infile2<-"dates_interpolation_03052012.txt"
|
20 |
2a89fa5f
|
Benoit Parmentier
|
prop<-0.3 #Proportion of testing retained for validation
|
21 |
4180b123
|
Benoit Parmentier
|
out_prefix<-"_03042012_r1"
|
22 |
aac7c36c
|
Benoit Parmentier
|
infile3<-"models_interpolation_03052012.txt"
|
23 |
|
|
|
24 |
89c6eee2
|
Benoit Parmentier
|
|
25 |
7f643c83
|
Benoit Parmentier
|
#######START OF THE SCRIPT #############
|
26 |
|
|
|
27 |
2a89fa5f
|
Benoit Parmentier
|
###Reading the station data and setting up for models' comparison
|
28 |
e2d673e4
|
Benoit Parmentier
|
ghcn<-read.csv(paste(path,"/",infile1, sep=""), header=TRUE) #The "paste" function concatenates the path and file name in common string.
|
29 |
25a68ae3
|
Benoit Parmentier
|
ghcn = transform(ghcn,Northness = cos(ASPECT)) #Adding a variable to the dataframe
|
30 |
|
|
ghcn = transform(ghcn,Eastness = sin(ASPECT)) #adding variable to the dataframe.
|
31 |
|
|
|
32 |
4180b123
|
Benoit Parmentier
|
ghcn = transform(ghcn,Northness_w = sin(slope)*cos(ASPECT)) #Adding a variable to the dataframe
|
33 |
|
|
ghcn = transform(ghcn,Eastness_w = sin(slope)*sin(ASPECT)) #adding variable to the dataframe.
|
34 |
|
|
set.seed(100)
|
35 |
bc59c6bb
|
Benoit Parmentier
|
dates <-readLines(paste(path,"/",infile2, sep=""))
|
36 |
aac7c36c
|
Benoit Parmentier
|
models <-readLines(paste(path,"/",infile3, sep=""))
|
37 |
ec229367
|
Benoit Parmentier
|
|
38 |
25a68ae3
|
Benoit Parmentier
|
results <- matrix(1,length(dates),14) #This is a matrix containing the diagnostic measures from the GAM models.
|
39 |
|
|
|
40 |
aac7c36c
|
Benoit Parmentier
|
results_AIC<- matrix(1,length(dates),length(models)+3)
|
41 |
|
|
results_GCV<- matrix(1,length(dates),length(models)+3)
|
42 |
|
|
results_RMSE<- matrix(1,length(dates),length(models)+3)
|
43 |
|
|
|
44 |
4180b123
|
Benoit Parmentier
|
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==d)) #this creates a list of 10 subsets data
|
45 |
|
|
#note that compare to the previous version date_ column was changed to date
|
46 |
ec229367
|
Benoit Parmentier
|
|
47 |
2a89fa5f
|
Benoit Parmentier
|
## looping through the dates...
|
48 |
aac7c36c
|
Benoit Parmentier
|
#Change this into a nested loop, looping through the number of models
|
49 |
bc59c6bb
|
Benoit Parmentier
|
|
50 |
2a89fa5f
|
Benoit Parmentier
|
for(i in 1:length(dates)){ # start of the for loop #1
|
51 |
|
|
|
52 |
|
|
###Regression part 1: Creating a validation dataset by creating training and testing datasets
|
53 |
|
|
|
54 |
|
|
n<-nrow(ghcn.subsets[[i]])
|
55 |
|
|
ns<-n-round(n*prop) #Create a sample from the data frame with 70% of the rows
|
56 |
|
|
nv<-n-ns #create a sample for validation with prop of the rows
|
57 |
|
|
#ns<-n-round(n*prop) #Create a sample from the data frame with 70% of the rows
|
58 |
|
|
ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly
|
59 |
|
|
ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training)
|
60 |
|
|
data_s <- ghcn.subsets[[i]][ind.training, ]
|
61 |
|
|
data_v <- ghcn.subsets[[i]][ind.testing, ]
|
62 |
|
|
|
63 |
|
|
####Regression part 2: GAM models
|
64 |
|
|
|
65 |
aac7c36c
|
Benoit Parmentier
|
mod1<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM), data=data_s)
|
66 |
|
|
mod2<- gam(tmax~ s(lat,lon,ELEV_SRTM), data=data_s)
|
67 |
|
|
mod3<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) + s (Northness)+ s (Eastness) + s(DISTOC), data=data_s)
|
68 |
|
|
mod4<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) + s (Northness_w)+ s (Eastness_w) + s(DISTOC), data=data_s)
|
69 |
|
|
mod5<- gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM, Northness) + s (Eastness) + s(DISTOC), data=data_s)
|
70 |
|
|
mod6<- gam(tmax~ s(lat,lon) + s (ELEV_SRTM, Northness) + s (Eastness) + s(DISTOC), data=data_s)
|
71 |
4180b123
|
Benoit Parmentier
|
|
72 |
2a89fa5f
|
Benoit Parmentier
|
|
73 |
|
|
####Regression part 3: Calculating and storing diagnostic measures
|
74 |
|
|
|
75 |
aac7c36c
|
Benoit Parmentier
|
results_AIC[i,1]<- dates[i] #storing the interpolation dates in the first column
|
76 |
|
|
results_AIC[i,2]<- ns #number of stations used in the training stage
|
77 |
|
|
results_AIC[i,3]<- AIC (mod1)
|
78 |
|
|
results_AIC[i,4]<- AIC (mod2)
|
79 |
|
|
results_AIC[i,5]<- AIC (mod3)
|
80 |
|
|
results_AIC[i,6]<- AIC (mod4)
|
81 |
|
|
results_AIC[i,7]<- AIC (mod5)
|
82 |
|
|
results_AIC[i,8]<- AIC (mod6)
|
83 |
2a89fa5f
|
Benoit Parmentier
|
|
84 |
aac7c36c
|
Benoit Parmentier
|
results_GCV[i,1]<- dates[i] #storing the interpolation dates in the first column
|
85 |
|
|
results_GCV[i,2]<- ns #number of stations used in the training stage
|
86 |
|
|
results_GCV[i,3]<- mod1$gcv.ubre
|
87 |
|
|
results_GCV[i,4]<- mod2$gcv.ubre
|
88 |
|
|
results_GCV[i,5]<- mod3$gcv.ubre
|
89 |
|
|
results_GCV[i,6]<- mod4$gcv.ubre
|
90 |
|
|
results_GCV[i,7]<- mod5$gcv.ubre
|
91 |
|
|
results_GCV[i,8]<- mod6$gcv.ubre
|
92 |
25a68ae3
|
Benoit Parmentier
|
|
93 |
aac7c36c
|
Benoit Parmentier
|
results_DEV[i,1]<- dates[i] #storing the interpolation dates in the first column
|
94 |
|
|
results_DEV[i,2]<- ns #number of stations used in the training stage
|
95 |
|
|
results_DEV[i,3]<- mod1$deviance
|
96 |
|
|
results_DEV[i,4]<- mod2$deviance
|
97 |
|
|
results_DEV[i,5]<- mod3$deviance
|
98 |
|
|
results_DEV[i,6]<- mod4$deviance
|
99 |
|
|
results_DEV[i,7]<- mod5$deviance
|
100 |
|
|
results_DEV[i,8]<- mod6$deviance
|
101 |
2a89fa5f
|
Benoit Parmentier
|
|
102 |
|
|
#####VALIDATION: Prediction checking the results using the testing data########
|
103 |
|
|
|
104 |
aac7c36c
|
Benoit Parmentier
|
y_mod1<- predict(mod1, newdata=data_v, se.fit = TRUE) #Using the coeff to predict new values.
|
105 |
|
|
y_mod2<- predict(mod2, newdata=data_v, se.fit = TRUE)
|
106 |
|
|
y_mod3<- predict(mod3, newdata=data_v, se.fit = TRUE)
|
107 |
|
|
y_mod4<- predict(mod4, newdata=data_v, se.fit = TRUE)
|
108 |
|
|
y_mod5<- predict(mod5, newdata=data_v, se.fit = TRUE)
|
109 |
|
|
y_mod5<- predict(mod6, newdata=data_v, se.fit = TRUE)
|
110 |
25a68ae3
|
Benoit Parmentier
|
|
111 |
aac7c36c
|
Benoit Parmentier
|
res_mod1<- data_v$tmax - y_mod1$fit #Residuals for GMA model that resembles the ANUSPLIN interpolation
|
112 |
|
|
res_mod2<- data_v$tmax - y_mod2$fit #Residuals for GAM model that resembles the PRISM interpolation
|
113 |
|
|
res_mod3<- data_v$tmax - y_mod3$fit
|
114 |
|
|
res_mod4<- data_v$tmax - y_mod4$fit
|
115 |
|
|
res_mod5<- data_v$tmax - y_mod5$fit
|
116 |
|
|
res_mod6<- data_v$tmax - y_mod6$fit
|
117 |
25a68ae3
|
Benoit Parmentier
|
|
118 |
aac7c36c
|
Benoit Parmentier
|
RMSE_mod1 <- sqrt(sum(res_mod1^2)/nv)
|
119 |
|
|
RMSE_mod2 <- sqrt(sum(res_mod2^2)/nv)
|
120 |
|
|
RMSE_mod3 <- sqrt(sum(res_mod3^2)/nv)
|
121 |
|
|
RMSE_mod4 <- sqrt(sum(res_mod4^2)/nv)
|
122 |
|
|
RMSE_mod5 <- sqrt(sum(res_mod5^2)/nv)
|
123 |
|
|
RMSE_mod6 <- sqrt(sum(res_mod6^2)/nv)
|
124 |
2a89fa5f
|
Benoit Parmentier
|
|
125 |
aac7c36c
|
Benoit Parmentier
|
|
126 |
|
|
results_RMSE[i,1]<- dates[i] #storing the interpolation dates in the first column
|
127 |
|
|
results_RMSE[i,2]<- ns #number of stations used in the training stage
|
128 |
|
|
results_RMSE[i,3]<- RMSE_mod1
|
129 |
|
|
results_RMSE[i,4]<- RMSE_mod2
|
130 |
|
|
results_RMSE[i,5]<- RMSE_mod3
|
131 |
|
|
results_RMSE[i,6]<- RMSE_mod4
|
132 |
|
|
results_RMSE[i,7]<- RMSE_mod5
|
133 |
|
|
results_RMSE[i,8]<- RMSE_mod6
|
134 |
25a68ae3
|
Benoit Parmentier
|
|
135 |
|
|
data_name<-paste("ghcn_v_",dates[[i]],sep="")
|
136 |
|
|
assign(data_name,data_v)
|
137 |
|
|
#ghcn_v<-ls(pattern="ghcn_v_")
|
138 |
2a89fa5f
|
Benoit Parmentier
|
|
139 |
|
|
# end of the for loop #1
|
140 |
|
|
}
|
141 |
|
|
|
142 |
|
|
## Plotting and saving diagnostic measures
|
143 |
|
|
|
144 |
aac7c36c
|
Benoit Parmentier
|
results_RMSEnum <-results_RMSE
|
145 |
|
|
mode(results_RMSEnum)<- "numeric"
|
146 |
2a89fa5f
|
Benoit Parmentier
|
# Make it numeric first
|
147 |
|
|
# Now turn it into a data.frame...
|
148 |
|
|
|
149 |
aac7c36c
|
Benoit Parmentier
|
results_table_RMSE<-as.data.frame(results_RMSEnum)
|
150 |
|
|
colnames(results_table_RMSE)<-c("dates","ns","mod1", "mod2","mod3", "mod4", "mod5", "mod6")
|
151 |
2a89fa5f
|
Benoit Parmentier
|
|
152 |
|
|
win.graph()
|
153 |
0d1cde0a
|
Benoit Parmentier
|
barplot(results_table$RMSE_A1/10,main="RMSE for the A1 models",names.arg=results_table$dates,ylab="Temp (deg. C)",xlab="interpolated date")
|
154 |
2a89fa5f
|
Benoit Parmentier
|
savePlot(paste("GAM_ANUSPLIN1_RMSE",out_prefix,".emf", sep=""), type="emf")
|
155 |
|
|
win.graph()
|
156 |
0d1cde0a
|
Benoit Parmentier
|
barplot(results_table$RMSE_P1/10,main="RMSE for the P1 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date")
|
157 |
25a68ae3
|
Benoit Parmentier
|
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf")
|
158 |
|
|
win.graph()
|
159 |
0d1cde0a
|
Benoit Parmentier
|
barplot(results_table$RMSE_P2/10,main="RMSE for the P2 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date")
|
160 |
2a89fa5f
|
Benoit Parmentier
|
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf")
|
161 |
|
|
win.graph()
|
162 |
0d1cde0a
|
Benoit Parmentier
|
barplot(results_table$AIC_A1,main="AIC for the A1 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date")
|
163 |
2a89fa5f
|
Benoit Parmentier
|
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf")
|
164 |
|
|
win.graph()
|
165 |
0d1cde0a
|
Benoit Parmentier
|
barplot(results_table$AIC_P1/10,main="AIC for the P1 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date")
|
166 |
2a89fa5f
|
Benoit Parmentier
|
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf")
|
167 |
4180b123
|
Benoit Parmentier
|
win.graph()
|
168 |
0d1cde0a
|
Benoit Parmentier
|
barplot(results_table$AIC_P2/10,main="AIC for the P2 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date")
|
169 |
25a68ae3
|
Benoit Parmentier
|
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf")
|
170 |
|
|
win.graph()
|
171 |
0d1cde0a
|
Benoit Parmentier
|
barplot(results_table$Deviance_A1/10,main="Deviance for the A1 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date")
|
172 |
4180b123
|
Benoit Parmentier
|
savePlot(paste("GAM_ANUSPLIN1_Deviance",out_prefix,".emf", sep=""), type="emf")
|
173 |
|
|
win.graph()
|
174 |
0d1cde0a
|
Benoit Parmentier
|
barplot(results_table$Deviance_P1/10,main="Deviance for the P1 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date")
|
175 |
4180b123
|
Benoit Parmentier
|
savePlot(paste("GAM_PRISM1_Deviance",out_prefix,".emf", sep=""), type="emf")
|
176 |
25a68ae3
|
Benoit Parmentier
|
win.graph()
|
177 |
0d1cde0a
|
Benoit Parmentier
|
barplot(results_table$Deviance_P2/10,main="Deviance for the P2 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date")
|
178 |
25a68ae3
|
Benoit Parmentier
|
savePlot(paste("GAM_PRISM2_Deviance",out_prefix,".emf", sep=""), type="emf")
|
179 |
|
|
|
180 |
2a89fa5f
|
Benoit Parmentier
|
write.csv(results_table, file= paste(path,"/","results_GAM_Assessment",out_prefix,".txt",sep=""))
|
181 |
|
|
|
182 |
4180b123
|
Benoit Parmentier
|
|
183 |
2a89fa5f
|
Benoit Parmentier
|
# End of script##########
|
184 |
|
|
|
185 |
|
|
# ###############################
|
186 |
bc59c6bb
|
Benoit Parmentier
|
|
187 |
|
|
#
|
188 |
|
|
# ############Diagnostic GAM plots#############
|
189 |
|
|
# win.graph()
|
190 |
|
|
# gam.check(GAM_ANUSPLIN1)
|
191 |
|
|
# savePlot("GAM_ANUSPLIN1_diagnostic1.emf", type="emf")
|
192 |
|
|
# win.graph()
|
193 |
|
|
# gam.check(GAM_PRISM1) #This will produce basic plots of residuals
|
194 |
|
|
# savePlot("GAM_PRISM_diagnostic1.emf", type="emf")
|
195 |
|
|
# gam.check(GAM_ANUSPLIN1)
|
196 |
|
|
# win.graph()
|
197 |
|
|
# vis.gam(GAM_ANUSPLIN1)
|
198 |
|
|
# savePlot("GAM_ANUSPLIN1_prediction.emf", type="emf")
|
199 |
|
|
# win.graph()
|
200 |
|
|
# vis.gam(GAM_PRISM1)
|
201 |
|
|
# savePlot("GAM_PRISM1_prediction.emf", type="emf")
|
202 |
|
|
# win.graph()
|
203 |
|
|
# vis.gam(GAM_ANUSPLIN1, view=c("lat","ELEV_SRTM"))
|
204 |
|
|
# #vis.gam(GAM_ANUSPLIN1, view=c("lat","ELEV_SRTM", theta=100,phi=200))
|
205 |
|
|
# savePlot("GAM_ANUSPLIN1_prediction2.emf", type="emf")
|
206 |
2a89fa5f
|
Benoit Parmentier
|
#
|
207 |
25a68ae3
|
Benoit Parmentier
|
|
208 |
bc59c6bb
|
Benoit Parmentier
|
#
|
209 |
2a89fa5f
|
Benoit Parmentier
|
|
210 |
89c6eee2
|
Benoit Parmentier
|
|
211 |
|
|
|
212 |
|
|
|
213 |
e2d673e4
|
Benoit Parmentier
|
|