Revision 38eccf20
Added by Benoit Parmentier over 12 years ago
climate/research/oregon/interpolation/GAM_LST_sampling_reg.R | ||
---|---|---|
27 | 27 |
setwd(path) |
28 | 28 |
infile2<-"dates_interpolation_03052012.txt" #List of 10 dates for the regression |
29 | 29 |
prop<-0.3 #Proportion of testing retained for validation |
30 |
n_runs<- 2 #Number of runs
|
|
31 |
out_prefix<-"_04252012_run30_LST"
|
|
32 |
infile3<-"LST_dates_var_names.txt" |
|
33 |
infile4<-"models_interpolation_04032012b.txt"
|
|
30 |
n_runs<- 30 #Number of runs
|
|
31 |
out_prefix<-"_05142012_run03_LST"
|
|
32 |
infile3<-"LST_dates_var_names.txt" #List of LST averages.
|
|
33 |
infile4<-"models_interpolation_05142012.txt"
|
|
34 | 34 |
|
35 | 35 |
#######START OF THE SCRIPT ############# |
36 | 36 |
|
... | ... | |
43 | 43 |
ghcn = transform(ghcn,Northness_w = sin(slope)*cos(ASPECT)) #Adding a variable to the dataframe |
44 | 44 |
ghcn = transform(ghcn,Eastness_w = sin(slope)*sin(ASPECT)) #adding variable to the dataframe. |
45 | 45 |
#Note that "transform" output is a data.frame not spatial object |
46 |
#set.seed(100) |
|
46 |
#set.seed(100) #modify this to a seed variable allowing different runs. |
|
47 |
|
|
47 | 48 |
dates <-readLines(paste(path,"/",infile2, sep="")) |
48 | 49 |
LST_dates <-readLines(paste(path,"/",infile3, sep="")) |
49 | 50 |
models <-readLines(paste(path,"/",infile4, sep="")) |
50 | 51 |
|
51 | 52 |
#results <- matrix(1,length(dates),14) #This is a matrix containing the diagnostic measures from the GAM models. |
52 | 53 |
|
53 |
results_AIC<- matrix(1,length(dates),length(models)+2)
|
|
54 |
results_GCV<- matrix(1,length(dates),length(models)+2)
|
|
55 |
results_DEV<- matrix(1,length(dates),length(models)+2)
|
|
56 |
results_RMSE<- matrix(1,length(dates),length(models)+2)
|
|
54 |
results_AIC<- matrix(1,length(dates),length(models)+3)
|
|
55 |
results_GCV<- matrix(1,length(dates),length(models)+3)
|
|
56 |
results_DEV<- matrix(1,length(dates),length(models)+3)
|
|
57 |
results_RMSE<- matrix(1,length(dates),length(models)+3)
|
|
57 | 58 |
RMSE_run<-matrix(1,length(dates),1) |
58 | 59 |
cor_LST_LC1<-matrix(1,length(dates),1) #correlation LST-LC1 |
59 | 60 |
cor_LST_LC3<-matrix(1,length(dates),1) #correlation LST-LC3 |
60 | 61 |
cor_LST_tmax<-matrix(1,length(dates),1) #correlation LST-tmax |
61 | 62 |
#Screening for bad values |
62 |
RMSE_run<-matrix(1,lenth) |
|
63 |
#RMSE_run<-matrix(1,lenth)
|
|
63 | 64 |
results_RMSE_all<- matrix(1,length(dates)*n_runs,length(models)+3) |
64 |
|
|
65 |
results_RMSE_all2<- matrix(1,0,length(models)+3) |
|
65 | 66 |
ghcn_all<-ghcn |
66 | 67 |
ghcn_test<-subset(ghcn,ghcn$tmax>-150 & ghcn$tmax<400) |
67 | 68 |
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0) |
... | ... | |
103 | 104 |
mod5<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST), data=data_s) |
104 | 105 |
mod6<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC1), data=data_s) |
105 | 106 |
mod7<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC3), data=data_s) |
107 |
mod8<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST) + s(LC1), data=data_s) |
|
106 | 108 |
|
107 | 109 |
####Regression part 3: Calculating and storing diagnostic measures |
108 | 110 |
|
109 | 111 |
results_AIC[i,1]<- j |
110 |
results_AIC[i,1]<- dates[i] #storing the interpolation dates in the first column
|
|
111 |
results_AIC[i,2]<- ns #number of stations used in the training stage
|
|
112 |
results_AIC[i,3]<- AIC (mod1)
|
|
113 |
results_AIC[i,4]<- AIC (mod2)
|
|
114 |
results_AIC[i,5]<- AIC (mod3)
|
|
115 |
results_AIC[i,6]<- AIC (mod4)
|
|
116 |
results_AIC[i,7]<- AIC (mod5)
|
|
117 |
results_AIC[i,8]<- AIC (mod6)
|
|
118 |
results_AIC[i,9]<- AIC (mod7)
|
|
112 |
results_AIC[i,2]<- dates[i] #storing the interpolation dates in the first column
|
|
113 |
results_AIC[i,3]<- ns #number of stations used in the training stage
|
|
114 |
results_AIC[i,4]<- AIC (mod1)
|
|
115 |
results_AIC[i,5]<- AIC (mod2)
|
|
116 |
results_AIC[i,6]<- AIC (mod3)
|
|
117 |
results_AIC[i,7]<- AIC (mod4)
|
|
118 |
results_AIC[i,8]<- AIC (mod5)
|
|
119 |
results_AIC[i,9]<- AIC (mod6)
|
|
120 |
results_AIC[i,10]<- AIC (mod7)
|
|
119 | 121 |
|
120 |
results_GCV[i,1]<- dates[i] #storing the interpolation dates in the first column |
|
121 |
results_GCV[i,2]<- ns #number of stations used in the training stage |
|
122 |
results_GCV[i,3]<- mod1$gcv.ubre |
|
123 |
results_GCV[i,4]<- mod2$gcv.ubre |
|
124 |
results_GCV[i,5]<- mod3$gcv.ubre |
|
125 |
results_GCV[i,6]<- mod4$gcv.ubre |
|
126 |
results_GCV[i,7]<- mod5$gcv.ubre |
|
127 |
results_GCV[i,8]<- mod6$gcv.ubre |
|
128 |
results_GCV[i,9]<- mod7$gcv.ubre |
|
122 |
results_GCV[i,1]<- j # "j" is the counter index for the number of runs. |
|
123 |
results_GCV[i,2]<- dates[i] #storing the interpolation dates in the first column |
|
124 |
results_GCV[i,3]<- ns #number of stations used in the training stage |
|
125 |
results_GCV[i,4]<- mod1$gcv.ubre |
|
126 |
results_GCV[i,5]<- mod2$gcv.ubre |
|
127 |
results_GCV[i,6]<- mod3$gcv.ubre |
|
128 |
results_GCV[i,7]<- mod4$gcv.ubre |
|
129 |
results_GCV[i,8]<- mod5$gcv.ubre |
|
130 |
results_GCV[i,9]<- mod6$gcv.ubre |
|
131 |
results_GCV[i,10]<- mod7$gcv.ubre |
|
129 | 132 |
|
130 |
results_DEV[i,1]<- dates[i] #storing the interpolation dates in the first column |
|
131 |
results_DEV[i,2]<- ns #number of stations used in the training stage |
|
132 |
results_DEV[i,3]<- mod1$deviance |
|
133 |
results_DEV[i,4]<- mod2$deviance |
|
134 |
results_DEV[i,5]<- mod3$deviance |
|
135 |
results_DEV[i,6]<- mod4$deviance |
|
136 |
results_DEV[i,7]<- mod5$deviance |
|
137 |
results_DEV[i,8]<- mod6$deviance |
|
138 |
results_DEV[i,9]<- mod7$deviance |
|
133 |
results_DEV[i,1]<- j # "j" is the counter index for the number of runs. |
|
134 |
results_DEV[i,2]<- dates[i] #storing the interpolation dates in the first column |
|
135 |
results_DEV[i,3]<- ns #number of stations used in the training stage |
|
136 |
results_DEV[i,4]<- mod1$deviance |
|
137 |
results_DEV[i,5]<- mod2$deviance |
|
138 |
results_DEV[i,6]<- mod3$deviance |
|
139 |
results_DEV[i,7]<- mod4$deviance |
|
140 |
results_DEV[i,8]<- mod5$deviance |
|
141 |
results_DEV[i,9]<- mod6$deviance |
|
142 |
results_DEV[i,10]<- mod7$deviance |
|
139 | 143 |
|
140 | 144 |
#####VALIDATION: Prediction checking the results using the testing data######## |
141 | 145 |
|
... | ... | |
163 | 167 |
RMSE_mod6 <- sqrt(sum(res_mod6^2)/nv) |
164 | 168 |
RMSE_mod7 <- sqrt(sum(res_mod7^2)/nv) |
165 | 169 |
|
166 |
results_RMSE[i,1]<- dates[i] #storing the interpolation dates in the first column |
|
167 |
results_RMSE[i,2]<- ns #number of stations used in the training stage |
|
168 |
results_RMSE[i,3]<- RMSE_mod1 |
|
169 |
results_RMSE[i,4]<- RMSE_mod2 |
|
170 |
results_RMSE[i,5]<- RMSE_mod3 |
|
171 |
results_RMSE[i,6]<- RMSE_mod4 |
|
172 |
results_RMSE[i,7]<- RMSE_mod5 |
|
173 |
results_RMSE[i,8]<- RMSE_mod6 |
|
174 |
results_RMSE[i,9]<- RMSE_mod7 |
|
170 |
results_RMSE[i,1]<- j |
|
171 |
results_RMSE[i,2]<- dates[i] #storing the interpolation dates in the first column |
|
172 |
results_RMSE[i,3]<- ns #number of stations used in the training stage |
|
173 |
results_RMSE[i,4]<- RMSE_mod1 |
|
174 |
results_RMSE[i,5]<- RMSE_mod2 |
|
175 |
results_RMSE[i,6]<- RMSE_mod3 |
|
176 |
results_RMSE[i,7]<- RMSE_mod4 |
|
177 |
results_RMSE[i,8]<- RMSE_mod5 |
|
178 |
results_RMSE[i,9]<- RMSE_mod6 |
|
179 |
results_RMSE[i,10]<- RMSE_mod7 |
|
175 | 180 |
#Saving dataset in dataframes |
176 | 181 |
data_name<-paste("ghcn_v_",dates[[i]],sep="") |
177 | 182 |
assign(data_name,data_v) |
178 | 183 |
data_name<-paste("ghcn_s_",dates[[i]],sep="") |
179 | 184 |
assign(data_name,data_s) |
180 | 185 |
#ghcn_v<-ls(pattern="ghcn_v_") |
181 |
RMSE_run[i]<-j |
|
182 | 186 |
# end of the for loop #2 (nested in loop #1) |
183 | 187 |
} |
184 |
results_RMSE_all |
|
188 |
|
|
185 | 189 |
results_RMSEnum <-results_RMSE |
186 | 190 |
results_AICnum <-results_AIC |
187 | 191 |
mode(results_RMSEnum)<- "numeric" |
... | ... | |
191 | 195 |
|
192 | 196 |
results_table_RMSE<-as.data.frame(results_RMSEnum) |
193 | 197 |
results_table_AIC<-as.data.frame(results_AICnum) |
194 |
colnames(results_table_RMSE)<-c("dates","ns","mod1", "mod2","mod3", "mod4", "mod5", "mod6", "mod7") |
|
195 |
colnames(results_table_AIC)<-c("dates","ns","mod1", "mod2","mod3", "mod4", "mod5", "mod6", "mod7") |
|
198 |
colnames(results_table_RMSE)<-c("run","dates","ns","mod1", "mod2","mod3", "mod4", "mod5", "mod6", "mod7")
|
|
199 |
colnames(results_table_AIC)<-c("run","dates","ns","mod1", "mod2","mod3", "mod4", "mod5", "mod6", "mod7")
|
|
196 | 200 |
|
197 |
write.table(results_table_RMSE, file= paste(path,"/","results_GAM_Assessment",out_prefix,".txt",sep=""), sep=",", append=TRUE) |
|
201 |
write.table(results_table_RMSE, file= paste(path,"/","results_GAM_Assessment",out_prefix,".txt",sep=""), sep=",", col.names=FALSE, append=TRUE)
|
|
198 | 202 |
#write.table(results_table_AIC, file= paste(path,"/","results_GAM_Assessment",out_prefix,".txt",sep=""),sep=",", append=TRUE) |
199 | 203 |
|
200 |
Print(j) #This is the run umber printed to the console. |
|
204 |
#results_RMSE_all Need to put all the results in one data.frame... |
|
205 |
results_RMSE_all2<-rbind(results_RMSE_all2,results_table_RMSE) |
|
206 |
#Print(j) #This is the run umber printed to the console. |
|
201 | 207 |
} #end of loop 1 |
202 | 208 |
|
209 |
write.table(results_RMSE_all2, file= paste(path,"/","results_GAM_Assessment",out_prefix,"all.txt",sep=""), sep=",", col.names=TRUE) |
|
210 |
|
|
211 |
|
|
212 |
###Analysing the results from the 365 days run: Summarize by month |
|
213 |
# |
|
214 |
# for(i in 1:nrow(results_table_RMSE)){ |
|
215 |
# date<-results_table_RMSE$dates[i] |
|
216 |
# date<-strptime(date, "%Y%m%d") |
|
217 |
# results_table_RMSE$month[i]<-as.integer(strftime(date, "%m")) |
|
218 |
# } |
|
219 |
# |
|
220 |
# average<-aggregate(cbind(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8)~month,data=results_table_RMSE,mean, na.rm=TRUE) |
|
221 |
# average<-aggregate(cbind(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8)~month,data=results_table_RMSE, FUN=mean) |
|
222 |
# #average on all the data.frame |
|
223 |
# averaget<-aggregate(results_table_RMSE, by=list(results_table_RMSE$month),FUN=mean, na.rm=TRUE) |
|
224 |
# #mediant<-aggregate(results_table_RMSE, by=list(results_table_RMSE$month),FUN=median, na.rm=TRUE) |
|
225 |
# #average_lowt<-aggregate(results_table_RMSE, by=list(results_table_RMSE$month), FUN=function(v) t.test(v)$conf.int[1]) |
|
226 |
# #average_up<-aggregate(cbind(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8)~month,data=results_table_RMSE, function(v) t.test(v)$conf.int[2]) |
|
227 |
# |
|
228 |
# median<-aggregate(cbind(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8)~month,data=results_table_RMSE, median, na.rm=TRUE) |
|
229 |
# average_low<-aggregate(cbind(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8)~month,data=results_table_RMSE, function(v) t.test(v)$conf.int[1]) |
|
230 |
# average_up<-aggregate(cbind(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8)~month,data=results_table_RMSE, function(v) t.test(v)$conf.int[2]) |
|
231 |
# |
|
232 |
# mod<-names(averaget) |
|
233 |
# mod<-mod[4:11] |
|
234 |
# #Saving graphic plots |
|
235 |
# for(i in 1:length(mod)){ |
|
236 |
# X11(width=14,height=10) |
|
237 |
# name<-mod[i] |
|
238 |
# barplot2(average[[name]],plot.ci=TRUE, ci.l=average_low[[name]], ci.u=average_up[[name]],main="Mean RMSE per month", names.arg=c("Jan", "Feb", "Mar", "Apr", "May", "Jun","Jul", "Aug", "Sep","Oct", "Nov", "Dec"),ylim=c(20,30),ylab="RMSE in tenth deg C",xlab=name) |
|
239 |
# #title(paste("Sampling RMSE for mod",i,sep="")) |
|
240 |
# savePlot(paste("barplot_results_RMSE_month_",name,out_prefix,".png", sep=""), type="png") |
|
241 |
# dev.off() |
|
242 |
# } |
|
243 |
# |
|
244 |
# |
|
245 |
# for(i in 1:length(mod)){ |
|
246 |
# X11(width=14,height=10) |
|
247 |
# name<-mod[i] |
|
248 |
# barplot2(average[[name]],plot.ci=TRUE, ci.l=average_low[[name]], ci.u=average_up[[name]],main=paste(" Mean RMSE per month ",name, sep=""), names.arg=c("Jan", "Feb", "Mar", "Apr", "May", "Jun","Jul", "Aug", "Sep","Oct", "Nov", "Dec"),ylim=c(20,30),ylab="RMSE in tenth deg C",xlab=name) |
|
249 |
# #title(paste("Sampling RMSE for mod",i,sep="")) |
|
250 |
# savePlot(paste("barplot_results_RMSE_month_",name,out_prefix,".png", sep=""), type="png") |
|
251 |
# dev.off() |
|
252 |
# |
|
253 |
# X11(width=14,height=10) |
|
254 |
# name<-mod[i] |
|
255 |
# hist(results_table_RMSE[[name]],breaks=15, main=paste(" Histogram RMSE_",name, sep=""),xlab=paste("RMSE ",name, sep="")) |
|
256 |
# savePlot(paste("Hist_results_RMSE_365_",name,out_prefix,".png", sep=""), type="png") |
|
257 |
# dev.off() |
|
258 |
# |
|
259 |
# } |
|
260 |
# |
|
261 |
# for(i in 1:length(mod)){ |
|
262 |
# X11(width=14,height=10) |
|
263 |
# name<-mod[i] |
|
264 |
# hist(results_table_RMSE[[name]],breaks=15, main=paste(" Histogram RMSE_",name, sep=""),xlab=paste("RMSE ",name, sep="")) |
|
265 |
# savePlot(paste("Hist_results_RMSE_365_",name,out_prefix,".png", sep=""), type="png") |
|
266 |
# dev.off() |
|
267 |
# } |
|
268 |
# |
|
269 |
# r<-(results_RMSE_all2[,4:10]) #selecting only the columns related to models... |
|
270 |
# |
|
271 |
# mean_r<-sapply(r, mean) |
|
272 |
# median_r<-sapply(r, median) |
|
273 |
# sd_r<-sapply(r, sd) |
|
274 |
# |
|
275 |
# barplot(mean_r,ylim=c(20,26),ylab="RMSE in tenth deg C") |
|
276 |
# barplot(median_r,ylim=c(20,26),ylab="RMSE in tenth deg C",add=TRUE,inside=FALSE,beside=TRUE) # put both on the same plot |
|
277 |
# barplot(sd_r,ylim=c(6,8),ylab="RMSE in tenth deg C") # put both on the same plot |
|
278 |
# |
|
279 |
# height<-rbind(mean_r,median_r) |
|
280 |
# barplot(height,ylim=c(20,26),ylab="RMSE in tenth deg C",beside=TRUE,legend=rownames(height)) |
|
281 |
# barplot(height,ylim=c(20,26),ylab="RMSE in tenth deg C",beside=TRUE, col=c("darkblue","red"),legend=rownames(height)) # put both on the same plot |
|
282 |
# PNG(paste("Barplot_results_RMSE_sampling_",out_prefix,".png", sep="")) |
|
283 |
# |
|
284 |
# barplot2(mean_r,median_r,ylim=c(23,26),ylab="RMSE in tenth deg C") # put both on the same plot |
|
285 |
# #Collect var explained and p values for each var... |
|
286 |
# |
|
287 |
# mod<-names(mean_r) |
|
288 |
# average<-mean_r |
|
289 |
# average_low<-mean_r-sd_r |
|
290 |
# average_up<-mean_r+sd_r |
|
291 |
# |
|
292 |
# for(i in 1:length(mod)){ |
|
293 |
# #X11(width=14,height=10) |
|
294 |
# name<-mod[i] |
|
295 |
# barplot2(average[[name]],plot.ci=TRUE, ci.l=average_low[[name]], ci.u=average_up[[name]],main=paste(" Mean RMSE per month ",name, sep=""), names.arg=c("mod1", "mod2", "mod3", "mod4", "mod5", "mod6","mod7"),ylim=c(20,30),ylab="RMSE in tenth deg C",xlab=name) |
|
296 |
# #title(paste("Sampling RMSE for mod",i,sep="")) |
|
297 |
# #savePlot(paste("barplot_results_RMSE_month_",name,out_prefix,".png", sep=""), type="png") |
|
298 |
# #dev.off() |
|
299 |
# } |
|
300 |
|
|
203 | 301 |
# End of script########## |
204 | 302 |
|
205 | 303 |
#Selecting dates and files based on names |
Also available in: Unified diff
GAM LST, adding lines to summarize samling RMSE using plots, task #409