Project

General

Profile

« Previous | Next » 

Revision 38eccf20

Added by Benoit Parmentier over 12 years ago

GAM LST, adding lines to summarize samling RMSE using plots, task #409

View differences:

climate/research/oregon/interpolation/GAM_LST_sampling_reg.R
27 27
setwd(path) 
28 28
infile2<-"dates_interpolation_03052012.txt"               #List of 10 dates for the regression
29 29
prop<-0.3  #Proportion of testing retained for validation   
30
n_runs<- 2 #Number of runs
31
out_prefix<-"_04252012_run30_LST"
32
infile3<-"LST_dates_var_names.txt"
33
infile4<-"models_interpolation_04032012b.txt"
30
n_runs<- 30 #Number of runs
31
out_prefix<-"_05142012_run03_LST"
32
infile3<-"LST_dates_var_names.txt"     #List of LST averages.
33
infile4<-"models_interpolation_05142012.txt"
34 34

  
35 35
#######START OF THE SCRIPT #############
36 36

  
......
43 43
ghcn = transform(ghcn,Northness_w = sin(slope)*cos(ASPECT)) #Adding a variable to the dataframe
44 44
ghcn = transform(ghcn,Eastness_w = sin(slope)*sin(ASPECT))  #adding variable to the dataframe.
45 45
                                              #Note that "transform" output is a data.frame not spatial object 
46
#set.seed(100)
46
#set.seed(100) #modify this to a seed variable allowing different runs.
47

  
47 48
dates <-readLines(paste(path,"/",infile2, sep=""))
48 49
LST_dates <-readLines(paste(path,"/",infile3, sep=""))
49 50
models <-readLines(paste(path,"/",infile4, sep=""))
50 51

  
51 52
#results <- matrix(1,length(dates),14)            #This is a matrix containing the diagnostic measures from the GAM models.
52 53

  
53
results_AIC<- matrix(1,length(dates),length(models)+2)  
54
results_GCV<- matrix(1,length(dates),length(models)+2)
55
results_DEV<- matrix(1,length(dates),length(models)+2)
56
results_RMSE<- matrix(1,length(dates),length(models)+2)
54
results_AIC<- matrix(1,length(dates),length(models)+3)  
55
results_GCV<- matrix(1,length(dates),length(models)+3)
56
results_DEV<- matrix(1,length(dates),length(models)+3)
57
results_RMSE<- matrix(1,length(dates),length(models)+3)
57 58
RMSE_run<-matrix(1,length(dates),1)
58 59
cor_LST_LC1<-matrix(1,length(dates),1)      #correlation LST-LC1
59 60
cor_LST_LC3<-matrix(1,length(dates),1)      #correlation LST-LC3
60 61
cor_LST_tmax<-matrix(1,length(dates),1)    #correlation LST-tmax
61 62
#Screening for bad values
62
RMSE_run<-matrix(1,lenth)
63
#RMSE_run<-matrix(1,lenth)
63 64
results_RMSE_all<- matrix(1,length(dates)*n_runs,length(models)+3)
64

  
65
results_RMSE_all2<- matrix(1,0,length(models)+3)
65 66
ghcn_all<-ghcn
66 67
ghcn_test<-subset(ghcn,ghcn$tmax>-150 & ghcn$tmax<400)
67 68
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0)
......
103 104
    mod5<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST), data=data_s)
104 105
    mod6<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC1), data=data_s)
105 106
    mod7<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC3), data=data_s)
107
    mod8<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST) + s(LC1), data=data_s)
106 108
    
107 109
    ####Regression part 3: Calculating and storing diagnostic measures
108 110
    
109 111
    results_AIC[i,1]<- j
110
    results_AIC[i,1]<- dates[i]  #storing the interpolation dates in the first column
111
    results_AIC[i,2]<- ns        #number of stations used in the training stage
112
    results_AIC[i,3]<- AIC (mod1)
113
    results_AIC[i,4]<- AIC (mod2)
114
    results_AIC[i,5]<- AIC (mod3)
115
    results_AIC[i,6]<- AIC (mod4)
116
    results_AIC[i,7]<- AIC (mod5)
117
    results_AIC[i,8]<- AIC (mod6)
118
    results_AIC[i,9]<- AIC (mod7)
112
    results_AIC[i,2]<- dates[i]  #storing the interpolation dates in the first column
113
    results_AIC[i,3]<- ns        #number of stations used in the training stage
114
    results_AIC[i,4]<- AIC (mod1)
115
    results_AIC[i,5]<- AIC (mod2)
116
    results_AIC[i,6]<- AIC (mod3)
117
    results_AIC[i,7]<- AIC (mod4)
118
    results_AIC[i,8]<- AIC (mod5)
119
    results_AIC[i,9]<- AIC (mod6)
120
    results_AIC[i,10]<- AIC (mod7)
119 121
    
120
    results_GCV[i,1]<- dates[i]  #storing the interpolation dates in the first column
121
    results_GCV[i,2]<- ns        #number of stations used in the training stage
122
    results_GCV[i,3]<- mod1$gcv.ubre
123
    results_GCV[i,4]<- mod2$gcv.ubre
124
    results_GCV[i,5]<- mod3$gcv.ubre
125
    results_GCV[i,6]<- mod4$gcv.ubre
126
    results_GCV[i,7]<- mod5$gcv.ubre
127
    results_GCV[i,8]<- mod6$gcv.ubre
128
    results_GCV[i,9]<- mod7$gcv.ubre
122
    results_GCV[i,1]<- j         # "j" is the counter index for the number of runs.
123
    results_GCV[i,2]<- dates[i]  #storing the interpolation dates in the first column
124
    results_GCV[i,3]<- ns        #number of stations used in the training stage
125
    results_GCV[i,4]<- mod1$gcv.ubre
126
    results_GCV[i,5]<- mod2$gcv.ubre
127
    results_GCV[i,6]<- mod3$gcv.ubre
128
    results_GCV[i,7]<- mod4$gcv.ubre
129
    results_GCV[i,8]<- mod5$gcv.ubre
130
    results_GCV[i,9]<- mod6$gcv.ubre
131
    results_GCV[i,10]<- mod7$gcv.ubre
129 132
    
130
    results_DEV[i,1]<- dates[i]  #storing the interpolation dates in the first column
131
    results_DEV[i,2]<- ns        #number of stations used in the training stage
132
    results_DEV[i,3]<- mod1$deviance
133
    results_DEV[i,4]<- mod2$deviance
134
    results_DEV[i,5]<- mod3$deviance
135
    results_DEV[i,6]<- mod4$deviance
136
    results_DEV[i,7]<- mod5$deviance
137
    results_DEV[i,8]<- mod6$deviance
138
    results_DEV[i,9]<- mod7$deviance
133
    results_DEV[i,1]<- j         # "j" is the counter index for the number of runs.
134
    results_DEV[i,2]<- dates[i]  #storing the interpolation dates in the first column
135
    results_DEV[i,3]<- ns        #number of stations used in the training stage
136
    results_DEV[i,4]<- mod1$deviance
137
    results_DEV[i,5]<- mod2$deviance
138
    results_DEV[i,6]<- mod3$deviance
139
    results_DEV[i,7]<- mod4$deviance
140
    results_DEV[i,8]<- mod5$deviance
141
    results_DEV[i,9]<- mod6$deviance
142
    results_DEV[i,10]<- mod7$deviance
139 143
    
140 144
    #####VALIDATION: Prediction checking the results using the testing data########
141 145
    
......
163 167
    RMSE_mod6 <- sqrt(sum(res_mod6^2)/nv)
164 168
    RMSE_mod7 <- sqrt(sum(res_mod7^2)/nv)
165 169
    
166
    results_RMSE[i,1]<- dates[i]  #storing the interpolation dates in the first column
167
    results_RMSE[i,2]<- ns        #number of stations used in the training stage
168
    results_RMSE[i,3]<- RMSE_mod1
169
    results_RMSE[i,4]<- RMSE_mod2
170
    results_RMSE[i,5]<- RMSE_mod3
171
    results_RMSE[i,6]<- RMSE_mod4
172
    results_RMSE[i,7]<- RMSE_mod5
173
    results_RMSE[i,8]<- RMSE_mod6
174
    results_RMSE[i,9]<- RMSE_mod7
170
    results_RMSE[i,1]<- j
171
    results_RMSE[i,2]<- dates[i]  #storing the interpolation dates in the first column
172
    results_RMSE[i,3]<- ns        #number of stations used in the training stage
173
    results_RMSE[i,4]<- RMSE_mod1
174
    results_RMSE[i,5]<- RMSE_mod2
175
    results_RMSE[i,6]<- RMSE_mod3
176
    results_RMSE[i,7]<- RMSE_mod4
177
    results_RMSE[i,8]<- RMSE_mod5
178
    results_RMSE[i,9]<- RMSE_mod6
179
    results_RMSE[i,10]<- RMSE_mod7
175 180
    #Saving dataset in dataframes
176 181
    data_name<-paste("ghcn_v_",dates[[i]],sep="")
177 182
    assign(data_name,data_v)
178 183
    data_name<-paste("ghcn_s_",dates[[i]],sep="")
179 184
    assign(data_name,data_s)
180 185
    #ghcn_v<-ls(pattern="ghcn_v_")
181
    RMSE_run[i]<-j
182 186
    # end of the for loop #2 (nested in loop #1)
183 187
  }
184
  results_RMSE_all
188

  
185 189
  results_RMSEnum <-results_RMSE
186 190
  results_AICnum <-results_AIC
187 191
  mode(results_RMSEnum)<- "numeric"
......
191 195
  
192 196
  results_table_RMSE<-as.data.frame(results_RMSEnum)
193 197
  results_table_AIC<-as.data.frame(results_AICnum)
194
  colnames(results_table_RMSE)<-c("dates","ns","mod1", "mod2","mod3", "mod4", "mod5", "mod6", "mod7")
195
  colnames(results_table_AIC)<-c("dates","ns","mod1", "mod2","mod3", "mod4", "mod5", "mod6", "mod7")
198
  colnames(results_table_RMSE)<-c("run","dates","ns","mod1", "mod2","mod3", "mod4", "mod5", "mod6", "mod7")
199
  colnames(results_table_AIC)<-c("run","dates","ns","mod1", "mod2","mod3", "mod4", "mod5", "mod6", "mod7")
196 200
  
197
  write.table(results_table_RMSE, file= paste(path,"/","results_GAM_Assessment",out_prefix,".txt",sep=""), sep=",", append=TRUE)
201
  write.table(results_table_RMSE, file= paste(path,"/","results_GAM_Assessment",out_prefix,".txt",sep=""), sep=",", col.names=FALSE, append=TRUE)
198 202
  #write.table(results_table_AIC, file= paste(path,"/","results_GAM_Assessment",out_prefix,".txt",sep=""),sep=",", append=TRUE)
199 203
  
200
  Print(j) #This is the run umber printed to the console.
204
  #results_RMSE_all Need to put all the results in one data.frame...
205
  results_RMSE_all2<-rbind(results_RMSE_all2,results_table_RMSE)
206
  #Print(j) #This is the run umber printed to the console.
201 207
} #end of loop 1
202 208

  
209
write.table(results_RMSE_all2, file= paste(path,"/","results_GAM_Assessment",out_prefix,"all.txt",sep=""), sep=",", col.names=TRUE)
210

  
211

  
212
###Analysing the results from the 365 days run: Summarize by month
213
# 
214
# for(i in 1:nrow(results_table_RMSE)){
215
#   date<-results_table_RMSE$dates[i]
216
#   date<-strptime(date, "%Y%m%d")
217
#   results_table_RMSE$month[i]<-as.integer(strftime(date, "%m"))
218
# }
219
# 
220
# average<-aggregate(cbind(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8)~month,data=results_table_RMSE,mean, na.rm=TRUE)
221
# average<-aggregate(cbind(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8)~month,data=results_table_RMSE, FUN=mean)
222
# #average on all the data.frame
223
# averaget<-aggregate(results_table_RMSE, by=list(results_table_RMSE$month),FUN=mean, na.rm=TRUE)
224
# #mediant<-aggregate(results_table_RMSE, by=list(results_table_RMSE$month),FUN=median, na.rm=TRUE)
225
# #average_lowt<-aggregate(results_table_RMSE, by=list(results_table_RMSE$month), FUN=function(v) t.test(v)$conf.int[1])
226
# #average_up<-aggregate(cbind(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8)~month,data=results_table_RMSE, function(v) t.test(v)$conf.int[2])
227
# 
228
# median<-aggregate(cbind(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8)~month,data=results_table_RMSE, median, na.rm=TRUE)
229
# average_low<-aggregate(cbind(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8)~month,data=results_table_RMSE, function(v) t.test(v)$conf.int[1])
230
# average_up<-aggregate(cbind(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8)~month,data=results_table_RMSE, function(v) t.test(v)$conf.int[2])
231
# 
232
# mod<-names(averaget)
233
# mod<-mod[4:11]
234
# #Saving graphic plots
235
# for(i in 1:length(mod)){
236
#   X11(width=14,height=10)
237
#   name<-mod[i]
238
#   barplot2(average[[name]],plot.ci=TRUE, ci.l=average_low[[name]], ci.u=average_up[[name]],main="Mean RMSE per month", names.arg=c("Jan", "Feb", "Mar", "Apr", "May", "Jun","Jul", "Aug", "Sep","Oct", "Nov", "Dec"),ylim=c(20,30),ylab="RMSE in tenth deg C",xlab=name)
239
#   #title(paste("Sampling RMSE for mod",i,sep=""))
240
#   savePlot(paste("barplot_results_RMSE_month_",name,out_prefix,".png", sep=""), type="png")
241
#   dev.off() 
242
# }
243
# 
244
# 
245
# for(i in 1:length(mod)){
246
#   X11(width=14,height=10)
247
#   name<-mod[i]
248
#   barplot2(average[[name]],plot.ci=TRUE, ci.l=average_low[[name]], ci.u=average_up[[name]],main=paste(" Mean RMSE per month ",name, sep=""), names.arg=c("Jan", "Feb", "Mar", "Apr", "May", "Jun","Jul", "Aug", "Sep","Oct", "Nov", "Dec"),ylim=c(20,30),ylab="RMSE in tenth deg C",xlab=name)
249
#   #title(paste("Sampling RMSE for mod",i,sep=""))
250
#   savePlot(paste("barplot_results_RMSE_month_",name,out_prefix,".png", sep=""), type="png")
251
#   dev.off() 
252
#   
253
#   X11(width=14,height=10)
254
#   name<-mod[i]
255
#   hist(results_table_RMSE[[name]],breaks=15, main=paste(" Histogram RMSE_",name, sep=""),xlab=paste("RMSE ",name, sep=""))
256
#   savePlot(paste("Hist_results_RMSE_365_",name,out_prefix,".png", sep=""), type="png")
257
#   dev.off()
258
#   
259
# }
260
# 
261
# for(i in 1:length(mod)){
262
#   X11(width=14,height=10)
263
#   name<-mod[i]
264
#   hist(results_table_RMSE[[name]],breaks=15, main=paste(" Histogram RMSE_",name, sep=""),xlab=paste("RMSE ",name, sep=""))
265
#   savePlot(paste("Hist_results_RMSE_365_",name,out_prefix,".png", sep=""), type="png")
266
#   dev.off()
267
# }
268
# 
269
# r<-(results_RMSE_all2[,4:10]) #selecting only the columns related to models...
270
# 
271
# mean_r<-sapply(r, mean)
272
# median_r<-sapply(r, median)
273
# sd_r<-sapply(r, sd)
274
# 
275
# barplot(mean_r,ylim=c(20,26),ylab="RMSE in tenth deg C")
276
# barplot(median_r,ylim=c(20,26),ylab="RMSE in tenth deg C",add=TRUE,inside=FALSE,beside=TRUE) # put both on the same plot
277
# barplot(sd_r,ylim=c(6,8),ylab="RMSE in tenth deg C") # put both on the same plot
278
# 
279
# height<-rbind(mean_r,median_r)
280
# barplot(height,ylim=c(20,26),ylab="RMSE in tenth deg C",beside=TRUE,legend=rownames(height))
281
# barplot(height,ylim=c(20,26),ylab="RMSE in tenth deg C",beside=TRUE, col=c("darkblue","red"),legend=rownames(height)) # put both on the same plot
282
# PNG(paste("Barplot_results_RMSE_sampling_",out_prefix,".png", sep=""))
283
# 
284
# barplot2(mean_r,median_r,ylim=c(23,26),ylab="RMSE in tenth deg C") # put both on the same plot
285
# #Collect var explained and p values for each var...
286
# 
287
# mod<-names(mean_r)
288
# average<-mean_r
289
# average_low<-mean_r-sd_r
290
# average_up<-mean_r+sd_r
291
# 
292
# for(i in 1:length(mod)){
293
#   #X11(width=14,height=10)
294
#   name<-mod[i]
295
#   barplot2(average[[name]],plot.ci=TRUE, ci.l=average_low[[name]], ci.u=average_up[[name]],main=paste(" Mean RMSE per month ",name, sep=""), names.arg=c("mod1", "mod2", "mod3", "mod4", "mod5", "mod6","mod7"),ylim=c(20,30),ylab="RMSE in tenth deg C",xlab=name)
296
#   #title(paste("Sampling RMSE for mod",i,sep=""))
297
#   #savePlot(paste("barplot_results_RMSE_month_",name,out_prefix,".png", sep=""), type="png")
298
#   #dev.off()
299
#   }
300

  
203 301
# End of script##########
204 302

  
205 303
#Selecting dates and files based on names

Also available in: Unified diff