Project

General

Profile

« Previous | Next » 

Revision 1508a57e

Added by Benoit Parmentier over 11 years ago

GAM Fusion raster prediction, modification of validation section, call to function for boxplot, Venezuela predictions

View differences:

climate/research/oregon/interpolation/GAM_fusion_analysis_raster_prediction_multisampling.R
8 8
#2)Constant sampling: use the same sample over the runs
9 9
#3)over dates: run over for example 365 dates without mulitsampling
10 10
#4)use seed number: use seed if random samples must be repeatable
11
#5)GAM fusion: possibilty of running GAM+FUSION and other options added
11
#5)GAM fusion: possibilty of running GAM+FUSION or GAM+CAI and other options added
12 12
#The interpolation is done first at the monthly time scale then delta surfaces are added.
13 13
#AUTHOR: Benoit Parmentier                                                                        
14
#DATE: 02/20/2013                                                                                 
15
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--                                   
14
#DATE: 02/26/2013                                                                                 
15
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#568--                                   
16 16
###################################################################################################
17 17

  
18 18
###Loading R library and packages                                                      
......
69 69
#Default name of LST avg to be matched               
70 70
lst_avg<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12")  
71 71

  
72
source(file.path(script_path,"GAM_fusion_function_multisampling_02202013.R"))
73
source(file.path(script_path,"GAM_fusion_function_multisampling_validation_metrics_02202013.R"))
74
                              
72
source(file.path(script_path,"GAM_fusion_function_multisampling_02262013.R"))
73
source(file.path(script_path,"GAM_fusion_function_multisampling_validation_metrics_02262013.R"))
74

  
75 75
###################### START OF THE SCRIPT ########################
76 76

  
77
#Create log file to keep track of details such as processing times and parameters.
77
################# CREATE LOG FILE #####################
78

  
79
#create log file to keep track of details such as processing times and parameters.
78 80

  
79 81
log_fname<-paste("R_log_t",out_prefix, ".log",sep="")
80 82

  
......
92 94
writeLines("Starting script process time:",con=log_file,sep="\n")
93 95
writeLines(as.character(time1),con=log_file,sep="\n")    
94 96

  
95
###Reading the daily station data and setting up for models' comparison
97
############### Reading the daily station data and other data #################
98

  
96 99
ghcn<-readOGR(dsn=in_path,layer=sub(".shp","",infile_daily))
97 100
CRS_interp<-proj4string(ghcn)                       #Storing projection information (ellipsoid, datum,etc.)
98 101

  
......
163 166
#dst<-subset(dst,dst$TMax>-15 & dst$TMax<45) #may choose different threshold??
164 167
#dst<-subset(dst,dst$ELEV_SRTM>0) #This will drop two stations...or 24 rows
165 168

  
166
##Sampling: training and testing sites.
169
##### Create sampling: select training and testing sites ###
167 170

  
168
#Make this a a function
171
#Make this a a function!!!!
169 172
                  
170 173
if (seed_number>0) {
171 174
  set.seed(seed_number)                        #Using a seed number allow results based on random number to be compared...
......
234 237
  sampling_station_id<-list_const_sampling_station_id
235 238
}
236 239

  
237
######## Prediction for the range of dates and sampling data
240
########### PREDICT FOR MONTHLY SCALE  #############
238 241

  
239 242
#First predict at the monthly time scale: climatology
240 243
writeLines("Predictions at monthly scale:",con=log_file,sep="\n")
......
253 256
  list_tmp[[i]]<-tmp
254 257
}
255 258

  
259
################## PREDICT AT DAILY TIME SCALE #################
260

  
256 261
#put together list of clim models per month...
257 262
rast_clim_yearlist<-list_tmp
258 263
#Second predict at the daily time scale: delta
......
265 270
t2<-proc.time()-t1
266 271
writeLines(as.character(t2),con=log_file,sep="\n")
267 272

  
268
#Add accuracy_metrics section/function
269
#now get list of raster daily prediction layers
270
#gam_fus_mod_tmp<-gam_fus_mod
271
#this should be change later once correction has been made
272
#for (i in 1:length(gam_fus_mod)){
273
#  obj_names<-c(y_var_name,"clim","delta","data_s","sampling_dat","data_v",
274
#               "mod_kr_day")
275
#  names(gam_fus_mod[[i]])<-obj_names
276
#  names(gam_fus_mod[[i]][[y_var_name]])<-c("mod1","mod2","mod3","mod4","mod_kr")
277
#}
278

  
279
##
273
############### NOW RUN VALIDATION #########################
274

  
280 275
list_tmp<-vector("list",length(gam_fus_mod))
281 276
for (i in 1:length(gam_fus_mod)){
282 277
  tmp<-gam_fus_mod[[i]][[y_var_name]]  #y_var_name is the variable predicted (tmax or tmin)
......
293 288
t2<-proc.time()-t1
294 289
writeLines(as.character(t2),con=log_file,sep="\n")
295 290

  
296
####This part concerns validation assessment and must be moved later...
297
## make this a function??
298
list_tmp<-vector("list",length(gam_fus_validation_mod))
299
for (i in 1:length(gam_fus_validation_mod)){
300
  tmp<-gam_fus_validation_mod[[i]]$metrics_v
301
  list_tmp[[i]]<-tmp
302
}
303
tb_diagnostic<-do.call(rbind,list_tmp) #long rownames
304
rownames(tb_diagnostic)<-NULL #remove row names
305

  
306
mod_names<-unique(tb_diagnostic$pred_mod) #models that have accuracy metrics
307

  
308
#now boxplots and mean per models
309
t<-melt(tb_diagnostic,
310
        #measure=mod_var, 
311
        id=c("date","pred_mod","prop"),
312
        na.rm=F)
313
avg_tb<-cast(t,pred_mod~variable,mean)
314
tb<-tb_diagnostic
315
tb_mod_list<-vector("list",length(mod_names))
316
for(i in 1:length(mod_names)){            # Reorganizing information in terms of metrics 
317
  mod_name_tb<-paste("tb_",mod_names[i],sep="")
318
  tb_mod<-subset(tb, pred_mod==mod_names[i])
319
  assign(mod_name_tb,tb_mod)
320
  tb_mod_list[[i]]<-tb_mod
321
}
322
names(tb_mod_list)<-mod_names
291
#################### ASSESSMENT OF PREDICTIONS: PLOTS OF ACCURACY METRICS ###########
323 292

  
324
mod_metrics<-do.call(cbind,tb_mod_list)
325
mod_pat<-glob2rx("*.rmse")   
326
mod_var<-grep(mod_pat,names(mod_metrics),value=TRUE) # using grep with "value" extracts the matching names         
293
##Create data.frame with valiation metrics for a full year
294
tb_diagnostic_v<-extract_from_list_obj(gam_fus_validation_mod,"metrics_v")
295
rownames(tb_diagnostic_v)<-NULL #remove row names
327 296

  
328
boxplot(mod_metrics[[mod_var]])
329
test<-mod_metrics[mod_var]
330
boxplot(test,outline=FALSE,horizontal=FALSE,cex=0.5)
297
#Call function to create plots of metrics for validation dataset
298
metric_names<-c("rmse","mae","me","r","m50")
299
summary_metrics<-boxplot_from_tb(tb_diagnostic_v,metric_names,out_prefix)
300
names(summary_metrics)<-c("avg","median")
301
##Write out information concerning accuracy and predictions
302
outfile<-file.path(in_path,paste("assessment_measures_",out_prefix,".txt",sep=""))
303
write.table(tb_diagnostic_v,file= outfile,row.names=FALSE,sep=",")
304
write.table(summary_metrics[[1]], file= outfile, append=TRUE,sep=",") #write out avg
305
write.table(summary_metrics[[2]], file= outfile, append=TRUE,sep=",") #write out median
306

  
307
#################### CLOSE LOG FILE  #############
331 308

  
332 309
#close log_file connection and add meta data
333 310
writeLines("Finished script process time:",con=log_file,sep="\n")
......
339 316
writeLines("End of script",con=log_file,sep="\n")
340 317
close(log_file)
341 318

  
342
####End of part to be changed...
343

  
344
#### END OF SCRIPT #######
319
############################################################
320
######################## END OF SCRIPT #####################

Also available in: Unified diff