Revision 1508a57e
Added by Benoit Parmentier over 11 years ago
climate/research/oregon/interpolation/GAM_fusion_analysis_raster_prediction_multisampling.R | ||
---|---|---|
8 | 8 |
#2)Constant sampling: use the same sample over the runs |
9 | 9 |
#3)over dates: run over for example 365 dates without mulitsampling |
10 | 10 |
#4)use seed number: use seed if random samples must be repeatable |
11 |
#5)GAM fusion: possibilty of running GAM+FUSION and other options added |
|
11 |
#5)GAM fusion: possibilty of running GAM+FUSION or GAM+CAI and other options added
|
|
12 | 12 |
#The interpolation is done first at the monthly time scale then delta surfaces are added. |
13 | 13 |
#AUTHOR: Benoit Parmentier |
14 |
#DATE: 02/20/2013
|
|
15 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--
|
|
14 |
#DATE: 02/26/2013
|
|
15 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#568--
|
|
16 | 16 |
################################################################################################### |
17 | 17 |
|
18 | 18 |
###Loading R library and packages |
... | ... | |
69 | 69 |
#Default name of LST avg to be matched |
70 | 70 |
lst_avg<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12") |
71 | 71 |
|
72 |
source(file.path(script_path,"GAM_fusion_function_multisampling_02202013.R"))
|
|
73 |
source(file.path(script_path,"GAM_fusion_function_multisampling_validation_metrics_02202013.R"))
|
|
74 |
|
|
72 |
source(file.path(script_path,"GAM_fusion_function_multisampling_02262013.R"))
|
|
73 |
source(file.path(script_path,"GAM_fusion_function_multisampling_validation_metrics_02262013.R"))
|
|
74 |
|
|
75 | 75 |
###################### START OF THE SCRIPT ######################## |
76 | 76 |
|
77 |
#Create log file to keep track of details such as processing times and parameters. |
|
77 |
################# CREATE LOG FILE ##################### |
|
78 |
|
|
79 |
#create log file to keep track of details such as processing times and parameters. |
|
78 | 80 |
|
79 | 81 |
log_fname<-paste("R_log_t",out_prefix, ".log",sep="") |
80 | 82 |
|
... | ... | |
92 | 94 |
writeLines("Starting script process time:",con=log_file,sep="\n") |
93 | 95 |
writeLines(as.character(time1),con=log_file,sep="\n") |
94 | 96 |
|
95 |
###Reading the daily station data and setting up for models' comparison |
|
97 |
############### Reading the daily station data and other data ################# |
|
98 |
|
|
96 | 99 |
ghcn<-readOGR(dsn=in_path,layer=sub(".shp","",infile_daily)) |
97 | 100 |
CRS_interp<-proj4string(ghcn) #Storing projection information (ellipsoid, datum,etc.) |
98 | 101 |
|
... | ... | |
163 | 166 |
#dst<-subset(dst,dst$TMax>-15 & dst$TMax<45) #may choose different threshold?? |
164 | 167 |
#dst<-subset(dst,dst$ELEV_SRTM>0) #This will drop two stations...or 24 rows |
165 | 168 |
|
166 |
##Sampling: training and testing sites.
|
|
169 |
##### Create sampling: select training and testing sites ###
|
|
167 | 170 |
|
168 |
#Make this a a function |
|
171 |
#Make this a a function!!!!
|
|
169 | 172 |
|
170 | 173 |
if (seed_number>0) { |
171 | 174 |
set.seed(seed_number) #Using a seed number allow results based on random number to be compared... |
... | ... | |
234 | 237 |
sampling_station_id<-list_const_sampling_station_id |
235 | 238 |
} |
236 | 239 |
|
237 |
######## Prediction for the range of dates and sampling data
|
|
240 |
########### PREDICT FOR MONTHLY SCALE #############
|
|
238 | 241 |
|
239 | 242 |
#First predict at the monthly time scale: climatology |
240 | 243 |
writeLines("Predictions at monthly scale:",con=log_file,sep="\n") |
... | ... | |
253 | 256 |
list_tmp[[i]]<-tmp |
254 | 257 |
} |
255 | 258 |
|
259 |
################## PREDICT AT DAILY TIME SCALE ################# |
|
260 |
|
|
256 | 261 |
#put together list of clim models per month... |
257 | 262 |
rast_clim_yearlist<-list_tmp |
258 | 263 |
#Second predict at the daily time scale: delta |
... | ... | |
265 | 270 |
t2<-proc.time()-t1 |
266 | 271 |
writeLines(as.character(t2),con=log_file,sep="\n") |
267 | 272 |
|
268 |
#Add accuracy_metrics section/function |
|
269 |
#now get list of raster daily prediction layers |
|
270 |
#gam_fus_mod_tmp<-gam_fus_mod |
|
271 |
#this should be change later once correction has been made |
|
272 |
#for (i in 1:length(gam_fus_mod)){ |
|
273 |
# obj_names<-c(y_var_name,"clim","delta","data_s","sampling_dat","data_v", |
|
274 |
# "mod_kr_day") |
|
275 |
# names(gam_fus_mod[[i]])<-obj_names |
|
276 |
# names(gam_fus_mod[[i]][[y_var_name]])<-c("mod1","mod2","mod3","mod4","mod_kr") |
|
277 |
#} |
|
278 |
|
|
279 |
## |
|
273 |
############### NOW RUN VALIDATION ######################### |
|
274 |
|
|
280 | 275 |
list_tmp<-vector("list",length(gam_fus_mod)) |
281 | 276 |
for (i in 1:length(gam_fus_mod)){ |
282 | 277 |
tmp<-gam_fus_mod[[i]][[y_var_name]] #y_var_name is the variable predicted (tmax or tmin) |
... | ... | |
293 | 288 |
t2<-proc.time()-t1 |
294 | 289 |
writeLines(as.character(t2),con=log_file,sep="\n") |
295 | 290 |
|
296 |
####This part concerns validation assessment and must be moved later... |
|
297 |
## make this a function?? |
|
298 |
list_tmp<-vector("list",length(gam_fus_validation_mod)) |
|
299 |
for (i in 1:length(gam_fus_validation_mod)){ |
|
300 |
tmp<-gam_fus_validation_mod[[i]]$metrics_v |
|
301 |
list_tmp[[i]]<-tmp |
|
302 |
} |
|
303 |
tb_diagnostic<-do.call(rbind,list_tmp) #long rownames |
|
304 |
rownames(tb_diagnostic)<-NULL #remove row names |
|
305 |
|
|
306 |
mod_names<-unique(tb_diagnostic$pred_mod) #models that have accuracy metrics |
|
307 |
|
|
308 |
#now boxplots and mean per models |
|
309 |
t<-melt(tb_diagnostic, |
|
310 |
#measure=mod_var, |
|
311 |
id=c("date","pred_mod","prop"), |
|
312 |
na.rm=F) |
|
313 |
avg_tb<-cast(t,pred_mod~variable,mean) |
|
314 |
tb<-tb_diagnostic |
|
315 |
tb_mod_list<-vector("list",length(mod_names)) |
|
316 |
for(i in 1:length(mod_names)){ # Reorganizing information in terms of metrics |
|
317 |
mod_name_tb<-paste("tb_",mod_names[i],sep="") |
|
318 |
tb_mod<-subset(tb, pred_mod==mod_names[i]) |
|
319 |
assign(mod_name_tb,tb_mod) |
|
320 |
tb_mod_list[[i]]<-tb_mod |
|
321 |
} |
|
322 |
names(tb_mod_list)<-mod_names |
|
291 |
#################### ASSESSMENT OF PREDICTIONS: PLOTS OF ACCURACY METRICS ########### |
|
323 | 292 |
|
324 |
mod_metrics<-do.call(cbind,tb_mod_list)
|
|
325 |
mod_pat<-glob2rx("*.rmse")
|
|
326 |
mod_var<-grep(mod_pat,names(mod_metrics),value=TRUE) # using grep with "value" extracts the matching names
|
|
293 |
##Create data.frame with valiation metrics for a full year
|
|
294 |
tb_diagnostic_v<-extract_from_list_obj(gam_fus_validation_mod,"metrics_v")
|
|
295 |
rownames(tb_diagnostic_v)<-NULL #remove row names
|
|
327 | 296 |
|
328 |
boxplot(mod_metrics[[mod_var]]) |
|
329 |
test<-mod_metrics[mod_var] |
|
330 |
boxplot(test,outline=FALSE,horizontal=FALSE,cex=0.5) |
|
297 |
#Call function to create plots of metrics for validation dataset |
|
298 |
metric_names<-c("rmse","mae","me","r","m50") |
|
299 |
summary_metrics<-boxplot_from_tb(tb_diagnostic_v,metric_names,out_prefix) |
|
300 |
names(summary_metrics)<-c("avg","median") |
|
301 |
##Write out information concerning accuracy and predictions |
|
302 |
outfile<-file.path(in_path,paste("assessment_measures_",out_prefix,".txt",sep="")) |
|
303 |
write.table(tb_diagnostic_v,file= outfile,row.names=FALSE,sep=",") |
|
304 |
write.table(summary_metrics[[1]], file= outfile, append=TRUE,sep=",") #write out avg |
|
305 |
write.table(summary_metrics[[2]], file= outfile, append=TRUE,sep=",") #write out median |
|
306 |
|
|
307 |
#################### CLOSE LOG FILE ############# |
|
331 | 308 |
|
332 | 309 |
#close log_file connection and add meta data |
333 | 310 |
writeLines("Finished script process time:",con=log_file,sep="\n") |
... | ... | |
339 | 316 |
writeLines("End of script",con=log_file,sep="\n") |
340 | 317 |
close(log_file) |
341 | 318 |
|
342 |
####End of part to be changed... |
|
343 |
|
|
344 |
#### END OF SCRIPT ####### |
|
319 |
############################################################ |
|
320 |
######################## END OF SCRIPT ##################### |
Also available in: Unified diff
GAM Fusion raster prediction, modification of validation section, call to function for boxplot, Venezuela predictions