4 |
4 |
#different covariates using two baselines. Accuracy methods are added in the the function script to evaluate results.
|
5 |
5 |
#Figures, tables and data for the contribution of covariate paper are also produced in the script.
|
6 |
6 |
#AUTHOR: Benoit Parmentier
|
7 |
|
#DATE: 09/09/2013
|
8 |
|
#Version: 3
|
|
7 |
#MMODIFIED ON: 10/15/2013
|
|
8 |
#Version: 4
|
9 |
9 |
#PROJECT: Environmental Layers project
|
10 |
10 |
#################################################################################################
|
11 |
11 |
|
... | ... | |
37 |
37 |
|
38 |
38 |
#### FUNCTION USED IN SCRIPT
|
39 |
39 |
|
40 |
|
function_analyses_paper <-"contribution_of_covariates_paper_interpolation_functions_09232013.R"
|
|
40 |
function_analyses_paper <-"contribution_of_covariates_paper_interpolation_functions_10152013.R"
|
41 |
41 |
|
42 |
42 |
##############################
|
43 |
43 |
#### Parameters and constants
|
... | ... | |
52 |
52 |
#gwr results:
|
53 |
53 |
in_dir4 <-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gwr_day_lst_comb3_part1_07122013"
|
54 |
54 |
#multisampling results (gam)
|
55 |
|
in_dir5<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_daily_mults10_lst_comb3_08082013"
|
56 |
|
in_dir6<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_kriging_daily_mults10_lst_comb3_08062013"
|
57 |
|
in_dir7<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gwr_daily_mults10_lst_comb3_08072013"
|
|
55 |
#in_dir5<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_daily_mults10_lst_comb3_08082013"
|
|
56 |
#in_dir6<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_kriging_daily_mults10_lst_comb3_08062013"
|
|
57 |
#in_dir7<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gwr_daily_mults10_lst_comb3_08072013"
|
|
58 |
#Hold-out every two days over 365 days
|
|
59 |
in_dir5 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gam_daily_mults1_lst_comb3_10122013"
|
|
60 |
in_dir6 <- "/data/project/layers/commons/Oregon_interpolation/output_data_365d_kriging_daily_mults1_lst_comb3_10112013"
|
|
61 |
in_dir7 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gwr_daily_mults1_lst_comb3_10132013"
|
58 |
62 |
|
59 |
63 |
out_dir<-"/home/parmentier/Data/IPLANT_project/paper_analyses_tables_fig_08032013"
|
60 |
64 |
setwd(out_dir)
|
... | ... | |
63 |
67 |
met_stations_outfiles_obj_file<-"/data/project/layers/commons/data_workflow/output_data_365d_gam_fus_lst_test_run_07172013/met_stations_outfiles_obj_gam_fusion__365d_gam_fus_lst_test_run_07172013.RData"
|
64 |
68 |
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
|
65 |
69 |
y_var_name <- "dailyTmax"
|
66 |
|
out_prefix<-"analyses_10102013"
|
|
70 |
out_prefix<-"analyses_10152013"
|
67 |
71 |
|
68 |
72 |
#method_interpolation <- "gam_daily"
|
69 |
73 |
covar_obj_file_1 <- "covar_obj__365d_gam_day_lst_comb3_08132013.RData"
|
... | ... | |
77 |
81 |
raster_obj_file_3 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_day_lst_comb3_07112013.RData"
|
78 |
82 |
raster_obj_file_4 <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_day_lst_comb3_part1_07122013.RData"
|
79 |
83 |
#multisampling using baseline lat,lon + elev
|
80 |
|
raster_obj_file_5 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_daily_mults10_lst_comb3_08082013.RData"
|
81 |
|
raster_obj_file_6 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_daily_mults10_lst_comb3_08062013.RData"
|
82 |
|
raster_obj_file_7 <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_daily_mults10_lst_comb3_08072013.RData"
|
83 |
|
#raster_prediction_obj_gam_daily_dailyTmax_365d_gam_daily_mults10_lst_comb3_08082013.RData
|
|
84 |
#raster_obj_file_5 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_daily_mults10_lst_comb3_08082013.RData"
|
|
85 |
#raster_obj_file_6 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_daily_mults10_lst_comb3_08062013.RData"
|
|
86 |
#raster_obj_file_7 <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_daily_mults10_lst_comb3_08072013.RData"
|
|
87 |
|
|
88 |
raster_obj_file_5 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_daily_mults1_lst_comb3_10122013.RData"
|
|
89 |
raster_obj_file_6 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_daily_mults1_lst_comb3_10112013.RData"
|
|
90 |
raster_obj_file_7 <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_daily_mults1_lst_comb3_10132013.RData"
|
84 |
91 |
|
85 |
92 |
#Load objects containing training, testing, models objects
|
86 |
93 |
|
... | ... | |
101 |
108 |
raster_prediction_obj_6 <-load_obj(file.path(in_dir6,raster_obj_file_6)) #kriging daily multisampling 10 to 70%
|
102 |
109 |
raster_prediction_obj_7 <-load_obj(file.path(in_dir7,raster_obj_file_7)) #gwr daily multisampling 10 to 70%
|
103 |
110 |
|
|
111 |
|
104 |
112 |
############### BEGIN SCRIPT #################
|
105 |
113 |
|
106 |
114 |
############
|
... | ... | |
292 |
300 |
|
293 |
301 |
l1$mae_tb #contains
|
294 |
302 |
|
295 |
|
#Prepare parameters/arguments for plotting
|
296 |
303 |
list_dist_obj <-list(l1,l3,l4)
|
|
304 |
head(list_dist_obj[[1]]$dstspat_dat)
|
|
305 |
#should add method_interp in data.frame
|
|
306 |
|
|
307 |
#Prepare parameters/arguments for plotting
|
|
308 |
|
297 |
309 |
col_t <- c("red","blue","black")
|
298 |
310 |
pch_t <- 1:length(col_t)
|
299 |
311 |
legend_text <- c("GAM","Kriging","GWR")
|
... | ... | |
357 |
369 |
prop_obj_gwr<-calc_stat_prop_tb(names_mod,raster_prediction_obj_7)
|
358 |
370 |
|
359 |
371 |
list_prop_obj<-list(prop_obj_gam,prop_obj_kriging,prop_obj_gwr)
|
|
372 |
#Testing siginificance between models
|
|
373 |
|
|
374 |
mod_compk1 <-kruskal.test(prop_obj_gwr$tb$rmse ~ as.factor(prop_obj_gwr$tb$prop)) #Kruskal Wallis test
|
|
375 |
mod_compk2 <-kruskal.test(tb2$rmse ~ as.factor(tb2$pred_mod))
|
|
376 |
print(mod_compk1) #not significant
|
|
377 |
print(mod_compk2) #not significant
|
|
378 |
|
|
379 |
#Multiple Kruskal Wallis
|
|
380 |
mod_compk1 <-kruskalmc(prop_obj_gwr$tb$rmse ~ as.factor(prop_obj_gwr$tb$prop))
|
|
381 |
mod_compk2 <-kruskalmc(tb2$rmse ~ as.factor(tb2$pred_mod))
|
|
382 |
|
|
383 |
print(mod_compk1)
|
|
384 |
print(mod_compk2)
|
|
385 |
|
|
386 |
prop_tb <- rbind(prop_obj_gam$tb,prop_obj_kriging$tb,prop_obj_gwr$tb)
|
|
387 |
|
|
388 |
mod_compk1 <-kruskal.test(prop_tb$rmse ~ as.factor(prop_tb$method_interp)+as.factor(prop_tb$prop)) #Kruskal Wallis test
|
|
389 |
mod_prop <-lm(prop_tb$rmse ~ as.factor(prop_tb$method_interp)+as.factor(prop_tb$prop)) #This is significant!!
|
360 |
390 |
|
361 |
391 |
## plot setting for figure 4
|
362 |
392 |
|
... | ... | |
378 |
408 |
png(filename=file.path(out_dir,png_file_name),
|
379 |
409 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix)
|
380 |
410 |
par(mfrow=c(row_mfrow,col_mfrow))
|
381 |
|
|
|
411 |
#par(mfrow=c(1,1))
|
382 |
412 |
metric_name<-"mae"
|
383 |
413 |
list_param_plot<-list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name,add_CI,CI_bar)
|
384 |
414 |
names(list_param_plot)<-c("list_prop_obj","col_t","pch_t","legend_text","mod_name","metric_name","add_CI","CI_bar")
|
... | ... | |
419 |
449 |
rownames(tb4_s)<-NULL #remove row names
|
420 |
450 |
tb4_s$method_interp <- "gwr_daily" #add type of interpolation...out_prefix too??
|
421 |
451 |
|
|
452 |
tb5_s<-extract_from_list_obj(raster_prediction_obj_5$validation_mod_obj,"metrics_s")
|
|
453 |
rownames(tb5_s)<-NULL #remove row names
|
|
454 |
tb5_s$method_interp <- "gam_daily" #add type of interpolation...out_prefix too??
|
|
455 |
|
|
456 |
tb6_s<-extract_from_list_obj(raster_prediction_obj_6$validation_mod_obj,"metrics_s")
|
|
457 |
rownames(tb6_s)<-NULL #remove row names
|
|
458 |
tb6_s$method_interp <- "kriging_daily" #add type of interpolation...out_prefix too??
|
|
459 |
|
|
460 |
tb7_s<-extract_from_list_obj(raster_prediction_obj_7$validation_mod_obj,"metrics_s")
|
|
461 |
rownames(tb7_s)<-NULL #remove row names
|
|
462 |
tb7_s$method_interp <- "gwr_daily" #add type of interpolation...out_prefix too??
|
|
463 |
|
422 |
464 |
#tb1_s <-raster_prediction_obj_1$tb_diagnostic_s #gam dailycontains the accuracy metrics for each run...
|
423 |
465 |
#tb3_s <-raster_prediction_obj_3$tb_diagnostic_s #Kriging daily methods
|
424 |
466 |
#tb4_s <-raster_prediction_obj_4$tb_diagnostic_s #gwr daily methods
|
... | ... | |
427 |
469 |
tb3 <-raster_prediction_obj_3$tb_diagnostic_v #Kriging daily methods
|
428 |
470 |
tb4 <-raster_prediction_obj_4$tb_diagnostic_v #gwr daily methods
|
429 |
471 |
|
|
472 |
tb5 <-raster_prediction_obj_5$tb_diagnostic_v #gam dailycontains the accuracy metrics for each run...
|
|
473 |
tb6 <-raster_prediction_obj_6$tb_diagnostic_v #Kriging daily methods
|
|
474 |
tb7 <-raster_prediction_obj_7$tb_diagnostic_v #gwr daily methods
|
|
475 |
|
430 |
476 |
#Calculate difference in MAE and RMSE for training and testing data: call diff_df function
|
431 |
477 |
diff_tb1 <-diff_df(tb1_s[tb1_s$pred_mod=="mod1",],tb1[tb1$pred_mod=="mod1",],c("mae","rmse")) #gam select differences for mod1
|
432 |
478 |
diff_tb3 <-diff_df(tb3_s[tb3_s$pred_mod=="mod1",],tb3[tb3$pred_mod=="mod1",],c("mae","rmse")) #kriging
|
433 |
479 |
diff_tb4 <-diff_df(tb4_s[tb4_s$pred_mod=="mod1",],tb4[tb4$pred_mod=="mod1",],c("mae","rmse")) #gwr
|
434 |
480 |
|
|
481 |
diff_tb5 <-diff_df(tb5_s[tb5_s$pred_mod=="mod1",],tb5[tb5$pred_mod=="mod1",],c("mae","rmse")) #gam select differences for mod1
|
|
482 |
diff_tb6 <-diff_df(tb6_s[tb6_s$pred_mod=="mod1",],tb6[tb6$pred_mod=="mod1",],c("mae","rmse")) #kriging
|
|
483 |
diff_tb7 <-diff_df(tb7_s[tb7_s$pred_mod=="mod1",],tb7[tb7$pred_mod=="mod1",],c("mae","rmse")) #gwr
|
|
484 |
|
|
485 |
|
435 |
486 |
diff_mae_data <-data.frame(gam=diff_tb1$mae,kriging=diff_tb3$mae,gwr=diff_tb4$mae)
|
436 |
487 |
diff_rmse_data <-data.frame(gam=diff_tb1$rmse,kriging=diff_tb3$rmse,gwr=diff_tb4$rmse)
|
437 |
488 |
|
|
489 |
|
|
490 |
diff_mae_data_mult <-data.frame(gam=diff_tb5$mae,kriging=diff_tb6$mae,gwr=diff_tb7$mae)
|
|
491 |
diff_rmse_data_mult <-data.frame(gam=diff_tb5$rmse,kriging=diff_tb6$rmse,gwr=diff_tb7$rmse)
|
|
492 |
|
|
493 |
diff_mae_data_mult$prop <- tb5$prop
|
|
494 |
|
|
495 |
boxplot(diff_mae_data_mult$gam~diff_mae_data_mult$prop)
|
|
496 |
boxplot(diff_mae_data_mult$kriging~diff_mae_data_mult$prop)
|
|
497 |
|
|
498 |
plot(diff_mae_data_mult$gam~diff_mae_data_mult$prop,type="p")
|
|
499 |
|
|
500 |
|
|
501 |
|
438 |
502 |
#Test the plot
|
439 |
503 |
boxplot(diff_mae_data)
|
440 |
504 |
boxplot(diff_rmse_data) #plot differences in training and testing accuracies for three methods
|
... | ... | |
442 |
506 |
xlab="Interpolation method",
|
443 |
507 |
ylab="RMSE (C)")
|
444 |
508 |
|
|
509 |
boxplot(diff_mae_data_mult)
|
|
510 |
boxplot(diff_rmse_data_mult) #plot differences in training and testing accuracies for three methods
|
|
511 |
|
|
512 |
|
|
513 |
boxplot(diff_mae_data_mult)
|
|
514 |
|
445 |
515 |
tb5 <-raster_prediction_obj_5$tb_diagnostic_v #gam dailycontains the accuracy metrics for each run...
|
446 |
516 |
tb6 <-raster_prediction_obj_6$tb_diagnostic_v #Kriging daily methods
|
447 |
517 |
tb7 <-raster_prediction_obj_7$tb_diagnostic_v #gwr daily methods
|
... | ... | |
497 |
567 |
l_pch_t <- c(1,1,3,3,2,2)
|
498 |
568 |
l_lty_t <- c(2,1,1,2,2,1)
|
499 |
569 |
add_CI <- c(FALSE,FALSE,FALSE,FALSE,FALSE,FALSE)
|
500 |
|
#add_CI <- c(TRUE,TRUE,TRUE,TRUE,TRUE,TRUE)
|
|
570 |
add_CI <- c(TRUE,TRUE,TRUE,TRUE,TRUE,TRUE)
|
501 |
571 |
y_range<-c(0.5,3)
|
502 |
572 |
|
503 |
573 |
plot_ac_holdout_prop(l_prop,l_col_t,l_pch_t,add_CI,y_range)
|
... | ... | |
518 |
588 |
ylab="RMSE (C)")
|
519 |
589 |
|
520 |
590 |
|
521 |
|
boxplot(diff_mae_data) #plot differences in training and testing accuracies for three methods
|
|
591 |
boxplot(diff_mae_data_mult[-4]) #plot differences in training and testing accuracies for three methods
|
|
592 |
|
522 |
593 |
title(main="Difference between training and testing MAE",
|
523 |
594 |
xlab="Interpolation method",
|
524 |
595 |
ylab="MAE (C)")
|
... | ... | |
526 |
597 |
dev.off()
|
527 |
598 |
|
528 |
599 |
############### STUDY TIME AND accuracy
|
529 |
|
#########Figure 6: Monthly RMSE averages for the three interpolation methods: GAM, GWR and Kriging.
|
|
600 |
#########Figure 6: Monthly RMSE averages for the three interpolation methods: GAM, GWR and Kriging.
|
530 |
601 |
|
531 |
602 |
mae_tmp<- data.frame(gam=tb1[tb1$pred_mod=="mod1",c("mae")],
|
532 |
603 |
kriging=tb3[tb3$pred_mod=="mod1",c("mae")],
|
revisions analyses and figures for single time scale paper scripts following feedback from E&O layer group