Project

General

Profile

« Previous | Next » 

Revision 3b1a1539

Added by Benoit Parmentier about 11 years ago

revisions analyses and figures for single time scale paper scripts following feedback from E&O layer group

View differences:

climate/research/oregon/interpolation/contribution_of_covariates_paper_interpolation.R
4 4
#different covariates using two baselines. Accuracy methods are added in the the function script to evaluate results.
5 5
#Figures, tables and data for the contribution of covariate paper are also produced in the script.
6 6
#AUTHOR: Benoit Parmentier                                                                      
7
#DATE: 09/09/2013            
8
#Version: 3
7
#MMODIFIED ON: 10/15/2013            
8
#Version: 4
9 9
#PROJECT: Environmental Layers project                                     
10 10
#################################################################################################
11 11

  
......
37 37

  
38 38
#### FUNCTION USED IN SCRIPT
39 39

  
40
function_analyses_paper <-"contribution_of_covariates_paper_interpolation_functions_09232013.R"
40
function_analyses_paper <-"contribution_of_covariates_paper_interpolation_functions_10152013.R"
41 41

  
42 42
##############################
43 43
#### Parameters and constants  
......
52 52
#gwr results:
53 53
in_dir4 <-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gwr_day_lst_comb3_part1_07122013"
54 54
#multisampling results (gam)
55
in_dir5<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_daily_mults10_lst_comb3_08082013"
56
in_dir6<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_kriging_daily_mults10_lst_comb3_08062013"
57
in_dir7<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gwr_daily_mults10_lst_comb3_08072013"
55
#in_dir5<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_daily_mults10_lst_comb3_08082013"
56
#in_dir6<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_kriging_daily_mults10_lst_comb3_08062013"
57
#in_dir7<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gwr_daily_mults10_lst_comb3_08072013"
58
#Hold-out every two days over 365 days
59
in_dir5 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gam_daily_mults1_lst_comb3_10122013"
60
in_dir6 <- "/data/project/layers/commons/Oregon_interpolation/output_data_365d_kriging_daily_mults1_lst_comb3_10112013"
61
in_dir7 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gwr_daily_mults1_lst_comb3_10132013"
58 62

  
59 63
out_dir<-"/home/parmentier/Data/IPLANT_project/paper_analyses_tables_fig_08032013"
60 64
setwd(out_dir)
......
63 67
met_stations_outfiles_obj_file<-"/data/project/layers/commons/data_workflow/output_data_365d_gam_fus_lst_test_run_07172013/met_stations_outfiles_obj_gam_fusion__365d_gam_fus_lst_test_run_07172013.RData"
64 68
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
65 69
y_var_name <- "dailyTmax"
66
out_prefix<-"analyses_10102013"
70
out_prefix<-"analyses_10152013"
67 71

  
68 72
#method_interpolation <- "gam_daily"
69 73
covar_obj_file_1 <- "covar_obj__365d_gam_day_lst_comb3_08132013.RData"
......
77 81
raster_obj_file_3 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_day_lst_comb3_07112013.RData"
78 82
raster_obj_file_4 <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_day_lst_comb3_part1_07122013.RData"
79 83
#multisampling using baseline lat,lon + elev
80
raster_obj_file_5 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_daily_mults10_lst_comb3_08082013.RData"
81
raster_obj_file_6 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_daily_mults10_lst_comb3_08062013.RData"
82
raster_obj_file_7 <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_daily_mults10_lst_comb3_08072013.RData"
83
#raster_prediction_obj_gam_daily_dailyTmax_365d_gam_daily_mults10_lst_comb3_08082013.RData
84
#raster_obj_file_5 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_daily_mults10_lst_comb3_08082013.RData"
85
#raster_obj_file_6 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_daily_mults10_lst_comb3_08062013.RData"
86
#raster_obj_file_7 <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_daily_mults10_lst_comb3_08072013.RData"
87

  
88
raster_obj_file_5 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_daily_mults1_lst_comb3_10122013.RData"
89
raster_obj_file_6 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_daily_mults1_lst_comb3_10112013.RData"
90
raster_obj_file_7 <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_daily_mults1_lst_comb3_10132013.RData"
84 91

  
85 92
#Load objects containing training, testing, models objects 
86 93

  
......
101 108
raster_prediction_obj_6 <-load_obj(file.path(in_dir6,raster_obj_file_6)) #kriging daily multisampling 10 to 70% 
102 109
raster_prediction_obj_7 <-load_obj(file.path(in_dir7,raster_obj_file_7)) #gwr daily multisampling 10 to 70%
103 110

  
111

  
104 112
############### BEGIN SCRIPT #################
105 113

  
106 114
############
......
292 300

  
293 301
l1$mae_tb #contains
294 302

  
295
#Prepare parameters/arguments for plotting
296 303
list_dist_obj <-list(l1,l3,l4)
304
head(list_dist_obj[[1]]$dstspat_dat)
305
#should add method_interp in data.frame
306

  
307
#Prepare parameters/arguments for plotting
308

  
297 309
col_t <- c("red","blue","black")
298 310
pch_t <- 1:length(col_t)
299 311
legend_text <- c("GAM","Kriging","GWR")
......
357 369
prop_obj_gwr<-calc_stat_prop_tb(names_mod,raster_prediction_obj_7)
358 370

  
359 371
list_prop_obj<-list(prop_obj_gam,prop_obj_kriging,prop_obj_gwr)
372
#Testing siginificance between models
373

  
374
mod_compk1 <-kruskal.test(prop_obj_gwr$tb$rmse ~ as.factor(prop_obj_gwr$tb$prop)) #Kruskal Wallis test
375
mod_compk2 <-kruskal.test(tb2$rmse ~ as.factor(tb2$pred_mod))
376
print(mod_compk1) #not significant
377
print(mod_compk2) #not significant
378

  
379
#Multiple Kruskal Wallis
380
mod_compk1 <-kruskalmc(prop_obj_gwr$tb$rmse ~ as.factor(prop_obj_gwr$tb$prop))
381
mod_compk2 <-kruskalmc(tb2$rmse ~ as.factor(tb2$pred_mod))
382

  
383
print(mod_compk1)
384
print(mod_compk2)
385

  
386
prop_tb <- rbind(prop_obj_gam$tb,prop_obj_kriging$tb,prop_obj_gwr$tb)
387

  
388
mod_compk1 <-kruskal.test(prop_tb$rmse ~ as.factor(prop_tb$method_interp)+as.factor(prop_tb$prop)) #Kruskal Wallis test
389
mod_prop <-lm(prop_tb$rmse ~ as.factor(prop_tb$method_interp)+as.factor(prop_tb$prop)) #This is significant!!
360 390

  
361 391
## plot setting for figure 4
362 392

  
......
378 408
png(filename=file.path(out_dir,png_file_name),
379 409
    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
380 410
par(mfrow=c(row_mfrow,col_mfrow))
381

  
411
#par(mfrow=c(1,1))
382 412
metric_name<-"mae"
383 413
list_param_plot<-list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name,add_CI,CI_bar)
384 414
names(list_param_plot)<-c("list_prop_obj","col_t","pch_t","legend_text","mod_name","metric_name","add_CI","CI_bar")
......
419 449
rownames(tb4_s)<-NULL #remove row names
420 450
tb4_s$method_interp <- "gwr_daily" #add type of interpolation...out_prefix too??
421 451

  
452
tb5_s<-extract_from_list_obj(raster_prediction_obj_5$validation_mod_obj,"metrics_s")
453
rownames(tb5_s)<-NULL #remove row names
454
tb5_s$method_interp <- "gam_daily" #add type of interpolation...out_prefix too??
455

  
456
tb6_s<-extract_from_list_obj(raster_prediction_obj_6$validation_mod_obj,"metrics_s")
457
rownames(tb6_s)<-NULL #remove row names
458
tb6_s$method_interp <- "kriging_daily" #add type of interpolation...out_prefix too??
459

  
460
tb7_s<-extract_from_list_obj(raster_prediction_obj_7$validation_mod_obj,"metrics_s")
461
rownames(tb7_s)<-NULL #remove row names
462
tb7_s$method_interp <- "gwr_daily" #add type of interpolation...out_prefix too??
463

  
422 464
#tb1_s <-raster_prediction_obj_1$tb_diagnostic_s  #gam dailycontains the accuracy metrics for each run...
423 465
#tb3_s <-raster_prediction_obj_3$tb_diagnostic_s  #Kriging daily methods
424 466
#tb4_s <-raster_prediction_obj_4$tb_diagnostic_s  #gwr daily methods
......
427 469
tb3 <-raster_prediction_obj_3$tb_diagnostic_v  #Kriging daily methods
428 470
tb4 <-raster_prediction_obj_4$tb_diagnostic_v  #gwr daily methods
429 471

  
472
tb5 <-raster_prediction_obj_5$tb_diagnostic_v  #gam dailycontains the accuracy metrics for each run...
473
tb6 <-raster_prediction_obj_6$tb_diagnostic_v  #Kriging daily methods
474
tb7 <-raster_prediction_obj_7$tb_diagnostic_v  #gwr daily methods
475

  
430 476
#Calculate difference in MAE and RMSE for training and testing data: call diff_df function
431 477
diff_tb1 <-diff_df(tb1_s[tb1_s$pred_mod=="mod1",],tb1[tb1$pred_mod=="mod1",],c("mae","rmse")) #gam select differences for mod1
432 478
diff_tb3 <-diff_df(tb3_s[tb3_s$pred_mod=="mod1",],tb3[tb3$pred_mod=="mod1",],c("mae","rmse")) #kriging
433 479
diff_tb4 <-diff_df(tb4_s[tb4_s$pred_mod=="mod1",],tb4[tb4$pred_mod=="mod1",],c("mae","rmse")) #gwr
434 480

  
481
diff_tb5 <-diff_df(tb5_s[tb5_s$pred_mod=="mod1",],tb5[tb5$pred_mod=="mod1",],c("mae","rmse")) #gam select differences for mod1
482
diff_tb6 <-diff_df(tb6_s[tb6_s$pred_mod=="mod1",],tb6[tb6$pred_mod=="mod1",],c("mae","rmse")) #kriging
483
diff_tb7 <-diff_df(tb7_s[tb7_s$pred_mod=="mod1",],tb7[tb7$pred_mod=="mod1",],c("mae","rmse")) #gwr
484

  
485

  
435 486
diff_mae_data <-data.frame(gam=diff_tb1$mae,kriging=diff_tb3$mae,gwr=diff_tb4$mae)
436 487
diff_rmse_data <-data.frame(gam=diff_tb1$rmse,kriging=diff_tb3$rmse,gwr=diff_tb4$rmse)
437 488

  
489

  
490
diff_mae_data_mult  <-data.frame(gam=diff_tb5$mae,kriging=diff_tb6$mae,gwr=diff_tb7$mae)
491
diff_rmse_data_mult <-data.frame(gam=diff_tb5$rmse,kriging=diff_tb6$rmse,gwr=diff_tb7$rmse)
492

  
493
diff_mae_data_mult$prop <- tb5$prop
494

  
495
boxplot(diff_mae_data_mult$gam~diff_mae_data_mult$prop)
496
boxplot(diff_mae_data_mult$kriging~diff_mae_data_mult$prop)
497

  
498
plot(diff_mae_data_mult$gam~diff_mae_data_mult$prop,type="p")
499

  
500

  
501

  
438 502
#Test the plot
439 503
boxplot(diff_mae_data)
440 504
boxplot(diff_rmse_data) #plot differences in training and testing accuracies for three methods
......
442 506
      xlab="Interpolation method",
443 507
      ylab="RMSE (C)")
444 508

  
509
boxplot(diff_mae_data_mult)
510
boxplot(diff_rmse_data_mult) #plot differences in training and testing accuracies for three methods
511

  
512

  
513
boxplot(diff_mae_data_mult)
514

  
445 515
tb5 <-raster_prediction_obj_5$tb_diagnostic_v  #gam dailycontains the accuracy metrics for each run...
446 516
tb6 <-raster_prediction_obj_6$tb_diagnostic_v  #Kriging daily methods
447 517
tb7 <-raster_prediction_obj_7$tb_diagnostic_v  #gwr daily methods
......
497 567
l_pch_t <- c(1,1,3,3,2,2)
498 568
l_lty_t <- c(2,1,1,2,2,1)
499 569
add_CI <- c(FALSE,FALSE,FALSE,FALSE,FALSE,FALSE)
500
#add_CI <- c(TRUE,TRUE,TRUE,TRUE,TRUE,TRUE)
570
add_CI <- c(TRUE,TRUE,TRUE,TRUE,TRUE,TRUE)
501 571
y_range<-c(0.5,3)
502 572

  
503 573
plot_ac_holdout_prop(l_prop,l_col_t,l_pch_t,add_CI,y_range)
......
518 588
      ylab="RMSE (C)")
519 589

  
520 590

  
521
boxplot(diff_mae_data) #plot differences in training and testing accuracies for three methods
591
boxplot(diff_mae_data_mult[-4]) #plot differences in training and testing accuracies for three methods
592

  
522 593
title(main="Difference between training and testing MAE",
523 594
      xlab="Interpolation method",
524 595
      ylab="MAE (C)")
......
526 597
dev.off()
527 598

  
528 599
############### STUDY TIME AND accuracy
529
#########Figure 6: Monthly RMSE averages for the three interpolation methods: GAM, GWR and Kriging.
600
  #########Figure 6: Monthly RMSE averages for the three interpolation methods: GAM, GWR and Kriging.
530 601

  
531 602
mae_tmp<- data.frame(gam=tb1[tb1$pred_mod=="mod1",c("mae")],
532 603
                     kriging=tb3[tb3$pred_mod=="mod1",c("mae")],
climate/research/oregon/interpolation/contribution_of_covariates_paper_interpolation_functions.R
4 4
# interpolation code.
5 5
#Figures and data for the contribution of covariate paper are also produced.
6 6
#AUTHOR: Benoit Parmentier                                                                      #
7
#DATE: 08/15/2013            
7
#DATE: 10/15/2013            
8 8
#Version: 2
9 9
#PROJECT: Environmental Layers project                                       #
10 10
#################################################################################################
......
380 380
    n_tb<-subset(l$n_tb,pred_mod==mod_name,select=metric_name) 
381 381
    sd_tb<-subset(l$sd_tb,pred_mod==mod_name,select=metric_name) #l$sd_abs_tb[,metric_name]
382 382
    
383
    xlab_text<-"holdout proportion"
383
    xlab_text<-""
384 384
    
385 385
    no <- unlist(as.data.frame(n_tb))
386 386
    y <- unlist(as.data.frame(avg_tb))
......
412 412
    if(add_CI[i]==TRUE){
413 413
      
414 414
      if (i==1){
415
        plotCI(y=y, x=x, uiw=ciw, col=col_t[i], barcol="blue", lwd=1,pch=pch_t[i],main=paste(" Comparison of ",metric_name," in ",mod_name,sep=""),
416
               ylab="RMSE (C)", xlab=xlab_text) 
415
        plotCI(y=y, x=x, uiw=ciw, col=col_t[i], barcol="blue", lwd=1,pch=pch_t[i],
416
               ylab="", xlab=xlab_text) 
417 417
        lines(y~x, pch=pch_t[i],col=col_t[i],type="b")
418 418
      }else{
419 419
        lines(y~x, pch=pch_t[i],col=col_t[i],type="b")
420 420
        plotCI(y=y, x=x, uiw=ciw, col=col_t[i], barcol="blue", lwd=1,pch=pch_t[i],
421
               ylab="RMSE (C)", xlab=xlab_text,add=TRUE) 
421
               ylab="", xlab=xlab_text,add=TRUE) 
422 422
      }
423 423
      
424 424
    }else{

Also available in: Unified diff