Project

General

Profile

« Previous | Next » 

Revision b2298f8e

Added by Benoit Parmentier almost 11 years ago

multi timescale script update, monthly holdout accuracy and figures production

View differences:

climate/research/oregon/interpolation/multi_timescale_paper_interpolation.R
5 5
#Figures, tables and data for the  paper are also produced in the script.
6 6
#AUTHOR: Benoit Parmentier 
7 7
#CREATED ON: 10/31/2013  
8
#MODIFIED ON: 11/15/2013            
8
#MODIFIED ON: 12/02/2013            
9 9
#Version: 1
10 10
#PROJECT: Environmental Layers project                                     
11 11
#################################################################################################
......
32 32
library(automap)                             # Kriging automatic fitting of variogram using gstat
33 33
library(rgeos)                               # Geometric, topologic library of functions
34 34
#RPostgreSQL                                 # Interface R and Postgres, not used in this script
35

  
35
library(gridExtra)
36 36
#Additional libraries not used in workflow
37 37
library(pgirmess)                            # Krusall Wallis test with mulitple options, Kruskalmc {pgirmess}  
38 38

  
39 39
#### FUNCTION USED IN SCRIPT
40 40

  
41 41
function_analyses_paper1 <-"contribution_of_covariates_paper_interpolation_functions_10222013.R"
42
function_analyses_paper2 <-"multi_timescales_paper_interpolation_functions_11252013.R"
42
function_analyses_paper2 <-"multi_timescales_paper_interpolation_functions_12022013.R"
43 43

  
44 44
##############################
45 45
#### Parameters and constants  
......
51 51
#direct methods: gam, kriging, gwr
52 52
in_dir1 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gam_daily_lst_comb5_11012013"
53 53
in_dir2 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_kriging_daily_lst_comb5_11022013"
54
in_dir3 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gwr_daily_lst_comb5p1_3_11062013"
54
in_dir3a <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gwr_daily_lst_comb5p1_3_11062013"
55
in_dir3b <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gwr_daily_lst_comb5p4_7_11292013"
56

  
55 57
#CAI: gam, kriging, gwr
56 58
in_dir4 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gam_cai_lst_comb5_11032013"
57 59
in_dir5 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_kriging_cai_lst_comb5_11032013"
......
60 62
in_dir7 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gam_fss_lst_comb5_11062013"
61 63
in_dir8 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_kriging_fss_lst_comb5_11052013"
62 64
in_dir9 <- "/data/project/layers/commons/Oregon_interpolation/output_data_365d_gwr_fss_lst_comb5_11052013"
63
#
65
###
66

  
67
### hold out 0-70
68

  
69
in_dir10 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gam_fss_lst_mults_0_70_comb5_11082013"
70
in_dir11 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_kriging_fss_lst_mults_0_70_comb5_11132013"
71
in_dir12 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gwr_fss_lst_mults_0_70_comb5_11162013"
72
in_dir13 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gam_cai_lst_mults_0_70_comb5_11192013"
73
in_dir14 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_kriging_cai_lst_mults_0_70_comb5_11272013"
74
in_dir15 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gwr_cai_lst_mults_0_70_comb5_11222013"
64 75

  
65 76
##raster_prediction object for comb5
66 77
#direct methods
67 78
raster_obj_file_1 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_daily_lst_comb5_11012013.RData" 
68 79
raster_obj_file_2 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_daily_lst_comb5_11022013.RData"
69
raster_obj_file_3 <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_daily_lst_comb5p1_3_11062013.RData"
70
raster_obj_file_3b <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_daily_lst_comb5p1_3_11062013.RData"
80
raster_obj_file_3a <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_daily_lst_comb5p1_3_11062013.RData"
81
raster_obj_file_3b <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_daily_lst_comb5p4_7_11292013.RData"
71 82
#CAI
72 83
raster_obj_file_4 <- "raster_prediction_obj_gam_CAI_dailyTmax_365d_gam_cai_lst_comb5_11032013.RData"
73 84
raster_obj_file_5 <- "raster_prediction_obj_kriging_CAI_dailyTmax_365d_kriging_cai_lst_comb5_11032013.RData"
......
77 88
raster_obj_file_8 <- "raster_prediction_obj_kriging_fusion_dailyTmax_365d_kriging_fss_lst_comb5_11052013.RData"
78 89
raster_obj_file_9 <- "raster_prediction_obj_gwr_fusion_dailyTmax_365d_gwr_fss_lst_comb5_11052013.RData"
79 90

  
91
## holdout
92

  
93
raster_obj_file_10 <- "raster_prediction_obj_gam_fusion_dailyTmax_365d_gam_fss_lst_mults_0_70_comb5_11082013.RData"
94
raster_obj_file_11 <- "raster_prediction_obj_kriging_fusion_dailyTmax_365d_kriging_fss_lst_mults_0_70_comb5_11132013.RData"
95
raster_obj_file_12 <- "raster_prediction_obj_gwr_fusion_dailyTmax_365d_gwr_fss_lst_mults_0_70_comb5_11162013.RData"
96
raster_obj_file_13 <- "raster_prediction_obj_gam_CAI_dailyTmax_365d_gam_cai_lst_mults_0_70_comb5_11192013.RData"
97
raster_obj_file_14 <- "raster_prediction_obj_kriging_CAI_dailyTmax_365d_kriging_cai_lst_mults_0_70_comb5_11272013.RData"
98
raster_obj_file_15 <- "raster_prediction_obj_gwr_CAI_dailyTmax_365d_gwr_cai_lst_mults_0_70_comb5_11222013.RData"
99

  
80 100
out_dir<-"/home/parmentier/Data/IPLANT_project/paper_multitime_scale__analyses_tables_fig_09032013"
81 101
setwd(out_dir)
82 102

  
......
84 104
met_stations_outfiles_obj_file<-"/data/project/layers/commons/data_workflow/output_data_365d_gam_fus_lst_test_run_07172013/met_stations_outfiles_obj_gam_fusion__365d_gam_fus_lst_test_run_07172013.RData"
85 105
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
86 106
y_var_name <- "dailyTmax"
87
out_prefix<-"analyses_11252013"
107
out_prefix<-"analyses_12022013"
88 108
ref_rast_name<- "/data/project/layers/commons/data_workflow/inputs/region_outlines_ref_files/mean_day244_rescaled.rst"                     #This is the shape file of outline of the study area. #local raster name defining resolution, exent, local projection--. set on the fly??
89 109
infile_reg_outline <- "/data/project/layers/commons/data_workflow/inputs/region_outlines_ref_files/OR83M_state_outline.shp"  #input region outline defined by polygon: Oregon
90 110
ref_rast_name <-"/data/project/layers/commons/data_workflow/inputs/region_outlines_ref_files/mean_day244_rescaled.rst"  #local raster name defining resolution, exent: oregon
......
110 130
names(s_raster)<-covar_names
111 131

  
112 132
#raster_prediction_obj_1 <-load_obj(file.path(in_dir1,raster_obj_file_1)) #comb5 gam_daily
113
#raster_prediction_obj_2 <-load_obj(file.path(in_dir2,raster_obj_file_2)) #comb5 kriging_daily
114
#raster_prediction_obj_3 <-load_obj(file.path(in_dir3,raster_obj_file_3)) #comb5 gwr_daily mod1 to mod3
115
#raster_prediction_obj_3b <-load_obj(file.path(in_dir3b,raster_obj_file_3b)) #comb5 gwr_daily mod4 to mod7
116
                             
117
#raster_prediction_obj_4 <-load_obj(file.path(in_dir4,raster_obj_file_4)) #comb5 gam_CAI
118
#raster_prediction_obj_5 <-load_obj(file.path(in_dir5,raster_obj_file_5)) #comb5 kriging_CAI
119
#raster_prediction_obj_6 <-load_obj(file.path(in_dir6,raster_obj_file_6)) #comb5 gwr_CAI 
120
#raster_prediction_obj_7 <-load_obj(file.path(in_dir7,raster_obj_file_7)) #comb5 gam_fss
121
#raster_prediction_obj_8 <-load_obj(file.path(in_dir8,raster_obj_file_8)) #comb5 kriging_fss 
122
#raster_prediction_obj_9 <-load_obj(file.path(in_dir9,raster_obj_file_9)) #comb5 gwr_fss
123 133

  
124 134
############### BEGIN SCRIPT #################
125 135

  
......
128 138
## This is a table of accuracy  
129 139

  
130 140
list_raster_obj_files  <- list(file.path(in_dir1,raster_obj_file_1),file.path(in_dir2,raster_obj_file_2),
131
                               file.path(in_dir3,raster_obj_file_3),file.path(in_dir4,raster_obj_file_4),
132
                               file.path(in_dir5,raster_obj_file_5),file.path(in_dir6,raster_obj_file_6),
133
                               file.path(in_dir7,raster_obj_file_7),file.path(in_dir8,raster_obj_file_8),
134
                               file.path(in_dir9,raster_obj_file_9))
141
                               file.path(in_dir3a,raster_obj_file_3a),file.path(in_dir3b,raster_obj_file_3b),
142
                               file.path(in_dir4,raster_obj_file_4),file.path(in_dir5,raster_obj_file_5),
143
                               file.path(in_dir6,raster_obj_file_6),file.path(in_dir7,raster_obj_file_7),
144
                               file.path(in_dir8,raster_obj_file_8),file.path(in_dir9,raster_obj_file_9))
135 145
 
136
names(list_raster_obj_files)<- c("gam_daily","kriging_daily","gwr_daily",
146
names(list_raster_obj_files)<- c("gam_daily","kriging_daily","gwr_daily","gwr_daily",
137 147
                                 "gam_CAI","kriging_CAI","gwr_CAI",
138 148
                                 "gam_fss","kriging_fss","gwr_fss")
139 149
summary_metrics_v_list<-lapply(list_raster_obj_files,FUN=function(x){x<-load_obj(x);x[["summary_metrics_v"]]$avg$rmse})                           
......
148 158
table_kriging <- do.call(cbind,table_kriging)
149 159
table_kriging <- table_kriging[1:7,]                         
150 160

  
151
#for kriging models
161
#for gwr models
152 162
table_gwr <- summary_metrics_v_list[grep("gwr",names(summary_metrics_v_list))]
153 163
table_gwr <- do.call(cbind,table_gwr)
154 164
table_gwr <- table_gwr[1:7,]                         
......
277 287
################################################
278 288
######### Figure 4. RMSE multi-timescale mulitple hold out for FSS and CAI
279 289

  
290
raster_prediction_obj_9 <-load_obj(file.path(in_dir9,raster_obj_file_9)) #comb5 gwr_fss
291

  
292
list_raster_obj_files_holdout  <- list(file.path(in_dir10,raster_obj_file_10),file.path(in_dir11,raster_obj_file_11),
293
                                  file.path(in_dir12,raster_obj_file_12),file.path(in_dir13,raster_obj_file_13),
294
                                  file.path(in_dir14,raster_obj_file_14),file.path(in_dir15,raster_obj_file_15))
295
 
296
names(list_raster_obj_files_holdout)<- c("gam_fss","kriging_fss","gwr_fss",
297
                                         "gam_CAI","kriging_CAI","gwr_CAI")
298
tb_v_list<-lapply(list_raster_obj_files_holdout,FUN=function(x){x<-load_obj(x);x[["tb_diagnostic_v"]]})                           
299
tb_s_list<-lapply(list_raster_obj_files_holdout,FUN=function(x){x<-load_obj(x);x[["tb_diagnostic_s"]]})                           
300
#tb_s_list<-mclapply(list_raster_obj_files_holdout,FUN=function(x){x<-load_obj(x);x[["tb_diagnostic_s"]]},mc.preschedule=FALSE,mc.cores = 6)                           
301
#tb_v_list<-mclapply(list_raster_obj_files_holdout,FUN=function(x){x<-load_obj(x);x[["tb_diagnostic_v"]]},mc.preschedule=FALSE,mc.cores = 6)                           
302
names(tb_s_list) <- paste("tb_s_",names(tb_s_list),sep="")
303
names(tb_v_list) <- paste("tb_v_",names(tb_v_list),sep="")
304

  
305
tb_mv_list<-lapply(list_raster_obj_files_holdout,FUN=function(x){x<-load_obj(x);x[["tb_month_diagnostic_mv"]]})                           
306
tb_ms_list<-lapply(list_raster_obj_files_holdout,FUN=function(x){x<-load_obj(x);x[["tb_month_diagnostic_ms"]]})                           
307
#tb_mv_list<-mclapply(list_raster_obj_files_holdout,FUN=function(x){x<-load_obj(x);x[["tb_month_diagnostic_v"]]},mc.preschedule=FALSE,mc.cores = 6)                           
308
#tb_ms_list<-mclapply(list_raster_obj_files_holdout,FUN=function(x){x<-load_obj(x);x[["tb_month_diagnostic_s"]]},mc.preschedule=FALSE,mc.cores = 6)                           
309
names(tb_ms_list) <- paste("tb_ms_",names(tb_ms_list),sep="") #monthly training accuracy
310
names(tb_mv_list) <- paste("tb_mv_",names(tb_mv_list),sep="") #monthly testing accuracy
311

  
312
list_tb <- c(tb_s_list,tb_v_list,tb_ms_list,tb_mv_list) #combined in one list
313
ac_metric <- "rmse"
314
#Quick function to explore accuracy make this a function to create solo figure...and run only a subset...
315
plot_accuracy_by_holdout_fun <-function(list_tb,ac_metric){
316
  #
317
  for(i in 1:length(list_tb)){
318
    #i <- i+1
319
    tb <-list_tb[[i]]
320
    plot_name <- names(list_tb)[i]
321
    pat_str <- "tb_m"
322
    if(substr(plot_name,start=1,stop=4)== pat_str){
323
      names_id <- c("pred_mod","prop")
324
      plot_formula <- paste(ac_metric,"~prop",sep="",collapse="") 
325
    }else{
326
      names_id <- c("pred_mod","prop_month")
327
      plot_formula <- paste(ac_metric,"~prop_month",collapse="")
328
    }
329
    names_mod <-unique(tb$pred_mod)
330
    prop_obj <- calc_stat_prop_tb_diagnostic(names_mod,names_id,tb)
331
    avg_tb <- prop_obj$avg_tb
332
  
333
    layout_m<-c(1,1) #one row two columns
334
    par(mfrow=layout_m)
335
    
336
    png(paste("Figure__accuracy_",ac_metric,"_prop_month_",plot_name,"_",out_prefix,".png", sep=""),
337
      height=480*layout_m[1],width=480*layout_m[2])
338
    
339
    p<- xyplot(as.formula(plot_formula),group=pred_mod,type="b",
340
          data=avg_tb,
341
          main=paste(ac_metric,plot_name,sep=" "),
342
          pch=1:length(avg_tb$pred_mod),
343
          par.settings=list(superpose.symbol = list(
344
          pch=1:length(avg_tb$pred_mod))),
345
          auto.key=list(columns=5))
346
    print(p)
347
  
348
    dev.off()
349
  }
350
  #end of function
351
}
352

  
353
#For paper...
354
#Combine figures... tb_v for GWR, Kriging and GAM for both FSS and CAI
355

  
280 356
################################################
281 357
######### Figure 5. RMSE multi-timescale mulitple hold out Overtraining tendency
282
                             
358

  
359
#For paper...
360
#Combine figures... tb_v for GWR, Kriging and GAM for both FSS and CAI
361

  
362
##### Calculate differences
363

  
364
metric_names <- c("mae","rmse","me","r")
365
diff_kriging_CAI <- diff_df(list_tb[["tb_s_kriging_CAI"]],list_tb[["tb_v_kriging_CAI"]],metric_names)
366
diff_gam_CAI <- diff_df(list_tb[["tb_s_gam_CAI"]][tb_s_gam_CAI$pred_mod!="mod_kr"],list_tb[["tb_v_gam_CAI"]],metric_names)
367
diff_gwr_CAI <- diff_df(tb_s_gwr_CAI,tb_v_gwr_CAI,metric_names)
368

  
369
layout_m<-c(1,1) #one row two columns
370
par(mfrow=layout_m)
371

  
372
png(paste("Figure__accuracy_rmse_prop_month_",plot_name,out_suffix,".png", sep=""),
373
    height=480*layout_m[1],width=480*layout_m[2])
374
boxplot(diff_kriging_CAI$rmse,diff_gam_CAI$rmse,diff_gwr_CAI$rmse,names=c("kriging_CAI","gam_CAI","gwr_CAI"),
375
        main="Difference between training and testing daily rmse")
376
dev.off()
377

  
378
#remove prop 0,
379
diff_kriging_CAI <- diff_df(tb_s_kriging_CAI[tb_s_kriging_CAI$prop_month!=0,],tb_v_kriging_CAI[tb_v_kriging_CAI$prop_month!=0,],metric_names)
380
diff_gam_CAI <- diff_df(tb_s_gam_CAI[tb_s_gam_CAI$prop_month!=0,],tb_v_gam_CAI[tb_v_gam_CAI$prop_month!=0,],metric_names)
381
diff_gwr_CAI <- diff_df(tb_s_gwr_CAI[tb_s_gwr_CAI$prop_month!=0,],tb_v_gwr_CAI[tb_v_gwr_CAI$prop_month!=0,],metric_names)
382
boxplot(diff_kriging_CAI$rmse,diff_gam_CAI$rmse,diff_gwr_CAI$rmse,names=c("kriging_CAI","gam_CAI","gwr_CAI"),
383
        main="Difference between training and testing daily rmse")
384

  
385
#now monthly accuracy: use mapply and provide  a list of of inputs...
386
metric_names <- c("mae","rmse","me","r")
387
diff_kriging_m_CAI <- diff_df(tb_ms_kriging_CAI[tb_ms_kriging_CAI$prop!=0,],tb_mv_kriging_CAI,metric_names)
388
diff_gam_m_CAI <- diff_df(tb_ms_gam_CAI[tb_ms_gam_CAI$prop!=0,],tb_mv_gam_CAI,metric_names)
389
diff_gwr_m_CAI <- diff_df(tb_ms_gwr_CAI[tb_ms_gwr_CAI$prop!=0,],tb_mv_gwr_CAI,metric_names)
390

  
391
layout_m<-c(1,1) #one row two columns
392
par(mfrow=layout_m)
393

  
394
png(paste("Figure__accuracy_rmse_prop_month_",plot_name,out_suffix,".png", sep=""),
395
    height=480*layout_m[1],width=480*layout_m[2])
396
boxplot(diff_kriging_m_CAI$rmse,diff_gam_m_CAI$rmse,diff_gwr_m_CAI$rmse,names=c("kriging_CAI","gam_CAI","gwr_CAI"),
397
        main="Difference between training and monhtly testing rmse")
398
dev.off()
399

  
400
#boxplot(diff_kriging_m_CAI$rmse,diff_gam_m_CAI$rmse,diff_gwr_CAI,names=c("kriging_CAI","gam_CAI","gwr_CAI"),
401
#        main="Difference between training and monhtly testing rmse")
402

  
403
### For fusion
404

  
405
metric_names <- c("mae","rmse","me","r")
406
diff_kriging_fus <- diff_df(tb_s_kriging_fus,tb_v_kriging_fus,metric_names)
407
diff_gam_fus <- diff_df(tb_s_gam_fus,tb_v_gam_fus,metric_names)
408
diff_gwr_fus <- diff_df(tb_s_gwr_fus,tb_v_gwr_fus,metric_names)
409

  
410
layout_m<-c(1,1) #one row two columns
411
par(mfrow=layout_m)
412

  
413
png(paste("Figure__accuracy_rmse_prop_month_",plot_name,out_suffix,".png", sep=""),
414
    height=480*layout_m[1],width=480*layout_m[2])
415
boxplot(diff_kriging_fus$rmse,diff_gam_fus$rmse,diff_gwr_fus$rmse,names=c("kriging_fus","gam_fus","gwr_fus"),
416
        main="Difference between training and testing daily rmse")
417
dev.off()
418

  
419
metric_names <- c("mae","rmse","me","r")
420
diff_kriging_m_fus <- diff_df(tb_ms_kriging_fus[tb_ms_kriging_fus$prop!=0,],tb_mv_kriging_fus[tb_mv_kriging_fus$prop!=0,],metric_names)
421
diff_gam_m_fus <- diff_df(tb_ms_gam_fus[tb_ms_gam_fus$prop!=0,],tb_mv_gam_fus[tb_mv_gam_fus$prop!=0,],metric_names)
422
diff_gwr_m_fus <- diff_df(tb_ms_gwr_fus[tb_ms_gwr_fus$prop!=0,],tb_mv_gwr_fus[tb_mv_gwr_fus$prop!=0,],metric_names)
423

  
424
layout_m<-c(1,1) #one row two columns
425
par(mfrow=layout_m)
426

  
427
png(paste("Figure__accuracy_rmse_prop_month_",plot_name,out_suffix,".png", sep=""),
428
    height=480*layout_m[1],width=480*layout_m[2])
429
boxplot(diff_kriging_m_fus$rmse,diff_gam_m_fus$rmse,diff_gwr_m_fus$rmse, names=c("kriging_fus","gam_fus","gwr_fus"),
430
        main="Difference between training and testing FUS rmse")
431
dev.off()
432

  
283 433
################################################
284 434
######### Figure 6: Spatial pattern of prediction for one day (maps)
285 435

  
......
402 552
#title_plot2
403 553
#rast_pred2
404 554
#debug(plot_transect_m2)
405
trans_data2<-plot_transect_m2(list_transect2,rast_pred2,title_plot2,disp=FALSE,m_layers_sc)
406
trans_data3<-plot_transect_m2(list_transect3,rast_pred3,title_plot3,disp=FALSE,m_layers_sc)
555
trans_data2 <-plot_transect_m2(list_transect2,rast_pred2,title_plot2,disp=FALSE,m_layers_sc)
556
trans_data3 <-plot_transect_m2(list_transect3,rast_pred3,title_plot3,disp=FALSE,m_layers_sc)
407 557

  
408 558
################################################
559

  
409 560
#Figure 9: Image differencing and land cover  
410 561
#Do for january and September...
411 562
png(paste("Fig9_image_difference_",date_selected,out_prefix,".png", sep=""),

Also available in: Unified diff