Project

General

Profile

« Previous | Next » 

Revision fd7dcc1c

Added by Benoit Parmentier about 11 years ago

analyses paper part 5: monthly hold out gam CAI first results

View differences:

climate/research/oregon/interpolation/analyses_papers_methods_comparison_part5.R
1
######################################## Paper Methods_comparison: Analyses part 5 #######################################
2
############################ Scripts for figures and analyses for paper 2 #####################################
3
#This script performs analyses and create figures for the FSS paper.
4
#It uses inputs from interpolation objects created at earlier stages...     
5
#Note that this is exploratory code i.e. not part of the worklfow.
6
#AUTHOR: Benoit Parmentier                                                                       #
7
#DATE: 09/06/2013                                                                                #
8
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#491--                                  #
9
###################################################################################################
10

  
11
###Loading R library and packages                                                      
12
#library(gtools)                                        # loading some useful tools 
13
library(mgcv)                   # GAM package by Wood 2006 (version 2012)
14
library(sp)                     # Spatial pacakge with class definition by Bivand et al. 2008
15
library(spdep)                  # Spatial package with methods and spatial stat. by Bivand et al. 2012
16
library(rgdal)                  # GDAL wrapper for R, spatial utilities (Keitt et al. 2012)
17
library(gstat)                  # Kriging and co-kriging by Pebesma et al. 2004
18
library(automap)                # Automated Kriging based on gstat module by Hiemstra et al. 2008
19
library(spgwr)
20
library(maptools)
21
library(graphics)
22
library(parallel)               # Urbanek S. and Ripley B., package for multi cores & parralel processing
23
library(raster)
24
library(rasterVis)
25
library(plotrix)                # Draw circle on graph and additional plotting options
26
library(reshape)                # Data format and type transformation
27

  
28
##################### Function used in the script ##############
29

  
30
## Extract a list of object from an object: Useful to extract information from
31
## RData objects saved in the interpolation phase.
32

  
33
load_obj <- function(f)
34
{
35
  env <- new.env()
36
  nm <- load(f, env)[1]
37
  env[[nm]]
38
}
39

  
40
### Need to improve this function!!!
41
calc_stat_prop_tb_diagnostic <-function(names_mod,names_id,tb){
42
  
43
  t<-melt(subset(tb,pred_mod==names_mod),
44
          measure=c("mae","rmse","r","me","m50"), 
45
          id=names_id,
46
          na.rm=T)
47
  char_tmp <-rep("+",length=length(names_id)-1)
48
  var_summary <-paste(names_id,sep="",collapse=char_tmp)
49
  var_summary_formula <-paste(var_summary,collpase="~variable")
50
  avg_tb<-cast(t,var_summary_formula,mean)
51
  sd_tb<-cast(t,var_summary_formula,sd)
52
  n_tb<-cast(t,var_summary_formula,length)
53
  #n_NA<-cast(t,dst_cat1~variable,is.na)
54
  
55
  #### prepare returning object
56
  prop_obj<-list(tb,avg_tb,sd_tb,n_tb)
57
  names(prop_obj) <-c("tb","avg_tb","sd_tb","n_tb")
58
  
59
  return(prop_obj)
60
}
61

  
62
################## PARAMETERS ##########
63

  
64

  
65
#"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_day_lst_comb4_07152013/"
66
in_dir1 <- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_CAI_lst_comb3_08312013/"
67
in_dir <- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_CAI_lst_comb3_09012013"
68
in_dir2 <-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_CAI_lst_comb3_09032013"
69
in_dir4 <- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_kriging_CAI_lst_comb3_09042013"
70

  
71
raster_prediction_obj1 <-load_obj(file.path(in_dir1,"raster_prediction_obj_gam_CAI_dailyTmax_365d_gam_CAI_lst_comb3_08312013.RData"))
72
raster_prediction_obj <-load_obj(file.path(in_dir,"raster_prediction_obj_gam_CAI_dailyTmax_365d_gam_CAI_lst_comb3_09012013.RData"))
73
raster_prediction_obj2 <-load_obj(file.path(in_dir2,"raster_prediction_obj_gam_CAI_dailyTmax_365d_gam_CAI_lst_comb3_09032013.RData"))
74
raster_prediction_obj4 <-load_obj(file.path(in_dir4,"raster_prediction_obj_kriging_CAI_dailyTmax_365d_kriging_CAI_lst_comb3_09042013.RData"))
75
                                                                    
76
out_dir<-"/home/parmentier/Data/IPLANT_project/paper_multitime_scale__analyses_tables_fig_09032013"
77
setwd(out_dir)
78
y_var_name <- "dailyTmax"
79
y_var_month <- "TMax"
80
#y_var_month <- "LSTD_bias"
81
out_suffix <- "_OR_09032013"
82
#script_path<-"/data/project/layers/commons/data_workflow/env_layers_scripts/"
83
#### FUNCTION USED IN SCRIPT
84

  
85
function_analyses_paper <-"contribution_of_covariates_paper_interpolation_functions_08152013.R"
86

  
87
script_path<-"/home/parmentier/Data/IPLANT_project/env_layers_scripts/" #path to script
88
source(file.path(script_path,function_analyses_paper)) #source all functions used in this script.
89

  
90
#################################################################################
91
############ ANALYSES 1: Average accuracy per proportion for monthly hold out in muli-timescale mehtods... #######
92

  
93
tb_mv_gam_CAI <-rbind(raster_prediction_obj1$tb_month_diagnostic_v,raster_prediction_obj$tb_month_diagnostic_v,raster_prediction_obj2$tb_month_diagnostic_v)
94
tb_ms_gam_CAI <-rbind(raster_prediction_obj1$tb_month_diagnostic_s,raster_prediction_obj$tb_month_diagnostic_s,raster_prediction_obj2$tb_month_diagnostic_s)
95

  
96
tb_v_gam_CAI <-rbind(raster_prediction_obj1$tb_diagnostic_v,raster_prediction_obj$tb_diagnostic_v,raster_prediction_obj2$tb_diagnostic_v)
97
tb_s_gam_CAI <-rbind(raster_prediction_obj1$tb_diagnostic_s,raster_prediction_obj$tb_diagnostic_s,raster_prediction_obj2$tb_diagnostic_s)
98

  
99
prop_obj_gam_CAI_v <- calc_stat_prop_tb_diagnostic(names_mod,names_id,tb_v)
100

  
101
tb_mv_kriging_CAI <- raster_prediction_obj4$tb_month_diagnostic_v
102
tb_ms_kriging_CAI <- raster_prediction_obj4$tb_month_diagnostic_s
103

  
104
tb_v_kriging_CAI <- raster_prediction_obj4$tb_diagnostic_v
105
tb_s_kriging_CAI <- raster_prediction_obj4$tb_diagnostic_s
106
                                  
107
list_tb <-list(tb_v_gam_CAI,tb_v_kriging_CAI,tb_s_gam_CAI,tb_s_kriging_CAI,tb_mv_gam_CAI,tb_mv_kriging_CAI,tb_ms_gam_CAI,tb_ms_kriging_CAI) #Add fusion here
108
names(list_tb) <- c("tb_v_gam_CAI","tb_v_kriging_CAI","tb_s_gam_CAI","tb_s_kriging_CAI","tb_mv_gam_CAI","tb_mv_kriging_CAI","tb_ms_gam_CAI","tb_ms_kriging_CAI") #Add fusion here
109

  
110
##### DAILY AVERAGE ACCURACY : PLOT AND DIFFERENCES...
111

  
112
for(i in 1:length(list_tb)){
113
  i<-i+1
114
  tb <-list_tb[[i]]
115
  plot_name <- names(list_tb)[i]
116
  pat_str <- "tb_m"
117
  if(substr(plot_name,start=1,stop=4)== pat_str){
118
    names_id <- c("pred_mod","prop")
119
    plot_formula <- paste("rmse","~prop",sep="",collapse="")
120
    
121
  }else{
122
    names_id <- c("pred_mod","prop_month")
123
    plot_formula <- paste("rmse","~prop_month",collapse="")
124
  }
125
  names_mod <-unique(tb$pred_mod)
126
  
127
  prop_obj <- calc_stat_prop_tb_diagnostic(names_mod,names_id,tb)
128
  
129
  avg_tb <- prop_obj$avg_tb
130
  
131
  layout_m<-c(1,1) #one row two columns
132
  par(mfrow=layout_m)
133
    
134
  png(paste("Figure__accuracy_rmse_prop_month_",plot_name,out_suffix,".png", sep=""),
135
      height=480*layout_m[1],width=480*layout_m[2])
136
    
137
  xyplot(as.formula(plot_formula),group=pred_mod,type="b",
138
          data=avg_tb,
139
          main=paste("rmse ",plot_name,sep=" "),
140
          pch=1:length(avg_tb$pred_mod),
141
          par.settings=list(superpose.symbol = list(
142
            pch=1:length(avg_tb$pred_mod))),
143
          auto.key=list(columns=5))
144
  
145
  dev.off()
146
  
147
}
148

  
149
#xyplot( rmse ~ prop_month | pred_mod,type="b",data=as.data.frame(avg_tb))
150

  
151
##### Calculate differences
152

  
153
#Calculate the difference between training and testing in two different data.frames. Columns to substract are provided.
154
diff_df<-function(tb_s,tb_v,list_metric_names){
155
  tb_diff<-vector("list", length(list_metric_names))
156
  for (i in 1:length(list_metric_names)){
157
    metric_name<-list_metric_names[i]
158
    tb_diff[[i]] <-tb_s[,c(metric_name)] - tb_v[,c(metric_name)]
159
  }
160
  names(tb_diff)<-list_metric_names
161
  tb_diff<-as.data.frame(do.call(cbind,tb_diff))
162
  return(tb_diff)
163
}
164

  
165
metric_names <- c("mae","rmse","me","r")
166
diff_kriging_CAI <- diff_df(tb_s_kriging_CAI,tb_v_kriging_CAI,metric_names)
167
diff_gam_CAI <- diff_df(tb_s_gam_CAI,tb_v_gam_CAI,metric_names)
168

  
169
boxplot(diff_kriging_CAI$rmse,diff_gam_CAI$rmse)
170

  

Also available in: Unified diff