Revision fd7dcc1c
Added by Benoit Parmentier about 11 years ago
climate/research/oregon/interpolation/analyses_papers_methods_comparison_part5.R | ||
---|---|---|
1 |
######################################## Paper Methods_comparison: Analyses part 5 ####################################### |
|
2 |
############################ Scripts for figures and analyses for paper 2 ##################################### |
|
3 |
#This script performs analyses and create figures for the FSS paper. |
|
4 |
#It uses inputs from interpolation objects created at earlier stages... |
|
5 |
#Note that this is exploratory code i.e. not part of the worklfow. |
|
6 |
#AUTHOR: Benoit Parmentier # |
|
7 |
#DATE: 09/06/2013 # |
|
8 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#491-- # |
|
9 |
################################################################################################### |
|
10 |
|
|
11 |
###Loading R library and packages |
|
12 |
#library(gtools) # loading some useful tools |
|
13 |
library(mgcv) # GAM package by Wood 2006 (version 2012) |
|
14 |
library(sp) # Spatial pacakge with class definition by Bivand et al. 2008 |
|
15 |
library(spdep) # Spatial package with methods and spatial stat. by Bivand et al. 2012 |
|
16 |
library(rgdal) # GDAL wrapper for R, spatial utilities (Keitt et al. 2012) |
|
17 |
library(gstat) # Kriging and co-kriging by Pebesma et al. 2004 |
|
18 |
library(automap) # Automated Kriging based on gstat module by Hiemstra et al. 2008 |
|
19 |
library(spgwr) |
|
20 |
library(maptools) |
|
21 |
library(graphics) |
|
22 |
library(parallel) # Urbanek S. and Ripley B., package for multi cores & parralel processing |
|
23 |
library(raster) |
|
24 |
library(rasterVis) |
|
25 |
library(plotrix) # Draw circle on graph and additional plotting options |
|
26 |
library(reshape) # Data format and type transformation |
|
27 |
|
|
28 |
##################### Function used in the script ############## |
|
29 |
|
|
30 |
## Extract a list of object from an object: Useful to extract information from |
|
31 |
## RData objects saved in the interpolation phase. |
|
32 |
|
|
33 |
load_obj <- function(f) |
|
34 |
{ |
|
35 |
env <- new.env() |
|
36 |
nm <- load(f, env)[1] |
|
37 |
env[[nm]] |
|
38 |
} |
|
39 |
|
|
40 |
### Need to improve this function!!! |
|
41 |
calc_stat_prop_tb_diagnostic <-function(names_mod,names_id,tb){ |
|
42 |
|
|
43 |
t<-melt(subset(tb,pred_mod==names_mod), |
|
44 |
measure=c("mae","rmse","r","me","m50"), |
|
45 |
id=names_id, |
|
46 |
na.rm=T) |
|
47 |
char_tmp <-rep("+",length=length(names_id)-1) |
|
48 |
var_summary <-paste(names_id,sep="",collapse=char_tmp) |
|
49 |
var_summary_formula <-paste(var_summary,collpase="~variable") |
|
50 |
avg_tb<-cast(t,var_summary_formula,mean) |
|
51 |
sd_tb<-cast(t,var_summary_formula,sd) |
|
52 |
n_tb<-cast(t,var_summary_formula,length) |
|
53 |
#n_NA<-cast(t,dst_cat1~variable,is.na) |
|
54 |
|
|
55 |
#### prepare returning object |
|
56 |
prop_obj<-list(tb,avg_tb,sd_tb,n_tb) |
|
57 |
names(prop_obj) <-c("tb","avg_tb","sd_tb","n_tb") |
|
58 |
|
|
59 |
return(prop_obj) |
|
60 |
} |
|
61 |
|
|
62 |
################## PARAMETERS ########## |
|
63 |
|
|
64 |
|
|
65 |
#"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_day_lst_comb4_07152013/" |
|
66 |
in_dir1 <- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_CAI_lst_comb3_08312013/" |
|
67 |
in_dir <- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_CAI_lst_comb3_09012013" |
|
68 |
in_dir2 <-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_CAI_lst_comb3_09032013" |
|
69 |
in_dir4 <- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_kriging_CAI_lst_comb3_09042013" |
|
70 |
|
|
71 |
raster_prediction_obj1 <-load_obj(file.path(in_dir1,"raster_prediction_obj_gam_CAI_dailyTmax_365d_gam_CAI_lst_comb3_08312013.RData")) |
|
72 |
raster_prediction_obj <-load_obj(file.path(in_dir,"raster_prediction_obj_gam_CAI_dailyTmax_365d_gam_CAI_lst_comb3_09012013.RData")) |
|
73 |
raster_prediction_obj2 <-load_obj(file.path(in_dir2,"raster_prediction_obj_gam_CAI_dailyTmax_365d_gam_CAI_lst_comb3_09032013.RData")) |
|
74 |
raster_prediction_obj4 <-load_obj(file.path(in_dir4,"raster_prediction_obj_kriging_CAI_dailyTmax_365d_kriging_CAI_lst_comb3_09042013.RData")) |
|
75 |
|
|
76 |
out_dir<-"/home/parmentier/Data/IPLANT_project/paper_multitime_scale__analyses_tables_fig_09032013" |
|
77 |
setwd(out_dir) |
|
78 |
y_var_name <- "dailyTmax" |
|
79 |
y_var_month <- "TMax" |
|
80 |
#y_var_month <- "LSTD_bias" |
|
81 |
out_suffix <- "_OR_09032013" |
|
82 |
#script_path<-"/data/project/layers/commons/data_workflow/env_layers_scripts/" |
|
83 |
#### FUNCTION USED IN SCRIPT |
|
84 |
|
|
85 |
function_analyses_paper <-"contribution_of_covariates_paper_interpolation_functions_08152013.R" |
|
86 |
|
|
87 |
script_path<-"/home/parmentier/Data/IPLANT_project/env_layers_scripts/" #path to script |
|
88 |
source(file.path(script_path,function_analyses_paper)) #source all functions used in this script. |
|
89 |
|
|
90 |
################################################################################# |
|
91 |
############ ANALYSES 1: Average accuracy per proportion for monthly hold out in muli-timescale mehtods... ####### |
|
92 |
|
|
93 |
tb_mv_gam_CAI <-rbind(raster_prediction_obj1$tb_month_diagnostic_v,raster_prediction_obj$tb_month_diagnostic_v,raster_prediction_obj2$tb_month_diagnostic_v) |
|
94 |
tb_ms_gam_CAI <-rbind(raster_prediction_obj1$tb_month_diagnostic_s,raster_prediction_obj$tb_month_diagnostic_s,raster_prediction_obj2$tb_month_diagnostic_s) |
|
95 |
|
|
96 |
tb_v_gam_CAI <-rbind(raster_prediction_obj1$tb_diagnostic_v,raster_prediction_obj$tb_diagnostic_v,raster_prediction_obj2$tb_diagnostic_v) |
|
97 |
tb_s_gam_CAI <-rbind(raster_prediction_obj1$tb_diagnostic_s,raster_prediction_obj$tb_diagnostic_s,raster_prediction_obj2$tb_diagnostic_s) |
|
98 |
|
|
99 |
prop_obj_gam_CAI_v <- calc_stat_prop_tb_diagnostic(names_mod,names_id,tb_v) |
|
100 |
|
|
101 |
tb_mv_kriging_CAI <- raster_prediction_obj4$tb_month_diagnostic_v |
|
102 |
tb_ms_kriging_CAI <- raster_prediction_obj4$tb_month_diagnostic_s |
|
103 |
|
|
104 |
tb_v_kriging_CAI <- raster_prediction_obj4$tb_diagnostic_v |
|
105 |
tb_s_kriging_CAI <- raster_prediction_obj4$tb_diagnostic_s |
|
106 |
|
|
107 |
list_tb <-list(tb_v_gam_CAI,tb_v_kriging_CAI,tb_s_gam_CAI,tb_s_kriging_CAI,tb_mv_gam_CAI,tb_mv_kriging_CAI,tb_ms_gam_CAI,tb_ms_kriging_CAI) #Add fusion here |
|
108 |
names(list_tb) <- c("tb_v_gam_CAI","tb_v_kriging_CAI","tb_s_gam_CAI","tb_s_kriging_CAI","tb_mv_gam_CAI","tb_mv_kriging_CAI","tb_ms_gam_CAI","tb_ms_kriging_CAI") #Add fusion here |
|
109 |
|
|
110 |
##### DAILY AVERAGE ACCURACY : PLOT AND DIFFERENCES... |
|
111 |
|
|
112 |
for(i in 1:length(list_tb)){ |
|
113 |
i<-i+1 |
|
114 |
tb <-list_tb[[i]] |
|
115 |
plot_name <- names(list_tb)[i] |
|
116 |
pat_str <- "tb_m" |
|
117 |
if(substr(plot_name,start=1,stop=4)== pat_str){ |
|
118 |
names_id <- c("pred_mod","prop") |
|
119 |
plot_formula <- paste("rmse","~prop",sep="",collapse="") |
|
120 |
|
|
121 |
}else{ |
|
122 |
names_id <- c("pred_mod","prop_month") |
|
123 |
plot_formula <- paste("rmse","~prop_month",collapse="") |
|
124 |
} |
|
125 |
names_mod <-unique(tb$pred_mod) |
|
126 |
|
|
127 |
prop_obj <- calc_stat_prop_tb_diagnostic(names_mod,names_id,tb) |
|
128 |
|
|
129 |
avg_tb <- prop_obj$avg_tb |
|
130 |
|
|
131 |
layout_m<-c(1,1) #one row two columns |
|
132 |
par(mfrow=layout_m) |
|
133 |
|
|
134 |
png(paste("Figure__accuracy_rmse_prop_month_",plot_name,out_suffix,".png", sep=""), |
|
135 |
height=480*layout_m[1],width=480*layout_m[2]) |
|
136 |
|
|
137 |
xyplot(as.formula(plot_formula),group=pred_mod,type="b", |
|
138 |
data=avg_tb, |
|
139 |
main=paste("rmse ",plot_name,sep=" "), |
|
140 |
pch=1:length(avg_tb$pred_mod), |
|
141 |
par.settings=list(superpose.symbol = list( |
|
142 |
pch=1:length(avg_tb$pred_mod))), |
|
143 |
auto.key=list(columns=5)) |
|
144 |
|
|
145 |
dev.off() |
|
146 |
|
|
147 |
} |
|
148 |
|
|
149 |
#xyplot( rmse ~ prop_month | pred_mod,type="b",data=as.data.frame(avg_tb)) |
|
150 |
|
|
151 |
##### Calculate differences |
|
152 |
|
|
153 |
#Calculate the difference between training and testing in two different data.frames. Columns to substract are provided. |
|
154 |
diff_df<-function(tb_s,tb_v,list_metric_names){ |
|
155 |
tb_diff<-vector("list", length(list_metric_names)) |
|
156 |
for (i in 1:length(list_metric_names)){ |
|
157 |
metric_name<-list_metric_names[i] |
|
158 |
tb_diff[[i]] <-tb_s[,c(metric_name)] - tb_v[,c(metric_name)] |
|
159 |
} |
|
160 |
names(tb_diff)<-list_metric_names |
|
161 |
tb_diff<-as.data.frame(do.call(cbind,tb_diff)) |
|
162 |
return(tb_diff) |
|
163 |
} |
|
164 |
|
|
165 |
metric_names <- c("mae","rmse","me","r") |
|
166 |
diff_kriging_CAI <- diff_df(tb_s_kriging_CAI,tb_v_kriging_CAI,metric_names) |
|
167 |
diff_gam_CAI <- diff_df(tb_s_gam_CAI,tb_v_gam_CAI,metric_names) |
|
168 |
|
|
169 |
boxplot(diff_kriging_CAI$rmse,diff_gam_CAI$rmse) |
|
170 |
|
Also available in: Unified diff
analyses paper part 5: monthly hold out gam CAI first results