Revision 4fbd2f73
Added by Benoit Parmentier over 11 years ago
climate/research/oregon/interpolation/analyses_papers_methods_comparison_part2.R | ||
---|---|---|
1 |
######################################## Paper Methods_comparison ####################################### |
|
2 |
############################ Scripts for figures and analyses for the the IBS poster ##################################### |
|
3 |
#This script performs analyses and create figures for the FSS paper. |
|
4 |
#It uses inputs from interpolation objects created at earlier stages... # |
|
5 |
#AUTHOR: Benoit Parmentier # |
|
6 |
#DATE: 07/20/2013 # |
|
7 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#491-- # |
|
8 |
################################################################################################### |
|
9 |
|
|
10 |
###Loading R library and packages |
|
11 |
#library(gtools) # loading some useful tools |
|
12 |
library(mgcv) # GAM package by Wood 2006 (version 2012) |
|
13 |
library(sp) # Spatial pacakge with class definition by Bivand et al. 2008 |
|
14 |
library(spdep) # Spatial package with methods and spatial stat. by Bivand et al. 2012 |
|
15 |
library(rgdal) # GDAL wrapper for R, spatial utilities (Keitt et al. 2012) |
|
16 |
library(gstat) # Kriging and co-kriging by Pebesma et al. 2004 |
|
17 |
library(automap) # Automated Kriging based on gstat module by Hiemstra et al. 2008 |
|
18 |
library(spgwr) |
|
19 |
library(maptools) |
|
20 |
library(graphics) |
|
21 |
library(parallel) # Urbanek S. and Ripley B., package for multi cores & parralel processing |
|
22 |
library(raster) |
|
23 |
library(rasterVis) |
|
24 |
library(plotrix) # Draw circle on graph and additional plotting options |
|
25 |
library(reshape) # Data format and type transformation |
|
26 |
|
|
27 |
## Functions |
|
28 |
#loading R objects that might have similar names |
|
29 |
load_obj <- function(f) |
|
30 |
{ |
|
31 |
env <- new.env() |
|
32 |
nm <- load(f, env)[1] |
|
33 |
env[[nm]] |
|
34 |
} |
|
35 |
|
|
36 |
script_path<-"/home/parmentier/Data/IPLANT_project/env_layers_scripts/" |
|
37 |
#source(file.path(script_path,"interpolation_method_day_function_multisampling_06082013.R")) #Include GAM_day |
|
38 |
|
|
39 |
## Parmeters |
|
40 |
|
|
41 |
#in_dir<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_GAM_fus_all_lst_05312013" |
|
42 |
#in_dir<-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_day_lst_comb3_07092013/" |
|
43 |
in_dir<-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_day_lst_comb4_07152013/" |
|
44 |
#in_dir<-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_day_mult_lst_comb3_07202013" |
|
45 |
|
|
46 |
out_dir<-"" |
|
47 |
setwd(in_dir) |
|
48 |
y_var_name <- "dailyTmax" |
|
49 |
y_var_month <- "TMax" |
|
50 |
#y_var_month <- "LSTD_bias" |
|
51 |
|
|
52 |
out_prefix<-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_GAM_fus_all_lst_05312013" |
|
53 |
method_interpolation <- "gam_daily" |
|
54 |
covar_obj_file <- "covar_obj__365d_gam_day_lst_comb3_07092013.RData" |
|
55 |
raster_obj_file <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_day_lst_comb3_07092013.RData" |
|
56 |
raster_obj_file <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_day_lst_comb4_07152013.RData" |
|
57 |
#raster_obj_file <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_day_mult_lst_comb3_07202013.RData" |
|
58 |
|
|
59 |
|
|
60 |
#Load objects containing training, testing, models objects |
|
61 |
covar_obj <-load_obj(covar_obj_file) |
|
62 |
infile_covariates <- covar_obj$infile_covariates |
|
63 |
infile_reg_outline <- covar_obj$infile_reg_outline |
|
64 |
covar_names<- covar_obj$covar_names |
|
65 |
|
|
66 |
raster_prediction_obj <-load_obj(raster_obj_file) |
|
67 |
|
|
68 |
names(raster_prediction_obj) #list of two objects |
|
69 |
|
|
70 |
raster_prediction_obj$summary_metrics_v |
|
71 |
|
|
72 |
j<-1 #selected month for climatology |
|
73 |
i<-1 |
|
74 |
data_s <- raster_prediction_obj$method_mod_obj[[i]]$data_s #training data |
|
75 |
data_v <- raster_prediction_obj$method_mod_obj[[i]]$data_v #testing data |
|
76 |
|
|
77 |
#data_month1 <- clim_method_mod_obj[[1]]$data_month #monthly data |
|
78 |
#data_month <- clim_method_mod_obj[[j]]$data_month #monthly data |
|
79 |
#clim_mod_obj_month <- clim_method_mod_obj[[j]] |
|
80 |
#names(clim_mod_obj_month) |
|
81 |
|
|
82 |
##### |
|
83 |
s_raster <- brick(infile_covariates) |
|
84 |
names(s_raster)<-covar_names |
|
85 |
|
|
86 |
|
|
87 |
### COMPARING MEANS AND DISTRIBUTIONS |
|
88 |
|
|
89 |
tb<- raster_prediction_obj$tb_diagnostic_v |
|
90 |
|
|
91 |
mod_comp<-lm(tb$rmse ~ tb$pred_mod) |
|
92 |
summary(mod_comp) |
|
93 |
|
|
94 |
m<-aov(tb$rmse ~tb$pred_mod) |
|
95 |
TukeyHSD(m) |
|
96 |
plot(TukeyHSD(m)) |
|
97 |
#Testing for normality |
|
98 |
|
|
99 |
histogram(~tb$rmse| tb$pred_mod) |
|
100 |
x1<-subset(tb,tb$pred_mod=="mod1") |
|
101 |
shapiro.test(x1$rmse) |
|
102 |
|
|
103 |
x3<-subset(tb,tb$pred_mod=="mod4") |
|
104 |
shapiro.test(x3$rmse) |
|
105 |
|
|
106 |
x5<-subset(tb,tb$pred_mod=="mod5") |
|
107 |
shapiro.test(x1$rmse) |
|
108 |
|
|
109 |
xyplot(tb$rmse~tb$mae | tb$pred_mod) #scatterplot by categories... |
|
110 |
xyplot(tb$rmse~tb$mae, groups=tb$pred_mod) #scatterplot by categories... |
|
111 |
|
|
112 |
|
|
113 |
xyplot(tb$rmse~1:365 | tb$pred_mod,type="l") |
|
114 |
xyplot(rmse~1:365, groups=pred_mod,type="l",data=tb) |
|
115 |
tb1<-subset(tb,tb$pred_mod%in%c("mod1","mod2","mod3","mod4","mod5","mod6")) |
|
116 |
xyplot(rmse~1:365, groups=pred_mod,type="l",data=tb1) |
|
117 |
|
|
118 |
wilcox.test(x1$rmse,x5$rmse) |
|
119 |
|
|
120 |
lapply(tb_list,FUN=wilcox.test,x1$rmse) |
|
121 |
|
|
122 |
## ACCOUNT FOR NON NORMALITY: |
|
123 |
|
|
124 |
#mod_compk<-kruskal.test(tbp$rmse ~ as.factor(tbp$pred_mod)) |
|
125 |
|
|
126 |
mod_compk<-kruskal.test(tb1$rmse ~ as.factor(tb1$pred_mod)) |
|
127 |
mod_compk |
|
128 |
|
|
129 |
#kruskalmc {pgirmess} |
|
130 |
library(pgirmess) |
|
131 |
mod_compk<-kruskalmc(tb1$rmse ~ as.factor(tb1$pred_mod)) |
|
132 |
mod_compk<-kruskalmc(tbp$rmse ~ as.factor(tbp$pred_mod)) |
|
133 |
|
|
134 |
mod_compk |
|
135 |
|
|
136 |
m<-aov(tb$rmse ~tb$pred_mod) |
|
137 |
TukeyHSD(m) |
|
138 |
plot(TukeyHSD(m)) |
|
139 |
|
|
140 |
t<-melt(tb, |
|
141 |
measure=c("mae","rmse","r","m50"), |
|
142 |
id=c("pred_mod","prop"), |
|
143 |
na.rm=T) |
|
144 |
|
|
145 |
avg_tb<-cast(t,pred_mod+prop~variable,mean) |
|
146 |
sd_tb<-cast(t,pred_mod+prop~variable,sd) |
|
147 |
|
|
148 |
n_tb<-cast(t,pred_mod+prop~variable,length) |
|
149 |
|
|
150 |
n=n_tb[["rmse"]] |
|
151 |
y <-avg_tb[["rmse"]] |
|
152 |
x<- 1:length(avg_tb[["pred_mod"]]) |
|
153 |
y_sd <- sd_tb[["rmse"]] |
|
154 |
|
|
155 |
ciw <-y_sd |
|
156 |
#ciw2 <- qt(0.975, n) * y_sd2 / sqrt(n) |
|
157 |
|
|
158 |
plotCI(y=y, x=x, uiw=ciw, col="red", main=" Mean and Std_dev RMSE per model", barcol="blue", lwd=1, |
|
159 |
ylab="RMSE (C)", xlab="Models") |
|
160 |
|
|
161 |
ciw <- qt(0.975, n) * y_sd / sqrt(n) |
|
162 |
|
|
163 |
plotCI(y=y, x=x, uiw=ciw, col="red", main=" Mean and CI RMSE per model", barcol="blue", lwd=1, |
|
164 |
ylab="RMSE (C)", xlab="Models") |
|
165 |
|
|
166 |
#lines(x,y,col="red") |
|
167 |
#legend("bottomright",legend=c("fus"), cex=1.2, col=c("red"), |
|
168 |
# lty=1, title="RMSE") |
|
169 |
|
|
170 |
### BY proportion use xy box plot? |
|
171 |
|
|
172 |
|
|
173 |
|
|
174 |
|
|
175 |
|
|
176 |
|
|
177 |
|
|
178 |
|
Also available in: Unified diff
analyses papers part 2: comparison comb3 baseline covariates