Revision 455b069b
Added by Benoit Parmentier over 11 years ago
climate/research/oregon/interpolation/analyses_papers_methods_comparison_part3.R | ||
---|---|---|
1 |
######################################## Paper Methods_comparison ####################################### |
|
2 |
############################ Scripts for figures and analyses for the the IBS poster ##################################### |
|
3 |
#This script performs analyses and create figures for the FSS paper. |
|
4 |
#It uses inputs from interpolation objects created at earlier stages... |
|
5 |
#Note that this is exploratory code i.e. not part of the worklfow. |
|
6 |
#AUTHOR: Benoit Parmentier # |
|
7 |
#DATE: 08/02/2013 # |
|
8 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#491-- # |
|
9 |
################################################################################################### |
|
10 |
|
|
11 |
###Loading R library and packages |
|
12 |
#library(gtools) # loading some useful tools |
|
13 |
library(mgcv) # GAM package by Wood 2006 (version 2012) |
|
14 |
library(sp) # Spatial pacakge with class definition by Bivand et al. 2008 |
|
15 |
library(spdep) # Spatial package with methods and spatial stat. by Bivand et al. 2012 |
|
16 |
library(rgdal) # GDAL wrapper for R, spatial utilities (Keitt et al. 2012) |
|
17 |
library(gstat) # Kriging and co-kriging by Pebesma et al. 2004 |
|
18 |
library(automap) # Automated Kriging based on gstat module by Hiemstra et al. 2008 |
|
19 |
library(spgwr) |
|
20 |
library(maptools) |
|
21 |
library(graphics) |
|
22 |
library(parallel) # Urbanek S. and Ripley B., package for multi cores & parralel processing |
|
23 |
library(raster) |
|
24 |
library(rasterVis) |
|
25 |
library(plotrix) # Draw circle on graph and additional plotting options |
|
26 |
library(reshape) # Data format and type transformation |
|
27 |
|
|
28 |
##################### Function used in the script ############## |
|
29 |
|
|
30 |
## Extract a list of object from an object: Useful to extract information from |
|
31 |
## RData objects saved in the interpolation phase. |
|
32 |
|
|
33 |
extract_list_from_list_obj<-function(obj_list,list_name){ |
|
34 |
#Create a list of an object from a given list of object using a name prodived as input |
|
35 |
|
|
36 |
list_tmp<-vector("list",length(obj_list)) |
|
37 |
for (i in 1:length(obj_list)){ |
|
38 |
tmp<-obj_list[[i]][[list_name]] #double bracket to return data.frame |
|
39 |
list_tmp[[i]]<-tmp |
|
40 |
} |
|
41 |
return(list_tmp) #this is a data.frame |
|
42 |
} |
|
43 |
|
|
44 |
## Produce data.frame with residuals for models and distance to closest fitting station |
|
45 |
calc_dist_ref_data_point <- function(i,list_param){ |
|
46 |
#This function creates a list of data.frame containing the distance to teh closest |
|
47 |
# reference point (e.g. fitting station) for a give data frame. |
|
48 |
#Inputs: |
|
49 |
#data_s: given data.frame from wich distance is computed |
|
50 |
#data_v: reference data.frame, destination, often the fitting points used in analyses |
|
51 |
#i: index variable to operate on list |
|
52 |
#names_var: |
|
53 |
#Outputs: |
|
54 |
#list_dstspat_er |
|
55 |
|
|
56 |
#Parsing input arguments |
|
57 |
data_s<-list_param$data_s[[i]] |
|
58 |
data_v<-list_param$data_v[[i]] |
|
59 |
|
|
60 |
names_var<-list_param$names_var |
|
61 |
|
|
62 |
###### |
|
63 |
|
|
64 |
names_var<-intersect(names_var,names(data_v)) #there may be missing columns |
|
65 |
#use columns that exists |
|
66 |
|
|
67 |
d_s_v<-matrix(0,nrow(data_v),nrow(data_s)) |
|
68 |
for(k in 1:nrow(data_s)){ |
|
69 |
pt<-data_s[k,] |
|
70 |
d_pt<-(spDistsN1(data_v,pt,longlat=FALSE))/1000 #Distance to station k in km |
|
71 |
d_s_v[,k]<-d_pt |
|
72 |
} |
|
73 |
|
|
74 |
#Create data.frame with position, ID, dst and residuals... |
|
75 |
|
|
76 |
pos<-vector("numeric",nrow(data_v)) |
|
77 |
y<-vector("numeric",nrow(data_v)) |
|
78 |
dst<-vector("numeric",nrow(data_v)) |
|
79 |
|
|
80 |
for (k in 1:nrow(data_v)){ |
|
81 |
pos[k]<-match(min(d_s_v[k,]),d_s_v[k,]) |
|
82 |
dst[k]<-min(d_s_v[k,]) |
|
83 |
} |
|
84 |
|
|
85 |
dstspat_er<-as.data.frame(cbind(v_id=as.vector(data_v$id),s_id=as.vector(data_s$id[pos]), |
|
86 |
pos=pos, lat=data_v$lat, lon=data_v$lon, x=data_v$x,y=data_v$y, |
|
87 |
dst=dst, |
|
88 |
as.data.frame(data_v[,names_var]))) |
|
89 |
|
|
90 |
return(dstspat_er) |
|
91 |
} |
|
92 |
|
|
93 |
# create plot of accury in term of distance to closest fitting station |
|
94 |
plot_dst_spat_fun<-function(stat_tb,names_var,cat_val){ |
|
95 |
|
|
96 |
range_y<-range(as.vector(unlist(stat_tb[,names_var])),na.rm=T) #flatten data.frame |
|
97 |
col_t<-rainbow(length(names_var)) |
|
98 |
pch_t<- 1:length(names_var) |
|
99 |
plot(stat_tb[,names_var[1]], ylim=range_y,pch=pch_t[1],col=col_t[1],type="b", |
|
100 |
yla="MAE (in degree C)",xlab="",xaxt="n") |
|
101 |
#points((stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[1],col=col_t[1]),type="p") |
|
102 |
for (k in 2:length(names_var)){ |
|
103 |
lines(stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[k],col=col_t[k],type="b", |
|
104 |
xlab="",axes=F) |
|
105 |
#points((stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[k],col=col_t[k]),type="p") |
|
106 |
} |
|
107 |
legend("topleft",legend=names_var, |
|
108 |
cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n") |
|
109 |
axis(1,at=1:length(stat_tb[,1]),labels=stat_tb[,1]) |
|
110 |
} |
|
111 |
|
|
112 |
################## PARAMETERS ########## |
|
113 |
|
|
114 |
#in_dir<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_GAM_fus_all_lst_05312013" |
|
115 |
#in_dir<-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_day_lst_comb3_07092013/" |
|
116 |
in_dir<-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_day_lst_comb4_07152013/" |
|
117 |
in_dir2<-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_day_mult_lst_comb3_07232013" |
|
118 |
|
|
119 |
out_dir<-"" |
|
120 |
setwd(in_dir) |
|
121 |
|
|
122 |
out_prefix<-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_GAM_fus_all_lst_05312013" |
|
123 |
method_interpolation <- "gam_daily" |
|
124 |
covar_obj_file <- "covar_obj__365d_gam_day_lst_comb3_07092013.RData" |
|
125 |
|
|
126 |
raster_obj_file <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_day_lst_comb3_07092013.RData" |
|
127 |
raster_obj_file2 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_day_mults15_lst_comb3_07232013.RData" |
|
128 |
|
|
129 |
raster_prediction_obj <-load_obj(raster_obj_file) |
|
130 |
names(raster_prediction_obj) #list of two objects |
|
131 |
|
|
132 |
raster_prediction_obj$summary_metrics_v |
|
133 |
|
|
134 |
################################################################################# |
|
135 |
############ ANALYSES 1: ACCURACY IN TERMS OF DISTANCE TO CLOSEST STATIONS... ####### |
|
136 |
|
|
137 |
names(raster_prediction_obj$validation_mod_obj[[1]]) |
|
138 |
|
|
139 |
#First extract list of training and testing spdf from interpolation object |
|
140 |
|
|
141 |
list_data_s <- extract_list_from_list_obj(raster_prediction_obj$validation_mod_obj,"data_s") |
|
142 |
list_data_v <- extract_list_from_list_obj(raster_prediction_obj$validation_mod_obj,"data_v") |
|
143 |
|
|
144 |
#Assign model's names |
|
145 |
names_mod <- paste("res_mod",1:9,sep="") |
|
146 |
i<-1 |
|
147 |
names_mod<-c("res_mod1","res_mod2","res_mod3","res_mod4","res_mod5","res_mod6","res_mod7","res_mod8","res_mod9") |
|
148 |
|
|
149 |
## Generate data frame with residuals in term of distance to closest stations |
|
150 |
|
|
151 |
names_var<-c(names_mod,"dates") |
|
152 |
list_param_dst<-list(i,list_data_s,list_data_v,names_mod) |
|
153 |
names(list_param_dst) <- c("index","data_s","data_v","names_var") |
|
154 |
|
|
155 |
#call function over 365 dates |
|
156 |
list_dstspat_er <-lapply(1:length(list_data_v),FUN=calc_dist_ref_data_point,list_param=list_param_dst) |
|
157 |
#now assemble in one data.frame |
|
158 |
dstspat_dat<-do.call(rbind.fill,list_dstspat_er) |
|
159 |
|
|
160 |
### |
|
161 |
|
|
162 |
# Plot results: accuracy in term of distance to closest fitting stations... |
|
163 |
|
|
164 |
plot(dstspat_dat$dst,abs(dstspat_dat$res_mod1)) |
|
165 |
limit_val<-seq(0,150, by=10) |
|
166 |
|
|
167 |
limit_val<-seq(5,155, by=10) |
|
168 |
|
|
169 |
dstspat_dat$dst_cat1 <- cut(dstspat_dat$dst,breaks=limit_val) |
|
170 |
mae_fun<-function(x){mean(abs(x))} |
|
171 |
sd_abs_fun<-function(x){sd(abs(x))} |
|
172 |
t<-melt(dstspat_dat, |
|
173 |
measure=c("res_mod1","res_mod2","res_mod3","res_mod4","res_mod5","res_mod6","res_mod8"), |
|
174 |
id=c("dst_cat1"), |
|
175 |
na.rm=T) |
|
176 |
mae_tb<-cast(t,dst_cat1~variable,mae_fun) |
|
177 |
sd_abs_tb<-cast(t,dst_cat1~variable,sd_abs_fun) |
|
178 |
|
|
179 |
avg_tb<-cast(t,dst_cat1~variable,mean) |
|
180 |
sd_tb<-cast(t,dst_cat1~variable,sd) |
|
181 |
n_tb<-cast(t,dst_cat1~variable,length) |
|
182 |
n_NA<-cast(t,dst_cat1~variable,is.na) |
|
183 |
|
|
184 |
mod_name<-"mod1" |
|
185 |
mod_name<-"mod4" |
|
186 |
xlab_text<-"distance to fitting station" |
|
187 |
|
|
188 |
n <- unlist(n_tb[1:12,c(mod_name)]) |
|
189 |
y <- unlist(mae_tb[1:12,c(mod_name)]) |
|
190 |
|
|
191 |
x<- 1:length(y) |
|
192 |
y_sd <- unlist(sd_abs_tb[1:12,c(mod_name)]) |
|
193 |
|
|
194 |
ciw <-y_sd |
|
195 |
#ciw2 <- qt(0.975, n) * y_sd2 / sqrt(n) |
|
196 |
|
|
197 |
plotCI(y=y, x=x, uiw=ciw, col="red", main=paste(" MAE for ",mod_name,sep=""), barcol="blue", lwd=1, |
|
198 |
ylab="RMSE (C)", xlab=xlab_text) |
|
199 |
|
|
200 |
ciw <- qt(0.975, n) * y_sd / sqrt(n) |
|
201 |
|
|
202 |
plotCI(y=y, x=x, uiw=ciw, col="red", main=paste(" CI MAE for for ",mod_name,sep=""), barcol="blue", lwd=1, |
|
203 |
ylab="RMSE (C)", xlab=xlab_text) |
|
204 |
|
|
205 |
names_var<-intersect(names_var,names(dstspat_dat)) #there may be missing columns |
|
206 |
range_y<-range(as.vector(dstspat_dat[,names_var]),na.rm=T) |
|
207 |
dst_dat<- vector("list",length(names_var)) |
|
208 |
|
|
209 |
##################################################### |
|
210 |
### PART II MULTISAMPLING COMPARISON ############### |
|
211 |
|
|
212 |
#Use run of 7 hold out proportions, 10 to 70% with 10 random samples and 12 dates... |
|
213 |
#Use gam_day method |
|
214 |
#Use comb3 i.e. using baseline s(lat,lon)+s(elev) |
|
215 |
|
|
216 |
#read in relevant data: |
|
217 |
raster_prediction_obj <-load_obj(file.path(in_dir2,raster_obj_file2)) |
|
218 |
tb <-raster_prediction_obj$tb_diagnostic_v #contains the accuracy metrics for each run... |
|
219 |
|
|
220 |
names_mod<-c("mod1","mod2","mod3","mod4","mod5","mod6","mod7","mod8","mod9") |
|
221 |
|
|
222 |
t<-melt(tb, |
|
223 |
measure=c("mae","rmse","r","m50"), |
|
224 |
id=c("pred_mod","prop"), |
|
225 |
na.rm=T) |
|
226 |
tbp<- subset(tb,prop!=70) #remove 70% hold out because it is only predicted for mod1 (baseline) |
|
227 |
|
|
228 |
tp<-melt(tbp, |
|
229 |
measure=c("mae","rmse","r","m50"), |
|
230 |
id=c("pred_mod","prop"), |
|
231 |
na.rm=T) |
|
232 |
|
|
233 |
avg_tp<-cast(tp,pred_mod~variable,mean) |
|
234 |
|
|
235 |
avg_tb<-cast(t,pred_mod+prop~variable,mean) |
|
236 |
sd_tb<-cast(t,pred_mod+prop~variable,sd) |
|
237 |
|
|
238 |
n_tb<-cast(t,pred_mod+prop~variable,length) |
|
239 |
|
|
240 |
xyplot(avg_tb$rmse~avg_tb$prop,type="b",group=pred_mod, |
|
241 |
data=avg_tb, |
|
242 |
pch=1:length(avg_tb$pred_mod), |
|
243 |
par.settings=list(superpose.symbol = list( |
|
244 |
pch=1:length(avg_tb$pred_mod))), |
|
245 |
auto.key=list(columns=5)) |
|
246 |
|
|
247 |
mod_name<-"mod1" |
|
248 |
mod_name<-"mod4" |
|
249 |
xlab_text<-"proportion of hold out" |
|
250 |
|
|
251 |
n <- unlist(subset(n_tb,pred_mod==mod_name,select=c(rmse))) |
|
252 |
y <- unlist(subset(avg_tb,pred_mod==mod_name,select=c(rmse))) |
|
253 |
|
|
254 |
x<- 1:length(y) |
|
255 |
y_sd <- unlist(subset(sd_tb,pred_mod==mod_name,select=c(rmse))) |
|
256 |
|
|
257 |
ciw <-y_sd |
|
258 |
#ciw2 <- qt(0.975, n) * y_sd2 / sqrt(n) |
|
259 |
|
|
260 |
plotCI(y=y, x=x, uiw=ciw, col="red", main=paste(" Mean and Std_dev RMSE for ",mod_name,sep=""), barcol="blue", lwd=1, |
|
261 |
ylab="RMSE (C)", xlab=xlab_text) |
|
262 |
|
|
263 |
ciw <- qt(0.975, n) * y_sd / sqrt(n) |
|
264 |
|
|
265 |
plotCI(y=y, x=x, uiw=ciw, col="red", main=paste(" Mean and CI RMSE for ",mod_name,sep=""), barcol="blue", lwd=1, |
|
266 |
ylab="RMSE (C)", xlab=xlab_text) |
|
267 |
|
|
268 |
n=150 |
|
269 |
ciw <- qt(0.975, n) * y_sd / sqrt(n) |
|
270 |
ciw2 <- qt(0.975, n) * y_sd2 / sqrt(n) |
|
271 |
|
|
272 |
#Comparison of MAE for different proportions for FUSION and CAI using CI |
|
273 |
X11() |
|
274 |
plotCI(y=y, x=x, uiw=ciw, col="red", main=" FUS: RMSE proportion of validation hold out", barcol="blue", lwd=1, |
|
275 |
ylab="RMSE (C)", xlab="Proportions of validation hold out (in %)") |
|
276 |
lines(x,y,col="red") |
|
277 |
legend("bottomright",legend=c("fus"), cex=1.2, col=c("red"), |
|
278 |
lty=1, title="RMSE") |
|
279 |
savePlot(paste("Comparison_multisampling_fus_RMSE_CI",out_prefix,".png", sep=""), type="png") |
|
280 |
|
|
281 |
plotCI(y=y2, x=x2, uiw=ciw2, col="black", main=" CAI: RMSE proportion of validation hold out", barcol="blue", lwd=1, |
|
282 |
ylab="RMSE (C)", xlab="Proportions of validation hold out (in %)") |
|
283 |
lines(x2,y2,col="grey") |
|
284 |
legend("bottomright",legend=c("CAI"), cex=1.2, col=c("grey"), |
|
285 |
lty=1, title="RMSE") |
|
286 |
savePlot(paste("Comparison_multisampling_CAI_RMSE_CI",out_prefix,".png", sep=""), type="png") |
|
287 |
dev.off() |
|
288 |
|
|
289 |
#Comparison of MAE for different proportions for FUSION and CAI |
|
290 |
X11() |
|
291 |
plot(x,y,col="red",type="b", ylab="RMSE (C)", xlab="Proportions of validation hold out (in %)") |
|
292 |
lines(x2,y2,col="grey") |
|
293 |
points(x2,y2,col="grey") |
|
294 |
title("MAE in terms of proportions and random sampling") |
|
295 |
legend("bottomright",legend=c("fus","CAI"), cex=1.2, col=c("red","grey"), |
|
296 |
lty=1, title="RMSE") |
|
297 |
savePlot(paste("Comparison_multisampling_fus_CAI_RMSE_averages",out_prefix,".png", sep=""), type="png") |
|
298 |
dev.off() |
Also available in: Unified diff
analyses paper part 3: comb3, multisampling and distance to closest fitting station