Revision 3618899e
Added by Benoit Parmentier over 11 years ago
climate/research/oregon/interpolation/contribution_of_covariates_paper_interpolation.R | ||
---|---|---|
4 | 4 |
# interpolation code. |
5 | 5 |
#Figures and data for the contribution of covariate paper are also produced. |
6 | 6 |
#AUTHOR: Benoit Parmentier # |
7 |
#DATE: 08/02/2013
|
|
7 |
#DATE: 08/05/2013
|
|
8 | 8 |
#Version: 1 |
9 | 9 |
#PROJECT: Environmental Layers project # |
10 | 10 |
################################################################################################# |
... | ... | |
25 | 25 |
library(maps) |
26 | 26 |
library(reshape) |
27 | 27 |
library(plotrix) |
28 |
library(plyr) |
|
28 | 29 |
|
29 | 30 |
#### FUNCTION USED IN SCRIPT |
30 | 31 |
|
... | ... | |
58 | 59 |
return(stat_tb) |
59 | 60 |
} |
60 | 61 |
|
62 |
## Produce data.frame with residuals for models and distance to closest fitting station |
|
63 |
calc_dist_ref_data_point <- function(i,list_param){ |
|
64 |
#This function creates a list of data.frame containing the distance to teh closest |
|
65 |
# reference point (e.g. fitting station) for a give data frame. |
|
66 |
#Inputs: |
|
67 |
#data_s: given data.frame from wich distance is computed |
|
68 |
#data_v: reference data.frame, destination, often the fitting points used in analyses |
|
69 |
#i: index variable to operate on list |
|
70 |
#names_var: |
|
71 |
#Outputs: |
|
72 |
#list_dstspat_er |
|
73 |
|
|
74 |
#Parsing input arguments |
|
75 |
data_s<-list_param$data_s[[i]] |
|
76 |
data_v<-list_param$data_v[[i]] |
|
77 |
|
|
78 |
names_var<-list_param$names_var |
|
79 |
|
|
80 |
###### |
|
81 |
|
|
82 |
names_var<-intersect(names_var,names(data_v)) #there may be missing columns |
|
83 |
#use columns that exists |
|
84 |
|
|
85 |
d_s_v<-matrix(0,nrow(data_v),nrow(data_s)) |
|
86 |
for(k in 1:nrow(data_s)){ |
|
87 |
pt<-data_s[k,] |
|
88 |
d_pt<-(spDistsN1(data_v,pt,longlat=FALSE))/1000 #Distance to station k in km |
|
89 |
d_s_v[,k]<-d_pt |
|
90 |
} |
|
91 |
|
|
92 |
#Create data.frame with position, ID, dst and residuals... |
|
93 |
|
|
94 |
pos<-vector("numeric",nrow(data_v)) |
|
95 |
y<-vector("numeric",nrow(data_v)) |
|
96 |
dst<-vector("numeric",nrow(data_v)) |
|
97 |
|
|
98 |
for (k in 1:nrow(data_v)){ |
|
99 |
pos[k]<-match(min(d_s_v[k,]),d_s_v[k,]) |
|
100 |
dst[k]<-min(d_s_v[k,]) |
|
101 |
} |
|
102 |
|
|
103 |
dstspat_er<-as.data.frame(cbind(v_id=as.vector(data_v$id),s_id=as.vector(data_s$id[pos]), |
|
104 |
pos=pos, lat=data_v$lat, lon=data_v$lon, x=data_v$x,y=data_v$y, |
|
105 |
dst=dst, |
|
106 |
as.data.frame(data_v[,names_var]))) |
|
107 |
|
|
108 |
return(dstspat_er) |
|
109 |
} |
|
110 |
|
|
111 |
### Main function to call to obtain distance to closest fitting stations for valiation dataset |
|
112 |
distance_to_closest_fitting_station<-function(raster_prediction_obj,names_mod,dist_classes=c(0)){ |
|
113 |
#This function computes the distance between training and testing points and returns and data frame |
|
114 |
#with distance,residuals, ID of training and testing points |
|
115 |
#Input: raster_prediction_obj, names of residuals models to return, distance classes |
|
116 |
#Output: data frame |
|
117 |
|
|
118 |
#Functions used in the script |
|
119 |
|
|
120 |
mae_fun<-function(x){mean(abs(x))} #Mean Absolute Error give a residuals vector |
|
121 |
sd_abs_fun<-function(x){sd(abs(x))} #sd Absolute Error give a residuals vector |
|
122 |
|
|
123 |
##BEGIN |
|
124 |
|
|
125 |
##### PART I: generate data.frame with residuals in term of distance to closest fitting station |
|
126 |
|
|
127 |
#return list of training and testing data frames |
|
128 |
list_data_s <- extract_list_from_list_obj(raster_prediction_obj$validation_mod_obj,"data_s") #training |
|
129 |
list_data_v <- extract_list_from_list_obj(raster_prediction_obj$validation_mod_obj,"data_v") #testing (validation) |
|
130 |
|
|
131 |
i<-1 |
|
132 |
names_var<-c(names_mod,"dates") |
|
133 |
list_param_dst<-list(i,list_data_s,list_data_v,names_mod) |
|
134 |
names(list_param_dst) <- c("index","data_s","data_v","names_var") |
|
135 |
|
|
136 |
#call function "calc_dist_ref_data_point" over 365 dates |
|
137 |
#note that this function depends on other functions !!! see script |
|
138 |
|
|
139 |
list_dstspat_er <-lapply(1:length(list_data_v),FUN=calc_dist_ref_data_point,list_param=list_param_dst) |
|
140 |
#now assemble in one data.frame |
|
141 |
dstspat_dat<-do.call(rbind.fill,list_dstspat_er) |
|
142 |
|
|
143 |
########### PART II: generate distance classes and summary statistics |
|
144 |
|
|
145 |
if (length(dist_classes)==1){ |
|
146 |
range_val<-range(dstspat_dat$dst) |
|
147 |
max_val<-round_any(range_val[2],10, f=ceiling) #round max value to nearest 10 (from plyr package) |
|
148 |
min_val<-0 |
|
149 |
limit_val<-seq(min_val,max_val, by=10) |
|
150 |
}else{ |
|
151 |
limit_val<-dist_classes |
|
152 |
} |
|
153 |
|
|
154 |
dstspat_dat$dst_cat1 <- cut(dstspat_dat$dst,include.lowest=TRUE,breaks=limit_val) |
|
155 |
|
|
156 |
names_var <- intersect(names_mod,names(dstspat_dat)) |
|
157 |
t<-melt(dstspat_dat, |
|
158 |
measure=names_var, |
|
159 |
id=c("dst_cat1"), |
|
160 |
na.rm=T) |
|
161 |
|
|
162 |
mae_tb<-cast(t,dst_cat1~variable,mae_fun) |
|
163 |
sd_abs_tb<-cast(t,dst_cat1~variable,sd_abs_fun) |
|
164 |
|
|
165 |
avg_tb<-cast(t,dst_cat1~variable,mean) |
|
166 |
sd_tb<-cast(t,dst_cat1~variable,sd) |
|
167 |
n_tb<-cast(t,dst_cat1~variable,length) |
|
168 |
#n_NA<-cast(t,dst_cat1~variable,is.na) |
|
169 |
|
|
170 |
#### prepare returning object |
|
171 |
dstspat_obj<-list(dstspat_dat,mae_tb,sd_abs_tb,avg_tb,sd_tb,n_tb) |
|
172 |
names(dstspat_obj) <-c("dstspat_dat","mae_tb","sd_abs_tb","avg_tb","sd_tb","n_tb") |
|
173 |
|
|
174 |
return(dstspat_obj) |
|
175 |
|
|
176 |
} |
|
177 |
|
|
178 |
# create plot of accury in term of distance to closest fitting station |
|
179 |
plot_dst_spat_fun<-function(stat_tb,names_var,cat_val){ |
|
180 |
|
|
181 |
range_y<-range(as.vector(unlist(stat_tb[,names_var])),na.rm=T) #flatten data.frame |
|
182 |
col_t<-rainbow(length(names_var)) |
|
183 |
pch_t<- 1:length(names_var) |
|
184 |
plot(stat_tb[,names_var[1]], ylim=range_y,pch=pch_t[1],col=col_t[1],type="b", |
|
185 |
yla="MAE (in degree C)",xlab="",xaxt="n") |
|
186 |
#points((stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[1],col=col_t[1]),type="p") |
|
187 |
for (k in 2:length(names_var)){ |
|
188 |
lines(stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[k],col=col_t[k],type="b", |
|
189 |
xlab="",axes=F) |
|
190 |
#points((stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[k],col=col_t[k]),type="p") |
|
191 |
} |
|
192 |
legend("topleft",legend=names_var, |
|
193 |
cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n") |
|
194 |
axis(1,at=1:length(stat_tb[,1]),labels=stat_tb[,1]) |
|
195 |
} |
|
196 |
|
|
197 |
plot_dst_MAE <-function(list_param){ |
|
198 |
# |
|
199 |
#list_dist_obj: list of dist object |
|
200 |
#col_t: list of color for each |
|
201 |
#pch_t: symbol for line |
|
202 |
#legend_text: text for line and symbol |
|
203 |
#mod_name: selected models |
|
204 |
# |
|
205 |
## BEGIN ## |
|
206 |
|
|
207 |
list_dist_obj<-list_param$list_dist_obj |
|
208 |
col_t<-list_param$col_t |
|
209 |
pch_t<- list_param$pch_t |
|
210 |
legend_text <- list_param$legend_text |
|
211 |
list_mod_name<-list_param$mod_name |
|
212 |
|
|
213 |
for (i in 1:length(list_dist_obj)){ |
|
214 |
|
|
215 |
l<-list_dist_obj[[i]] |
|
216 |
mae_tb<-l$mae_tb |
|
217 |
n_tb<-l$n_tb |
|
218 |
sd_abs_tb<-l$sd_abs_tb |
|
219 |
|
|
220 |
mod_name<-list_mod_name[i] |
|
221 |
xlab_text<-"distance to fitting station" |
|
222 |
|
|
223 |
n <- unlist(n_tb[1:13,c(mod_name)]) |
|
224 |
y <- unlist(mae_tb[1:13,c(mod_name)]) |
|
225 |
|
|
226 |
x<- 1:length(y) |
|
227 |
y_sd <- unlist(sd_abs_tb[1:12,c(mod_name)]) |
|
228 |
|
|
229 |
ciw <-y_sd |
|
230 |
#ciw2 <- qt(0.975, n) * y_sd2 / sqrt(n) |
|
231 |
|
|
232 |
#plotCI(y=y, x=x, uiw=ciw, col="red", main=paste(" MAE for ",mod_name,sep=""), barcol="blue", lwd=1, |
|
233 |
# ylab="RMSE (C)", xlab=xlab_text) |
|
234 |
|
|
235 |
ciw <- qt(0.975, n) * y_sd / sqrt(n) |
|
236 |
|
|
237 |
if(i==1){ |
|
238 |
plotCI(y=y, x=x, uiw=ciw, col=col_t[i], main=paste(" Comparison of MAE in ",mod_name,sep=""), barcol="blue", lwd=1, |
|
239 |
ylab="RMSE (C)", xlab=xlab_text) |
|
240 |
lines(y~x, col=col_t[i]) |
|
241 |
|
|
242 |
}else{ |
|
243 |
lines(y~x, col=col_t[i]) |
|
244 |
} |
|
245 |
|
|
246 |
} |
|
247 |
legend("topleft",legend=legend_text, |
|
248 |
cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n") |
|
249 |
#axis(1,at=1:length(stat_tb[,1]),labels=stat_tb[,1]) |
|
250 |
|
|
251 |
} |
|
252 |
|
|
61 | 253 |
#### Parameters and constants |
62 | 254 |
|
63 | 255 |
|
... | ... | |
74 | 266 |
in_dir4 <-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gwr_day_lst_comb3_part1_07122013" |
75 | 267 |
#multisampling results (gam) |
76 | 268 |
in_dir5<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_day_mults15_lst_comb3_07232013" |
77 |
#in_dir<-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_day_mult_lst_comb3_07202013" |
|
269 |
in_dir6<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_kriging_daily_mults10_lst_comb3_08062013" |
|
270 |
in_dir7<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gwr_daily_mults10_lst_comb3_08072013" |
|
78 | 271 |
|
79 |
in_dir <- in_dir1 |
|
80 | 272 |
out_dir<-"/home/parmentier/Data/IPLANT_project/paper_analyses_tables_fig_08032013" |
81 | 273 |
setwd(out_dir) |
82 | 274 |
|
... | ... | |
93 | 285 |
|
94 | 286 |
raster_obj_file_3 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_day_lst_comb3_07112013.RData" |
95 | 287 |
raster_obj_file_4 <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_day_lst_comb3_part1_07122013.RData" |
96 |
raster_obj_file_5 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_day_mult_lst_comb3_07202013.RData" |
|
97 |
|
|
288 |
#multisampling using baseline lat,lon + elev |
|
289 |
raster_obj_file_5 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_day_mults15_lst_comb3_07232013.RData" |
|
290 |
raster_obj_file_6 <- "kriging_daily_validation_mod_obj_dailyTmax_365d_kriging_daily_mults10_lst_comb3_08062013.RData" |
|
291 |
raster_obj_file_7 <- "" |
|
292 |
|
|
98 | 293 |
#Load objects containing training, testing, models objects |
99 | 294 |
|
100 | 295 |
covar_obj <-load_obj(file.path(in_dir1,covar_obj_file1)) |
... | ... | |
105 | 300 |
s_raster <- brick(file.path(in_dir1,infile_covariates)) |
106 | 301 |
names(s_raster)<-covar_names |
107 | 302 |
|
108 |
raster_prediction_obj_1 <-load_obj(file.path(in_dir1,raster_obj_file_1)) |
|
109 |
raster_prediction_obj_2 <-load_obj(file.path(in_dir2,raster_obj_file_2)) |
|
303 |
raster_prediction_obj_1 <-load_obj(file.path(in_dir1,raster_obj_file_1)) #comb3
|
|
304 |
raster_prediction_obj_2 <-load_obj(file.path(in_dir2,raster_obj_file_2)) #comb4
|
|
110 | 305 |
raster_prediction_obj_3 <-load_obj(file.path(in_dir3,raster_obj_file_3)) |
111 |
raster_prediction_obj_4 <-load_obj(file.path(in_dir4,raster_obj_file_4)) |
|
306 |
raster_prediction_obj_4 <-load_obj(file.path(in_dir4,raster_obj_file_4)) |
|
307 |
raster_prediction_obj_5 <-load_obj(file.path(in_dir5,raster_obj_file_5)) #gam daily multisampling 10 to 70% |
|
308 |
raster_prediction_obj_6 <-load_obj(file.path(in_dir6,raster_obj_file_6)) #kriging daily multisampling |
|
309 |
#raster_prediction_obj_7 <-load_obj(file.path(in_dir7,raster_obj_file_7)) #kriging daily multisampling |
|
112 | 310 |
|
113 | 311 |
names(raster_prediction_obj_1) #list of two objects |
114 | 312 |
|
... | ... | |
118 | 316 |
raster_prediction_obj_1$method_mod_obj[[1]]$formulas |
119 | 317 |
raster_prediction_obj_2$method_mod_obj[[1]]$formulas |
120 | 318 |
|
121 |
#baseline 1:
|
|
319 |
###baseline 2: s(lat,lon) + s(elev)
|
|
122 | 320 |
|
123 | 321 |
summary_metrics_v1<-raster_prediction_obj_1$summary_metrics_v |
124 | 322 |
summary_metrics_v2<-raster_prediction_obj_2$summary_metrics_v |
... | ... | |
126 | 324 |
table_data1 <-summary_metrics_v1$avg[,c("mae","rmse","me","r")] |
127 | 325 |
table_data2 <-summary_metrics_v2$avg[,c("mae","rmse","me","r")] |
128 | 326 |
|
129 |
model_col<-c("Baseline","Northing","Easting","LST","DISTOC","Forest","CANHEIGHT","LST*Forest") # removed ,"LST*CANHEIGHT")
|
|
327 |
model_col<-c("Baseline2","Northness","Eastness","LST","DISTOC","Forest","CANHEIGHT","LST*Forest") # removed ,"LST*CANHEIGHT")
|
|
130 | 328 |
names_table_col<-c("DiffMAE","DiffRMSE","DiffME","Diffr","Model") |
131 | 329 |
|
132 | 330 |
df1<- as.data.frame(sapply(table_data1,FUN=function(x) x-x[1])) |
... | ... | |
135 | 333 |
names(df1)<- names_table_col |
136 | 334 |
df1 |
137 | 335 |
|
138 |
model_col<-c("Baseline","Elevation","Northing","Easting","LST","DISTOC","Forest","CANHEIGHT") #,"LST*Forest") # removed ,"LST*CANHEIGHT") |
|
336 |
###baseline 1: s(lat,lon) |
|
337 |
|
|
338 |
model_col<-c("Baseline1","Elevation","Northing","Easting","LST","DISTOC","Forest","CANHEIGHT") #,"LST*Forest") # removed ,"LST*CANHEIGHT") |
|
139 | 339 |
df2<- as.data.frame(sapply(table_data2,FUN=function(x) x-x[1])) |
140 | 340 |
df2<- round(df2,digit=3) #roundto three digits teh differences |
141 | 341 |
df2$Model <-model_col |
142 | 342 |
names(df2)<- names_table_col |
143 | 343 |
df2 |
144 | 344 |
|
145 |
file_name<-paste("table3a_paper","_",out_prefix,".txt",sep="")
|
|
345 |
file_name<-paste("table3b_baseline2_paper","_",out_prefix,".txt",sep="")
|
|
146 | 346 |
write.table(df1,file=file_name,sep=",") |
147 | 347 |
|
148 |
file_name<-paste("table3b_paper","_",out_prefix,".txt",sep="")
|
|
348 |
file_name<-paste("table3a_baseline1_paper","_",out_prefix,".txt",sep="")
|
|
149 | 349 |
write.table(df2,file=file_name,sep=",") |
150 | 350 |
|
151 | 351 |
##Table 4: Interpolation methods comparison |
... | ... | |
158 | 358 |
|
159 | 359 |
names_mod<-c("mod1","mod2","mod3","mod4","mod5","mod6","mod7","mod8","mod9") |
160 | 360 |
|
161 |
|
|
162 | 361 |
sd1 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_1,"sd") |
163 | 362 |
sd2 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_2,"sd") |
164 | 363 |
sd3 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_3,"sd") |
... | ... | |
188 | 387 |
file_name<-paste("table4_avg_paper","_",out_prefix,".txt",sep="") |
189 | 388 |
write.table(table,file=file_name,sep=",") |
190 | 389 |
|
191 |
file_name<-paste("table34_sd_paper","_",out_prefix,".txt",sep="")
|
|
390 |
file_name<-paste("table4_sd_paper","_",out_prefix,".txt",sep="") |
|
192 | 391 |
write.table(table_sd,file=file_name,sep=",") |
193 | 392 |
|
194 | 393 |
#for(i in nrow(table)) |
... | ... | |
208 | 407 |
|
209 | 408 |
### Figure 1 |
210 | 409 |
|
410 |
### Figure 2: |
|
211 | 411 |
|
212 |
### Figure 2
|
|
412 |
#not generated in R
|
|
213 | 413 |
|
214 |
# ANALYSES 1: ACCURACY IN TERMS OF DISTANCE TO CLOSEST STATIONS... |
|
215 |
#?? for all models gam or only interpolation methods?? |
|
414 |
### Figure 3 |
|
216 | 415 |
|
217 |
tb1<- raster_prediction_obj_1$tb_diagnostic_v |
|
416 |
#Analysis accuracy in term of distance to closest station |
|
417 |
#Assign model's names |
|
218 | 418 |
|
219 |
names(raster_prediction_obj$validation_mod_obj[[1]]) |
|
419 |
names_mod <- paste("res_mod",1:9,sep="") |
|
420 |
names(raster_prediction_obj_1$validation_mod_obj[[1]]) |
|
421 |
limit_val<-seq(0,150, by=10) |
|
220 | 422 |
|
221 |
list_data_s <- extract_list_from_list_obj(raster_prediction_obj$validation_mod_obj,"data_s") |
|
222 |
list_data_v <- extract_list_from_list_obj(raster_prediction_obj$validation_mod_obj,"data_v") |
|
423 |
l1 <- distance_to_closest_fitting_station(raster_prediction_obj_1,names_mod,dist_classes=limit_val) |
|
424 |
l3 <- distance_to_closest_fitting_station(raster_prediction_obj_3,names_mod,dist_classes=limit_val) |
|
425 |
l4 <- distance_to_closest_fitting_station(raster_prediction_obj_4,names_mod,dist_classes=limit_val) |
|
426 |
|
|
427 |
list_dist_obj<-list(l1,l3,l4) |
|
428 |
col_t<-c("red","blue","black") |
|
429 |
pch_t<- 1:length(col_t) |
|
430 |
legend_text <- c("GAM","Kriging","GWR") |
|
431 |
mod_name<-c("res_mod1","res_mod1","res_mod1")#selected models |
|
432 |
|
|
433 |
#png_names<- |
|
434 |
#png(file.path(out_path,paste("clim_surface_",y_var_name,"_",sampling_dat$date[i],"_",sampling_dat$prop[i], |
|
435 |
# "_",sampling_dat$run_samp[i],out_prefix,".png", sep=""))) |
|
436 |
|
|
437 |
list_param_plot<-list(list_dist_obj,col_t,pch_t,legend_text,mod_name) |
|
438 |
names(list_param_plot)<-c("list_dist_obj","col_t","pch_t","legend_text","mod_name") |
|
439 |
|
|
440 |
#debug(plot_dst_MAE) |
|
441 |
plot_dst_MAE(list_param_plot) |
|
442 |
|
|
443 |
|
|
444 |
#Figure 4. RMSE and MAE, mulitisampling and hold out for FSS and GAM. |
|
445 |
|
|
446 |
#Using baseline 2: lat,lon and elev |
|
447 |
|
|
448 |
#Use run of 7 hold out proportions, 10 to 70% with 10 random samples and 12 dates... |
|
449 |
#Use gam_day method |
|
450 |
#Use comb3 i.e. using baseline s(lat,lon)+s(elev) |
|
451 |
|
|
452 |
#read in relevant data: |
|
453 |
|
|
454 |
tb5 <-raster_prediction_obj_5$tb_diagnostic_v #gam dailycontains the accuracy metrics for each run... |
|
455 |
tb6 <-raster_prediction_obj_6$tb_diagnostic_v #Kriging daily methods |
|
456 |
#tb7 <-raster_prediction_obj_7$tb_diagnostic_v #gwr daily methods |
|
457 |
|
|
458 |
#names_mod<-c("mod1","mod2","mod3","mod4","mod5","mod6","mod7","mod8","mod9") |
|
459 |
names_mod<-c("mod1") |
|
460 |
|
|
461 |
calc_stat_prop_tb <-function(names_mod,raster_prediction_obj){ |
|
462 |
#add for testing?? |
|
463 |
tb <-raster_prediction_obj$tb_diagnostic_v |
|
464 |
t<-melt(subset(tb5,pred_mod==names_mod), |
|
465 |
measure=c("mae","rmse","r","m50"), |
|
466 |
id=c("pred_mod","prop"), |
|
467 |
na.rm=T) |
|
468 |
|
|
469 |
avg_tb<-cast(t,pred_mod+prop~variable,mean) |
|
470 |
sd_tb<-cast(t,pred_mod+prop~variable,sd) |
|
471 |
n_tb<-cast(t,pred_mod+prop~variable,length) |
|
472 |
#n_NA<-cast(t,dst_cat1~variable,is.na) |
|
473 |
|
|
474 |
#### prepare returning object |
|
475 |
prop_obj<-list(tb,mae_tb,avg_tb,sd_tb,n_tb) |
|
476 |
names(prop_obj) <-c("tb","avg_tb","sd_tb","n_tb") |
|
477 |
#names(prop_obj) <-c("tb_v","tb_s","avg_tb_v","sd_tb_v","n_tb_s","avg_tb_s","sd_tb_s","n_tb_s") |
|
478 |
} |
|
479 |
|
|
480 |
|
|
481 |
#tbp<- subset(tb,prop!=70) #remove 70% hold out because it is only predicted for mod1 (baseline) |
|
482 |
#tp<-melt(tbp, |
|
483 |
# measure=c("mae","rmse","r","m50"), |
|
484 |
# id=c("pred_mod","prop"), |
|
485 |
# na.rm=T) |
|
486 |
|
|
487 |
avg_tp<-cast(tp,pred_mod~variable,mean) |
|
488 |
|
|
489 |
avg_tb<-cast(t5,pred_mod+prop~variable,mean) |
|
490 |
sd_tb<-cast(t,pred_mod+prop~variable,sd) |
|
491 |
|
|
492 |
n_tb<-cast(t,pred_mod+prop~variable,length) |
|
493 |
|
|
494 |
xyplot(avg_tb$rmse~avg_tb$prop,type="b",group=pred_mod, |
|
495 |
data=avg_tb, |
|
496 |
pch=1:length(avg_tb$pred_mod), |
|
497 |
par.settings=list(superpose.symbol = list( |
|
498 |
pch=1:length(avg_tb$pred_mod))), |
|
499 |
auto.key=list(columns=5)) |
|
500 |
|
|
501 |
mod_name<-"mod1" |
|
502 |
mod_name<-"mod4" |
|
503 |
xlab_text<-"proportion of hold out" |
|
504 |
|
|
505 |
n <- unlist(subset(n_tb,pred_mod==mod_name,select=c(rmse))) |
|
506 |
y <- unlist(subset(avg_tb,pred_mod==mod_name,select=c(rmse))) |
|
507 |
|
|
508 |
x<- 1:length(y) |
|
509 |
y_sd <- unlist(subset(sd_tb,pred_mod==mod_name,select=c(rmse))) |
|
510 |
|
|
511 |
ciw <-y_sd |
|
512 |
#ciw2 <- qt(0.975, n) * y_sd2 / sqrt(n) |
|
513 |
|
|
514 |
plotCI(y=y, x=x, uiw=ciw, col="red", main=paste(" Mean and Std_dev RMSE for ",mod_name,sep=""), barcol="blue", lwd=1, |
|
515 |
ylab="RMSE (C)", xlab=xlab_text) |
|
516 |
|
|
517 |
ciw <- qt(0.975, n) * y_sd / sqrt(n) |
|
518 |
|
|
519 |
plotCI(y=y, x=x, uiw=ciw, col="red", main=paste(" Mean and CI RMSE for ",mod_name,sep=""), barcol="blue", lwd=1, |
|
520 |
ylab="RMSE (C)", xlab=xlab_text) |
|
521 |
|
|
522 |
n=150 |
|
523 |
ciw <- qt(0.975, n) * y_sd / sqrt(n) |
|
524 |
ciw2 <- qt(0.975, n) * y_sd2 / sqrt(n) |
|
525 |
|
|
526 |
plot_dst_MAE <-function(list_param){ |
|
527 |
# |
|
528 |
#list_dist_obj: list of dist object |
|
529 |
#col_t: list of color for each |
|
530 |
#pch_t: symbol for line |
|
531 |
#legend_text: text for line and symbol |
|
532 |
#mod_name: selected models |
|
533 |
# |
|
534 |
## BEGIN ## |
|
535 |
|
|
536 |
list_dist_obj<-list_param$list_dist_obj |
|
537 |
col_t<-list_param$col_t |
|
538 |
pch_t<- list_param$pch_t |
|
539 |
legend_text <- list_param$legend_text |
|
540 |
list_mod_name<-list_param$mod_name |
|
541 |
|
|
542 |
for (i in 1:length(list_dist_obj)){ |
|
543 |
|
|
544 |
l<-list_dist_obj[[i]] |
|
545 |
mae_tb<-l$mae_tb |
|
546 |
n_tb<-l$n_tb |
|
547 |
sd_abs_tb<-l$sd_abs_tb |
|
548 |
|
|
549 |
mod_name<-list_mod_name[i] |
|
550 |
xlab_text<-"distance to fitting station" |
|
551 |
|
|
552 |
n <- unlist(n_tb[1:13,c(mod_name)]) |
|
553 |
y <- unlist(mae_tb[1:13,c(mod_name)]) |
|
554 |
|
|
555 |
x<- 1:length(y) |
|
556 |
y_sd <- unlist(sd_abs_tb[1:12,c(mod_name)]) |
|
557 |
|
|
558 |
ciw <-y_sd |
|
559 |
#ciw2 <- qt(0.975, n) * y_sd2 / sqrt(n) |
|
560 |
|
|
561 |
#plotCI(y=y, x=x, uiw=ciw, col="red", main=paste(" MAE for ",mod_name,sep=""), barcol="blue", lwd=1, |
|
562 |
# ylab="RMSE (C)", xlab=xlab_text) |
|
563 |
|
|
564 |
ciw <- qt(0.975, n) * y_sd / sqrt(n) |
|
565 |
|
|
566 |
if(i==1){ |
|
567 |
plotCI(y=y, x=x, uiw=ciw, col=col_t[i], main=paste(" Comparison of MAE in ",mod_name,sep=""), barcol="blue", lwd=1, |
|
568 |
ylab="RMSE (C)", xlab=xlab_text) |
|
569 |
lines(y~x, col=col_t[i]) |
|
570 |
|
|
571 |
}else{ |
|
572 |
lines(y~x, col=col_t[i]) |
|
573 |
} |
|
574 |
|
|
575 |
} |
|
576 |
legend("topleft",legend=legend_text, |
|
577 |
cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n") |
|
578 |
#axis(1,at=1:length(stat_tb[,1]),labels=stat_tb[,1]) |
|
579 |
|
|
580 |
} |
|
581 |
|
|
582 |
################### END OF SCRIPT ################### |
|
583 |
|
|
584 |
# create plot of accury in term of distance to closest fitting station |
|
585 |
plot_dst_spat_fun<-function(stat_tb,names_var,cat_val){ |
|
586 |
|
|
587 |
range_y<-range(as.vector(unlist(stat_tb[,names_var])),na.rm=T) #flatten data.frame |
|
588 |
col_t<-rainbow(length(names_var)) |
|
589 |
pch_t<- 1:length(names_var) |
|
590 |
plot(stat_tb[,names_var[1]], ylim=range_y,pch=pch_t[1],col=col_t[1],type="b", |
|
591 |
ylab="MAE (in degree C)",xlab="",xaxt="n") |
|
592 |
#points((stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[1],col=col_t[1]),type="p") |
|
593 |
for (k in 2:length(names_var)){ |
|
594 |
lines(stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[k],col=col_t[k],type="b", |
|
595 |
xlab="",axes=F) |
|
596 |
#points((stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[k],col=col_t[k]),type="p") |
|
597 |
} |
|
598 |
legend("topleft",legend=names_var, |
|
599 |
cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n") |
|
600 |
axis(1,at=1:length(stat_tb[,1]),labels=stat_tb[,1]) |
|
601 |
} |
|
223 | 602 |
|
224 |
names_mod <- paste("res_mod",1:9,sep="") |
Also available in: Unified diff
paper analyses and figure contributio of covariates, modifications