Revision bf80e55e
Added by Benoit Parmentier over 11 years ago
climate/research/oregon/interpolation/contribution_of_covariates_paper_interpolation.R | ||
---|---|---|
47 | 47 |
return(list_tmp) #this is a data.frame |
48 | 48 |
} |
49 | 49 |
|
50 |
#This extract a data.frame object from raster prediction obj and combine them in one data.frame |
|
51 |
extract_from_list_obj<-function(obj_list,list_name){ |
|
52 |
list_tmp<-vector("list",length(obj_list)) |
|
53 |
for (i in 1:length(obj_list)){ |
|
54 |
tmp<-obj_list[[i]][[list_name]] #double bracket to return data.frame |
|
55 |
list_tmp[[i]]<-tmp |
|
56 |
} |
|
57 |
tb_list_tmp<-do.call(rbind,list_tmp) #long rownames |
|
58 |
return(tb_list_tmp) #this is a data.frame |
|
59 |
} |
|
60 |
|
|
50 | 61 |
calc_stat_from_raster_prediction_obj <-function(raster_prediction_obj,stat){ |
51 | 62 |
tb <-raster_prediction_obj$tb_diagnostic_v #Kriging methods |
52 | 63 |
|
... | ... | |
250 | 261 |
|
251 | 262 |
} |
252 | 263 |
|
264 |
calc_stat_prop_tb <-function(names_mod,raster_prediction_obj,testing=TRUE){ |
|
265 |
|
|
266 |
#add for testing?? |
|
267 |
if (testing==TRUE){ |
|
268 |
tb <-raster_prediction_obj$tb_diagnostic_v #use testing accuracy information |
|
269 |
}else{ |
|
270 |
tb <-raster_prediction_obj$tb_diagnostic_s #use training accuracy information |
|
271 |
} |
|
272 |
|
|
273 |
t<-melt(subset(tb,pred_mod==names_mod), |
|
274 |
measure=c("mae","rmse","r","m50"), |
|
275 |
id=c("pred_mod","prop"), |
|
276 |
na.rm=T) |
|
277 |
|
|
278 |
avg_tb<-cast(t,pred_mod+prop~variable,mean) |
|
279 |
sd_tb<-cast(t,pred_mod+prop~variable,sd) |
|
280 |
n_tb<-cast(t,pred_mod+prop~variable,length) |
|
281 |
#n_NA<-cast(t,dst_cat1~variable,is.na) |
|
282 |
|
|
283 |
#### prepare returning object |
|
284 |
prop_obj<-list(tb,avg_tb,sd_tb,n_tb) |
|
285 |
names(prop_obj) <-c("tb","avg_tb","sd_tb","n_tb") |
|
286 |
|
|
287 |
return(prop_obj) |
|
288 |
} |
|
289 |
|
|
290 |
#ploting |
|
291 |
plot_prop_metrics <-function(list_param){ |
|
292 |
# |
|
293 |
#list_dist_obj: list of dist object |
|
294 |
#col_t: list of color for each |
|
295 |
#pch_t: symbol for line |
|
296 |
#legend_text: text for line and symbol |
|
297 |
#mod_name: selected models |
|
298 |
# |
|
299 |
## BEGIN ## |
|
300 |
|
|
301 |
list_obj<-list_param$list_prop_obj |
|
302 |
col_t <-list_param$col_t |
|
303 |
pch_t <- list_param$pch_t |
|
304 |
legend_text <- list_param$legend_text |
|
305 |
list_mod_name<-list_param$mod_name |
|
306 |
metric_name<-list_param$metric_name |
|
307 |
|
|
308 |
for (i in 1:length(list_obj)){ |
|
309 |
|
|
310 |
l<-list_obj[[i]] |
|
311 |
mod_name<-list_mod_name[i] |
|
312 |
avg_tb<-subset(l$avg_tb,pred_mod==mod_name,select=metric_name) #selecte relevant accuarcy metric |
|
313 |
n_tb<-subset(l$n_tb,pred_mod==mod_name,select=metric_name) |
|
314 |
sd_tb<-subset(l$sd_tb,pred_mod==mod_name,select=metric_name) #l$sd_abs_tb[,metric_name] |
|
315 |
|
|
316 |
xlab_text<-"holdout proportion" |
|
317 |
|
|
318 |
no <- unlist(as.data.frame(n_tb)) |
|
319 |
y <- unlist(as.data.frame(avg_tb)) |
|
320 |
|
|
321 |
x<- l$avg_tb$prop |
|
322 |
y_sd <- unlist(as.data.frame(sd_tb)) #sd_tb |
|
323 |
|
|
324 |
ciw <-y_sd |
|
325 |
#ciw2 <- qt(0.975, n) * y_sd2 / sqrt(n) |
|
326 |
|
|
327 |
#plotCI(y=y, x=x, uiw=ciw, col="red", main=paste(" MAE for ",mod_name,sep=""), barcol="blue", lwd=1, |
|
328 |
# ylab="RMSE (C)", xlab=xlab_text) |
|
329 |
|
|
330 |
ciw <- qt(0.975, no) * y_sd / sqrt(no) |
|
331 |
|
|
332 |
if(i==1){ |
|
333 |
plotCI(y=y, x=x, uiw=ciw, col=col_t[i], main=paste(" Comparison of ",metric_name," in ",mod_name,sep=""), barcol="blue", lwd=1, |
|
334 |
ylab="RMSE (C)", xlab=xlab_text) |
|
335 |
lines(y~x, col=col_t[i]) |
|
336 |
|
|
337 |
}else{ |
|
338 |
lines(y~x, col=col_t[i]) |
|
339 |
} |
|
340 |
|
|
341 |
} |
|
342 |
legend("topleft",legend=legend_text, |
|
343 |
cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n") |
|
344 |
#axis(1,at=1:length(stat_tb[,1]),labels=stat_tb[,1]) |
|
345 |
|
|
346 |
} |
|
347 |
|
|
348 |
############################## |
|
253 | 349 |
#### Parameters and constants |
254 | 350 |
|
255 | 351 |
|
... | ... | |
265 | 361 |
#gwr results: |
266 | 362 |
in_dir4 <-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gwr_day_lst_comb3_part1_07122013" |
267 | 363 |
#multisampling results (gam) |
268 |
in_dir5<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_day_mults15_lst_comb3_07232013"
|
|
364 |
in_dir5<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_daily_mults10_lst_comb3_08082013"
|
|
269 | 365 |
in_dir6<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_kriging_daily_mults10_lst_comb3_08062013" |
270 | 366 |
in_dir7<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gwr_daily_mults10_lst_comb3_08072013" |
271 | 367 |
|
... | ... | |
286 | 382 |
raster_obj_file_3 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_day_lst_comb3_07112013.RData" |
287 | 383 |
raster_obj_file_4 <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_day_lst_comb3_part1_07122013.RData" |
288 | 384 |
#multisampling using baseline lat,lon + elev |
289 |
raster_obj_file_5 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_day_mults15_lst_comb3_07232013.RData" |
|
290 |
raster_obj_file_6 <- "kriging_daily_validation_mod_obj_dailyTmax_365d_kriging_daily_mults10_lst_comb3_08062013.RData" |
|
291 |
raster_obj_file_7 <- "" |
|
292 |
|
|
385 |
raster_obj_file_5 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_daily_mults10_lst_comb3_08082013.RData" |
|
386 |
raster_obj_file_6 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_daily_mults10_lst_comb3_08062013.RData" |
|
387 |
raster_obj_file_7 <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_daily_mults10_lst_comb3_08072013.RData" |
|
388 |
#raster_prediction_obj_gam_daily_dailyTmax_365d_gam_daily_mults10_lst_comb3_08082013.RData |
|
389 |
|
|
293 | 390 |
#Load objects containing training, testing, models objects |
294 | 391 |
|
295 | 392 |
covar_obj <-load_obj(file.path(in_dir1,covar_obj_file1)) |
... | ... | |
306 | 403 |
raster_prediction_obj_4 <-load_obj(file.path(in_dir4,raster_obj_file_4)) |
307 | 404 |
raster_prediction_obj_5 <-load_obj(file.path(in_dir5,raster_obj_file_5)) #gam daily multisampling 10 to 70% |
308 | 405 |
raster_prediction_obj_6 <-load_obj(file.path(in_dir6,raster_obj_file_6)) #kriging daily multisampling |
309 |
#raster_prediction_obj_7 <-load_obj(file.path(in_dir7,raster_obj_file_7)) #kriging daily multisampling
|
|
406 |
raster_prediction_obj_7 <-load_obj(file.path(in_dir7,raster_obj_file_7)) #kriging daily multisampling |
|
310 | 407 |
|
311 | 408 |
names(raster_prediction_obj_1) #list of two objects |
312 | 409 |
|
... | ... | |
396 | 493 |
#element<-paste(mean_val,"+-",sd_val,sep="") |
397 | 494 |
#table__paper[i,j]<-element |
398 | 495 |
|
399 |
######################### |
|
400 |
####### Now create figures ### |
|
496 |
####################################
|
|
497 |
####### Now create figures #############
|
|
401 | 498 |
|
402 | 499 |
#figure 1: study area |
403 | 500 |
#figure 2: methodological worklfow |
... | ... | |
411 | 508 |
|
412 | 509 |
#not generated in R |
413 | 510 |
|
414 |
### Figure 3 |
|
511 |
################################################ |
|
512 |
################### Figure 3 |
|
415 | 513 |
|
416 | 514 |
#Analysis accuracy in term of distance to closest station |
417 | 515 |
#Assign model's names |
... | ... | |
440 | 538 |
#debug(plot_dst_MAE) |
441 | 539 |
plot_dst_MAE(list_param_plot) |
442 | 540 |
|
443 |
|
|
444 |
#Figure 4. RMSE and MAE, mulitisampling and hold out for FSS and GAM. |
|
541 |
#################################################### |
|
542 |
#########Figure 4. RMSE and MAE, mulitisampling and hold out for FSS and GAM.
|
|
445 | 543 |
|
446 | 544 |
#Using baseline 2: lat,lon and elev |
447 | 545 |
|
... | ... | |
449 | 547 |
#Use gam_day method |
450 | 548 |
#Use comb3 i.e. using baseline s(lat,lon)+s(elev) |
451 | 549 |
|
550 |
#names_mod<-c("mod1","mod2","mod3","mod4","mod5","mod6","mod7","mod8","mod9") |
|
551 |
names_mod<-c("mod1") |
|
552 |
|
|
553 |
debug(calc_stat_prop_tb) |
|
554 |
prop_obj_gam<-calc_stat_prop_tb(names_mod,raster_prediction_obj_5) |
|
555 |
prop_obj_kriging<-calc_stat_prop_tb(names_mod,raster_prediction_obj_6) |
|
556 |
prop_obj_gwr<-calc_stat_prop_tb(names_mod,raster_prediction_obj_7) |
|
557 |
|
|
558 |
list_prop_obj<-list(prop_obj_gam,prop_obj_kriging,prop_obj_gwr) |
|
559 |
col_t<-c("red","blue","black") |
|
560 |
pch_t<- 1:length(col_t) |
|
561 |
legend_text <- c("GAM","Kriging","GWR") |
|
562 |
mod_name<-c("mod1","mod1","mod1")#selected models |
|
563 |
metric_name<-"rmse" |
|
564 |
#png_names<- |
|
565 |
#png(file.path(out_path,paste("clim_surface_",y_var_name,"_",sampling_dat$date[i],"_",sampling_dat$prop[i], |
|
566 |
# "_",sampling_dat$run_samp[i],out_prefix,".png", sep=""))) |
|
567 |
|
|
568 |
list_param_plot<-list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name) |
|
569 |
names(list_param_plot)<-c("list_prop_obj","col_t","pch_t","legend_text","mod_name","metric_name") |
|
570 |
#debug(plot_prop_metrics) |
|
571 |
plot_prop_metrics(list_param_plot) |
|
572 |
|
|
573 |
#################################################### |
|
574 |
#########Figure 5. Overtraining tendency |
|
575 |
|
|
452 | 576 |
#read in relevant data: |
453 | 577 |
|
454 | 578 |
tb5 <-raster_prediction_obj_5$tb_diagnostic_v #gam dailycontains the accuracy metrics for each run... |
455 | 579 |
tb6 <-raster_prediction_obj_6$tb_diagnostic_v #Kriging daily methods |
456 |
#tb7 <-raster_prediction_obj_7$tb_diagnostic_v #gwr daily methods
|
|
580 |
tb7 <-raster_prediction_obj_7$tb_diagnostic_v #gwr daily methods |
|
457 | 581 |
|
458 |
#names_mod<-c("mod1","mod2","mod3","mod4","mod5","mod6","mod7","mod8","mod9") |
|
459 |
names_mod<-c("mod1") |
|
582 |
prop_obj_gam_s<-calc_stat_prop_tb(names_mod,raster_prediction_o bj_5,testing=FALSE) |
|
583 |
prop_obj_kriging_s<-calc_stat_prop_tb(names_mod,raster_prediction_obj_6,testing=FALSE) |
|
584 |
prop_obj_gwr_s<-calc_stat_prop_tb(names_mod,raster_prediction_obj_7,testing=FALSE) |
|
460 | 585 |
|
461 |
calc_stat_prop_tb <-function(names_mod,raster_prediction_obj){ |
|
462 |
#add for testing?? |
|
463 |
tb <-raster_prediction_obj$tb_diagnostic_v |
|
464 |
t<-melt(subset(tb5,pred_mod==names_mod), |
|
465 |
measure=c("mae","rmse","r","m50"), |
|
466 |
id=c("pred_mod","prop"), |
|
467 |
na.rm=T) |
|
468 |
|
|
469 |
avg_tb<-cast(t,pred_mod+prop~variable,mean) |
|
470 |
sd_tb<-cast(t,pred_mod+prop~variable,sd) |
|
471 |
n_tb<-cast(t,pred_mod+prop~variable,length) |
|
472 |
#n_NA<-cast(t,dst_cat1~variable,is.na) |
|
473 |
|
|
474 |
#### prepare returning object |
|
475 |
prop_obj<-list(tb,mae_tb,avg_tb,sd_tb,n_tb) |
|
476 |
names(prop_obj) <-c("tb","avg_tb","sd_tb","n_tb") |
|
477 |
#names(prop_obj) <-c("tb_v","tb_s","avg_tb_v","sd_tb_v","n_tb_s","avg_tb_s","sd_tb_s","n_tb_s") |
|
586 |
prop_obj_gam$avg_tb - prop_obj_gam_s$avg_tb |
|
587 |
plot(prop_obj_gam_s$avg_tb$rmse ~ prop_obj_gam_s$avg_tb$prop, type="b",) |
|
588 |
|
|
589 |
y_range<-range(prop_obj_gam$avg_tb$rmse,prop_obj_gam_s$avg_tb$rmse) |
|
590 |
plot(prop_obj_gam$avg_tb$rmse ~ prop_obj_gam$avg_tb$prop, type="b",ylim=y_range) |
|
591 |
lines(prop_obj_gam_s$avg_tb$rmse ~ prop_obj_gam_s$avg_tb$prop, type="b",ylim=y_range,col=c("red")) |
|
592 |
lines(prop_obj_gam$avg_tb$rmse ~ prop_obj_gam$avg_tb$prop, type="b",ylim=y_range,col=c("red"),lty=2) |
|
593 |
lines(prop_obj_gwr_s$avg_tb$rmse ~ prop_obj_gwr_s$avg_tb$prop, type="b",ylim=y_range,col=c("black")) |
|
594 |
lines(prop_obj_gwr$avg_tb$rmse ~ prop_obj_gam$avg_tb$prop, type="b",ylim=y_range,col=c("black"),lty=2) |
|
595 |
|
|
596 |
y_range<-range(prop_obj_kriging$avg_tb$rmse,prop_obj_kriging_s$avg_tb$rmse) |
|
597 |
plot(prop_obj_kriging$avg_tb$rmse ~ prop_obj_kriging$avg_tb$prop, type="b",ylim=y_range,col=c("blue"),lty=2) |
|
598 |
lines(prop_obj_kriging_s$avg_tb$rmse ~ prop_obj_kriging_s$avg_tb$prop, type="b",ylim=y_range,col=c("blue")) |
|
599 |
|
|
600 |
## Calculate average difference for RMSE for all three methods |
|
601 |
#read in relevant data: |
|
602 |
tb1_s<-extract_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"metrics_s") |
|
603 |
rownames(tb1_s)<-NULL #remove row names |
|
604 |
tb1_s$method_interp <- "gam_daily" #add type of interpolation...out_prefix too?? |
|
605 |
|
|
606 |
tb3_s<-extract_from_list_obj(raster_prediction_obj_4$validation_mod_obj,"metrics_s") |
|
607 |
rownames(tb1_s)<-NULL #remove row names |
|
608 |
tb3_s$method_interp <- "kriging_daily" #add type of interpolation...out_prefix too?? |
|
609 |
|
|
610 |
tb4_s<-extract_from_list_obj(raster_prediction_obj_3$validation_mod_obj,"metrics_s") |
|
611 |
rownames(tb4_s)<-NULL #remove row names |
|
612 |
tb4_s$method_interp <- "gwr_daily" #add type of interpolation...out_prefix too?? |
|
613 |
|
|
614 |
#tb1_s <-raster_prediction_obj_1$tb_diagnostic_s #gam dailycontains the accuracy metrics for each run... |
|
615 |
#tb3_s <-raster_prediction_obj_3$tb_diagnostic_s #Kriging daily methods |
|
616 |
#tb4_s <-raster_prediction_obj_4$tb_diagnostic_s #gwr daily methods |
|
617 |
|
|
618 |
tb1 <-raster_prediction_obj_1$tb_diagnostic_v #gam dailycontains the accuracy metrics for each run... |
|
619 |
tb3 <-raster_prediction_obj_3$tb_diagnostic_v #Kriging daily methods |
|
620 |
tb4 <-raster_prediction_obj_4$tb_diagnostic_v #gwr daily methods |
|
621 |
|
|
622 |
diff_df<-function(tb_s,tb_v,list_metric_names){ |
|
623 |
tb_diff<-vector("list", length(list_metric_names)) |
|
624 |
for (i in 1:length(list_metric_names)){ |
|
625 |
metric_name<-list_metric_names[i] |
|
626 |
tb_diff[[i]] <-tb_s[,c(metric_name)] - tb_v[,c(metric_name)] |
|
627 |
} |
|
628 |
names(tb_diff)<-list_metric_names |
|
629 |
tb_diff<-as.data.frame(do.call(cbind,tb_diff)) |
|
630 |
return(tb_diff) |
|
478 | 631 |
} |
479 | 632 |
|
480 | 633 |
|
481 |
#tbp<- subset(tb,prop!=70) #remove 70% hold out because it is only predicted for mod1 (baseline) |
|
482 |
#tp<-melt(tbp, |
|
483 |
# measure=c("mae","rmse","r","m50"), |
|
484 |
# id=c("pred_mod","prop"), |
|
485 |
# na.rm=T) |
|
634 |
#debug(diff_df) |
|
635 |
diff_tb1 <-diff_df(tb1_s[tb1_s$pred_mod=="mod1",],tb1[tb1$pred_mod=="mod1",],c("mae","rmse")) #select differences for mod1 |
|
636 |
diff_tb3 <-diff_df(tb3_s[tb3_s$pred_mod=="mod1",],tb3[tb3$pred_mod=="mod1",],c("mae","rmse")) |
|
637 |
diff_tb4 <-diff_df(tb4_s[tb4_s$pred_mod=="mod1",],tb4[tb4$pred_mod=="mod1",],c("mae","rmse")) |
|
486 | 638 |
|
487 |
avg_tp<-cast(tp,pred_mod~variable,mean)
|
|
639 |
x<-data.frame(gam=diff_tb1$mae,gwr=diff_tb3$mae,kriging=diff_tb4$mae)
|
|
488 | 640 |
|
489 |
avg_tb<-cast(t5,pred_mod+prop~variable,mean) |
|
490 |
sd_tb<-cast(t,pred_mod+prop~variable,sd) |
|
641 |
boxplot(x) #plot differences in training and testing accuracies for three methods |
|
491 | 642 |
|
492 |
n_tb<-cast(t,pred_mod+prop~variable,length) |
|
643 |
mae_tmp<- data.frame(gam=tb1[tb1$pred_mod=="mod1",c("mae")], |
|
644 |
kriging=tb3[tb3$pred_mod=="mod1",c("mae")], |
|
645 |
gwr=tb4[tb4$pred_mod=="mod1",c("mae")]) |
|
493 | 646 |
|
494 |
xyplot(avg_tb$rmse~avg_tb$prop,type="b",group=pred_mod, |
|
495 |
data=avg_tb, |
|
496 |
pch=1:length(avg_tb$pred_mod), |
|
497 |
par.settings=list(superpose.symbol = list( |
|
498 |
pch=1:length(avg_tb$pred_mod))), |
|
499 |
auto.key=list(columns=5)) |
|
647 |
plot(mae_tmp$gam,col=c("red"),type="b") |
|
648 |
lines(mae_tmp$kriging,col=c("blue"),type="b") |
|
649 |
lines(mae_tmp$gwr,col=c("black"),type="b") |
|
650 |
legend("topleft",legend=legend_text, |
|
651 |
cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n") |
|
500 | 652 |
|
501 |
mod_name<-"mod1" |
|
502 |
mod_name<-"mod4" |
|
503 |
xlab_text<-"proportion of hold out" |
|
653 |
max(mae_tmp$gam) |
|
504 | 654 |
|
505 |
n <- unlist(subset(n_tb,pred_mod==mod_name,select=c(rmse)))
|
|
506 |
y <- unlist(subset(avg_tb,pred_mod==mod_name,select=c(rmse)))
|
|
655 |
x2<-tb1[tb1$pred_mod=="mod1",c("mae","date")]
|
|
656 |
arrange(x2,desc(mae))
|
|
507 | 657 |
|
508 |
x<- 1:length(y) |
|
509 |
y_sd <- unlist(subset(sd_tb,pred_mod==mod_name,select=c(rmse))) |
|
510 | 658 |
|
511 |
ciw <-y_sd |
|
512 |
#ciw2 <- qt(0.975, n) * y_sd2 / sqrt(n) |
|
659 |
pmax(x2$mae,x2$date) |
|
513 | 660 |
|
514 |
plotCI(y=y, x=x, uiw=ciw, col="red", main=paste(" Mean and Std_dev RMSE for ",mod_name,sep=""), barcol="blue", lwd=1,
|
|
515 |
ylab="RMSE (C)", xlab=xlab_text)
|
|
661 |
kriging=tb3[tb3$pred_mod=="mod1",c("mae")],
|
|
662 |
gwr=tb4[tb4$pred_mod=="mod1",c("mae")])
|
|
516 | 663 |
|
517 |
ciw <- qt(0.975, n) * y_sd / sqrt(n)
|
|
664 |
##### MONTHLY AVERAGES
|
|
518 | 665 |
|
519 |
plotCI(y=y, x=x, uiw=ciw, col="red", main=paste(" Mean and CI RMSE for ",mod_name,sep=""), barcol="blue", lwd=1, |
|
520 |
ylab="RMSE (C)", xlab=xlab_text) |
|
666 |
tb1_month<-raster_prediction_obj_1$summary_month_metrics_v[[1]] #note that this is for model1 |
|
667 |
tb3_month<-raster_prediction_obj_3$summary_month_metrics_v[[1]] |
|
668 |
tb4_month<- raster_prediction_obj_4$summary_month_metrics_v[[1]] |
|
521 | 669 |
|
522 |
n=150 |
|
523 |
ciw <- qt(0.975, n) * y_sd / sqrt(n) |
|
524 |
ciw2 <- qt(0.975, n) * y_sd2 / sqrt(n) |
|
670 |
y_range<-range(tb1_month$mae,tb3_month$mae,tb4_month$mae) |
|
671 |
plot(1:12,tb1_month$mae,col=c("red"),type="b",ylim=y_range) |
|
672 |
lines(1:12,tb3_month$mae,col=c("blue"),type="b") |
|
673 |
lines(1:12,tb4_month$mae,col=c("black"),type="b") |
|
525 | 674 |
|
526 |
plot_dst_MAE <-function(list_param){ |
|
675 |
date<-strptime(tb1$date, "%Y%m%d") # interpolation date being processed |
|
676 |
month<-strftime(date, "%m") # current month of the date being processed |
|
677 |
|
|
678 |
tb1$month<-month |
|
679 |
x3<-tb1[tb1$pred_mod=="mod1",] |
|
680 |
|
|
681 |
(plot(x3[month=="01",c("mae")])) |
|
682 |
median(x3[x3$month=="03",c("mae")],na.rm=T) |
|
683 |
mean(x3[x3$month=="03",c("mae")],na.rm=T) |
|
684 |
|
|
685 |
plot_dst_spat_fun<-function(stat_tb,names_var,cat_val){ |
|
686 |
|
|
687 |
range_y<-range(as.vector(unlist(stat_tb[,names_var])),na.rm=T) #flatten data.frame |
|
688 |
col_t<-rainbow(length(names_var)) |
|
689 |
pch_t<- 1:length(names_var) |
|
690 |
plot(stat_tb[,names_var[1]], ylim=range_y,pch=pch_t[1],col=col_t[1],type="b", |
|
691 |
yla="MAE (in degree C)",xlab="",xaxt="n") |
|
692 |
#points((stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[1],col=col_t[1]),type="p") |
|
693 |
for (k in 2:length(names_var)){ |
|
694 |
lines(stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[k],col=col_t[k],type="b", |
|
695 |
xlab="",axes=F) |
|
696 |
#points((stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[k],col=col_t[k]),type="p") |
|
697 |
} |
|
698 |
legend("topleft",legend=names_var, |
|
699 |
cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n") |
|
700 |
axis(1,at=1:length(stat_tb[,1]),labels=stat_tb[,1]) |
|
701 |
} |
|
702 |
|
|
703 |
plot_prop_metrics <-function(list_param){ |
|
527 | 704 |
# |
528 | 705 |
#list_dist_obj: list of dist object |
529 | 706 |
#col_t: list of color for each |
... | ... | |
533 | 710 |
# |
534 | 711 |
## BEGIN ## |
535 | 712 |
|
536 |
list_dist_obj<-list_param$list_dist_obj
|
|
537 |
col_t<-list_param$col_t |
|
538 |
pch_t<- list_param$pch_t |
|
713 |
list_obj<-list_param$list_prop_obj
|
|
714 |
col_t <-list_param$col_t
|
|
715 |
pch_t <- list_param$pch_t
|
|
539 | 716 |
legend_text <- list_param$legend_text |
540 | 717 |
list_mod_name<-list_param$mod_name |
718 |
metric_name<-list_param$metric_name |
|
541 | 719 |
|
542 |
for (i in 1:length(list_dist_obj)){ |
|
543 |
|
|
544 |
l<-list_dist_obj[[i]] |
|
545 |
mae_tb<-l$mae_tb |
|
546 |
n_tb<-l$n_tb |
|
547 |
sd_abs_tb<-l$sd_abs_tb |
|
720 |
for (i in 1:length(list_obj)){ |
|
548 | 721 |
|
722 |
l<-list_obj[[i]] |
|
549 | 723 |
mod_name<-list_mod_name[i] |
550 |
xlab_text<-"distance to fitting station" |
|
724 |
avg_tb<-subset(l$avg_tb,pred_mod==mod_name,select=metric_name) #selecte relevant accuarcy metric |
|
725 |
n_tb<-subset(l$n_tb,pred_mod==mod_name,select=metric_name) |
|
726 |
sd_tb<-subset(l$sd_tb,pred_mod==mod_name,select=metric_name) #l$sd_abs_tb[,metric_name] |
|
551 | 727 |
|
552 |
n <- unlist(n_tb[1:13,c(mod_name)]) |
|
553 |
y <- unlist(mae_tb[1:13,c(mod_name)]) |
|
728 |
xlab_text<-"holdout proportion" |
|
554 | 729 |
|
555 |
x<- 1:length(y) |
|
556 |
y_sd <- unlist(sd_abs_tb[1:12,c(mod_name)]) |
|
730 |
no <- unlist(as.data.frame(n_tb)) |
|
731 |
y <- unlist(as.data.frame(avg_tb)) |
|
732 |
|
|
733 |
x<- l$avg_tb$prop |
|
734 |
y_sd <- unlist(as.data.frame(sd_tb)) #sd_tb |
|
557 | 735 |
|
558 | 736 |
ciw <-y_sd |
559 | 737 |
#ciw2 <- qt(0.975, n) * y_sd2 / sqrt(n) |
... | ... | |
561 | 739 |
#plotCI(y=y, x=x, uiw=ciw, col="red", main=paste(" MAE for ",mod_name,sep=""), barcol="blue", lwd=1, |
562 | 740 |
# ylab="RMSE (C)", xlab=xlab_text) |
563 | 741 |
|
564 |
ciw <- qt(0.975, n) * y_sd / sqrt(n)
|
|
742 |
ciw <- qt(0.975, no) * y_sd / sqrt(no)
|
|
565 | 743 |
|
566 | 744 |
if(i==1){ |
567 |
plotCI(y=y, x=x, uiw=ciw, col=col_t[i], main=paste(" Comparison of MAE in ",mod_name,sep=""), barcol="blue", lwd=1,
|
|
745 |
plotCI(y=y, x=x, uiw=ciw, col=col_t[i], main=paste(" Comparison of ",metric_name," in ",mod_name,sep=""), barcol="blue", lwd=1,
|
|
568 | 746 |
ylab="RMSE (C)", xlab=xlab_text) |
569 | 747 |
lines(y~x, col=col_t[i]) |
570 | 748 |
|
... | ... | |
579 | 757 |
|
580 | 758 |
} |
581 | 759 |
|
760 |
|
|
582 | 761 |
################### END OF SCRIPT ################### |
583 | 762 |
|
584 |
# create plot of accury in term of distance to closest fitting station |
|
585 |
plot_dst_spat_fun<-function(stat_tb,names_var,cat_val){ |
|
586 |
|
|
587 |
range_y<-range(as.vector(unlist(stat_tb[,names_var])),na.rm=T) #flatten data.frame |
|
588 |
col_t<-rainbow(length(names_var)) |
|
589 |
pch_t<- 1:length(names_var) |
|
590 |
plot(stat_tb[,names_var[1]], ylim=range_y,pch=pch_t[1],col=col_t[1],type="b", |
|
591 |
ylab="MAE (in degree C)",xlab="",xaxt="n") |
|
592 |
#points((stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[1],col=col_t[1]),type="p") |
|
593 |
for (k in 2:length(names_var)){ |
|
594 |
lines(stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[k],col=col_t[k],type="b", |
|
595 |
xlab="",axes=F) |
|
596 |
#points((stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[k],col=col_t[k]),type="p") |
|
597 |
} |
|
598 |
legend("topleft",legend=names_var, |
|
599 |
cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n") |
|
600 |
axis(1,at=1:length(stat_tb[,1]),labels=stat_tb[,1]) |
|
601 |
} |
|
602 | 763 |
|
Also available in: Unified diff
anlalyses paper contribution of covariates, over training tendency