Revision 076ccfc4
Added by Benoit Parmentier almost 9 years ago
climate/research/oregon/interpolation/global_run_scalingup_assessment_part2.R | ||
---|---|---|
5 | 5 |
#Analyses, figures, tables and data are also produced in the script. |
6 | 6 |
#AUTHOR: Benoit Parmentier |
7 | 7 |
#CREATED ON: 03/23/2014 |
8 |
#MODIFIED ON: 01/04/2016
|
|
8 |
#MODIFIED ON: 01/06/2016
|
|
9 | 9 |
#Version: 5 |
10 | 10 |
#PROJECT: Environmental Layers project |
11 | 11 |
#COMMENTS: analyses for run 10 global analyses,all regions 1500x4500km with additional tiles to increase overlap |
12 | 12 |
#TODO: |
13 |
#1) Split functions and master script
|
|
14 |
#2) Make this is a script/function callable from the shell/bash
|
|
15 |
#3) Check image format for tif
|
|
13 |
#1) Add plot broken down by year and region
|
|
14 |
#2) Modify code for overall assessment accross all regions and year
|
|
15 |
#3) Clean up
|
|
16 | 16 |
|
17 |
#First source these files: |
|
18 |
#Resolved call issues from R. |
|
19 |
#source /nobackupp6/aguzman4/climateLayers/sharedModules2/etc/environ.sh |
|
20 |
# |
|
17 | 21 |
################################################################################################# |
18 | 22 |
|
19 | 23 |
#### FUNCTION USED IN SCRIPT |
... | ... | |
132 | 136 |
|
133 | 137 |
####### Function used in the script ####### |
134 | 138 |
|
135 |
load_obj <- function(f){ |
|
136 |
env <- new.env() |
|
137 |
nm <- load(f, env)[1] |
|
138 |
env[[nm]] |
|
139 |
} |
|
140 |
|
|
141 |
create_dir_fun <- function(out_dir,out_suffix){ |
|
142 |
if(!is.null(out_suffix)){ |
|
143 |
out_name <- paste("output_",out_suffix,sep="") |
|
144 |
out_dir <- file.path(out_dir,out_name) |
|
145 |
} |
|
146 |
#create if does not exists |
|
147 |
if(!file.exists(out_dir)){ |
|
148 |
dir.create(out_dir) |
|
149 |
} |
|
150 |
return(out_dir) |
|
151 |
} |
|
152 |
|
|
153 | 139 |
#function_assessment_part2_functions <- "global_run_scalingup_assessment_part2_functions_0923015.R" |
154 | 140 |
#source(file.path(script_path,function_assessment_part2_functions)) #source all functions used in this script |
155 | 141 |
|
... | ... | |
172 | 158 |
region_name <- list_param_run_assessment_plotting$region_name #<- "world" #PARAM 15 |
173 | 159 |
df_assessment_files_name <- list_param_run_assessment_plotting$df_assessment_files_name #PARAM 16 |
174 | 160 |
threshold_missing_day <- list_param_run_assessment_plotting$threshold_missing_day #PARM17 |
161 |
year_predicted <- list_param_run_assessment_plotting$year_predicted |
|
175 | 162 |
|
176 | 163 |
NA_value <- NA_flag_val |
177 | 164 |
|
... | ... | |
188 | 175 |
|
189 | 176 |
setwd(out_dir) |
190 | 177 |
|
191 |
list_outfiles <- vector("list", length=20) #collect names of output files
|
|
192 |
list_outfiles_names <- vector("list", length=20) #collect names of output files
|
|
178 |
list_outfiles <- vector("list", length=23) #collect names of output files
|
|
179 |
list_outfiles_names <- vector("list", length=23) #collect names of output files
|
|
193 | 180 |
counter_fig <- 0 #index of figure to collect outputs |
194 | 181 |
|
195 | 182 |
#i <- year_predicted |
... | ... | |
226 | 213 |
tb_s <- merge(tb_s,df_tile_processed,by="tile_id") |
227 | 214 |
tb_month_s<- merge(tb_month_s,df_tile_processed,by="tile_id") |
228 | 215 |
summary_metrics_v <- merge(summary_metrics_v,df_tile_processed,by="tile_id") |
229 |
#} |
|
230 |
|
|
216 |
#test <- merge(summary_metrics_v,df_tile_processed,by="tile_id",all=F) |
|
217 |
#duplicate columns...need to be cleaned up later |
|
218 |
try(tb$year_predicted <- tb$year_predicted.x) |
|
219 |
try(tb$reg <- tb$reg.x) |
|
220 |
try(summary_metrics_v$year_predicted <- summary_metrics_v$year_predicted.x) |
|
221 |
try(summary_metrics_v$reg <- summary_metrics_v$reg.x) |
|
231 | 222 |
#tb_all <- tb |
232 | 223 |
#summary_metrics_v_all <- summary_metrics_v |
233 | 224 |
|
... | ... | |
331 | 322 |
#text(lat-shp,) |
332 | 323 |
#union(list_shp_reg_files[[1]],list_shp_reg_files[[2]]) |
333 | 324 |
list_outfiles[[counter_fig+1]] <- paste("Figure1_tile_processed_region_",region_name,"_",out_suffix,".png",sep="") |
325 |
counter_fig <- counter_fig+1 |
|
334 | 326 |
|
335 | 327 |
############### |
336 | 328 |
### Figure 2: boxplot of average accuracy by model and by tiles |
... | ... | |
639 | 631 |
col_mfrow <- 1 |
640 | 632 |
row_mfrow <- 1 |
641 | 633 |
|
642 |
png(filename=paste("Figure8a_boxplot_overall_separated_by_region_with_oultiers_",model_name[i],"_",out_suffix,".png",sep=""),
|
|
634 |
png(filename=paste("Figure8a_boxplot_overall_separated_by_region_with_oultiers_","_",out_suffix,".png",sep=""), |
|
643 | 635 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
644 | 636 |
|
645 | 637 |
p<- bwplot(rmse~pred_mod | reg, data=tb, |
... | ... | |
651 | 643 |
counter_fig <- counter_fig + 1 |
652 | 644 |
|
653 | 645 |
## Figure 8b |
654 |
png(filename=paste("Figure8b_boxplot_overall_separated_by_region_scaling_",model_name[i],"_",out_suffix,".png",sep=""),
|
|
646 |
png(filename=paste("Figure8b_boxplot_overall_separated_by_region_scaling_","_",out_suffix,".png",sep=""), |
|
655 | 647 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
656 | 648 |
|
657 | 649 |
boxplot(rmse~pred_mod,data=tb,ylim=c(0,5),outline=FALSE)#,names=tb$pred_mod) |
... | ... | |
663 | 655 |
|
664 | 656 |
list_outfiles[[counter_fig+1]] <- paste("Figure8b_boxplot_overall_separated_by_region_scaling_",model_name[i],"_",out_suffix,".png",sep="") |
665 | 657 |
counter_fig <- counter_fig + 1 |
658 |
|
|
659 |
## Select mod1 only now |
|
660 |
tb_subset <- subset(tb,model_name=="mod1") |
|
661 |
## Figure 8c |
|
662 |
|
|
663 |
res_pix <- 480 |
|
664 |
col_mfrow <- 1 |
|
665 |
row_mfrow <- 1 |
|
666 |
|
|
667 |
png(filename=paste("Figure8c_boxplot_overall_separated_by_region_with_oultiers_","mod1","_",out_suffix,".png",sep=""), |
|
668 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
669 |
|
|
670 |
p<- bwplot(rmse~pred_mod | reg, data=tb_subset, |
|
671 |
main="RMSE per model and region over all tiles") |
|
672 |
print(p) |
|
673 |
dev.off() |
|
674 |
|
|
675 |
list_outfiles[[counter_fig+1]] <- paste("Figure8c_boxplot_overall_separated_by_region_with_oultiers_",model_name[i],"_",out_suffix,".png",sep="") |
|
676 |
counter_fig <- counter_fig + 1 |
|
677 |
|
|
678 |
## Figure 8d |
|
679 |
png(filename=paste("Figure8d_boxplot_overall_separated_by_region_scaling_","mod1","_",out_suffix,".png",sep=""), |
|
680 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
681 |
|
|
682 |
boxplot(rmse~pred_mod,data=tb,ylim=c(0,5),outline=FALSE)#,names=tb$pred_mod) |
|
683 |
title("RMSE per model over all tiles") |
|
684 |
p<- bwplot(rmse~pred_mod | reg, data=tb_subset,ylim=c(0,5), |
|
685 |
main="RMSE per model and region over all tiles") |
|
686 |
print(p) |
|
687 |
dev.off() |
|
688 |
|
|
689 |
list_outfiles[[counter_fig+1]] <- paste("Figure8d_boxplot_overall_separated_by_region_scaling_",model_name[i],"_",out_suffix,".png",sep="") |
|
690 |
counter_fig <- counter_fig + 1 |
|
666 | 691 |
|
667 | 692 |
##################################################### |
668 | 693 |
#### Figure 9: plotting boxplot by year and regions ########### |
... | ... | |
675 | 700 |
# png(filename=paste("Figure9a_boxplot_overall_separated_by_region_year_with_oultiers_",model_name[i],"_",out_suffix,".png",sep=""), |
676 | 701 |
# width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
677 | 702 |
# |
678 |
# p<- bwplot(rmse~pred_mod | reg + year, data=tb, |
|
703 |
# p<- bwplot(rmse~pred_mod | reg + year_predicted, data=tb,
|
|
679 | 704 |
# main="RMSE per model and region over all tiles") |
680 | 705 |
# print(p) |
681 | 706 |
# dev.off() |
... | ... | |
686 | 711 |
# |
687 | 712 |
# boxplot(rmse~pred_mod,data=tb,ylim=c(0,5),outline=FALSE)#,names=tb$pred_mod) |
688 | 713 |
# title("RMSE per model over all tiles") |
689 |
# p<- bwplot(rmse~pred_mod | reg, data=tb,ylim=c(0,5), |
|
714 |
# p<- bwplot(rmse~pred_mod | reg + year_predicted, data=tb,ylim=c(0,5),
|
|
690 | 715 |
# main="RMSE per model and region over all tiles") |
691 | 716 |
# print(p) |
692 | 717 |
# dev.off() |
... | ... | |
698 | 723 |
############## Prepare object to return |
699 | 724 |
############## Collect information from assessment ########## |
700 | 725 |
|
701 |
# #comments
|
|
702 |
comments_str <- c("tile processed for the region", |
|
703 |
"boxplot with outlier", |
|
704 |
"boxplot with outlier", |
|
726 |
# This is hard coded and can be improved later on for flexibility. It works for now...
|
|
727 |
comments_str <- c("tile processed for the region",
|
|
728 |
"boxplot with outliers",
|
|
729 |
"boxplot with outliers",
|
|
705 | 730 |
"boxplot scaling by tiles", |
706 | 731 |
"boxplot scaling by tiles", |
707 | 732 |
"boxplot overall region with outliers", |
708 | 733 |
"boxplot overall region with scaling", |
709 |
"Barplot of metrics ranked by tile", |
|
710 |
"Barplot of metrics ranked by tile", |
|
711 |
"Average metrics map centroids", |
|
712 |
"Average metrics map centroids", |
|
713 |
"Number of missing day threshold1 map centroids", |
|
714 |
"Number of missing day threshold1 map centroids", |
|
715 |
"Number of missing day threshold1 map centroids", |
|
734 |
"boxplot overall region with outliers", |
|
735 |
"boxplot overall region with scaling", |
|
736 |
"Barplot of accuracy metrics ranked by tile", |
|
737 |
"Barplot of accuracy metrics ranked by tile", |
|
738 |
"Average accuracy metrics map at centroids", |
|
739 |
"Average accuracy metrics map at centroids", |
|
716 | 740 |
"Number of missing day threshold1 map centroids", |
741 |
"Number of missing day threshold2 map centroids", |
|
742 |
"Number of missing day threshold3 map centroids", |
|
743 |
"Number of missing day threshold4 map centroids", |
|
717 | 744 |
"number_daily_predictions_per_model", |
718 | 745 |
"histogram number_daily_predictions_per_models", |
719 | 746 |
"boxplot overall separated by region with_outliers", |
747 |
"boxplot overall separated by region with_scaling", |
|
748 |
"boxplot overall separated by region with_outliers", |
|
720 | 749 |
"boxplot overall separated by region with_scaling") |
721 |
|
|
722 |
# c("figure_1","figure_2a","figure_2a","figure_2b","figure_2b","figure_3a","figure_3b","figure_5", |
|
723 |
# "figure_5","figure_6","figure_6", |
|
724 |
# Figure_7a |
|
725 |
# Figure_7a |
|
726 |
#Number of missing day threshold1 map centroids Figure_7a |
|
727 |
#Number of missing day threshold1 map centroids Figure_7a |
|
728 |
#number_daily_predictions_per_model Figure_7b |
|
729 |
#histogram number_daily_predictions_per_models Figure_7c |
|
730 |
#boxplot_overall_separated_by_region_with_oultiers_ Figure 8a |
|
731 |
#boxplot_overall_separated_by_region_with_scaling Figure 8b |
|
732 | 750 |
|
733 |
outfiles_names <- c("summary_metrics_v_names","tb_v_accuracy_name","tb_month_s_name","tb_s_accuracy_name", |
|
734 |
"data_month_s_name","data_day_v_name","data_day_s_name","data_month_v_name", "tb_month_v_name", |
|
735 |
"pred_data_month_info_name","pred_data_day_info_name","df_tile_processed_name","df_tiles_all_name", |
|
736 |
"df_tiles_all_name") |
|
737 |
names(list_outfiles) <- outfiles_names |
|
751 |
figure_no <- c("figure_1","figure_2a","figure_2a","figure_2b","figure_2b","figure_3a","figure_3a","figure_3b","figure_3b", |
|
752 |
"figure_5", "figure_5","figure_6","figure_6","Figure_7a","Figure_7a","Figure_7a","Figure_7a","Figure_7b", |
|
753 |
"Figure_7c","Figure 8a","Figure 8a","Figure 8b","Figure 8b") |
|
754 |
|
|
755 |
col_model_name <- c(NA,"mod1","mod_kr","mod1","mod_kr","mod1","mod_kr","mod1","mod_kr","mod1","mod_kr", |
|
756 |
"mod1","mod_kr","mod1","mod1","mod1","mod1","mod1","mod1",NA,NA,"mod1","mod1") |
|
757 |
col_reg <- rep(region_name,length(list_outfiles)) |
|
758 |
col_year_predicted <- rep(year_predicted,length(list_outfiles)) |
|
738 | 759 |
|
739 | 760 |
#This data.frame contains all the files from the assessment |
740 |
df_assessment_figures_files <- data.frame(filename=outfiles_names,files=unlist(list_outfiles), |
|
741 |
reg=region_name,year=year_predicted) |
|
761 |
df_assessment_figures_files <- data.frame(figure_no=figure_no, |
|
762 |
comment = comments_str, |
|
763 |
model_name=col_model_name, |
|
764 |
reg=col_reg, |
|
765 |
year_predicted=col_year_predicted, |
|
766 |
filename=unlist(list_outfiles)) |
|
767 |
|
|
742 | 768 |
###Prepare files for copying back? |
743 |
df_assessment_figures_files_names <- file.path(out_dir,paste("df_assessment_files_",region_name,"_",year_predicted,"_",out_prefix,".txt",sep=""))
|
|
744 |
write.table(df_assessment_files, |
|
745 |
file=df_assessment_files_name,sep=",")
|
|
769 |
df_assessment_figures_files_names <- file.path(out_dir,paste("df_assessment_figures_files_",region_name,"_",year_predicted,"_",out_suffix,".txt",sep=""))
|
|
770 |
write.table(df_assessment_figures_files,
|
|
771 |
file=df_assessment_figures_files_names ,sep=",")
|
|
746 | 772 |
|
747 | 773 |
#df_assessment_figures_files_names |
748 | 774 |
|
749 | 775 |
###################################################### |
750 | 776 |
##### Prepare objet to return #### |
751 | 777 |
|
752 |
#assessment_obj <- list(df_assessment_files, df_assessment_figures_files)
|
|
753 |
#names(assessment_obj) <- c("df_assessment_files", "df_assessment_figures_files")
|
|
778 |
assessment_obj <- list(df_assessment_files, df_assessment_figures_files) |
|
779 |
names(assessment_obj) <- c("df_assessment_files", "df_assessment_figures_files") |
|
754 | 780 |
## Prepare list of files to return... |
755 |
return(df_assessment_figures_files_names)
|
|
781 |
return(assessment_obj)
|
|
756 | 782 |
|
757 | 783 |
} |
758 | 784 |
|
... | ... | |
767 | 793 |
# boxplot overall region with outliers figure_3a reg4 NA |
768 | 794 |
# boxplot overall region with scaling figure_3b reg4 NA |
769 | 795 |
# Barplot of metrics ranked by tile Figure_5 |
796 |
# boxplot overall region with scaling figure_3b reg4 NA |
|
797 |
# Barplot of metrics ranked by tile Figure_5 |
|
770 | 798 |
# Barplot of metrics ranked by tile Figure_5 |
771 | 799 |
# Average metrics map centroids Figure_6 |
772 | 800 |
# Average metrics map centroids Figure_6 |
Also available in: Unified diff
plotting figures assessment debugging and checking output table