Revision ae5ef12a
Added by Benoit Parmentier almost 9 years ago
climate/research/oregon/interpolation/global_run_scalingup_assessment_part3.R | ||
---|---|---|
7 | 7 |
#Analyses, figures, tables and data are also produced in the script. |
8 | 8 |
#AUTHOR: Benoit Parmentier |
9 | 9 |
#CREATED ON: 03/23/2014 |
10 |
#MODIFIED ON: 02/07/2016
|
|
10 |
#MODIFIED ON: 02/08/2016
|
|
11 | 11 |
#Version: 5 |
12 | 12 |
#PROJECT: Environmental Layers project |
13 | 13 |
#COMMENTS: Initial commit, script based on part 2 of assessment, will be modified further for overall assessment |
... | ... | |
147 | 147 |
|
148 | 148 |
####### Function used in the script ####### |
149 | 149 |
|
150 |
script_path <- "/home/parmentier/Data/IPLANT_project/env_layers_scripts" |
|
151 |
function_assessment_part2_functions <- "global_run_scalingup_assessment_part2_functions_01032016.R" |
|
152 |
source(file.path(script_path,function_assessment_part2_functions)) #source all functions used in this script |
|
150 |
#script_path <- "/home/parmentier/Data/IPLANT_project/env_layers_scripts"
|
|
151 |
#function_assessment_part2_functions <- "global_run_scalingup_assessment_part2_functions_01032016.R"
|
|
152 |
#source(file.path(script_path,function_assessment_part2_functions)) #source all functions used in this script
|
|
153 | 153 |
|
154 | 154 |
####### PARSE INPUT ARGUMENTS/PARAMETERS ##### |
155 |
list_param_run_assessment_plotting$in_dir_list_filename #PARAM 0 |
|
155 |
in_dir_list_filename <- list_param_run_assessment_plotting$in_dir_list_filename #PARAM 0
|
|
156 | 156 |
in_dir <- list_param_run_assessment_plotting$in_dir #PARAM 1 |
157 | 157 |
y_var_name <- list_param_run_assessment_plotting$y_var_name #PARAM2 |
158 | 158 |
interpolation_method <- list_param_run_assessment_plotting$interpolation_method #c("gam_CAI") #PARAM3 |
... | ... | |
187 | 187 |
|
188 | 188 |
setwd(out_dir) |
189 | 189 |
|
190 |
list_outfiles <- vector("list", length=23) #collect names of output files, this should be dynamic?
|
|
191 |
list_outfiles_names <- vector("list", length=23) #collect names of output files
|
|
190 |
list_outfiles <- vector("list", length=29) #collect names of output files, this should be dynamic?
|
|
191 |
list_outfiles_names <- vector("list", length=29) #collect names of output files
|
|
192 | 192 |
counter_fig <- 0 #index of figure to collect outputs |
193 | 193 |
|
194 | 194 |
#i <- year_predicted |
195 | 195 |
###Table 1: Average accuracy metrics |
196 | 196 |
###Table 2: daily accuracy metrics for all tiles |
197 | 197 |
|
198 |
##As a first quick set up for the meeting 01/27 just read in from the in_dir_list |
|
199 |
list_tb_fname <- list.files(path=file.path(in_dir,in_dir_list),"tb_diagnostic_v_NA_.*._run_global_analyses_pred_.*._reg4.txt",full.names=T) |
|
200 |
list_df_fname <- list.files(path=file.path(in_dir,in_dir_list),"df_tile_processed_.*._run_global_analyses_pred_.*._reg4.txt",full.names=T) |
|
201 |
list_summary_metrics_v_fname <- list.files(path=file.path(in_dir,in_dir_list),"summary_metrics_v2_NA_.*._run_global_analyses_pred_.*._reg4.txt",full.names=T) |
|
198 |
in_dir_list <- as.list(read.table(in_dir_list_filename,stringsAsFactors=F)[,1]) |
|
199 |
|
|
200 |
##Read in data list from in_dir_list |
|
201 |
list_tb_fname <- list.files(path=file.path(in_dir,in_dir_list),"tb_diagnostic_v_NA_.*.txt",full.names=T) |
|
202 |
list_df_fname <- list.files(path=file.path(in_dir,in_dir_list),"df_tile_processed_.*..txt",full.names=T) |
|
203 |
list_summary_metrics_v_fname <- list.files(path=file.path(in_dir,in_dir_list),"summary_metrics_v2_NA_.*.txt",full.names=T) |
|
204 |
list_tb_s_fname <- list.files(path=file.path(in_dir,in_dir_list),"tb_diagnostic_s_NA.*.txt",full.names=T) |
|
205 |
list_tb_month_s_fname <- list.files(path=file.path(in_dir,in_dir_list),"tb_month_diagnostic_s.*.txt",full.names=T) |
|
206 |
list_data_month_s_fname <- list.files(path=file.path(in_dir,in_dir_list),"data_month_s.*.txt",full.names=T) |
|
207 |
list_data_s_fname <- list.files(path=file.path(in_dir,in_dir_list),"data_day_s.*.txt",full.names=T) |
|
208 |
list_data_v_fname <- list.files(path=file.path(in_dir,in_dir_list),"data_day_v.*.txt",full.names=T) |
|
209 |
list_pred_data_month_info_fname <- list.files(path=file.path(in_dir,in_dir_list),"pred_data_month_info.*.txt",full.names=T) |
|
210 |
list_pred_data_day_info_fname <- list.files(path=file.path(in_dir,in_dir_list),"pred_data_day_info.*.txt",full.names=T) |
|
211 |
|
|
202 | 212 |
#need to fix this !! has all of the files in one list (for a region) |
203 | 213 |
#list_shp <- list.files(path=file.path(in_dir,file.path(in_dir_list,"shapefiles")),"*.shp",full.names=T) |
204 | 214 |
|
215 |
## Step 2: only read what is necessary at this stage... |
|
205 | 216 |
list_tb <- lapply(list_tb_fname,function(x){read.table(x,stringsAsFactors=F,sep=",")}) |
206 | 217 |
tb <- do.call(rbind,list_tb) |
218 |
list_tb_s <- lapply(list_tb_s_fname,function(x){read.table(x,stringsAsFactors=F,sep=",")}) |
|
219 |
tb_s <- do.call(rbind,list_tb_s) |
|
220 |
|
|
207 | 221 |
list_df <- lapply(list_df_fname,function(x){read.table(x,stringsAsFactors=F,sep=",")}) |
208 | 222 |
df_tile_processed <- do.call(rbind,list_df) |
209 | 223 |
list_summary_metrics_v <- lapply(list_summary_metrics_v_fname,function(x){read.table(x,stringsAsFactors=F,sep=",")}) |
210 | 224 |
summary_metrics_v <- do.call(rbind,list_summary_metrics_v) |
225 |
|
|
226 |
list_tb_month_s <- lapply(list_tb_month_s_fname,function(x){read.table(x,stringsAsFactors=F,sep=",")}) |
|
227 |
tb_month_s <- do.call(rbind,list_tb_month_s) |
|
228 |
|
|
211 | 229 |
##Stop added |
212 |
|
|
213 |
tb <- read.table(list_tb[1],stringsAsFactors=F,sep=",") |
|
214 |
|
|
215 |
df_assessment_files <- read.table(df_assessment_files_name,stringsAsFactors=F,sep=",") |
|
216 |
#df_assessment_files, note that in_dir indicate the path of the textfiles |
|
217 |
summary_metrics_v <- read.table(file.path(in_dir,basename(df_assessment_files$files[1])),sep=",") |
|
218 |
tb <- read.table(file.path(in_dir, basename(df_assessment_files$files[2])),sep=",") |
|
219 |
tb_s <- read.table(file.path(in_dir, basename(df_assessment_files$files[4])),sep=",") |
|
220 |
|
|
221 |
tb_month_s <- read.table(file.path(in_dir,basename(df_assessment_files$files[3])),sep=",") |
|
222 |
pred_data_month_info <- read.table(file.path(in_dir, basename(df_assessment_files$files[10])),sep=",") |
|
223 |
pred_data_day_info <- read.table(file.path(in_dir, basename(df_assessment_files$files[11])),sep=",") |
|
224 |
df_tile_processed <- read.table(file.path(in_dir, basename(df_assessment_files$files[12])),sep=",") |
|
230 |
##Screen for non shapefiles tiles due to dir |
|
231 |
df_tile_processed <- df_tile_processed[!is.na(df_tile_processed$shp_files),] |
|
225 | 232 |
|
226 | 233 |
#add column for tile size later on!!! |
227 | 234 |
|
... | ... | |
250 | 257 |
try(summary_metrics_v$reg <- summary_metrics_v$reg.x) |
251 | 258 |
try(summary_metrics_v$lat <- summary_metrics_v$lat.x) |
252 | 259 |
try(summary_metrics_v$lon <- summary_metrics_v$lon.x) |
253 |
#summary_metrics_v |
|
254 |
#tb_all <- tb |
|
255 |
#summary_metrics_v_all <- summary_metrics_v |
|
256 |
|
|
257 |
#table(summary_metrics_v_all$reg) |
|
258 |
#table(summary_metrics_v$reg) |
|
259 |
#table(tb_all$reg) |
|
260 |
#table(tb$reg) |
|
261 |
|
|
260 |
|
|
262 | 261 |
############ PART 2: PRODUCE FIGURES ################ |
263 | 262 |
|
264 | 263 |
########################### |
... | ... | |
268 | 267 |
#list_shp_reg_files <- df_tiled_processed$shp_files |
269 | 268 |
|
270 | 269 |
#list_shp_reg_files<- as.character(df_tile_processed$shp_files) #this could be the solution!! |
271 |
list_shp_reg_files <- as.character(basename(list_df[[1]]$shp_files)) #this could be the solution!!
|
|
270 |
list_shp_reg_files <- as.character(basename(unique(df_tile_processed$shp_files))) #this could be the solution!!
|
|
272 | 271 |
#the shapefiles must be copied in the proper folder!!! |
273 | 272 |
#list_shp_reg_files<- file.path(in_dir,in_dir_list[1],"shapefile",list_shp_reg_files) |
274 | 273 |
#list_shp_reg_files <- file.path("/data/project/layers/commons/NEX_data/",out_dir, |
... | ... | |
362 | 361 |
|
363 | 362 |
dev.off() |
364 | 363 |
|
365 |
#unique(summaty_metrics$tile_id) |
|
366 |
#text(lat-shp,) |
|
367 |
#union(list_shp_reg_files[[1]],list_shp_reg_files[[2]]) |
|
368 | 364 |
list_outfiles[[counter_fig+1]] <- paste("Figure1_tile_processed_region_",region_name,"_",out_suffix,".png",sep="") |
369 | 365 |
counter_fig <- counter_fig+1 |
370 |
|
|
366 |
#this will be changed to be added to data.frame on the fly |
|
367 |
r1 <-c("figure_1","Tiles processed for the region",NA,NA,region_name,year_predicted,list_outfiles[[1]]) |
|
368 |
|
|
371 | 369 |
############### |
372 | 370 |
### Figure 2: boxplot of average accuracy by model and by tiles |
373 | 371 |
|
... | ... | |
391 | 389 |
list_outfiles[[counter_fig+i]] <- fig_filename |
392 | 390 |
} |
393 | 391 |
counter_fig <- counter_fig + length(model_name) |
392 |
|
|
393 |
r2 <-c("figure_2a","Boxplot of accuracy with outliers by tiles","mod1",metric_name,region_name,year_predicted,list_outfiles[[2]]) |
|
394 |
r3 <-c("figure_2a","boxplot of accuracy with outliers by tiles","mod_kr",metric_name,region_name,year_predicted,list_outfiles[[3]]) |
|
395 |
|
|
394 | 396 |
## Figure 2b |
395 | 397 |
#with ylim and removing trailing... |
396 | 398 |
for(i in 1:length(model_name)){ #there are two models!! |
... | ... | |
412 | 414 |
} |
413 | 415 |
counter_fig <- counter_fig + length(model_name) |
414 | 416 |
#bwplot(rmse~tile_id, data=subset(tb,tb$pred_mod=="mod1")) |
417 |
r4 <-c("figure_2b","Boxplot of accuracy with scaling by tiles","mod1",metric_name,region_name,year_predicted,list_outfiles[[4]]) |
|
418 |
r5 <-c("figure_2b","Boxplot of accuracy with scaling by tiles","mod_kr",metric_name,region_name,year_predicted,list_outfiles[[5]]) |
|
415 | 419 |
|
416 | 420 |
############### |
417 | 421 |
### Figure 3: boxplot of average RMSE by model acrosss all tiles |
... | ... | |
845 | 849 |
} |
846 | 850 |
|
847 | 851 |
##################### END OF SCRIPT ###################### |
848 |
|
|
849 |
|
|
850 |
|
|
851 |
|
|
852 |
|
|
853 |
|
|
854 |
|
|
855 |
|
|
856 |
|
|
857 |
|
|
858 |
|
|
859 |
|
|
860 |
|
|
861 |
|
|
862 |
|
|
863 |
|
|
864 |
|
|
865 |
|
|
866 |
|
|
867 |
|
|
868 |
|
|
869 |
|
|
870 |
|
|
871 |
|
|
872 |
|
|
873 |
|
|
874 |
|
|
875 |
|
|
876 |
#### CURRENT ERROR ON NEX |
|
877 |
|
|
878 |
# #comments #figure_no #region #models |
|
879 |
# tile processed for the region figure_1 reg4 NA |
|
880 |
# boxplot with outlier figure_2a reg4 mod1 |
|
881 |
# boxplot with outlier figure_2a reg4 mod_kr |
|
882 |
# boxplot scaling by tiles figure_2b reg4 mod1 |
|
883 |
# boxplot scaling by tiles figure_2b reg4 mod_kr |
|
884 |
# boxplot overall region with outliers figure_3a reg4 NA |
|
885 |
# boxplot overall region with scaling figure_3b reg4 NA |
|
886 |
# Barplot of metrics ranked by tile Figure_5 |
|
887 |
# boxplot overall region with scaling figure_3b reg4 NA |
|
888 |
# Barplot of metrics ranked by tile Figure_5 |
|
889 |
# Barplot of metrics ranked by tile Figure_5 |
|
890 |
# Average metrics map centroids Figure_6 |
|
891 |
# Average metrics map centroids Figure_6 |
|
892 |
# Number of missing day threshold1 map centroids Figure_7a |
|
893 |
# Number of missing day threshold1 map centroids Figure_7a |
|
894 |
# Number of missing day threshold1 map centroids Figure_7a |
|
895 |
# Number of missing day threshold1 map centroids Figure_7a |
|
896 |
# number_daily_predictions_per_model Figure_7b |
|
897 |
# histogram number_daily_predictions_per_models Figure_7c |
|
898 |
# boxplot_overall_separated_by_region_with_oultiers_ Figure 8a |
|
899 |
# boxplot_overall_separated_by_region_with_scaling Figure 8b |
|
900 |
|
|
901 |
# Browse[3]> c |
|
902 |
# Error in text.default(coordinates(pt)[1], coordinates(pt)[2], labels = i, : |
|
903 |
# X11 font -adobe-helvetica-%s-%s-*-*-%d-*-*-*-*-*-*-*, face 2 at size 16 could not be loaded |
|
904 |
# In addition: Warning message: |
|
905 |
# In polypath(x = mcrds[, 1], y = mcrds[, 2], border = border, col = col, : |
|
906 |
# Path drawing not available for this device |
|
907 |
|
|
908 |
|
|
909 |
|
|
910 |
# Browse[2]> for(i in 1:length(threshold_missing_day)){ |
|
911 |
# + |
|
912 |
# + #summary_metrics_v$n_missing <- summary_metrics_v$n == 365 |
|
913 |
# + #summary_metrics_v$n_missing <- summary_metrics_v$n < 365 |
|
914 |
# + summary_metrics_v$n_missing <- summary_metrics_v$n < threshold_missing_day[i] |
|
915 |
# + summary_metrics_v_subset <- subset(summary_metrics_v,model_name=="mod1") |
|
916 |
# + |
|
917 |
# + #res_pix <- 1200 |
|
918 |
# + res_pix <- 960 |
|
919 |
# + |
|
920 |
# + col_mfrow <- 1 |
|
921 |
# + row_mfrow <- 1 |
|
922 |
# + fig_filename <- paste("Figure7a_ac_metrics_map_centroids_tile_",model_name[j],"_","missing_day_",threshold_missing_day[i], |
|
923 |
# + "_",out_suffix,".png",sep="") |
|
924 |
# + png(filename=paste("Figure7a_ac_metrics_map_centroids_tile_",model_name[j],"_","missing_day_",threshold_missing_day[i], |
|
925 |
# + "_",out_suffix,".png",sep=""), |
|
926 |
# + width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
927 |
# + |
|
928 |
# + model_name[j] |
|
929 |
# + |
|
930 |
# + p_shp <- layer(sp.polygons(reg_layer, lwd=1, col='black')) |
|
931 |
# + #title("(a) Mean for 1 January") |
|
932 |
# + p <- bubble(summary_metrics_v_subset,"n_missing",main=paste("Missing per tile and by ",model_name[j]," for ", |
|
933 |
# + threshold_missing_day[i])) |
|
934 |
# + p1 <- p+p_shp |
|
935 |
# + try(print(p1)) #error raised if number of missing values below a threshold does not exist |
|
936 |
# + dev.off() |
|
937 |
# + |
|
938 |
# + list_outfiles[[counter_fig+i]] <- fig_filename |
|
939 |
# + } |
|
940 |
# debug at /nobackupp8/bparmen1/env_layers_scripts/global_run_scalingup_assessment_part2_01042016.R#272: i |
|
941 |
# Browse[3]> counter_fig <- counter_fig+length(threshold_missing_day) #currently 4 days... |
|
942 |
# Browse[3]> c |
|
943 |
# Error in grid.Call.graphics(L_setviewport, pvp, TRUE) : |
|
944 |
# non-finite location and/or size for viewport |
|
945 |
#Error in grid.Call.graphics(L_setviewport, vp, TRUE) : |
|
946 |
# non-finite location and/or size for viewport |
Also available in: Unified diff
combining inputs from accuracy tables, script assessment part3