Revision 2dd925ab
Added by Benoit Parmentier almost 9 years ago
climate/research/oregon/interpolation/global_run_scalingup_assessment_part3.R | ||
---|---|---|
1 | 1 |
############################## INTERPOLATION OF TEMPERATURES ####################################### |
2 | 2 |
####################### Script for assessment of scaling up on NEX : part3 ############################## |
3 | 3 |
#This script uses the worklfow code applied to the globe. Results currently reside on NEX/PLEIADES NASA. |
4 |
#In part 3, models and results are assessed on a tile basis using modifications of method assessment |
|
4 |
#This script complement part1 and part2 of the accuracy assessment and group tables and outputs |
|
5 |
#from run of accuracy assessement generated earlier. |
|
5 | 6 |
#Analyses, figures, tables and data are also produced in the script. |
6 | 7 |
#AUTHOR: Benoit Parmentier |
7 |
#CREATED ON: 05/21/2014 |
|
8 |
#MODIFIED ON: 09/07/2014 |
|
9 |
#Version: 1 |
|
10 |
#PROJECT: Environmental Layers project |
|
11 |
################################################################################################# |
|
12 |
|
|
13 |
### Loading R library and packages |
|
14 |
#library used in the workflow production: |
|
15 |
library(gtools) # loading some useful tools |
|
16 |
library(mgcv) # GAM package by Simon Wood |
|
17 |
library(sp) # Spatial pacakge with class definition by Bivand et al. |
|
18 |
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al. |
|
19 |
library(rgdal) # GDAL wrapper for R, spatial utilities |
|
20 |
library(gstat) # Kriging and co-kriging by Pebesma et al. |
|
21 |
library(fields) # NCAR Spatial Interpolation methods such as kriging, splines |
|
22 |
library(raster) # Hijmans et al. package for raster processing |
|
23 |
library(gdata) # various tools with xls reading, cbindX |
|
24 |
library(rasterVis) # Raster plotting functions |
|
25 |
library(parallel) # Parallelization of processes with multiple cores |
|
26 |
library(maptools) # Tools and functions for sp and other spatial objects e.g. spCbind |
|
27 |
library(maps) # Tools and data for spatial/geographic objects |
|
28 |
library(reshape) # Change shape of object, summarize results |
|
29 |
library(plotrix) # Additional plotting functions |
|
30 |
library(plyr) # Various tools including rbind.fill |
|
31 |
library(spgwr) # GWR method |
|
32 |
library(automap) # Kriging automatic fitting of variogram using gstat |
|
33 |
library(rgeos) # Geometric, topologic library of functions |
|
34 |
#RPostgreSQL # Interface R and Postgres, not used in this script |
|
35 |
library(gridExtra) |
|
36 |
#Additional libraries not used in workflow |
|
37 |
library(pgirmess) # Krusall Wallis test with mulitple options, Kruskalmc {pgirmess} |
|
38 |
library(colorRamps) |
|
8 |
#CREATED ON: 03/23/2014 |
|
9 |
#MODIFIED ON: 01/27/2016 |
|
10 |
#Version: 5 |
|
11 |
#PROJECT: Environmental Layers project |
|
12 |
#COMMENTS: Initial commit, script based on part 2 of assessment, will be modified further for overall assessment |
|
13 |
#TODO: |
|
14 |
#1) Add plot broken down by year and region |
|
15 |
#2) Modify code for overall assessment accross all regions and year |
|
16 |
#3) Clean up |
|
39 | 17 |
|
40 |
#### FUNCTION USED IN SCRIPT |
|
18 |
#First source these files: |
|
19 |
#Resolved call issues from R. |
|
20 |
#source /nobackupp6/aguzman4/climateLayers/sharedModules2/etc/environ.sh |
|
21 |
# |
|
22 |
#setfacl -Rmd user:aguzman4:rwx /nobackupp8/bparmen1/output_run10_1500x4500_global_analyses_pred_1992_10052015 |
|
41 | 23 |
|
42 |
function_analyses_paper1 <- "contribution_of_covariates_paper_interpolation_functions_05212014.R" #first interp paper |
|
43 |
function_analyses_paper2 <- "multi_timescales_paper_interpolation_functions_05052014.R" |
|
44 |
function_assessment_by_tile <- "results_interpolation_date_output_analyses_05212014.R" |
|
45 |
source(file.path(script_path,"results_interpolation_date_output_analyses_08052013.R")) |
|
24 |
################################################################################################# |
|
46 | 25 |
|
47 |
load_obj <- function(f) |
|
48 |
{ |
|
49 |
env <- new.env() |
|
50 |
nm <- load(f, env)[1] |
|
51 |
env[[nm]] |
|
52 |
} |
|
26 |
#### FUNCTION USED IN SCRIPT |
|
53 | 27 |
|
54 |
create_dir_fun <- function(out_dir,out_suffix){ |
|
55 |
if(!is.null(out_suffix)){ |
|
56 |
out_name <- paste("output_",out_suffix,sep="") |
|
57 |
out_dir <- file.path(out_dir,out_name) |
|
58 |
} |
|
59 |
#create if does not exists |
|
60 |
if(!file.exists(out_dir)){ |
|
61 |
dir.create(out_dir) |
|
62 |
} |
|
63 |
return(out_dir) |
|
64 |
} |
|
28 |
#function_analyses_paper1 <-"contribution_of_covariates_paper_interpolation_functions_07182014.R" #first interp paper |
|
29 |
#function_analyses_paper2 <-"multi_timescales_paper_interpolation_functions_08132014.R" |
|
30 |
#function_global_run_assessment_part2 <- "global_run_scalingup_assessment_part2_functions_0923015.R" |
|
65 | 31 |
|
66 |
############################## |
|
32 |
############################################
|
|
67 | 33 |
#### Parameters and constants |
68 | 34 |
|
69 |
Atlas_server <- TRUE #data on NCEAS Atlas |
|
70 |
NEX_sever <- TRUE #data on NEX NASA |
|
71 |
|
|
72 | 35 |
#on ATLAS |
73 |
#if(Atlas_server==TRUE){ |
|
74 |
# |
|
75 |
#} |
|
76 |
script_path<-"/home/parmentier/Data/IPLANT_project/env_layers_scripts/" #path to script |
|
77 |
source(file.path(script_path,function_analyses_paper1)) #source all functions used in this script 1. |
|
78 |
source(file.path(script_path,function_analyses_paper2)) #source all functions used in this script 2. |
|
79 |
source(file.path(script_path,function_assessment_by_tile)) #source all functions used in this script 2. |
|
80 |
|
|
81 | 36 |
#in_dir1 <- "/data/project/layers/commons/NEX_data/test_run1_03232014/output" #On Atlas |
82 | 37 |
#parent output dir : contains subset of the data produced on NEX |
83 |
in_dir1 <- "/data/project/layers/commons/NEX_data/output_run5_global_analyses_08252014/output20Deg/"
|
|
38 |
#in_dir1 <- "/data/project/layers/commons/NEX_data/output_run6_global_analyses_09162014/output20Deg2"
|
|
84 | 39 |
# parent output dir for the curent script analyes |
85 |
out_dir <- "/data/project/layers/commons/NEX_data/output_run5_global_analyses_08252014/" #On NCEAS Atlas
|
|
40 |
#out_dir <-"/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/" #On NCEAS Atlas
|
|
86 | 41 |
# input dir containing shapefiles defining tiles |
87 |
in_dir_shp <- "/data/project/layers/commons/NEX_data/output_run5_global_analyses_08252014/output20Deg/subset/shapefiles"
|
|
42 |
#in_dir_shp <- "/data/project/layers/commons/NEX_data/output_run5_global_analyses_08252014/output/subset/shapefiles"
|
|
88 | 43 |
|
89 | 44 |
#On NEX |
90 | 45 |
#contains all data from the run by Alberto |
91 |
#in_dir1 <- "/nobackupp4/aguzman4/climateLayers/output4" #On NEX
|
|
46 |
#in_dir1 <- " /nobackupp6/aguzman4/climateLayers/out_15x45/" #On NEX
|
|
92 | 47 |
#parent output dir for the current script analyes |
93 | 48 |
#out_dir <- "/nobackup/bparmen1/" #on NEX |
94 | 49 |
#in_dir_shp <- "/nobackupp4/aguzman4/climateLayers/output4/subset/shapefiles/" |
50 |
#in_dir <- "" #PARAM 0 |
|
51 |
#y_var_name <- "dailyTmax" #PARAM1 |
|
52 |
#interpolation_method <- c("gam_CAI") #PARAM2 |
|
53 |
out_suffix<-"global_analyses_overall_assessment_reg4_01272016" |
|
54 |
#out_suffix <- "output_run10_1000x3000_global_analyses_02102015" |
|
55 |
#out_suffix <- "run10_1500x4500_global_analyses_pred_1992_10052015" #PARAM3 |
|
56 |
#out_dir <- "/data/project/layers/commons/NEX_data/output_run10_1500x4500_global_analyses_pred_1992_10052015" #PARAM4 |
|
57 |
create_out_dir_param <- TRUE #PARAM 5 |
|
58 |
#mosaic_plot <- FALSE #PARAM6 |
|
59 |
#if daily mosaics NULL then mosaicas all days of the year |
|
60 |
#day_to_mosaic <- c("19920101","19920102","19920103") #PARAM7 |
|
61 |
#CRS_WGS84 <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84 #CONSTANT1 |
|
62 |
#CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84 |
|
63 |
#proj_str<- CRS_WGS84 #PARAM 8 #check this parameter |
|
64 |
file_format <- ".rst" #PARAM 9 |
|
65 |
NA_value <- -9999 #PARAM10 |
|
66 |
NA_flag_val <- NA_value |
|
67 |
#multiple_region <- TRUE #PARAM 12 |
|
68 |
#region_name <- "world" #PARAM 13 |
|
69 |
countries_shp <-"/data/project/layers/commons/NEX_data/countries.shp" #PARAM 13, copy this on NEX too |
|
70 |
#plot_region <- TRUE |
|
71 |
#num_cores <- 6 #PARAM 14 |
|
72 |
region_name <- c("reg4") #reference region to merge if necessary, if world all the regions are together #PARAM 16 |
|
73 |
#use previous files produced in step 1a and stored in a data.frame |
|
74 |
#df_assessment_files <- "df_assessment_files_reg4_1984_run_global_analyses_pred_12282015.txt" #PARAM 17 |
|
75 |
threshold_missing_day <- c(367,365,300,200) #PARM18 |
|
95 | 76 |
|
96 |
y_var_name <- "dailyTmax" |
|
97 |
interpolation_method <- c("gam_CAI") |
|
98 |
out_prefix<-"run5_global_analyses_08252014" |
|
77 |
#list_param_run_assessment_plottingin_dir <- list(in_dir,y_var_name, interpolation_method, out_suffix, |
|
78 |
# out_dir, create_out_dir_param, mosaic_plot, proj_str, file_format, NA_value, |
|
79 |
# multiple_region, countries_shp, plot_region, num_cores, |
|
80 |
# region_name, df_assessment_files, threshold_missing_day) |
|
99 | 81 |
|
100 |
#out_dir <-paste(out_dir,"_",out_prefix,sep="") |
|
101 |
create_out_dir_param <- FALSE |
|
82 |
#names(list_param_run_assessment_plottingin_dir) <- c("in_dir","y_var_name","interpolation_method","out_suffix", |
|
83 |
# "out_dir","create_out_dir_param","mosaic_plot","proj_str","file_format","NA_value", |
|
84 |
# "multiple_region","countries_shp","plot_region","num_cores", |
|
85 |
# "region_name","df_assessment_files","threshold_missing_day") |
|
86 |
|
|
87 |
#run_assessment_plotting_prediction_fun(list_param_run_assessment_plottingin_dir) |
|
88 |
|
|
89 |
run_assessment_plotting_prediction_fun <-function(list_param_run_assessment_plotting){ |
|
90 |
|
|
91 |
#### |
|
92 |
#1) in_dir: input directory containing data tables and shapefiles for plotting #PARAM 0 |
|
93 |
#2) y_var_name : variables being predicted e.g. dailyTmax #PARAM1 |
|
94 |
#3) interpolation_method: method used #c("gam_CAI") #PARAM2 |
|
95 |
#4) out_suffix: output suffix #PARAM3 |
|
96 |
#5) out_dir # |
|
97 |
#6) create_out_dir_param # FALSE #PARAM 5 |
|
98 |
#7) mosaic_plot #FALSE #PARAM6 |
|
99 |
#8) proj_str # projection/coordinates system e.g. CRS_WGS84 #PARAM 8 #check this parameter |
|
100 |
#9) file_format #".rst" #PARAM 9 |
|
101 |
#10) NA_value #-9999 #PARAM10 |
|
102 |
#11) multiple_region # <- TRUE #PARAM 12 |
|
103 |
#12) countries_shp #<- "world" #PARAM 13 |
|
104 |
#13) plot_region #<- TRUE |
|
105 |
#14) num_cores <- number of cores used # 6 #PARAM 14 |
|
106 |
#15) region_name #<- c("reg4"), world if full assessment #reference region to merge if necessary #PARAM 16 |
|
107 |
#16) df_assessment_files #PARAM 16 |
|
108 |
#17) threshold_missing_day #PARM18 |
|
109 |
#18) year_predicted |
|
110 |
|
|
111 |
### Loading R library and packages |
|
112 |
#library used in the workflow production: |
|
113 |
library(gtools) # loading some useful tools |
|
114 |
library(mgcv) # GAM package by Simon Wood |
|
115 |
library(sp) # Spatial pacakge with class definition by Bivand et al. |
|
116 |
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al. |
|
117 |
library(rgdal) # GDAL wrapper for R, spatial utilities |
|
118 |
library(gstat) # Kriging and co-kriging by Pebesma et al. |
|
119 |
library(fields) # NCAR Spatial Interpolation methods such as kriging, splines |
|
120 |
library(raster) # Hijmans et al. package for raster processing |
|
121 |
library(gdata) # various tools with xls reading, cbindX |
|
122 |
library(rasterVis) # Raster plotting functions |
|
123 |
library(parallel) # Parallelization of processes with multiple cores |
|
124 |
library(maptools) # Tools and functions for sp and other spatial objects e.g. spCbind |
|
125 |
library(maps) # Tools and data for spatial/geographic objects |
|
126 |
library(reshape) # Change shape of object, summarize results |
|
127 |
library(plotrix) # Additional plotting functions |
|
128 |
library(plyr) # Various tools including rbind.fill |
|
129 |
library(spgwr) # GWR method |
|
130 |
library(automap) # Kriging automatic fitting of variogram using gstat |
|
131 |
library(rgeos) # Geometric, topologic library of functions |
|
132 |
#RPostgreSQL # Interface R and Postgres, not used in this script |
|
133 |
library(gridExtra) |
|
134 |
#Additional libraries not used in workflow |
|
135 |
library(pgirmess) # Krusall Wallis test with mulitple options, Kruskalmc {pgirmess} |
|
136 |
library(colorRamps) |
|
137 |
library(zoo) |
|
138 |
library(xts) |
|
139 |
|
|
140 |
####### Function used in the script ####### |
|
141 |
|
|
142 |
script_path <- "/home/parmentier/Data/IPLANT_project/env_layers_scripts" |
|
143 |
function_assessment_part2_functions <- "global_run_scalingup_assessment_part2_functions_01032016.R" |
|
144 |
source(file.path(script_path,function_assessment_part2_functions)) #source all functions used in this script |
|
145 |
|
|
146 |
####### PARSE INPUT ARGUMENTS/PARAMETERS ##### |
|
147 |
in_dir <- "/data/project/layers/commons/NEX_data" |
|
148 |
in_dir_list <- c("output_run_global_analyses_pred_2009_reg4","output_run_global_analyses_pred_2010_reg4", |
|
149 |
"output_run_global_analyses_pred_2011_reg4","output_run_global_analyses_pred_2012_reg4", |
|
150 |
"output_run_global_analyses_pred_2013_reg4","output_run_global_analyses_pred_2014_reg4") |
|
151 |
|
|
152 |
in_dir <- list_param_run_assessment_plotting$in_dir #PARAM 1 |
|
153 |
y_var_name <- list_param_run_assessment_plotting$y_var_name #PARAM2 |
|
154 |
interpolation_method <- list_param_run_assessment_plotting$interpolation_method #c("gam_CAI") #PARAM3 |
|
155 |
out_suffix <- list_param_run_assessment_plotting$out_suffix #PARAM4 |
|
156 |
out_dir <- list_param_run_assessment_plotting$out_dir # PARAM5 |
|
157 |
create_out_dir_param <- list_param_run_assessment_plotting$create_out_dir_param # FALSE #PARAM 6 |
|
158 |
mosaic_plot <- list_param_run_assessment_plotting$mosaic_plot #FALSE #PARAM7 |
|
159 |
proj_str<- list_param_run_assessment_plotting$proj_str #CRS_WGS84 #PARAM 8 #check this parameter |
|
160 |
file_format <- list_param_run_assessment_plotting$file_format #".rst" #PARAM 9 |
|
161 |
NA_flag_val <- list_param_run_assessment_plotting$NA_flag_val #-9999 #PARAM10 |
|
162 |
multiple_region <- list_param_run_assessment_plotting$multiple_region # <- TRUE #PARAM 11 |
|
163 |
countries_shp <- list_param_run_assessment_plotting$countries_shp #<- "world" #PARAM 12 |
|
164 |
plot_region <- list_param_run_assessment_plotting$plot_region # PARAM13 |
|
165 |
num_cores <- list_param_run_assessment_plotting$num_cores # 6 #PARAM 14 |
|
166 |
region_name <- list_param_run_assessment_plotting$region_name #<- "world" #PARAM 15 |
|
167 |
df_assessment_files_name <- list_param_run_assessment_plotting$df_assessment_files_name #PARAM 16 |
|
168 |
threshold_missing_day <- list_param_run_assessment_plotting$threshold_missing_day #PARM17 |
|
169 |
year_predicted <- list_param_run_assessment_plotting$year_predicted |
|
170 |
|
|
171 |
NA_value <- NA_flag_val |
|
172 |
|
|
173 |
##################### START SCRIPT ################# |
|
174 |
|
|
175 |
####### PART 1: Read in data ######## |
|
176 |
out_dir <- in_dir |
|
177 |
if(create_out_dir_param==TRUE){ |
|
178 |
out_dir <- create_dir_fun(out_dir,out_suffix) |
|
179 |
setwd(out_dir) |
|
180 |
}else{ |
|
181 |
setwd(out_dir) #use previoulsy defined directory |
|
182 |
} |
|
102 | 183 |
|
103 |
if(create_out_dir_param==TRUE){ |
|
104 |
out_dir <- create_dir_fun(out_dir,out_prefix) |
|
105 | 184 |
setwd(out_dir) |
106 |
}else{ |
|
107 |
setwd(out_dir) #use previoulsy defined directory |
|
185 |
|
|
186 |
list_outfiles <- vector("list", length=23) #collect names of output files |
|
187 |
list_outfiles_names <- vector("list", length=23) #collect names of output files |
|
188 |
counter_fig <- 0 #index of figure to collect outputs |
|
189 |
|
|
190 |
#i <- year_predicted |
|
191 |
###Table 1: Average accuracy metrics |
|
192 |
###Table 2: daily accuracy metrics for all tiles |
|
193 |
|
|
194 |
##As a first quick set up for the meeting 01/27 just read in from the in_dir_list |
|
195 |
list_tb_fname <- list.files(path=file.path(in_dir,in_dir_list),"tb_diagnostic_v_NA_.*._run_global_analyses_pred_.*._reg4.txt",full.names=T) |
|
196 |
list_df_fname <- list.files(path=file.path(in_dir,in_dir_list),"df_tile_processed_.*._run_global_analyses_pred_.*._reg4.txt",full.names=T) |
|
197 |
list_summary_metrics_v_fname <- list.files(path=file.path(in_dir,in_dir_list),"summary_metrics_v2_NA_.*._run_global_analyses_pred_.*._reg4.txt",full.names=T) |
|
198 |
|
|
199 |
|
|
200 |
list_tb <- lapply(list_tb_fname,function(x){read.table(x,stringsAsFactors=F,sep=",")}) |
|
201 |
tb <- do.call(rbind,list_tb) |
|
202 |
list_df <- lapply(list_df_fname,function(x){read.table(x,stringsAsFactors=F,sep=",")}) |
|
203 |
df_tile_processed <- do.call(rbind,list_df) |
|
204 |
list_summary_metrics_v <- lapply(list_summary_metrics_v_fname,function(x){read.table(x,stringsAsFactors=F,sep=",")}) |
|
205 |
summary_metrics_v <- do.call(rbind,list_summary_metrics_v) |
|
206 |
|
|
207 |
tb <- read.table(list_tb[1],stringsAsFactors=F,sep=",") |
|
208 |
|
|
209 |
df_assessment_files <- read.table(df_assessment_files_name,stringsAsFactors=F,sep=",") |
|
210 |
#df_assessment_files, note that in_dir indicate the path of the textfiles |
|
211 |
summary_metrics_v <- read.table(file.path(in_dir,basename(df_assessment_files$files[1])),sep=",") |
|
212 |
tb <- read.table(file.path(in_dir, basename(df_assessment_files$files[2])),sep=",") |
|
213 |
tb_s <- read.table(file.path(in_dir, basename(df_assessment_files$files[4])),sep=",") |
|
214 |
|
|
215 |
tb_month_s <- read.table(file.path(in_dir,basename(df_assessment_files$files[3])),sep=",") |
|
216 |
pred_data_month_info <- read.table(file.path(in_dir, basename(df_assessment_files$files[10])),sep=",") |
|
217 |
pred_data_day_info <- read.table(file.path(in_dir, basename(df_assessment_files$files[11])),sep=",") |
|
218 |
df_tile_processed <- read.table(file.path(in_dir, basename(df_assessment_files$files[12])),sep=",") |
|
219 |
|
|
220 |
#add column for tile size later on!!! |
|
221 |
|
|
222 |
tb$pred_mod <- as.character(tb$pred_mod) |
|
223 |
summary_metrics_v$pred_mod <- as.character(summary_metrics_v$pred_mod) |
|
224 |
summary_metrics_v$tile_id <- as.character(summary_metrics_v$tile_id) |
|
225 |
df_tile_processed$tile_id <- as.character(df_tile_processed$tile_id) |
|
226 |
|
|
227 |
tb_month_s$pred_mod <- as.character(tb_month_s$pred_mod) |
|
228 |
tb_month_s$tile_id<- as.character(tb_month_s$tile_id) |
|
229 |
tb_s$pred_mod <- as.character(tb_s$pred_mod) |
|
230 |
tb_s$tile_id <- as.character(tb_s$tile_id) |
|
231 |
|
|
232 |
#multiple regions? #this needs to be included in the previous script!!! |
|
233 |
#if(multiple_region==TRUE){ |
|
234 |
df_tile_processed$reg <- as.character(df_tile_processed$reg) |
|
235 |
tb <- merge(tb,df_tile_processed,by="tile_id") |
|
236 |
tb_s <- merge(tb_s,df_tile_processed,by="tile_id") |
|
237 |
tb_month_s<- merge(tb_month_s,df_tile_processed,by="tile_id") |
|
238 |
summary_metrics_v <- merge(summary_metrics_v,df_tile_processed,by="tile_id") |
|
239 |
#test <- merge(summary_metrics_v,df_tile_processed,by="tile_id",all=F) |
|
240 |
#duplicate columns...need to be cleaned up later |
|
241 |
try(tb$year_predicted <- tb$year_predicted.x) |
|
242 |
try(tb$reg <- tb$reg.x) |
|
243 |
try(summary_metrics_v$year_predicted <- summary_metrics_v$year_predicted.x) |
|
244 |
try(summary_metrics_v$reg <- summary_metrics_v$reg.x) |
|
245 |
#tb_all <- tb |
|
246 |
#summary_metrics_v_all <- summary_metrics_v |
|
247 |
|
|
248 |
#table(summary_metrics_v_all$reg) |
|
249 |
#table(summary_metrics_v$reg) |
|
250 |
#table(tb_all$reg) |
|
251 |
#table(tb$reg) |
|
252 |
|
|
253 |
############ PART 2: PRODUCE FIGURES ################ |
|
254 |
|
|
255 |
########################### |
|
256 |
### Figure 1: plot location of the study area with tiles processed |
|
257 |
|
|
258 |
#df_tiled_processed <- na.omit(df_tile_processed) #remove other list of folders irrelevant |
|
259 |
#list_shp_reg_files <- df_tiled_processed$shp_files |
|
260 |
list_shp_reg_files<- as.character(df_tile_processed$shp_files) |
|
261 |
#list_shp_reg_files <- file.path("/data/project/layers/commons/NEX_data/",out_dir, |
|
262 |
# as.character(df_tile_processed$tile_coord),"shapefiles",basename(list_shp_reg_files)) |
|
263 |
#list_shp_reg_files <- file.path("/data/project/layers/commons/NEX_data/",out_dir, |
|
264 |
#"shapefiles",basename(list_shp_reg_files)) |
|
265 |
|
|
266 |
### Potential function starts here: |
|
267 |
#function(in_dir,out_dir,list_shp_reg_files,title_str,region_name,num_cores,out_suffix,out_suffix) |
|
268 |
|
|
269 |
### First get background map to display where study area is located |
|
270 |
#can make this more general later on..should have this already in a local directory on Atlas or NEX!!!! |
|
271 |
|
|
272 |
#http://www.diva-gis.org/Data |
|
273 |
#countries_shp <-"/data/project/layers/commons/NEX_data/countries.shp" |
|
274 |
path_to_shp <- dirname(countries_shp) |
|
275 |
layer_name <- sub(".shp","",basename(countries_shp)) |
|
276 |
reg_layer <- readOGR(path_to_shp, layer_name) |
|
277 |
#proj4string(reg_layer) <- CRS_locs_WGS84 |
|
278 |
#reg_shp<-readOGR(dirname(list_shp_reg_files[[i]]),sub(".shp","",basename(list_shp_reg_files[[i]]))) |
|
279 |
|
|
280 |
centroids_pts <- vector("list",length(list_shp_reg_files)) |
|
281 |
shps_tiles <- vector("list",length(list_shp_reg_files)) |
|
282 |
#collect info: read in all shapfiles |
|
283 |
#This is slow...make a function and use mclapply?? |
|
284 |
#/data/project/layers/commons/NEX_data/output_run6_global_analyses_09162014/shapefiles |
|
285 |
|
|
286 |
for(i in 1:length(list_shp_reg_files)){ |
|
287 |
#path_to_shp <- dirname(list_shp_reg_files[[i]]) |
|
288 |
path_to_shp <- file.path(out_dir,"/shapefiles") |
|
289 |
layer_name <- sub(".shp","",basename(list_shp_reg_files[[i]])) |
|
290 |
shp1 <- try(readOGR(path_to_shp, layer_name)) #use try to resolve error below |
|
291 |
#shp_61.0_-160.0 |
|
292 |
#Geographical CRS given to non-conformant data: -186.331747678 |
|
293 |
|
|
294 |
#shp1<-readOGR(dirname(list_shp_reg_files[[i]]),sub(".shp","",basename(list_shp_reg_files[[i]]))) |
|
295 |
if (!inherits(shp1,"try-error")) { |
|
296 |
pt <- gCentroid(shp1) |
|
297 |
centroids_pts[[i]] <- pt |
|
298 |
}else{ |
|
299 |
pt <- shp1 |
|
300 |
centroids_pts[[i]] <- pt |
|
301 |
} |
|
302 |
shps_tiles[[i]] <- shp1 |
|
303 |
#centroids_pts[[i]] <- centroids |
|
304 |
} |
|
305 |
|
|
306 |
#fun <- function(i,list_shp_files) |
|
307 |
#coord_names <- c("lon","lat") |
|
308 |
#l_ras#t <- rasterize_df_fun(test,coord_names,proj_str,out_suffix=out_suffix,out_dir=".",file_format,NA_flag_val,tolerance_val=0.000120005) |
|
309 |
|
|
310 |
#remove try-error polygons...we loose three tiles because they extend beyond -180 deg |
|
311 |
tmp <- shps_tiles |
|
312 |
shps_tiles <- remove_errors_list(shps_tiles) #[[!inherits(shps_tiles,"try-error")]] |
|
313 |
#shps_tiles <- tmp |
|
314 |
length(tmp)-length(shps_tiles) #number of tiles with error message |
|
315 |
|
|
316 |
tmp_pts <- centroids_pts |
|
317 |
centroids_pts <- remove_errors_list(centroids_pts) #[[!inherits(shps_tiles,"try-error")]] |
|
318 |
#centroids_pts <- tmp_pts |
|
319 |
|
|
320 |
#plot info: with labels |
|
321 |
res_pix <-1200 |
|
322 |
col_mfrow <- 1 |
|
323 |
row_mfrow <- 1 |
|
324 |
|
|
325 |
png(filename=paste("Figure1_tile_processed_region_",region_name,"_",out_suffix,".png",sep=""), |
|
326 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
327 |
|
|
328 |
plot(reg_layer) |
|
329 |
#Add polygon tiles... |
|
330 |
for(i in 1:length(shps_tiles)){ |
|
331 |
shp1 <- shps_tiles[[i]] |
|
332 |
pt <- centroids_pts[[i]] |
|
333 |
if(!inherits(shp1,"try-error")){ |
|
334 |
plot(shp1,add=T,border="blue") |
|
335 |
#plot(pt,add=T,cex=2,pch=5) |
|
336 |
label_id <- df_tile_processed$tile_id[i] |
|
337 |
text(coordinates(pt)[1],coordinates(pt)[2],labels=i,cex=1.3,font=2,col=c("red")) |
|
338 |
} |
|
339 |
} |
|
340 |
#title(paste("Tiles ", tile_size,region_name,sep="")) |
|
341 |
|
|
342 |
dev.off() |
|
343 |
|
|
344 |
#unique(summaty_metrics$tile_id) |
|
345 |
#text(lat-shp,) |
|
346 |
#union(list_shp_reg_files[[1]],list_shp_reg_files[[2]]) |
|
347 |
list_outfiles[[counter_fig+1]] <- paste("Figure1_tile_processed_region_",region_name,"_",out_suffix,".png",sep="") |
|
348 |
counter_fig <- counter_fig+1 |
|
349 |
|
|
350 |
############### |
|
351 |
### Figure 2: boxplot of average accuracy by model and by tiles |
|
352 |
|
|
353 |
tb$tile_id <- factor(tb$tile_id, levels=unique(tb$tile_id)) |
|
354 |
model_name <- as.character(unique(tb$pred_mod)) |
|
355 |
|
|
356 |
## Figure 2a |
|
357 |
for(i in 1:length(model_name)){ |
|
358 |
|
|
359 |
res_pix <- 480 |
|
360 |
col_mfrow <- 1 |
|
361 |
row_mfrow <- 1 |
|
362 |
fig_filename <- paste("Figure2a_boxplot_with_oultiers_by_tiles_",model_name[i],"_",out_suffix,".png",sep="") |
|
363 |
png(filename=paste("Figure2a_boxplot_with_oultiers_by_tiles_",model_name[i],"_",out_suffix,".png",sep=""), |
|
364 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
365 |
|
|
366 |
boxplot(rmse~tile_id,data=subset(tb,tb$pred_mod==model_name[i])) |
|
367 |
title(paste("RMSE per ",model_name[i])) |
|
368 |
|
|
369 |
dev.off() |
|
370 |
list_outfiles[[counter_fig+i]] <- fig_filename |
|
371 |
} |
|
372 |
counter_fig <- counter_fig + length(model_name) |
|
373 |
## Figure 2b |
|
374 |
#with ylim and removing trailing... |
|
375 |
for(i in 1:length(model_name)){ #there are two models!! |
|
376 |
|
|
377 |
res_pix <- 480 |
|
378 |
col_mfrow <- 1 |
|
379 |
row_mfrow <- 1 |
|
380 |
fig_filename <- paste("Figure2b_boxplot_scaling_by_tiles","_",model_name[i],"_",out_suffix,".png",sep="") |
|
381 |
png(filename=paste("Figure2b_boxplot_scaling_by_tiles","_",model_name[i],"_",out_suffix,".png",sep=""), |
|
382 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
383 |
|
|
384 |
model_name <- unique(tb$pred_mod) |
|
385 |
boxplot(rmse~tile_id,data=subset(tb,tb$pred_mod==model_name[i]) |
|
386 |
,ylim=c(0,4),outline=FALSE) |
|
387 |
title(paste("RMSE per ",model_name[i])) |
|
388 |
dev.off() |
|
389 |
#we already stored one figure |
|
390 |
list_outfiles[[counter_fig+i]] <- fig_filename |
|
391 |
} |
|
392 |
counter_fig <- counter_fig + length(model_name) |
|
393 |
#bwplot(rmse~tile_id, data=subset(tb,tb$pred_mod=="mod1")) |
|
394 |
|
|
395 |
############### |
|
396 |
### Figure 3: boxplot of average RMSE by model acrosss all tiles |
|
397 |
|
|
398 |
#Ok fixed..now selection of model but should also offer an option for using both models!! so make this a function!! |
|
399 |
for(i in 1:length(model_name)){ #there are two models!! |
|
400 |
## Figure 3a |
|
401 |
res_pix <- 480 |
|
402 |
col_mfrow <- 1 |
|
403 |
row_mfrow <- 1 |
|
404 |
|
|
405 |
png(filename=paste("Figure3a_boxplot_overall_region_with_oultiers_",model_name[i],"_",out_suffix,".png",sep=""), |
|
406 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
407 |
|
|
408 |
#boxplot(rmse~pred_mod,data=tb)#,names=tb$pred_mod) |
|
409 |
boxplot(rmse~pred_mod,data=subset(tb,tb$pred_mod==model_name[i]))#,names=tb$pred_mod) |
|
410 |
title(paste("RMSE with outliers for all tiles: ",model_name[i],sep="")) |
|
411 |
dev.off() |
|
412 |
list_outfiles[[counter_fig+1]] <- paste("Figure3a_boxplot_overall_region_with_oultiers_",model_name[i],"_",out_suffix,".png",sep="") |
|
413 |
|
|
414 |
## Figure 3b |
|
415 |
png(filename=paste("Figure3b_boxplot_overall_region_scaling_",model_name[i],"_",out_suffix,".png",sep=""), |
|
416 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
417 |
#boxplot(rmse~pred_mod,data=tb,ylim=c(0,5),outline=FALSE)#,names=tb$pred_mod) |
|
418 |
boxplot(rmse~pred_mod,data=subset(tb,tb$pred_mod==model_name[i]),ylim=c(0,5),outline=FALSE)#,names=tb$pred_mod) |
|
419 |
#title("RMSE per model over all tiles") |
|
420 |
title(paste("RMSE with scaling for all tiles: ",model_name[i],sep="")) |
|
421 |
dev.off() |
|
422 |
list_outfiles[[counter_fig+2]] <- paste("Figure3b_boxplot_overall_region_scaling_",model_name[i],"_",out_suffix,".png",sep="") |
|
423 |
} |
|
424 |
counter_fig <- counter_fig + length(model_name) |
|
425 |
|
|
426 |
################ |
|
427 |
### Figure 4: plot predicted tiff for specific date per model |
|
428 |
|
|
429 |
#y_var_name <-"dailyTmax" |
|
430 |
#index <-244 #index corresponding to Sept 1 |
|
431 |
|
|
432 |
# if (mosaic_plot==TRUE){ |
|
433 |
# index <- 1 #index corresponding to Jan 1 |
|
434 |
# date_selected <- "20100901" |
|
435 |
# name_method_var <- paste(interpolation_method,"_",y_var_name,"_",sep="") |
|
436 |
# |
|
437 |
# pattern_str <- paste("mosaiced","_",name_method_var,"predicted",".*.",date_selected,".*.tif",sep="") |
|
438 |
# lf_pred_list <- list.files(pattern=pattern_str) |
|
439 |
# |
|
440 |
# for(i in 1:length(lf_pred_list)){ |
|
441 |
# |
|
442 |
# |
|
443 |
# r_pred <- raster(lf_pred_list[i]) |
|
444 |
# |
|
445 |
# res_pix <- 480 |
|
446 |
# col_mfrow <- 1 |
|
447 |
# row_mfrow <- 1 |
|
448 |
# |
|
449 |
# png(filename=paste("Figure4_models_predicted_surfaces_",model_name[i],"_",name_method_var,"_",data_selected,"_",out_suffix,".png",sep=""), |
|
450 |
# width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
451 |
# |
|
452 |
# plot(r_pred) |
|
453 |
# title(paste("Mosaiced",model_name[i],name_method_var,date_selected,sep=" ")) |
|
454 |
# dev.off() |
|
455 |
# } |
|
456 |
# #Plot Delta and clim... |
|
457 |
# |
|
458 |
# ## plotting of delta and clim for later scripts... |
|
459 |
# |
|
460 |
# } |
|
461 |
|
|
462 |
|
|
463 |
###################### |
|
464 |
### Figure 5: plot accuracy ranked |
|
465 |
|
|
466 |
#Turn summary table to a point shp |
|
467 |
|
|
468 |
list_df_ac_mod <- vector("list",length=length(model_name)) |
|
469 |
for (i in 1:length(model_name)){ |
|
470 |
|
|
471 |
ac_mod <- summary_metrics_v[summary_metrics_v$pred_mod==model_name[i],] |
|
472 |
### Ranking by tile... |
|
473 |
df_ac_mod <- arrange(as.data.frame(ac_mod),desc(rmse))[,c("pred_mod","rmse","mae","tile_id")] |
|
474 |
list_df_ac_mod[[i]] <- arrange(as.data.frame(ac_mod),desc(rmse))[,c("rmse","mae","tile_id")] |
|
475 |
|
|
476 |
res_pix <- 480 |
|
477 |
col_mfrow <- 1 |
|
478 |
row_mfrow <- 1 |
|
479 |
fig_filename <- paste("Figure5_ac_metrics_ranked_",model_name[i],"_",out_suffix,".png",sep="") |
|
480 |
|
|
481 |
png(filename=paste("Figure5_ac_metrics_ranked_",model_name[i],"_",out_suffix,".png",sep=""), |
|
482 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
483 |
x<- as.character(df_ac_mod$tile_id) |
|
484 |
barplot(df_ac_mod$rmse, names.arg=x) |
|
485 |
#plot(ac_mod1,cex=sqrt(ac_mod1$rmse),pch=1,add=T) |
|
486 |
#plot(ac_mod1,cex=(ac_mod1$rmse1)*2,pch=1,add=T) |
|
487 |
title(paste("RMSE ranked by tile for ",model_name[i],sep="")) |
|
488 |
|
|
489 |
dev.off() |
|
490 |
list_outfiles[[counter_fig+i]] <- fig_filename |
|
491 |
} |
|
492 |
|
|
493 |
counter_fig <- counter_fig + length(model_name) |
|
494 |
|
|
495 |
###################### |
|
496 |
### Figure 6: plot map of average RMSE per tile at centroids |
|
497 |
|
|
498 |
### Without |
|
499 |
|
|
500 |
#list_df_ac_mod <- vector("list",length=length(lf_pred_list)) |
|
501 |
list_df_ac_mod <- vector("list",length=2) |
|
502 |
|
|
503 |
for (i in 1:length(model_name)){ |
|
504 |
|
|
505 |
ac_mod <- summary_metrics_v[summary_metrics_v$pred_mod==model_name[i],] |
|
506 |
#r_pred <- raster(lf_list[i]) |
|
507 |
|
|
508 |
res_pix <- 1200 |
|
509 |
#res_pix <- 480 |
|
510 |
|
|
511 |
col_mfrow <- 1 |
|
512 |
row_mfrow <- 1 |
|
513 |
fig_filename <- paste("Figure6_ac_metrics_map_centroids_tile_",model_name[i],"_",out_suffix,".png",sep="") |
|
514 |
png(filename=paste("Figure6_ac_metrics_map_centroids_tile_",model_name[i],"_",out_suffix,".png",sep=""), |
|
515 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
516 |
|
|
517 |
coordinates(ac_mod) <- ac_mod[,c("lon","lat")] |
|
518 |
#coordinates(ac_mod) <- ac_mod[,c("lon.x","lat.x")] #solve this later |
|
519 |
p_shp <- layer(sp.polygons(reg_layer, lwd=1, col='black')) |
|
520 |
#title("(a) Mean for 1 January") |
|
521 |
p <- bubble(ac_mod,"rmse",main=paste("Average RMSE per tile and by ",model_name[i])) |
|
522 |
p1 <- p+p_shp |
|
523 |
print(p1) |
|
524 |
#plot(ac_mod1,cex=(ac_mod1$rmse1)*2,pch=1,add=T) |
|
525 |
#title(paste("Averrage RMSE per tile and by ",model_name[i])) |
|
526 |
|
|
527 |
dev.off() |
|
528 |
|
|
529 |
### Ranking by tile... |
|
530 |
#df_ac_mod <- |
|
531 |
list_df_ac_mod[[i]] <- arrange(as.data.frame(ac_mod),desc(rmse))[,c("rmse","mae","tile_id")] |
|
532 |
list_outfiles[[counter_fig+i]] <- fig_filename |
|
533 |
} |
|
534 |
counter_fig <- counter_fig+length(model_name) |
|
535 |
|
|
536 |
|
|
537 |
###################### |
|
538 |
### Figure 7: Number of predictions: daily and monthly |
|
539 |
|
|
540 |
## Figure 7a |
|
541 |
|
|
542 |
## Number of tiles with information: |
|
543 |
sum(df_tile_processed$metrics_v) #26,number of tiles with raster object |
|
544 |
length(df_tile_processed$metrics_v) #26,number of tiles in the region |
|
545 |
sum(df_tile_processed$metrics_v)/length(df_tile_processed$metrics_v) #80 of tiles with info |
|
546 |
|
|
547 |
#coordinates |
|
548 |
try(coordinates(summary_metrics_v) <- c("lon","lat")) |
|
549 |
#try(coordinates(summary_metrics_v) <- c("lon.y","lat.y")) |
|
550 |
|
|
551 |
#threshold_missing_day <- c(367,365,300,200) |
|
552 |
|
|
553 |
nb<-nrow(subset(summary_metrics_v,model_name=="mod1")) |
|
554 |
sum(subset(summary_metrics_v,model_name=="mod1")$n_missing)/nb #33/35 |
|
555 |
|
|
556 |
## Make this a figure... |
|
557 |
|
|
558 |
#plot(summary_metrics_v) |
|
559 |
#Make this a function later so that we can explore many thresholds... |
|
560 |
#Problem here |
|
561 |
#Browse[3]> c |
|
562 |
#Error in grid.Call.graphics(L_setviewport, pvp, TRUE) : |
|
563 |
#non-finite location and/or size for viewport |
|
564 |
|
|
565 |
j<-1 #for model name 1 |
|
566 |
for(i in 1:length(threshold_missing_day)){ |
|
567 |
|
|
568 |
#summary_metrics_v$n_missing <- summary_metrics_v$n == 365 |
|
569 |
#summary_metrics_v$n_missing <- summary_metrics_v$n < 365 |
|
570 |
summary_metrics_v$n_missing <- summary_metrics_v$n < threshold_missing_day[i] |
|
571 |
summary_metrics_v_subset <- subset(summary_metrics_v,model_name=="mod1") |
|
572 |
|
|
573 |
#res_pix <- 1200 |
|
574 |
res_pix <- 960 |
|
575 |
|
|
576 |
col_mfrow <- 1 |
|
577 |
row_mfrow <- 1 |
|
578 |
fig_filename <- paste("Figure7a_ac_metrics_map_centroids_tile_",model_name[j],"_","missing_day_",threshold_missing_day[i], |
|
579 |
"_",out_suffix,".png",sep="") |
|
580 |
png(filename=paste("Figure7a_ac_metrics_map_centroids_tile_",model_name[j],"_","missing_day_",threshold_missing_day[i], |
|
581 |
"_",out_suffix,".png",sep=""), |
|
582 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
583 |
|
|
584 |
model_name[j] |
|
585 |
|
|
586 |
p_shp <- layer(sp.polygons(reg_layer, lwd=1, col='black')) |
|
587 |
#title("(a) Mean for 1 January") |
|
588 |
p <- bubble(summary_metrics_v_subset,"n_missing",main=paste("Missing per tile and by ",model_name[j]," for ", |
|
589 |
threshold_missing_day[i])) |
|
590 |
p1 <- p+p_shp |
|
591 |
try(print(p1)) #error raised if number of missing values below a threshold does not exist |
|
592 |
dev.off() |
|
593 |
|
|
594 |
list_outfiles[[counter_fig+i]] <- fig_filename |
|
595 |
} |
|
596 |
counter_fig <- counter_fig+length(threshold_missing_day) #currently 4 days... |
|
597 |
|
|
598 |
### Potential |
|
599 |
png(filename=paste("Figure7b_number_daily_predictions_per_models","_",out_suffix,".png",sep=""), |
|
600 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
601 |
|
|
602 |
xyplot(n~pred_mod | tile_id,data=subset(as.data.frame(summary_metrics_v), |
|
603 |
pred_mod!="mod_kr"),type="h") |
|
604 |
dev.off() |
|
605 |
|
|
606 |
list_outfiles[[counter_fig+1]] <- paste("Figure7b_number_daily_predictions_per_models","_",out_suffix,".png",sep="") |
|
607 |
counter_fig <- counter_fig + 1 |
|
608 |
|
|
609 |
table(tb$pred_mod) |
|
610 |
table(tb$index_d) |
|
611 |
table(subset(tb,pred_mod!="mod_kr")) |
|
612 |
table(subset(tb,pred_mod=="mod1")$index_d) |
|
613 |
#aggregate() |
|
614 |
tb$predicted <- 1 |
|
615 |
test <- aggregate(predicted~pred_mod+tile_id,data=tb,sum) |
|
616 |
xyplot(predicted~pred_mod | tile_id,data=subset(as.data.frame(test), |
|
617 |
pred_mod!="mod_kr"),type="h") |
|
618 |
|
|
619 |
as.character(unique(test$tile_id)) #141 tiles |
|
620 |
|
|
621 |
dim(subset(test,test$predicted==365 & test$pred_mod=="mod1")) |
|
622 |
histogram(subset(test, test$pred_mod=="mod1")$predicted) |
|
623 |
unique(subset(test, test$pred_mod=="mod1")$predicted) |
|
624 |
table((subset(test, test$pred_mod=="mod1")$predicted)) |
|
625 |
|
|
626 |
#LST_avgm_min <- aggregate(LST~month,data=data_month_all,min) |
|
627 |
png(filename=paste("Figure7c_histogram_number_daily_predictions_per_models","_",out_suffix,".png",sep=""), |
|
628 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
629 |
|
|
630 |
histogram(test$predicted~test$tile_id) |
|
631 |
dev.off() |
|
632 |
|
|
633 |
list_outfiles[[counter_fig+1]] <- paste("Figure7c_histogram_number_daily_predictions_per_models","_",out_suffix,".png",sep="") |
|
634 |
counter_fig <- counter_fig + 1 |
|
635 |
|
|
636 |
#table(tb) |
|
637 |
## Figure 7b |
|
638 |
#png(filename=paste("Figure7b_number_daily_predictions_per_models","_",out_suffix,".png",sep=""), |
|
639 |
# width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
640 |
|
|
641 |
#xyplot(n~month | tile_id + pred_mod,data=subset(as.data.frame(tb_month_s), |
|
642 |
# pred_mod!="mod_kr"),type="h") |
|
643 |
#xyplot(n~month | tile_id,data=subset(as.data.frame(tb_month_s), |
|
644 |
# pred_mod="mod_1"),type="h") |
|
645 |
#test=subset(as.data.frame(tb_month_s),pred_mod="mod_1") |
|
646 |
#table(tb_month_s$month) |
|
647 |
#dev.off() |
|
648 |
# |
|
649 |
|
|
650 |
########################################################## |
|
651 |
##### Figure 8: Breaking down accuracy by regions!! ##### |
|
652 |
|
|
653 |
#summary_metrics_v <- merge(summary_metrics_v,df_tile_processed,by="tile_id") |
|
654 |
|
|
655 |
## Figure 8a |
|
656 |
res_pix <- 480 |
|
657 |
col_mfrow <- 1 |
|
658 |
row_mfrow <- 1 |
|
659 |
|
|
660 |
png(filename=paste("Figure8a_boxplot_overall_separated_by_region_with_oultiers_","_",out_suffix,".png",sep=""), |
|
661 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
662 |
|
|
663 |
p<- bwplot(rmse~pred_mod | reg, data=tb, |
|
664 |
main="RMSE per model and region over all tiles") |
|
665 |
print(p) |
|
666 |
dev.off() |
|
667 |
|
|
668 |
list_outfiles[[counter_fig+1]] <- paste("Figure8a_boxplot_overall_separated_by_region_with_oultiers_",model_name[i],"_",out_suffix,".png",sep="") |
|
669 |
counter_fig <- counter_fig + 1 |
|
670 |
|
|
671 |
## Figure 8b |
|
672 |
png(filename=paste("Figure8b_boxplot_overall_separated_by_region_scaling_","_",out_suffix,".png",sep=""), |
|
673 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
674 |
|
|
675 |
boxplot(rmse~pred_mod,data=tb,ylim=c(0,5),outline=FALSE)#,names=tb$pred_mod) |
|
676 |
title("RMSE per model over all tiles") |
|
677 |
p<- bwplot(rmse~pred_mod | reg, data=tb,ylim=c(0,5), |
|
678 |
main="RMSE per model and region over all tiles") |
|
679 |
print(p) |
|
680 |
dev.off() |
|
681 |
|
|
682 |
list_outfiles[[counter_fig+1]] <- paste("Figure8b_boxplot_overall_separated_by_region_scaling_",model_name[i],"_",out_suffix,".png",sep="") |
|
683 |
counter_fig <- counter_fig + 1 |
|
684 |
|
|
685 |
## Select mod1 only now |
|
686 |
tb_subset <- subset(tb,model_name=="mod1") |
|
687 |
## Figure 8c |
|
688 |
|
|
689 |
res_pix <- 480 |
|
690 |
col_mfrow <- 1 |
|
691 |
row_mfrow <- 1 |
|
692 |
|
|
693 |
png(filename=paste("Figure8c_boxplot_overall_separated_by_region_with_oultiers_","mod1","_",out_suffix,".png",sep=""), |
|
694 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
695 |
|
|
696 |
p<- bwplot(rmse~pred_mod | reg, data=tb_subset, |
|
697 |
main="RMSE per model and region over all tiles") |
|
698 |
print(p) |
|
699 |
dev.off() |
|
700 |
|
|
701 |
list_outfiles[[counter_fig+1]] <- paste("Figure8c_boxplot_overall_separated_by_region_with_oultiers_",model_name[i],"_",out_suffix,".png",sep="") |
|
702 |
counter_fig <- counter_fig + 1 |
|
703 |
|
|
704 |
## Figure 8d |
|
705 |
png(filename=paste("Figure8d_boxplot_overall_separated_by_region_scaling_","mod1","_",out_suffix,".png",sep=""), |
|
706 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
707 |
|
|
708 |
boxplot(rmse~pred_mod,data=tb,ylim=c(0,5),outline=FALSE)#,names=tb$pred_mod) |
|
709 |
title("RMSE per model over all tiles") |
|
710 |
p<- bwplot(rmse~pred_mod | reg, data=tb_subset,ylim=c(0,5), |
|
711 |
main="RMSE per model and region over all tiles") |
|
712 |
print(p) |
|
713 |
dev.off() |
|
714 |
|
|
715 |
list_outfiles[[counter_fig+1]] <- paste("Figure8d_boxplot_overall_separated_by_region_scaling_",model_name[i],"_",out_suffix,".png",sep="") |
|
716 |
counter_fig <- counter_fig + 1 |
|
717 |
|
|
718 |
##################################################### |
|
719 |
#### Figure 9: plotting boxplot by year and regions ########### |
|
720 |
|
|
721 |
#Ok fixed..now selection of model but should also offer an option for using both models!! so make this a function!! |
|
722 |
for(i in 1:length(model_name)){ #there are two models!! |
|
723 |
|
|
724 |
## Figure 9a |
|
725 |
res_pix <- 480 |
|
726 |
col_mfrow <- 1 |
|
727 |
row_mfrow <- 1 |
|
728 |
|
|
729 |
png(filename=paste("Figure9a_boxplot_overall_separated_by_region_year_with_oultiers_",model_name[i],"_",out_suffix,".png",sep=""), |
|
730 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
731 |
|
|
732 |
#p<- bwplot(rmse~pred_mod | reg + year_predicted, data=tb, |
|
733 |
# main="RMSE per model and region over all tiles") |
|
734 |
#p<- bwplot(rmse~year_predicted | reg , subset(tb,tb$pred_mod==model_name[i]), |
|
735 |
#main="RMSE per model and region over all tiles") |
|
736 |
#p<- bwplot(rmse~year_predicted , subset(tb,tb$pred_mod==model_name[i]), |
|
737 |
# main="RMSE per model and region over all tiles") |
|
738 |
boxplot(rmse~year_predicted , subset(tb,tb$pred_mod==model_name[i])) |
|
739 |
title(paste("RMSE with outliers and by year for all tiles: ",model_name[i],sep="")) |
|
740 |
#print(p) |
|
741 |
dev.off() |
|
742 |
|
|
743 |
## Figure 9b |
|
744 |
png(filename=paste("Figure9b_boxplot_overall_separated_by_region_year_scaling_",model_name[i],"_",out_suffix,".png",sep=""), |
|
745 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
746 |
|
|
747 |
#boxplot(rmse~pred_mod,data=tb,ylim=c(0,5),outline=FALSE)#,names=tb$pred_mod) |
|
748 |
#title("RMSE per model over all tiles") |
|
749 |
#p<- bwplot(rmse~pred_mod | reg + year_predicted, data=tb,ylim=c(0,5), |
|
750 |
# main="RMSE per model and region over all tiles") |
|
751 |
boxplot(rmse~year_predicted,subset(tb,tb$pred_mod==model_name[i]),ylim=c(0,5),outline=FALSE) |
|
752 |
title(paste("RMSE with scaling and by year for all tiles: ",model_name[i],sep="")) |
|
753 |
|
|
754 |
#print(p) |
|
755 |
dev.off() |
|
756 |
|
|
757 |
list_outfiles[[counter_fig+1]] <- paste("Figure9a_boxplot_overall_separated_by_region_year_with_oultiers_",model_name[i],"_",out_suffix,".png",sep="") |
|
758 |
counter_fig <- counter_fig + 1 |
|
759 |
} |
|
760 |
############################################################## |
|
761 |
############## Prepare object to return |
|
762 |
############## Collect information from assessment ########## |
|
763 |
|
|
764 |
# This is hard coded and can be improved later on for flexibility. It works for now... |
|
765 |
comments_str <- c("tile processed for the region", |
|
766 |
"boxplot with outliers", |
|
767 |
"boxplot with outliers", |
|
768 |
"boxplot scaling by tiles", |
|
769 |
"boxplot scaling by tiles", |
|
770 |
"boxplot overall region with outliers", |
|
771 |
"boxplot overall region with scaling", |
|
772 |
"boxplot overall region with outliers", |
|
773 |
"boxplot overall region with scaling", |
|
774 |
"Barplot of accuracy metrics ranked by tile", |
|
775 |
"Barplot of accuracy metrics ranked by tile", |
|
776 |
"Average accuracy metrics map at centroids", |
|
777 |
"Average accuracy metrics map at centroids", |
|
778 |
"Number of missing day threshold1 map centroids", |
|
779 |
"Number of missing day threshold2 map centroids", |
|
780 |
"Number of missing day threshold3 map centroids", |
|
781 |
"Number of missing day threshold4 map centroids", |
|
782 |
"number_daily_predictions_per_model", |
|
783 |
"histogram number_daily_predictions_per_models", |
|
784 |
"boxplot overall separated by region with_outliers", |
|
785 |
"boxplot overall separated by region with_scaling", |
|
786 |
"boxplot overall separated by region with_outliers", |
|
787 |
"boxplot overall separated by region with_scaling") |
|
788 |
|
|
789 |
figure_no <- c("figure_1","figure_2a","figure_2a","figure_2b","figure_2b","figure_3a","figure_3a","figure_3b","figure_3b", |
|
790 |
"figure_5", "figure_5","figure_6","figure_6","Figure_7a","Figure_7a","Figure_7a","Figure_7a","Figure_7b", |
|
791 |
"Figure_7c","Figure 8a","Figure 8a","Figure 8b","Figure 8b") |
|
792 |
|
|
793 |
col_model_name <- c(NA,"mod1","mod_kr","mod1","mod_kr","mod1","mod_kr","mod1","mod_kr","mod1","mod_kr", |
|
794 |
"mod1","mod_kr","mod1","mod1","mod1","mod1","mod1","mod1",NA,NA,"mod1","mod1") |
|
795 |
col_reg <- rep(region_name,length(list_outfiles)) |
|
796 |
col_year_predicted <- rep(year_predicted,length(list_outfiles)) |
|
797 |
|
|
798 |
#This data.frame contains all the files from the assessment |
|
799 |
df_assessment_figures_files <- data.frame(figure_no=figure_no, |
|
800 |
comment = comments_str, |
|
801 |
model_name=col_model_name, |
|
802 |
reg=col_reg, |
|
803 |
year_predicted=col_year_predicted, |
|
804 |
filename=unlist(list_outfiles)) |
|
805 |
|
|
806 |
###Prepare files for copying back? |
|
807 |
df_assessment_figures_files_names <- file.path(out_dir,paste("df_assessment_figures_files_",region_name,"_",year_predicted,"_",out_suffix,".txt",sep="")) |
|
808 |
write.table(df_assessment_figures_files, |
|
809 |
file=df_assessment_figures_files_names ,sep=",") |
|
810 |
|
|
811 |
#df_assessment_figures_files_names |
|
812 |
|
|
813 |
###################################################### |
|
814 |
##### Prepare objet to return #### |
|
815 |
|
|
816 |
assessment_obj <- list(df_assessment_files, df_assessment_figures_files) |
|
817 |
names(assessment_obj) <- c("df_assessment_files", "df_assessment_figures_files") |
|
818 |
## Prepare list of files to return... |
|
819 |
return(assessment_obj) |
|
820 |
|
|
821 |
} |
|
822 |
|
|
823 |
##################### END OF SCRIPT ###################### |
|
824 |
|
|
825 |
#### Run on the bridge: |
|
826 |
|
|
827 |
#args<-commandArgs(TRUE) |
|
828 |
#script_path<-"/nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/oregon/interpolation" |
|
829 |
#dataHome<-"/nobackupp6/aguzman4/climateLayers/interp/testdata/" |
|
830 |
#script_path2<-"/nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/world/interpolation" |
|
831 |
|
|
832 |
#CALLED FROM MASTER SCRIPT: |
|
833 |
|
|
834 |
script_path <- "/nobackupp8/bparmen1/env_layers_scripts" #path to script |
|
835 |
function_assessment_part1_functions <- "global_run_scalingup_assessment_part1_functions_02112015.R" #PARAM12 |
|
836 |
function_assessment_part1a <-"global_run_scalingup_assessment_part1a_01042016.R" |
|
837 |
function_assessment_part2 <- "global_run_scalingup_assessment_part2_01062016.R" |
|
838 |
function_assessment_part2_functions <- "global_run_scalingup_assessment_part2_functions_01032016.R" |
|
839 |
source(file.path(script_path,function_assessment_part1_functions)) #source all functions used in this script |
|
840 |
source(file.path(script_path,function_assessment_part1a)) #source all functions used in this script |
|
841 |
source(file.path(script_path,function_assessment_part2)) #source all functions used in this script |
|
842 |
source(file.path(script_path,function_assessment_part2_functions)) #source all functions used in this script |
|
843 |
|
|
844 |
### Parameters and arguments ### |
|
845 |
|
|
846 |
var<-"TMAX" # variable being interpolated |
|
847 |
if (var == "TMAX") { |
|
848 |
y_var_name <- "dailyTmax" |
|
849 |
y_var_month <- "TMax" |
|
850 |
} |
|
851 |
if (var == "TMIN") { |
|
852 |
y_var_name <- "dailyTmin" |
|
853 |
y_var_month <- "TMin" |
|
108 | 854 |
} |
109 |
setwd(out_dir) |
|
110 |
|
|
855 |
|
|
856 |
#interpolation_method<-c("gam_fusion") #other otpions to be added later |
|
857 |
interpolation_method<-c("gam_CAI") |
|
858 |
CRS_interp<-"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs"; |
|
859 |
#CRS_interp <-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs"; |
|
111 | 860 |
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84 |
112 |
region_name <- "World" |
|
113 | 861 |
|
114 |
###Table 1: Average accuracy metrics
|
|
115 |
###Table 2: daily accuracy metrics for all tiles
|
|
862 |
out_region_name<-""
|
|
863 |
list_models<-c("y_var ~ s(lat,lon,k=5) + s(elev_s,k=3) + s(LST,k=3)")
|
|
116 | 864 |
|
117 |
summary_metrics_v <- read.table(file=file.path(out_dir,paste("summary_metrics_v2_NA_",out_prefix,".txt",sep="")),sep=",") |
|
118 |
tb <- read.table(file=file.path(out_dir,paste("tb_diagnostic_v_NA","_",out_prefix,".txt",sep="")),sep=",") |
|
119 |
#df_tile_processed <- read.table(file=file.path(out_dir,paste("df_tile_processed_",out_prefix,".txt",sep="")),sep=",") |
|
120 |
df_tile_processed <- read.table(file=file.path(out_dir,paste("df_tile_processed_",out_prefix,".txt",sep="")),sep=",") |
|
121 |
#in_dir_list <- file.path(in_dir1,read.table(file.path(in_dir1,"processed.txt"))$V1) |
|
865 |
#reg1 (North Am), reg2(Europe),reg3(Asia), reg4 (South Am), reg5 (Africa), reg6 (Australia-Asia) |
|
866 |
#master directory containing the definition of tile size and tiles predicted |
|
867 |
in_dir1 <- "/nobackupp6/aguzman4/climateLayers/out/" |
|
868 |
#/nobackupp6/aguzman4/climateLayers/out_15x45/1982 |
|
122 | 869 |
|
123 |
########################## START SCRIPT ############################## |
|
870 |
#region_names <- c("reg23","reg4") #selected region names, #PARAM2 |
|
871 |
region_name <- c("reg4") #run assessment by region, this is a unique region only |
|
872 |
#region_names <- c("reg1","reg2","reg3","reg4","reg5","reg6") #selected region names, #PARAM2 |
|
873 |
interpolation_method <- c("gam_CAI") #PARAM4 |
|
874 |
out_prefix <- "run_global_analyses_pred_12282015" #PARAM5 |
|
875 |
#out_dir <- "/nobackupp8/bparmen1/" #PARAM6 |
|
876 |
out_dir <- "/nobackupp8/bparmen1/output_run_global_analyses_pred_12282015" |
|
877 |
#out_dir <-paste(out_dir,"_",out_prefix,sep="") |
|
878 |
create_out_dir_param <- FALSE #PARAM7 |
|
124 | 879 |
|
125 |
#Now add things here...
|
|
126 |
# |
|
127 |
selected_tiles <- df_tile_processed$tile_coord #selecting tiles 4 and 5
|
|
880 |
#CRS_interp<-"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs";
|
|
881 |
#CRS_interp <-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
|
|
882 |
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
|
|
128 | 883 |
|
129 |
#selected_tiles <- c("40.0_-120.0","35.0_-115.0") |
|
884 |
#list_year_predicted <- 1984:2004 |
|
885 |
list_year_predicted <- c("2014") |
|
886 |
#year_predicted <- list_year_predicted[1] |
|
130 | 887 |
|
131 |
##raster_prediction object : contains testing and training stations with RMSE and model object |
|
132 |
in_dir_list <- list.files(path=in_dir1,full.names=T) |
|
133 |
in_dir_list <- in_dir_list[grep("subset",basename(basename(in_dir_list)),invert=TRUE)] #the first one is the in_dir1 |
|
134 |
in_dir_list <- in_dir_list[match(selected_tiles,basename(basename(in_dir_list)))] #the first one is the in_dir1 |
|
888 |
file_format <- ".tif" #format for mosaiced files #PARAM10 |
|
889 |
NA_flag_val <- -9999 #No data value, #PARAM11 |
|
890 |
num_cores <- 6 #number of cores used #PARAM13 |
|
891 |
plotting_figures <- TRUE #running part2 of assessment to generate figures... |
|
892 |
|
|
893 |
##Additional parameters used in part 2, some these may be removed as code is simplified |
|
894 |
mosaic_plot <- FALSE #PARAM14 |
|
895 |
day_to_mosaic <- c("19920102","19920103","19920103") #PARAM15 |
|
896 |
multiple_region <- TRUE #PARAM16 |
|
897 |
countries_shp <- "/nobackupp8/bparmen1/NEX_data/countries.shp" #PARAM17 |
|
898 |
#countries_shp <-"/data/project/layers/commons/NEX_data/countries.shp" #Atlas |
|
899 |
plot_region <- TRUE #PARAM18 |
|
900 |
threshold_missing_day <- c(367,365,300,200)#PARAM19 |
|
135 | 901 |
|
136 |
list_raster_obj_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="^raster_prediction_obj.*.RData",full.names=T)}) |
|
137 |
#list_names_tile_coord <- lapply(list_raster_obj_files,FUN=function(x){basename(dirname(x))}) |
|
138 |
#list_names_tile_id <- paste("tile",1:length(list_raster_obj_files),sep="_") |
|
139 |
#names(list_raster_obj_files)<- list_names_tile_id |
|
902 |
year_predicted <- list_year_predicted[1] |
|
903 |
in_dir <- out_dir #PARAM 0 |
|
904 |
#y_var_name <- "dailyTmax" #PARAM1 , already set |
|
905 |
#interpolation_method <- c("gam_CAI") #PARAM2, already set |
|
906 |
out_suffix <- out_prefix #PARAM3 |
|
907 |
#out_dir <- #PARAM4, already set |
|
908 |
create_out_dir_param <- FALSE #PARAM 5, already created and set |
|
909 |
#mosaic_plot <- FALSE #PARAM6 |
|
910 |
#if daily mosaics NULL then mosaicas all days of the year |
|
911 |
#day_to_mosaic <- c("19920101","19920102","19920103") #PARAM7 |
|
912 |
#CRS_locs_WGS84 already set |
|
913 |
proj_str <- CRS_locs_WGS84 #PARAM 8 #check this parameter |
|
914 |
#file_format <- ".rst" #PARAM 9, already set |
|
915 |
#NA_flag_val <- -9999 #PARAM 11, already set |
|
916 |
#multiple_region <- TRUE #PARAM 12 |
|
917 |
#countries_shp <-"/data/project/layers/commons/NEX_data/countries.shp" #PARAM 13, copy this on NEX too |
|
918 |
#plot_region <- TRUE |
|
919 |
#num_cores <- 6 #PARAM 14, already set |
|
920 |
#region_name <- c("reg4") #reference region to merge if necessary, if world all the regions are together #PARAM 16 |
|
921 |
#use previous files produced in step 1a and stored in a data.frame |
|
922 |
df_assessment_files_name <- "df_assessment_files_reg4_2014_run_global_analyses_pred_12282015.txt"# #PARAM 17, set in the script |
|
923 |
df_assessment_files <- read.table(df_assessment_files_name,stringsAsFactors=F,sep=",") |
|
924 |
#threshold_missing_day <- c(367,365,300,200) #PARM18 |
|
140 | 925 |
|
141 |
lf_covar_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="covar_obj.*.RData",full.names=T)}) |
|
142 |
lf_covar_tif <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="covar.*.tif",full.names=T)}) |
|
926 |
list_param_run_assessment_plotting <-list( |
|
927 |
in_dir,y_var_name, interpolation_method, out_suffix, |
|
928 |
out_dir, create_out_dir_param, mosaic_plot, proj_str, file_format, NA_flag_val, |
|
929 |
multiple_region, countries_shp, plot_region, num_cores, |
|
930 |
region_name, df_assessment_files_name, threshold_missing_day,year_predicted |
|
931 |
) |
|
143 | 932 |
|
144 |
list_shp_reg_files <- file.path(in_dir_shp,df_tile_processed$shp_files) |
|
145 |
list_shp_reg_files <- file.path(in_dir_shp,df_tile_processed$shp_files) |
|
933 |
names(list_param_run_assessment_plotting) <- c( |
|
934 |
"in_dir","y_var_name","interpolation_method","out_suffix", |
|
935 |
"out_dir","create_out_dir_param","mosaic_plot","proj_str","file_format","NA_flag_val", |
|
936 |
"multiple_region","countries_shp","plot_region","num_cores", |
|
937 |
"region_name","df_assessment_files_name","threshold_missing_day","year_predicted" |
|
938 |
) |
|
146 | 939 |
|
147 |
############### |
|
940 |
#function_assessment_part2 <- "global_run_scalingup_assessment_part2_01032016.R" |
|
941 |
#source(file.path(script_path,function_assessment_part2)) #source all functions used in this script |
|
148 | 942 |
|
149 |
##Quick interactive exploration of raster object to check possible errors |
|
150 |
#robj1 <- load_obj(list_raster_obj_files[[1]]) #This is tile in CA |
|
151 |
#names(robj1) |
|
152 |
#names(robj1$method_mod_obj[[1]]) #for January 1, 2010 |
|
153 |
# names(robj1$method_mod_obj[[1]]$dailyTmax) #for January |
|
154 |
#names(robj1$clim_method_mod_obj[[1]]$data_month) #for January |
|
155 |
#names(robj1$validation_mod_month_obj[[1]]$data_s) #for January with predictions |
|
156 |
#Get the number of models predicted |
|
157 |
#nb_mod <- length(unique(robj1$tb_diagnostic_v$pred_mod)) |
|
158 |
|
|
159 |
### Figure 1: plot location of the study area with tiles processed |
|
160 |
|
|
161 |
### Figures diagnostic tile: |
|
162 |
#Use stage 5 modified/updated code |
|
163 |
|
|
164 |
##Quick exploration of raster object |
|
165 |
|
|
166 |
date_selected_results <- c("20100101") |
|
167 |
date_selected_results <- c("20100901") |
|
168 |
|
|
169 |
#robj1 <- load_obj(list_raster_obj_files[[2]]) |
|
170 |
in_path_tile <- in_dir_list[[2]] #Oregon tile |
|
171 |
#in_path_tile <- NULL # set to NULL if the script is run on the NEX node as part of job |
|
172 |
covar_obj <- load_obj(lf_covar_obj[[2]]) |
|
173 |
out_prefix_str <- paste(out_prefix,"_",basename(dirname(list_raster_obj_files[[2]][2])),sep="") |
|
174 |
var <- "TMAX" |
|
175 |
list_param_results_analyses <- list(out_dir,in_path_tile,script_path,list_raster_obj_files[[2]][2],interpolation_method, |
|
176 |
covar_obj,date_selected_results,var,out_prefix_str) |
|
177 |
names(list_param_results_analyses)<-c("out_path","in_path_tile","script_path","raster_prediction_obj","interpolation_method", |
|
178 |
"covar_obj","date_selected_results","var","out_prefix") |
|
179 |
#list_param <- list_param_results_analyses |
|
180 |
|
|
181 |
#Run modified code from stage 5... |
|
182 |
#plots_assessment_by_date<-function(j,list_param){ |
|
183 |
#Use lapply or mclapply |
|
184 |
debug(plots_assessment_by_date) |
|
185 |
#summary_v_day <- plots_assessment_by_date(1,list_param_results_analyses) |
|
186 |
summary_v_day <- plots_assessment_by_date(244,list_param_results_analyses) |
|
187 |
|
|
188 |
#Call as function... |
|
189 |
|
|
190 |
#Boxplots...etc... |
|
191 |
|
|
192 |
#This is a repeat from earlier code. |
|
193 |
|
|
194 |
##Create data.frame with validation and fit metrics for a full year/full numbe of runs |
|
195 |
|
|
196 |
#Call functions to create plots of metrics for validation dataset |
|
197 |
#tile_selected <- 6 |
|
198 |
#tb_diagnostic_v <- subset(tb,tile_id==6) |
|
199 |
#metric_names<-c("rmse","mae","me","r","m50") |
|
200 |
|
|
201 |
#summary_metrics_v<- boxplot_from_tb(tb_diagnostic_v,metric_names,out_prefix,out_path) #if adding for fit need to change outprefix |
|
202 |
|
|
203 |
#names(summary_metrics_v)<-c("avg","median") |
|
204 |
|
|
205 |
#summary_month_metrics_v<- boxplot_month_from_tb(tb_diagnostic_v,metric_names,out_prefix,out_path) |
|
206 |
|
|
207 |
#Call functions to create plots of metrics for validation dataset |
|
208 |
|
|
209 |
#metric_names<-c("rmse","mae","me","r","m50") |
|
210 |
|
|
211 |
#summary_metrics_v<- boxplot_from_tb(tb_diagnostic_v,metric_names,out_prefix,out_path) #if adding for fit need to change outprefix |
|
212 |
|
|
213 |
#names(summary_metrics_v)<-c("avg","median") |
|
214 |
|
|
215 |
#summary_month_metrics_v<- boxplot_month_from_tb(tb_diagnostic_v,metric_names,out_prefix,out_path) |
|
216 |
|
|
217 |
|
|
218 |
#### Function to plot boxplot from data.frame table of accuracy metrics |
|
219 |
|
|
220 |
|
|
221 |
# ### need to improve these |
|
222 |
# boxplot_from_tb <-function(tb_diagnostic,metric_names,out_prefix,out_path){ |
|
223 |
# #now boxplots and mean per models |
|
224 |
# library(gdata) #Nesssary to use cbindX |
|
225 |
# |
|
226 |
# ### Start script |
|
227 |
# y_var_name<-unique(tb_diagnostic$var_interp) #extract the name of interpolated variable: dailyTmax, dailyTmin |
|
228 |
# |
|
229 |
# mod_names<-sort(unique(tb_diagnostic$pred_mod)) #models that have accuracy metrics |
|
230 |
# t<-melt(tb_diagnostic, |
|
231 |
# #measure=mod_var, |
|
232 |
# id=c("date","pred_mod","prop"), |
|
233 |
# na.rm=F) |
|
234 |
# t$value<-as.numeric(t$value) #problem with char!!! |
|
235 |
# avg_tb<-cast(t,pred_mod~variable,mean) |
|
236 |
# avg_tb$var_interp<-rep(y_var_name,times=nrow(avg_tb)) |
|
237 |
# median_tb<-cast(t,pred_mod~variable,median) |
|
238 |
# |
|
239 |
# #avg_tb<-cast(t,pred_mod~variable,mean) |
|
240 |
# tb<-tb_diagnostic |
|
241 |
# |
|
242 |
# #mod_names<-sort(unique(tb$pred_mod)) #kept for clarity |
|
243 |
# tb_mod_list<-lapply(mod_names, function(k) subset(tb, pred_mod==k)) #this creates a list of 5 based on models names |
|
244 |
# names(tb_mod_list)<-mod_names |
|
245 |
# #mod_metrics<-do.call(cbind,tb_mod_list) |
|
246 |
# #debug here |
|
247 |
# if(length(tb_mod_list)>1){ |
|
248 |
# mod_metrics<-do.call(cbindX,tb_mod_list) #column bind the list?? |
|
249 |
# }else{ |
|
250 |
# mod_metrics<-tb_mod_list[[1]] |
|
251 |
# } |
|
252 |
# |
|
253 |
# test_names<-lapply(1:length(mod_names),function(k) paste(names(tb_mod_list[[1]]),mod_names[k],sep="_")) |
|
254 |
# #test names are used when plotting the boxplot for the different models |
|
255 |
# names(mod_metrics)<-unlist(test_names) |
|
256 |
# rows_total<-lapply(tb_mod_list,nrow) |
|
257 |
# for (j in 1:length(metric_names)){ |
|
258 |
# metric_ac<-metric_names[j] |
|
259 |
# mod_pat<-glob2rx(paste(metric_ac,"_*",sep="")) |
|
260 |
# mod_var<-grep(mod_pat,names(mod_metrics),value=TRUE) # using grep with "value" extracts the matching names |
|
261 |
# #browser() |
|
262 |
# test<-mod_metrics[mod_var] |
|
263 |
# png(file.path(out_path,paste("boxplot_metric_",metric_ac, out_prefix,".png", sep=""))) |
|
264 |
# #boxplot(test,outline=FALSE,horizontal=FALSE,cex=0.5, |
|
265 |
# # ylab=paste(metric_ac,"in degree C",sep=" ")) |
|
266 |
# |
|
267 |
# boxplot(test,outline=FALSE,horizontal=FALSE,cex=0.5, |
|
268 |
# ylab=paste(metric_ac,"in degree C",sep=" "),axisnames=FALSE,axes=FALSE) |
|
269 |
# axis(1, labels = FALSE) |
|
270 |
# ## Create some text labels |
|
271 |
# labels <- labels<- names(test) |
|
272 |
# ## Plot x axis labels at default tick marks |
|
273 |
# text(1:ncol(test), par("usr")[3] - 0.25, srt = 45, adj = 1, |
|
274 |
# labels = labels, xpd = TRUE) |
|
275 |
# axis(2) |
|
276 |
# box() |
|
277 |
# #legend("bottomleft",legend=paste(names(rows_total),":",rows_total,sep=""),cex=0.7,bty="n") |
|
278 |
# #title(as.character(t(paste(t(names(rows_total)),":",rows_total,sep=""))),cex=0.8) |
|
279 |
# title(paste(metric_ac,"for",y_var_name,sep=" "),cex=0.8) |
|
280 |
# dev.off() |
|
281 |
# } |
|
282 |
# |
|
283 |
# avg_tb$n<-rows_total #total number of predictions on which the mean is based |
|
284 |
# median_tb$n<-rows_total |
|
285 |
# summary_obj<-list(avg_tb,median_tb) |
|
286 |
# names(summary_obj)<-c("avg","median") |
|
287 |
# return(summary_obj) |
|
288 |
# } |
|
289 |
# #boxplot_month_from_tb(tb_diagnostic,metric_names,out_prefix,out_path) |
|
290 |
# ## Function to display metrics by months/seasons |
|
291 |
# boxplot_month_from_tb <-function(tb_diagnostic,metric_names,out_prefix,out_path){ |
|
292 |
# |
|
293 |
# #Generate boxplot per month for models and accuracy metrics |
|
294 |
# #Input parameters: |
|
295 |
# #1) df: data frame containing accurayc metrics (RMSE etc.) per day) |
|
296 |
# #2) metric_names: metrics used for validation |
|
297 |
# #3) out_prefix |
|
298 |
# # |
|
299 |
# |
|
300 |
# ################# |
|
301 |
# ## BEGIN |
|
302 |
# y_var_name<-unique(tb_diagnostic$var_interp) #extract the name of interpolated variable: dailyTmax, dailyTmin |
|
303 |
# date_f<-strptime(tb_diagnostic$date, "%Y%m%d") # interpolation date being processed |
|
304 |
# tb_diagnostic$month<-strftime(date_f, "%m") # current month of the date being processed |
|
305 |
# mod_names<-sort(unique(tb_diagnostic$pred_mod)) #models that have accuracy metrics |
|
306 |
# tb_mod_list<-lapply(mod_names, function(k) subset(tb_diagnostic, pred_mod==k)) #this creates a list of 5 based on models names |
|
307 |
# names(tb_mod_list)<-mod_names |
|
308 |
# t<-melt(tb_diagnostic, |
|
309 |
# #measure=mod_var, |
|
310 |
# id=c("date","pred_mod","prop","month"), |
|
311 |
# na.rm=F) |
|
312 |
# t$value<-as.numeric(t$value) #problem with char!!! |
|
313 |
# tb_mod_m_avg <-cast(t,pred_mod+month~variable,mean) #monthly mean for every model |
|
314 |
# tb_mod_m_avg$var_interp<-rep(y_var_name,times=nrow(tb_mod_m_avg)) |
|
315 |
# |
|
316 |
# tb_mod_m_sd <-cast(t,pred_mod+month~variable,sd) #monthly sd for every model |
|
317 |
# |
|
318 |
# tb_mod_m_list <-lapply(mod_names, function(k) subset(tb_mod_m_avg, pred_mod==k)) #this creates a list of 5 based on models names |
|
319 |
# |
|
320 |
# for (k in 1:length(mod_names)){ |
|
321 |
# mod_metrics <-tb_mod_list[[k]] |
|
322 |
# current_mod_name<- mod_names[k] |
|
323 |
# for (j in 1:length(metric_names)){ |
|
324 |
# metric_ac<-metric_names[j] |
|
325 |
# col_selected<-c(metric_ac,"month") |
|
326 |
# test<-mod_metrics[col_selected] |
|
327 |
# png(file.path(out_path,paste("boxplot_metric_",metric_ac,"_",current_mod_name,"_by_month_",out_prefix,".png", sep=""))) |
|
328 |
# boxplot(test[[metric_ac]]~test[[c("month")]],outline=FALSE,horizontal=FALSE,cex=0.5, |
|
329 |
# ylab=paste(metric_ac,"in degree C",sep=" "),,axisnames=FALSE,axes=FALSE) |
|
330 |
# #boxplot(test[[metric_ac]]~test[[c("month")]],outline=FALSE,horizontal=FALSE,cex=0.5, |
|
331 |
# # ylab=paste(metric_ac,"in degree C",sep=" ")) |
|
332 |
# axis(1, labels = FALSE) |
|
333 |
# ## Create some text labels |
|
334 |
# labels <- month.abb # abbreviated names for each month |
|
335 |
# ## Plot x axis labels at default tick marks |
|
336 |
# text(1:length(labels), par("usr")[3] - 0.25, srt = 45, adj = 1, |
|
337 |
# labels = labels, xpd = TRUE) |
|
338 |
# axis(2) |
|
339 |
# box() |
|
340 |
# #legend("bottomleft",legend=paste(names(rows_total),":",rows_total,sep=""),cex=0.7,bty="n") |
|
341 |
# title(paste(metric_ac,"for",current_mod_name,"by month",sep=" ")) |
|
342 |
# dev.off() |
|
343 |
# } |
|
344 |
# |
|
345 |
# } |
|
346 |
# summary_month_obj <-c(tb_mod_m_list,tb_mod_m_avg,tb_mod_m_sd) |
|
347 |
# names(summary_month_obj)<-c("tb_list","metric_month_avg","metric_month_sd") |
|
348 |
# return(summary_month_obj) |
|
349 |
# } |
|
943 |
debug(run_assessment_plotting_prediction_fun) |
|
944 |
df_assessment_figures_files <- |
|
945 |
run_assessment_plotting_prediction_fun(list_param_run_assessment_plotting) |
|
350 | 946 |
|
351 | 947 |
|
352 |
##################### END OF SCRIPT ###################### |
|
948 |
|
|
949 |
|
|
950 |
|
|
951 |
|
|
952 |
|
|
953 |
|
|
954 |
|
|
955 |
|
|
956 |
|
|
957 |
|
|
958 |
|
|
959 |
|
|
960 |
|
|
961 |
|
|
962 |
|
|
963 |
|
|
964 |
|
|
965 |
|
|
966 |
|
|
967 |
|
|
968 |
|
|
969 |
|
|
970 |
|
|
971 |
|
|
972 |
|
|
973 |
|
|
974 |
|
|
975 |
|
|
976 |
|
|
977 |
|
|
978 |
#### CURRENT ERROR ON NEX |
|
979 |
|
|
980 |
# #comments #figure_no #region #models |
|
981 |
# tile processed for the region figure_1 reg4 NA |
|
982 |
# boxplot with outlier figure_2a reg4 mod1 |
|
983 |
# boxplot with outlier figure_2a reg4 mod_kr |
|
984 |
# boxplot scaling by tiles figure_2b reg4 mod1 |
|
985 |
# boxplot scaling by tiles figure_2b reg4 mod_kr |
|
986 |
# boxplot overall region with outliers figure_3a reg4 NA |
|
987 |
# boxplot overall region with scaling figure_3b reg4 NA |
|
988 |
# Barplot of metrics ranked by tile Figure_5 |
|
989 |
# boxplot overall region with scaling figure_3b reg4 NA |
|
990 |
# Barplot of metrics ranked by tile Figure_5 |
|
991 |
# Barplot of metrics ranked by tile Figure_5 |
|
992 |
# Average metrics map centroids Figure_6 |
|
993 |
# Average metrics map centroids Figure_6 |
|
994 |
# Number of missing day threshold1 map centroids Figure_7a |
|
995 |
# Number of missing day threshold1 map centroids Figure_7a |
|
996 |
# Number of missing day threshold1 map centroids Figure_7a |
|
997 |
# Number of missing day threshold1 map centroids Figure_7a |
|
998 |
# number_daily_predictions_per_model Figure_7b |
|
999 |
# histogram number_daily_predictions_per_models Figure_7c |
|
1000 |
# boxplot_overall_separated_by_region_with_oultiers_ Figure 8a |
|
1001 |
# boxplot_overall_separated_by_region_with_scaling Figure 8b |
|
1002 |
|
|
1003 |
# Browse[3]> c |
|
1004 |
# Error in text.default(coordinates(pt)[1], coordinates(pt)[2], labels = i, : |
|
1005 |
# X11 font -adobe-helvetica-%s-%s-*-*-%d-*-*-*-*-*-*-*, face 2 at size 16 could not be loaded |
|
1006 |
# In addition: Warning message: |
|
1007 |
# In polypath(x = mcrds[, 1], y = mcrds[, 2], border = border, col = col, : |
|
1008 |
# Path drawing not available for this device |
|
1009 |
|
|
1010 |
|
|
1011 |
|
|
1012 |
# Browse[2]> for(i in 1:length(threshold_missing_day)){ |
|
1013 |
# + |
|
1014 |
# + #summary_metrics_v$n_missing <- summary_metrics_v$n == 365 |
|
1015 |
# + #summary_metrics_v$n_missing <- summary_metrics_v$n < 365 |
|
1016 |
# + summary_metrics_v$n_missing <- summary_metrics_v$n < threshold_missing_day[i] |
|
1017 |
# + summary_metrics_v_subset <- subset(summary_metrics_v,model_name=="mod1") |
|
1018 |
# + |
|
1019 |
# + #res_pix <- 1200 |
|
1020 |
# + res_pix <- 960 |
|
1021 |
# + |
|
1022 |
# + col_mfrow <- 1 |
|
1023 |
# + row_mfrow <- 1 |
|
1024 |
# + fig_filename <- paste("Figure7a_ac_metrics_map_centroids_tile_",model_name[j],"_","missing_day_",threshold_missing_day[i], |
|
1025 |
# + "_",out_suffix,".png",sep="") |
|
1026 |
# + png(filename=paste("Figure7a_ac_metrics_map_centroids_tile_",model_name[j],"_","missing_day_",threshold_missing_day[i], |
|
1027 |
# + "_",out_suffix,".png",sep=""), |
|
1028 |
# + width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
1029 |
# + |
|
1030 |
# + model_name[j] |
|
1031 |
# + |
|
1032 |
# + p_shp <- layer(sp.polygons(reg_layer, lwd=1, col='black')) |
|
1033 |
# + #title("(a) Mean for 1 January") |
|
1034 |
# + p <- bubble(summary_metrics_v_subset,"n_missing",main=paste("Missing per tile and by ",model_name[j]," for ", |
|
1035 |
# + threshold_missing_day[i])) |
|
1036 |
# + p1 <- p+p_shp |
|
1037 |
# + try(print(p1)) #error raised if number of missing values below a threshold does not exist |
|
1038 |
# + dev.off() |
|
1039 |
# + |
|
1040 |
# + list_outfiles[[counter_fig+i]] <- fig_filename |
|
1041 |
# + } |
|
1042 |
# debug at /nobackupp8/bparmen1/env_layers_scripts/global_run_scalingup_assessment_part2_01042016.R#272: i |
|
1043 |
# Browse[3]> counter_fig <- counter_fig+length(threshold_missing_day) #currently 4 days... |
|
1044 |
# Browse[3]> c |
|
1045 |
# Error in grid.Call.graphics(L_setviewport, pvp, TRUE) : |
|
1046 |
# non-finite location and/or size for viewport |
Also available in: Unified diff
initial commit assessment part3 to combine yearly results into figures for presentation