Revision 071349ac
Added by Benoit Parmentier almost 10 years ago
climate/research/oregon/interpolation/global_run_scalingup_assessment_part1_functions.R | ||
---|---|---|
1 |
#################################### INTERPOLATION OF TEMPERATURES ####################################### |
|
2 |
############################ Script for assessment of scaling up on NEX: part 1 ############################## |
|
3 |
#This script uses the worklfow code applied to the globe. Results currently reside on NEX/PLEIADES NASA. |
|
4 |
#The purpose is to create as set of functions to diagnose and assess quickly a set of predictd tiles. |
|
5 |
#Part 1 create summary tables and inputs for figure in part 2 and part 3. |
|
6 |
#AUTHOR: Benoit Parmentier |
|
7 |
#CREATED ON: 03/23/2014 |
|
8 |
#MODIFIED ON: 02/05/2015 |
|
9 |
#Version: 4 |
|
10 |
#PROJECT: Environmental Layers project |
|
11 |
#TO DO: |
|
12 |
# - generate delta and clim mosaic |
|
13 |
# - clean up |
|
14 |
|
|
15 |
#First source file: |
|
16 |
#source /nobackupp4/aguzman4/climateLayers/sharedModules/etc/environ.sh |
|
17 |
#MODULEPATH=$MODULEPATH:/nex/modules/files |
|
18 |
#module load /nex/modules/files/pythonkits/gdal_1.10.0_python_2.7.3_nex |
|
19 |
# These are the names and number for the current subset regions used for global runs: |
|
20 |
#reg1 - North America (NAM) |
|
21 |
#reg2 - Western Europe (WE) |
|
22 |
#reg3 - Eastern Europe to East Asia (EE_EA) |
|
23 |
#reg4 - South America (SAM) |
|
24 |
#reg5 - Africa (AF) |
|
25 |
#reg6 - South East Asia and Australia (SEA_AUS) |
|
26 |
|
|
27 |
################################################################################################# |
|
28 |
|
|
29 |
### Loading R library and packages |
|
30 |
#library used in the workflow production: |
|
31 |
library(gtools) # loading some useful tools |
|
32 |
library(mgcv) # GAM package by Simon Wood |
|
33 |
library(sp) # Spatial pacakge with class definition by Bivand et al. |
|
34 |
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al. |
|
35 |
library(rgdal) # GDAL wrapper for R, spatial utilities |
|
36 |
#library(gstat) # Kriging and co-kriging by Pebesma et al. #not on NEX NASA |
|
37 |
library(fields) # NCAR Spatial Interpolation methods such as kriging, splines |
|
38 |
library(raster) # Hijmans et al. package for raster processing |
|
39 |
library(gdata) # various tools with xls reading, cbindX |
|
40 |
library(rasterVis) # Raster plotting functions |
|
41 |
library(parallel) # Parallelization of processes with multiple cores |
|
42 |
library(maptools) # Tools and functions for sp and other spatial objects e.g. spCbind |
|
43 |
library(maps) # Tools and data for spatial/geographic objects |
|
44 |
library(reshape) # Change shape of object, summarize results |
|
45 |
library(plotrix) # Additional plotting functions |
|
46 |
library(plyr) # Various tools including rbind.fill |
|
47 |
library(spgwr) # GWR method |
|
48 |
#library(automap) # Kriging automatic fitting of variogram using gstat #not on NEX NASA |
|
49 |
#library(rgeos) # Geometric, topologic library of functions # not on NEX NASA |
|
50 |
#RPostgreSQL # Interface R and Postgres, not used in this script |
|
51 |
library(gridExtra) |
|
52 |
#Additional libraries not used in workflow |
|
53 |
library(pgirmess) # Krusall Wallis test with mulitple options, Kruskalmc {pgirmess} |
|
54 |
#library(colorRamps) |
|
55 |
|
|
56 |
#### FUNCTION USED IN SCRIPT |
|
57 |
|
|
58 |
#function_analyses_paper1 <-"contribution_of_covariates_paper_interpolation_functions_07182014.R" #first interp paper |
|
59 |
#function_analyses_paper2 <-"multi_timescales_paper_interpolation_functions_08132014.R" |
|
60 |
|
|
61 |
load_obj <- function(f) |
|
62 |
{ |
|
63 |
env <- new.env() |
|
64 |
nm <- load(f, env)[1] |
|
65 |
env[[nm]] |
|
66 |
} |
|
67 |
|
|
68 |
create_dir_fun <- function(out_dir,out_suffix){ |
|
69 |
if(!is.null(out_suffix)){ |
|
70 |
out_name <- paste("output_",out_suffix,sep="") |
|
71 |
out_dir <- file.path(out_dir,out_name) |
|
72 |
} |
|
73 |
#create if does not exists: create the output dir as defined |
|
74 |
if(!file.exists(out_dir)){ |
|
75 |
dir.create(out_dir) |
|
76 |
} |
|
77 |
return(out_dir) |
|
78 |
} |
|
79 |
|
|
80 |
extract_list_from_list_obj<-function(obj_list,list_name){ |
|
81 |
library(plyr) |
|
82 |
|
|
83 |
#Create a list of an object from a given list of object using a name prodived as input |
|
84 |
|
|
85 |
list_tmp<-vector("list",length(obj_list)) |
|
86 |
for (i in 1:length(obj_list)){ |
|
87 |
tmp <- obj_list[[i]][[list_name]] #double bracket to return data.frame |
|
88 |
list_tmp[[i]] <- as.data.frame(tmp) #deal with spdf... |
|
89 |
} |
|
90 |
return(list_tmp) #this is a data.frame |
|
91 |
} |
|
92 |
|
|
93 |
#This extract a data.frame object from raster prediction obj and combine them in one data.frame |
|
94 |
extract_from_list_obj<-function(obj_list,list_name){ |
|
95 |
#extract object from list of list. This useful for raster_prediction_obj |
|
96 |
library(plyr) |
|
97 |
list_tmp<-vector("list",length(obj_list)) |
|
98 |
for (i in 1:length(obj_list)){ |
|
99 |
tmp <- obj_list[[i]] |
|
100 |
if(inherits(tmp,"try-error")){ |
|
101 |
print(paste("no model generated or error in list",sep=" ")) #change message for any model type... |
|
102 |
list_tmp[[i]] <- NULL #double bracket to return data.frame |
|
103 |
}else{ |
|
104 |
#tmp<-obj_list[[i]][[list_name]] #double bracket to return data.frame |
|
105 |
list_tmp[[i]] <- as.data.frame(tmp[[list_name]]) |
|
106 |
} |
|
107 |
# |
|
108 |
#tmp<-obj_list[[i]][[list_name]] #double bracket to return data.frame |
|
109 |
#list_tmp[[i]]<- as.data.frame(tmp) #if spdf |
|
110 |
} |
|
111 |
# |
|
112 |
#list_tmp <-list_tmp[!is.null(list_tmp)] |
|
113 |
list_tmp <- list_tmp[unlist(lapply(list_tmp,FUN=function(x){!is.null(x)}))] |
|
114 |
|
|
115 |
tb_list_tmp<-do.call(rbind.fill,list_tmp) #long rownames |
|
116 |
#tb_list_tmp<-do.call(rbind,list_tmp) #long rownames |
|
117 |
return(tb_list_tmp) #this is a data.frame |
|
118 |
} |
|
119 |
|
|
120 |
## Function to mosaic modis or other raster images |
|
121 |
|
|
122 |
mosaic_m_raster_list<-function(j,list_param){ |
|
123 |
#This functions returns a subset of tiles from the modis grid. |
|
124 |
#Arguments: modies grid tile,list of tiles |
|
125 |
#Output: spatial grid data frame of the subset of tiles |
|
126 |
#Note that rasters are assumed to be in the same projection system!! |
|
127 |
|
|
128 |
#rast_list<-vector("list",length(mosaic_list)) |
|
129 |
#for (i in 1:length(mosaic_list)){ |
|
130 |
# read the individual rasters into a list of RasterLayer objects |
|
131 |
# this may be changed so that it is not read in the memory!!! |
|
132 |
|
|
133 |
#parse output... |
|
134 |
|
|
135 |
#j<-list_param$j |
|
136 |
mosaic_list<-list_param$mosaic_list |
|
137 |
out_path<-list_param$out_path |
|
138 |
out_names<-list_param$out_rastnames |
|
139 |
file_format <- list_param$file_format |
|
140 |
NA_flag_val <- list_param$NA_flag_val |
|
141 |
out_suffix <- list_param$out_suffix |
|
142 |
## Start |
|
143 |
|
|
144 |
if(class(mosaic_list[[j]])=="list"){ |
|
145 |
m_list <- unlist(mosaic_list[[j]]) |
|
146 |
} |
|
147 |
input.rasters <- lapply(m_list, raster) |
|
148 |
mosaiced_rast<-input.rasters[[1]] |
|
149 |
|
|
150 |
for (k in 2:length(input.rasters)){ |
|
151 |
mosaiced_rast<-mosaic(mosaiced_rast,input.rasters[[k]], fun=mean) |
|
152 |
#mosaiced_rast<-mosaic(mosaiced_rast,raster(input.rasters[[k]]), fun=mean) |
|
153 |
} |
|
154 |
|
|
155 |
data_name<-paste("mosaiced_",sep="") #can add more later... |
|
156 |
#raster_name<-paste(data_name,out_names[j],".tif", sep="") |
|
157 |
raster_name<-paste(data_name,out_names[j],file_format, sep="") |
|
158 |
|
|
159 |
writeRaster(mosaiced_rast, NAflag=NA_flag_val,filename=file.path(out_path,raster_name),overwrite=TRUE) |
|
160 |
#Writing the data in a raster file format... |
|
161 |
rast_list<-file.path(out_path,raster_name) |
|
162 |
|
|
163 |
## The Raster and rgdal packages write temporary files on the disk when memory is an issue. This can potential build up |
|
164 |
## in long loops and can fill up hard drives resulting in errors. The following sections removes these files |
|
165 |
## as they are created in the loop. This code section can be transformed into a "clean-up function later on |
|
166 |
## Start remove |
|
167 |
#tempfiles<-list.files(tempdir(),full.names=T) #GDAL transient files are not removed |
|
168 |
#files_to_remove<-grep(out_suffix,tempfiles,value=T) #list files to remove |
|
169 |
#if(length(files_to_remove)>0){ |
|
170 |
# file.remove(files_to_remove) |
|
171 |
#} |
|
172 |
#now remove temp files from raster package located in rasterTmpDir |
|
173 |
removeTmpFiles(h=0) #did not work if h is not set to 0 |
|
174 |
## end of remove section |
|
175 |
|
|
176 |
return(rast_list) |
|
177 |
} |
|
178 |
|
|
179 |
### Function: |
|
180 |
pred_data_info_fun <- function(k,list_data,pred_mod,sampling_dat_info){ |
|
181 |
#Summarizing input info from sampling and df used in training/testing |
|
182 |
|
|
183 |
data <- list_data[[k]] |
|
184 |
sampling_dat <- sampling_dat_info[[k]] |
|
185 |
if(data!="try-error"){ |
|
186 |
n <- nrow(data) |
|
187 |
n_mod <- vector("numeric",length(pred_mod)) |
|
188 |
for(j in 1:length(pred_mod)){ |
|
189 |
n_mod[j] <- sum(!is.na(data[[pred_mod[j]]])) |
|
190 |
} |
|
191 |
n <- rep(n,length(pred_mod)) |
|
192 |
sampling_dat <- sampling_dat[rep(seq_len(nrow(sampling_dat)), each=length(pred_mod)),] |
|
193 |
row.names(sampling_dat) <- NULL |
|
194 |
df_n <- data.frame(n,n_mod,pred_mod) |
|
195 |
df_n <- cbind(df_n,sampling_dat) |
|
196 |
}else{ |
|
197 |
n <- rep(NA,length(pred_mod)) |
|
198 |
n_mod <- vector("numeric",length(pred_mod)) |
|
199 |
n_mod <- rep(NA,length(pred_mod)) |
|
200 |
df_n <- data.frame(n,n_mod,pred_mod) |
|
201 |
sampling_dat <- sampling_dat[rep(seq_len(nrow(sampling_dat)), each=length(pred_mod)),] |
|
202 |
row.names(sampling_dat) <- NULL |
|
203 |
df_n <- data.frame(n,n_mod,pred_mod) |
|
204 |
df_n <- cbind(df_n,sampling_dat) |
|
205 |
|
|
206 |
} |
|
207 |
return(df_n) |
|
208 |
} |
|
209 |
|
|
210 |
extract_daily_training_testing_info <- function(i,list_param){ |
|
211 |
#This function extracts training and testing information from the raster object produced for each tile |
|
212 |
#This is looping through tiles... |
|
213 |
##Functions used |
|
214 |
#Defined outside this script: |
|
215 |
#pred_data_info_fun |
|
216 |
|
|
217 |
##### Parse parameters and arguments #### |
|
218 |
|
|
219 |
raster_obj_file <- list_param$list_raster_obj_files[i] |
|
220 |
use_month <- list_param$use_month |
|
221 |
use_day <- list_param$use_day |
|
222 |
tile_id <- list_param$list_names_tile_id[i] |
|
223 |
|
|
224 |
### Start script ## |
|
225 |
|
|
226 |
raster_obj <- load_obj(unlist(raster_obj_file)) #may not need unlist |
|
227 |
nb_models <- length((raster_obj$clim_method_mod_obj[[1]]$formulas)) |
|
228 |
pred_mod <- paste("mod",c(1:nb_models,"_kr"),sep="") |
|
229 |
#we are assuming no monthly hold out... |
|
230 |
#we are assuming only one specific daily prop? |
|
231 |
nb_models <- length(pred_mod) |
|
232 |
#names(raster_obj$method_mod_obj[[1]]) |
|
233 |
var_interp <- unique(raster_obj$tb_diagnostic_s$var_interp) |
|
234 |
method_interp <- unique(raster_obj$tb_diagnostic_s$method_interp) |
|
235 |
|
|
236 |
if(use_day==TRUE){ |
|
237 |
|
|
238 |
list_data_day_v <- try(extract_list_from_list_obj(raster_obj$validation_mod_obj,"data_v")) |
|
239 |
list_data_day_s <- try(extract_list_from_list_obj(raster_obj$validation_mod_obj,"data_s")) |
|
240 |
sampling_dat_day <- extract_list_from_list_obj(raster_obj$method_mod_obj,"daily_dev_sampling_dat") |
|
241 |
#debug(pred_data_info_fun) |
|
242 |
#list_pred_data_day_s_info <- pred_data_info_fun(1,list_data=list_data_day_s,pred_mod=pred_mod,sampling_dat_info=sampling_dat_day) |
|
243 |
list_pred_data_day_s_info <- lapply(1:length(sampling_dat_day),FUN=pred_data_info_fun, |
|
244 |
list_data=list_data_day_s,pred_mod=pred_mod,sampling_dat_info=sampling_dat_day) |
|
245 |
list_pred_data_day_v_info <- lapply(1:length(sampling_dat_day),FUN=pred_data_info_fun, |
|
246 |
list_data=list_data_day_v,pred_mod=pred_mod,sampling_dat_info=sampling_dat_day) |
|
247 |
pred_data_day_s_info <- do.call(rbind,list_pred_data_day_s_info) |
|
248 |
pred_data_day_v_info <- do.call(rbind,list_pred_data_day_v_info) |
|
249 |
pred_data_day_s_info$training <- rep(1,nrow(pred_data_day_s_info)) |
|
250 |
pred_data_day_v_info$training <- rep(0,nrow(pred_data_day_v_info)) |
|
251 |
pred_data_day_info <-rbind(pred_data_day_v_info,pred_data_day_s_info) |
|
252 |
|
|
253 |
pred_data_day_info$method_interp <- rep(method_interp,nrow(pred_data_day_info)) |
|
254 |
pred_data_day_info$var_interp <- rep(var_interp,nrow(pred_data_day_info)) |
|
255 |
pred_data_day_info$tile_id <- rep(tile_id,nrow(pred_data_day_info)) |
|
256 |
} |
|
257 |
|
|
258 |
if(use_month==TRUE){ |
|
259 |
|
|
260 |
list_data_month_s <- try(extract_list_from_list_obj(raster_obj$validation_mod_month_obj,"data_s")) |
|
261 |
list_data_month_v <- try(extract_list_from_list_obj(raster_obj$validation_mod_month_obj,"data_v")) |
|
262 |
sampling_dat_month <- extract_list_from_list_obj(raster_obj$clim_method_mod_obj,"sampling_month_dat") |
|
263 |
list_pred_data_month_s_info <- lapply(1:length(sampling_dat_month),FUN=pred_data_info_fun, |
|
264 |
list_data=list_data_month_s,pred_mod=pred_mod,sampling_dat_info=sampling_dat_month) |
|
265 |
list_pred_data_month_v_info <- mclapply(1:length(sampling_dat_month),FUN=pred_data_info_fun, |
|
266 |
list_data=list_data_month_v,pred_mod=pred_mod,sampling_dat_info=sampling_dat_month,mc.preschedule=FALSE,mc.cores = 1) |
|
267 |
#Note for list_pred_data_month_v_info it will be try-error when there is no holdout. |
|
268 |
#combine training and testing later? also combined with accuracy |
|
269 |
pred_data_month_s_info <- do.call(rbind,list_pred_data_month_s_info) |
|
270 |
#Adding a column for training: if data item used as training then it is 1 |
|
271 |
pred_data_month_s_info$training <- try(rep(1,nrow(pred_data_month_s_info))) |
|
272 |
|
|
273 |
#Remove try-error?? |
|
274 |
list_pred_data_month_v_info_tmp <- remove_from_list_fun(list_pred_data_month_v_info) |
|
275 |
list_pred_data_month_v_info <- list_pred_data_month_v_info_tmp$list |
|
276 |
if (length(list_pred_data_month_v_info)>0){ |
|
277 |
pred_data_month_v_info <- do.call(rbind,list_pred_data_month_v_info) |
|
278 |
pred_data_month_v_info$training <- try(rep(0,nrow(pred_data_month_v_info))) |
|
279 |
pred_data_month_info <- rbind(pred_data_month_v_info,pred_data_month_s_info) |
|
280 |
}else{ |
|
281 |
pred_data_month_info <- pred_data_month_s_info |
|
282 |
} |
|
283 |
|
|
284 |
pred_data_month_info$method_interp <- rep(method_interp,nrow(pred_data_month_info)) |
|
285 |
pred_data_month_info$var_interp <- rep(var_interp,nrow(pred_data_month_info)) |
|
286 |
pred_data_month_info$tile_id <- rep(tile_id,nrow(pred_data_month_info)) |
|
287 |
} |
|
288 |
|
|
289 |
if(use_month==FALSE){ |
|
290 |
pred_data_month_info <- NULL |
|
291 |
} |
|
292 |
if(use_day==FALSE){ |
|
293 |
pred_data_day_info <- NULL |
|
294 |
} |
|
295 |
#prepare object to return |
|
296 |
pred_data_info_obj <- list(pred_data_month_info,pred_data_day_info) |
|
297 |
names(pred_data_info_obj) <- c("pred_data_month_info","pred_data_day_info") |
|
298 |
#could add data.frame data_s and data_v later... |
|
299 |
### |
|
300 |
return(pred_data_info_obj) |
|
301 |
} |
|
302 |
|
|
303 |
remove_from_list_fun <- function(l_x,condition_class ="try-error"){ |
|
304 |
index <- vector("list",length(l_x)) |
|
305 |
for (i in 1:length(l_x)){ |
|
306 |
if (inherits(l_x[[i]],condition_class)){ |
|
307 |
index[[i]] <- FALSE #remove from list |
|
308 |
}else{ |
|
309 |
index[[i]] <- TRUE |
|
310 |
} |
|
311 |
} |
|
312 |
l_x<-l_x[unlist(index)] #remove from list all elements using subset |
|
313 |
|
|
314 |
obj <- list(l_x,index) |
|
315 |
names(obj) <- c("list","valid") |
|
316 |
return(obj) |
|
317 |
} |
|
318 |
|
|
319 |
##Function to list predicted tif |
|
320 |
list_tif_fun <- function(i,in_dir_list,pattern_str){ |
|
321 |
#list.files(path=x,pattern=".*predicted_mod1_0_1.*20100101.*.tif",full.names=T)}) |
|
322 |
pat_str<- pattern_str[i] |
|
323 |
list_tif_files_dates <-lapply(in_dir_list, |
|
324 |
FUN=function(x,pat_str){list.files(path=x,pattern=pat_str,full.names=T)},pat_str=pat_str) |
|
325 |
return(list_tif_files_dates) |
|
326 |
} |
|
327 |
|
|
328 |
calculate_summary_from_tb_diagnostic <-function(tb_diagnostic,metric_names){#,out_prefix,out_path){ |
|
329 |
#now boxplots and mean per models |
|
330 |
library(gdata) #Nesssary to use cbindX |
|
331 |
|
|
332 |
### Start script |
|
333 |
y_var_name<-unique(tb_diagnostic$var_interp) #extract the name of interpolated variable: dailyTmax, dailyTmin |
|
334 |
|
|
335 |
mod_names<-sort(unique(tb_diagnostic$pred_mod)) #models that have accuracy metrics |
|
336 |
t<-melt(tb_diagnostic, |
|
337 |
#measure=mod_var, |
|
338 |
id=c("date","pred_mod","prop"), |
|
339 |
na.rm=F) |
|
340 |
t$value<-as.numeric(t$value) #problem with char!!! |
|
341 |
avg_tb<-cast(t,pred_mod~variable,mean) |
|
342 |
avg_tb$var_interp<-rep(y_var_name,times=nrow(avg_tb)) |
|
343 |
median_tb<-cast(t,pred_mod~variable,median) |
|
344 |
|
|
345 |
#avg_tb<-cast(t,pred_mod~variable,mean) |
|
346 |
tb<-tb_diagnostic |
|
347 |
|
|
348 |
#mod_names<-sort(unique(tb$pred_mod)) #kept for clarity |
|
349 |
tb_mod_list<-lapply(mod_names, function(k) subset(tb, pred_mod==k)) #this creates a list of 5 based on models names |
|
350 |
names(tb_mod_list)<-mod_names |
|
351 |
#mod_metrics<-do.call(cbind,tb_mod_list) |
|
352 |
#debug here |
|
353 |
if(length(tb_mod_list)>1){ |
|
354 |
mod_metrics<-do.call(cbindX,tb_mod_list) #column bind the list?? |
|
355 |
}else{ |
|
356 |
mod_metrics<-tb_mod_list[[1]] |
|
357 |
} |
|
358 |
|
|
359 |
test_names<-lapply(1:length(mod_names),function(k) paste(names(tb_mod_list[[1]]),mod_names[k],sep="_")) |
|
360 |
#test names are used when plotting the boxplot for the different models |
|
361 |
names(mod_metrics)<-unlist(test_names) |
|
362 |
rows_total<-lapply(tb_mod_list,nrow) |
|
363 |
avg_tb$n<-rows_total #total number of predictions on which the mean is based |
|
364 |
median_tb$n<-rows_total |
|
365 |
summary_obj<-list(avg_tb,median_tb) |
|
366 |
names(summary_obj)<-c("avg","median") |
|
367 |
return(summary_obj) |
|
368 |
} |
|
369 |
|
|
370 |
#boxplot_month_from_tb(tb_diagnostic,metric_names,out_prefix,out_path) |
|
371 |
## Function to display metrics by months/seasons |
|
372 |
calculate_summary_from_tb_month_diagnostic <-function(tb_diagnostic,metric_names){ #,out_prefix,out_path){ |
|
373 |
|
|
374 |
#Generate boxplot per month for models and accuracy metrics |
|
375 |
#Input parameters: |
|
376 |
#1) df: data frame containing accurayc metrics (RMSE etc.) per day) |
|
377 |
#2) metric_names: metrics used for validation |
|
378 |
#3) out_prefix |
|
379 |
# |
|
380 |
|
|
381 |
################# |
|
382 |
## BEGIN |
|
383 |
y_var_name<-unique(tb_diagnostic$var_interp) #extract the name of interpolated variable: dailyTmax, dailyTmin |
|
384 |
date_f<-strptime(tb_diagnostic$date, "%Y%m%d") # interpolation date being processed |
|
385 |
tb_diagnostic$month<-strftime(date_f, "%m") # current month of the date being processed |
|
386 |
mod_names<-sort(unique(tb_diagnostic$pred_mod)) #models that have accuracy metrics |
|
387 |
tb_mod_list<-lapply(mod_names, function(k) subset(tb_diagnostic, pred_mod==k)) #this creates a list of 5 based on models names |
|
388 |
names(tb_mod_list)<-mod_names |
|
389 |
t<-melt(tb_diagnostic, |
|
390 |
#measure=mod_var, |
|
391 |
id=c("date","pred_mod","prop","month"), |
|
392 |
na.rm=F) |
|
393 |
t$value<-as.numeric(t$value) #problem with char!!! |
|
394 |
tb_mod_m_avg <-cast(t,pred_mod+month~variable,mean) #monthly mean for every model |
|
395 |
tb_mod_m_avg$var_interp<-rep(y_var_name,times=nrow(tb_mod_m_avg)) |
|
396 |
|
|
397 |
tb_mod_m_sd <-cast(t,pred_mod+month~variable,sd) #monthly sd for every model |
|
398 |
tb_mod_m_list <-lapply(mod_names, function(k) subset(tb_mod_m_avg, pred_mod==k)) #this creates a list of 5 based on models names |
|
399 |
|
|
400 |
summary_month_obj <-c(tb_mod_m_list,tb_mod_m_avg,tb_mod_m_sd) |
|
401 |
names(summary_month_obj)<-c("tb_list","metric_month_avg","metric_month_sd") |
|
402 |
return(summary_month_obj) |
|
403 |
} |
|
404 |
|
|
405 |
create_raster_prediction_obj<- function(in_dir_list,interpolation_method, y_var_name,out_prefix,out_path_list=NULL){ |
|
406 |
|
|
407 |
|
|
408 |
#gather all necessary info: |
|
409 |
lf_validation_mod_month_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="gam_CAI_validation_mod_month_obj_dailyTmax.*.RData",full.names=T)}) |
|
410 |
lf_validation_mod_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="gam_CAI_validation_mod_obj_dailyTmax.*.RData",full.names=T)}) |
|
411 |
lf_clim_method_mod_obj <- lapply(in_dir_list,FUN=function(x){mixedsort(list.files(path=x,pattern="clim_obj_CAI_month_.*._TMAX_0_1_.*.RData",full.names=T))}) |
|
412 |
lf_method_mod_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="method_mod_obj_gam_CAI_dailyTmax.*.RData",full.names=T)}) |
|
413 |
|
|
414 |
tb_month_diagnostic_v_list <- mclapply(lf_validation_mod_month_obj,FUN=function(x){try( x<- load_obj(x)); try(extract_from_list_obj(x,"metrics_v"))},mc.preschedule=FALSE,mc.cores = 6) |
|
415 |
tb_month_diagnostic_s_list <- mclapply(lf_validation_mod_month_obj,FUN=function(x){try( x<- load_obj(x)); try(extract_from_list_obj(x,"metrics_s"))},mc.preschedule=FALSE,mc.cores = 6) |
|
416 |
tb_diagnostic_v_list <- mclapply(lf_validation_mod_obj,FUN=function(x){try( x<- load_obj(x)); try(extract_from_list_obj(x,"metrics_v"))},mc.preschedule=FALSE,mc.cores = 6) |
|
417 |
tb_diagnostic_s_list <- mclapply(lf_validation_mod_obj,FUN=function(x){try( x<- load_obj(x)); try(extract_from_list_obj(x,"metrics_s"))},mc.preschedule=FALSE,mc.cores = 6) |
|
418 |
|
|
419 |
#rownames(tb_month_diagnostic_v)<-NULL #remove row names |
|
420 |
#tb_month_diagnostic_v$method_interp <- interpolation_method |
|
421 |
|
|
422 |
#Call functions to create plots of metrics for validation dataset |
|
423 |
metric_names<-c("rmse","mae","me","r","m50") |
|
424 |
summary_metrics_v_list <- lapply(tb_diagnostic_v_list,FUN=function(x){try(calculate_summary_from_tb_diagnostic(x,metric_names))}) |
|
425 |
summary_month_metrics_v_list <- lapply(tb_diagnostic_v_list,FUN=function(x){try(calculate_summary_from_tb_month_diagnostic(x,metric_names))}) |
|
426 |
#list_summalist_summary_metrics_vry_month_metrics_v <- calculate_summary_from_tb_month_diagnostic(list_tb_diagnostic_v[[1]],metric_names) |
|
427 |
|
|
428 |
#if (interpolation_method %in% c("gam_CAI","kriging_CAI","gwr_CAI","gam_fusion","kriging_fusion","gwr_fusion")){ |
|
429 |
lf_raster_obj <- vector("list",length=length(in_dir_list)) |
|
430 |
for (i in 1:length(in_dir_list)){ |
|
431 |
|
|
432 |
clim_method_mod_obj <- try(lapply(lf_clim_method_mod_obj[[i]],FUN=try(load_obj))) |
|
433 |
method_mod_obj <- try(load_obj(lf_method_mod_obj[[i]])) |
|
434 |
validation_mod_month_obj <- try(load_obj(lf_validation_mod_month_obj[[i]])) |
|
435 |
validation_mod_obj <- try(load_obj(lf_validation_mod_obj[[i]])) |
|
436 |
|
|
437 |
tb_month_diagnostic_v <- try(tb_month_diagnostic_v_list[[i]]) |
|
438 |
tb_month_diagnostic_s <- try(tb_month_diagnostic_s_list[[i]]) |
|
439 |
tb_diagnostic_v <- try(tb_diagnostic_v_list[[i]]) |
|
440 |
tb_diagnostic_s <- try(tb_diagnostic_s_list[[i]]) |
|
441 |
|
|
442 |
summary_metrics_v <- summary_metrics_v_list[[i]] |
|
443 |
summary_month_metrics_v <- summary_month_metrics_v_list[[i]] |
|
444 |
|
|
445 |
raster_prediction_obj <- list(clim_method_mod_obj,method_mod_obj,validation_mod_obj,validation_mod_month_obj, tb_diagnostic_v, |
|
446 |
tb_diagnostic_s,tb_month_diagnostic_v,tb_month_diagnostic_s,summary_metrics_v,summary_month_metrics_v) |
|
447 |
names(raster_prediction_obj) <-c ("clim_method_mod_obj","method_mod_obj","validation_mod_obj","validation_mod_month_obj","tb_diagnostic_v", |
|
448 |
"tb_diagnostic_s","tb_month_diagnostic_v","tb_month_diagnostic_s","summary_metrics_v","summary_month_metrics_v") |
|
449 |
if(is.null(out_path_list)){ |
|
450 |
out_path <- in_dir_list[[i]] |
|
451 |
} |
|
452 |
if(!is.null(out_path_list)){ |
|
453 |
out_path <- out_path_list[[i]] |
|
454 |
} |
|
455 |
lf_raster_obj[[i]] <- file.path(out_path,paste("raster_prediction_obj_",interpolation_method,"_", y_var_name,out_prefix_str[i],".RData",sep="")) |
|
456 |
|
|
457 |
save(raster_prediction_obj,file= file.path(out_path,paste("raster_prediction_obj_",interpolation_method,"_", y_var_name,out_prefix_str[i],".RData",sep=""))) |
|
458 |
} |
|
459 |
return(lf_raster_obj) |
|
460 |
} |
|
461 |
|
|
462 |
##################### END OF SCRIPT ###################### |
|
463 |
|
|
464 |
|
Also available in: Unified diff
initial commmit for functions part1 fro global scaling and accuracy assessment