Revision 16cc2876
Added by Benoit Parmentier over 10 years ago
climate/research/oregon/interpolation/global_run_scalingup_assessment_part2.R | ||
---|---|---|
1 |
#################################### INTERPOLATION OF TEMPERATURES ####################################### |
|
2 |
############################ Script for assessment of scaling up on NEX ############################## |
|
3 |
#This script uses the worklfow code applied to the globe. Results currently reside on NEX/PLEIADES NASA. |
|
4 |
#Accuracy methods are added in the the function scripts to evaluate results. |
|
5 |
#Analyses, figures, tables and data are also produced in the script. |
|
6 |
#AUTHOR: Benoit Parmentier |
|
7 |
#CREATED ON: 03/23/2014 |
|
8 |
#MODIFIED ON: 05/14/2014 |
|
9 |
#Version: 3 |
|
10 |
#PROJECT: Environmental Layers project |
|
11 |
################################################################################################# |
|
12 |
|
|
13 |
### Loading R library and packages |
|
14 |
#library used in the workflow production: |
|
15 |
library(gtools) # loading some useful tools |
|
16 |
library(mgcv) # GAM package by Simon Wood |
|
17 |
library(sp) # Spatial pacakge with class definition by Bivand et al. |
|
18 |
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al. |
|
19 |
library(rgdal) # GDAL wrapper for R, spatial utilities |
|
20 |
library(gstat) # Kriging and co-kriging by Pebesma et al. |
|
21 |
library(fields) # NCAR Spatial Interpolation methods such as kriging, splines |
|
22 |
library(raster) # Hijmans et al. package for raster processing |
|
23 |
library(gdata) # various tools with xls reading, cbindX |
|
24 |
library(rasterVis) # Raster plotting functions |
|
25 |
library(parallel) # Parallelization of processes with multiple cores |
|
26 |
library(maptools) # Tools and functions for sp and other spatial objects e.g. spCbind |
|
27 |
library(maps) # Tools and data for spatial/geographic objects |
|
28 |
library(reshape) # Change shape of object, summarize results |
|
29 |
library(plotrix) # Additional plotting functions |
|
30 |
library(plyr) # Various tools including rbind.fill |
|
31 |
library(spgwr) # GWR method |
|
32 |
library(automap) # Kriging automatic fitting of variogram using gstat |
|
33 |
library(rgeos) # Geometric, topologic library of functions |
|
34 |
#RPostgreSQL # Interface R and Postgres, not used in this script |
|
35 |
library(gridExtra) |
|
36 |
#Additional libraries not used in workflow |
|
37 |
library(pgirmess) # Krusall Wallis test with mulitple options, Kruskalmc {pgirmess} |
|
38 |
library(colorRamps) |
|
39 |
|
|
40 |
#### FUNCTION USED IN SCRIPT |
|
41 |
|
|
42 |
function_analyses_paper1 <-"contribution_of_covariates_paper_interpolation_functions_10222013.R" #first interp paper |
|
43 |
function_analyses_paper2 <-"multi_timescales_paper_interpolation_functions_05052014.R" |
|
44 |
|
|
45 |
load_obj <- function(f) |
|
46 |
{ |
|
47 |
env <- new.env() |
|
48 |
nm <- load(f, env)[1] |
|
49 |
env[[nm]] |
|
50 |
} |
|
51 |
|
|
52 |
create_dir_fun <- function(out_dir,out_suffix){ |
|
53 |
if(!is.null(out_suffix)){ |
|
54 |
out_name <- paste("output_",out_suffix,sep="") |
|
55 |
out_dir <- file.path(out_dir,out_name) |
|
56 |
} |
|
57 |
#create if does not exists |
|
58 |
if(!file.exists(out_dir)){ |
|
59 |
dir.create(out_dir) |
|
60 |
} |
|
61 |
return(out_dir) |
|
62 |
} |
|
63 |
|
|
64 |
extract_list_from_list_obj<-function(obj_list,list_name){ |
|
65 |
library(plyr) |
|
66 |
|
|
67 |
#Create a list of an object from a given list of object using a name prodived as input |
|
68 |
|
|
69 |
list_tmp<-vector("list",length(obj_list)) |
|
70 |
for (i in 1:length(obj_list)){ |
|
71 |
tmp <- obj_list[[i]][[list_name]] #double bracket to return data.frame |
|
72 |
list_tmp[[i]] <- as.data.frame(tmp) #deal with spdf... |
|
73 |
} |
|
74 |
return(list_tmp) #this is a data.frame |
|
75 |
} |
|
76 |
|
|
77 |
#This extract a data.frame object from raster prediction obj and combine them in one data.frame |
|
78 |
extract_from_list_obj<-function(obj_list,list_name){ |
|
79 |
#extract object from list of list. This useful for raster_prediction_obj |
|
80 |
library(plyr) |
|
81 |
|
|
82 |
list_tmp<-vector("list",length(obj_list)) |
|
83 |
for (i in 1:length(obj_list)){ |
|
84 |
tmp<-obj_list[[i]][[list_name]] #double bracket to return data.frame |
|
85 |
list_tmp[[i]]<- as.data.frame(tmp) #if spdf |
|
86 |
} |
|
87 |
tb_list_tmp<-do.call(rbind.fill,list_tmp) #long rownames |
|
88 |
#tb_list_tmp<-do.call(rbind,list_tmp) #long rownames |
|
89 |
|
|
90 |
return(tb_list_tmp) #this is a data.frame |
|
91 |
} |
|
92 |
|
|
93 |
## Function to mosaic modis or other raster images |
|
94 |
|
|
95 |
mosaic_m_raster_list<-function(j,list_param){ |
|
96 |
#This functions returns a subset of tiles from the modis grid. |
|
97 |
#Arguments: modies grid tile,list of tiles |
|
98 |
#Output: spatial grid data frame of the subset of tiles |
|
99 |
#Note that rasters are assumed to be in the same projection system!! |
|
100 |
|
|
101 |
#rast_list<-vector("list",length(mosaic_list)) |
|
102 |
#for (i in 1:length(mosaic_list)){ |
|
103 |
# read the individual rasters into a list of RasterLayer objects |
|
104 |
# this may be changed so that it is not read in the memory!!! |
|
105 |
|
|
106 |
#parse output... |
|
107 |
|
|
108 |
#j<-list_param$j |
|
109 |
mosaic_list<-list_param$mosaic_list |
|
110 |
out_path<-list_param$out_path |
|
111 |
out_names<-list_param$out_rastnames |
|
112 |
file_format <- list_param$file_format |
|
113 |
NA_flag_val <- list_param$NA_flag_val |
|
114 |
out_suffix <- list_param$out_suffix |
|
115 |
## Start |
|
116 |
|
|
117 |
if(class(mosaic_list[[j]])=="list"){ |
|
118 |
m_list <- unlist(mosaic_list[[j]]) |
|
119 |
} |
|
120 |
input.rasters <- lapply(m_list, raster) |
|
121 |
mosaiced_rast<-input.rasters[[1]] |
|
122 |
|
|
123 |
for (k in 2:length(input.rasters)){ |
|
124 |
mosaiced_rast<-mosaic(mosaiced_rast,input.rasters[[k]], fun=mean) |
|
125 |
#mosaiced_rast<-mosaic(mosaiced_rast,raster(input.rasters[[k]]), fun=mean) |
|
126 |
} |
|
127 |
|
|
128 |
data_name<-paste("mosaiced_",sep="") #can add more later... |
|
129 |
#raster_name<-paste(data_name,out_names[j],".tif", sep="") |
|
130 |
raster_name<-paste(data_name,out_names[j],file_format, sep="") |
|
131 |
|
|
132 |
writeRaster(mosaiced_rast, NAflag=NA_flag_val,filename=file.path(out_path,raster_name),overwrite=TRUE) |
|
133 |
#Writing the data in a raster file format... |
|
134 |
rast_list<-file.path(out_path,raster_name) |
|
135 |
|
|
136 |
## The Raster and rgdal packages write temporary files on the disk when memory is an issue. This can potential build up |
|
137 |
## in long loops and can fill up hard drives resulting in errors. The following sections removes these files |
|
138 |
## as they are created in the loop. This code section can be transformed into a "clean-up function later on |
|
139 |
## Start remove |
|
140 |
#tempfiles<-list.files(tempdir(),full.names=T) #GDAL transient files are not removed |
|
141 |
#files_to_remove<-grep(out_suffix,tempfiles,value=T) #list files to remove |
|
142 |
#if(length(files_to_remove)>0){ |
|
143 |
# file.remove(files_to_remove) |
|
144 |
#} |
|
145 |
#now remove temp files from raster package located in rasterTmpDir |
|
146 |
removeTmpFiles(h=0) #did not work if h is not set to 0 |
|
147 |
## end of remove section |
|
148 |
|
|
149 |
return(rast_list) |
|
150 |
} |
|
151 |
|
|
152 |
extract_daily_training_testing_info<- function(i,list_param){ |
|
153 |
#This function extracts training and testing information from the raster object produced for each tile |
|
154 |
#This is looping through tiles... |
|
155 |
|
|
156 |
### Function: |
|
157 |
pred_data_info_fun <- function(k,list_data,pred_mod,sampling_dat_info){ |
|
158 |
|
|
159 |
data <- list_data[[k]] |
|
160 |
sampling_dat <- sampling_dat_info[[k]] |
|
161 |
if(data_day!="try-error"){ |
|
162 |
n <- nrow(data) |
|
163 |
n_mod <- vector("numeric",length(pred_mod)) |
|
164 |
for(j in 1:length(pred_mod)){ |
|
165 |
n_mod[j] <- sum(!is.na(data[[pred_mod[j]]])) |
|
166 |
} |
|
167 |
n <- rep(n,length(pred_mod)) |
|
168 |
sampling_dat <- sampling_dat[rep(seq_len(nrow(sampling_dat)), each=length(pred_mod)),] |
|
169 |
row.names(sampling_dat) <- NULL |
|
170 |
df_n <- data.frame(n,n_mod,pred_mod) |
|
171 |
df_n <- cbind(df_n,sampling_dat) |
|
172 |
}else{ |
|
173 |
n <- rep(NA,length(pred_mod)) |
|
174 |
n_mod <- vector("numeric",length(pred_mod)) |
|
175 |
n_mod <- rep(NA,length(pred_mod)) |
|
176 |
df_n <- data.frame(n,n_mod,pred_mod) |
|
177 |
sampling_dat <- sampling_dat[rep(seq_len(nrow(sampling_dat)), each=length(pred_mod)),] |
|
178 |
row.names(sampling_dat) <- NULL |
|
179 |
df_n <- data.frame(n,n_mod,pred_mod) |
|
180 |
df_n <- cbind(df_n,sampling_dat) |
|
181 |
|
|
182 |
} |
|
183 |
return(df_n) |
|
184 |
} |
|
185 |
|
|
186 |
##### Parse parameters and arguments #### |
|
187 |
|
|
188 |
raster_obj_file <- list_param$list_raster_obj_files[i] |
|
189 |
use_month <- list_param$use_month |
|
190 |
use_day <- list_param$use_day |
|
191 |
tile_id <- list_param$list_names_tile_id[i] |
|
192 |
|
|
193 |
### Start script ## |
|
194 |
|
|
195 |
raster_obj <- load_obj(unlist(raster_obj_file)) #may not need unlist |
|
196 |
nb_models <- length((raster_obj$clim_method_mod_obj[[1]]$formulas)) |
|
197 |
pred_mod <- paste("mod",c(1:nb_models,"_kr"),sep="") |
|
198 |
#we are assuming no monthly hold out... |
|
199 |
#we are assuming only one specific daily prop? |
|
200 |
nb_models <- length(pred_mod) |
|
201 |
#names(raster_obj$method_mod_obj[[1]]) |
|
202 |
var_interp <- unique(raster_obj$tb_diagnostic_s$var_interp) |
|
203 |
method_interp <- unique(raster_obj$tb_diagnostic_s$method_interp) |
|
204 |
|
|
205 |
if(use_day==TRUE){ |
|
206 |
|
|
207 |
list_data_day_v <- try(extract_list_from_list_obj(raster_obj$validation_mod_obj,"data_v")) |
|
208 |
list_data_day_s <- try(extract_list_from_list_obj(raster_obj$validation_mod_obj,"data_s")) |
|
209 |
sampling_dat_day <- extract_list_from_list_obj(raster_obj$method_mod_obj,"daily_dev_sampling_dat") |
|
210 |
list_pred_data_day_s_info <- lapply(1:length(sampling_dat_day),FUN=pred_data_info_fun, |
|
211 |
list_data=list_data_day_s,pred_mod=pred_mod,sampling_dat_info=sampling_dat_day) |
|
212 |
list_pred_data_day_v_info <- lapply(1:length(sampling_dat_day),FUN=pred_data_info_fun, |
|
213 |
list_data=list_data_day_v,pred_mod=pred_mod,sampling_dat_info=sampling_dat_day) |
|
214 |
pred_data_day_s_info <- do.call(rbind,list_pred_data_day_s_info) |
|
215 |
pred_data_day_v_info <- do.call(rbind,list_pred_data_day_v_info) |
|
216 |
pred_data_day_s_info$training <- rep(1,nrow(pred_data_day_s_info)) |
|
217 |
pred_data_day_v_info$training <- rep(0,nrow(pred_data_day_v_info)) |
|
218 |
pred_data_day_info <-rbind(pred_data_day_v_info,pred_data_day_s_info) |
|
219 |
|
|
220 |
pred_data_day_info$method_interp <- rep(method_interp,nrow(pred_data_day_info)) |
|
221 |
pred_data_day_info$var_interp <- rep(var_interp,nrow(pred_data_day_info)) |
|
222 |
pred_data_day_info$tile_id <- rep(tile_id,nrow(pred_data_day_info)) |
|
223 |
|
|
224 |
#pred_data_day_s_info$method_interp <- rep(method_interp,nrow(pred_data_day_s_info)) |
|
225 |
#pred_data_day_s_info$var_interp <- rep(var_interp,nrow(pred_data_day_s_info)) |
|
226 |
#pred_data_day_s_info$tile_id <- rep(tile_id,nrow(pred_data_day_s_info)) |
|
227 |
#pred_data_day_v_info <- do.call(rbind,list_pred_data_day_v_info) |
|
228 |
#pred_data_day_v_info$method_interp <- rep(method_interp,nrow(pred_data_day_v_info)) |
|
229 |
#pred_data_day_v_info$var_interp <- rep(var_interp,nrow(pred_data_day_v_info)) |
|
230 |
#pred_data_day_v_info$tile_id <- rep(tile_id,nrow(pred_data_day_v_info)) |
|
231 |
|
|
232 |
} |
|
233 |
if(use_month==TRUE){ |
|
234 |
|
|
235 |
list_data_month_s <- try(extract_list_from_list_obj(raster_obj$validation_mod_month_obj,"data_s")) |
|
236 |
list_data_month_v <- try(extract_list_from_list_obj(raster_obj$validation_mod_month_obj,"data_v")) |
|
237 |
sampling_dat_month <- extract_list_from_list_obj(raster_obj$clim_method_mod_obj,"sampling_month_dat") |
|
238 |
list_pred_data_month_s_info <- lapply(1:length(sampling_dat_month),FUN=pred_data_info_fun, |
|
239 |
list_data=list_data_month_s,pred_mod=pred_mod,sampling_dat_info=sampling_dat_month) |
|
240 |
list_pred_data_month_v_info <- lapply(1:length(sampling_dat_month),FUN=pred_data_info_fun, |
|
241 |
list_data=list_data_month_v,pred_mod=pred_mod,sampling_dat_info=sampling_dat_month) |
|
242 |
|
|
243 |
#combine training and testing later? also combined with accuracy |
|
244 |
pred_data_month_s_info <- do.call(rbind,list_pred_data_month_s_info) |
|
245 |
pred_data_month_v_info <- do.call(rbind,list_pred_data_month_v_info) |
|
246 |
|
|
247 |
pred_data_month_v_info$training <- rep(0,nrow(pred_data_month_v_info)) |
|
248 |
pred_data_month_s_info$training <- rep(1,nrow(pred_data_month_v_info)) |
|
249 |
pred_data_month_info <- rbind(pred_data_month_v_info,pred_data_month_s_info) |
|
250 |
|
|
251 |
pred_data_month_info$method_interp <- rep(method_interp,nrow(pred_data_month_info)) |
|
252 |
pred_data_month_info$var_interp <- rep(var_interp,nrow(pred_data_month_info)) |
|
253 |
pred_data_month_info$tile_id <- rep(tile_id,nrow(pred_data_month_info)) |
|
254 |
|
|
255 |
#pred_data_month_s_info$method_interp <- rep(method_interp,nrow(pred_data_month_s_info)) |
|
256 |
#pred_data_month_s_info$var_interp <- rep(var_interp,nrow(pred_data_month_s_info)) |
|
257 |
#pred_data_month_s_info$tile_id <- rep(tile_id,nrow(pred_data_month_s_info)) |
|
258 |
#pred_data_month_v_info$method_interp <- rep(method_interp,nrow(pred_data_month_v_info)) |
|
259 |
#pred_data_month_v_info$var_interp <- rep(var_interp,nrow(pred_data_month_v_info)) |
|
260 |
#pred_data_month_v_info$tile_id <- rep(tile_id,nrow(pred_data_month_v_info)) |
|
261 |
|
|
262 |
} |
|
263 |
|
|
264 |
if(use_month==FALSE){ |
|
265 |
pred_data_month_info <- NULL |
|
266 |
} |
|
267 |
if(use_day==FALSE){ |
|
268 |
pred_data_day_info <- NULL |
|
269 |
} |
|
270 |
|
|
271 |
#prepare object to return |
|
272 |
pred_data_info_obj <- list(pred_data_month_info,pred_data_day_info) |
|
273 |
names(pred_data_info_obj) <- c("pred_data_month_info","pred_data_day_info") |
|
274 |
#could add data.frame data_s and data_v later... |
|
275 |
|
|
276 |
return(pred_data_info_obj) |
|
277 |
} |
|
278 |
|
|
279 |
remove_from_list_fun <- function(l_x,condition_class ="try-error"){ |
|
280 |
index <- vector("list",length(l_x)) |
|
281 |
for (i in 1:length(l_x)){ |
|
282 |
if (inherits(l_x[[i]],condition_class)){ |
|
283 |
index[[i]] <- FALSE #remove from list |
|
284 |
}else{ |
|
285 |
index[[i]] <- TRUE |
|
286 |
} |
|
287 |
} |
|
288 |
l_x<-l_x[unlist(index)] #remove from list all elements using subset |
|
289 |
|
|
290 |
obj <- list(l_x,index) |
|
291 |
names(obj) <- c("list","valid") |
|
292 |
return(obj) |
|
293 |
} |
|
294 |
|
|
295 |
##Function to list predicted tif |
|
296 |
list_tif_fun <- function(i,in_dir_list,pattern_str){ |
|
297 |
#list.files(path=x,pattern=".*predicted_mod1_0_1.*20100101.*.tif",full.names=T)}) |
|
298 |
pat_str<- pattern_str[i] |
|
299 |
list_tif_files_dates <-lapply(in_dir_list, |
|
300 |
FUN=function(x,pat_str){list.files(path=x,pattern=pat_str,full.names=T)},pat_str=pat_str) |
|
301 |
return(list_tif_files_dates) |
|
302 |
} |
|
303 |
|
|
304 |
############################## |
|
305 |
#### Parameters and constants |
|
306 |
|
|
307 |
#in_dir1 <- "/data/project/layers/commons/NEX_data/test_run1_03232014/output" #On Atlas |
|
308 |
in_dir1 <- "/nobackupp4/aguzman4/climateLayers/output4" #On NEX |
|
309 |
|
|
310 |
#in_dir_list <- list.dirs(path=in_dir1) #get the list of directories with resutls by 10x10 degree tiles |
|
311 |
#use subset for now: |
|
312 |
|
|
313 |
in_dir_list <- file.path(in_dir1,read.table(file.path(in_dir1,"processed.txt"))$V1) |
|
314 |
#in_dir_list <- as.list(in_dir_list[-1]) |
|
315 |
#in_dir_list <- in_dir_list[grep("bak",basename(basename(in_dir_list)),invert=TRUE)] #the first one is the in_dir1 |
|
316 |
#in_dir_shp <- in_dir_list[grep("shapefiles",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles... |
|
317 |
in_dir_shp <- "/nobackupp4/aguzman4/climateLayers/output4/subset/shapefiles/" |
|
318 |
#in_dir_list <- in_dir_list[grep("shapefiles",basename(in_dir_list),invert=TRUE)] |
|
319 |
#the first one is the in_dir1 |
|
320 |
# the last directory contains shapefiles |
|
321 |
y_var_name <- "dailyTmax" |
|
322 |
interpolation_method <- c("gam_CAI") |
|
323 |
out_prefix<-"run2_global_analyses_05122014" |
|
324 |
|
|
325 |
#out_dir<-"/data/project/layers/commons/NEX_data/" #On NCEAS Atlas |
|
326 |
out_dir <- "/nobackup/bparmen1/" #on NEX |
|
327 |
#out_dir <-paste(out_dir,"_",out_prefix,sep="") |
|
328 |
create_out_dir_param <- TRUE |
|
329 |
|
|
330 |
#system("ls /nobackup/bparmen1") |
|
331 |
|
|
332 |
if(create_out_dir_param==TRUE){ |
|
333 |
out_dir <- create_dir_fun(out_dir,out_prefix) |
|
334 |
setwd(out_dir) |
|
335 |
}else{ |
|
336 |
setwd(out_dir) #use previoulsy defined directory |
|
337 |
} |
|
338 |
|
|
339 |
setwd(out_dir) |
|
340 |
|
|
341 |
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84 |
|
342 |
|
|
343 |
##raster_prediction object : contains testing and training stations with RMSE and model object |
|
344 |
|
|
345 |
list_raster_obj_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="^raster_prediction_obj.*.RData",full.names=T)}) |
|
346 |
basename(dirname(list_raster_obj_files[[1]])) |
|
347 |
list_names_tile_coord <- lapply(list_raster_obj_files,FUN=function(x){basename(dirname(x))}) |
|
348 |
list_names_tile_id <- paste("tile",1:length(list_raster_obj_files),sep="_") |
|
349 |
names(list_raster_obj_files)<- list_names_tile_id |
|
350 |
|
|
351 |
########################## START SCRIPT ############################## |
|
352 |
|
|
353 |
################################################################ |
|
354 |
######## PART 1: Generate tables to collect information |
|
355 |
######## over all tiles in North America |
|
356 |
|
|
357 |
##Function to collect all the tables from tiles into a table |
|
358 |
###Table 1: Average accuracy metrics |
|
359 |
###Table 2: daily accuracy metrics for all tiles |
|
360 |
|
|
361 |
#First create table of tiles under analysis and their coord |
|
362 |
df_tile_processed <- data.frame(tile_coord=basename(in_dir_list)) |
|
363 |
df_tile_processed$tile_id <- unlist(list_names_tile_id) |
|
364 |
|
|
365 |
##Quick exploration of raster object |
|
366 |
robj1 <- load_obj(list_raster_obj_files[[1]]) |
|
367 |
names(robj1) |
|
368 |
names(robj1$method_mod_obj[[1]]) #for January 1, 2010 |
|
369 |
names(robj1$method_mod_obj[[1]]$dailyTmax) #for January |
|
370 |
|
|
371 |
names(robj1$clim_method_mod_obj[[1]]$data_month) #for January |
|
372 |
names(robj1$validation_mod_month_obj[[1]]$data_s) #for January with predictions |
|
373 |
|
|
374 |
################ |
|
375 |
#### Table 1: Average accuracy metrics |
|
376 |
|
|
377 |
#can use a maximum of 6 cores on the NEX Bridge |
|
378 |
summary_metrics_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try( x<- load_obj(x)); try(x[["summary_metrics_v"]]$avg)},mc.preschedule=FALSE,mc.cores = 6) |
|
379 |
names(summary_metrics_v_list) <- list_names_tile_id |
|
380 |
|
|
381 |
summary_metrics_v_tmp <- remove_from_list_fun(summary_metrics_v_list)$list |
|
382 |
df_tile_processed$metrics_v <- remove_from_list_fun(summary_metrics_v_list)$valid |
|
383 |
#Now remove "try-error" from list of accuracy) |
|
384 |
|
|
385 |
summary_metrics_v_NA <- do.call(rbind.fill,summary_metrics_v_tmp) #create a df for NA tiles with all accuracy metrics |
|
386 |
#tile_coord <- lapply(1:length(summary_metrics_v_list),FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=summary_metrics_v_list) |
|
387 |
#add the tile id identifier |
|
388 |
tile_id_tmp <- lapply(1:length(summary_metrics_v_tmp), |
|
389 |
FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=summary_metrics_v_tmp,y=names(summary_metrics_v_tmp)) |
|
390 |
#adding tile id summary data.frame |
|
391 |
summary_metrics_v_NA$tile_id <-unlist(tile_id_tmp) |
|
392 |
summary_metrics_v_NA$n <- as.integer(summary_metrics_v_NA$n) |
|
393 |
write.table(as.data.frame(summary_metrics_v_NA), |
|
394 |
file=file.path(out_dir,paste("summary_metrics_v2_NA_",out_prefix,".txt",sep="")),sep=",") |
|
395 |
|
|
396 |
################# |
|
397 |
###Table 2: daily accuracy metrics for all tiles |
|
398 |
#this takes about 25min |
|
399 |
#tb_diagnostic_v_list <- lapply(list_raster_obj_files,FUN=function(x){x<-load_obj(x);x[["tb_diagnostic_v"]]}) |
|
400 |
tb_diagnostic_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_diagnostic_v"]])},mc.preschedule=FALSE,mc.cores = 6) |
|
401 |
|
|
402 |
names(tb_diagnostic_v_list) <- list_names_tile_id |
|
403 |
tb_diagnostic_v_tmp <- remove_from_list_fun(tb_diagnostic_v_list)$list |
|
404 |
#df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid |
|
405 |
|
|
406 |
tb_diagnostic_v_NA <- do.call(rbind.fill,tb_diagnostic_v_tmp) #create a df for NA tiles with all accuracy metrics |
|
407 |
tile_id_tmp <- lapply(1:length(tb_diagnostic_v_tmp), |
|
408 |
FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_diagnostic_v_tmp,y=names(tb_diagnostic_v_tmp)) |
|
409 |
|
|
410 |
tb_diagnostic_v_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile |
|
411 |
|
|
412 |
tb_diagnostic_v_NA <- merge(tb_diagnostic_v_NA,df_tile_processed[,1:2],by="tile_id") |
|
413 |
|
|
414 |
write.table((tb_diagnostic_v_NA), |
|
415 |
file=file.path(out_dir,paste("tb_diagnostic_v_NA","_",out_prefix,".txt",sep="")),sep=",") |
|
416 |
|
|
417 |
################# |
|
418 |
###Table 3: monthly station information with predictions for all tiles |
|
419 |
|
|
420 |
#load data_month for specific tiles |
|
421 |
# data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month") |
|
422 |
# names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info |
|
423 |
# |
|
424 |
# data_month_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x$validation_mod_month_obj[["data_s"]])},mc.preschedule=FALSE,mc.cores = 6) |
|
425 |
# |
|
426 |
# names(data_month_s_list) <- list_names_tile_id |
|
427 |
# |
|
428 |
# data_month_tmp <- remove_from_list_fun(data_month_s_list)$list |
|
429 |
# #df_tile_processed$metrics_v <- remove_from_list_fun(data_month_s_list)$valid |
|
430 |
# |
|
431 |
# tile_id <- lapply(1:length(data_month_tmp), |
|
432 |
# FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=data_month_tmp) |
|
433 |
# data_month_NAM <- do.call(rbind.fill,data_month_list) #combined data_month for "NAM" North America |
|
434 |
# data_month_NAM$tile_id <- unlist(tile_id) |
|
435 |
# |
|
436 |
# write.table((data_month_NAM), |
|
437 |
# file=file.path(out_dir,paste("data_month_s_NAM","_",out_prefix,".txt",sep="")),sep=",") |
|
438 |
|
|
439 |
##### Table 4: Add later on: daily info |
|
440 |
### with also data_s and data_v saved!!! |
|
441 |
|
|
442 |
#Copy back to atlas |
|
443 |
system("scp -p ./*.txt parmentier@atlas.nceas.ucsb.edu:/data/project/layers/commons/NEX_data/output_run2_global_analyses_05122014") |
|
444 |
#system("scp -p ./*.txt ./*.tif parmentier@atlas.nceas.ucsb.edu:/data/project/layers/commons/NEX_data/output_run2_global_analyses_05122014") |
|
445 |
|
|
446 |
###################################################### |
|
447 |
####### PART 2 CREATE MOSAIC OF PREDICTIONS PER DAY ### |
|
448 |
|
|
449 |
dates_l <- unique(robj1$tb_diagnostic_s$date) #list of dates to query tif |
|
450 |
|
|
451 |
#First mosaic mod1 |
|
452 |
|
|
453 |
## make this a function? report on number of tiles used for mosaic... |
|
454 |
|
|
455 |
#inputs: build a pattern to find files |
|
456 |
y_var_name <- "dailyTmax" |
|
457 |
interpolation_method <- c("gam_CAI") |
|
458 |
name_method <- paste(interpolation_method,"_",y_var_name,"_",sep="") |
|
459 |
l_pattern_models <- lapply(c(".*predicted_mod1_0_1.*",".*predicted_mod2_0_1.*",".*predicted_mod3_0_1.*"), |
|
460 |
FUN=function(x){paste(x,dates_l,".*.tif",sep="")}) |
|
461 |
out_prefix_s <- paste(name_method,c("predicted_mod1_0_01","predicted_mod2_0_01","predicted_mod3_0_01"),sep="") |
|
462 |
dates_l #list of predicted dates |
|
463 |
|
|
464 |
for (i in 1:lenth(l_pattern_models)){ |
|
465 |
|
|
466 |
l_pattern_mod <- l_pattern_models[[i]] |
|
467 |
#out_prefix_s <- |
|
468 |
|
|
469 |
l_out_rastnames_var <- paste("gam_CAI_dailyTmax_","predicted_mod1_0_01_",dates_l,sep="") |
|
470 |
|
|
471 |
#list_tif_files_dates <- list_tif_fun(1,in_dir_list,l_pattern_mod) |
|
472 |
|
|
473 |
##List of predicted tif ... |
|
474 |
list_tif_files_dates <-lapply(1:length(l_pattern_mod),FUN=list_tif_fun, |
|
475 |
in_dir_list=in_dir_list,pattern_str=l_pattern_mod) |
|
476 |
#save(list_tif_files_dates, file=paste("list_tif_files_dates","_",out_prefix,".RData",sep="")) |
|
477 |
|
|
478 |
|
|
479 |
mosaic_list_var <- list_tif_files_dates |
|
480 |
out_rastnames_var <- l_out_rastnames_var |
|
481 |
|
|
482 |
file_format <- ".tif" |
|
483 |
NA_flag_val <- -9999 |
|
484 |
|
|
485 |
j<-1 |
|
486 |
list_param_mosaic<-list(j,mosaic_list_var,out_rastnames_var,out_dir,file_format,NA_flag_val) |
|
487 |
names(list_param_mosaic)<-c("j","mosaic_list","out_rastnames","out_path","file_format","NA_flag_val") |
|
488 |
list_var_mosaiced <- mclapply(1:2,FUN=mosaic_m_raster_list,list_param=list_param_mosaic,mc.preschedule=FALSE,mc.cores = 1) |
|
489 |
#list_var_mosaiced <- mclapply(1:365,FUN=mosaic_m_raster_list,list_param=list_param_mosaic,mc.preschedule=FALSE,mc.cores = 2) |
|
490 |
|
|
491 |
} |
|
492 |
|
|
493 |
### Now find out how many files were predicted |
|
494 |
|
|
495 |
l_pattern_mod1<-paste(".*predicted_mod1_0_1.*",dates_l,".*.tif",sep="") |
|
496 |
|
|
497 |
l_f_t12 <- list.files(path=in_dir_list[12],".*predicted_mod1_0_1.*") |
|
498 |
|
|
499 |
|
|
500 |
l_f_bytiles<-lapply(in_dir_list,function(x){list.files(path=x,pattern=".*predicted_mod1_0_1.*")}) |
|
501 |
|
|
502 |
|
|
503 |
unlist(lapply(l_f_bytiles,length)) |
|
504 |
|
|
505 |
system("scp -p ./*.tif parmentier@atlas.nceas.ucsb.edu:/data/project/layers/commons/NEX_data/output_run2_global_analyses_05122014") |
|
506 |
|
|
507 |
###################################################### |
|
508 |
####### PART 3: EXAMINE STATIONS AND MODEL FITTING ### |
|
509 |
|
|
510 |
### Stations and model fitting ### |
|
511 |
#summarize location and number of training and testing used by tiles |
|
512 |
|
|
513 |
names(robj1$clim_method_mod_obj[[1]]$data_month) # monthly data for January |
|
514 |
#names(robj1$validation_mod_month_obj[[1]]$data_s) # daily for January with predictions |
|
515 |
#note that there is no holdout in the current run at the monthly time scale: |
|
516 |
|
|
517 |
robj1$clim_method_mod_obj[[1]]$data_month_v #zero rows for testing stations at monthly timescale |
|
518 |
#load data_month for specific tiles |
|
519 |
data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month") |
|
520 |
|
|
521 |
names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info |
|
522 |
|
|
523 |
#problem with tile 12...the raster ojbect has missing sub object |
|
524 |
#data_month_list <- lapply(1:length(list_raster_obj_files),x=list_raster_obj_files, |
|
525 |
# FUN=function(i,x){x<-load_obj(x[[i]]); |
|
526 |
# extract_from_list_obj(x$validation_mod_month_obj,"data_s")}) |
|
527 |
|
|
528 |
### make this part a function: |
|
529 |
|
|
530 |
#create a table for every month, day and tiles... |
|
531 |
# data_month_list <- lapply(1:length(list_raster_obj_files),x=list_raster_obj_files, |
|
532 |
# FUN=function(i,x){x<-load_obj(x[[i]]); |
|
533 |
# extract_from_list_obj(x$clim_method_mod_obj,"data_month")}) |
|
534 |
# |
|
535 |
# names(data_month_list) <- paste("tile","_",1:length(data_month_list),sep="") |
|
536 |
# |
|
537 |
# #names(data_month_list) <- basename(in_dir_list) #use folder id instead |
|
538 |
# |
|
539 |
# list_names_tile_id <- paste("tile",1:length(list_raster_obj_files),sep="_") |
|
540 |
# |
|
541 |
# #tile_id <- lapply(1:length(data_month_list), |
|
542 |
# # FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=data_month_list) |
|
543 |
# |
|
544 |
# data_month_NAM <- do.call(rbind.fill,data_month_list) #combined data_month for "NAM" North America |
|
545 |
# data_month_NAM$tile_id <- unlist(tile_id) |
|
546 |
# |
|
547 |
# names(robj1$validation_mod_day_obj[[1]]$data_s) # daily for January with predictions |
|
548 |
# dim(robj1$validation_mod_month_obj[[1]]$data_s) # daily for January with predictions |
|
549 |
# |
|
550 |
# use_day=TRUE |
|
551 |
# use_month=TRUE |
|
552 |
# |
|
553 |
# list_param_training_testing_info <- list(list_raster_obj_files,use_month,use_day,list_names_tile_id) |
|
554 |
# names(list_param_training_testing_info) <- c("list_raster_obj_files","use_month","use_day","list_names_tile_id") |
|
555 |
# |
|
556 |
# list_param <- list_param_training_testing_info |
|
557 |
# |
|
558 |
# pred_data_info <- lapply(1:length(list_raster_obj_files),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info) |
|
559 |
# |
|
560 |
# pred_data_month_info <- do.call(rbind,lapply(pred_data_info,function(x){x$pred_data_month_info})) |
|
561 |
# pred_data_day_info <- do.call(rbind,lapply(pred_data_info,function(x){x$pred_data_day_info})) |
|
562 |
|
|
563 |
###################################################### |
|
564 |
####### PART 4: GENERATE DIAGNOSTIC plots for the area analyzed ### |
|
565 |
|
|
566 |
### Create a combined shape file for all region? |
|
567 |
|
|
568 |
#get centroid |
|
569 |
#plot the average RMSE at the centroid?? |
|
570 |
#quick kriging for every tile? |
|
571 |
|
|
572 |
### Create a combined boxplot for every tile (can also do that in pannel) |
|
573 |
### Create a quick mosaic (mean method...) |
|
574 |
#mean predicitons |
|
575 |
#mean of kriging error? |
|
576 |
#plot(mm_01 ~ elev_s,data=data_month_NAM) #Jan across all tiles |
|
577 |
#plot(mm_06 ~ elev_s,data=data_month_NAM) #June across all tiles |
|
578 |
#plot(TMax ~ mm_01,data=data_month_NAM) #monthly tmax averages and LST across all tiles |
|
579 |
|
|
580 |
tb <- read.table(file.path(out_dir,"tb_diagnostic_v2_NA_run1_NA_analyses_03232013.txt"),sep=",") |
|
581 |
summary_metrics_v <- read.table(file.path(out_dir,"summary_metrics_v2_NA_run1_NA_analyses_03232013.txt"),sep=",") |
|
582 |
|
|
583 |
strsplit(unique(tb$tile_id),"_") |
|
584 |
tb$tile_id <- factor(tb$tile_id, levels=unique(tb$tile_id)) |
|
585 |
#tb$tile_id <- as.character(tb$tile_id) |
|
586 |
boxplot(rmse~tile_id,data=subset(tb,tb$pred_mod=="mod1"), |
|
587 |
names=1:24) |
|
588 |
title("RMSE per tile") |
|
589 |
#bwplot(rmse~as.factor(tile_id), data=subset(tb,tb$pred_mod=="mod1")) |
|
590 |
|
|
591 |
#Boxplot of RMSE by model |
|
592 |
boxplot(rmse~pred_mod,data=tb)#,names=tb$pred_mod) |
|
593 |
title("RMSE per model over 24 tiles") |
|
594 |
|
|
595 |
#Turn summary table to a point shp |
|
596 |
|
|
597 |
tx<-strsplit(as.character(summary_metrics_v$tile_coord),"_") |
|
598 |
lat<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][1]},x=tx)) |
|
599 |
long<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][2]},x=tx)) |
|
600 |
summary_metrics_v$lat <- lat |
|
601 |
summary_metrics_v$lon <- long |
|
602 |
|
|
603 |
coordinates(summary_metrics_v) <- cbind(long,lat) |
|
604 |
|
|
605 |
ac_mod1 <- summary_metrics_v[summary_metrics_v$pred_mod=="mod1",] |
|
606 |
|
|
607 |
plot(r2) |
|
608 |
#plot(ac_mod1,cex=sqrt(ac_mod1$rmse),pch=1,add=T) |
|
609 |
plot(ac_mod1,cex=(ac_mod1$rmse^2)/10,pch=1,add=T) |
|
610 |
plot(ac_mod1,cex=(ac_mod1$rmse1)*2,pch=1,add=T) |
|
611 |
|
|
612 |
df <-arrange(as.data.frame(ac_mod1),desc(rmse))[,c("rmse","mae","tile_id")] |
|
613 |
#View(df) |
|
614 |
#quick kriging... |
|
615 |
autokrige(rmse~1,r2,) |
|
616 |
### COMBINED SHAPEFILES AND EXAMING CENTROID |
|
617 |
|
|
618 |
##Get the list of shapefiles |
|
619 |
in_dir_list_NEX <- read.table("/data/project/layers/commons/NEX_data/test_run1_03232014/output/in_dir_list.txt",sep=" ") |
|
620 |
|
|
621 |
list.files(path=in_dir_shp,pattern=paste("*.",as.character(in_dir_list_NEX[1,1]),".*.shp",sep="")) |
|
622 |
list_shp_reg_files <- list.files(path=in_dir_shp,pattern="*.shp") |
|
623 |
list_shp_reg_files2 <- list_shp_reg_files[grep("shapefiles",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles... |
|
624 |
|
|
625 |
tx <- strsplit(as.character(in_dir_list_NEX[,1]),"_") |
|
626 |
lat_shp<- lapply(1:length(tx),function(i,x){x[[i]][1]},x=tx) |
|
627 |
long_shp<- lapply(1:length(tx),function(i,x){x[[i]][2]},x=tx) |
|
628 |
|
|
629 |
summary_metrics |
|
630 |
#print.numeric<-function(x, digits = 2) formatC(x, digits = digits, format = "f") |
|
631 |
#print.numeric(long_shp[[1]]) |
|
632 |
|
|
633 |
pattern_str<-lapply(1:length(long_shp),function(i,y,x){paste(y[[i]],"0_",x[[i]],"0",sep="")},y=lat_shp,x=long_shp) |
|
634 |
#Select the 24 tiles that were used in the predictions based on lat, long |
|
635 |
list_shp_reg_files <- lapply(pattern_str,FUN=function(x){list.files(path=in_dir_shp, |
|
636 |
pattern=paste("*.",x,".*.shp$",sep=""),full.names=T)}) |
|
637 |
### |
|
638 |
#OK now get the shapefiles merged |
|
639 |
#ghcn_dat <- readOGR(dsn=dirname(met_stations_obj$monthly_covar_ghcn_data), |
|
640 |
# sub(".shp","",basename(met_stations_obj$monthly_covar_ghcn_data))) |
|
641 |
|
|
642 |
#plot(r44) |
|
643 |
|
|
644 |
#usa_map <- getData('GADM', country='USA', level=1) #Get US map |
|
645 |
usa_map <- getData('GADM', country='USA',level=1) #Get US map, this is not working right now |
|
646 |
usa_map_2 <- usa_map[usa_map$NAME_1!="Alaska",] #remove Alaska |
|
647 |
usa_map_2 <- usa_map_2[usa_map_2$NAME_1!="Hawaii",] #remove Hawai |
|
648 |
|
|
649 |
centroids_pts <- vector("list",length(list_shp_reg_files)) |
|
650 |
shps_tiles <- vector("list",length(list_shp_reg_files)) |
|
651 |
#collect info |
|
652 |
for(i in 1:length(list_shp_reg_files)){ |
|
653 |
shp1<-readOGR(dirname(list_shp_reg_files[[i]]),sub(".shp","",basename(list_shp_reg_files[[i]]))) |
|
654 |
pt <- gCentroid(shp1) |
|
655 |
centroids_pts[[i]] <-pt |
|
656 |
shps_tiles[[i]] <- shp1 |
|
657 |
} |
|
658 |
#plot info: with labels |
|
659 |
plot(usa_map_2) |
|
660 |
for(i in 1:length(list_shp_reg_files)){ |
|
661 |
shp1 <- shps_tiles[[i]] |
|
662 |
pt <- centroids_pts[[i]] |
|
663 |
plot(shp1,add=T,border="blue") |
|
664 |
#plot(pt,add=T,cex=2,pch=5) |
|
665 |
text(coordinates(pt)[1],coordinates(pt)[2],labels=i,cex=1,col=c("red")) |
|
666 |
} |
|
667 |
title("Tiles location 10x10 degrees for NAM") |
|
668 |
|
|
669 |
## Now plot RMSE: with labels |
|
670 |
plot(usa_map_2) |
|
671 |
for(i in 1:length(list_shp_reg_files)){ |
|
672 |
shp1 <- shps_tiles[[i]] |
|
673 |
pt <- centroids_pts[[i]] |
|
674 |
plot(shp1,add=T,border="blue") |
|
675 |
#plot(pt,add=T,cex=2,pch=5) |
|
676 |
text(coordinates(pt)[1],coordinates(pt)[2],labels=i,cex=1,col=c("red")) |
|
677 |
} |
|
678 |
title("Tiles location 10x10 degrees for NAM") |
|
679 |
|
|
680 |
|
|
681 |
#unique(summaty_metrics$tile_id) |
|
682 |
#text(lat-shp,) |
|
683 |
#union(list_shp_reg_files[[1]],list_shp_reg_files[[2]]) |
|
684 |
|
|
685 |
l_m_tif <- list.files(pattern="*mosaiced.*.tif") |
|
686 |
r1 <- raster("mosaiced_gam_fusion_dailyTmax_predicted_mod1_0_01_20100101.tif") |
|
687 |
r2 <- raster("mosaiced_gam_fusion_dailyTmax_predicted_mod1_0_01_20100901.tif") |
|
688 |
|
|
689 |
pred_s <- stack(l_m_tif[2:5]) |
|
690 |
levelplot(pred_s) |
|
691 |
|
|
692 |
##################### END OF SCRIPT ###################### |
Also available in: Unified diff
global scaling up assessment part 2: automation of figures for evaluation using tables and mosaics from part1