Revision edd7e7d5
Added by Benoit Parmentier almost 9 years ago
climate/research/oregon/interpolation/global_run_scalingup_assessment_part1a.R | ||
---|---|---|
1 |
#################################### INTERPOLATION OF TEMPERATURES ####################################### |
|
2 |
############################ Script for assessment of scaling up on NEX: part 1 ############################## |
|
3 |
#This script uses the worklfow code applied to the globe. Results currently reside on NEX/PLEIADES NASA. |
|
4 |
#The purpose is to create as set of functions to diagnose and assess quickly a set of predictd tiles. |
|
5 |
#Part 1 create summary tables and inputs files for figure in part 2 and part 3. |
|
6 |
#AUTHOR: Benoit Parmentier |
|
7 |
#CREATED ON: 03/23/2014 |
|
8 |
#MODIFIED ON: 12/28/2015 |
|
9 |
#Version: 4 |
|
10 |
#PROJECT: Environmental Layers project |
|
11 |
#TO DO: |
|
12 |
# - |
|
13 |
# - Make this a function... |
|
14 |
# - Drop mosaicing from the script |
|
15 |
# |
|
16 |
#First source these files: |
|
17 |
#Resolved call issues from R. |
|
18 |
#source /nobackupp6/aguzman4/climateLayers/sharedModules2/etc/environ.sh |
|
19 |
#MODULEPATH=$MODULEPATH:/nex/modules/files |
|
20 |
#module load pythonkits/gdal_1.10.0_python_2.7.3_nex |
|
21 |
|
|
22 |
# These are the names and number for the current subset regions used for global runs: |
|
23 |
#reg1 - North America (NAM) |
|
24 |
#reg2 - Europe (WE) |
|
25 |
#reg3 - Asia |
|
26 |
#reg4 - South America (SAM) |
|
27 |
#reg5 - Africa (AF) |
|
28 |
#reg6 - East Asia and Australia |
|
29 |
|
|
30 |
################################################################################################# |
|
31 |
|
|
32 |
### Loading R library and packages |
|
33 |
#library used in the workflow production: |
|
34 |
library(gtools) # loading some useful tools |
|
35 |
library(mgcv) # GAM package by Simon Wood |
|
36 |
library(sp) # Spatial pacakge with class definition by Bivand et al. |
|
37 |
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al. |
|
38 |
library(rgdal) # GDAL wrapper for R, spatial utilities |
|
39 |
library(gstat) # Kriging and co-kriging by Pebesma et al. |
|
40 |
library(fields) # NCAR Spatial Interpolation methods such as kriging, splines |
|
41 |
library(raster) # Hijmans et al. package for raster processing |
|
42 |
library(gdata) # various tools with xls reading, cbindX |
|
43 |
library(rasterVis) # Raster plotting functions |
|
44 |
library(parallel) # Parallelization of processes with multiple cores |
|
45 |
library(maptools) # Tools and functions for sp and other spatial objects e.g. spCbind |
|
46 |
library(maps) # Tools and data for spatial/geographic objects |
|
47 |
library(reshape) # Change shape of object, summarize results |
|
48 |
library(plotrix) # Additional plotting functions |
|
49 |
library(plyr) # Various tools including rbind.fill |
|
50 |
library(spgwr) # GWR method |
|
51 |
library(automap) # Kriging automatic fitting of variogram using gstat |
|
52 |
library(rgeos) # Geometric, topologic library of functions |
|
53 |
#RPostgreSQL # Interface R and Postgres, not used in this script |
|
54 |
library(gridExtra) |
|
55 |
#Additional libraries not used in workflow |
|
56 |
library(pgirmess) # Krusall Wallis test with mulitple options, Kruskalmc {pgirmess} |
|
57 |
library(colorRamps) |
|
58 |
|
|
59 |
#### FUNCTION USED IN SCRIPT |
|
60 |
|
|
61 |
function_analyses_paper1 <- "global_run_scalingup_assessment_part1_functions_02112015.R" #PARAM12 |
|
62 |
script_path <- "/nobackupp8/bparmen1/env_layers_scripts" #path to script |
|
63 |
source(file.path(script_path,function_analyses_paper1)) #source all functions used in this script |
|
64 |
|
|
65 |
############################## |
|
66 |
#### Parameters and constants |
|
67 |
|
|
68 |
#Make this a function |
|
69 |
#reg1 (North Am), reg2(Europe),reg3(Asia), reg4 (South Am), reg5 (Africa), reg6 (Australia-Asia) |
|
70 |
#master directory containing the definition of tile size and tiles predicted |
|
71 |
in_dir1 <- "/nobackupp6/aguzman4/climateLayers/out/" |
|
72 |
#/nobackupp6/aguzman4/climateLayers/out_15x45/1982 |
|
73 |
|
|
74 |
#region_names <- c("reg23","reg4") #selected region names, #PARAM2 |
|
75 |
region_names <- c("reg4") |
|
76 |
#region_names <- c("1992") #no specific region here so use date |
|
77 |
#region_names <- c("reg1","reg2","reg3","reg4","reg5","reg6") #selected region names, #PARAM2 |
|
78 |
#region_namesb <- c("reg_1b","reg_1c","reg_2b","reg_3b","reg_6b") #selected region names, #PARAM2 |
|
79 |
|
|
80 |
y_var_name <- "dailyTmax" #PARAM3 |
|
81 |
interpolation_method <- c("gam_CAI") #PARAM4 |
|
82 |
out_prefix<-"run_global_analyses_pred_12282015" #PARAM5 |
|
83 |
|
|
84 |
#output_run10_1500x4500_global_analyses_pred_2003_04102015/ |
|
85 |
|
|
86 |
#out_dir<-"/data/project/layers/commons/NEX_data/" #On NCEAS Atlas |
|
87 |
#out_dir <- "/nobackup/bparmen1/" #on NEX |
|
88 |
out_dir <- "/nobackupp8/bparmen1/" #PARAM6 |
|
89 |
#out_dir <-paste(out_dir,"_",out_prefix,sep="") |
|
90 |
create_out_dir_param <- TRUE #PARAM7 |
|
91 |
|
|
92 |
CRS_locs_WGS84 <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84, #PARAM8 |
|
93 |
|
|
94 |
#day_to_mosaic <- c("19920101","19920102","19920103,19920104,19920105") |
|
95 |
#day_to_mosaic <- NULL #if day to mosaic is null then mosaic all dates? |
|
96 |
list_year_predicted <- 1984:2004 |
|
97 |
year_predicted <- list_year_predicted[1] |
|
98 |
|
|
99 |
file_format <- ".tif" #format for mosaiced files #PARAM10 |
|
100 |
NA_flag_val <- -9999 #No data value, #PARAM11 |
|
101 |
num_cores <- 6 #number of cores used #PARAM13 |
|
102 |
|
|
103 |
#Models used. |
|
104 |
#list_models<-c("y_var ~ s(lat,lon,k=4) + s(elev_s,k=3) + s(LST,k=3)", |
|
105 |
# "y_var ~ s(lat,lon,k=5) + s(elev_s,k=3) + s(LST,k=3)", |
|
106 |
# "y_var ~ s(lat,lon,k=8) + s(elev_s,k=4) + s(LST,k=4)", |
|
107 |
|
|
108 |
#module_path <- "/nobackupp6/aguzman4/climateLayers/sharedCode/" #PARAM14 |
|
109 |
#mosaics script #PARAM 15 |
|
110 |
#shell global mosaic script #PARAM 16 |
|
111 |
#gather station data |
|
112 |
|
|
113 |
########################## START SCRIPT ######################################### |
|
114 |
|
|
115 |
#Need to make this a function to run as a job... |
|
116 |
|
|
117 |
######################## PART0: Read content of predictions first.... ##### |
|
118 |
|
|
119 |
|
|
120 |
in_dir_list <- list.dirs(path=in_dir1,recursive=FALSE) #get the list regions processed for this run |
|
121 |
#basename(in_dir_list) |
|
122 |
in_dir_list<- lapply(region_names,FUN=function(x,y){y[grep(paste(x,"$",sep=""),basename(y),invert=FALSE)]}, |
|
123 |
y=in_dir_list) |
|
124 |
|
|
125 |
in_dir_list_all <- unlist(lapply(in_dir_list,function(x){list.dirs(path=x,recursive=F)})) |
|
126 |
in_dir_list <- in_dir_list_all |
|
127 |
#in_dir_list <- in_dir_list[grep("bak",basename(basename(in_dir_list)),invert=TRUE)] #the first one is the in_dir1 |
|
128 |
|
|
129 |
#this was changed on 10052015 because the shapefiles were not matching!!! |
|
130 |
#in_dir_list_tmp <- list.dirs(path=in_dir1,recursive=FALSE) #get the list regions processed for this run |
|
131 |
#in_dir_subset <- in_dir_list_tmp[grep("subset",basename(in_dir_list_tmp),invert=FALSE)] #select directory with shapefiles... |
|
132 |
#in_dir_shp <- file.path(in_dir_subset,"shapefiles") |
|
133 |
#in_dir_list_tmp <- list.dirs(path=in_dir1,recursive=FALSE) #get the list regions processed for this run |
|
134 |
in_dir_subset <- in_dir_list_all[grep("subset",basename(in_dir_list_all),invert=FALSE)] #select directory with shapefiles... |
|
135 |
in_dir_shp <- file.path(in_dir_subset,"shapefiles") |
|
136 |
|
|
137 |
|
|
138 |
#select only directories used for predictions |
|
139 |
in_dir_reg <- in_dir_list[grep(".*._.*.",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles... |
|
140 |
#in_dir_reg <- in_dir_list[grep("july_tiffs",basename(in_dir_reg),invert=TRUE)] #select directory with shapefiles... |
|
141 |
in_dir_list <- in_dir_reg |
|
142 |
|
|
143 |
|
|
144 |
in_dir_list <- in_dir_list[grep("bak",basename(basename(in_dir_list)),invert=TRUE)] #the first one is the in_dir1 |
|
145 |
#list of shapefiles used to define tiles |
|
146 |
in_dir_shp_list <- list.files(in_dir_shp,".shp",full.names=T) |
|
147 |
|
|
148 |
## load problematic tiles or additional runs |
|
149 |
#modify later... |
|
150 |
|
|
151 |
#system("ls /nobackup/bparmen1") |
|
152 |
|
|
153 |
if(create_out_dir_param==TRUE){ |
|
154 |
out_dir <- create_dir_fun(out_dir,out_prefix) |
|
155 |
setwd(out_dir) |
|
156 |
}else{ |
|
157 |
setwd(out_dir) #use previoulsy defined directory |
|
158 |
} |
|
159 |
|
|
160 |
setwd(out_dir) |
|
161 |
|
|
162 |
##raster_prediction object : contains testing and training stations with RMSE and model object |
|
163 |
in_dir_list_tmp <- file.path(in_dir_list,year_predicted) |
|
164 |
list_raster_obj_files <- lapply(in_dir_list_tmp,FUN=function(x){list.files(path=x,pattern="^raster_prediction_obj.*.RData",full.names=T)}) |
|
165 |
basename(dirname(list_raster_obj_files[[1]])) |
|
166 |
list_names_tile_coord <- lapply(list_raster_obj_files,FUN=function(x){basename(dirname(x))}) |
|
167 |
list_names_tile_id <- paste("tile",1:length(list_raster_obj_files),sep="_") |
|
168 |
names(list_raster_obj_files)<- list_names_tile_id |
|
169 |
|
|
170 |
#one level up |
|
171 |
lf_covar_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="covar_obj.*.RData",full.names=T)}) |
|
172 |
lf_covar_tif <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="covar.*.tif",full.names=T)}) |
|
173 |
|
|
174 |
#sub_sampling_obj_daily_gam_CAI_10.0_-75.0.RData |
|
175 |
#sub_sampling_obj_gam_CAI_10.0_-75.0.RData |
|
176 |
|
|
177 |
lf_sub_sampling_obj_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern=paste("^sub_sampling_obj_",interpolation_method,".*.RData",sep=""),full.names=T)}) |
|
178 |
lf_sub_sampling_obj_daily_files <- lapply(in_dir_list_tmp,FUN=function(x){list.files(path=x,pattern="^sub_sampling_obj_daily.*.RData",full.names=T)}) |
|
179 |
|
|
180 |
## This will be part of the raster_obj function |
|
181 |
#debug(create_raster_prediction_obj) |
|
182 |
#out_prefix_str <- paste(basename(in_dir_list),out_prefix,sep="_") |
|
183 |
#lf_raster_obj <- create_raster_prediction_obj(in_dir_list,interpolation_method, y_var_name,out_prefix_str,out_path_list=NULL) |
|
184 |
|
|
185 |
################################################################ |
|
186 |
######## PART 1: Generate tables to collect information: |
|
187 |
######## over all tiles in North America |
|
188 |
|
|
189 |
##Function to collect all the tables from tiles into a table |
|
190 |
###Table 1: Average accuracy metrics |
|
191 |
###Table 2: daily accuracy metrics for all tiles |
|
192 |
|
|
193 |
#First create table of tiles under analysis and their coord |
|
194 |
df_tile_processed <- data.frame(tile_coord=basename(in_dir_list)) |
|
195 |
df_tile_processed$tile_id <- unlist(list_names_tile_id) #Arbitrary tiling number!! |
|
196 |
df_tile_processed$path_NEX <- in_dir_list |
|
197 |
df_tile_processed$year_predicted <- year_predicted |
|
198 |
df_tile_processed$sub_sampling_clim <- lf_sub_sampling_obj_files |
|
199 |
df_tile_processed$sub_sampling_daily <- lf_sub_sampling_obj_daily_files |
|
200 |
lf_sub_sampling_obj_files |
|
201 |
|
|
202 |
##Quick exploration of raster object |
|
203 |
#Should be commented out to make this a function |
|
204 |
#robj1 <- try(load_obj(list_raster_obj_files[[3]])) #This is an example tile |
|
205 |
#robj1 <- load_obj(lf_raster_obj[4]) #This is tile tile |
|
206 |
|
|
207 |
#names(robj1) |
|
208 |
#names(robj1$method_mod_obj[[2]]) #for January 1, 2010 |
|
209 |
#names(robj1$method_mod_obj[[2]]$dailyTmax) #for January |
|
210 |
#names(robj1$method_mod_obj[[11]]) #for January 1, 2010 |
|
211 |
#names(robj1$method_mod_obj[[11]]$dailyTmax) #for January |
|
212 |
|
|
213 |
#names(robj1$clim_method_mod_obj[[1]]$data_month) #for January |
|
214 |
#names(robj1$validation_mod_month_obj[[1]]$data_s) #for January with predictions |
|
215 |
#Get the number of models predicted |
|
216 |
#nb_mod <- length(unique(robj1$tb_diagnostic_v$pred_mod))# |
|
217 |
#list_formulas <- (robj1$clim_method_mod_obj[[1]]$formulas) |
|
218 |
#dates_predicted <- (unique(robj1$tb_diagnostic_v$date)) |
|
219 |
|
|
220 |
|
|
221 |
#list_tb_diagnostic_v <- mclapply(lf_validation_obj,FUN=function(x){try( x<- load_obj(x)); try(extract_from_list_obj(x,"metrics_v"))},mc.preschedule=FALSE,mc.cores = 6) |
|
222 |
#names(list_tb_diagnostic_v) <- list_names_tile_id |
|
223 |
|
|
224 |
################ |
|
225 |
#### Table 1: Average accuracy metrics per tile and predictions |
|
226 |
|
|
227 |
#can use a maximum of 6 cores on the NEX Bridge |
|
228 |
#For 28 tiles and 28 RData boject it takes 15-16 min |
|
229 |
#summary_metrics_v_list <- mclapply(list_raster_obj_files[5:6],FUN=function(x){try( x<- load_obj(x)); try(x[["summary_metrics_v"]]$avg)},mc.preschedule=FALSE,mc.cores = 2) |
|
230 |
|
|
231 |
summary_metrics_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try( x<- load_obj(x)); try(x[["summary_metrics_v"]]$avg)},mc.preschedule=FALSE,mc.cores = num_cores) |
|
232 |
#summary_metrics_v_list <- lapply(summary_metrics_v_list,FUN=function(x){try(x$avg)}) |
|
233 |
names(summary_metrics_v_list) <- list_names_tile_id |
|
234 |
|
|
235 |
summary_metrics_v_tmp <- remove_from_list_fun(summary_metrics_v_list)$list |
|
236 |
df_tile_processed$metrics_v <- as.integer(remove_from_list_fun(summary_metrics_v_list)$valid) |
|
237 |
#Now remove "try-error" from list of accuracy) |
|
238 |
|
|
239 |
summary_metrics_v_NA <- do.call(rbind.fill,summary_metrics_v_tmp) #create a df for NA tiles with all accuracy metrics |
|
240 |
#tile_coord <- lapply(1:length(summary_metrics_v_list),FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=summary_metrics_v_list) |
|
241 |
#add the tile id identifier |
|
242 |
tile_id_tmp <- lapply(1:length(summary_metrics_v_tmp), |
|
243 |
FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=summary_metrics_v_tmp,y=names(summary_metrics_v_tmp)) |
|
244 |
#adding tile id summary data.frame |
|
245 |
summary_metrics_v_NA$tile_id <-unlist(tile_id_tmp) |
|
246 |
summary_metrics_v_NA$n <- as.integer(summary_metrics_v_NA$n) |
|
247 |
|
|
248 |
summary_metrics_v_NA <- merge(summary_metrics_v_NA,df_tile_processed[,1:2],by="tile_id") |
|
249 |
|
|
250 |
tx<-strsplit(as.character(summary_metrics_v_NA$tile_coord),"_") |
|
251 |
lat<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][1]},x=tx)) |
|
252 |
long<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][2]},x=tx)) |
|
253 |
summary_metrics_v_NA$lat <- lat |
|
254 |
summary_metrics_v_NA$lon <- long |
|
255 |
|
|
256 |
write.table(as.data.frame(summary_metrics_v_NA), |
|
257 |
file=file.path(out_dir,paste("summary_metrics_v2_NA_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",") |
|
258 |
|
|
259 |
################# |
|
260 |
###Table 2: daily validation/testing accuracy metrics for all tiles |
|
261 |
#this takes about 15min for 28 tiles (reg4) |
|
262 |
#tb_diagnostic_v_list <- lapply(list_raster_obj_files,FUN=function(x){x<-load_obj(x);x[["tb_diagnostic_v"]]}) |
|
263 |
tb_diagnostic_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_diagnostic_v"]])},mc.preschedule=FALSE,mc.cores = num_cores) |
|
264 |
|
|
265 |
names(tb_diagnostic_v_list) <- list_names_tile_id |
|
266 |
tb_diagnostic_v_tmp <- remove_from_list_fun(tb_diagnostic_v_list)$list |
|
267 |
#df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid |
|
268 |
|
|
269 |
tb_diagnostic_v_NA <- do.call(rbind.fill,tb_diagnostic_v_tmp) #create a df for NA tiles with all accuracy metrics |
|
270 |
tile_id_tmp <- lapply(1:length(tb_diagnostic_v_tmp), |
|
271 |
FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_diagnostic_v_tmp,y=names(tb_diagnostic_v_tmp)) |
|
272 |
|
|
273 |
tb_diagnostic_v_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile |
|
274 |
|
|
275 |
tb_diagnostic_v_NA <- merge(tb_diagnostic_v_NA,df_tile_processed[,1:2],by="tile_id") |
|
276 |
|
|
277 |
write.table((tb_diagnostic_v_NA), |
|
278 |
file=file.path(out_dir,paste("tb_diagnostic_v_NA_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",") |
|
279 |
|
|
280 |
################# |
|
281 |
###Table 3: monthly fit/training accuracy information for all tiles |
|
282 |
|
|
283 |
## Monthly fitting information |
|
284 |
tb_month_diagnostic_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_month_diagnostic_s"]])},mc.preschedule=FALSE,mc.cores = num_cores) |
|
285 |
|
|
286 |
names(tb_month_diagnostic_s_list) <- list_names_tile_id |
|
287 |
tb_month_diagnostic_s_tmp <- remove_from_list_fun(tb_month_diagnostic_s_list)$list |
|
288 |
#df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid |
|
289 |
|
|
290 |
tb_month_diagnostic_s_NA <- do.call(rbind.fill,tb_month_diagnostic_s_tmp) #create a df for NA tiles with all accuracy metrics |
|
291 |
tile_id_tmp <- lapply(1:length(tb_month_diagnostic_s_tmp), |
|
292 |
FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_month_diagnostic_s_tmp,y=names(tb_month_diagnostic_s_tmp)) |
|
293 |
|
|
294 |
tb_month_diagnostic_s_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile |
|
295 |
|
|
296 |
tb_month_diagnostic_s_NA <- merge(tb_month_diagnostic_s_NA,df_tile_processed[,1:2],by="tile_id") |
|
297 |
|
|
298 |
date_f<-strptime(tb_month_diagnostic_s_NA$date, "%Y%m%d") # interpolation date being processed |
|
299 |
tb_month_diagnostic_s_NA$month<-strftime(date_f, "%m") # current month of the date being processed |
|
300 |
|
|
301 |
write.table((tb_month_diagnostic_s_NA), |
|
302 |
file=file.path(out_dir,paste("tb_month_diagnostic_s_NA_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",") |
|
303 |
|
|
304 |
################# |
|
305 |
###Table 4: daily fit/training accuracy information with predictions for all tiles |
|
306 |
|
|
307 |
## daily fit info: |
|
308 |
|
|
309 |
tb_diagnostic_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_diagnostic_s"]])},mc.preschedule=FALSE,mc.cores = num_cores) |
|
310 |
|
|
311 |
names(tb_diagnostic_s_list) <- list_names_tile_id |
|
312 |
tb_diagnostic_s_tmp <- remove_from_list_fun(tb_diagnostic_s_list)$list |
|
313 |
#df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid |
|
314 |
|
|
315 |
tb_diagnostic_s_NA <- do.call(rbind.fill,tb_diagnostic_s_tmp) #create a df for NA tiles with all accuracy metrics |
|
316 |
tile_id_tmp <- lapply(1:length(tb_diagnostic_s_tmp), |
|
317 |
FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_diagnostic_s_tmp,y=names(tb_diagnostic_s_tmp)) |
|
318 |
|
|
319 |
tb_diagnostic_s_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile |
|
320 |
|
|
321 |
tb_diagnostic_s_NA <- merge(tb_diagnostic_s_NA,df_tile_processed[,1:2],by="tile_id") |
|
322 |
|
|
323 |
write.table((tb_diagnostic_s_NA), |
|
324 |
file=file.path(out_dir,paste("tb_diagnostic_s_NA_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",") |
|
325 |
|
|
326 |
##### Table 5: Add later on: daily info |
|
327 |
### with also data_s and data_v saved!!! |
|
328 |
|
|
329 |
#Insert here...compute input and predicted ranges to spot potential errors? |
|
330 |
|
|
331 |
### Make this part a function...this is repetitive |
|
332 |
##### SPDF of Monhtly Station info |
|
333 |
#load data_month for specific tiles |
|
334 |
#10.45pm |
|
335 |
#data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month") |
|
336 |
#names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info |
|
337 |
|
|
338 |
#data_month_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x$validation_mod_month_obj[["data_s"]])},mc.preschedule=FALSE,mc.cores = 6) |
|
339 |
data_month_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_month_obj,"data_s"))},mc.preschedule=FALSE,mc.cores = 6) |
|
340 |
#test <- mclapply(list_raster_obj_files[1:6],FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_month_obj,"data_s"))},mc.preschedule=FALSE,mc.cores = 6) |
|
341 |
|
|
342 |
names(data_month_s_list) <- list_names_tile_id |
|
343 |
|
|
344 |
data_month_tmp <- remove_from_list_fun(data_month_s_list)$list |
|
345 |
#df_tile_processed$metrics_v <- remove_from_list_fun(data_month_s_list)$valid |
|
346 |
|
|
347 |
tile_id <- lapply(1:length(data_month_tmp), |
|
348 |
FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=data_month_tmp) |
|
349 |
data_month_NAM <- do.call(rbind.fill,data_month_tmp) #combined data_month for "NAM" North America |
|
350 |
data_month_NAM$tile_id <- unlist(tile_id) |
|
351 |
|
|
352 |
write.table((data_month_NAM), |
|
353 |
file=file.path(out_dir,paste("data_month_s_NAM_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",") |
|
354 |
|
|
355 |
#Get validation data?? Find other object from within the dir |
|
356 |
#Som region don't have validation data at monthly time scale. |
|
357 |
|
|
358 |
#### SPDF of daily Station info |
|
359 |
#load data_month for specific tiles |
|
360 |
#data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month") |
|
361 |
#names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info |
|
362 |
|
|
363 |
data_day_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_obj,"data_s"))},mc.preschedule=FALSE,mc.cores = num_cores) |
|
364 |
data_day_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_obj,"data_v"))},mc.preschedule=FALSE,mc.cores = num_cores) |
|
365 |
|
|
366 |
names(data_day_s_list) <- list_names_tile_id |
|
367 |
names(data_day_v_list) <- list_names_tile_id |
|
368 |
|
|
369 |
data_day_s_tmp <- remove_from_list_fun(data_day_s_list)$list |
|
370 |
data_day_v_tmp <- remove_from_list_fun(data_day_v_list)$list |
|
371 |
|
|
372 |
#df_tile_processed$metrics_v <- remove_from_list_fun(data_month_s_list)$valid |
|
373 |
|
|
374 |
tile_id <- lapply(1:length(data_day_s_tmp), |
|
375 |
FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=data_day_s_tmp) |
|
376 |
data_day_s_NAM <- do.call(rbind.fill,data_day_s_tmp) #combined data_month for "NAM" North America |
|
377 |
data_day_s_NAM$tile_id <- unlist(tile_id) |
|
378 |
|
|
379 |
tile_id <- lapply(1:length(data_day_v_tmp), |
|
380 |
FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=data_day_v_tmp) |
|
381 |
data_day_v_NAM <- do.call(rbind.fill,data_day_v_tmp) #combined data_month for "NAM" North America |
|
382 |
data_day_v_NAM$tile_id <- unlist(tile_id) |
|
383 |
|
|
384 |
write.table((data_day_s_NAM), |
|
385 |
file=file.path(out_dir,paste("data_day_s_NAM_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",") |
|
386 |
write.table((data_day_v_NAM), |
|
387 |
file=file.path(out_dir,paste("data_day_v_NAM_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",") |
|
388 |
|
|
389 |
#### Recover subsampling data |
|
390 |
#For tiles with many stations, there is a subsampling done in terms of distance (spatial pruning) and |
|
391 |
#in terms of station numbers if there are still too many stations to consider. This is done at the |
|
392 |
#daily and monthly stages. |
|
393 |
|
|
394 |
#lf_sub_sampling_obj_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern=paste("^sub_sampling_obj_",interpolation_method,".*.RData",sep=""),full.names=T)}) |
|
395 |
#lf_sub_sampling_obj_daily_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="^sub_sampling_obj_daily.*.RData",full.names=T)}) |
|
396 |
#sub_sampling_obj <- try(load_obj(lf_sub_sampling_obj_files[[3]])) #This is an example tile |
|
397 |
#data_removed contains the validation data... |
|
398 |
#this data can be used for validation of the product. Note that it may be missing for some tiles |
|
399 |
#as no stations are removed if there are too enough stations in the tile |
|
400 |
#this will need to be checked later on... |
|
401 |
|
|
402 |
data_month_v_subsampling_list <- mclapply(lf_sub_sampling_obj_files,FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_month_obj,"data_removed"))},mc.preschedule=FALSE,mc.cores = 6) |
|
403 |
#test <- mclapply(list_raster_obj_files[1:6],FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_month_obj,"data_s"))},mc.preschedule=FALSE,mc.cores = 6) |
|
404 |
|
|
405 |
names(data_month_v_subsampling_list) <- list_names_tile_id |
|
406 |
|
|
407 |
data_month_v_subsampling_tmp <- remove_from_list_fun(data_month_v_subsampling_list)$list |
|
408 |
#df_tile_processed$metrics_v <- remove_from_list_fun(data_month_s_list)$valid |
|
409 |
#if no stations have been removed then there are no validation stations !!! |
|
410 |
if(length(data_month_v_subsampling_tmp)!=0){ |
|
411 |
|
|
412 |
tile_id <- lapply(1:length(data_month_v_subsampling_tmp), |
|
413 |
FUN=function(i,x){try(rep(names(x)[i],nrow(x[[i]])))},x=data_month_v_subsampling_tmp) |
|
414 |
data_month_v_subsmapling_NAM <- do.call(rbind.fill,ddata_month_v_subsampling_tmp) #combined data_month for "NAM" North America |
|
415 |
data_month_v_subsampling_NAM$tile_id <- unlist(tile_id) |
|
416 |
|
|
417 |
write.table((data_month_v_subsampling_NAM), |
|
418 |
file=file.path(out_dir,paste("data_month_v_subsampling_NAM_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",") |
|
419 |
|
|
420 |
} |
|
421 |
|
|
422 |
## Do the same for daily... |
|
423 |
## End of potential function started in line 317...this section will be cut down for simplicity |
|
424 |
|
|
425 |
###################################################### |
|
426 |
####### PART 3: EXAMINE STATIONS AND MODEL FITTING ### |
|
427 |
|
|
428 |
### Stations and model fitting ### |
|
429 |
#summarize location and number of training and testing used by tiles |
|
430 |
|
|
431 |
#names(robj1$clim_method_mod_obj[[1]]$data_month) # monthly data for January |
|
432 |
#names(robj1$validation_mod_month_obj[[1]]$data_s) # daily for January with predictions |
|
433 |
#note that there is no holdout in the current run at the monthly time scale: |
|
434 |
|
|
435 |
#robj1$clim_method_mod_obj[[1]]$data_month_v #zero rows for testing stations at monthly timescale |
|
436 |
#load data_month for specific tiles |
|
437 |
data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month") |
|
438 |
|
|
439 |
#names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info |
|
440 |
|
|
441 |
use_day=TRUE |
|
442 |
use_month=TRUE |
|
443 |
|
|
444 |
#list_raster_obj_files <- c("/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-100.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-100.0.RData", |
|
445 |
# "/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-105.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-105.0.RData") |
|
446 |
|
|
447 |
list_names_tile_id <- df_tile_processed$tile_id |
|
448 |
list_raster_obj_files[list_names_tile_id] |
|
449 |
#list_names_tile_id <- c("tile_1","tile_2") |
|
450 |
list_param_training_testing_info <- list(list_raster_obj_files[list_names_tile_id],use_month,use_day,list_names_tile_id) |
|
451 |
names(list_param_training_testing_info) <- c("list_raster_obj_files","use_month","use_day","list_names_tile_id") |
|
452 |
|
|
453 |
list_param <- list_param_training_testing_info |
|
454 |
#debug(extract_daily_training_testing_info) |
|
455 |
#pred_data_info <- extract_daily_training_testing_info(1,list_param=list_param_training_testing_info) |
|
456 |
pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = num_cores) |
|
457 |
#pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id][1:6]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = 6) |
|
458 |
#pred_data_info <- lapply(1:length(list_raster_obj_files),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info) |
|
459 |
#pred_data_info <- lapply(1:length(list_raster_obj_files[1]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info) |
|
460 |
|
|
461 |
pred_data_info_tmp <- remove_from_list_fun(pred_data_info)$list #remove data not predicted |
|
462 |
##Add tile nanmes?? it is alreaready there |
|
463 |
#names(pred_data_info)<-list_names_tile_id |
|
464 |
pred_data_month_info <- do.call(rbind,lapply(pred_data_info_tmp,function(x){x$pred_data_month_info})) |
|
465 |
pred_data_day_info <- do.call(rbind,lapply(pred_data_info_tmp,function(x){x$pred_data_day_info})) |
|
466 |
|
|
467 |
#putput inforamtion in csv !! |
|
468 |
write.table(pred_data_month_info, |
|
469 |
file=file.path(out_dir,paste("pred_data_month_info_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",") |
|
470 |
write.table(pred_data_day_info, |
|
471 |
file=file.path(out_dir,paste("pred_data_day_info_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",") |
|
472 |
|
|
473 |
###################################################### |
|
474 |
####### PART 4: Get shapefile tiling with centroids ### |
|
475 |
|
|
476 |
#get shape files for the region being assessed: |
|
477 |
|
|
478 |
list_shp_world <- list.files(path=in_dir_shp,pattern=".*.shp",full.names=T) |
|
479 |
l_shp <- gsub(".shp","",basename(list_shp_world)) |
|
480 |
l_shp <- sub("shp_","",l_shp) |
|
481 |
|
|
482 |
#l_shp <- unlist(lapply(1:length(list_shp_world), |
|
483 |
# FUN=function(i){paste(strsplit(list_shp_world[i],"_")[[1]][3:4],collapse="_")})) |
|
484 |
l_shp <- unlist(lapply(1:length(l_shp), |
|
485 |
FUN=function(i){paste(strsplit(l_shp[i],"_")[[1]][1:2],collapse="_")})) |
|
486 |
|
|
487 |
df_tiles_all <- as.data.frame(as.character(unlist(list_shp_world))) |
|
488 |
df_tiles_all$tile_coord <- l_shp |
|
489 |
#names(df_tiles_all) <- "list_shp_world" |
|
490 |
names(df_tiles_all) <- c("shp_files","tile_coord") |
|
491 |
matching_index <- match(basename(in_dir_list),l_shp) |
|
492 |
list_shp_reg_files <- list_shp_world[matching_index] |
|
493 |
df_tile_processed$shp_files <-list_shp_reg_files |
|
494 |
#df_tile_processed$shp_files <- "" |
|
495 |
#df_tile_processed$tile_coord <- as.character(df_tile_processed$tile_coord) |
|
496 |
#test <- df_tile_processed |
|
497 |
#test$shp_files <- NULL |
|
498 |
#test3 <- merge(test,df_tiles_all,by=c("tile_coord")) |
|
499 |
#test3 <- merge(df_tiles_all,test,by=c("tile_coord")) |
|
500 |
#merge(df_tile_processed,df_tiles_all,by="shp_files") |
|
501 |
|
|
502 |
tx<-strsplit(as.character(df_tile_processed$tile_coord),"_") |
|
503 |
lat<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][1]},x=tx)) |
|
504 |
long<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][2]},x=tx)) |
|
505 |
df_tile_processed$lat <- lat |
|
506 |
df_tile_processed$lon <- long |
|
507 |
|
|
508 |
#put that list in the df_processed and also the centroids!! |
|
509 |
write.table(df_tile_processed, |
|
510 |
file=file.path(out_dir,paste("df_tile_processed_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",") |
|
511 |
|
|
512 |
write.table(df_tiles_all, |
|
513 |
file=file.path(out_dir,paste("df_tiles_all_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",") |
|
514 |
|
|
515 |
#Copy to local home directory on NAS-NEX |
|
516 |
# |
|
517 |
dir.create(file.path(out_dir,"shapefiles")) |
|
518 |
file.copy(list_shp_world,file.path(out_dir,"shapefiles")) |
|
519 |
|
|
520 |
#save a list of all files... |
|
521 |
write.table(df_tiles_all, |
|
522 |
file=file.path(out_dir,"shapefiles",paste("df_tiles_all_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",") |
|
523 |
|
|
524 |
###Prepare files for copying back? |
|
525 |
|
|
526 |
##################### END OF SCRIPT ###################### |
|
527 |
|
|
528 |
|
Also available in: Unified diff
splitting assessment and removing copy back to NCEAS