Project

General

Profile

« Previous | Next » 

Revision edd7e7d5

Added by Benoit Parmentier about 9 years ago

splitting assessment and removing copy back to NCEAS

View differences:

climate/research/oregon/interpolation/global_run_scalingup_assessment_part1a.R
1
####################################  INTERPOLATION OF TEMPERATURES  #######################################
2
############################  Script for assessment of scaling up on NEX: part 1 ##############################
3
#This script uses the worklfow code applied to the globe. Results currently reside on NEX/PLEIADES NASA.
4
#The purpose is to create as set of functions to diagnose and assess quickly a set of predictd tiles.
5
#Part 1 create summary tables and inputs files for figure in part 2 and part 3.
6
#AUTHOR: Benoit Parmentier 
7
#CREATED ON: 03/23/2014  
8
#MODIFIED ON: 12/28/2015            
9
#Version: 4
10
#PROJECT: Environmental Layers project  
11
#TO DO:
12
# - 
13
# - Make this a function...
14
# - Drop mosaicing from the script
15
#
16
#First source these files:
17
#Resolved call issues from R.
18
#source /nobackupp6/aguzman4/climateLayers/sharedModules2/etc/environ.sh  
19
#MODULEPATH=$MODULEPATH:/nex/modules/files
20
#module load pythonkits/gdal_1.10.0_python_2.7.3_nex
21

  
22
# These are the names and number for the current subset regions used for global runs:
23
#reg1 - North America (NAM)
24
#reg2 - Europe (WE)
25
#reg3 - Asia 
26
#reg4 - South America (SAM)
27
#reg5 - Africa (AF)
28
#reg6 - East Asia and Australia 
29

  
30
#################################################################################################
31

  
32
### Loading R library and packages        
33
#library used in the workflow production:
34
library(gtools)                              # loading some useful tools 
35
library(mgcv)                                # GAM package by Simon Wood
36
library(sp)                                  # Spatial pacakge with class definition by Bivand et al.
37
library(spdep)                               # Spatial pacakge with methods and spatial stat. by Bivand et al.
38
library(rgdal)                               # GDAL wrapper for R, spatial utilities
39
library(gstat)                               # Kriging and co-kriging by Pebesma et al.
40
library(fields)                              # NCAR Spatial Interpolation methods such as kriging, splines
41
library(raster)                              # Hijmans et al. package for raster processing
42
library(gdata)                               # various tools with xls reading, cbindX
43
library(rasterVis)                           # Raster plotting functions
44
library(parallel)                            # Parallelization of processes with multiple cores
45
library(maptools)                            # Tools and functions for sp and other spatial objects e.g. spCbind
46
library(maps)                                # Tools and data for spatial/geographic objects
47
library(reshape)                             # Change shape of object, summarize results 
48
library(plotrix)                             # Additional plotting functions
49
library(plyr)                                # Various tools including rbind.fill
50
library(spgwr)                               # GWR method
51
library(automap)                             # Kriging automatic fitting of variogram using gstat
52
library(rgeos)                               # Geometric, topologic library of functions
53
#RPostgreSQL                                 # Interface R and Postgres, not used in this script
54
library(gridExtra)
55
#Additional libraries not used in workflow
56
library(pgirmess)                            # Krusall Wallis test with mulitple options, Kruskalmc {pgirmess}  
57
library(colorRamps)
58
  
59
#### FUNCTION USED IN SCRIPT
60
  
61
function_analyses_paper1 <- "global_run_scalingup_assessment_part1_functions_02112015.R" #PARAM12
62
script_path <- "/nobackupp8/bparmen1/env_layers_scripts" #path to script
63
source(file.path(script_path,function_analyses_paper1)) #source all functions used in this script 
64

  
65
##############################
66
#### Parameters and constants  
67

  
68
#Make this a function
69
#reg1 (North Am), reg2(Europe),reg3(Asia), reg4 (South Am), reg5 (Africa), reg6 (Australia-Asia)
70
#master directory containing the definition of tile size and tiles predicted
71
in_dir1 <- "/nobackupp6/aguzman4/climateLayers/out/"
72
#/nobackupp6/aguzman4/climateLayers/out_15x45/1982
73

  
74
#region_names <- c("reg23","reg4") #selected region names, #PARAM2 
75
region_names <- c("reg4")
76
#region_names <- c("1992") #no specific region here so use date
77
#region_names <- c("reg1","reg2","reg3","reg4","reg5","reg6") #selected region names, #PARAM2
78
#region_namesb <- c("reg_1b","reg_1c","reg_2b","reg_3b","reg_6b") #selected region names, #PARAM2
79

  
80
y_var_name <- "dailyTmax" #PARAM3
81
interpolation_method <- c("gam_CAI") #PARAM4
82
out_prefix<-"run_global_analyses_pred_12282015" #PARAM5
83

  
84
#output_run10_1500x4500_global_analyses_pred_2003_04102015/
85

  
86
#out_dir<-"/data/project/layers/commons/NEX_data/" #On NCEAS Atlas
87
#out_dir <- "/nobackup/bparmen1/" #on NEX
88
out_dir <- "/nobackupp8/bparmen1/" #PARAM6
89
#out_dir <-paste(out_dir,"_",out_prefix,sep="")
90
create_out_dir_param <- TRUE #PARAM7
91

  
92
CRS_locs_WGS84 <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84, #PARAM8
93

  
94
#day_to_mosaic <- c("19920101","19920102","19920103,19920104,19920105")
95
#day_to_mosaic <- NULL #if day to mosaic is null then mosaic all dates?
96
list_year_predicted <- 1984:2004
97
year_predicted <- list_year_predicted[1]
98

  
99
file_format <- ".tif" #format for mosaiced files #PARAM10
100
NA_flag_val <- -9999  #No data value, #PARAM11
101
num_cores <- 6 #number of cores used #PARAM13
102

  
103
#Models used.
104
#list_models<-c("y_var ~ s(lat,lon,k=4) + s(elev_s,k=3) + s(LST,k=3)",
105
#               "y_var ~ s(lat,lon,k=5) + s(elev_s,k=3) + s(LST,k=3)",
106
#               "y_var ~ s(lat,lon,k=8) + s(elev_s,k=4) + s(LST,k=4)",
107
                                  
108
#module_path <- "/nobackupp6/aguzman4/climateLayers/sharedCode/" #PARAM14
109
#mosaics script #PARAM 15
110
#shell global mosaic script #PARAM 16
111
#gather station data
112

  
113
########################## START SCRIPT #########################################
114

  
115
#Need to make this a function to run as a job...
116

  
117
######################## PART0: Read content of predictions first.... #####
118

  
119

  
120
in_dir_list <- list.dirs(path=in_dir1,recursive=FALSE) #get the list regions processed for this run
121
#basename(in_dir_list)
122
in_dir_list<- lapply(region_names,FUN=function(x,y){y[grep(paste(x,"$",sep=""),basename(y),invert=FALSE)]},
123
                                               y=in_dir_list) 
124

  
125
in_dir_list_all  <- unlist(lapply(in_dir_list,function(x){list.dirs(path=x,recursive=F)}))
126
in_dir_list <- in_dir_list_all
127
#in_dir_list <- in_dir_list[grep("bak",basename(basename(in_dir_list)),invert=TRUE)] #the first one is the in_dir1
128

  
129
#this was changed on 10052015 because the shapefiles were not matching!!!
130
#in_dir_list_tmp <- list.dirs(path=in_dir1,recursive=FALSE) #get the list regions processed for this run
131
#in_dir_subset <- in_dir_list_tmp[grep("subset",basename(in_dir_list_tmp),invert=FALSE)] #select directory with shapefiles...
132
#in_dir_shp <- file.path(in_dir_subset,"shapefiles")
133
#in_dir_list_tmp <- list.dirs(path=in_dir1,recursive=FALSE) #get the list regions processed for this run
134
in_dir_subset <- in_dir_list_all[grep("subset",basename(in_dir_list_all),invert=FALSE)] #select directory with shapefiles...
135
in_dir_shp <- file.path(in_dir_subset,"shapefiles")
136

  
137

  
138
#select only directories used for predictions
139
in_dir_reg <- in_dir_list[grep(".*._.*.",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles...
140
#in_dir_reg <- in_dir_list[grep("july_tiffs",basename(in_dir_reg),invert=TRUE)] #select directory with shapefiles...
141
in_dir_list <- in_dir_reg
142
    
143

  
144
in_dir_list <- in_dir_list[grep("bak",basename(basename(in_dir_list)),invert=TRUE)] #the first one is the in_dir1
145
#list of shapefiles used to define tiles
146
in_dir_shp_list <- list.files(in_dir_shp,".shp",full.names=T)
147

  
148
## load problematic tiles or additional runs
149
#modify later...
150

  
151
#system("ls /nobackup/bparmen1")
152

  
153
if(create_out_dir_param==TRUE){
154
  out_dir <- create_dir_fun(out_dir,out_prefix)
155
  setwd(out_dir)
156
}else{
157
  setwd(out_dir) #use previoulsy defined directory
158
}
159

  
160
setwd(out_dir)
161

  
162
##raster_prediction object : contains testing and training stations with RMSE and model object
163
in_dir_list_tmp <- file.path(in_dir_list,year_predicted)
164
list_raster_obj_files <- lapply(in_dir_list_tmp,FUN=function(x){list.files(path=x,pattern="^raster_prediction_obj.*.RData",full.names=T)})
165
basename(dirname(list_raster_obj_files[[1]]))
166
list_names_tile_coord <- lapply(list_raster_obj_files,FUN=function(x){basename(dirname(x))})
167
list_names_tile_id <- paste("tile",1:length(list_raster_obj_files),sep="_")
168
names(list_raster_obj_files)<- list_names_tile_id
169

  
170
#one level up
171
lf_covar_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="covar_obj.*.RData",full.names=T)})
172
lf_covar_tif <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="covar.*.tif",full.names=T)})
173

  
174
#sub_sampling_obj_daily_gam_CAI_10.0_-75.0.RData
175
#sub_sampling_obj_gam_CAI_10.0_-75.0.RData
176

  
177
lf_sub_sampling_obj_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern=paste("^sub_sampling_obj_",interpolation_method,".*.RData",sep=""),full.names=T)})
178
lf_sub_sampling_obj_daily_files <- lapply(in_dir_list_tmp,FUN=function(x){list.files(path=x,pattern="^sub_sampling_obj_daily.*.RData",full.names=T)})
179

  
180
## This will be part of the raster_obj function
181
#debug(create_raster_prediction_obj)
182
#out_prefix_str <- paste(basename(in_dir_list),out_prefix,sep="_") 
183
#lf_raster_obj <- create_raster_prediction_obj(in_dir_list,interpolation_method, y_var_name,out_prefix_str,out_path_list=NULL)
184

  
185
################################################################
186
######## PART 1: Generate tables to collect information:
187
######## over all tiles in North America 
188

  
189
##Function to collect all the tables from tiles into a table
190
###Table 1: Average accuracy metrics
191
###Table 2: daily accuracy metrics for all tiles
192

  
193
#First create table of tiles under analysis and their coord
194
df_tile_processed <- data.frame(tile_coord=basename(in_dir_list))
195
df_tile_processed$tile_id <- unlist(list_names_tile_id) #Arbitrary tiling number!!
196
df_tile_processed$path_NEX <- in_dir_list
197
df_tile_processed$year_predicted <- year_predicted
198
df_tile_processed$sub_sampling_clim  <- lf_sub_sampling_obj_files
199
df_tile_processed$sub_sampling_daily  <- lf_sub_sampling_obj_daily_files
200
lf_sub_sampling_obj_files
201

  
202
##Quick exploration of raster object
203
#Should be commented out to make this a function
204
#robj1 <- try(load_obj(list_raster_obj_files[[3]])) #This is an example tile
205
#robj1 <- load_obj(lf_raster_obj[4]) #This is tile tile
206

  
207
#names(robj1)
208
#names(robj1$method_mod_obj[[2]]) #for January 1, 2010
209
#names(robj1$method_mod_obj[[2]]$dailyTmax) #for January
210
#names(robj1$method_mod_obj[[11]]) #for January 1, 2010
211
#names(robj1$method_mod_obj[[11]]$dailyTmax) #for January
212

  
213
#names(robj1$clim_method_mod_obj[[1]]$data_month) #for January
214
#names(robj1$validation_mod_month_obj[[1]]$data_s) #for January with predictions
215
#Get the number of models predicted
216
#nb_mod <- length(unique(robj1$tb_diagnostic_v$pred_mod))#
217
#list_formulas <- (robj1$clim_method_mod_obj[[1]]$formulas)
218
#dates_predicted <- (unique(robj1$tb_diagnostic_v$date))
219

  
220

  
221
#list_tb_diagnostic_v <- mclapply(lf_validation_obj,FUN=function(x){try( x<- load_obj(x)); try(extract_from_list_obj(x,"metrics_v"))},mc.preschedule=FALSE,mc.cores = 6)                           
222
#names(list_tb_diagnostic_v) <- list_names_tile_id
223

  
224
################
225
#### Table 1: Average accuracy metrics per tile and predictions
226

  
227
#can use a maximum of 6 cores on the NEX Bridge
228
#For 28 tiles and  28 RData boject it takes 15-16 min
229
#summary_metrics_v_list <- mclapply(list_raster_obj_files[5:6],FUN=function(x){try( x<- load_obj(x)); try(x[["summary_metrics_v"]]$avg)},mc.preschedule=FALSE,mc.cores = 2)                           
230

  
231
summary_metrics_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try( x<- load_obj(x)); try(x[["summary_metrics_v"]]$avg)},mc.preschedule=FALSE,mc.cores = num_cores)                         
232
#summary_metrics_v_list <- lapply(summary_metrics_v_list,FUN=function(x){try(x$avg)})
233
names(summary_metrics_v_list) <- list_names_tile_id
234

  
235
summary_metrics_v_tmp <- remove_from_list_fun(summary_metrics_v_list)$list
236
df_tile_processed$metrics_v <- as.integer(remove_from_list_fun(summary_metrics_v_list)$valid)
237
#Now remove "try-error" from list of accuracy)
238

  
239
summary_metrics_v_NA <- do.call(rbind.fill,summary_metrics_v_tmp) #create a df for NA tiles with all accuracy metrics
240
#tile_coord <- lapply(1:length(summary_metrics_v_list),FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=summary_metrics_v_list)
241
#add the tile id identifier
242
tile_id_tmp <- lapply(1:length(summary_metrics_v_tmp),
243
                     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=summary_metrics_v_tmp,y=names(summary_metrics_v_tmp))
244
#adding tile id summary data.frame
245
summary_metrics_v_NA$tile_id <-unlist(tile_id_tmp)
246
summary_metrics_v_NA$n <- as.integer(summary_metrics_v_NA$n)
247

  
248
summary_metrics_v_NA <- merge(summary_metrics_v_NA,df_tile_processed[,1:2],by="tile_id")
249

  
250
tx<-strsplit(as.character(summary_metrics_v_NA$tile_coord),"_")
251
lat<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][1]},x=tx))
252
long<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][2]},x=tx))
253
summary_metrics_v_NA$lat <- lat
254
summary_metrics_v_NA$lon <- long
255

  
256
write.table(as.data.frame(summary_metrics_v_NA),
257
            file=file.path(out_dir,paste("summary_metrics_v2_NA_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
258

  
259
#################
260
###Table 2: daily validation/testing accuracy metrics for all tiles
261
#this takes about 15min for 28 tiles (reg4)
262
#tb_diagnostic_v_list <- lapply(list_raster_obj_files,FUN=function(x){x<-load_obj(x);x[["tb_diagnostic_v"]]})                           
263
tb_diagnostic_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_diagnostic_v"]])},mc.preschedule=FALSE,mc.cores = num_cores)                           
264

  
265
names(tb_diagnostic_v_list) <- list_names_tile_id
266
tb_diagnostic_v_tmp <- remove_from_list_fun(tb_diagnostic_v_list)$list
267
#df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid
268

  
269
tb_diagnostic_v_NA <- do.call(rbind.fill,tb_diagnostic_v_tmp) #create a df for NA tiles with all accuracy metrics
270
tile_id_tmp <- lapply(1:length(tb_diagnostic_v_tmp),
271
                     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_diagnostic_v_tmp,y=names(tb_diagnostic_v_tmp))
272

  
273
tb_diagnostic_v_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile
274

  
275
tb_diagnostic_v_NA <- merge(tb_diagnostic_v_NA,df_tile_processed[,1:2],by="tile_id")
276

  
277
write.table((tb_diagnostic_v_NA),
278
            file=file.path(out_dir,paste("tb_diagnostic_v_NA_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
279

  
280
#################
281
###Table 3: monthly fit/training accuracy information for all tiles
282

  
283
## Monthly fitting information
284
tb_month_diagnostic_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_month_diagnostic_s"]])},mc.preschedule=FALSE,mc.cores = num_cores)                           
285

  
286
names(tb_month_diagnostic_s_list) <- list_names_tile_id
287
tb_month_diagnostic_s_tmp <- remove_from_list_fun(tb_month_diagnostic_s_list)$list
288
#df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid
289

  
290
tb_month_diagnostic_s_NA <- do.call(rbind.fill,tb_month_diagnostic_s_tmp) #create a df for NA tiles with all accuracy metrics
291
tile_id_tmp <- lapply(1:length(tb_month_diagnostic_s_tmp),
292
                     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_month_diagnostic_s_tmp,y=names(tb_month_diagnostic_s_tmp))
293

  
294
tb_month_diagnostic_s_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile
295

  
296
tb_month_diagnostic_s_NA <- merge(tb_month_diagnostic_s_NA,df_tile_processed[,1:2],by="tile_id")
297

  
298
date_f<-strptime(tb_month_diagnostic_s_NA$date, "%Y%m%d")   # interpolation date being processed
299
tb_month_diagnostic_s_NA$month<-strftime(date_f, "%m")          # current month of the date being processed
300

  
301
write.table((tb_month_diagnostic_s_NA),
302
            file=file.path(out_dir,paste("tb_month_diagnostic_s_NA_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
303

  
304
#################
305
###Table 4: daily fit/training accuracy information with predictions for all tiles
306

  
307
## daily fit info:
308

  
309
tb_diagnostic_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_diagnostic_s"]])},mc.preschedule=FALSE,mc.cores = num_cores)                           
310

  
311
names(tb_diagnostic_s_list) <- list_names_tile_id
312
tb_diagnostic_s_tmp <- remove_from_list_fun(tb_diagnostic_s_list)$list
313
#df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid
314

  
315
tb_diagnostic_s_NA <- do.call(rbind.fill,tb_diagnostic_s_tmp) #create a df for NA tiles with all accuracy metrics
316
tile_id_tmp <- lapply(1:length(tb_diagnostic_s_tmp),
317
                     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_diagnostic_s_tmp,y=names(tb_diagnostic_s_tmp))
318

  
319
tb_diagnostic_s_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile
320

  
321
tb_diagnostic_s_NA <- merge(tb_diagnostic_s_NA,df_tile_processed[,1:2],by="tile_id")
322

  
323
write.table((tb_diagnostic_s_NA),
324
            file=file.path(out_dir,paste("tb_diagnostic_s_NA_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
325

  
326
##### Table 5: Add later on: daily info
327
### with also data_s and data_v saved!!!
328

  
329
#Insert here...compute input and predicted ranges to spot potential errors?
330

  
331
### Make this part a function...this is repetitive
332
##### SPDF of Monhtly Station info
333
#load data_month for specific tiles
334
#10.45pm
335
#data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
336
#names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
337

  
338
#data_month_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x$validation_mod_month_obj[["data_s"]])},mc.preschedule=FALSE,mc.cores = 6)                           
339
data_month_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_month_obj,"data_s"))},mc.preschedule=FALSE,mc.cores = 6)                           
340
#test <- mclapply(list_raster_obj_files[1:6],FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_month_obj,"data_s"))},mc.preschedule=FALSE,mc.cores = 6)                           
341

  
342
names(data_month_s_list) <- list_names_tile_id
343

  
344
data_month_tmp <- remove_from_list_fun(data_month_s_list)$list
345
#df_tile_processed$metrics_v <- remove_from_list_fun(data_month_s_list)$valid
346

  
347
tile_id <- lapply(1:length(data_month_tmp),
348
                  FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=data_month_tmp)
349
data_month_NAM <- do.call(rbind.fill,data_month_tmp) #combined data_month for "NAM" North America
350
data_month_NAM$tile_id <- unlist(tile_id)
351

  
352
write.table((data_month_NAM),
353
            file=file.path(out_dir,paste("data_month_s_NAM_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
354

  
355
#Get validation data?? Find other object from within the dir 
356
#Som region don't have validation data at monthly time scale.
357

  
358
#### SPDF of daily Station info
359
#load data_month for specific tiles
360
#data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
361
#names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
362

  
363
data_day_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_obj,"data_s"))},mc.preschedule=FALSE,mc.cores = num_cores)    
364
data_day_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_obj,"data_v"))},mc.preschedule=FALSE,mc.cores = num_cores)    
365

  
366
names(data_day_s_list) <- list_names_tile_id
367
names(data_day_v_list) <- list_names_tile_id
368

  
369
data_day_s_tmp <- remove_from_list_fun(data_day_s_list)$list
370
data_day_v_tmp <- remove_from_list_fun(data_day_v_list)$list
371

  
372
#df_tile_processed$metrics_v <- remove_from_list_fun(data_month_s_list)$valid
373

  
374
tile_id <- lapply(1:length(data_day_s_tmp),
375
                  FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=data_day_s_tmp)
376
data_day_s_NAM <- do.call(rbind.fill,data_day_s_tmp) #combined data_month for "NAM" North America
377
data_day_s_NAM$tile_id <- unlist(tile_id)
378

  
379
tile_id <- lapply(1:length(data_day_v_tmp),
380
                  FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=data_day_v_tmp)
381
data_day_v_NAM <- do.call(rbind.fill,data_day_v_tmp) #combined data_month for "NAM" North America
382
data_day_v_NAM$tile_id <- unlist(tile_id)
383

  
384
write.table((data_day_s_NAM),
385
            file=file.path(out_dir,paste("data_day_s_NAM_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
386
write.table((data_day_v_NAM),
387
            file=file.path(out_dir,paste("data_day_v_NAM_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
388

  
389
#### Recover subsampling data
390
#For tiles with many stations, there is a subsampling done in terms of distance (spatial pruning) and 
391
#in terms of station numbers if there are still too many stations to consider. This is done at the 
392
#daily and monthly stages.
393

  
394
#lf_sub_sampling_obj_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern=paste("^sub_sampling_obj_",interpolation_method,".*.RData",sep=""),full.names=T)})
395
#lf_sub_sampling_obj_daily_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="^sub_sampling_obj_daily.*.RData",full.names=T)})
396
#sub_sampling_obj <- try(load_obj(lf_sub_sampling_obj_files[[3]])) #This is an example tile
397
#data_removed contains the validation data...
398
#this data can be used for validation of the product. Note that it may be missing for some tiles
399
#as no stations are removed if there are too enough stations in the tile
400
#this will need to be checked later on...
401

  
402
data_month_v_subsampling_list <- mclapply(lf_sub_sampling_obj_files,FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_month_obj,"data_removed"))},mc.preschedule=FALSE,mc.cores = 6)                           
403
#test <- mclapply(list_raster_obj_files[1:6],FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_month_obj,"data_s"))},mc.preschedule=FALSE,mc.cores = 6)                           
404

  
405
names(data_month_v_subsampling_list) <- list_names_tile_id
406

  
407
data_month_v_subsampling_tmp <- remove_from_list_fun(data_month_v_subsampling_list)$list
408
#df_tile_processed$metrics_v <- remove_from_list_fun(data_month_s_list)$valid
409
#if no stations have been removed then there are no validation stations !!!
410
if(length(data_month_v_subsampling_tmp)!=0){
411
  
412
  tile_id <- lapply(1:length(data_month_v_subsampling_tmp),
413
                  FUN=function(i,x){try(rep(names(x)[i],nrow(x[[i]])))},x=data_month_v_subsampling_tmp)
414
  data_month_v_subsmapling_NAM <- do.call(rbind.fill,ddata_month_v_subsampling_tmp) #combined data_month for "NAM" North America
415
  data_month_v_subsampling_NAM$tile_id <- unlist(tile_id)
416

  
417
  write.table((data_month_v_subsampling_NAM),
418
            file=file.path(out_dir,paste("data_month_v_subsampling_NAM_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
419

  
420
}
421

  
422
## Do the same for daily...
423
## End of potential function started in line 317...this section will be cut down for simplicity
424

  
425
######################################################
426
####### PART 3: EXAMINE STATIONS AND MODEL FITTING ###
427

  
428
### Stations and model fitting ###
429
#summarize location and number of training and testing used by tiles
430

  
431
#names(robj1$clim_method_mod_obj[[1]]$data_month) # monthly data for January
432
#names(robj1$validation_mod_month_obj[[1]]$data_s) # daily for January with predictions
433
#note that there is no holdout in the current run at the monthly time scale:
434

  
435
#robj1$clim_method_mod_obj[[1]]$data_month_v #zero rows for testing stations at monthly timescale
436
#load data_month for specific tiles
437
data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
438

  
439
#names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
440

  
441
use_day=TRUE
442
use_month=TRUE
443
 
444
#list_raster_obj_files <- c("/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-100.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-100.0.RData",
445
#                    "/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-105.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-105.0.RData")
446

  
447
list_names_tile_id <- df_tile_processed$tile_id
448
list_raster_obj_files[list_names_tile_id]
449
#list_names_tile_id <- c("tile_1","tile_2")
450
list_param_training_testing_info <- list(list_raster_obj_files[list_names_tile_id],use_month,use_day,list_names_tile_id)
451
names(list_param_training_testing_info) <- c("list_raster_obj_files","use_month","use_day","list_names_tile_id")
452
 
453
list_param <- list_param_training_testing_info
454
#debug(extract_daily_training_testing_info)
455
#pred_data_info <- extract_daily_training_testing_info(1,list_param=list_param_training_testing_info)
456
pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = num_cores)
457
#pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id][1:6]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = 6)
458
#pred_data_info <- lapply(1:length(list_raster_obj_files),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
459
#pred_data_info <- lapply(1:length(list_raster_obj_files[1]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
460

  
461
pred_data_info_tmp <- remove_from_list_fun(pred_data_info)$list #remove data not predicted
462
##Add tile nanmes?? it is alreaready there
463
#names(pred_data_info)<-list_names_tile_id
464
pred_data_month_info <- do.call(rbind,lapply(pred_data_info_tmp,function(x){x$pred_data_month_info}))
465
pred_data_day_info <- do.call(rbind,lapply(pred_data_info_tmp,function(x){x$pred_data_day_info}))
466

  
467
#putput inforamtion in csv !!
468
write.table(pred_data_month_info,
469
            file=file.path(out_dir,paste("pred_data_month_info_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
470
write.table(pred_data_day_info,
471
            file=file.path(out_dir,paste("pred_data_day_info_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
472

  
473
######################################################
474
####### PART 4: Get shapefile tiling with centroids ###
475

  
476
#get shape files for the region being assessed:
477

  
478
list_shp_world <- list.files(path=in_dir_shp,pattern=".*.shp",full.names=T)
479
l_shp <- gsub(".shp","",basename(list_shp_world))
480
l_shp <- sub("shp_","",l_shp)
481

  
482
#l_shp <- unlist(lapply(1:length(list_shp_world),
483
#                       FUN=function(i){paste(strsplit(list_shp_world[i],"_")[[1]][3:4],collapse="_")}))
484
l_shp <- unlist(lapply(1:length(l_shp),
485
                       FUN=function(i){paste(strsplit(l_shp[i],"_")[[1]][1:2],collapse="_")}))
486

  
487
df_tiles_all <- as.data.frame(as.character(unlist(list_shp_world)))
488
df_tiles_all$tile_coord <- l_shp
489
#names(df_tiles_all) <- "list_shp_world"
490
names(df_tiles_all) <- c("shp_files","tile_coord")
491
matching_index <- match(basename(in_dir_list),l_shp)
492
list_shp_reg_files <- list_shp_world[matching_index]
493
df_tile_processed$shp_files <-list_shp_reg_files
494
#df_tile_processed$shp_files <- ""
495
#df_tile_processed$tile_coord <- as.character(df_tile_processed$tile_coord)
496
#test <- df_tile_processed
497
#test$shp_files <- NULL
498
#test3 <- merge(test,df_tiles_all,by=c("tile_coord"))
499
#test3 <- merge(df_tiles_all,test,by=c("tile_coord"))
500
#merge(df_tile_processed,df_tiles_all,by="shp_files")
501

  
502
tx<-strsplit(as.character(df_tile_processed$tile_coord),"_")
503
lat<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][1]},x=tx))
504
long<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][2]},x=tx))
505
df_tile_processed$lat <- lat
506
df_tile_processed$lon <- long
507

  
508
#put that list in the df_processed and also the centroids!!
509
write.table(df_tile_processed,
510
            file=file.path(out_dir,paste("df_tile_processed_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
511

  
512
write.table(df_tiles_all,
513
            file=file.path(out_dir,paste("df_tiles_all_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
514

  
515
#Copy to local home directory on NAS-NEX
516
#
517
dir.create(file.path(out_dir,"shapefiles"))
518
file.copy(list_shp_world,file.path(out_dir,"shapefiles"))
519

  
520
#save a list of all files...
521
write.table(df_tiles_all,
522
            file=file.path(out_dir,"shapefiles",paste("df_tiles_all_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
523

  
524
###Prepare files for copying back?
525

  
526
##################### END OF SCRIPT ######################
527

  
528

  

Also available in: Unified diff