Project

General

Profile

« Previous | Next » 

Revision 14214e68

Added by Benoit Parmentier about 9 years ago

transforming part1a assessment into a function for call from stage 6: initial changes

View differences:

climate/research/oregon/interpolation/global_run_scalingup_assessment_part1a.R
1
####################################  INTERPOLATION OF TEMPERATURES  #######################################
2
############################  Script for assessment of scaling up on NEX: part 1 ##############################
1
####################################  Cimate Layers Predctions  #######################################
2
############################  Script for assessment of scaling up on NEX: part 1a ##############################
3 3
#This script uses the worklfow code applied to the globe. Results currently reside on NEX/PLEIADES NASA.
4 4
#The purpose is to create as set of functions to diagnose and assess quickly a set of predictd tiles.
5 5
#Part 1 create summary tables and inputs files for figure in part 2 and part 3.
6 6
#AUTHOR: Benoit Parmentier 
7 7
#CREATED ON: 03/23/2014  
8
#MODIFIED ON: 12/28/2015            
8
#MODIFIED ON: 12/29/2015            
9 9
#Version: 4
10 10
#PROJECT: Environmental Layers project  
11 11
#TO DO:
12 12
# - 
13
# - Make this a function...
14
# - Drop mosaicing from the script
15
#
13
# - Separate call in a master script for assessment
14
# - add second stage in the master script for assessment
15
# - add mosaicing in the master script for assessment
16

  
16 17
#First source these files:
17 18
#Resolved call issues from R.
18 19
#source /nobackupp6/aguzman4/climateLayers/sharedModules2/etc/environ.sh  
......
27 28
#reg5 - Africa (AF)
28 29
#reg6 - East Asia and Australia 
29 30

  
30
#################################################################################################
31

  
32
### Loading R library and packages        
33
#library used in the workflow production:
34
library(gtools)                              # loading some useful tools 
35
library(mgcv)                                # GAM package by Simon Wood
36
library(sp)                                  # Spatial pacakge with class definition by Bivand et al.
37
library(spdep)                               # Spatial pacakge with methods and spatial stat. by Bivand et al.
38
library(rgdal)                               # GDAL wrapper for R, spatial utilities
39
library(gstat)                               # Kriging and co-kriging by Pebesma et al.
40
library(fields)                              # NCAR Spatial Interpolation methods such as kriging, splines
41
library(raster)                              # Hijmans et al. package for raster processing
42
library(gdata)                               # various tools with xls reading, cbindX
43
library(rasterVis)                           # Raster plotting functions
44
library(parallel)                            # Parallelization of processes with multiple cores
45
library(maptools)                            # Tools and functions for sp and other spatial objects e.g. spCbind
46
library(maps)                                # Tools and data for spatial/geographic objects
47
library(reshape)                             # Change shape of object, summarize results 
48
library(plotrix)                             # Additional plotting functions
49
library(plyr)                                # Various tools including rbind.fill
50
library(spgwr)                               # GWR method
51
library(automap)                             # Kriging automatic fitting of variogram using gstat
52
library(rgeos)                               # Geometric, topologic library of functions
53
#RPostgreSQL                                 # Interface R and Postgres, not used in this script
54
library(gridExtra)
55
#Additional libraries not used in workflow
56
library(pgirmess)                            # Krusall Wallis test with mulitple options, Kruskalmc {pgirmess}  
57
library(colorRamps)
58
  
59
#### FUNCTION USED IN SCRIPT
60
  
61
function_analyses_paper1 <- "global_run_scalingup_assessment_part1_functions_02112015.R" #PARAM12
62
script_path <- "/nobackupp8/bparmen1/env_layers_scripts" #path to script
63
source(file.path(script_path,function_analyses_paper1)) #source all functions used in this script 
64

  
65
##############################
66
#### Parameters and constants  
67

  
68
#Make this a function
69
#reg1 (North Am), reg2(Europe),reg3(Asia), reg4 (South Am), reg5 (Africa), reg6 (Australia-Asia)
70
#master directory containing the definition of tile size and tiles predicted
71
in_dir1 <- "/nobackupp6/aguzman4/climateLayers/out/"
72
#/nobackupp6/aguzman4/climateLayers/out_15x45/1982
73

  
74
#region_names <- c("reg23","reg4") #selected region names, #PARAM2 
75
region_names <- c("reg4")
76
#region_names <- c("1992") #no specific region here so use date
77
#region_names <- c("reg1","reg2","reg3","reg4","reg5","reg6") #selected region names, #PARAM2
78
#region_namesb <- c("reg_1b","reg_1c","reg_2b","reg_3b","reg_6b") #selected region names, #PARAM2
79

  
80
y_var_name <- "dailyTmax" #PARAM3
81
interpolation_method <- c("gam_CAI") #PARAM4
82
out_prefix<-"run_global_analyses_pred_12282015" #PARAM5
83

  
84
#output_run10_1500x4500_global_analyses_pred_2003_04102015/
85

  
86
#out_dir<-"/data/project/layers/commons/NEX_data/" #On NCEAS Atlas
87
#out_dir <- "/nobackup/bparmen1/" #on NEX
88
out_dir <- "/nobackupp8/bparmen1/" #PARAM6
89
#out_dir <-paste(out_dir,"_",out_prefix,sep="")
90
create_out_dir_param <- TRUE #PARAM7
91

  
92
CRS_locs_WGS84 <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84, #PARAM8
93

  
94
#day_to_mosaic <- c("19920101","19920102","19920103,19920104,19920105")
95
#day_to_mosaic <- NULL #if day to mosaic is null then mosaic all dates?
96
list_year_predicted <- 1984:2004
97
year_predicted <- list_year_predicted[1]
98

  
99
file_format <- ".tif" #format for mosaiced files #PARAM10
100
NA_flag_val <- -9999  #No data value, #PARAM11
101
num_cores <- 6 #number of cores used #PARAM13
102

  
103
#Models used.
104
#list_models<-c("y_var ~ s(lat,lon,k=4) + s(elev_s,k=3) + s(LST,k=3)",
105
#               "y_var ~ s(lat,lon,k=5) + s(elev_s,k=3) + s(LST,k=3)",
106
#               "y_var ~ s(lat,lon,k=8) + s(elev_s,k=4) + s(LST,k=4)",
107
                                  
108
#module_path <- "/nobackupp6/aguzman4/climateLayers/sharedCode/" #PARAM14
109
#mosaics script #PARAM 15
110
#shell global mosaic script #PARAM 16
111
#gather station data
112

  
113
########################## START SCRIPT #########################################
114

  
115
#Need to make this a function to run as a job...
116

  
117
######################## PART0: Read content of predictions first.... #####
118

  
119

  
120
in_dir_list <- list.dirs(path=in_dir1,recursive=FALSE) #get the list regions processed for this run
121
#basename(in_dir_list)
122
in_dir_list<- lapply(region_names,FUN=function(x,y){y[grep(paste(x,"$",sep=""),basename(y),invert=FALSE)]},
123
                                               y=in_dir_list) 
124

  
125
in_dir_list_all  <- unlist(lapply(in_dir_list,function(x){list.dirs(path=x,recursive=F)}))
126
in_dir_list <- in_dir_list_all
127
#in_dir_list <- in_dir_list[grep("bak",basename(basename(in_dir_list)),invert=TRUE)] #the first one is the in_dir1
128

  
129
#this was changed on 10052015 because the shapefiles were not matching!!!
130
#in_dir_list_tmp <- list.dirs(path=in_dir1,recursive=FALSE) #get the list regions processed for this run
131
#in_dir_subset <- in_dir_list_tmp[grep("subset",basename(in_dir_list_tmp),invert=FALSE)] #select directory with shapefiles...
132
#in_dir_shp <- file.path(in_dir_subset,"shapefiles")
133
#in_dir_list_tmp <- list.dirs(path=in_dir1,recursive=FALSE) #get the list regions processed for this run
134
in_dir_subset <- in_dir_list_all[grep("subset",basename(in_dir_list_all),invert=FALSE)] #select directory with shapefiles...
135
in_dir_shp <- file.path(in_dir_subset,"shapefiles")
136

  
137

  
138
#select only directories used for predictions
139
in_dir_reg <- in_dir_list[grep(".*._.*.",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles...
140
#in_dir_reg <- in_dir_list[grep("july_tiffs",basename(in_dir_reg),invert=TRUE)] #select directory with shapefiles...
141
in_dir_list <- in_dir_reg
142
    
143

  
144
in_dir_list <- in_dir_list[grep("bak",basename(basename(in_dir_list)),invert=TRUE)] #the first one is the in_dir1
145
#list of shapefiles used to define tiles
146
in_dir_shp_list <- list.files(in_dir_shp,".shp",full.names=T)
147

  
148
## load problematic tiles or additional runs
149
#modify later...
150

  
151
#system("ls /nobackup/bparmen1")
152

  
153
if(create_out_dir_param==TRUE){
154
  out_dir <- create_dir_fun(out_dir,out_prefix)
31
###################################################################################################
32

  
33

  
34
run_assessment_prediction_fun <-function(i,list_param_run_assessment_prediction){
35

  
36
  ##Function to predict temperature interpolation with 12 input parameters
37
  #12 parameters used in the data preparation stage and input in the current script
38
  #
39
  #1) in_dir1 : llcoactation of rroot directory containging tiles
40
  #2) region_names : region_names #e.g. c("reg23","reg4") 
41
  #3) y_var_name : list_param_run_assessment_prediction$y_var_name # e.g. dailyTmax" #PARAM3
42
  #4) interpolation_method :  e.g. #c("gam_CAI") #PARAM4
43
  #5) out_prefix : e.g. "run_global_analyses_pred_12282015" #PARAM5
44
  #6) out_dir <- list_param_run_assessment_prediction$out_dir #<- "/nobackupp8/bparmen1/" #PARAM6
45
  #7) create_out_dir_param: if true a new dirrectory  is craeted for the outputs 
46
  #8) CRS_locs_WGS84: coordinates  CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84, #PARAM8
47
  #9) list_year_predicted : list of years predicted e.g. 1984:2004
48
  #10) file_format : format for mosaiced files #PARAM10
49
  #11) NA_flag_val : No data value, #PARAM11
50
  #12) num_cores : number of cores used #PARAM13
51
  
52
  ###Loading R library and packages     
53
  ### Loading R library and packages        
54
  #library used in the workflow production:
55
  library(gtools)                              # loading some useful tools 
56
  library(mgcv)                                # GAM package by Simon Wood
57
  library(sp)                                  # Spatial pacakge with class definition by Bivand et al.
58
  library(spdep)                               # Spatial pacakge with methods and spatial stat. by Bivand et al.
59
  library(rgdal)                               # GDAL wrapper for R, spatial utilities
60
  library(gstat)                               # Kriging and co-kriging by Pebesma et al.
61
  library(fields)                              # NCAR Spatial Interpolation methods such as kriging, splines
62
  library(raster)                              # Hijmans et al. package for raster processing
63
  library(gdata)                               # various tools with xls reading, cbindX
64
  library(rasterVis)                           # Raster plotting functions
65
  library(parallel)                            # Parallelization of processes with multiple cores
66
  library(maptools)                            # Tools and functions for sp and other spatial objects e.g. spCbind
67
  library(maps)                                # Tools and data for spatial/geographic objects
68
  library(reshape)                             # Change shape of object, summarize results 
69
  library(plotrix)                             # Additional plotting functions
70
  library(plyr)                                # Various tools including rbind.fill
71
  library(spgwr)                               # GWR method
72
  library(automap)                             # Kriging automatic fitting of variogram using gstat
73
  library(rgeos)                               # Geometric, topologic library of functions
74
  #RPostgreSQL                                 # Interface R and Postgres, not used in this script
75
  library(gridExtra)
76
  #Additional libraries not used in workflow
77
  library(pgirmess)                            # Krusall Wallis test with mulitple options, Kruskalmc {pgirmess}  
78
  library(colorRamps)
79
  
80
  #### FUNCTION USED IN SCRIPT
81
  
82
  #This is read in in the master script
83
  #function_analyses_paper1 <- "global_run_scalingup_assessment_part1_functions_02112015.R" #PARAM12
84
  #script_path <- "/nobackupp8/bparmen1/env_layers_scripts" #path to script
85
  #source(file.path(script_path,function_analyses_paper1)) #source all functions used in this script 
86
  
87
  ##############################
88
  #### Parameters and constants  
89
  
90
  #Make this a function
91
  #reg1 (North Am), reg2(Europe),reg3(Asia), reg4 (South Am), reg5 (Africa), reg6 (Australia-Asia)
92
  #master directory containing the definition of tile size and tiles predicted
93
  in_dir1 <- list_param_run_assessment_prediction$in_dir1 
94
  region_names <- list_param_run_assessment_prediction$region_names #e.g. c("reg23","reg4") 
95
  y_var_name <- list_param_run_assessment_prediction$y_var_name # e.g. dailyTmax" #PARAM3
96
  interpolation_method <- list_param_run_assessment_prediction$interpolation_method #c("gam_CAI") #PARAM4
97
  out_prefix <- list_param_run_assessment_prediction$out_prefix #"run_global_analyses_pred_12282015" #PARAM5
98
  out_dir <- list_param_run_assessment_prediction$out_dir #<- "/nobackupp8/bparmen1/" #PARAM6
99
  create_out_dir_param <-list_param_run_assessment_prediction$create_out_dir_param #<- TRUE #PARAM7
100
  CRS_locs_WGS84 <- list_param_run_assessment_prediction$CRS_locs_WGS84 #<- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84, #PARAM8
101
  list_year_predicted <- list_param_run_assessment_prediction$list_year_predicted #<- 1984:2004
102
  #list_param_run_assessment_prediction$year_predicted <- list_year_predicted[1]
103
  file_format <- list_param_run_assessment_prediction$file_format #<- ".tif" #format for mosaiced files #PARAM10
104
  NA_flag_val <- list_param_run_assessment_prediction$NA_flag_val #<- -9999  #No data value, #PARAM11
105
  num_cores <- list_param_run_assessment_prediction$num_cores #<- 6 #number of cores used #PARAM13
106
  
107
  ########################## START SCRIPT #########################################
108
  
109
  #Need to make this a function to run as a job...
110
  
111
  ######################## PART0: Read content of predictions first.... #####
112
  #function looped over i, correspoding to year predicted
113
  
114
  year_predicted <- list_param_run_assessment_prediction$list_year_predicted[i] 
115
  list_outfiles <- vector("list", length=6) #collect names of output files
116
  
117
  in_dir_list <- list.dirs(path=in_dir1,recursive=FALSE) #get the list regions processed for this run
118
  #basename(in_dir_list)
119
#                       y=in_dir_list) 
120
  
121
  in_dir_list_all  <- unlist(lapply(in_dir_list,function(x){list.dirs(path=x,recursive=F)}))
122
  in_dir_list <- in_dir_list_all
123
  #in_dir_list <- in_dir_list[grep("bak",basename(basename(in_dir_list)),invert=TRUE)] #the first one is the in_dir1
124
  
125
  #this was changed on 10052015 because the shapefiles were not matching!!!
126
  #in_dir_list_tmp <- list.dirs(path=in_dir1,recursive=FALSE) #get the list regions processed for this run
127
  #in_dir_subset <- in_dir_list_tmp[grep("subset",basename(in_dir_list_tmp),invert=FALSE)] #select directory with shapefiles...
128
  #in_dir_shp <- file.path(in_dir_subset,"shapefiles")
129
  #in_dir_list_tmp <- list.dirs(path=in_dir1,recursive=FALSE) #get the list regions processed for this run
130
  in_dir_subset <- in_dir_list_all[grep("subset",basename(in_dir_list_all),invert=FALSE)] #select directory with shapefiles...
131
  in_dir_shp <- file.path(in_dir_subset,"shapefiles")
132
  
133
  
134
  #select only directories used for predictions
135
  in_dir_reg <- in_dir_list[grep(".*._.*.",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles...
136
  #in_dir_reg <- in_dir_list[grep("july_tiffs",basename(in_dir_reg),invert=TRUE)] #select directory with shapefiles...
137
  in_dir_list <- in_dir_reg
138
  
139
  
140
  in_dir_list <- in_dir_list[grep("bak",basename(basename(in_dir_list)),invert=TRUE)] #the first one is the in_dir1
141
  #list of shapefiles used to define tiles
142
  in_dir_shp_list <- list.files(in_dir_shp,".shp",full.names=T)
143
  
144
  ## load problematic tiles or additional runs
145
  #modify later...
146
  
147
  #system("ls /nobackup/bparmen1")
148
  
149
  if(create_out_dir_param==TRUE){
150
    out_dir <- create_dir_fun(out_dir,out_prefix)
151
    setwd(out_dir)
152
  }else{
153
    setwd(out_dir) #use previoulsy defined directory
154
  }
155
  
155 156
  setwd(out_dir)
156
}else{
157
  setwd(out_dir) #use previoulsy defined directory
158
}
159

  
160
setwd(out_dir)
161

  
162
##raster_prediction object : contains testing and training stations with RMSE and model object
163
in_dir_list_tmp <- file.path(in_dir_list,year_predicted)
164
list_raster_obj_files <- lapply(in_dir_list_tmp,FUN=function(x){list.files(path=x,pattern="^raster_prediction_obj.*.RData",full.names=T)})
165
basename(dirname(list_raster_obj_files[[1]]))
166
list_names_tile_coord <- lapply(list_raster_obj_files,FUN=function(x){basename(dirname(x))})
167
list_names_tile_id <- paste("tile",1:length(list_raster_obj_files),sep="_")
168
names(list_raster_obj_files)<- list_names_tile_id
169

  
170
#one level up
171
lf_covar_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="covar_obj.*.RData",full.names=T)})
172
lf_covar_tif <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="covar.*.tif",full.names=T)})
173

  
174
#sub_sampling_obj_daily_gam_CAI_10.0_-75.0.RData
175
#sub_sampling_obj_gam_CAI_10.0_-75.0.RData
176

  
177
lf_sub_sampling_obj_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern=paste("^sub_sampling_obj_",interpolation_method,".*.RData",sep=""),full.names=T)})
178
lf_sub_sampling_obj_daily_files <- lapply(in_dir_list_tmp,FUN=function(x){list.files(path=x,pattern="^sub_sampling_obj_daily.*.RData",full.names=T)})
179

  
180
## This will be part of the raster_obj function
181
#debug(create_raster_prediction_obj)
182
#out_prefix_str <- paste(basename(in_dir_list),out_prefix,sep="_") 
183
#lf_raster_obj <- create_raster_prediction_obj(in_dir_list,interpolation_method, y_var_name,out_prefix_str,out_path_list=NULL)
184

  
185
################################################################
186
######## PART 1: Generate tables to collect information:
187
######## over all tiles in North America 
188

  
189
##Function to collect all the tables from tiles into a table
190
###Table 1: Average accuracy metrics
191
###Table 2: daily accuracy metrics for all tiles
192

  
193
#First create table of tiles under analysis and their coord
194
df_tile_processed <- data.frame(tile_coord=basename(in_dir_list))
195
df_tile_processed$tile_id <- unlist(list_names_tile_id) #Arbitrary tiling number!!
196
df_tile_processed$path_NEX <- in_dir_list
197
df_tile_processed$year_predicted <- year_predicted
198
df_tile_processed$sub_sampling_clim  <- lf_sub_sampling_obj_files
199
df_tile_processed$sub_sampling_daily  <- lf_sub_sampling_obj_daily_files
200
lf_sub_sampling_obj_files
201

  
202
##Quick exploration of raster object
203
#Should be commented out to make this a function
204
#robj1 <- try(load_obj(list_raster_obj_files[[3]])) #This is an example tile
205
#robj1 <- load_obj(lf_raster_obj[4]) #This is tile tile
206

  
207
#names(robj1)
208
#names(robj1$method_mod_obj[[2]]) #for January 1, 2010
209
#names(robj1$method_mod_obj[[2]]$dailyTmax) #for January
210
#names(robj1$method_mod_obj[[11]]) #for January 1, 2010
211
#names(robj1$method_mod_obj[[11]]$dailyTmax) #for January
212

  
213
#names(robj1$clim_method_mod_obj[[1]]$data_month) #for January
214
#names(robj1$validation_mod_month_obj[[1]]$data_s) #for January with predictions
215
#Get the number of models predicted
216
#nb_mod <- length(unique(robj1$tb_diagnostic_v$pred_mod))#
217
#list_formulas <- (robj1$clim_method_mod_obj[[1]]$formulas)
218
#dates_predicted <- (unique(robj1$tb_diagnostic_v$date))
219

  
220

  
221
#list_tb_diagnostic_v <- mclapply(lf_validation_obj,FUN=function(x){try( x<- load_obj(x)); try(extract_from_list_obj(x,"metrics_v"))},mc.preschedule=FALSE,mc.cores = 6)                           
222
#names(list_tb_diagnostic_v) <- list_names_tile_id
223

  
224
################
225
#### Table 1: Average accuracy metrics per tile and predictions
226

  
227
#can use a maximum of 6 cores on the NEX Bridge
228
#For 28 tiles and  28 RData boject it takes 15-16 min
229
#summary_metrics_v_list <- mclapply(list_raster_obj_files[5:6],FUN=function(x){try( x<- load_obj(x)); try(x[["summary_metrics_v"]]$avg)},mc.preschedule=FALSE,mc.cores = 2)                           
230

  
231
summary_metrics_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try( x<- load_obj(x)); try(x[["summary_metrics_v"]]$avg)},mc.preschedule=FALSE,mc.cores = num_cores)                         
232
#summary_metrics_v_list <- lapply(summary_metrics_v_list,FUN=function(x){try(x$avg)})
233
names(summary_metrics_v_list) <- list_names_tile_id
234

  
235
summary_metrics_v_tmp <- remove_from_list_fun(summary_metrics_v_list)$list
236
df_tile_processed$metrics_v <- as.integer(remove_from_list_fun(summary_metrics_v_list)$valid)
237
#Now remove "try-error" from list of accuracy)
238

  
239
summary_metrics_v_NA <- do.call(rbind.fill,summary_metrics_v_tmp) #create a df for NA tiles with all accuracy metrics
240
#tile_coord <- lapply(1:length(summary_metrics_v_list),FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=summary_metrics_v_list)
241
#add the tile id identifier
242
tile_id_tmp <- lapply(1:length(summary_metrics_v_tmp),
243
                     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=summary_metrics_v_tmp,y=names(summary_metrics_v_tmp))
244
#adding tile id summary data.frame
245
summary_metrics_v_NA$tile_id <-unlist(tile_id_tmp)
246
summary_metrics_v_NA$n <- as.integer(summary_metrics_v_NA$n)
247

  
248
summary_metrics_v_NA <- merge(summary_metrics_v_NA,df_tile_processed[,1:2],by="tile_id")
249

  
250
tx<-strsplit(as.character(summary_metrics_v_NA$tile_coord),"_")
251
lat<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][1]},x=tx))
252
long<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][2]},x=tx))
253
summary_metrics_v_NA$lat <- lat
254
summary_metrics_v_NA$lon <- long
255

  
256
write.table(as.data.frame(summary_metrics_v_NA),
257
            file=file.path(out_dir,paste("summary_metrics_v2_NA_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
258

  
259
#################
260
###Table 2: daily validation/testing accuracy metrics for all tiles
261
#this takes about 15min for 28 tiles (reg4)
262
#tb_diagnostic_v_list <- lapply(list_raster_obj_files,FUN=function(x){x<-load_obj(x);x[["tb_diagnostic_v"]]})                           
263
tb_diagnostic_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_diagnostic_v"]])},mc.preschedule=FALSE,mc.cores = num_cores)                           
264

  
265
names(tb_diagnostic_v_list) <- list_names_tile_id
266
tb_diagnostic_v_tmp <- remove_from_list_fun(tb_diagnostic_v_list)$list
267
#df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid
268

  
269
tb_diagnostic_v_NA <- do.call(rbind.fill,tb_diagnostic_v_tmp) #create a df for NA tiles with all accuracy metrics
270
tile_id_tmp <- lapply(1:length(tb_diagnostic_v_tmp),
271
                     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_diagnostic_v_tmp,y=names(tb_diagnostic_v_tmp))
272

  
273
tb_diagnostic_v_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile
274

  
275
tb_diagnostic_v_NA <- merge(tb_diagnostic_v_NA,df_tile_processed[,1:2],by="tile_id")
276

  
277
write.table((tb_diagnostic_v_NA),
278
            file=file.path(out_dir,paste("tb_diagnostic_v_NA_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
279

  
280
#################
281
###Table 3: monthly fit/training accuracy information for all tiles
282

  
283
## Monthly fitting information
284
tb_month_diagnostic_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_month_diagnostic_s"]])},mc.preschedule=FALSE,mc.cores = num_cores)                           
285

  
286
names(tb_month_diagnostic_s_list) <- list_names_tile_id
287
tb_month_diagnostic_s_tmp <- remove_from_list_fun(tb_month_diagnostic_s_list)$list
288
#df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid
289

  
290
tb_month_diagnostic_s_NA <- do.call(rbind.fill,tb_month_diagnostic_s_tmp) #create a df for NA tiles with all accuracy metrics
291
tile_id_tmp <- lapply(1:length(tb_month_diagnostic_s_tmp),
292
                     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_month_diagnostic_s_tmp,y=names(tb_month_diagnostic_s_tmp))
293

  
294
tb_month_diagnostic_s_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile
295

  
296
tb_month_diagnostic_s_NA <- merge(tb_month_diagnostic_s_NA,df_tile_processed[,1:2],by="tile_id")
297

  
298
date_f<-strptime(tb_month_diagnostic_s_NA$date, "%Y%m%d")   # interpolation date being processed
299
tb_month_diagnostic_s_NA$month<-strftime(date_f, "%m")          # current month of the date being processed
300

  
301
write.table((tb_month_diagnostic_s_NA),
302
            file=file.path(out_dir,paste("tb_month_diagnostic_s_NA_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
303

  
304
#################
305
###Table 4: daily fit/training accuracy information with predictions for all tiles
306

  
307
## daily fit info:
308

  
309
tb_diagnostic_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_diagnostic_s"]])},mc.preschedule=FALSE,mc.cores = num_cores)                           
310

  
311
names(tb_diagnostic_s_list) <- list_names_tile_id
312
tb_diagnostic_s_tmp <- remove_from_list_fun(tb_diagnostic_s_list)$list
313
#df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid
314

  
315
tb_diagnostic_s_NA <- do.call(rbind.fill,tb_diagnostic_s_tmp) #create a df for NA tiles with all accuracy metrics
316
tile_id_tmp <- lapply(1:length(tb_diagnostic_s_tmp),
317
                     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_diagnostic_s_tmp,y=names(tb_diagnostic_s_tmp))
318

  
319
tb_diagnostic_s_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile
320

  
321
tb_diagnostic_s_NA <- merge(tb_diagnostic_s_NA,df_tile_processed[,1:2],by="tile_id")
322

  
323
write.table((tb_diagnostic_s_NA),
324
            file=file.path(out_dir,paste("tb_diagnostic_s_NA_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
325

  
326
##### Table 5: Add later on: daily info
327
### with also data_s and data_v saved!!!
328

  
329
#Insert here...compute input and predicted ranges to spot potential errors?
330

  
331
### Make this part a function...this is repetitive
332
##### SPDF of Monhtly Station info
333
#load data_month for specific tiles
334
#10.45pm
335
#data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
336
#names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
337

  
338
#data_month_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x$validation_mod_month_obj[["data_s"]])},mc.preschedule=FALSE,mc.cores = 6)                           
339
data_month_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_month_obj,"data_s"))},mc.preschedule=FALSE,mc.cores = 6)                           
340
#test <- mclapply(list_raster_obj_files[1:6],FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_month_obj,"data_s"))},mc.preschedule=FALSE,mc.cores = 6)                           
341

  
342
names(data_month_s_list) <- list_names_tile_id
343

  
344
data_month_tmp <- remove_from_list_fun(data_month_s_list)$list
345
#df_tile_processed$metrics_v <- remove_from_list_fun(data_month_s_list)$valid
346

  
347
tile_id <- lapply(1:length(data_month_tmp),
348
                  FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=data_month_tmp)
349
data_month_NAM <- do.call(rbind.fill,data_month_tmp) #combined data_month for "NAM" North America
350
data_month_NAM$tile_id <- unlist(tile_id)
351

  
352
write.table((data_month_NAM),
353
            file=file.path(out_dir,paste("data_month_s_NAM_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
354

  
355
#Get validation data?? Find other object from within the dir 
356
#Som region don't have validation data at monthly time scale.
357

  
358
#### SPDF of daily Station info
359
#load data_month for specific tiles
360
#data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
361
#names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
362

  
363
data_day_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_obj,"data_s"))},mc.preschedule=FALSE,mc.cores = num_cores)    
364
data_day_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_obj,"data_v"))},mc.preschedule=FALSE,mc.cores = num_cores)    
365

  
366
names(data_day_s_list) <- list_names_tile_id
367
names(data_day_v_list) <- list_names_tile_id
368

  
369
data_day_s_tmp <- remove_from_list_fun(data_day_s_list)$list
370
data_day_v_tmp <- remove_from_list_fun(data_day_v_list)$list
371

  
372
#df_tile_processed$metrics_v <- remove_from_list_fun(data_month_s_list)$valid
373

  
374
tile_id <- lapply(1:length(data_day_s_tmp),
375
                  FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=data_day_s_tmp)
376
data_day_s_NAM <- do.call(rbind.fill,data_day_s_tmp) #combined data_month for "NAM" North America
377
data_day_s_NAM$tile_id <- unlist(tile_id)
378

  
379
tile_id <- lapply(1:length(data_day_v_tmp),
380
                  FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=data_day_v_tmp)
381
data_day_v_NAM <- do.call(rbind.fill,data_day_v_tmp) #combined data_month for "NAM" North America
382
data_day_v_NAM$tile_id <- unlist(tile_id)
383

  
384
write.table((data_day_s_NAM),
385
            file=file.path(out_dir,paste("data_day_s_NAM_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
386
write.table((data_day_v_NAM),
387
            file=file.path(out_dir,paste("data_day_v_NAM_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
388

  
389
#### Recover subsampling data
390
#For tiles with many stations, there is a subsampling done in terms of distance (spatial pruning) and 
391
#in terms of station numbers if there are still too many stations to consider. This is done at the 
392
#daily and monthly stages.
393

  
394
#lf_sub_sampling_obj_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern=paste("^sub_sampling_obj_",interpolation_method,".*.RData",sep=""),full.names=T)})
395
#lf_sub_sampling_obj_daily_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="^sub_sampling_obj_daily.*.RData",full.names=T)})
396
#sub_sampling_obj <- try(load_obj(lf_sub_sampling_obj_files[[3]])) #This is an example tile
397
#data_removed contains the validation data...
398
#this data can be used for validation of the product. Note that it may be missing for some tiles
399
#as no stations are removed if there are too enough stations in the tile
400
#this will need to be checked later on...
401

  
402
data_month_v_subsampling_list <- mclapply(lf_sub_sampling_obj_files,FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_month_obj,"data_removed"))},mc.preschedule=FALSE,mc.cores = 6)                           
403
#test <- mclapply(list_raster_obj_files[1:6],FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_month_obj,"data_s"))},mc.preschedule=FALSE,mc.cores = 6)                           
404

  
405
names(data_month_v_subsampling_list) <- list_names_tile_id
406

  
407
data_month_v_subsampling_tmp <- remove_from_list_fun(data_month_v_subsampling_list)$list
408
#df_tile_processed$metrics_v <- remove_from_list_fun(data_month_s_list)$valid
409
#if no stations have been removed then there are no validation stations !!!
410
if(length(data_month_v_subsampling_tmp)!=0){
411
  
412
  tile_id <- lapply(1:length(data_month_v_subsampling_tmp),
413
                  FUN=function(i,x){try(rep(names(x)[i],nrow(x[[i]])))},x=data_month_v_subsampling_tmp)
414
  data_month_v_subsmapling_NAM <- do.call(rbind.fill,ddata_month_v_subsampling_tmp) #combined data_month for "NAM" North America
415
  data_month_v_subsampling_NAM$tile_id <- unlist(tile_id)
416

  
417
  write.table((data_month_v_subsampling_NAM),
418
            file=file.path(out_dir,paste("data_month_v_subsampling_NAM_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
419

  
157
  
158
  ##raster_prediction object : contains testing and training stations with RMSE and model object
159
  in_dir_list_tmp <- file.path(in_dir_list,year_predicted)
160
  list_raster_obj_files <- lapply(in_dir_list_tmp,FUN=function(x){list.files(path=x,pattern="^raster_prediction_obj.*.RData",full.names=T)})
161
  basename(dirname(list_raster_obj_files[[1]]))
162
  list_names_tile_coord <- lapply(list_raster_obj_files,FUN=function(x){basename(dirname(x))})
163
  list_names_tile_id <- paste("tile",1:length(list_raster_obj_files),sep="_")
164
  names(list_raster_obj_files)<- list_names_tile_id
165
  
166
  #one level up
167
  lf_covar_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="covar_obj.*.RData",full.names=T)})
168
  lf_covar_tif <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="covar.*.tif",full.names=T)})
169
  
170
  #sub_sampling_obj_daily_gam_CAI_10.0_-75.0.RData
171
  #sub_sampling_obj_gam_CAI_10.0_-75.0.RData
172
  
173
  lf_sub_sampling_obj_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern=paste("^sub_sampling_obj_",interpolation_method,".*.RData",sep=""),full.names=T)})
174
  lf_sub_sampling_obj_daily_files <- lapply(in_dir_list_tmp,FUN=function(x){list.files(path=x,pattern="^sub_sampling_obj_daily.*.RData",full.names=T)})
175
  
176
  ## This will be part of the raster_obj function
177
  #debug(create_raster_prediction_obj)
178
  #out_prefix_str <- paste(basename(in_dir_list),out_prefix,sep="_") 
179
  #lf_raster_obj <- create_raster_prediction_obj(in_dir_list,interpolation_method, y_var_name,out_prefix_str,out_path_list=NULL)
180
  
181
  ################################################################
182
  ######## PART 1: Generate tables to collect information:
183
  ######## over all tiles in North America 
184
  
185
  ##Function to collect all the tables from tiles into a table
186
  ###Table 1: Average accuracy metrics
187
  ###Table 2: daily accuracy metrics for all tiles
188
  
189
  #First create table of tiles under analysis and their coord
190
  df_tile_processed <- data.frame(tile_coord=basename(in_dir_list))
191
  df_tile_processed$tile_id <- unlist(list_names_tile_id) #Arbitrary tiling number!!
192
  df_tile_processed$path_NEX <- in_dir_list
193
  df_tile_processed$year_predicted <- year_predicted
194
  df_tile_processed$sub_sampling_clim  <- lf_sub_sampling_obj_files
195
  df_tile_processed$sub_sampling_daily  <- lf_sub_sampling_obj_daily_files
196
  #lf_sub_sampling_obj_files
197
  
198
  ##Quick exploration of raster object
199
  #Should be commented out to make this a function
200
  #robj1 <- try(load_obj(list_raster_obj_files[[3]])) #This is an example tile
201
  #robj1 <- load_obj(lf_raster_obj[4]) #This is tile tile
202
  
203
  #names(robj1)
204
  #names(robj1$method_mod_obj[[2]]) #for January 1, 2010
205
  #names(robj1$method_mod_obj[[2]]$dailyTmax) #for January
206
  #names(robj1$method_mod_obj[[11]]) #for January 1, 2010
207
  #names(robj1$method_mod_obj[[11]]$dailyTmax) #for January
208
  
209
  #names(robj1$clim_method_mod_obj[[1]]$data_month) #for January
210
  #names(robj1$validation_mod_month_obj[[1]]$data_s) #for January with predictions
211
  #Get the number of models predicted
212
  #nb_mod <- length(unique(robj1$tb_diagnostic_v$pred_mod))#
213
  #list_formulas <- (robj1$clim_method_mod_obj[[1]]$formulas)
214
  #dates_predicted <- (unique(robj1$tb_diagnostic_v$date))
215
  
216
  
217
  #list_tb_diagnostic_v <- mclapply(lf_validation_obj,FUN=function(x){try( x<- load_obj(x)); try(extract_from_list_obj(x,"metrics_v"))},mc.preschedule=FALSE,mc.cores = 6)                           
218
  #names(list_tb_diagnostic_v) <- list_names_tile_id
219
  
220
  ################
221
  #### Table 1: Average accuracy metrics per tile and predictions
222
  
223
  #can use a maximum of 6 cores on the NEX Bridge
224
  #For 28 tiles and  28 RData boject it takes 15-16 min
225
  #summary_metrics_v_list <- mclapply(list_raster_obj_files[5:6],FUN=function(x){try( x<- load_obj(x)); try(x[["summary_metrics_v"]]$avg)},mc.preschedule=FALSE,mc.cores = 2)                           
226
  
227
  summary_metrics_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try( x<- load_obj(x)); try(x[["summary_metrics_v"]]$avg)},mc.preschedule=FALSE,mc.cores = num_cores)                         
228
  #summary_metrics_v_list <- lapply(summary_metrics_v_list,FUN=function(x){try(x$avg)})
229
  names(summary_metrics_v_list) <- list_names_tile_id
230
  
231
  summary_metrics_v_tmp <- remove_from_list_fun(summary_metrics_v_list)$list
232
  df_tile_processed$metrics_v <- as.integer(remove_from_list_fun(summary_metrics_v_list)$valid)
233
  #Now remove "try-error" from list of accuracy)
234
  
235
  summary_metrics_v_NA <- do.call(rbind.fill,summary_metrics_v_tmp) #create a df for NA tiles with all accuracy metrics
236
  #tile_coord <- lapply(1:length(summary_metrics_v_list),FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=summary_metrics_v_list)
237
  #add the tile id identifier
238
  tile_id_tmp <- lapply(1:length(summary_metrics_v_tmp),
239
                        FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=summary_metrics_v_tmp,y=names(summary_metrics_v_tmp))
240
  #adding tile id summary data.frame
241
  summary_metrics_v_NA$tile_id <-unlist(tile_id_tmp)
242
  summary_metrics_v_NA$n <- as.integer(summary_metrics_v_NA$n)
243
  
244
  summary_metrics_v_NA <- merge(summary_metrics_v_NA,df_tile_processed[,1:2],by="tile_id")
245
  
246
  tx<-strsplit(as.character(summary_metrics_v_NA$tile_coord),"_")
247
  lat<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][1]},x=tx))
248
  long<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][2]},x=tx))
249
  summary_metrics_v_NA$lat <- lat
250
  summary_metrics_v_NA$lon <- long
251
  
252
  list_out_files
253
  write.table(as.data.frame(summary_metrics_v_NA),
254
              file=file.path(out_dir,paste("summary_metrics_v2_NA_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
255
  
256
  #################
257
  ###Table 2: daily validation/testing accuracy metrics for all tiles
258
  #this takes about 15min for 28 tiles (reg4)
259
  #tb_diagnostic_v_list <- lapply(list_raster_obj_files,FUN=function(x){x<-load_obj(x);x[["tb_diagnostic_v"]]})                           
260
  tb_diagnostic_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_diagnostic_v"]])},mc.preschedule=FALSE,mc.cores = num_cores)                           
261
  
262
  names(tb_diagnostic_v_list) <- list_names_tile_id
263
  tb_diagnostic_v_tmp <- remove_from_list_fun(tb_diagnostic_v_list)$list
264
  #df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid
265
  
266
  tb_diagnostic_v_NA <- do.call(rbind.fill,tb_diagnostic_v_tmp) #create a df for NA tiles with all accuracy metrics
267
  tile_id_tmp <- lapply(1:length(tb_diagnostic_v_tmp),
268
                        FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_diagnostic_v_tmp,y=names(tb_diagnostic_v_tmp))
269
  
270
  tb_diagnostic_v_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile
271
  
272
  tb_diagnostic_v_NA <- merge(tb_diagnostic_v_NA,df_tile_processed[,1:2],by="tile_id")
273
  
274
  write.table((tb_diagnostic_v_NA),
275
              file=file.path(out_dir,paste("tb_diagnostic_v_NA_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
276
  
277
  #################
278
  ###Table 3: monthly fit/training accuracy information for all tiles
279
  
280
  ## Monthly fitting information
281
  tb_month_diagnostic_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_month_diagnostic_s"]])},mc.preschedule=FALSE,mc.cores = num_cores)                           
282
  
283
  names(tb_month_diagnostic_s_list) <- list_names_tile_id
284
  tb_month_diagnostic_s_tmp <- remove_from_list_fun(tb_month_diagnostic_s_list)$list
285
  #df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid
286
  
287
  tb_month_diagnostic_s_NA <- do.call(rbind.fill,tb_month_diagnostic_s_tmp) #create a df for NA tiles with all accuracy metrics
288
  tile_id_tmp <- lapply(1:length(tb_month_diagnostic_s_tmp),
289
                        FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_month_diagnostic_s_tmp,y=names(tb_month_diagnostic_s_tmp))
290
  
291
  tb_month_diagnostic_s_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile
292
  
293
  tb_month_diagnostic_s_NA <- merge(tb_month_diagnostic_s_NA,df_tile_processed[,1:2],by="tile_id")
294
  
295
  date_f<-strptime(tb_month_diagnostic_s_NA$date, "%Y%m%d")   # interpolation date being processed
296
  tb_month_diagnostic_s_NA$month<-strftime(date_f, "%m")          # current month of the date being processed
297
  
298
  write.table((tb_month_diagnostic_s_NA),
299
              file=file.path(out_dir,paste("tb_month_diagnostic_s_NA_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
300
  
301
  #################
302
  ###Table 4: daily fit/training accuracy information with predictions for all tiles
303
  
304
  ## daily fit info:
305
  
306
  tb_diagnostic_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_diagnostic_s"]])},mc.preschedule=FALSE,mc.cores = num_cores)                           
307
  
308
  names(tb_diagnostic_s_list) <- list_names_tile_id
309
  tb_diagnostic_s_tmp <- remove_from_list_fun(tb_diagnostic_s_list)$list
310
  #df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid
311
  
312
  tb_diagnostic_s_NA <- do.call(rbind.fill,tb_diagnostic_s_tmp) #create a df for NA tiles with all accuracy metrics
313
  tile_id_tmp <- lapply(1:length(tb_diagnostic_s_tmp),
314
                        FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_diagnostic_s_tmp,y=names(tb_diagnostic_s_tmp))
315
  
316
  tb_diagnostic_s_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile
317
  
318
  tb_diagnostic_s_NA <- merge(tb_diagnostic_s_NA,df_tile_processed[,1:2],by="tile_id")
319
  
320
  write.table((tb_diagnostic_s_NA),
321
              file=file.path(out_dir,paste("tb_diagnostic_s_NA_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
322
  
323
  ##### Table 5: Add later on: daily info
324
  ### with also data_s and data_v saved!!!
325
  
326
  #Insert here...compute input and predicted ranges to spot potential errors?
327
  
328
  ### Make this part a function...this is repetitive
329
  ##### SPDF of Monhtly Station info
330
  #load data_month for specific tiles
331
  #10.45pm
332
  #data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
333
  #names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
334
  
335
  #data_month_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x$validation_mod_month_obj[["data_s"]])},mc.preschedule=FALSE,mc.cores = 6)                           
336
  data_month_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_month_obj,"data_s"))},mc.preschedule=FALSE,mc.cores = 6)                           
337
  #test <- mclapply(list_raster_obj_files[1:6],FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_month_obj,"data_s"))},mc.preschedule=FALSE,mc.cores = 6)                           
338
  
339
  names(data_month_s_list) <- list_names_tile_id
340
  
341
  data_month_tmp <- remove_from_list_fun(data_month_s_list)$list
342
  #df_tile_processed$metrics_v <- remove_from_list_fun(data_month_s_list)$valid
343
  
344
  tile_id <- lapply(1:length(data_month_tmp),
345
                    FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=data_month_tmp)
346
  data_month_NAM <- do.call(rbind.fill,data_month_tmp) #combined data_month for "NAM" North America
347
  data_month_NAM$tile_id <- unlist(tile_id)
348
  
349
  write.table((data_month_NAM),
350
              file=file.path(out_dir,paste("data_month_s_NAM_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
351
  
352
  #Get validation data?? Find other object from within the dir 
353
  #Som region don't have validation data at monthly time scale.
354
  
355
  #### SPDF of daily Station info
356
  #load data_month for specific tiles
357
  #data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
358
  #names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
359
  
360
  data_day_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_obj,"data_s"))},mc.preschedule=FALSE,mc.cores = num_cores)    
361
  data_day_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_obj,"data_v"))},mc.preschedule=FALSE,mc.cores = num_cores)    
362
  
363
  names(data_day_s_list) <- list_names_tile_id
364
  names(data_day_v_list) <- list_names_tile_id
365
  
366
  data_day_s_tmp <- remove_from_list_fun(data_day_s_list)$list
367
  data_day_v_tmp <- remove_from_list_fun(data_day_v_list)$list
368
  
369
  #df_tile_processed$metrics_v <- remove_from_list_fun(data_month_s_list)$valid
370
  
371
  tile_id <- lapply(1:length(data_day_s_tmp),
372
                    FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=data_day_s_tmp)
373
  data_day_s_NAM <- do.call(rbind.fill,data_day_s_tmp) #combined data_month for "NAM" North America
374
  data_day_s_NAM$tile_id <- unlist(tile_id)
375
  
376
  tile_id <- lapply(1:length(data_day_v_tmp),
377
                    FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=data_day_v_tmp)
378
  data_day_v_NAM <- do.call(rbind.fill,data_day_v_tmp) #combined data_month for "NAM" North America
379
  data_day_v_NAM$tile_id <- unlist(tile_id)
380
  
381
  write.table((data_day_s_NAM),
382
              file=file.path(out_dir,paste("data_day_s_NAM_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
383
  write.table((data_day_v_NAM),
384
              file=file.path(out_dir,paste("data_day_v_NAM_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
385
  
386
  #### Recover subsampling data
387
  #For tiles with many stations, there is a subsampling done in terms of distance (spatial pruning) and 
388
  #in terms of station numbers if there are still too many stations to consider. This is done at the 
389
  #daily and monthly stages.
390
  
391
  #lf_sub_sampling_obj_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern=paste("^sub_sampling_obj_",interpolation_method,".*.RData",sep=""),full.names=T)})
392
  #lf_sub_sampling_obj_daily_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="^sub_sampling_obj_daily.*.RData",full.names=T)})
393
  #sub_sampling_obj <- try(load_obj(lf_sub_sampling_obj_files[[3]])) #This is an example tile
394
  #data_removed contains the validation data...
395
  #this data can be used for validation of the product. Note that it may be missing for some tiles
396
  #as no stations are removed if there are too enough stations in the tile
397
  #this will need to be checked later on...
398
  
399
  data_month_v_subsampling_list <- mclapply(lf_sub_sampling_obj_files,FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_month_obj,"data_removed"))},mc.preschedule=FALSE,mc.cores = 6)                           
400
  #test <- mclapply(list_raster_obj_files[1:6],FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_month_obj,"data_s"))},mc.preschedule=FALSE,mc.cores = 6)                           
401
  
402
  names(data_month_v_subsampling_list) <- list_names_tile_id
403
  
404
  data_month_v_subsampling_tmp <- remove_from_list_fun(data_month_v_subsampling_list)$list
405
  #df_tile_processed$metrics_v <- remove_from_list_fun(data_month_s_list)$valid
406
  #if no stations have been removed then there are no validation stations !!!
407
  if(length(data_month_v_subsampling_tmp)!=0){
408
    
409
    tile_id <- lapply(1:length(data_month_v_subsampling_tmp),
410
                      FUN=function(i,x){try(rep(names(x)[i],nrow(x[[i]])))},x=data_month_v_subsampling_tmp)
411
    data_month_v_subsmapling_NAM <- do.call(rbind.fill,ddata_month_v_subsampling_tmp) #combined data_month for "NAM" North America
412
    data_month_v_subsampling_NAM$tile_id <- unlist(tile_id)
413
    
414
    write.table((data_month_v_subsampling_NAM),
415
                file=file.path(out_dir,paste("data_month_v_subsampling_NAM_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
416
    
417
  }
418
  
419
  ## Do the same for daily...
420
  ## End of potential function started in line 317...this section will be cut down for simplicity
421
  
422
  ######################################################
423
  ####### PART 3: EXAMINE STATIONS AND MODEL FITTING ###
424
  
425
  ### Stations and model fitting ###
426
  #summarize location and number of training and testing used by tiles
427
  
428
  #names(robj1$clim_method_mod_obj[[1]]$data_month) # monthly data for January
429
  #names(robj1$validation_mod_month_obj[[1]]$data_s) # daily for January with predictions
430
  #note that there is no holdout in the current run at the monthly time scale:
431
  
432
  #robj1$clim_method_mod_obj[[1]]$data_month_v #zero rows for testing stations at monthly timescale
433
  #load data_month for specific tiles
434
  data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
435
  
436
  #names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
437
  
438
  use_day=TRUE
439
  use_month=TRUE
440
  
441
  #list_raster_obj_files <- c("/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-100.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-100.0.RData",
442
  #                    "/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-105.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-105.0.RData")
443
  
444
  list_names_tile_id <- df_tile_processed$tile_id
445
  list_raster_obj_files[list_names_tile_id]
446
  #list_names_tile_id <- c("tile_1","tile_2")
447
  list_param_training_testing_info <- list(list_raster_obj_files[list_names_tile_id],use_month,use_day,list_names_tile_id)
448
  names(list_param_training_testing_info) <- c("list_raster_obj_files","use_month","use_day","list_names_tile_id")
449
  
450
  list_param <- list_param_training_testing_info
451
  #debug(extract_daily_training_testing_info)
452
  #pred_data_info <- extract_daily_training_testing_info(1,list_param=list_param_training_testing_info)
453
  pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = num_cores)
454
  #pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id][1:6]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = 6)
455
  #pred_data_info <- lapply(1:length(list_raster_obj_files),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
456
  #pred_data_info <- lapply(1:length(list_raster_obj_files[1]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
457
  
458
  pred_data_info_tmp <- remove_from_list_fun(pred_data_info)$list #remove data not predicted
459
  ##Add tile nanmes?? it is alreaready there
460
  #names(pred_data_info)<-list_names_tile_id
461
  pred_data_month_info <- do.call(rbind,lapply(pred_data_info_tmp,function(x){x$pred_data_month_info}))
462
  pred_data_day_info <- do.call(rbind,lapply(pred_data_info_tmp,function(x){x$pred_data_day_info}))
463
  
464
  #putput inforamtion in csv !!
465
  write.table(pred_data_month_info,
466
              file=file.path(out_dir,paste("pred_data_month_info_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
467
  write.table(pred_data_day_info,
468
              file=file.path(out_dir,paste("pred_data_day_info_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
469
  
470
  ######################################################
471
  ####### PART 4: Get shapefile tiling with centroids ###
472
  
473
  #get shape files for the region being assessed:
474
  
475
  list_shp_world <- list.files(path=in_dir_shp,pattern=".*.shp",full.names=T)
476
  l_shp <- gsub(".shp","",basename(list_shp_world))
477
  l_shp <- sub("shp_","",l_shp)
478
  
479
  #l_shp <- unlist(lapply(1:length(list_shp_world),
480
  #                       FUN=function(i){paste(strsplit(list_shp_world[i],"_")[[1]][3:4],collapse="_")}))
481
  l_shp <- unlist(lapply(1:length(l_shp),
482
                         FUN=function(i){paste(strsplit(l_shp[i],"_")[[1]][1:2],collapse="_")}))
483
  
484
  df_tiles_all <- as.data.frame(as.character(unlist(list_shp_world)))
485
  df_tiles_all$tile_coord <- l_shp
486
  #names(df_tiles_all) <- "list_shp_world"
487
  names(df_tiles_all) <- c("shp_files","tile_coord")
488
  matching_index <- match(basename(in_dir_list),l_shp)
489
  list_shp_reg_files <- list_shp_world[matching_index]
490
  df_tile_processed$shp_files <-list_shp_reg_files
491
  #df_tile_processed$shp_files <- ""
492
  #df_tile_processed$tile_coord <- as.character(df_tile_processed$tile_coord)
493
  #test <- df_tile_processed
494
  #test$shp_files <- NULL
495
  #test3 <- merge(test,df_tiles_all,by=c("tile_coord"))
496
  #test3 <- merge(df_tiles_all,test,by=c("tile_coord"))
497
  #merge(df_tile_processed,df_tiles_all,by="shp_files")
498
  
499
  tx<-strsplit(as.character(df_tile_processed$tile_coord),"_")
500
  lat<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][1]},x=tx))
501
  long<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][2]},x=tx))
502
  df_tile_processed$lat <- lat
503
  df_tile_processed$lon <- long
504
  
505
  #put that list in the df_processed and also the centroids!!
506
  write.table(df_tile_processed,
507
              file=file.path(out_dir,paste("df_tile_processed_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
508
  
509
  write.table(df_tiles_all,
510
              file=file.path(out_dir,paste("df_tiles_all_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
511
  
512
  #Copy to local home directory on NAS-NEX
513
  #
514
  dir.create(file.path(out_dir,"shapefiles"))
515
  file.copy(list_shp_world,file.path(out_dir,"shapefiles"))
516
  
517
  #save a list of all files...
518
  write.table(df_tiles_all,
519
              file=file.path(out_dir,"shapefiles",paste("df_tiles_all_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
520
  
521
  ###Prepare files for copying back?
522
  
523
  ## Prepare list of files to return...
524
  return(1)
420 525
}
421 526

  
422
## Do the same for daily...
423
## End of potential function started in line 317...this section will be cut down for simplicity
424

  
425
######################################################
426
####### PART 3: EXAMINE STATIONS AND MODEL FITTING ###
427

  
428
### Stations and model fitting ###
429
#summarize location and number of training and testing used by tiles
430

  
431
#names(robj1$clim_method_mod_obj[[1]]$data_month) # monthly data for January
432
#names(robj1$validation_mod_month_obj[[1]]$data_s) # daily for January with predictions
433
#note that there is no holdout in the current run at the monthly time scale:
434

  
435
#robj1$clim_method_mod_obj[[1]]$data_month_v #zero rows for testing stations at monthly timescale
436
#load data_month for specific tiles
437
data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
438

  
439
#names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
440

  
441
use_day=TRUE
442
use_month=TRUE
443
 
444
#list_raster_obj_files <- c("/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-100.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-100.0.RData",
445
#                    "/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-105.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-105.0.RData")
446

  
447
list_names_tile_id <- df_tile_processed$tile_id
448
list_raster_obj_files[list_names_tile_id]
449
#list_names_tile_id <- c("tile_1","tile_2")
450
list_param_training_testing_info <- list(list_raster_obj_files[list_names_tile_id],use_month,use_day,list_names_tile_id)
451
names(list_param_training_testing_info) <- c("list_raster_obj_files","use_month","use_day","list_names_tile_id")
452
 
453
list_param <- list_param_training_testing_info
454
#debug(extract_daily_training_testing_info)
455
#pred_data_info <- extract_daily_training_testing_info(1,list_param=list_param_training_testing_info)
456
pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = num_cores)
457
#pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id][1:6]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = 6)
458
#pred_data_info <- lapply(1:length(list_raster_obj_files),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
459
#pred_data_info <- lapply(1:length(list_raster_obj_files[1]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
460

  
461
pred_data_info_tmp <- remove_from_list_fun(pred_data_info)$list #remove data not predicted
462
##Add tile nanmes?? it is alreaready there
463
#names(pred_data_info)<-list_names_tile_id
464
pred_data_month_info <- do.call(rbind,lapply(pred_data_info_tmp,function(x){x$pred_data_month_info}))
465
pred_data_day_info <- do.call(rbind,lapply(pred_data_info_tmp,function(x){x$pred_data_day_info}))
466

  
467
#putput inforamtion in csv !!
468
write.table(pred_data_month_info,
469
            file=file.path(out_dir,paste("pred_data_month_info_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
470
write.table(pred_data_day_info,
471
            file=file.path(out_dir,paste("pred_data_day_info_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
472

  
473
######################################################
474
####### PART 4: Get shapefile tiling with centroids ###
475

  
476
#get shape files for the region being assessed:
477

  
478
list_shp_world <- list.files(path=in_dir_shp,pattern=".*.shp",full.names=T)
479
l_shp <- gsub(".shp","",basename(list_shp_world))
480
l_shp <- sub("shp_","",l_shp)
481

  
482
#l_shp <- unlist(lapply(1:length(list_shp_world),
483
#                       FUN=function(i){paste(strsplit(list_shp_world[i],"_")[[1]][3:4],collapse="_")}))
484
l_shp <- unlist(lapply(1:length(l_shp),
485
                       FUN=function(i){paste(strsplit(l_shp[i],"_")[[1]][1:2],collapse="_")}))
486

  
487
df_tiles_all <- as.data.frame(as.character(unlist(list_shp_world)))
488
df_tiles_all$tile_coord <- l_shp
489
#names(df_tiles_all) <- "list_shp_world"
490
names(df_tiles_all) <- c("shp_files","tile_coord")
491
matching_index <- match(basename(in_dir_list),l_shp)
492
list_shp_reg_files <- list_shp_world[matching_index]
493
df_tile_processed$shp_files <-list_shp_reg_files
494
#df_tile_processed$shp_files <- ""
495
#df_tile_processed$tile_coord <- as.character(df_tile_processed$tile_coord)
496
#test <- df_tile_processed
497
#test$shp_files <- NULL
498
#test3 <- merge(test,df_tiles_all,by=c("tile_coord"))
499
#test3 <- merge(df_tiles_all,test,by=c("tile_coord"))
500
#merge(df_tile_processed,df_tiles_all,by="shp_files")
501

  
502
tx<-strsplit(as.character(df_tile_processed$tile_coord),"_")
503
lat<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][1]},x=tx))
504
long<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][2]},x=tx))
505
df_tile_processed$lat <- lat
506
df_tile_processed$lon <- long
507

  
508
#put that list in the df_processed and also the centroids!!
509
write.table(df_tile_processed,
510
            file=file.path(out_dir,paste("df_tile_processed_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
511

  
512
write.table(df_tiles_all,
513
            file=file.path(out_dir,paste("df_tiles_all_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
514

  
515
#Copy to local home directory on NAS-NEX
516
#
517
dir.create(file.path(out_dir,"shapefiles"))
518
file.copy(list_shp_world,file.path(out_dir,"shapefiles"))
519

  
520
#save a list of all files...
521
write.table(df_tiles_all,
522
            file=file.path(out_dir,"shapefiles",paste("df_tiles_all_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
523

  
524
###Prepare files for copying back?
525

  
526 527
##################### END OF SCRIPT ######################
527 528

  
528 529

  

Also available in: Unified diff