Project

General

Profile

« Previous | Next » 

Revision 16cc2876

Added by Benoit Parmentier over 10 years ago

global scaling up assessment part 2: automation of figures for evaluation using tables and mosaics from part1

View differences:

climate/research/oregon/interpolation/global_run_scalingup_assessment_part2.R
1
####################################  INTERPOLATION OF TEMPERATURES  #######################################
2
############################  Script for assessment of scaling up on NEX ##############################
3
#This script uses the worklfow code applied to the globe. Results currently reside on NEX/PLEIADES NASA.
4
#Accuracy methods are added in the the function scripts to evaluate results.
5
#Analyses, figures, tables and data are also produced in the script.
6
#AUTHOR: Benoit Parmentier 
7
#CREATED ON: 03/23/2014  
8
#MODIFIED ON: 05/14/2014            
9
#Version: 3
10
#PROJECT: Environmental Layers project                                     
11
#################################################################################################
12

  
13
### Loading R library and packages        
14
#library used in the workflow production:
15
library(gtools)                              # loading some useful tools 
16
library(mgcv)                                # GAM package by Simon Wood
17
library(sp)                                  # Spatial pacakge with class definition by Bivand et al.
18
library(spdep)                               # Spatial pacakge with methods and spatial stat. by Bivand et al.
19
library(rgdal)                               # GDAL wrapper for R, spatial utilities
20
library(gstat)                               # Kriging and co-kriging by Pebesma et al.
21
library(fields)                              # NCAR Spatial Interpolation methods such as kriging, splines
22
library(raster)                              # Hijmans et al. package for raster processing
23
library(gdata)                               # various tools with xls reading, cbindX
24
library(rasterVis)                           # Raster plotting functions
25
library(parallel)                            # Parallelization of processes with multiple cores
26
library(maptools)                            # Tools and functions for sp and other spatial objects e.g. spCbind
27
library(maps)                                # Tools and data for spatial/geographic objects
28
library(reshape)                             # Change shape of object, summarize results 
29
library(plotrix)                             # Additional plotting functions
30
library(plyr)                                # Various tools including rbind.fill
31
library(spgwr)                               # GWR method
32
library(automap)                             # Kriging automatic fitting of variogram using gstat
33
library(rgeos)                               # Geometric, topologic library of functions
34
#RPostgreSQL                                 # Interface R and Postgres, not used in this script
35
library(gridExtra)
36
#Additional libraries not used in workflow
37
library(pgirmess)                            # Krusall Wallis test with mulitple options, Kruskalmc {pgirmess}  
38
library(colorRamps)
39

  
40
#### FUNCTION USED IN SCRIPT
41

  
42
function_analyses_paper1 <-"contribution_of_covariates_paper_interpolation_functions_10222013.R" #first interp paper
43
function_analyses_paper2 <-"multi_timescales_paper_interpolation_functions_05052014.R"
44

  
45
load_obj <- function(f)
46
{
47
  env <- new.env()
48
  nm <- load(f, env)[1]
49
  env[[nm]]
50
}
51

  
52
create_dir_fun <- function(out_dir,out_suffix){
53
  if(!is.null(out_suffix)){
54
    out_name <- paste("output_",out_suffix,sep="")
55
    out_dir <- file.path(out_dir,out_name)
56
  }
57
  #create if does not exists
58
  if(!file.exists(out_dir)){
59
    dir.create(out_dir)
60
  }
61
  return(out_dir)
62
}
63

  
64
extract_list_from_list_obj<-function(obj_list,list_name){
65
  library(plyr)
66
  
67
  #Create a list of an object from a given list of object using a name prodived as input
68
  
69
  list_tmp<-vector("list",length(obj_list))
70
  for (i in 1:length(obj_list)){
71
    tmp <- obj_list[[i]][[list_name]] #double bracket to return data.frame
72
    list_tmp[[i]] <- as.data.frame(tmp) #deal with spdf...
73
  }
74
  return(list_tmp) #this is  a data.frame
75
}
76

  
77
#This extract a data.frame object from raster prediction obj and combine them in one data.frame 
78
extract_from_list_obj<-function(obj_list,list_name){
79
  #extract object from list of list. This useful for raster_prediction_obj
80
  library(plyr)
81
  
82
  list_tmp<-vector("list",length(obj_list))
83
  for (i in 1:length(obj_list)){
84
    tmp<-obj_list[[i]][[list_name]] #double bracket to return data.frame
85
    list_tmp[[i]]<- as.data.frame(tmp) #if spdf
86
  }
87
  tb_list_tmp<-do.call(rbind.fill,list_tmp) #long rownames
88
  #tb_list_tmp<-do.call(rbind,list_tmp) #long rownames
89
  
90
  return(tb_list_tmp) #this is  a data.frame
91
}
92

  
93
## Function to mosaic modis or other raster images
94

  
95
mosaic_m_raster_list<-function(j,list_param){
96
  #This functions returns a subset of tiles from the modis grid.
97
  #Arguments: modies grid tile,list of tiles
98
  #Output: spatial grid data frame of the subset of tiles
99
  #Note that rasters are assumed to be in the same projection system!!
100
  
101
  #rast_list<-vector("list",length(mosaic_list))
102
  #for (i in 1:length(mosaic_list)){  
103
  # read the individual rasters into a list of RasterLayer objects
104
  # this may be changed so that it is not read in the memory!!!
105
  
106
  #parse output...
107
  
108
  #j<-list_param$j
109
  mosaic_list<-list_param$mosaic_list
110
  out_path<-list_param$out_path
111
  out_names<-list_param$out_rastnames
112
  file_format <- list_param$file_format
113
  NA_flag_val <- list_param$NA_flag_val
114
  out_suffix <- list_param$out_suffix
115
  ## Start
116
  
117
  if(class(mosaic_list[[j]])=="list"){
118
    m_list <- unlist(mosaic_list[[j]])
119
  }
120
  input.rasters <- lapply(m_list, raster)
121
  mosaiced_rast<-input.rasters[[1]]
122
  
123
  for (k in 2:length(input.rasters)){
124
    mosaiced_rast<-mosaic(mosaiced_rast,input.rasters[[k]], fun=mean)
125
    #mosaiced_rast<-mosaic(mosaiced_rast,raster(input.rasters[[k]]), fun=mean)
126
  }
127
  
128
  data_name<-paste("mosaiced_",sep="") #can add more later...
129
  #raster_name<-paste(data_name,out_names[j],".tif", sep="")
130
  raster_name<-paste(data_name,out_names[j],file_format, sep="")
131
  
132
  writeRaster(mosaiced_rast, NAflag=NA_flag_val,filename=file.path(out_path,raster_name),overwrite=TRUE)  
133
  #Writing the data in a raster file format...  
134
  rast_list<-file.path(out_path,raster_name)
135
  
136
  ## The Raster and rgdal packages write temporary files on the disk when memory is an issue. This can potential build up
137
  ## in long  loops and can fill up hard drives resulting in errors. The following  sections removes these files 
138
  ## as they are created in the loop. This code section  can be transformed into a "clean-up function later on
139
  ## Start remove
140
  #tempfiles<-list.files(tempdir(),full.names=T) #GDAL transient files are not removed
141
  #files_to_remove<-grep(out_suffix,tempfiles,value=T) #list files to remove
142
  #if(length(files_to_remove)>0){
143
  #  file.remove(files_to_remove)
144
  #}
145
  #now remove temp files from raster package located in rasterTmpDir
146
  removeTmpFiles(h=0) #did not work if h is not set to 0
147
  ## end of remove section
148
  
149
  return(rast_list)
150
}
151

  
152
extract_daily_training_testing_info<- function(i,list_param){
153
  #This function extracts training and testing information from the raster object produced for each tile
154
  #This is looping through tiles...
155
  
156
  ### Function:
157
  pred_data_info_fun <- function(k,list_data,pred_mod,sampling_dat_info){
158
    
159
    data <- list_data[[k]]
160
    sampling_dat <- sampling_dat_info[[k]]
161
    if(data_day!="try-error"){
162
      n <- nrow(data)
163
      n_mod <- vector("numeric",length(pred_mod))
164
      for(j in 1:length(pred_mod)){
165
        n_mod[j] <- sum(!is.na(data[[pred_mod[j]]]))
166
      }
167
      n <- rep(n,length(pred_mod))
168
      sampling_dat <- sampling_dat[rep(seq_len(nrow(sampling_dat)), each=length(pred_mod)),]
169
      row.names(sampling_dat) <- NULL
170
      df_n <- data.frame(n,n_mod,pred_mod)
171
      df_n <- cbind(df_n,sampling_dat)
172
    }else{        
173
      n <- rep(NA,length(pred_mod))
174
      n_mod <- vector("numeric",length(pred_mod))
175
      n_mod <- rep(NA,length(pred_mod))
176
      df_n <- data.frame(n,n_mod,pred_mod)
177
      sampling_dat <- sampling_dat[rep(seq_len(nrow(sampling_dat)), each=length(pred_mod)),]
178
      row.names(sampling_dat) <- NULL
179
      df_n <- data.frame(n,n_mod,pred_mod)
180
      df_n <- cbind(df_n,sampling_dat)
181

  
182
    }
183
    return(df_n)
184
  }
185

  
186
  ##### Parse parameters and arguments ####
187
  
188
  raster_obj_file <- list_param$list_raster_obj_files[i]
189
  use_month <- list_param$use_month
190
  use_day <- list_param$use_day
191
  tile_id <- list_param$list_names_tile_id[i]
192
  
193
  ### Start script ##
194
  
195
  raster_obj <- load_obj(unlist(raster_obj_file)) #may not need unlist
196
  nb_models <- length((raster_obj$clim_method_mod_obj[[1]]$formulas))
197
  pred_mod <- paste("mod",c(1:nb_models,"_kr"),sep="")
198
  #we are assuming no monthly hold out...
199
  #we are assuming only one specific daily prop?
200
  nb_models <- length(pred_mod)
201
  #names(raster_obj$method_mod_obj[[1]])
202
  var_interp <- unique(raster_obj$tb_diagnostic_s$var_interp)
203
  method_interp <- unique(raster_obj$tb_diagnostic_s$method_interp)
204
  
205
  if(use_day==TRUE){
206
    
207
    list_data_day_v <- try(extract_list_from_list_obj(raster_obj$validation_mod_obj,"data_v"))
208
    list_data_day_s <- try(extract_list_from_list_obj(raster_obj$validation_mod_obj,"data_s"))
209
    sampling_dat_day <- extract_list_from_list_obj(raster_obj$method_mod_obj,"daily_dev_sampling_dat")
210
    list_pred_data_day_s_info <- lapply(1:length(sampling_dat_day),FUN=pred_data_info_fun,
211
           list_data=list_data_day_s,pred_mod=pred_mod,sampling_dat_info=sampling_dat_day)
212
    list_pred_data_day_v_info <- lapply(1:length(sampling_dat_day),FUN=pred_data_info_fun,
213
           list_data=list_data_day_v,pred_mod=pred_mod,sampling_dat_info=sampling_dat_day)
214
    pred_data_day_s_info <- do.call(rbind,list_pred_data_day_s_info)
215
    pred_data_day_v_info <- do.call(rbind,list_pred_data_day_v_info)
216
    pred_data_day_s_info$training <- rep(1,nrow(pred_data_day_s_info)) 
217
    pred_data_day_v_info$training <- rep(0,nrow(pred_data_day_v_info)) 
218
    pred_data_day_info <-rbind(pred_data_day_v_info,pred_data_day_s_info)
219
    
220
    pred_data_day_info$method_interp <- rep(method_interp,nrow(pred_data_day_info)) 
221
    pred_data_day_info$var_interp <- rep(var_interp,nrow(pred_data_day_info)) 
222
    pred_data_day_info$tile_id <- rep(tile_id,nrow(pred_data_day_info)) 
223

  
224
    #pred_data_day_s_info$method_interp <- rep(method_interp,nrow(pred_data_day_s_info)) 
225
    #pred_data_day_s_info$var_interp <- rep(var_interp,nrow(pred_data_day_s_info)) 
226
    #pred_data_day_s_info$tile_id <- rep(tile_id,nrow(pred_data_day_s_info)) 
227
    #pred_data_day_v_info <- do.call(rbind,list_pred_data_day_v_info)
228
    #pred_data_day_v_info$method_interp <- rep(method_interp,nrow(pred_data_day_v_info)) 
229
    #pred_data_day_v_info$var_interp <- rep(var_interp,nrow(pred_data_day_v_info)) 
230
    #pred_data_day_v_info$tile_id <- rep(tile_id,nrow(pred_data_day_v_info)) 
231
                                      
232
  }
233
  if(use_month==TRUE){
234
    
235
    list_data_month_s <- try(extract_list_from_list_obj(raster_obj$validation_mod_month_obj,"data_s"))
236
    list_data_month_v <- try(extract_list_from_list_obj(raster_obj$validation_mod_month_obj,"data_v"))
237
    sampling_dat_month <- extract_list_from_list_obj(raster_obj$clim_method_mod_obj,"sampling_month_dat")
238
    list_pred_data_month_s_info <- lapply(1:length(sampling_dat_month),FUN=pred_data_info_fun,
239
           list_data=list_data_month_s,pred_mod=pred_mod,sampling_dat_info=sampling_dat_month)
240
    list_pred_data_month_v_info <- lapply(1:length(sampling_dat_month),FUN=pred_data_info_fun,
241
           list_data=list_data_month_v,pred_mod=pred_mod,sampling_dat_info=sampling_dat_month)
242
    
243
    #combine training and testing later? also combined with accuracy
244
    pred_data_month_s_info <- do.call(rbind,list_pred_data_month_s_info)
245
    pred_data_month_v_info <- do.call(rbind,list_pred_data_month_v_info)
246
    
247
    pred_data_month_v_info$training <- rep(0,nrow(pred_data_month_v_info))
248
    pred_data_month_s_info$training <- rep(1,nrow(pred_data_month_v_info))
249
    pred_data_month_info <- rbind(pred_data_month_v_info,pred_data_month_s_info)
250
    
251
    pred_data_month_info$method_interp <- rep(method_interp,nrow(pred_data_month_info)) 
252
    pred_data_month_info$var_interp <- rep(var_interp,nrow(pred_data_month_info)) 
253
    pred_data_month_info$tile_id <- rep(tile_id,nrow(pred_data_month_info)) 
254

  
255
    #pred_data_month_s_info$method_interp <- rep(method_interp,nrow(pred_data_month_s_info)) 
256
    #pred_data_month_s_info$var_interp <- rep(var_interp,nrow(pred_data_month_s_info)) 
257
    #pred_data_month_s_info$tile_id <- rep(tile_id,nrow(pred_data_month_s_info)) 
258
    #pred_data_month_v_info$method_interp <- rep(method_interp,nrow(pred_data_month_v_info)) 
259
    #pred_data_month_v_info$var_interp <- rep(var_interp,nrow(pred_data_month_v_info)) 
260
    #pred_data_month_v_info$tile_id <- rep(tile_id,nrow(pred_data_month_v_info)) 
261

  
262
  }    
263
    
264
  if(use_month==FALSE){
265
    pred_data_month_info <- NULL
266
  }
267
  if(use_day==FALSE){
268
    pred_data_day_info <- NULL
269
  }
270
  
271
  #prepare object to return
272
  pred_data_info_obj <- list(pred_data_month_info,pred_data_day_info)
273
  names(pred_data_info_obj) <- c("pred_data_month_info","pred_data_day_info")
274
  #could add data.frame data_s and data_v later...
275

  
276
  return(pred_data_info_obj)
277
}
278

  
279
remove_from_list_fun <- function(l_x,condition_class ="try-error"){
280
  index <- vector("list",length(l_x))
281
  for (i in 1:length(l_x)){
282
    if (inherits(l_x[[i]],condition_class)){
283
      index[[i]] <- FALSE #remove from list
284
    }else{
285
      index[[i]] <- TRUE
286
    }
287
  }
288
  l_x<-l_x[unlist(index)] #remove from list all elements using subset
289
  
290
  obj <- list(l_x,index)
291
  names(obj) <- c("list","valid")
292
  return(obj)
293
}
294

  
295
##Function to list predicted tif
296
list_tif_fun <- function(i,in_dir_list,pattern_str){
297
  #list.files(path=x,pattern=".*predicted_mod1_0_1.*20100101.*.tif",full.names=T)})
298
  pat_str<- pattern_str[i]
299
  list_tif_files_dates <-lapply(in_dir_list,
300
         FUN=function(x,pat_str){list.files(path=x,pattern=pat_str,full.names=T)},pat_str=pat_str)
301
  return(list_tif_files_dates)
302
} 
303

  
304
##############################
305
#### Parameters and constants  
306

  
307
#in_dir1 <- "/data/project/layers/commons/NEX_data/test_run1_03232014/output" #On Atlas
308
in_dir1 <- "/nobackupp4/aguzman4/climateLayers/output4" #On NEX
309

  
310
#in_dir_list <- list.dirs(path=in_dir1) #get the list of directories with resutls by 10x10 degree tiles
311
#use subset for now:
312

  
313
in_dir_list <- file.path(in_dir1,read.table(file.path(in_dir1,"processed.txt"))$V1)
314
#in_dir_list <- as.list(in_dir_list[-1])
315
#in_dir_list <- in_dir_list[grep("bak",basename(basename(in_dir_list)),invert=TRUE)] #the first one is the in_dir1
316
#in_dir_shp <- in_dir_list[grep("shapefiles",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles...
317
in_dir_shp <- "/nobackupp4/aguzman4/climateLayers/output4/subset/shapefiles/"
318
#in_dir_list <- in_dir_list[grep("shapefiles",basename(in_dir_list),invert=TRUE)] 
319
#the first one is the in_dir1
320
# the last directory contains shapefiles 
321
y_var_name <- "dailyTmax"
322
interpolation_method <- c("gam_CAI")
323
out_prefix<-"run2_global_analyses_05122014"
324

  
325
#out_dir<-"/data/project/layers/commons/NEX_data/" #On NCEAS Atlas
326
out_dir <- "/nobackup/bparmen1/" #on NEX
327
#out_dir <-paste(out_dir,"_",out_prefix,sep="")
328
create_out_dir_param <- TRUE
329

  
330
#system("ls /nobackup/bparmen1")
331

  
332
if(create_out_dir_param==TRUE){
333
  out_dir <- create_dir_fun(out_dir,out_prefix)
334
  setwd(out_dir)
335
}else{
336
  setwd(out_dir) #use previoulsy defined directory
337
}
338

  
339
setwd(out_dir)
340
                                   
341
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
342

  
343
##raster_prediction object : contains testing and training stations with RMSE and model object
344

  
345
list_raster_obj_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="^raster_prediction_obj.*.RData",full.names=T)})
346
basename(dirname(list_raster_obj_files[[1]]))
347
list_names_tile_coord <- lapply(list_raster_obj_files,FUN=function(x){basename(dirname(x))})
348
list_names_tile_id <- paste("tile",1:length(list_raster_obj_files),sep="_")
349
names(list_raster_obj_files)<- list_names_tile_id
350

  
351
########################## START SCRIPT ##############################
352

  
353
################################################################
354
######## PART 1: Generate tables to collect information 
355
######## over all tiles in North America 
356

  
357
##Function to collect all the tables from tiles into a table
358
###Table 1: Average accuracy metrics
359
###Table 2: daily accuracy metrics for all tiles
360

  
361
#First create table of tiles under analysis and their coord
362
df_tile_processed <- data.frame(tile_coord=basename(in_dir_list))
363
df_tile_processed$tile_id <- unlist(list_names_tile_id)
364

  
365
##Quick exploration of raster object
366
robj1 <- load_obj(list_raster_obj_files[[1]])
367
names(robj1)
368
names(robj1$method_mod_obj[[1]]) #for January 1, 2010
369
names(robj1$method_mod_obj[[1]]$dailyTmax) #for January
370

  
371
names(robj1$clim_method_mod_obj[[1]]$data_month) #for January
372
names(robj1$validation_mod_month_obj[[1]]$data_s) #for January with predictions
373

  
374
################
375
#### Table 1: Average accuracy metrics
376

  
377
#can use a maximum of 6 cores on the NEX Bridge
378
summary_metrics_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try( x<- load_obj(x)); try(x[["summary_metrics_v"]]$avg)},mc.preschedule=FALSE,mc.cores = 6)                           
379
names(summary_metrics_v_list) <- list_names_tile_id
380

  
381
summary_metrics_v_tmp <- remove_from_list_fun(summary_metrics_v_list)$list
382
df_tile_processed$metrics_v <- remove_from_list_fun(summary_metrics_v_list)$valid
383
#Now remove "try-error" from list of accuracy)
384

  
385
summary_metrics_v_NA <- do.call(rbind.fill,summary_metrics_v_tmp) #create a df for NA tiles with all accuracy metrics
386
#tile_coord <- lapply(1:length(summary_metrics_v_list),FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=summary_metrics_v_list)
387
#add the tile id identifier
388
tile_id_tmp <- lapply(1:length(summary_metrics_v_tmp),
389
                     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=summary_metrics_v_tmp,y=names(summary_metrics_v_tmp))
390
#adding tile id summary data.frame
391
summary_metrics_v_NA$tile_id <-unlist(tile_id_tmp)
392
summary_metrics_v_NA$n <- as.integer(summary_metrics_v_NA$n)
393
write.table(as.data.frame(summary_metrics_v_NA),
394
            file=file.path(out_dir,paste("summary_metrics_v2_NA_",out_prefix,".txt",sep="")),sep=",")
395

  
396
#################
397
###Table 2: daily accuracy metrics for all tiles
398
#this takes about 25min
399
#tb_diagnostic_v_list <- lapply(list_raster_obj_files,FUN=function(x){x<-load_obj(x);x[["tb_diagnostic_v"]]})                           
400
tb_diagnostic_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_diagnostic_v"]])},mc.preschedule=FALSE,mc.cores = 6)                           
401

  
402
names(tb_diagnostic_v_list) <- list_names_tile_id
403
tb_diagnostic_v_tmp <- remove_from_list_fun(tb_diagnostic_v_list)$list
404
#df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid
405

  
406
tb_diagnostic_v_NA <- do.call(rbind.fill,tb_diagnostic_v_tmp) #create a df for NA tiles with all accuracy metrics
407
tile_id_tmp <- lapply(1:length(tb_diagnostic_v_tmp),
408
                     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_diagnostic_v_tmp,y=names(tb_diagnostic_v_tmp))
409

  
410
tb_diagnostic_v_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile
411

  
412
tb_diagnostic_v_NA <- merge(tb_diagnostic_v_NA,df_tile_processed[,1:2],by="tile_id")
413

  
414
write.table((tb_diagnostic_v_NA),
415
            file=file.path(out_dir,paste("tb_diagnostic_v_NA","_",out_prefix,".txt",sep="")),sep=",")
416

  
417
#################
418
###Table 3: monthly station information with predictions for all tiles
419

  
420
#load data_month for specific tiles
421
# data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
422
# names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
423
# 
424
# data_month_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x$validation_mod_month_obj[["data_s"]])},mc.preschedule=FALSE,mc.cores = 6)                           
425
# 
426
# names(data_month_s_list) <- list_names_tile_id
427
# 
428
# data_month_tmp <- remove_from_list_fun(data_month_s_list)$list
429
# #df_tile_processed$metrics_v <- remove_from_list_fun(data_month_s_list)$valid
430
# 
431
# tile_id <- lapply(1:length(data_month_tmp),
432
#                   FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=data_month_tmp)
433
# data_month_NAM <- do.call(rbind.fill,data_month_list) #combined data_month for "NAM" North America
434
# data_month_NAM$tile_id <- unlist(tile_id)
435
# 
436
# write.table((data_month_NAM),
437
#             file=file.path(out_dir,paste("data_month_s_NAM","_",out_prefix,".txt",sep="")),sep=",")
438

  
439
##### Table 4: Add later on: daily info
440
### with also data_s and data_v saved!!!
441

  
442
#Copy back to atlas
443
system("scp -p ./*.txt parmentier@atlas.nceas.ucsb.edu:/data/project/layers/commons/NEX_data/output_run2_global_analyses_05122014")
444
#system("scp -p ./*.txt ./*.tif parmentier@atlas.nceas.ucsb.edu:/data/project/layers/commons/NEX_data/output_run2_global_analyses_05122014")
445

  
446
######################################################
447
####### PART 2 CREATE MOSAIC OF PREDICTIONS PER DAY ###
448

  
449
dates_l <- unique(robj1$tb_diagnostic_s$date) #list of dates to query tif
450

  
451
#First mosaic mod1
452

  
453
## make this a function? report on number of tiles used for mosaic...
454

  
455
#inputs: build a pattern to find files
456
y_var_name <- "dailyTmax"
457
interpolation_method <- c("gam_CAI")
458
name_method <- paste(interpolation_method,"_",y_var_name,"_",sep="")
459
l_pattern_models <- lapply(c(".*predicted_mod1_0_1.*",".*predicted_mod2_0_1.*",".*predicted_mod3_0_1.*"),
460
                           FUN=function(x){paste(x,dates_l,".*.tif",sep="")})
461
out_prefix_s <- paste(name_method,c("predicted_mod1_0_01","predicted_mod2_0_01","predicted_mod3_0_01"),sep="")
462
dates_l #list of predicted dates
463
                      
464
for (i in 1:lenth(l_pattern_models)){
465
  
466
  l_pattern_mod <- l_pattern_models[[i]]
467
  #out_prefix_s <-    
468
    
469
  l_out_rastnames_var <- paste("gam_CAI_dailyTmax_","predicted_mod1_0_01_",dates_l,sep="")
470

  
471
  #list_tif_files_dates <- list_tif_fun(1,in_dir_list,l_pattern_mod)
472

  
473
  ##List of predicted tif ...
474
  list_tif_files_dates <-lapply(1:length(l_pattern_mod),FUN=list_tif_fun, 
475
                              in_dir_list=in_dir_list,pattern_str=l_pattern_mod)
476
  #save(list_tif_files_dates, file=paste("list_tif_files_dates","_",out_prefix,".RData",sep=""))
477

  
478

  
479
  mosaic_list_var <- list_tif_files_dates
480
  out_rastnames_var <- l_out_rastnames_var  
481

  
482
  file_format <- ".tif"
483
  NA_flag_val <- -9999
484

  
485
  j<-1
486
  list_param_mosaic<-list(j,mosaic_list_var,out_rastnames_var,out_dir,file_format,NA_flag_val)
487
  names(list_param_mosaic)<-c("j","mosaic_list","out_rastnames","out_path","file_format","NA_flag_val")
488
  list_var_mosaiced <- mclapply(1:2,FUN=mosaic_m_raster_list,list_param=list_param_mosaic,mc.preschedule=FALSE,mc.cores = 1)
489
  #list_var_mosaiced <- mclapply(1:365,FUN=mosaic_m_raster_list,list_param=list_param_mosaic,mc.preschedule=FALSE,mc.cores = 2)
490

  
491
}
492

  
493
### Now find out how many files were predicted
494

  
495
l_pattern_mod1<-paste(".*predicted_mod1_0_1.*",dates_l,".*.tif",sep="")
496

  
497
l_f_t12 <- list.files(path=in_dir_list[12],".*predicted_mod1_0_1.*")
498

  
499

  
500
l_f_bytiles<-lapply(in_dir_list,function(x){list.files(path=x,pattern=".*predicted_mod1_0_1.*")})
501

  
502

  
503
unlist(lapply(l_f_bytiles,length))
504

  
505
system("scp -p ./*.tif parmentier@atlas.nceas.ucsb.edu:/data/project/layers/commons/NEX_data/output_run2_global_analyses_05122014")
506

  
507
######################################################
508
####### PART 3: EXAMINE STATIONS AND MODEL FITTING ###
509

  
510
### Stations and model fitting ###
511
#summarize location and number of training and testing used by tiles
512

  
513
names(robj1$clim_method_mod_obj[[1]]$data_month) # monthly data for January
514
#names(robj1$validation_mod_month_obj[[1]]$data_s) # daily for January with predictions
515
#note that there is no holdout in the current run at the monthly time scale:
516

  
517
robj1$clim_method_mod_obj[[1]]$data_month_v #zero rows for testing stations at monthly timescale
518
#load data_month for specific tiles
519
data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
520

  
521
names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
522

  
523
#problem with tile 12...the raster ojbect has missing sub object
524
#data_month_list <- lapply(1:length(list_raster_obj_files),x=list_raster_obj_files,
525
#                          FUN=function(i,x){x<-load_obj(x[[i]]);
526
#                                            extract_from_list_obj(x$validation_mod_month_obj,"data_s")})                           
527

  
528
### make this part a function:
529

  
530
#create a table for every month, day and tiles...
531
# data_month_list <- lapply(1:length(list_raster_obj_files),x=list_raster_obj_files,
532
#                           FUN=function(i,x){x<-load_obj(x[[i]]);
533
#                                             extract_from_list_obj(x$clim_method_mod_obj,"data_month")})                           
534
# 
535
# names(data_month_list) <- paste("tile","_",1:length(data_month_list),sep="")
536
# 
537
# #names(data_month_list) <- basename(in_dir_list) #use folder id instead
538
# 
539
# list_names_tile_id <- paste("tile",1:length(list_raster_obj_files),sep="_")
540
# 
541
# #tile_id <- lapply(1:length(data_month_list),
542
# #                  FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=data_month_list)
543
# 
544
# data_month_NAM <- do.call(rbind.fill,data_month_list) #combined data_month for "NAM" North America
545
# data_month_NAM$tile_id <- unlist(tile_id)
546
# 
547
# names(robj1$validation_mod_day_obj[[1]]$data_s) # daily for January with predictions
548
# dim(robj1$validation_mod_month_obj[[1]]$data_s) # daily for January with predictions
549
# 
550
# use_day=TRUE
551
# use_month=TRUE
552
# 
553
# list_param_training_testing_info <- list(list_raster_obj_files,use_month,use_day,list_names_tile_id)
554
# names(list_param_training_testing_info) <- c("list_raster_obj_files","use_month","use_day","list_names_tile_id")
555
# 
556
# list_param <- list_param_training_testing_info
557
# 
558
# pred_data_info <- lapply(1:length(list_raster_obj_files),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
559
# 
560
# pred_data_month_info <- do.call(rbind,lapply(pred_data_info,function(x){x$pred_data_month_info}))
561
# pred_data_day_info <- do.call(rbind,lapply(pred_data_info,function(x){x$pred_data_day_info}))
562

  
563
######################################################
564
####### PART 4: GENERATE DIAGNOSTIC plots for the area analyzed ###
565

  
566
### Create a combined shape file for all region?
567

  
568
#get centroid
569
#plot the average RMSE at the centroid??
570
#quick kriging for every tile?
571
                    
572
### Create a combined boxplot for every tile (can also do that in pannel)
573
### Create a quick mosaic (mean method...)
574
#mean predicitons
575
#mean of kriging error?
576
#plot(mm_01 ~ elev_s,data=data_month_NAM) #Jan across all tiles
577
#plot(mm_06 ~ elev_s,data=data_month_NAM) #June across all tiles
578
#plot(TMax ~ mm_01,data=data_month_NAM) #monthly tmax averages and LST across all tiles
579
              
580
tb <- read.table(file.path(out_dir,"tb_diagnostic_v2_NA_run1_NA_analyses_03232013.txt"),sep=",")
581
summary_metrics_v <- read.table(file.path(out_dir,"summary_metrics_v2_NA_run1_NA_analyses_03232013.txt"),sep=",")
582

  
583
strsplit(unique(tb$tile_id),"_")
584
tb$tile_id <- factor(tb$tile_id, levels=unique(tb$tile_id))
585
#tb$tile_id <- as.character(tb$tile_id)
586
boxplot(rmse~tile_id,data=subset(tb,tb$pred_mod=="mod1"),
587
        names=1:24)
588
title("RMSE per tile")
589
#bwplot(rmse~as.factor(tile_id), data=subset(tb,tb$pred_mod=="mod1"))
590

  
591
#Boxplot of RMSE by model
592
boxplot(rmse~pred_mod,data=tb)#,names=tb$pred_mod)
593
title("RMSE per model over 24 tiles")
594

  
595
#Turn summary table to a point shp
596

  
597
tx<-strsplit(as.character(summary_metrics_v$tile_coord),"_")
598
lat<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][1]},x=tx))
599
long<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][2]},x=tx))
600
summary_metrics_v$lat <- lat
601
summary_metrics_v$lon <- long
602

  
603
coordinates(summary_metrics_v) <- cbind(long,lat) 
604
 
605
ac_mod1 <- summary_metrics_v[summary_metrics_v$pred_mod=="mod1",]
606
  
607
plot(r2)  
608
#plot(ac_mod1,cex=sqrt(ac_mod1$rmse),pch=1,add=T)
609
plot(ac_mod1,cex=(ac_mod1$rmse^2)/10,pch=1,add=T)
610
plot(ac_mod1,cex=(ac_mod1$rmse1)*2,pch=1,add=T)
611

  
612
df <-arrange(as.data.frame(ac_mod1),desc(rmse))[,c("rmse","mae","tile_id")]
613
#View(df)
614
#quick kriging...
615
autokrige(rmse~1,r2,)
616
### COMBINED SHAPEFILES AND EXAMING CENTROID
617

  
618
##Get the list of shapefiles
619
in_dir_list_NEX <- read.table("/data/project/layers/commons/NEX_data/test_run1_03232014/output/in_dir_list.txt",sep=" ")
620

  
621
list.files(path=in_dir_shp,pattern=paste("*.",as.character(in_dir_list_NEX[1,1]),".*.shp",sep=""))
622
list_shp_reg_files <- list.files(path=in_dir_shp,pattern="*.shp")
623
list_shp_reg_files2 <- list_shp_reg_files[grep("shapefiles",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles...
624

  
625
tx <- strsplit(as.character(in_dir_list_NEX[,1]),"_")
626
lat_shp<- lapply(1:length(tx),function(i,x){x[[i]][1]},x=tx)
627
long_shp<- lapply(1:length(tx),function(i,x){x[[i]][2]},x=tx)
628

  
629
summary_metrics
630
#print.numeric<-function(x, digits = 2) formatC(x, digits = digits, format = "f")
631
#print.numeric(long_shp[[1]])
632

  
633
pattern_str<-lapply(1:length(long_shp),function(i,y,x){paste(y[[i]],"0_",x[[i]],"0",sep="")},y=lat_shp,x=long_shp)
634
#Select the 24 tiles that were used in the predictions based on lat, long
635
list_shp_reg_files <- lapply(pattern_str,FUN=function(x){list.files(path=in_dir_shp,
636
                                                                        pattern=paste("*.",x,".*.shp$",sep=""),full.names=T)})
637
###
638
#OK now get the shapefiles merged
639
#ghcn_dat <- readOGR(dsn=dirname(met_stations_obj$monthly_covar_ghcn_data),
640
#        sub(".shp","",basename(met_stations_obj$monthly_covar_ghcn_data)))
641

  
642
#plot(r44)
643

  
644
#usa_map <- getData('GADM', country='USA', level=1) #Get US map
645
usa_map <- getData('GADM', country='USA',level=1) #Get US map, this is not working right now
646
usa_map_2 <- usa_map[usa_map$NAME_1!="Alaska",] #remove Alaska
647
usa_map_2 <- usa_map_2[usa_map_2$NAME_1!="Hawaii",] #remove Hawai 
648

  
649
centroids_pts <- vector("list",length(list_shp_reg_files))
650
shps_tiles <- vector("list",length(list_shp_reg_files))
651
#collect info
652
for(i in 1:length(list_shp_reg_files)){
653
  shp1<-readOGR(dirname(list_shp_reg_files[[i]]),sub(".shp","",basename(list_shp_reg_files[[i]])))
654
  pt <- gCentroid(shp1)
655
  centroids_pts[[i]] <-pt
656
  shps_tiles[[i]] <- shp1
657
}
658
#plot info: with labels
659
plot(usa_map_2)
660
for(i in 1:length(list_shp_reg_files)){
661
  shp1 <- shps_tiles[[i]]
662
  pt <- centroids_pts[[i]]
663
  plot(shp1,add=T,border="blue")
664
  #plot(pt,add=T,cex=2,pch=5)
665
  text(coordinates(pt)[1],coordinates(pt)[2],labels=i,cex=1,col=c("red"))
666
}
667
title("Tiles location 10x10 degrees for NAM")
668

  
669
## Now plot RMSE: with labels
670
plot(usa_map_2)
671
for(i in 1:length(list_shp_reg_files)){
672
  shp1 <- shps_tiles[[i]]
673
  pt <- centroids_pts[[i]]
674
  plot(shp1,add=T,border="blue")
675
  #plot(pt,add=T,cex=2,pch=5)
676
  text(coordinates(pt)[1],coordinates(pt)[2],labels=i,cex=1,col=c("red"))
677
}
678
title("Tiles location 10x10 degrees for NAM")
679

  
680

  
681
#unique(summaty_metrics$tile_id)
682
#text(lat-shp,)
683
#union(list_shp_reg_files[[1]],list_shp_reg_files[[2]])
684

  
685
l_m_tif <- list.files(pattern="*mosaiced.*.tif")
686
r1 <- raster("mosaiced_gam_fusion_dailyTmax_predicted_mod1_0_01_20100101.tif")
687
r2 <- raster("mosaiced_gam_fusion_dailyTmax_predicted_mod1_0_01_20100901.tif")
688

  
689
pred_s <- stack(l_m_tif[2:5])
690
levelplot(pred_s)
691

  
692
##################### END OF SCRIPT ######################

Also available in: Unified diff