Project

General

Profile

« Previous | Next » 

Revision 563fe056

Added by Benoit Parmentier over 10 years ago

assessment NEX run 3 North America with prediction info for training and testing stations

View differences:

climate/research/oregon/interpolation/global_run_scalingup_assessment_part1.R
5 5
#Part 1 create summary tables and inputs for figure in part 2 and part 3.
6 6
#AUTHOR: Benoit Parmentier 
7 7
#CREATED ON: 03/23/2014  
8
#MODIFIED ON: 06/19/2014            
8
#MODIFIED ON: 06/27/2014            
9 9
#Version: 3
10 10
#PROJECT: Environmental Layers project  
11 11
#TO DO:
......
61 61
    out_name <- paste("output_",out_suffix,sep="")
62 62
    out_dir <- file.path(out_dir,out_name)
63 63
  }
64
  #create if does not exists
64
  #create if does not exists: create the output dir as defined 
65 65
  if(!file.exists(out_dir)){
66 66
    dir.create(out_dir)
67 67
  }
......
158 158

  
159 159
### Function:
160 160
pred_data_info_fun <- function(k,list_data,pred_mod,sampling_dat_info){
161
  #Summarizing input info
161
  #Summarizing input info from sampling and df used in training/testing
162 162
    
163 163
  data <- list_data[[k]]
164 164
  sampling_dat <- sampling_dat_info[[k]]
165
  if(data_day!="try-error"){
165
  if(data!="try-error"){
166 166
    n <- nrow(data)
167 167
    n_mod <- vector("numeric",length(pred_mod))
168 168
    for(j in 1:length(pred_mod)){
......
188 188
  return(df_n)
189 189
}
190 190

  
191
extract_daily_training_testing_info<- function(i,list_param){
191
extract_daily_training_testing_info <- function(i,list_param){
192 192
  #This function extracts training and testing information from the raster object produced for each tile
193 193
  #This is looping through tiles...
194
  
195 194
  ##Functions used
196 195
  #Defined outside this script:
197 196
  #pred_data_info_fun
......
220 219
    list_data_day_v <- try(extract_list_from_list_obj(raster_obj$validation_mod_obj,"data_v"))
221 220
    list_data_day_s <- try(extract_list_from_list_obj(raster_obj$validation_mod_obj,"data_s"))
222 221
    sampling_dat_day <- extract_list_from_list_obj(raster_obj$method_mod_obj,"daily_dev_sampling_dat")
222
    #debug(pred_data_info_fun)
223
    #list_pred_data_day_s_info <- pred_data_info_fun(1,list_data=list_data_day_s,pred_mod=pred_mod,sampling_dat_info=sampling_dat_day)
223 224
    list_pred_data_day_s_info <- lapply(1:length(sampling_dat_day),FUN=pred_data_info_fun,
224 225
           list_data=list_data_day_s,pred_mod=pred_mod,sampling_dat_info=sampling_dat_day)
225 226
    list_pred_data_day_v_info <- lapply(1:length(sampling_dat_day),FUN=pred_data_info_fun,
......
242 243
    sampling_dat_month <- extract_list_from_list_obj(raster_obj$clim_method_mod_obj,"sampling_month_dat")
243 244
    list_pred_data_month_s_info <- lapply(1:length(sampling_dat_month),FUN=pred_data_info_fun,
244 245
           list_data=list_data_month_s,pred_mod=pred_mod,sampling_dat_info=sampling_dat_month)
245
    list_pred_data_month_v_info <- lapply(1:length(sampling_dat_month),FUN=pred_data_info_fun,
246
           list_data=list_data_month_v,pred_mod=pred_mod,sampling_dat_info=sampling_dat_month)
247
    
246
    list_pred_data_month_v_info <- mclapply(1:length(sampling_dat_month),FUN=pred_data_info_fun,
247
           list_data=list_data_month_v,pred_mod=pred_mod,sampling_dat_info=sampling_dat_month,mc.preschedule=FALSE,mc.cores = 1)
248
    #Note for list_pred_data_month_v_info it will be try-error when there is no holdout.
248 249
    #combine training and testing later? also combined with accuracy
249 250
    pred_data_month_s_info <- do.call(rbind,list_pred_data_month_s_info)
250
    pred_data_month_v_info <- do.call(rbind,list_pred_data_month_v_info)
251
    
252
    pred_data_month_v_info$training <- rep(0,nrow(pred_data_month_v_info))
253
    pred_data_month_s_info$training <- rep(1,nrow(pred_data_month_v_info))
254
    pred_data_month_info <- rbind(pred_data_month_v_info,pred_data_month_s_info)
251
    #Adding a column for training: if data item used as training then it is 1
252
    pred_data_month_s_info$training <- try(rep(1,nrow(pred_data_month_s_info)))
253

  
254
    #Remove try-error??
255
    list_pred_data_month_v_info_tmp <- remove_from_list_fun(list_pred_data_month_v_info)
256
    list_pred_data_month_v_info <- list_pred_data_month_v_info_tmp$list
257
    if (length(list_pred_data_month_v_info)>0){
258
      pred_data_month_v_info <- do.call(rbind,list_pred_data_month_v_info)
259
      pred_data_month_v_info$training <- try(rep(0,nrow(pred_data_month_v_info)))
260
      pred_data_month_info <- rbind(pred_data_month_v_info,pred_data_month_s_info)
261
    }else{
262
      pred_data_month_info <- pred_data_month_s_info
263
    }
255 264
    
256 265
    pred_data_month_info$method_interp <- rep(method_interp,nrow(pred_data_month_info)) 
257 266
    pred_data_month_info$var_interp <- rep(var_interp,nrow(pred_data_month_info)) 
......
263 272
  }
264 273
  if(use_day==FALSE){
265 274
    pred_data_day_info <- NULL
266
  }
267
  
275
  }  
268 276
  #prepare object to return
269 277
  pred_data_info_obj <- list(pred_data_month_info,pred_data_day_info)
270 278
  names(pred_data_info_obj) <- c("pred_data_month_info","pred_data_day_info")
271 279
  #could add data.frame data_s and data_v later...
272

  
280
  ###
273 281
  return(pred_data_info_obj)
274 282
}
275 283

  
......
341 349

  
342 350
setwd(out_dir)
343 351
                                   
344
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
352
CRS_locs_WGS84 <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
345 353

  
346 354
day_to_mosaic <- c("20100101","20100901")
347 355
file_format <- ".tif" #format for mosaiced files
......
472 480

  
473 481
#Insert here...compute input and predicted ranges to spot potential errors?
474 482

  
483
######################################################
484
####### PART 4: Get shapefile tiling with centroids ###
485

  
486
#in_dir_shp <- "/nobackupp4/aguzman4/climateLayers/output4/subset/shapefiles/"
487

  
488
#get shape files for the region being assessed:
489
list_shp_global_tiles_files <- list.files(path=in_dir_shp,pattern="*.shp")
490
l_shp<-lapply(1:length(in_dir_shp_list),FUN=function(i){paste(strsplit(in_dir_shp_list[i],"_")[[1]][2:3],collapse="_")})
491
list_shp_global_tiles_files <- l_shp
492
pattern_str <- basename(in_dir_list)
493
list_shp_global_tiles_files
494
list_shp_reg_files <- lapply(pattern_str,function(x){list_shp_global_tiles_files[grep(x,invert=FALSE,list_shp_global_tiles_files)]}) #select directory with shapefiles...
495
df_tile_processed$shp_files <- unlist(list_shp_reg_files)
496

  
497
tx<-strsplit(as.character(df_tile_processed$tile_coord),"_")
498
lat<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][1]},x=tx))
499
long<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][2]},x=tx))
500
df_tile_processed$lat <- lat
501
df_tile_processed$lon <- long
502

  
503
list_shp_world <- list.files(in_dir_shp,".shp")
504
l_shp <- unlist(lapply(1:length(list_shp_world),FUN=function(i){paste(strsplit(list_shp_world[i],"_")[[1]][2:3],collapse="_")}))
505
list_shp_reg_files <- as.character(df_tile_processed$tile_coord)
506
#matching_index <- match(l_shp,list_shp_reg_files)
507
matching_index <- match(list_shp_reg_files,l_shp)
508
df_tile_processed$shp_files <-list_shp_world[matching_index]
509

  
510
df_tile_processed <- na.omit(df_tile_processed) #remove other list of folders irrelevant
511
list_shp_reg_files <- df_tile_processed$shp_files
512
list_shp_reg_files<- file.path(in_dir_shp,list_shp_reg_files)
513

  
514
#put that list in the df_processed and also the centroids!!
515
write.table(df_tile_processed,
516
            file=file.path(out_dir,paste("df_tile_processed_",out_prefix,".txt",sep="")),sep=",")
517

  
475 518
######################################################
476 519
####### PART 2 CREATE MOSAIC OF PREDICTIONS PER DAY, Delta surfaces and clim ###
477 520

  
......
694 737
# names(robj1$validation_mod_day_obj[[1]]$data_s) # daily for January with predictions
695 738
# dim(robj1$validation_mod_month_obj[[1]]$data_s) # daily for January with predictions
696 739
# 
697
# use_day=TRUE
698
# use_month=TRUE
699
# 
700

  
701
# list_param_training_testing_info <- list(list_raster_obj_files,use_month,use_day,list_names_tile_id)
702
# names(list_param_training_testing_info) <- c("list_raster_obj_files","use_month","use_day","list_names_tile_id")
703
# 
704
# list_param <- list_param_training_testing_info
705
# 
706
# pred_data_info <- lapply(1:length(list_raster_obj_files),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
707
# 
708
# pred_data_month_info <- do.call(rbind,lapply(pred_data_info,function(x){x$pred_data_month_info}))
709
# pred_data_day_info <- do.call(rbind,lapply(pred_data_info,function(x){x$pred_data_day_info}))
710

  
711
######################################################
712
####### PART 4: Get shapefile tiling with centroids ###
713

  
714
#in_dir_shp <- "/nobackupp4/aguzman4/climateLayers/output4/subset/shapefiles/"
715

  
716
#get shape files for the region being assessed:
717
#list_shp_global_tiles_files <- list.files(path=in_dir_shp,pattern="*.shp")
718
#l_shp<-lapply(1:length(in_dir_shp_list),FUN=function(i){paste(strsplit(in_dir_shp_list[i],"_")[[1]][2:3],collapse="_")})
719
list_shp_global_tiles_files <- l_shp
720
pattern_str <- basename(in_dir_list)
721
#list_shp_global_tiles_files
722
list_shp_reg_files <- lapply(pattern_str,function(x){list_shp_global_tiles_files[grep(x,invert=FALSE,list_shp_global_tiles_files)]}) #select directory with shapefiles...
723
df_tile_processed$shp_files <- unlist(list_shp_reg_files)
724

  
725
tx<-strsplit(as.character(df_tile_processed$tile_coord),"_")
726
lat<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][1]},x=tx))
727
long<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][2]},x=tx))
728
df_tile_processed$lat <- lat
729
df_tile_processed$lon <- long
730 740

  
731
#put that list in the df_processed and also the centroids!!
732
write.table(df_tile_processed,
733
            file=file.path(out_dir,paste("df_tile_processed_",out_prefix,".txt",sep="")),sep=",")
741
use_day=TRUE
742
use_month=TRUE
743
 
744
#list_raster_obj_files <- c("/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-100.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-100.0.RData",
745
#                    "/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-105.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-105.0.RData")
746

  
747
list_names_tile_id <- df_tile_processed$tile_id
748
list_raster_obj_files[list_names_tile_id]
749
#list_names_tile_id <- c("tile_1","tile_2")
750
list_param_training_testing_info <- list(list_raster_obj_files[list_names_tile_id],use_month,use_day,list_names_tile_id)
751
names(list_param_training_testing_info) <- c("list_raster_obj_files","use_month","use_day","list_names_tile_id")
752
 
753
list_param <- list_param_training_testing_info
754
#debug(extract_daily_training_testing_info)
755
#pred_data_info <- extract_daily_training_testing_info(1,list_param=list_param_training_testing_info)
756
pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = 6)
757
#pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id][1:6]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = 6)
758
#pred_data_info <- lapply(1:length(list_raster_obj_files),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
759
#pred_data_info <- lapply(1:length(list_raster_obj_files[1]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
760

  
761
pred_data_info_tmp <- remove_from_list_fun(pred_data_info)$list #remove data not predicted
762
##Add tile nanmes?? it is alreaready there
763
#names(pred_data_info)<-list_names_tile_id
764
pred_data_month_info <- do.call(rbind,lapply(pred_data_info_tmp,function(x){x$pred_data_month_info}))
765
pred_data_day_info <- do.call(rbind,lapply(pred_data_info_tmp,function(x){x$pred_data_day_info}))
766

  
767
#putput inforamtion in csv !!
768
write.table(pred_data_month_info,
769
            file=file.path(out_dir,paste("pred_data_month_info_",out_prefix,".txt",sep="")),sep=",")
770
write.table(pred_data_day_info,
771
            file=file.path(out_dir,paste("pred_data_day_info_",out_prefix,".txt",sep="")),sep=",")
734 772

  
735 773
########### LAST PART: COPY SOME DATA BACK TO ATLAS #####
736 774

  
775
### This assumes the tree structure has been replicated on Atlas:
776
#for i in 1:length(df_tiled_processed$tile_coord)
777
output_atlas_dir <- "/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1"
778
#for (i in 1:length(df_tiled_processed$tile_coord)){
779
#  create_dir_fun(file.path(output_atlas_dir,as.character(df_tiled_processed$tile_coord[i])),out_suffix=NULL)
780
#}  
781

  
737 782
#### FIRST COPY DATA FOR SPECIFIC TILES #####
738 783
#Copy specific tiles info back...This assumes that the tree structre 
739 784
#has been created on ATLAS:
......
741 786

  
742 787
list_tile_scp <- c(1,2)
743 788

  
744
for (i in 1:length(list_tile_scp)){
745
  tile_nb <- list_tile_scp[i]
789
for (j in 1:length(list_tile_scp)){
790
  tile_nb <- list_tile_scp[j]
746 791
  #nb_mod <- 3+1 #set up earlier
747 792
  date_selected <- c("20100101","20100901") #should be set up earlier
748 793
  date_index <- c(1,244) #list_day??
......
750 795

  
751 796
  in_dir_tile <- basename(df_tile_processed$path_NEX[tile_nb])
752 797
  #/data/project/layers/commons/NEX_data/output_run2_05122014/output
753
  Atlas_dir <- file.path(file.path("/data/project/layers/commons/NEX_data/",basename(out_dir),"output"),in_dir_tile)
798
  #output_atlas_dir
799
  #Atlas_dir <- file.path(file.path("/data/project/layers/commons/NEX_data/",basename(out_dir),"output"),in_dir_tile)
800
  Atlas_dir <- file.path(output_atlas_dir,in_dir_tile)
754 801
  Atlas_hostname <- "parmentier@atlas.nceas.ucsb.edu"
755 802
  #filenames_NEX <- list_raster_obj_files[tile_nb] #copy raster prediction object
756 803
  #cmd_str <- paste("scp -p",filenames_NEX,paste(Atlas_hostname,Atlas_dir,sep=":"), sep=" ")

Also available in: Unified diff