Project

General

Profile

« Previous | Next » 

Revision a08f31e9

Added by Benoit Parmentier almost 9 years ago

assessment function stage 6, clean up and collecting outputs in assessemnt table

View differences:

climate/research/oregon/interpolation/global_run_scalingup_assessment_part1a.R
5 5
#Part 1 create summary tables and inputs files for figure in part 2 and part 3.
6 6
#AUTHOR: Benoit Parmentier 
7 7
#CREATED ON: 03/23/2014  
8
#MODIFIED ON: 12/29/2015            
8
#MODIFIED ON: 12/31/2015            
9 9
#Version: 4
10 10
#PROJECT: Environmental Layers project  
11 11
#TO DO:
......
13 13
# - Separate call in a master script for assessment
14 14
# - add second stage in the master script for assessment
15 15
# - add mosaicing in the master script for assessment
16
# - clean up the code by making two function to clarify the code and remove repetition 
16 17

  
17 18
#First source these files:
18 19
#Resolved call issues from R.
......
113 114
  
114 115
  year_predicted <- list_param_run_assessment_prediction$list_year_predicted[i] 
115 116
  #region_name is not null then restrict the assessment to a specific region
116
  if(!is.null(region_name)){
117
    in_dir1 <- file.path(in_dir1,region_name)
118
  }
119

  
117
  #if(!is.null(region_name)){
118
  #  in_dir1 <- file.path(in_dir1,region_name)
119
  #}
120
  in_dir1 <- file.path(in_dir1,region_name)
120 121
  
121
  list_outfiles <- vector("list", length=6) #collect names of output files
122
  list_outfiles <- vector("list", length=14) #collect names of output files
122 123
  
123 124
  in_dir_list <- list.dirs(path=in_dir1,recursive=FALSE) #get the list regions processed for this run
124 125
  #basename(in_dir_list)
125 126
  #                       y=in_dir_list) 
126 127
  
127
  in_dir_list_all  <- unlist(lapply(in_dir_list,function(x){list.dirs(path=x,recursive=F)}))
128
  in_dir_list <- in_dir_list_all
128
  #in_dir_list_all  <- unlist(lapply(in_dir_list,function(x){list.dirs(path=x,recursive=F)}))
129
  in_dir_list_all <- in_dir_list
130
  #in_dir_list <- in_dir_list_all
129 131
  #in_dir_list <- in_dir_list[grep("bak",basename(basename(in_dir_list)),invert=TRUE)] #the first one is the in_dir1
130 132
  
131 133
  #this was changed on 10052015 because the shapefiles were not matching!!!
......
138 140
  
139 141
  
140 142
  #select only directories used for predictions
143
  #nested structure, we need to go to higher level to obtain the tiles...
141 144
  in_dir_reg <- in_dir_list[grep(".*._.*.",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles...
142 145
  #in_dir_reg <- in_dir_list[grep("july_tiffs",basename(in_dir_reg),invert=TRUE)] #select directory with shapefiles...
143 146
  in_dir_list <- in_dir_reg
......
164 167
  ##raster_prediction object : contains testing and training stations with RMSE and model object
165 168
  in_dir_list_tmp <- file.path(in_dir_list,year_predicted)
166 169
  list_raster_obj_files <- lapply(in_dir_list_tmp,FUN=function(x){list.files(path=x,pattern="^raster_prediction_obj.*.RData",full.names=T)})
167
  basename(dirname(list_raster_obj_files[[1]]))
170

  
168 171
  list_names_tile_coord <- lapply(list_raster_obj_files,FUN=function(x){basename(dirname(x))})
169 172
  list_names_tile_id <- paste("tile",1:length(list_raster_obj_files),sep="_")
170 173
  names(list_raster_obj_files)<- list_names_tile_id
......
173 176
  lf_covar_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="covar_obj.*.RData",full.names=T)})
174 177
  lf_covar_tif <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="covar.*.tif",full.names=T)})
175 178
  
176
  #sub_sampling_obj_daily_gam_CAI_10.0_-75.0.RData
177
  #sub_sampling_obj_gam_CAI_10.0_-75.0.RData
178
  
179 179
  lf_sub_sampling_obj_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern=paste("^sub_sampling_obj_",interpolation_method,".*.RData",sep=""),full.names=T)})
180 180
  lf_sub_sampling_obj_daily_files <- lapply(in_dir_list_tmp,FUN=function(x){list.files(path=x,pattern="^sub_sampling_obj_daily.*.RData",full.names=T)})
181 181
  
182
  ## This will be part of the raster_obj function
183
  #debug(create_raster_prediction_obj)
184
  #out_prefix_str <- paste(basename(in_dir_list),out_prefix,sep="_") 
185
  #lf_raster_obj <- create_raster_prediction_obj(in_dir_list,interpolation_method, y_var_name,out_prefix_str,out_path_list=NULL)
186
  
187 182
  ################################################################
188 183
  ######## PART 1: Generate tables to collect information:
189 184
  ######## over all tiles in North America 
......
197 192
  df_tile_processed$tile_id <- unlist(list_names_tile_id) #Arbitrary tiling number!!
198 193
  df_tile_processed$path_NEX <- in_dir_list
199 194
  df_tile_processed$year_predicted <- year_predicted
200
  df_tile_processed$sub_sampling_clim  <- lf_sub_sampling_obj_files
201
  df_tile_processed$sub_sampling_daily  <- lf_sub_sampling_obj_daily_files
195
  #Deal with the abscence of subsampling object for specific tiles
196
  lf_sub_sampling_obj_files_tmp <- lapply(1:length(lf_sub_sampling_obj_files),FUN=function(i,x){val <- x[[i]];if(length(val)==0){val<-0};val},x=lf_sub_sampling_obj_files)
197
  lf_sub_sampling_obj_daily_files_tmp <- lapply(1:length(lf_sub_sampling_obj_daily_files),FUN=function(i,x){val <- x[[i]];if(length(val)==0){val<-0};val},x=lf_sub_sampling_obj_daily_files)
198
  df_tile_processed$sub_sampling_clim  <- unlist(lf_sub_sampling_obj_files_tmp)
199
  df_tile_processed$sub_sampling_daily  <- unlist(lf_sub_sampling_obj_daily_files_tmp)
202 200
  #lf_sub_sampling_obj_files
203 201
  
204
  ##Quick exploration of raster object
205
  #Should be commented out to make this a function
206
  #robj1 <- try(load_obj(list_raster_obj_files[[3]])) #This is an example tile
207
  #robj1 <- load_obj(lf_raster_obj[4]) #This is tile tile
208
  
209
  #names(robj1)
210
  #names(robj1$method_mod_obj[[2]]) #for January 1, 2010
211
  #names(robj1$method_mod_obj[[2]]$dailyTmax) #for January
212
  #names(robj1$method_mod_obj[[11]]) #for January 1, 2010
213
  #names(robj1$method_mod_obj[[11]]$dailyTmax) #for January
214
  
215
  #names(robj1$clim_method_mod_obj[[1]]$data_month) #for January
216
  #names(robj1$validation_mod_month_obj[[1]]$data_s) #for January with predictions
217
  #Get the number of models predicted
218
  #nb_mod <- length(unique(robj1$tb_diagnostic_v$pred_mod))#
219
  #list_formulas <- (robj1$clim_method_mod_obj[[1]]$formulas)
220
  #dates_predicted <- (unique(robj1$tb_diagnostic_v$date))
221
  
222
  
223
  #list_tb_diagnostic_v <- mclapply(lf_validation_obj,FUN=function(x){try( x<- load_obj(x)); try(extract_from_list_obj(x,"metrics_v"))},mc.preschedule=FALSE,mc.cores = 6)                           
224
  #names(list_tb_diagnostic_v) <- list_names_tile_id
225
  
202
 
226 203
  ################
227 204
  #### Table 1: Average accuracy metrics per tile and predictions
228 205
  
......
255 232
  summary_metrics_v_NA$lat <- lat
256 233
  summary_metrics_v_NA$lon <- long
257 234
  
258
  list_out_files
259 235
  write.table(as.data.frame(summary_metrics_v_NA),
260 236
              file=file.path(out_dir,paste("summary_metrics_v2_NA_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
261 237
  
238
  list_outfiles[[1]] <- file.path(out_dir,paste("summary_metrics_v2_NA_",year_predicted,"_",out_prefix,".txt",sep=""))
239
    
262 240
  #################
263 241
  ###Table 2: daily validation/testing accuracy metrics for all tiles
264 242
  #this takes about 15min for 28 tiles (reg4)
......
279 257
  
280 258
  write.table((tb_diagnostic_v_NA),
281 259
              file=file.path(out_dir,paste("tb_diagnostic_v_NA_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
282
  
260
 
261
  list_outfiles[[2]] <- file.path(out_dir,paste("tb_diagnostic_v_NA_",year_predicted,"_",out_prefix,".txt",sep=""))
262
 
283 263
  #################
284 264
  ###Table 3: monthly fit/training accuracy information for all tiles
285 265
  
......
303 283
  
304 284
  write.table((tb_month_diagnostic_s_NA),
305 285
              file=file.path(out_dir,paste("tb_month_diagnostic_s_NA_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
286

  
287
  list_outfiles[[3]] <- file.path(out_dir,paste("tb_month_diagnostic_s_NA_",year_predicted,"_",out_prefix,".txt",sep=""))
306 288
  
307 289
  #################
308 290
  ###Table 4: daily fit/training accuracy information with predictions for all tiles
......
325 307
  
326 308
  write.table((tb_diagnostic_s_NA),
327 309
              file=file.path(out_dir,paste("tb_diagnostic_s_NA_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
310
  list_outfiles[[4]] <- file.path(out_dir,paste("tb_diagnostic_s_NA_",year_predicted,"_",out_prefix,".txt",sep=""))
328 311
  
329 312
  ##### Table 5: Add later on: daily info
330 313
  ### with also data_s and data_v saved!!!
......
354 337
  
355 338
  write.table((data_month_NAM),
356 339
              file=file.path(out_dir,paste("data_month_s_NAM_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
340
  list_outfiles[[5]] <- file.path(out_dir,paste("data_month_s_NAM_",year_predicted,"_",out_prefix,".txt",sep=""))
357 341
  
358
  #Get validation data?? Find other object from within the dir 
359
  #Som region don't have validation data at monthly time scale.
360
  
361
  #### SPDF of daily Station info
362
  #load data_month for specific tiles
363
  #data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
364
  #names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
342
  ##### Table 6 and table 7: stations for daily predictions
365 343
  
366 344
  data_day_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_obj,"data_s"))},mc.preschedule=FALSE,mc.cores = num_cores)    
367 345
  data_day_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_obj,"data_v"))},mc.preschedule=FALSE,mc.cores = num_cores)    
......
388 366
              file=file.path(out_dir,paste("data_day_s_NAM_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
389 367
  write.table((data_day_v_NAM),
390 368
              file=file.path(out_dir,paste("data_day_v_NAM_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
369
  list_outfiles[[6]] <- file.path(out_dir,paste("data_day_s_NAM_",year_predicted,"_",out_prefix,".txt",sep=""))
370
  list_outfiles[[7]] <- file.path(out_dir,paste("data_day_v_NAM_",year_predicted,"_",out_prefix,".txt",sep=""))
371
  
372
  ##### Table 8: validation stations for monthly predictions
391 373
  
392 374
  #### Recover subsampling data
393 375
  #For tiles with many stations, there is a subsampling done in terms of distance (spatial pruning) and 
......
404 386
  
405 387
  data_month_v_subsampling_list <- mclapply(lf_sub_sampling_obj_files,FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_month_obj,"data_removed"))},mc.preschedule=FALSE,mc.cores = 6)                           
406 388
  #test <- mclapply(list_raster_obj_files[1:6],FUN=function(x){try(x<-load_obj(x));try(extract_from_list_obj(x$validation_mod_month_obj,"data_s"))},mc.preschedule=FALSE,mc.cores = 6)                           
407
  
408 389
  names(data_month_v_subsampling_list) <- list_names_tile_id
409
  
410 390
  data_month_v_subsampling_tmp <- remove_from_list_fun(data_month_v_subsampling_list)$list
411 391
  #df_tile_processed$metrics_v <- remove_from_list_fun(data_month_s_list)$valid
412 392
  #if no stations have been removed then there are no validation stations !!!
413 393
  if(length(data_month_v_subsampling_tmp)!=0){
414
    
415 394
    tile_id <- lapply(1:length(data_month_v_subsampling_tmp),
416 395
                      FUN=function(i,x){try(rep(names(x)[i],nrow(x[[i]])))},x=data_month_v_subsampling_tmp)
417 396
    data_month_v_subsmapling_NAM <- do.call(rbind.fill,ddata_month_v_subsampling_tmp) #combined data_month for "NAM" North America
418 397
    data_month_v_subsampling_NAM$tile_id <- unlist(tile_id)
419
    
420 398
    write.table((data_month_v_subsampling_NAM),
421 399
                file=file.path(out_dir,paste("data_month_v_subsampling_NAM_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
422
    
400
    list_outfiles[[8]] <- file.path(out_dir,paste("data_month_v_subsampling_NAM_",year_predicted,"_",out_prefix,".txt",sep=""))
401
  }else{
402
    list_outfiles[[8]] <- NA
423 403
  }
424 404
  
425
  ## Do the same for daily...
426
  ## End of potential function started in line 317...this section will be cut down for simplicity
405
  ##### Table 9: validation accuracy metrics for monthly predictions
406
  
407
  #Get validation data?? Find other object from within the dir 
408
  #Som region don't have validation data at monthly time scale.
409

  
410
  #### To be changed later...there is no validation data at this stage
411
  ## Monthly fitting information
412
  #tb_month_diagnostic_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_month_diagnostic_v"]])},mc.preschedule=FALSE,mc.cores = num_cores)                           
413
  #names(tb_month_diagnostic_v_list) <- list_names_tile_id
414
  #tb_month_diagnostic_v_tmp <- remove_from_list_fun(tb_month_diagnostic_v_list)$list
415
  #tb_month_diagnostic_v_NA <- do.call(rbind.fill,tb_month_diagnostic_v_tmp) #create a df for NA tiles with all accuracy metrics
416
  #tile_id_tmp <- lapply(1:length(tb_month_diagnostic_v_tmp),
417
  #                      FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_month_diagnostic_v_tmp,y=names(tb_month_diagnostic_v_tmp))
418
  #tb_month_diagnostic_v_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile
419
  #tb_month_diagnostic_v_NA <- merge(tb_month_diagnostic_v_NA,df_tile_processed[,1:2],by="tile_id")
420
  #date_f<-strptime(tb_month_diagnostic_v_NA$date, "%Y%m%d")   # interpolation date being processed
421
  #tb_month_diagnostic_v_NA$month<-strftime(date_f, "%m")          # current month of the date being processed
422
  #write.table((tb_month_diagnostic_v_NA),
423
  #            file=file.path(out_dir,paste("tb_month_diagnostic_v_NA_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
424

  
425
  #list_outfiles[[9]] <- file.path(out_dir,paste("tb_month_diagnostic_v_NA_",year_predicted,"_",out_prefix,".txt",sep=""))
426
  list_outfiles[[9]] <- NA
427 427
  
428 428
  ######################################################
429 429
  ####### PART 3: EXAMINE STATIONS AND MODEL FITTING ###
430 430
  
431
  ##### Table 10 and Table 11: extracting accuracy information from daily and monthly predictions
432
  
431 433
  ### Stations and model fitting ###
432 434
  #summarize location and number of training and testing used by tiles
433 435
  
434
  #names(robj1$clim_method_mod_obj[[1]]$data_month) # monthly data for January
435
  #names(robj1$validation_mod_month_obj[[1]]$data_s) # daily for January with predictions
436
  #note that there is no holdout in the current run at the monthly time scale:
437
  
438
  #robj1$clim_method_mod_obj[[1]]$data_month_v #zero rows for testing stations at monthly timescale
439
  #load data_month for specific tiles
440
  data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
441
  
442 436
  #names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
443 437
  
444 438
  use_day=TRUE
445 439
  use_month=TRUE
446 440
  
447
  #list_raster_obj_files <- c("/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-100.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-100.0.RData",
448
  #                    "/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-105.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-105.0.RData")
449
  
450 441
  list_names_tile_id <- df_tile_processed$tile_id
451 442
  list_raster_obj_files[list_names_tile_id]
452 443
  #list_names_tile_id <- c("tile_1","tile_2")
......
457 448
  #debug(extract_daily_training_testing_info)
458 449
  #pred_data_info <- extract_daily_training_testing_info(1,list_param=list_param_training_testing_info)
459 450
  pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = num_cores)
460
  #pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id][1:6]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = 6)
461
  #pred_data_info <- lapply(1:length(list_raster_obj_files),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
462
  #pred_data_info <- lapply(1:length(list_raster_obj_files[1]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
463
  
451

  
464 452
  pred_data_info_tmp <- remove_from_list_fun(pred_data_info)$list #remove data not predicted
465 453
  ##Add tile nanmes?? it is alreaready there
466 454
  #names(pred_data_info)<-list_names_tile_id
......
472 460
              file=file.path(out_dir,paste("pred_data_month_info_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
473 461
  write.table(pred_data_day_info,
474 462
              file=file.path(out_dir,paste("pred_data_day_info_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
463
  list_outfiles[[10]] <- file.path(out_dir,paste("pred_data_month_info_",year_predicted,"_",out_prefix,".txt",sep=""))
464
  list_outfiles[[11]] <- file.path(out_dir,paste("pred_data_day_info_",year_predicted,"_",out_prefix,".txt",sep=""))
475 465
  
476 466
  ######################################################
477
  ####### PART 4: Get shapefile tiling with centroids ###
467
  ####### PART 4: Get shapefiles defining region tiling with centroids ###
478 468
  
469
  ##### Table 12, Table 13, Table 14: collect location of predictions from shapefiles
470

  
479 471
  #get shape files for the region being assessed:
480 472
  
481 473
  list_shp_world <- list.files(path=in_dir_shp,pattern=".*.shp",full.names=T)
482 474
  l_shp <- gsub(".shp","",basename(list_shp_world))
483 475
  l_shp <- sub("shp_","",l_shp)
484
  
485
  #l_shp <- unlist(lapply(1:length(list_shp_world),
486
  #                       FUN=function(i){paste(strsplit(list_shp_world[i],"_")[[1]][3:4],collapse="_")}))
487 476
  l_shp <- unlist(lapply(1:length(l_shp),
488 477
                         FUN=function(i){paste(strsplit(l_shp[i],"_")[[1]][1:2],collapse="_")}))
489 478
  
......
494 483
  matching_index <- match(basename(in_dir_list),l_shp)
495 484
  list_shp_reg_files <- list_shp_world[matching_index]
496 485
  df_tile_processed$shp_files <-list_shp_reg_files
497
  #df_tile_processed$shp_files <- ""
498
  #df_tile_processed$tile_coord <- as.character(df_tile_processed$tile_coord)
499
  #test <- df_tile_processed
500
  #test$shp_files <- NULL
501
  #test3 <- merge(test,df_tiles_all,by=c("tile_coord"))
502
  #test3 <- merge(df_tiles_all,test,by=c("tile_coord"))
503
  #merge(df_tile_processed,df_tiles_all,by="shp_files")
504
  
486

  
505 487
  tx<-strsplit(as.character(df_tile_processed$tile_coord),"_")
506 488
  lat<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][1]},x=tx))
507 489
  long<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][2]},x=tx))
......
514 496
  
515 497
  write.table(df_tiles_all,
516 498
              file=file.path(out_dir,paste("df_tiles_all_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
499
  list_outfiles[[12]] <- file.path(out_dir,paste("df_tile_processed_",year_predicted,"_",out_prefix,".txt",sep=""))
500
  list_outfiles[[13]] <- file.path(out_dir,paste("df_tiles_all_",year_predicted,"_",out_prefix,".txt",sep=""))
517 501
  
518 502
  #Copy to local home directory on NAS-NEX
519 503
  #
......
523 507
  #save a list of all files...
524 508
  write.table(df_tiles_all,
525 509
              file=file.path(out_dir,"shapefiles",paste("df_tiles_all_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
526
  
510
  list_outfiles[[14]] <- file.path(out_dir,"shapefiles",paste("df_tiles_all_",year_predicted,"_",out_prefix,".txt",sep=""))
511

  
512
  ######################################################
513
  ##### Prepare objet to return ####
514

  
515
  outfiles_names <- c("summary_metrics_v_names","tb_v_accuracy_name","tb_month_s_name","tb_s_accuracy_name", 
516
  "data_month_s_name","data_day_v_name","data_day_s_name","data_month_v_name", "tb_month_v_name",
517
  "pred_data_month_info_name","pred_data_day_info_name","df_tile_processed_name","df_tiles_all_name", 
518
  "df_tiles_all_name") 
519
  names(list_outfiles) <- outfiles_names
520
  
521
  #This data.frame contains all the files from the assessment
522
  df_assessment_files <- data.frame(filename=outfiles_names,files=unlist(list_outfiles),
523
                                    reg=region_name,year=year_predicted)
527 524
  ###Prepare files for copying back?
528
  
525
  write.table(df_assessment_files,
526
              file=file.path(out_dir,paste("df_assessment_files_",region_name,"_",year_predicted,"_",out_prefix,".txt",sep="")),sep=",")
527

  
529 528
  ## Prepare list of files to return...
530
  return(1)
529
  return(df_assessment_files)
531 530
}
532 531

  
533 532
##################### END OF SCRIPT ######################

Also available in: Unified diff