Project

General

Profile

« Previous | Next » 

Revision f0164c5b

Added by Benoit Parmentier about 10 years ago

run5 assessment NEX part1: adding functions to recreate raster object for assessment

View differences:

climate/research/oregon/interpolation/global_run_scalingup_assessment_part1.R
5 5
#Part 1 create summary tables and inputs for figure in part 2 and part 3.
6 6
#AUTHOR: Benoit Parmentier 
7 7
#CREATED ON: 03/23/2014  
8
#MODIFIED ON: 08/25/2014            
8
#MODIFIED ON: 08/28/2014            
9 9
#Version: 3
10 10
#PROJECT: Environmental Layers project  
11 11
#TO DO:
......
320 320
  return(list_tif_files_dates)
321 321
} 
322 322

  
323
calculate_summary_from_tb_diagnostic <-function(tb_diagnostic,metric_names){#,out_prefix,out_path){
324
  #now boxplots and mean per models
325
  library(gdata) #Nesssary to use cbindX
326
  
327
  ### Start script
328
  y_var_name<-unique(tb_diagnostic$var_interp) #extract the name of interpolated variable: dailyTmax, dailyTmin
329
  
330
  mod_names<-sort(unique(tb_diagnostic$pred_mod)) #models that have accuracy metrics
331
  t<-melt(tb_diagnostic,
332
          #measure=mod_var, 
333
          id=c("date","pred_mod","prop"),
334
          na.rm=F)
335
  t$value<-as.numeric(t$value) #problem with char!!!
336
  avg_tb<-cast(t,pred_mod~variable,mean)
337
  avg_tb$var_interp<-rep(y_var_name,times=nrow(avg_tb))
338
  median_tb<-cast(t,pred_mod~variable,median)
339
  
340
  #avg_tb<-cast(t,pred_mod~variable,mean)
341
  tb<-tb_diagnostic
342
 
343
  #mod_names<-sort(unique(tb$pred_mod)) #kept for clarity
344
  tb_mod_list<-lapply(mod_names, function(k) subset(tb, pred_mod==k)) #this creates a list of 5 based on models names
345
  names(tb_mod_list)<-mod_names
346
  #mod_metrics<-do.call(cbind,tb_mod_list)
347
  #debug here
348
  if(length(tb_mod_list)>1){
349
    mod_metrics<-do.call(cbindX,tb_mod_list) #column bind the list??
350
  }else{
351
    mod_metrics<-tb_mod_list[[1]]
352
  }
353
  
354
  test_names<-lapply(1:length(mod_names),function(k) paste(names(tb_mod_list[[1]]),mod_names[k],sep="_"))
355
  #test names are used when plotting the boxplot for the different models
356
  names(mod_metrics)<-unlist(test_names)
357
  rows_total<-lapply(tb_mod_list,nrow)
358
  avg_tb$n<-rows_total #total number of predictions on which the mean is based
359
  median_tb$n<-rows_total
360
  summary_obj<-list(avg_tb,median_tb)
361
  names(summary_obj)<-c("avg","median")
362
  return(summary_obj)  
363
}
364

  
365
#boxplot_month_from_tb(tb_diagnostic,metric_names,out_prefix,out_path)
366
## Function to display metrics by months/seasons
367
calculate_summary_from_tb_month_diagnostic <-function(tb_diagnostic,metric_names){ #,out_prefix,out_path){
368
  
369
  #Generate boxplot per month for models and accuracy metrics
370
  #Input parameters:
371
  #1) df: data frame containing accurayc metrics (RMSE etc.) per day)
372
  #2) metric_names: metrics used for validation
373
  #3) out_prefix
374
  #
375
  
376
  #################
377
  ## BEGIN
378
  y_var_name<-unique(tb_diagnostic$var_interp) #extract the name of interpolated variable: dailyTmax, dailyTmin  
379
  date_f<-strptime(tb_diagnostic$date, "%Y%m%d")   # interpolation date being processed
380
  tb_diagnostic$month<-strftime(date_f, "%m")          # current month of the date being processed
381
  mod_names<-sort(unique(tb_diagnostic$pred_mod)) #models that have accuracy metrics
382
  tb_mod_list<-lapply(mod_names, function(k) subset(tb_diagnostic, pred_mod==k)) #this creates a list of 5 based on models names
383
  names(tb_mod_list)<-mod_names
384
  t<-melt(tb_diagnostic,
385
          #measure=mod_var, 
386
          id=c("date","pred_mod","prop","month"),
387
          na.rm=F)
388
  t$value<-as.numeric(t$value) #problem with char!!!
389
  tb_mod_m_avg <-cast(t,pred_mod+month~variable,mean) #monthly mean for every model
390
  tb_mod_m_avg$var_interp<-rep(y_var_name,times=nrow(tb_mod_m_avg))
391
  
392
  tb_mod_m_sd <-cast(t,pred_mod+month~variable,sd)   #monthly sd for every model  
393
  tb_mod_m_list <-lapply(mod_names, function(k) subset(tb_mod_m_avg, pred_mod==k)) #this creates a list of 5 based on models names
394
  
395
  summary_month_obj <-c(tb_mod_m_list,tb_mod_m_avg,tb_mod_m_sd)
396
  names(summary_month_obj)<-c("tb_list","metric_month_avg","metric_month_sd")
397
  return(summary_month_obj)  
398
}
399

  
400
create_raster_prediction_obj<- function(in_dir_list,interpolation_method, y_var_name,out_prefix,out_path_list=NULL){
401
  
402
  
403
  #gather all necessary info:
404
  lf_validation_mod_month_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="gam_CAI_validation_mod_month_obj_dailyTmax.*.RData",full.names=T)})
405
  lf_validation_mod_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="gam_CAI_validation_mod_obj_dailyTmax.*.RData",full.names=T)})
406
  lf_clim_method_mod_obj <- lapply(in_dir_list,FUN=function(x){mixedsort(list.files(path=x,pattern="clim_obj_CAI_month_.*._TMAX_0_1_.*.RData",full.names=T))})
407
  lf_method_mod_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="method_mod_obj_gam_CAI_dailyTmax.*.RData",full.names=T)})
408

  
409
  tb_month_diagnostic_v_list <- mclapply(lf_validation_mod_month_obj,FUN=function(x){try( x<- load_obj(x)); try(extract_from_list_obj(x,"metrics_v"))},mc.preschedule=FALSE,mc.cores = 6)                           
410
  tb_month_diagnostic_s_list <- mclapply(lf_validation_mod_month_obj,FUN=function(x){try( x<- load_obj(x)); try(extract_from_list_obj(x,"metrics_s"))},mc.preschedule=FALSE,mc.cores = 6)                           
411
  tb_diagnostic_v_list <- mclapply(lf_validation_mod_obj,FUN=function(x){try( x<- load_obj(x)); try(extract_from_list_obj(x,"metrics_v"))},mc.preschedule=FALSE,mc.cores = 6)                           
412
  tb_diagnostic_s_list <- mclapply(lf_validation_mod_obj,FUN=function(x){try( x<- load_obj(x)); try(extract_from_list_obj(x,"metrics_s"))},mc.preschedule=FALSE,mc.cores = 6)                           
413

  
414
  #rownames(tb_month_diagnostic_v)<-NULL #remove row names
415
  #tb_month_diagnostic_v$method_interp <- interpolation_method
416

  
417
  #Call functions to create plots of metrics for validation dataset
418
  metric_names<-c("rmse","mae","me","r","m50")
419
  summary_metrics_v_list <- lapply(tb_diagnostic_v_list,FUN=function(x){try(calculate_summary_from_tb_diagnostic(x,metric_names))})
420
  summary_month_metrics_v_list <- lapply(tb_diagnostic_v_list,FUN=function(x){try(calculate_summary_from_tb_month_diagnostic(x,metric_names))}) 
421
  #list_summalist_summary_metrics_vry_month_metrics_v <- calculate_summary_from_tb_month_diagnostic(list_tb_diagnostic_v[[1]],metric_names)
422

  
423
  #if (interpolation_method %in% c("gam_CAI","kriging_CAI","gwr_CAI","gam_fusion","kriging_fusion","gwr_fusion")){
424
  lf_raster_obj <- vector("list",length=length(in_dir_list))
425
  for (i in 1:length(in_dir_list)){
426
    
427
    clim_method_mod_obj <- try(lapply(lf_clim_method_mod_obj[[i]],FUN=try(load_obj)))
428
    method_mod_obj <- try(load_obj(lf_method_mod_obj[[i]]))
429
    validation_mod_month_obj <- try(load_obj(lf_validation_mod_month_obj[[i]]))   
430
    validation_mod_obj <- try(load_obj(lf_validation_mod_obj[[i]]))
431

  
432
    tb_month_diagnostic_v <- try(tb_month_diagnostic_v_list[[i]])
433
    tb_month_diagnostic_s <- try(tb_month_diagnostic_s_list[[i]])
434
    tb_diagnostic_v <- try(tb_diagnostic_v_list[[i]])
435
    tb_diagnostic_s <- try(tb_diagnostic_s_list[[i]])
436

  
437
    summary_metrics_v <- summary_metrics_v_list[[i]]
438
    summary_month_metrics_v <- summary_month_metrics_v_list[[i]] 
439
    
440
    raster_prediction_obj <- list(clim_method_mod_obj,method_mod_obj,validation_mod_obj,validation_mod_month_obj, tb_diagnostic_v,
441
                                tb_diagnostic_s,tb_month_diagnostic_v,tb_month_diagnostic_s,summary_metrics_v,summary_month_metrics_v)
442
    names(raster_prediction_obj) <-c ("clim_method_mod_obj","method_mod_obj","validation_mod_obj","validation_mod_month_obj","tb_diagnostic_v",
443
                                "tb_diagnostic_s","tb_month_diagnostic_v","tb_month_diagnostic_s","summary_metrics_v","summary_month_metrics_v") 
444
    if(is.null(out_path_list)){
445
      out_path <- in_dir_list[[i]]
446
    }  
447
    if(!is.null(out_path_list)){
448
      out_path <- out_path_list[[i]]
449
    }  
450
    lf_raster_obj[[i]] <- file.path(out_path,paste("raster_prediction_obj_",interpolation_method,"_", y_var_name,out_prefix_str[i],".RData",sep=""))  
451

  
452
    save(raster_prediction_obj,file= file.path(out_path,paste("raster_prediction_obj_",interpolation_method,"_", y_var_name,out_prefix_str[i],".RData",sep="")))
453
  }
454
  return(lf_raster_obj)
455
}
456

  
323 457
##############################
324 458
#### Parameters and constants  
325 459

  
......
413 547
lf_diagnostic_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="diagnostics_.*.RData",full.names=T)})
414 548
lf_diagnostic_obj <- lf_diagnostic_obj[grep("lk_min",lf_diagnostic_obj,invert=T)] #remove object that have lk_min...
415 549

  
416
lf_validation_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="gam_CAI_validation_mod_obj_dailyTmax.*.RData",full.names=T)})
417
#validation_mod_obj <-load_obj("/nobackupp4/aguzman4/climateLayers/output20Deg/reg6/60.0_40.0/gam_CAI_validation_mod_obj_dailyTmax60.0_40.0.RData")
418
debug(extract_from_list_obj)
419
tb_diagnostic_v<-extract_from_list_obj(validation_mod_obj,"metrics_v") 
550
## This will be part of the raster_obj function
551
debug(create_raster_prediction_obj)
552
out_prefix_str <- paste(basename(in_dir_list),out_prefix,sep="_") 
553
lf_raster_obj <- create_raster_prediction_obj(in_dir_list,interpolation_method, y_var_name,out_prefix_str,out_path_list=NULL)
554

  
555
lf_raster_obj <- c("/nobackupp4/aguzman4/climateLayers/output20Deg/reg2//-10.0_-70.0//raster_prediction_obj_gam_CAI_dailyTmax-10.0_-70.0_run5_global_analyses_08252014.RData"
556
  ,"/nobackupp4/aguzman4/climateLayers/output20Deg/reg4//40.0_0.0//raster_prediction_obj_gam_CAI_dailyTmax40.0_0.0_run5_global_analyses_08252014.RData"   
557
  ,"/nobackupp4/aguzman4/climateLayers/output20Deg/reg4//50.0_0.0//raster_prediction_obj_gam_CAI_dailyTmax50.0_0.0_run5_global_analyses_08252014.RData"
558
  ,"/nobackupp4/aguzman4/climateLayers/output20Deg/reg6//60.0_40.0//raster_prediction_obj_gam_CAI_dailyTmax60.0_40.0_run5_global_analyses_08252014.RData"
559
  ,"/nobackupp4/aguzman4/climateLayers/output20Deg/reg6//30.0_40.0//raster_prediction_obj_gam_CAI_dailyTmax30.0_40.0_run5_global_analyses_08252014.RData"
560
  ,"/nobackupp4/aguzman4/climateLayers/output20Deg/reg8//40.0_130.0//raster_prediction_obj_gam_CAI_dailyTmax40.0_130.0_run5_global_analyses_08252014.RData")
420 561

  
421 562
########################## START SCRIPT ##############################
422 563

  
......
445 586
#Get the number of models predicted
446 587
nb_mod <- length(unique(robj1$tb_diagnostic_v$pred_mod))
447 588

  
448
validation_mod_obj <-load_obj("/nobackupp4/aguzman4/climateLayers/output20Deg/reg6/60.0_40.0/gam_CAI_validation_mod_obj_dailyTmax60.0_40.0.RData")
449
tb_diagnostic_v<-extract_from_list_obj(validation_mod_obj,"metrics_v") 
450

  
451

  
452
#clim_method_mod_obj <- load_obj("/nobackupp4/aguzman4/climateLayers/output20Deg/reg6/60.0_40.0/gam_CAI_mod_dailyTmax60.0_40.0.RData")
453
#list_data_v <- extract_list_from_list_obj(clim_method_mod_obj,"data_month_v") #extract monthly testing/validation dataset
454
#list_data_s <- extract_list_from_list_obj(clim_method_mod_obj,"data_month") #extract monthly training/fitting dataset
455
#rast_day_yearlist <- extract_list_from_list_obj(clim_method_mod_obj,"clim") #list_tmp #list of predicted images over full year at monthly time scale
456
#list_sampling_dat <- extract_list_from_list_obj(clim_method_mod_obj,"sampling_month_dat")
457

  
458 589
list_tb_diagnostic_v <- mclapply(lf_validation_obj,FUN=function(x){try( x<- load_obj(x)); try(extract_from_list_obj(x,"metrics_v"))},mc.preschedule=FALSE,mc.cores = 6)                           
459 590
names(list_tb_diagnostic_v) <- list_names_tile_id
460 591

  
461
#undebug(extract_from_list_obj)
462
#validation_mod_month_obj <-load_obj("/nobackupp4/aguzman4/climateLayers/output20Deg/reg6/60.0_40.0/gam_CAI_validation_mod_month_obj_dailyTmax60.0_40.0.RData")
463
#tb_diagnostic_v<-extract_from_list_obj(validation_mod_obj,"metrics_v") 
464

  
465

  
466 592
################
467 593
#### Table 1: Average accuracy metrics
468 594

  
......
470 596
#summary_metrics_v_list <- mclapply(list_raster_obj_files[5:6],FUN=function(x){try( x<- load_obj(x)); try(x[["summary_metrics_v"]]$avg)},mc.preschedule=FALSE,mc.cores = 2)                           
471 597

  
472 598
summary_metrics_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try( x<- load_obj(x)); try(x[["summary_metrics_v"]]$avg)},mc.preschedule=FALSE,mc.cores = 6)                           
599
summary_metrics_v_list <- lapply(summary_metrics_v_list,FUN=function(x){try(x$avg)})
473 600
names(summary_metrics_v_list) <- list_names_tile_id
474 601

  
475 602
summary_metrics_v_tmp <- remove_from_list_fun(summary_metrics_v_list)$list
......
517 644
write.table((tb_diagnostic_v_NA),
518 645
            file=file.path(out_dir,paste("tb_diagnostic_v_NA","_",out_prefix,".txt",sep="")),sep=",")
519 646

  
647
## Monthly fitting information
648
tb_month_diagnostic_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_month_diagnostic_s"]])},mc.preschedule=FALSE,mc.cores = 6)                           
649

  
650
names(tb_month_diagnostic_s_list) <- list_names_tile_id
651
tb_month_diagnostic_s_tmp <- remove_from_list_fun(tb_month_diagnostic_s_list)$list
652
#df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid
653

  
654
tb_month_diagnostic_s_NA <- do.call(rbind.fill,tb_month_diagnostic_s_tmp) #create a df for NA tiles with all accuracy metrics
655
tile_id_tmp <- lapply(1:length(tb_month_diagnostic_s_tmp),
656
                     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_month_diagnostic_s_tmp,y=names(tb_month_diagnostic_s_tmp))
657

  
658
tb_month_diagnostic_s_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile
659

  
660
tb_month_diagnostic_s_NA <- merge(tb_month_diagnostic_s_NA,df_tile_processed[,1:2],by="tile_id")
661

  
662
date_f<-strptime(tb_month_diagnostic_s_NA$date, "%Y%m%d")   # interpolation date being processed
663
tb_month_diagnostic_s_NA$month<-strftime(date_f, "%m")          # current month of the date being processed
664

  
665
write.table((tb_month_diagnostic_s_NA),
666
            file=file.path(out_dir,paste("tb_month_diagnostic_s_NA","_",out_prefix,".txt",sep="")),sep=",")
667

  
668
## daily fit info:
669

  
670
tb_diagnostic_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_diagnostic_s"]])},mc.preschedule=FALSE,mc.cores = 6)                           
671

  
672
names(tb_diagnostic_s_list) <- list_names_tile_id
673
tb_diagnostic_s_tmp <- remove_from_list_fun(tb_diagnostic_s_list)$list
674
#df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid
675

  
676
tb_diagnostic_s_NA <- do.call(rbind.fill,tb_diagnostic_s_tmp) #create a df for NA tiles with all accuracy metrics
677
tile_id_tmp <- lapply(1:length(tb_diagnostic_s_tmp),
678
                     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_diagnostic_s_tmp,y=names(tb_diagnostic_s_tmp))
679

  
680
tb_diagnostic_s_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile
681

  
682
tb_diagnostic_s_NA <- merge(tb_diagnostic_s_NA,df_tile_processed[,1:2],by="tile_id")
683

  
684
write.table((tb_diagnostic_s_NA),
685
            file=file.path(out_dir,paste("tb_diagnostic_s_NA","_",out_prefix,".txt",sep="")),sep=",")
686

  
687

  
520 688
####### process gam fitting diagnostic info
521 689

  
522 690
#/nobackupp4/aguzman4/climateLayers/output20Deg/reg5/20.0_30.0//diagnostics_obj_gam_fitting_TMax_9_mod2_08062014.RData
......
882 1050
### This assumes the tree structure has been replicated on Atlas:
883 1051
#for i in 1:length(df_tiled_processed$tile_coord)
884 1052
#output_atlas_dir <- "/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1"
885
output_atlas_dir <- "/data/project/layers/commons/NEX_data/output_run4_global_analyses_08142014/output20Deg"
886

  
1053
output_atlas_dir <- "/data/project/layers/commons/NEX_data/output_run5_global_analyses_08252014/output20Deg"
887 1054
#Make directories on ATLAS
888
#for (i in 1:length(df_tiled_processed$tile_coord)){
889
#  create_dir_fun(file.path(output_atlas_dir,as.character(df_tiled_processed$tile_coord[i])),out_suffix=NULL)
1055
#for (i in 1:length(df_tile_processed$tile_coord)){
1056
#  create_dir_fun(file.path(output_atlas_dir,as.character(df_tile_processed$tile_coord[i])),out_suffix=NULL)
890 1057
#}  
891 1058

  
892 1059
#Make directories on ATLAS for shapefiles
893
#for (i in 1:length(df_tiled_processed$tile_coord)){
894
#  create_dir_fun(file.path(output_atlas_dir,as.character(df_tiled_processed$tile_coord[i]),"/shapefiles"),out_suffix=NULL)
1060
#for (i in 1:length(df_tile_processed$tile_coord)){
1061
#  create_dir_fun(file.path(output_atlas_dir,as.character(df_tile_processed$tile_coord[i]),"/shapefiles"),out_suffix=NULL)
895 1062
#}  
896 1063

  
897 1064

  
......
914 1081
Atlas_hostname <- "parmentier@atlas.nceas.ucsb.edu"
915 1082
lf_cp_shp <- df_tile_processed$shp_files #get all the files...
916 1083

  
1084
layer_name <- sub(".shp","",basename(lf_cp_shp[[i]]))
1085

  
917 1086
#lf_cp_shp <- list.files(in_dir_shp, ".shp",full.names=T)
918
list_tile_scp <- 1:8
1087
list_tile_scp <- 1:6
919 1088

  
920 1089
for (j in 1:length(list_tile_scp)){
921 1090
  tile_nb <- list_tile_scp[j]
......
927 1096
  Atlas_dir <- file.path(output_atlas_dir,as.character(df_tile_processed$tile_coord[j]),"/shapefiles")
928 1097

  
929 1098
  Atlas_hostname <- "parmentier@atlas.nceas.ucsb.edu"
1099
  
1100
  lf_cp_shp_pattern <- gsub(".shp","*",lf_cp_shp)
1101

  
1102
  #filenames_NEX <- paste(lf_cp_shp,collapse=" ")  #copy raster prediction object
1103
  filenames_NEX <- paste(lf_cp_shp_pattern,collapse=" ")  #copy raster prediction object
930 1104

  
931
  filenames_NEX <- paste(lf_cp_shp,collapse=" ")  #copy raster prediction object
932 1105
  cmd_str <- paste("scp -p",filenames_NEX,paste(Atlas_hostname,Atlas_dir,sep=":"), sep=" ")
933 1106
  system(cmd_str)
934 1107
}
......
939 1112
#../$out_dir/ouput/tile_coord
940 1113

  
941 1114
#list_tile_scp <- c(1,2)
942
list_tile_scp <- 1:8
1115
list_tile_scp <- 1:6
943 1116

  
944 1117
for (j in 1:length(list_tile_scp)){
945 1118
  tile_nb <- list_tile_scp[j]

Also available in: Unified diff