Project

General

Profile

« Previous | Next » 

Revision 51e58cfc

Added by Benoit Parmentier over 10 years ago

scaling up assessemnt part 1 testing 75% overlap in North Africa

View differences:

climate/research/oregon/interpolation/global_run_scalingup_assessment_part1.R
5 5
#Part 1 create summary tables and inputs for figure in part 2 and part 3.
6 6
#AUTHOR: Benoit Parmentier 
7 7
#CREATED ON: 03/23/2014  
8
#MODIFIED ON: 09/16/2014            
8
#MODIFIED ON: 10/04/2014            
9 9
#Version: 3
10 10
#PROJECT: Environmental Layers project  
11 11
#TO DO:
......
458 458
#### Parameters and constants  
459 459

  
460 460
#in_dir1 <- "/data/project/layers/commons/NEX_data/test_run1_03232014/output" #On Atlas
461
#in_dir1 <- "/nobackupp4/aguzman4/climateLayers/output10Deg/reg1/" #On NEX
462
in_dir1 <- "/nobackupp4/aguzman4/climateLayers/output20Deg2/"
463

  
461
#in_dir1 <- "/nobackupp4/aguzman4/climateLayers/output20Deg2/"
462
in_dir1 <-"/nobackupp4/aguzman4/climateLayers/output20Deg_75overlap/reg4"
464 463
#/nobackupp4/aguzman4/climateLayers/output10Deg/reg1/finished.txt
465
in_dir_reg <- list.dirs(path=in_dir1,recursive=FALSE) #get the list regions processed for this run
466
in_dir_list <- list.dirs(path=in_dir_reg,recursive=FALSE) #get the list of tiles/directories with outputs 
464
in_dir_list <- list.dirs(path=in_dir1,recursive=FALSE) #get the list regions processed for this run
465
in_dir_list_all <- in_dir_list
466
#in_dir_list <- list.dirs(path=in_dir_reg,recursive=FALSE) #get the list of tiles/directories with outputs 
467 467

  
468 468
#in_dir_list <- in_dir_list[grep("bak",basename(basename(in_dir_list)),invert=TRUE)] #the first one is the in_dir1
469
in_dir_subset <- in_dir_list[grep("subset",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles...
469
in_dir_subset <- in_dir_list[grep("subset",basename(in_dir_reg),invert=FALSE)] #select directory with shapefiles...
470 470
in_dir_shp <- file.path(in_dir_subset,"shapefiles")
471
#in_dir_shp <- in_dir_list[grep("shapefiles",basename(in_dir_subset),invert=FALSE)] #select directory with shapefiles...
472

  
473
#[27] "/nobackupp4/aguzman4/climateLayers/output20Deg2//reg2/outLogs"    
474
#[28] "/nobackupp4/aguzman4/climateLayers/output20Deg2//reg2/sept01"     
475
#[29] "/nobackupp4/aguzman4/climateLayers/output20Deg2//reg2/serial"     
476
#[30] "/nobackupp4/aguzman4/climateLayers/output20Deg2//reg2/subset"     
477
#[31] "/nobackupp4/aguzman4/climateLayers/output20Deg2//reg2/testFit" 
478

  
479
#in_dir_subset <- in_dir_list[grep("subset",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles...
480
#in_dir_subset <- in_dir_list[grep("serial",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles...
481
#in_dir_subset <- in_dir_list[grep("sept01",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles...
482
#in_dir_subset <- in_dir_list[grep("testFit",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles...
483

  
484
#Only 6 folders/regions contain information  
485

  
486
#[1] "/nobackupp4/aguzman4/climateLayers/output20Deg2//reg2/subset"
487
#[2] "/nobackupp4/aguzman4/climateLayers/output20Deg2//reg3/subset"
488
#[3] "/nobackupp4/aguzman4/climateLayers/output20Deg2//reg4/subset"
489
#[4] "/nobackupp4/aguzman4/climateLayers/output20Deg2//reg5/subset"
490
#[5] "/nobackupp4/aguzman4/climateLayers/output20Deg2//reg6/subset"
491
#[6] "/nobackupp4/aguzman4/climateLayers/output20Deg2//reg7/subset"
492

  
493
in_dir_reg <- dirname(in_dir_subset)
494
in_dir_list <- list.dirs(path=in_dir_reg,recursive=FALSE) #get the list of directories with resutls by 10x10 degree tiles
495
#select only directories used for predictions
496
in_dir_list <- in_dir_list[grep(".*._.*.",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles...
497 471

  
472
#select only directories used for predictions
473
in_dir_reg <- in_dir_list[grep(".*._.*.",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles...
474
in_dir_reg <- in_dir_list[grep("july_tiffs",basename(in_dir_reg),invert=TRUE)] #select directory with shapefiles...
475
in_dir_list <- in_dir_reg
498 476
#Models used.
499 477
#list_models<-c("y_var ~ s(lat,lon,k=4) + s(elev_s,k=3) + s(LST,k=3)",
500 478
#               "y_var ~ s(lat,lon,k=5) + s(elev_s,k=3) + s(LST,k=3)",
501 479
#               "y_var ~ s(lat,lon,k=8) + s(elev_s,k=4) + s(LST,k=4)",
502 480
  
503
#use subset for now:
504

  
505
#in_dir_list <- c(
506
#"/nobackupp4/aguzman4/climateLayers/output10Deg/reg1/40.0_-120.0/",
507
#"/nobackupp4/aguzman4/climateLayers/output10Deg/reg1/35.0_-115.0/")
508 481
  
509 482
#in_dir_list <- file.path(in_dir1,read.table(file.path(in_dir1,"processed.txt"))$V1)
510 483
#in_dir_list <- as.list(in_dir_list[-1])
......
520 493
# the last directory contains shapefiles 
521 494
y_var_name <- "dailyTmax"
522 495
interpolation_method <- c("gam_CAI")
523
out_prefix<-"run6_global_analyses_09162014"
496
out_prefix<-"run7_global_analyses_10042014"
524 497

  
525 498
#out_dir<-"/data/project/layers/commons/NEX_data/" #On NCEAS Atlas
526 499
out_dir <- "/nobackup/bparmen1/" #on NEX
......
560 533

  
561 534
lf_covar_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="covar_obj.*.RData",full.names=T)})
562 535
lf_covar_tif <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="covar.*.tif",full.names=T)})
563
#diagnostics_obj_gam_fitting_dailyTmax7__08062014.RData
564
#lf_diagnostic_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="diagnostics_.*.RData",full.names=T)})
565
#lf_diagnostic_obj <- lf_diagnostic_obj[grep("lk_min",lf_diagnostic_obj,invert=T)] #remove object that have lk_min...
566 536

  
567 537
## This will be part of the raster_obj function
568 538
#debug(create_raster_prediction_obj)
569 539
#out_prefix_str <- paste(basename(in_dir_list),out_prefix,sep="_") 
570 540
#lf_raster_obj <- create_raster_prediction_obj(in_dir_list,interpolation_method, y_var_name,out_prefix_str,out_path_list=NULL)
571 541

  
572
#lf_raster_obj <- c("/nobackupp4/aguzman4/climateLayers/output20Deg/reg2//-10.0_-70.0//raster_prediction_obj_gam_CAI_dailyTmax-10.0_-70.0_run5_global_analyses_08252014.RData"
573
#  ,"/nobackupp4/aguzman4/climateLayers/output20Deg/reg4//40.0_0.0//raster_prediction_obj_gam_CAI_dailyTmax40.0_0.0_run5_global_analyses_08252014.RData"   
574
#  ,"/nobackupp4/aguzman4/climateLayers/output20Deg/reg4//50.0_0.0//raster_prediction_obj_gam_CAI_dailyTmax50.0_0.0_run5_global_analyses_08252014.RData"
575
#  ,"/nobackupp4/aguzman4/climateLayers/output20Deg/reg6//60.0_40.0//raster_prediction_obj_gam_CAI_dailyTmax60.0_40.0_run5_global_analyses_08252014.RData"
576
#  ,"/nobackupp4/aguzman4/climateLayers/output20Deg/reg6//30.0_40.0//raster_prediction_obj_gam_CAI_dailyTmax30.0_40.0_run5_global_analyses_08252014.RData"
577
#  ,"/nobackupp4/aguzman4/climateLayers/output20Deg/reg8//40.0_130.0//raster_prediction_obj_gam_CAI_dailyTmax40.0_130.0_run5_global_analyses_08252014.RData")#
578

  
579 542
########################## START SCRIPT ##############################
580 543

  
581 544
################################################################
......
592 555
df_tile_processed$path_NEX <- in_dir_list
593 556
  
594 557
##Quick exploration of raster object
595
robj1 <- load_obj(list_raster_obj_files[[4]]) #This is an example tile
558
robj1 <- load_obj(list_raster_obj_files[[7]]) #This is an example tile
596 559
#robj1 <- load_obj(lf_raster_obj[4]) #This is tile tile
597 560

  
598 561
names(robj1)
599
names(robj1$method_mod_obj[[1]]) #for January 1, 2010
600
names(robj1$method_mod_obj[[1]]$dailyTmax) #for January
562
names(robj1$method_mod_obj[[2]]) #for January 1, 2010
563
names(robj1$method_mod_obj[[2]]$dailyTmax) #for January
564
names(robj1$method_mod_obj[[11]]) #for January 1, 2010
565
names(robj1$method_mod_obj[[11]]$dailyTmax) #for January
601 566

  
602 567
names(robj1$clim_method_mod_obj[[1]]$data_month) #for January
603 568
names(robj1$validation_mod_month_obj[[1]]$data_s) #for January with predictions
604 569
#Get the number of models predicted
605 570
nb_mod <- length(unique(robj1$tb_diagnostic_v$pred_mod))
571
list_formulas <- (robj1$clim_method_mod_obj[[1]]$formulas)
606 572

  
607 573
#list_tb_diagnostic_v <- mclapply(lf_validation_obj,FUN=function(x){try( x<- load_obj(x)); try(extract_from_list_obj(x,"metrics_v"))},mc.preschedule=FALSE,mc.cores = 6)                           
608 574
#names(list_tb_diagnostic_v) <- list_names_tile_id
......
663 629
write.table((tb_diagnostic_v_NA),
664 630
            file=file.path(out_dir,paste("tb_diagnostic_v_NA","_",out_prefix,".txt",sep="")),sep=",")
665 631

  
632
#################
633
###Table 3: monthly station information with predictions for all tiles
634

  
666 635
## Monthly fitting information
667 636
tb_month_diagnostic_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_month_diagnostic_s"]])},mc.preschedule=FALSE,mc.cores = 6)                           
668 637

  
......
684 653
write.table((tb_month_diagnostic_s_NA),
685 654
            file=file.path(out_dir,paste("tb_month_diagnostic_s_NA","_",out_prefix,".txt",sep="")),sep=",")
686 655

  
687
## daily fit info:
688

  
689
tb_diagnostic_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_diagnostic_s"]])},mc.preschedule=FALSE,mc.cores = 6)                           
690

  
691
names(tb_diagnostic_s_list) <- list_names_tile_id
692
tb_diagnostic_s_tmp <- remove_from_list_fun(tb_diagnostic_s_list)$list
693
#df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid
694

  
695
tb_diagnostic_s_NA <- do.call(rbind.fill,tb_diagnostic_s_tmp) #create a df for NA tiles with all accuracy metrics
696
tile_id_tmp <- lapply(1:length(tb_diagnostic_s_tmp),
697
                     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_diagnostic_s_tmp,y=names(tb_diagnostic_s_tmp))
698

  
699
tb_diagnostic_s_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile
700

  
701
tb_diagnostic_s_NA <- merge(tb_diagnostic_s_NA,df_tile_processed[,1:2],by="tile_id")
702

  
703
write.table((tb_diagnostic_s_NA),
704
            file=file.path(out_dir,paste("tb_diagnostic_s_NA","_",out_prefix,".txt",sep="")),sep=",")
705

  
706

  
707
####### process gam fitting diagnostic info
708

  
709
#/nobackupp4/aguzman4/climateLayers/output20Deg/reg5/20.0_30.0//diagnostics_obj_gam_fitting_TMax_9_mod2_08062014.RData
710
#lf_diagnostic_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="diagnostics_.*.RData",full.names=T)})
711
#lf_diagnostic_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="diagnostics_obj_gam_fitting_TMax_*_mod*_08062014.RData",full.names=T)})
712
#lf_diagnostic_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="diagnostics_obj_gam_fitting_TMax_.*._mod.*._08062014.RData",full.names=T)})
713

  
714
#lf_diagnostic_obj <- lf_diagnostic_obj[grep("lk_min",lf_diagnostic_obj,invert=T)] #remove object that have lk_min...
715

  
716
#names(lf_diagnostic_obj) <- list_names_tile_id
717
#lf_diagnostic_obj_tmp <- remove_from_list_fun(lf_diagnostic_obj)$list
718
#df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid
719

  
720
#gam_diagnostic_tb_list <- vector("list",length=length(lf_diagnostic_obj_tmp))
721
#for(i in 1:length(lf_diagnostic_obj_tmp)){
722
#  l_diagnostic_obj_tmp <- lf_diagnostic_obj_tmp[[i]]
723
#  tile_id_name <-  names(lf_diagnostic_obj_tmp)[i]
724
#  #l_diagnostic_obj_tmp <- l_diagnostic_obj_tmp[grep("lk_min",l_diagnostic_obj_tmp,invert=T)] #remove object that have lk_min...
725
#  l_diagnostic_obj_tmp_list <- lapply(l_diagnostic_obj_tmp,FUN=function(x){try(x<-load_obj(x));try(x[["df_diagnostics"]])})#,mc.preschedule=FALSE,mc.cores = 6)                            
726
#  gam_diagnostic_tb <- do.call(rbind.fill,l_diagnostic_obj_tmp_list)#create a df for NA tiles with all accuracy metrics
727
#  gam_diagnostic_tb$tile_id <- tile_id_name
728
#  gam_diagnostic_tb_list[[i]] <- gam_diagnostic_tb    
729
#}
730

  
731
#gam_diagnostic_df <- do.call(rbind.fill,gam_diagnostic_tb_list) #create a df for NA tiles with all accuracy metrics
732

  
733
#write.table(gam_diagnostic_df,
734
#            file=file.path(out_dir,paste("gam_diagnostic_df_",out_prefix,".txt",sep="")),sep=",")
735

  
736

  
737
#Now look at the 100 tiles of 10x10
738
#lf_test<-list.files("/nobackupp4/aguzman4/climateLayers/output10Deg/*/*/","diagnostics_obj_gam_fitting*")  
739
#lf_test <-list.files("/nobackupp4/aguzman4/climateLayers/output10Deg/","diagnostics_obj_gam_fitting.*.RData",recursive=T,full.names=T)
740

  
741
#gam_diagnostic_10x10tb_list <- vector("list",length=length(lf_test))
742
#lf_diagnostic_obj_tmp <- lf_test  
743
#for(i in 1:length( lf_diagnostic_obj_tmp)){
744
#  l_diagnostic_obj_tmp <-  lf_diagnostic_obj_tmp[[i]]
745
#  tile_coord <-  basename(dirname(lf_diagnostic_obj_tmp[i]))
746
#  #l_diagnostic_obj_tmp <- l_diagnostic_obj_tmp[grep("lk_min",l_diagnostic_obj_tmp,invert=T)] #remove object that have lk_min...
747
#  l_diagnostic_obj_tmp_list <- lapply(l_diagnostic_obj_tmp,FUN=function(x){try(x<-load_obj(x));try(x[["df_diagnostics"]])})#,mc.preschedule=FALSE,mc.cores = 6)                            
748
#  gam_diagnostic_tb <- do.call(rbind.fill,l_diagnostic_obj_tmp_list)#create a df for NA tiles with all accuracy metrics
749
#  gam_diagnostic_tb$tile_coord <- tile_coord
750
#  gam_diagnostic_10x10tb_list[[i]] <- gam_diagnostic_tb    
751
#}
752

  
753
#gam_diagnostic_10x10_df <- do.call(rbind.fill,gam_diagnostic_10x10tb_list) #create a df for NA tiles with all accuracy metrics
754

  
755
#list_tile_coord <- unique(gam_diagnostic_10x10_df$tile_coord)
756
#list_tile_id <- paste("tile_",1:length(list_tile_coord),sep="")
757

  
758
#tile_id_df <- data.frame(tile_coord=list_tile_coord,tile_id=list_tile_id)
759
#gam_diagnostic_10x10_df <- merge(gam_diagnostic_10x10_df,tile_id_df,by="tile_coord")
760

  
761
# write.table(gam_diagnostic_10x10_df,
762
#             file=file.path(out_dir,paste("gam_diagnostic_10x10_df_",out_prefix,".txt",sep="")),sep=",")
763

  
764
#################
765
###Table 3: monthly station information with predictions for all tiles
766

  
767 656
#load data_month for specific tiles
768 657
# data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
769 658
# names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
......
783 672
# write.table((data_month_NAM),
784 673
#             file=file.path(out_dir,paste("data_month_s_NAM","_",out_prefix,".txt",sep="")),sep=",")
785 674

  
675
## daily fit info:
676

  
677
tb_diagnostic_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_diagnostic_s"]])},mc.preschedule=FALSE,mc.cores = 6)                           
678

  
679
names(tb_diagnostic_s_list) <- list_names_tile_id
680
tb_diagnostic_s_tmp <- remove_from_list_fun(tb_diagnostic_s_list)$list
681
#df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid
682

  
683
tb_diagnostic_s_NA <- do.call(rbind.fill,tb_diagnostic_s_tmp) #create a df for NA tiles with all accuracy metrics
684
tile_id_tmp <- lapply(1:length(tb_diagnostic_s_tmp),
685
                     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_diagnostic_s_tmp,y=names(tb_diagnostic_s_tmp))
686

  
687
tb_diagnostic_s_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile
688

  
689
tb_diagnostic_s_NA <- merge(tb_diagnostic_s_NA,df_tile_processed[,1:2],by="tile_id")
690

  
691
write.table((tb_diagnostic_s_NA),
692
            file=file.path(out_dir,paste("tb_diagnostic_s_NA","_",out_prefix,".txt",sep="")),sep=",")
693

  
786 694
##### Table 4: Add later on: daily info
787 695
### with also data_s and data_v saved!!!
788 696

  
......
794 702
#get shape files for the region being assessed:
795 703

  
796 704
list_shp_world <- list.files(path=in_dir_shp,pattern=".*.shp",full.names=T)
797
l_shp <- unlist(lapply(1:length(list_shp_world),FUN=function(i){paste(strsplit(list_shp_world[i],"_")[[1]][2:3],collapse="_")}))
705
l_shp <- unlist(lapply(1:length(list_shp_world),
706
                       FUN=function(i){paste(strsplit(list_shp_world[i],"_")[[1]][3:4],collapse="_")}))
707

  
798 708
matching_index <- match(basename(in_dir_list),l_shp)
799 709
list_shp_reg_files <- list_shp_world[matching_index]
800 710
df_tile_processed$shp_files <-list_shp_world[matching_index]
......
879 789
#l_pattern_models <- lapply(c(".*predicted_mod1_0_1.*",".*predicted_mod2_0_1.*",".*predicted_mod3_0_1.*",".*predicted_mod_kr_0_1.*"),
880 790
#                           FUN=function(x){paste(x,dates_l,".*.tif",sep="")})
881 791
#l_pattern_models <- lapply(pred_pattern_str,
882
                           FUN=function(x){paste(x,dates_l,".*.tif",sep="")})
792
#                           FUN=function(x){paste(x,dates_l,".*.tif",sep="")})
883 793

  
884
date_l# <- paste("clim_month_",1:12,sep="")
794
#date_l# <- paste("clim_month_",1:12,sep="")
885 795
#l_pattern_models <- lapply(c("_mod1_0_1.*","_mod2_0_1.*","_mod3_0_1.*","_mod_kr_0_1.*"),
886 796
#                           FUN=function(x){paste("*.",month_l,x,".*.tif",sep="")})
887 797
#l_pattern_models <- lapply(c(".*delta_dailyTmax_mod1_del_0_1.*",".*delta_dailyTmax_mod2_del_0_1.*",".*delta_dailyTmax_mod3_del_0_1.*",".*delta_dailyTmax_mod_kr_del_0_1.*"),
......
1075 985
pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = 6)
1076 986
#pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id][1:6]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = 6)
1077 987
#pred_data_info <- lapply(1:length(list_raster_obj_files),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
1078
pred_data_info <- lapply(1:length(list_raster_obj_files[1]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
988
#pred_data_info <- lapply(1:length(list_raster_obj_files[1]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
1079 989

  
1080 990
pred_data_info_tmp <- remove_from_list_fun(pred_data_info)$list #remove data not predicted
1081 991
##Add tile nanmes?? it is alreaready there
......
1126 1036
Atlas_hostname <- "parmentier@atlas.nceas.ucsb.edu"
1127 1037
lf_cp_shp <- df_tile_processed$shp_files #get all the files...
1128 1038

  
1129
lf_cp_shp_pattern <- gsub(".shp","*",base_name(lf_cp_shp))
1039
lf_cp_shp_pattern <- gsub(".shp","*",basename(lf_cp_shp))
1130 1040
lf_cp_shp_pattern <- file.path(dirname(lf_cp_shp),lf_cp_shp_pattern)
1131 1041
filenames_NEX <- paste(lf_cp_shp_pattern,collapse=" ")  #copy raster prediction object
1132 1042

  

Also available in: Unified diff