/ - Diff - Environment and organisms - NCEAS Projects

« Previous | Next »

Revision e0b5cc14

Added by Benoit Parmentier over 10 years ago

ID e0b5cc14410f201d4bb4a5fa943d0feaab9ad8fd
Parent e8db2d42
Child 71539f11

assessment NEX run part3: major updates to generate stat and fig for specific tiles

     #Analyses, figures, tables and data are also produced in the script.
     #AUTHOR: Benoit Parmentier
     #CREATED ON: 05/21/2014
     #MODIFIED ON: 05/21/2014
     #MODIFIED ON: 06/01/2014
     #Version: 1
     #PROJECT: Environmental Layers project
     #################################################################################################
-...
     function_assessment_by_tile <- "results_interpolation_date_output_analyses_05212014.R"
     #source(file.path(script_path,"results_interpolation_date_output_analyses_08052013.R"))
     script_path<-"/home/parmentier/Data/IPLANT_project/env_layers_scripts/" #path to script
     source(file.path(script_path,function_analyses_paper1)) #source all functions used in this script 1.
     source(file.path(script_path,function_analyses_paper2)) #source all functions used in this script 2.
     source(file.path(script_path,function_assessment_by_tile)) #source all functions used in this script 2.
     load_obj <- function(f)
+    {
       env <- new.env()
-...
     #if(Atlas_server==TRUE){
+    #
     #}
     script_path<-"/home/parmentier/Data/IPLANT_project/env_layers_scripts/" #path to script
     source(file.path(script_path,function_analyses_paper1)) #source all functions used in this script 1.
     source(file.path(script_path,function_analyses_paper2)) #source all functions used in this script 2.
     source(file.path(script_path,function_assessment_by_tile)) #source all functions used in this script 2.
     #in_dir1 <- "/data/project/layers/commons/NEX_data/test_run1_03232014/output" #On Atlas
     #parent output dir : contains subset of the data produced on NEX
     in_dir1 <- "/data/project/layers/commons/NEX_data/output_run2_05122014/output/"
     in_dir1 <- "/data/project/layers/commons/NEX_data/output_run3_global_analyses_05292014/output/"
     # parent output dir for the current script analyses
     out_dir <-"/data/project/layers/commons/NEX_data/" #On NCEAS Atlas
     out_dir <- "/data/project/layers/commons/NEX_data/output_run3_global_analyses_05292014/" #On NCEAS Atlas
     # input dir containing shapefiles defining tiles
     in_dir_shp <- "/data/project/layers/commons/NEX_data/output_run2_05122014/output/subset/shapefiles"
     in_dir_shp <- "/data/project/layers/commons/NEX_data/output_run3_global_analyses_05292014/output/subset/shapefiles"
     #On NEX
     #contains all data from the run by Alberto
-...
     y_var_name <- "dailyTmax"
     interpolation_method <- c("gam_CAI")
     out_prefix<-"run2_global_analyses_05122014"
     out_prefix<-"run3_global_analyses_05292014"
     #out_dir <-paste(out_dir,"_",out_prefix,sep="")
     create_out_dir_param <- TRUE
     create_out_dir_param <- FALSE
     if(create_out_dir_param==TRUE){
       out_dir <- create_dir_fun(out_dir,out_prefix)
-...
       setwd(out_dir) #use previoulsy defined directory
+    }
     setwd(out_dir)
     df_tile_processed <- read.table(file=file.path(out_dir,paste("df_tile_processed_",out_prefix,".txt",sep="")),sep=",")
     #in_dir_list <- file.path(in_dir1,read.table(file.path(in_dir1,"processed.txt"))$V1)
     CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
     region_name <- "USA"
-...
     summary_metrics_v <- read.table(file=file.path(out_dir,paste("summary_metrics_v2_NA_",out_prefix,".txt",sep="")),sep=",")
     tb <- read.table(file=file.path(out_dir,paste("tb_diagnostic_v_NA","_",out_prefix,".txt",sep="")),sep=",")
     #df_tile_processed <- read.table(file=file.path(out_dir,paste("df_tile_processed_",out_prefix,".txt",sep="")),sep=",")
     df_tile_processed <- read.table(file=file.path(out_dir,paste("df_tile_processed_",out_prefix,".txt",sep="")),sep=",")
     #in_dir_list <- file.path(in_dir1,read.table(file.path(in_dir1,"processed.txt"))$V1)
     ########################## START SCRIPT ##############################
     #Now add things here...
+    #
     selected_tiles <- c("45.0_-120.0","35.0_-115.0")
     #selected_tiles <- c("45.0_-120.0","35.0_-115.0")
     selected_tiles <- c("40.0_-120.0","35.0_-115.0")
     ##raster_prediction object : contains testing and training stations with RMSE and model object
     in_dir_list <- list.files(path=in_dir1,full.names=T)
     in_dir_list <- in_dir_list[grep("subset",basename(basename(in_dir_list)),invert=TRUE)] #the first one is the in_dir1
-...
     list_shp_reg_files <- file.path(in_dir_shp,df_tile_processed$shp_files)
     ###############
     ##Quick interactive  exploration of raster object to check possible errors
     robj1 <- load_obj(list_raster_obj_files[[1]]) #This is tile in CA
     names(robj1)
     names(robj1$method_mod_obj[[1]]) #for January 1, 2010
     names(robj1$method_mod_obj[[1]]$dailyTmax) #for January
     names(robj1$clim_method_mod_obj[[1]]$data_month) #for January
     names(robj1$validation_mod_month_obj[[1]]$data_s) #for January with predictions
     #Get the number of models predicted
     nb_mod <- length(unique(robj1$tb_diagnostic_v$pred_mod))
     ### Figure 1: plot location of the study area with tiles processed
     ### Figures diagnostic tile:
-...
     covar_obj <- lf_covar_obj[[1]]
     var <- "TMAX"
     list_param_results_analyses<-list(out_dir,in_path,script_path,raster_prediction_obj,interpolation_method,
     list_param_results_analyses<-list(out_dir,in_path_tile,script_path,raster_prediction_obj,interpolation_method,
                                       covar_obj,date_selected_results,var,out_prefix)
     names(list_param_results_analyses)<-c("out_path","in_path_tile","script_path","raster_prediction_obj","interpolation_method",
                          "covar_obj","date_selected_results","var","out_prefix")
     list_param <- list_param_results_analyses
     #list_param <- list_param_results_analyses
     #Run modified code from stage 5...
     #plots_assessment_by_date<-function(j,list_param){
-...
     ##Create data.frame with validation and fit metrics for a full year/full number of runs
     #Call functions to create plots of metrics for validation dataset
     tile_selected <- 6
     tb_diagnostic_v <- subset(tb,tile_id==6)
     metric_names<-c("rmse","mae","me","r","m50")
     #tile_selected <- 6
     #tb_diagnostic_v <- subset(tb,tile_id==6)
     #metric_names<-c("rmse","mae","me","r","m50")
     summary_metrics_v<- boxplot_from_tb(tb_diagnostic_v,metric_names,out_prefix,out_path) #if adding for fit need to change outprefix
     #summary_metrics_v<- boxplot_from_tb(tb_diagnostic_v,metric_names,out_prefix,out_path) #if adding for fit need to change outprefix
     names(summary_metrics_v)<-c("avg","median")
     #names(summary_metrics_v)<-c("avg","median")
     summary_month_metrics_v<- boxplot_month_from_tb(tb_diagnostic_v,metric_names,out_prefix,out_path)
     #summary_month_metrics_v<- boxplot_month_from_tb(tb_diagnostic_v,metric_names,out_prefix,out_path)
     #Call functions to create plots of metrics for validation dataset
     metric_names<-c("rmse","mae","me","r","m50")
     #metric_names<-c("rmse","mae","me","r","m50")
     summary_metrics_v<- boxplot_from_tb(tb_diagnostic_v,metric_names,out_prefix,out_path) #if adding for fit need to change outprefix
     #summary_metrics_v<- boxplot_from_tb(tb_diagnostic_v,metric_names,out_prefix,out_path) #if adding for fit need to change outprefix
     names(summary_metrics_v)<-c("avg","median")
     summary_month_metrics_v<- boxplot_month_from_tb(tb_diagnostic_v,metric_names,out_prefix,out_path)
     #names(summary_metrics_v)<-c("avg","median")
     #summary_month_metrics_v<- boxplot_month_from_tb(tb_diagnostic_v,metric_names,out_prefix,out_path)
     #### Function to plot boxplot from data.frame table of accuracy metrics
     ### need to improve these
     boxplot_from_tb <-function(tb_diagnostic,metric_names,out_prefix,out_path){
       #Generate one boxplot PNG per accuracy metric (models side by side) and
       #return the per-model mean and median of the metrics.
       #Input parameters:
       #1) tb_diagnostic: data frame of accuracy metrics; the code reads the columns
       #   var_interp, pred_mod, date and prop plus one column per metric
       #2) metric_names: character vector of metric column names to plot
       #3) out_prefix: string appended to every PNG file name
       #4) out_path: directory where the PNG files are written
       #Output:
       #list with elements "avg" and "median" (one row per model); each table has
       #a column n giving the number of rows available for that model.
       #NOTE(review): melt() and cast() are not loaded here; presumably they come from
       #the reshape package attached by the calling script - confirm.
       library(gdata) #Necessary to use cbindX

       ### Start script
       y_var_name <- unique(tb_diagnostic$var_interp) #name of interpolated variable: dailyTmax, dailyTmin
       mod_names <- sort(unique(tb_diagnostic$pred_mod)) #models that have accuracy metrics

       #long format table; named tb_long so that base::t is not masked
       tb_long <- melt(tb_diagnostic,
                       id=c("date","pred_mod","prop"),
                       na.rm=FALSE)
       tb_long$value <- as.numeric(tb_long$value) #values can come back as character
       avg_tb <- cast(tb_long,pred_mod~variable,mean)
       avg_tb$var_interp <- rep(y_var_name,times=nrow(avg_tb))
       median_tb <- cast(tb_long,pred_mod~variable,median)

       #one data frame per model, in the order of mod_names
       tb_mod_list <- lapply(mod_names, function(k) subset(tb_diagnostic, pred_mod==k))
       names(tb_mod_list) <- mod_names

       #column bind the per-model tables; cbindX pads tables with unequal row counts
       if(length(tb_mod_list)>1){
         mod_metrics <- do.call(cbindX,tb_mod_list)
       }else{
         mod_metrics <- tb_mod_list[[1]]
       }

       #column names become <original name>_<model>: used to select and label the boxes
       test_names <- lapply(seq_along(mod_names),
                            function(k) paste(names(tb_mod_list[[1]]),mod_names[k],sep="_"))
       names(mod_metrics) <- unlist(test_names)

       #plain integer vector (not a list) so that n is an ordinary column in the output tables
       rows_total <- vapply(tb_mod_list,nrow,integer(1))

       for (j in seq_along(metric_names)){
         metric_ac <- metric_names[j]
         mod_pat <- glob2rx(paste(metric_ac,"_*",sep=""))
         mod_var <- grep(mod_pat,names(mod_metrics),value=TRUE) #names of the columns for this metric
         test <- mod_metrics[mod_var]

         png(file.path(out_path,paste("boxplot_metric_",metric_ac, out_prefix,".png", sep="")))
         #finally guarantees that the device is closed even when plotting fails
         tryCatch({
           boxplot(test,outline=FALSE,horizontal=FALSE,cex=0.5,
                   ylab=paste(metric_ac,"in degree C",sep=" "),axisnames=FALSE,axes=FALSE)
           axis(1, labels = FALSE)
           ## Create some text labels
           labels <- names(test)
           ## Plot x axis labels at default tick marks
           text(1:ncol(test), par("usr")[3] - 0.25, srt = 45, adj = 1,
                labels = labels, xpd = TRUE)
           axis(2)
           box()
           title(paste(metric_ac,"for",y_var_name,sep=" "),cex=0.8)
         }, finally = dev.off())
       }

       avg_tb$n <- rows_total #total number of predictions on which the mean is based
       median_tb$n <- rows_total
       summary_obj <- list(avg_tb,median_tb)
       names(summary_obj) <- c("avg","median")
       return(summary_obj)
     }
     #boxplot_month_from_tb(tb_diagnostic,metric_names,out_prefix,out_path)
     ## Function to display metrics by months/seasons
     boxplot_month_from_tb <-function(tb_diagnostic,metric_names,out_prefix,out_path){
       #Generate boxplot per month for models and accuracy metrics
       #Input parameters:
       #1) tb_diagnostic: data frame containing accuracy metrics (RMSE etc.) per day;
       #   the code reads the columns var_interp, date (parsed as "%Y%m%d"), pred_mod
       #   and prop plus one column per metric
       #2) metric_names: metrics used for validation (column names to plot)
       #3) out_prefix: string appended to every PNG file name
       #4) out_path: directory where the PNG files are written
       #Output:
       #list with three elements:
       #  tb_list: list of monthly mean tables, one per model
       #  metric_month_avg: monthly mean of every metric for every model
       #  metric_month_sd: monthly standard deviation of every metric for every model
       #NOTE(review): melt() and cast() are not loaded here; presumably they come from
       #the reshape package attached by the calling script - confirm.

       #################
       ## BEGIN
       y_var_name <- unique(tb_diagnostic$var_interp) #name of interpolated variable: dailyTmax, dailyTmin
       date_f <- strptime(tb_diagnostic$date, "%Y%m%d")   # interpolation date being processed
       tb_diagnostic$month <- strftime(date_f, "%m")      # two digit month of each date
       mod_names <- sort(unique(tb_diagnostic$pred_mod)) #models that have accuracy metrics
       tb_mod_list <- lapply(mod_names, function(k) subset(tb_diagnostic, pred_mod==k)) #one table per model
       names(tb_mod_list) <- mod_names

       #long format table; named tb_long so that base::t is not masked
       tb_long <- melt(tb_diagnostic,
                       id=c("date","pred_mod","prop","month"),
                       na.rm=FALSE)
       tb_long$value <- as.numeric(tb_long$value) #values can come back as character
       tb_mod_m_avg <- cast(tb_long,pred_mod+month~variable,mean) #monthly mean for every model
       tb_mod_m_avg$var_interp <- rep(y_var_name,times=nrow(tb_mod_m_avg))
       tb_mod_m_sd <- cast(tb_long,pred_mod+month~variable,sd)   #monthly sd for every model
       tb_mod_m_list <- lapply(mod_names, function(k) subset(tb_mod_m_avg, pred_mod==k)) #monthly means split by model

       for (k in seq_along(mod_names)){
         mod_metrics <- tb_mod_list[[k]]
         current_mod_name <- mod_names[k]
         #boxes are drawn only for months present in the data, in sorted order:
         #label exactly those months so that labels and boxes stay aligned
         months_present <- sort(unique(mod_metrics$month))
         labels <- month.abb[as.integer(months_present)]
         for (j in seq_along(metric_names)){
           metric_ac <- metric_names[j]
           col_selected <- c(metric_ac,"month")
           test <- mod_metrics[col_selected]
           png(file.path(out_path,paste("boxplot_metric_",metric_ac,"_",current_mod_name,"_by_month_",out_prefix,".png", sep="")))
           #finally guarantees that the device is closed even when plotting fails
           tryCatch({
             boxplot(test[[metric_ac]]~test[["month"]],outline=FALSE,horizontal=FALSE,cex=0.5,
                     ylab=paste(metric_ac,"in degree C",sep=" "),axisnames=FALSE,axes=FALSE)
             axis(1, labels = FALSE)
             ## Plot x axis labels at default tick marks
             text(seq_along(labels), par("usr")[3] - 0.25, srt = 45, adj = 1,
                  labels = labels, xpd = TRUE)
             axis(2)
             box()
             title(paste(metric_ac,"for",current_mod_name,"by month",sep=" "))
           }, finally = dev.off())
         }
       }

       #list() keeps the three results as three elements; c() would splice the columns
       #of both data frames into one flat list and the names below would be misapplied
       summary_month_obj <- list(tb_mod_m_list,tb_mod_m_avg,tb_mod_m_sd)
       names(summary_month_obj) <- c("tb_list","metric_month_avg","metric_month_sd")
       return(summary_month_obj)
     }
     # ### need to improve these
     # boxplot_from_tb <-function(tb_diagnostic,metric_names,out_prefix,out_path){
     #   #now boxplots and mean per models
     #   library(gdata) #Nesssary to use cbindX
+    #
     #   ### Start script
     #   y_var_name<-unique(tb_diagnostic$var_interp) #extract the name of interpolated variable: dailyTmax, dailyTmin
+    #
     #   mod_names<-sort(unique(tb_diagnostic$pred_mod)) #models that have accuracy metrics
     #   t<-melt(tb_diagnostic,
     #           #measure=mod_var,
     #           id=c("date","pred_mod","prop"),
     #           na.rm=F)
     #   t$value<-as.numeric(t$value) #problem with char!!!
     #   avg_tb<-cast(t,pred_mod~variable,mean)
     #   avg_tb$var_interp<-rep(y_var_name,times=nrow(avg_tb))
     #   median_tb<-cast(t,pred_mod~variable,median)
+    #
     #   #avg_tb<-cast(t,pred_mod~variable,mean)
     #   tb<-tb_diagnostic
+    #
     #   #mod_names<-sort(unique(tb$pred_mod)) #kept for clarity
     #   tb_mod_list<-lapply(mod_names, function(k) subset(tb, pred_mod==k)) #this creates a list of 5 based on models names
     #   names(tb_mod_list)<-mod_names
     #   #mod_metrics<-do.call(cbind,tb_mod_list)
     #   #debug here
     #   if(length(tb_mod_list)>1){
     #     mod_metrics<-do.call(cbindX,tb_mod_list) #column bind the list??
     #   }else{
     #     mod_metrics<-tb_mod_list[[1]]
     #   }
+    #
     #   test_names<-lapply(1:length(mod_names),function(k) paste(names(tb_mod_list[[1]]),mod_names[k],sep="_"))
     #   #test names are used when plotting the boxplot for the different models
     #   names(mod_metrics)<-unlist(test_names)
     #   rows_total<-lapply(tb_mod_list,nrow)
     #   for (j in 1:length(metric_names)){
     #     metric_ac<-metric_names[j]
     #     mod_pat<-glob2rx(paste(metric_ac,"_*",sep=""))
     #     mod_var<-grep(mod_pat,names(mod_metrics),value=TRUE) # using grep with "value" extracts the matching names
     #     #browser()
     #     test<-mod_metrics[mod_var]
     #     png(file.path(out_path,paste("boxplot_metric_",metric_ac, out_prefix,".png", sep="")))
     #     #boxplot(test,outline=FALSE,horizontal=FALSE,cex=0.5,
     #     #        ylab=paste(metric_ac,"in degree C",sep=" "))
+    #
     #     boxplot(test,outline=FALSE,horizontal=FALSE,cex=0.5,
     #               ylab=paste(metric_ac,"in degree C",sep=" "),axisnames=FALSE,axes=FALSE)
     #     axis(1, labels = FALSE)
     #     ## Create some text labels
     #     labels <- labels<- names(test)
     #     ## Plot x axis labels at default tick marks
     #     text(1:ncol(test), par("usr")[3] - 0.25, srt = 45, adj = 1,
     #          labels = labels, xpd = TRUE)
     #     axis(2)
     #     box()
     #     #legend("bottomleft",legend=paste(names(rows_total),":",rows_total,sep=""),cex=0.7,bty="n")
     #     #title(as.character(t(paste(t(names(rows_total)),":",rows_total,sep=""))),cex=0.8)
     #     title(paste(metric_ac,"for",y_var_name,sep=" "),cex=0.8)
     #     dev.off()
     #   }
+    #
     #   avg_tb$n<-rows_total #total number of predictions on which the mean is based
     #   median_tb$n<-rows_total
     #   summary_obj<-list(avg_tb,median_tb)
     #   names(summary_obj)<-c("avg","median")
     #   return(summary_obj)
     # }
     # #boxplot_month_from_tb(tb_diagnostic,metric_names,out_prefix,out_path)
     # ## Function to display metrics by months/seasons
     # boxplot_month_from_tb <-function(tb_diagnostic,metric_names,out_prefix,out_path){
+    #
     #   #Generate boxplot per month for models and accuracy metrics
     #   #Input parameters:
     #   #1) df: data frame containing accurayc metrics (RMSE etc.) per day)
     #   #2) metric_names: metrics used for validation
     #   #3) out_prefix
     #   #
+    #
     #   #################
     #   ## BEGIN
     #   y_var_name<-unique(tb_diagnostic$var_interp) #extract the name of interpolated variable: dailyTmax, dailyTmin
     #   date_f<-strptime(tb_diagnostic$date, "%Y%m%d")   # interpolation date being processed
     #   tb_diagnostic$month<-strftime(date_f, "%m")          # current month of the date being processed
     #   mod_names<-sort(unique(tb_diagnostic$pred_mod)) #models that have accuracy metrics
     #   tb_mod_list<-lapply(mod_names, function(k) subset(tb_diagnostic, pred_mod==k)) #this creates a list of 5 based on models names
     #   names(tb_mod_list)<-mod_names
     #   t<-melt(tb_diagnostic,
     #           #measure=mod_var,
     #           id=c("date","pred_mod","prop","month"),
     #           na.rm=F)
     #   t$value<-as.numeric(t$value) #problem with char!!!
     #   tb_mod_m_avg <-cast(t,pred_mod+month~variable,mean) #monthly mean for every model
     #   tb_mod_m_avg$var_interp<-rep(y_var_name,times=nrow(tb_mod_m_avg))
+    #
     #   tb_mod_m_sd <-cast(t,pred_mod+month~variable,sd)   #monthly sd for every model
+    #
     #   tb_mod_m_list <-lapply(mod_names, function(k) subset(tb_mod_m_avg, pred_mod==k)) #this creates a list of 5 based on models names
+    #
     #   for (k in 1:length(mod_names)){
     #     mod_metrics <-tb_mod_list[[k]]
     #     current_mod_name<- mod_names[k]
     #     for (j in 1:length(metric_names)){
     #       metric_ac<-metric_names[j]
     #       col_selected<-c(metric_ac,"month")
     #       test<-mod_metrics[col_selected]
     #       png(file.path(out_path,paste("boxplot_metric_",metric_ac,"_",current_mod_name,"_by_month_",out_prefix,".png", sep="")))
     #       boxplot(test[[metric_ac]]~test[[c("month")]],outline=FALSE,horizontal=FALSE,cex=0.5,
     #               ylab=paste(metric_ac,"in degree C",sep=" "),,axisnames=FALSE,axes=FALSE)
     #       #boxplot(test[[metric_ac]]~test[[c("month")]],outline=FALSE,horizontal=FALSE,cex=0.5,
     #       #        ylab=paste(metric_ac,"in degree C",sep=" "))
     #       axis(1, labels = FALSE)
     #       ## Create some text labels
     #       labels <- month.abb # abbreviated names for each month
     #       ## Plot x axis labels at default tick marks
     #       text(1:length(labels), par("usr")[3] - 0.25, srt = 45, adj = 1,
     #            labels = labels, xpd = TRUE)
     #       axis(2)
     #       box()
     #       #legend("bottomleft",legend=paste(names(rows_total),":",rows_total,sep=""),cex=0.7,bty="n")
     #       title(paste(metric_ac,"for",current_mod_name,"by month",sep=" "))
     #       dev.off()
     #     }
+    #
     #   }
     #   summary_month_obj <-c(tb_mod_m_list,tb_mod_m_avg,tb_mod_m_sd)
     #   names(summary_month_obj)<-c("tb_list","metric_month_avg","metric_month_sd")
     #   return(summary_month_obj)
     # }
     ##################### END OF SCRIPT ######################

Also available in: Unified diff

Project

General

Profile

Revision e0b5cc14

Added by Benoit Parmentier over 10 years ago