Project

General

Profile

« Previous | Next » 

Revision ff3e42fe

Added by Benoit Parmentier over 8 years ago

testing function combine extraction and assessment data, also moved to function script

View differences:

climate/research/oregon/interpolation/global_product_assessment_part1.R
4 4
#Combining tables and figures for individual runs for years and tiles.
5 5
#AUTHOR: Benoit Parmentier 
6 6
#CREATED ON: 05/15/2016  
7
#MODIFIED ON: 09/18/2016            
7
#MODIFIED ON: 09/19/2016            
8 8
#Version: 1
9 9
#PROJECT: Environmental Layers project     
10 10
#COMMENTS: Initial commit, script based on part NASA biodiversity conferenc 
......
19 19
#
20 20
#setfacl -Rmd user:aguzman4:rwx /nobackupp8/bparmen1/output_run10_1500x4500_global_analyses_pred_1992_10052015
21 21

  
22
#COMMIT: combine extracted and stations information from assessment 
22
#COMMIT: testing function combine extraction and assessment data, also moved to function script 
23 23

  
24 24
#################################################################################################
25 25

  
......
85 85
source(file.path(script_path,function_assessment_part3)) #source all functions used in this script 
86 86

  
87 87
#Product assessment
88
function_product_assessment_part1_functions <- "global_product_assessment_part1_functions_09112016.R"
88
function_product_assessment_part1_functions <- "global_product_assessment_part1_functions_09192016.R"
89 89
source(file.path(script_path,function_product_assessment_part1_functions)) #source all functions used in this script 
90 90

  
91 91
###############################
......
589 589
#dates_val <-
590 590
df_raster
591 591
df_time_series
592
plot_fig <- false
592
plot_fig <- FALSE
593
i<-1
593 594

  
594
combine_measurements_and_predictions_df <- function(i,df_raster,df_time_series,df_ts_pix,data_var,list_selected_ID,r_ts_name,var_name,plot_fig=T){
595
  
596
  # Input arguments:
597
  # i : selected station
598
  # df_ts_pix_data : data extracted from raster layer
599
  # data_var : data with station measurements (tmin,tmax or precip)
600
  # list_selected_ID : list of selected station
601
  # plot_fig : if T, figures are plotted
602
  # Output
603
  #
604
  
605
  ##### START FUNCTION ############
606
  
607
  #get the relevant station
608
  id_name <- list_selected_ID[i] # e.g. WS037.00,1238099999
609
  #id_selected <- df_ts_pix[[var_ID]]==id_name
610
  id_selected <- df_ts_pix[["id"]]== id_name
611
  
612
  ### Not get the data from the time series
613
  data_pixel <- df_ts_pix[id_selected,] #this should be a unique row!!!
614
  #data_pixel <- data_pixel[1,]
615
  data_pixel <- as.data.frame(data_pixel)
616
  
617
  ##Transpose data to have rows as date and one unique column
618
  pix_ts <- t(as.data.frame(subset(data_pixel,select=r_ts_name))) #can subset to range later
619
  #pix_ts <- subset(as.data.frame(pix_ts),select=r_ts_name)
620
  pix_ts <- (as.data.frame(pix_ts))
621
  names(pix_ts) <- paste(var_pred,"_mosaic",sep="")
622
  #add scaling option
623
  #!is.null(scaling)
624
  ## Process the measurements data (with tmax/tmin/precip)
625
  
626
  #there are several measurements per day for some stations !!!
627
  #id_name <- data_pixel[[var_ID]]
628
  
629
  #df_tmp  <-data_var[data_var$LOCATION_ID==id_name,]
630
  df_tmp <- subset(data_var,data_var$id==id_name)
631
  #if(da)
632
  #aggregate(df_tmp
633
  #if(nrow(df_tmp)>1){
634
  #  
635
  #  formula_str <- paste(var_name," ~ ","TRIP_START_DATE_f",sep="")
636
  #  #var_pix <- aggregate(COL_SCORE ~ TRIP_START_DATE_f, data = df_tmp, mean) #aggregate by date
637
  #  var_pix <- try(aggregate(as.formula(formula_str), data = df_tmp, FUN=mean)) #aggregate by date
638
  #  #length(unique(test$TRIP_START_DATE_f))
639
  #  #var_pix_ts <- t(as.data.frame(subset(data_pixel,select=var_name)))
640
  #  #pix <- t(data_pixel[1,24:388])#can subset to range later
641
  #}else{
642
  #  var_pix <- as.data.frame(df_tmp) #select only dates and var_name!!!
643
  #}
644
  #var_pix <- subset(as.data.frame(data_id_selected,c(var_name,"TRIP_START_DATE_f")])) #,select=var_name)
645
  var_pix <- as.data.frame(df_tmp) #select only dates and var_name!!!
646
  var_pix$date_str <- as.character(var_pix$date)
647
  #match from 20011231 to 2001-12-31 to date format
648
  var_pix$date <- as.character(as.Date(var_pix$date_str,"%Y%m%d")) #format back to the relevant date format for files
649
  
650
  #dates_val <- df_time_series$date
651
  dates_val <- df_raster$date
652
  pix_ts$date <- dates_val 
653
  #pix_ts <- merge(df_raster,pix_ts,by="date")
654
  
655
  pix_ts$lf <- df_raster$lf
656
  #pix_ts$
657
  pix_ts <- merge(df_time_series,pix_ts,by="date",all=T)
658
  
659
  #check for duplicates in extracted values (this can happen if there is a test layer or repetition
660
  if(nrow(pix_ts)!=length(unique(pix_ts$date))){
661
    var_pred_tmp <- paste0(var_pred,"_mosaic")
662
    md <- melt(pix_ts, id=(c("date")),measure.vars=c(var_pred_tmp, "missing")) #c("x","y","dailyTmax","mod1","res_mod1"))
663
    #formula_str <- "id + date ~ x + y + dailyTmax + mod1 + res_mod1"
664
    pix_ts <- cast(md, date ~ variable, fun.aggregate = mean, 
665
    na.rm = TRUE)
666

  
667
  }
668
  
669
  #if(nrow(var_pix)!=length(unique(var_pix$date))){
670
  #
671
  #  md <- melt(var_pix, id=(c("date")),measure.vars=c(var_pred, "missing")) #c("x","y","dailyTmax","mod1","res_mod1"))
672
  #  #formula_str <- "id + date ~ x + y + dailyTmax + mod1 + res_mod1"
673
  #  test <- cast(md, date ~ variable, fun.aggregate = mean, 
674
  #  na.rm = TRUE)
675
  #
676
  #  
677
  #}
678
  df_pix_ts <- merge(pix_ts,var_pix,by="date",all=T)
679
  #Create time series object from extract pixel time series
680

  
681
  nb_zero <- sum( df_pix_ts[[var_pred_tmp]]==0) #relevant for precip
682
  #nb_NA <- sum(is.na(df2$COL_SCORE))
683
  nb_NA <- sum(is.na( df_pix_ts[[var_pred_tmp]])) #for ID 394 DMR it is 361 missing values for 2012!!
684
  ## Cumulated precip and lag?
685
  #Keep number of  0 for every year for rainfall
686
  #summarize by month
687
  #Kepp number of NA for scores... 
688
  #Summarize by season...
689
  ## Threshold?
690
  station_summary_obj <- list(nb_zero,nb_NA, df_pix_ts)
691
  names(station_summary_obj) <- c("nb_zero_precip","nb_NA_var"," df_pix_ts")
692
  return(station_summary_obj)
693
}
595
#Product assessment
596
#function_product_assessment_part1_functions <- "global_product_assessment_part1_functions_09192016.R"
597
#source(file.path(script_path,function_product_assessment_part1_functions)) #source all functions used in this script 
598
#undebug(combine_measurements_and_predictions_df)
599

  
600
#this can be run with mclapply, very fast right now:
601
station_summary_obj <- combine_measurements_and_predictions_df(i=i,
602
                                        df_raster=df_raster,
603
                                        df_time_series=df_time_series,
604
                                        df_ts_pix=df_ts_pix,
605
                                        data_var=data_var,
606
                                        list_selected_ID=list_selected_ID,
607
                                        r_ts_name=r_ts_name,
608
                                        var_name=var_name,
609
                                        var_pred = var_pred,
610
                                        plot_fig=T)
611
df_pix_ts <- station_summary_obj$df_pix_ts
612
#station_summary_obj <- list(nb_zero,nb_NA, df_pix_ts)
613

  
614
id_name <- list_selected_ID[i]
615
df_pix_ts_filename <- file.path(out_dir,paste0("df_pix_ts_",id_name,out_suffix,y_var_name,".txt"))
616
write.table(df_pix_ts,df_pix_ts_filename,sep=",")
694 617

  
695
list_dates_produced <- unlist(mclapply(1:length(var_names),
696
                                       FUN=extract_date,
697
                                       x=var_names,
698
                                       item_no=12,
699
                                       mc.preschedule=FALSE,
700
                                       mc.cores = num_cores))                         
701 618

  
702
list_dates_produced_date_val <- as.Date(strptime(list_dates_produced,"%Y%m%d"))
619
##################### plot time series: make this a function!!! ####
620
###start of plotting
621
### makes this a function:
622

  
703 623
pix_ts$date <- list_dates_produced_date_val
704 624
pix_ts[,1]*scaling #scale?
705 625

  
......
711 631
  
712 632
#head(pix_ts)
713 633

  
714

  
715
##################### plot time series: make this a function!!! ####
716
###start of plotting
717
### makes this a function:
718 634
id_selected <- "82111099999"
719 635
#station_id <- 8
720 636
station_id <- id_selected

Also available in: Unified diff