Project

General

Profile

« Previous | Next » 

Revision 35380459

Added by Benoit Parmentier over 8 years ago

adding functions to product assessment part1: extracting from raster and combining data to assessment stage

View differences:

climate/research/oregon/interpolation/global_product_assessment_part1_functions.R
544 544
 return(df_selected)
545 545
}
546 546

  
547

  
548

  
549
finding_missing_dates <- function(date_start,date_end,list_dates){
550
  #this assumes daily time steps!!
551
  #can be improved later on
552
  
553
  #date_start <- "19840101"
554
  #date_end <- "19991231"
555
  date1 <- as.Date(strptime(date_start,"%Y%m%d"))
556
  date2 <- as.Date(strptime(date_end,"%Y%m%d"))
557
  dates_range <- seq.Date(date1, date2, by="1 day") #sequence of dates
558

  
559
  missing_dates <- setdiff(as.character(dates_range),as.character(list_dates))
560
  #df_dates_missing <- data.frame(date=missing_dates)
561
  #which(df_dates$date%in%missing_dates)
562
  #df_dates_missing$missing <- 1
563
  
564
  df_dates <- data.frame(date=as.character(dates_range),missing = 0) 
565

  
566
  df_dates$missing[df_dates$date %in% missing_dates] <- 1
567
  #a$flag[a$id %in% temp] <- 1
568

  
569
  missing_dates_obj <- list(missing_dates,df_dates)
570
  names(missing_dates_obj) <- c("missing_dates","df_dates")
571
  return(missing_dates)
572
}
573

  
574

  
575

  
576
extract_from_time_series_raster_stack <- function(df_points,date_start,date_end,lf_raster,item_no=13,num_cores=4,pattern_str=NULL,in_dir=NULL,out_dir=".",out_suffix=""){
577
  #
578
  #This function extract value given from a raster stack time series given a spatial data.frame and a list of files
579
  #
580
  #INPUTS
581
  #1) df_points
582
  #2) date_start,num_cores=4,pattern_str=NULL,in_dir=NULL,out_dir=".",out_suffix=
583
  #3) date_end
584
  #3) lf_raster
585
  #4) item_no=13
586
  #5) num_cores=4,
587
  #6) pattern_str=NULL
588
  #7) in_dir=NULL,
589
  #8) out_dir="."
590
  #9) out_suffix=""
591
  #OUTPUTS
592
  #
593
  #
594
  
595
  #### Start script ####
596
  
597
  if(is.null(lf_raster)){
598
    #pattern_str <- ".*.tif"
599
    pattern_str <-"*.tif"
600
    lf_raster <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=F,full.names=T)
601
    r_stack <- stack(lf_raster,quick=T) #this is very fast now with the quick option!
602
    #save(r_mosaic,file="r_mosaic.RData")
603
    
604
  }else{
605
    r_stack <- stack(lf_raster,quick=T) #this is very fast now with the quick option!
606
  }
607

  
608
  #df_points$files <- lf_mosaic_list
609
  #Use the global output??
610

  
611
  ##23.09 (on 05/22)
612
  #df_points_day_extracted <- extract(r_mosaic,data_stations,df=T)
613
  #df_points_day_extracted_fname <- paste0("df_points_day_extracted_fname",".txt") 
614
  #write.table(df_points_day_extracted,file=df_points_day_extracted_fname) #5.1 Giga
615
  #4.51 (on 05/23)
616
  #df_points_day <- data_stations_var_pred_data_s
617

  
618
  #15.17 (on 09/08)
619
  ##10.41 (on 05/22)
620
  #took about 7h for 5262 layers, maybe can be sped up later
621
  df_points_extracted <- extract(r_stack,df_points,df=T,sp=T) #attach back to the original data...
622

  
623
  #17.19 (on 05/23)
624
  #22.27 (on 09/08)
625
  #df_points_extracted_fname <- paste0("df_points_day_extracted_fname2",".txt")
626
  #17.27 (on 05/23)
627
  
628
  df_points_extracted_fname <- file.path(out_dir,paste0("df_points_extracted_",out_suffix,".txt"))
629
  write.table(df_points_extracted,file= df_points_extracted_fname,sep=",",row.names = F) 
630
  #17.19 (on 05/23)
631
  
632
  #### Now check for missing dates
633
  
634
  #debug(extract_date)
635
  #test <- extract_date(6431,lf_mosaic_list,12) #extract item number 12 from the name of files to get the data
636
  #list_dates_produced <- unlist(mclapply(1:length(lf_raster),FUN=extract_date,x=lf_raster,item_no=13,mc.preschedule=FALSE,mc.cores = num_cores))                         
637
  #list_dates_produced <-  mclapply(1:2,FUN=extract_date,x=lf_mosaic_list,item_no=13,mc.preschedule=FALSE,mc.cores = 2)                         
638
  list_dates_produced <- unlist(mclapply(1:length(lf_raster),FUN=extract_date,x=lf_raster,item_no=item_no,
639
                                         mc.preschedule=FALSE,mc.cores = num_cores))                         
640

  
641
  list_dates_produced_date_val <- as.Date(strptime(list_dates_produced,"%Y%m%d"))
642
  month_str <- format(list_dates_produced_date_val, "%b") ## Month, char, abbreviated
643
  year_str <- format(list_dates_produced_date_val, "%Y") ## Year with century
644
  day_str <- as.numeric(format(list_dates_produced_date_val, "%d")) ## numeric month
645

  
646
  df_raster <- data.frame(lf=basename(lf_raster),
647
                          date=list_dates_produced_date_val,
648
                          month_str=month_str,
649
                          year=year_str,
650
                          day=day_str,
651
                          dir=dirname(lf_mosaic_list))
652

  
653
  df_raster_fname <- file.path(out_dir,paste0("df_raster_",out_suffix,".txt"))
654
  write.table(df_raster,file= df_raster_fname,sep=",",row.names = F) 
655

  
656
  missing_dates_obj <- finding_missing_dates (date_start,date_end,list_dates_produced_date_val)
657
  
658
  df_time_series <- missing_dates_obj$df_dates
659
  df_time_series$date <- as.character(df_time_series$date)  
660
  df_raster$date <- as.character(df_raster$date)
661
  
662
  df_time_series <- merge(df_time_series,df_raster,by="date",all=T) #outer join to keep missing dates
663
  
664
  df_time_series_fname <- file.path(out_dir,paste0("df_time_series_",out_suffix,".txt")) #add the name of var later (tmax)
665
  write.table(df_time_series,file= df_time_series_fname,sep=",",row.names = F) 
666
  
667
  extract_obj <- list(df_points_extracted_fname,df_raster_fname,df_time_series_fname)
668
  names(extract_obj) <- c("df_points_extracted_fname","df_raster_fname","df_time_series_fname")
669
  
670
  return(extract_obj)
671
}
672

  
673

  
674
combine_measurements_and_predictions_df <- function(i,df_raster,df_time_series,df_ts_pix,data_var,list_selected_ID,r_ts_name,var_name,plot_fig=T){
675
  
676
  # Input arguments:
677
  # i : selected station
678
  # df_ts_pix_data : data extracted from raster layer
679
  # data_var : data with station measurements (tmin,tmax or precip)
680
  # list_selected_ID : list of selected station
681
  # plot_fig : if T, figures are plotted
682
  # Output
683
  #
684
  
685
  ##### START FUNCTION ############
686
  
687
  #get the relevant station
688
  id_name <- list_selected_ID[i] # e.g. WS037.00,1238099999
689
  #id_selected <- df_ts_pix[[var_ID]]==id_name
690
  id_selected <- df_ts_pix[["id"]]== id_name
691
  
692
  ### Not get the data from the time series
693
  data_pixel <- df_ts_pix[id_selected,] #this should be a unique row!!!
694
  #data_pixel <- data_pixel[1,]
695
  data_pixel <- as.data.frame(data_pixel)
696
  
697
  ##Transpose data to have rows as date and one unique column
698
  pix_ts <- t(as.data.frame(subset(data_pixel,select=r_ts_name))) #can subset to range later
699
  #pix_ts <- subset(as.data.frame(pix_ts),select=r_ts_name)
700
  pix_ts <- (as.data.frame(pix_ts))
701
  names(pix_ts) <- paste(var_pred,"_mosaic",sep="")
702
  #add scaling option
703
  #!is.null(scaling)
704
  ## Process the measurements data (with tmax/tmin/precip)
705
  
706
  #there are several measurements per day for some stations !!!
707
  #id_name <- data_pixel[[var_ID]]
708
  
709
  #df_tmp  <-data_var[data_var$LOCATION_ID==id_name,]
710
  df_tmp <- subset(data_var,data_var$id==id_name)
711
  #if(da)
712
  #aggregate(df_tmp
713
  #if(nrow(df_tmp)>1){
714
  #  
715
  #  formula_str <- paste(var_name," ~ ","TRIP_START_DATE_f",sep="")
716
  #  #var_pix <- aggregate(COL_SCORE ~ TRIP_START_DATE_f, data = df_tmp, mean) #aggregate by date
717
  #  var_pix <- try(aggregate(as.formula(formula_str), data = df_tmp, FUN=mean)) #aggregate by date
718
  #  #length(unique(test$TRIP_START_DATE_f))
719
  #  #var_pix_ts <- t(as.data.frame(subset(data_pixel,select=var_name)))
720
  #  #pix <- t(data_pixel[1,24:388])#can subset to range later
721
  #}else{
722
  #  var_pix <- as.data.frame(df_tmp) #select only dates and var_name!!!
723
  #}
724
  #var_pix <- subset(as.data.frame(data_id_selected,c(var_name,"TRIP_START_DATE_f")])) #,select=var_name)
725
  var_pix <- as.data.frame(df_tmp) #select only dates and var_name!!!
726
  var_pix$date_str <- as.character(var_pix$date)
727
  #match from 20011231 to 2001-12-31 to date format
728
  var_pix$date <- as.character(as.Date(var_pix$date_str,"%Y%m%d")) #format back to the relevant date format for files
729
  
730
  #dates_val <- df_time_series$date
731
  dates_val <- df_raster$date
732
  pix_ts$date <- dates_val 
733
  #pix_ts <- merge(df_raster,pix_ts,by="date")
734
  
735
  pix_ts$lf <- df_raster$lf
736
  #pix_ts$
737
  pix_ts <- merge(df_time_series,pix_ts,by="date",all=T)
738
  
739
  #check for duplicates in extracted values (this can happen if there is a test layer or repetition
740
  if(nrow(pix_ts)!=length(unique(pix_ts$date))){
741
    var_pred_tmp <- paste0(var_pred,"_mosaic")
742
    md <- melt(pix_ts, id=(c("date")),measure.vars=c(var_pred_tmp, "missing")) #c("x","y","dailyTmax","mod1","res_mod1"))
743
    #formula_str <- "id + date ~ x + y + dailyTmax + mod1 + res_mod1"
744
    pix_ts <- cast(md, date ~ variable, fun.aggregate = mean, 
745
    na.rm = TRUE)
746

  
747
  }
748
  
749
  #if(nrow(var_pix)!=length(unique(var_pix$date))){
750
  #
751
  #  md <- melt(var_pix, id=(c("date")),measure.vars=c(var_pred, "missing")) #c("x","y","dailyTmax","mod1","res_mod1"))
752
  #  #formula_str <- "id + date ~ x + y + dailyTmax + mod1 + res_mod1"
753
  #  test <- cast(md, date ~ variable, fun.aggregate = mean, 
754
  #  na.rm = TRUE)
755
  #
756
  #  
757
  #}
758
  df_pix_ts <- merge(pix_ts,var_pix,by="date",all=T)
759
  #Create time series object from extract pixel time series
760

  
761
  nb_zero <- sum( df_pix_ts[[var_pred_tmp]]==0) #relevant for precip
762
  #nb_NA <- sum(is.na(df2$COL_SCORE))
763
  nb_NA <- sum(is.na( df_pix_ts[[var_pred_tmp]])) #for ID 394 DMR it is 361 missing values for 2012!!
764
  ##Add quantile, and range info later on...
765
  
766
  ## Cumulated precip and lag?
767
  #Keep number of  0 for every year for rainfall
768
  #summarize by month
769
  #Kepp number of NA for scores... 
770
  #Summarize by season...
771
  ## Threshold?
772
  station_summary_obj <- list(nb_zero,nb_NA, df_pix_ts)
773
  names(station_summary_obj) <- c("nb_zero_precip","nb_NA_var"," df_pix_ts")
774
  return(station_summary_obj)
775
}
776

  
547 777
############################ END OF SCRIPT ##################################

Also available in: Unified diff