Project

General

Profile

« Previous | Next » 

Revision e6ce3b29

Added by Benoit Parmentier over 8 years ago

removing code related to time series and combine extracted and stations information from assessment

View differences:

climate/research/oregon/interpolation/global_product_assessment_part1.R
4 4
#Combining tables and figures for individual runs for years and tiles.
5 5
#AUTHOR: Benoit Parmentier 
6 6
#CREATED ON: 05/15/2016  
7
#MODIFIED ON: 09/17/2016            
7
#MODIFIED ON: 09/18/2016            
8 8
#Version: 1
9 9
#PROJECT: Environmental Layers project     
10 10
#COMMENTS: Initial commit, script based on part NASA biodiversity conferenc 
......
19 19
#
20 20
#setfacl -Rmd user:aguzman4:rwx /nobackupp8/bparmen1/output_run10_1500x4500_global_analyses_pred_1992_10052015
21 21

  
22
#COMMIT: clean up and first testing of extraction function
22
#COMMIT: combine extracted and stations information from assessment 
23 23

  
24 24
#################################################################################################
25 25

  
......
149 149

  
150 150
day_start <- "1984101" #PARAM 12 arg 12
151 151
day_end <- "19991231" #PARAM 13 arg 13
152
#date_start <-
153
#date_end <- 
152
#date_start <- day_start
153
#date_end <- day_end
154 154

  
155 155
#infile_mask <- "/nobackupp8/bparmen1/NEX_data/regions_input_files/r_mask_LST_reg4.tif"
156 156
#infile_mask <- "/data/project/layers/commons/NEX_data/regions_input_files/r_mask_LST_reg5.tif"
......
506 506
#started at 16.51, 09/07
507 507

  
508 508

  
509
#####  This could be moved in a separate file!!
509 510
###############  PART4: Checking for mosaic produced for given region ##############
510 511
## From list of mosaic files predicted extract dates
511 512
## Check dates predicted against date range for a given date range
......
526 527
  in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg1/mosaics/mosaic"
527 528
  #in_dir_mosaic_rmse <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg1/mosaicsRMSE/mosaic"
528 529
  pattern_str <- ".*.tif"
529
  df_points <- #this contains the location of points to be used for extraction
530
  
530
  #df_points <- #this contains the location of points to be used for extraction
531
  #df_points <- data_stations_var_pred_data_s #collected previously
532
    
531 533
  #extract from var pred mosaic, tmax in this case:
532 534
  extract_obj_var_pred <- extract_from_time_series_raster_stack(df_points,date_start,date_end,lf_raster=NULL,item_no=13,
533 535
                                      num_cores=11,pattern_str=NULL,in_dir=in_dir_mosaic,out_dir=out_dir,out_suffix=out_suffix)
......
551 553
#### Now combined with the station data extracted from the assessment stage
552 554

  
553 555
#combine
554
data_stations_var_pred
555

  
556
##write function to combine data!!!
557

  
558
pix_ts <- as.data.frame(t(df_points_day_extracted))
559
pix_ts <- pix_ts[-1,]
560
#var_names <- rownames(df_points_day_extracted) #same as lf_mosaic_list but without (*.tif)
561
var_names <- rownames(pix_ts) #same as lf_mosaic_list but without (*.tif)
562
var_id <- df_points_day$id
563
df_points_day_extracted$id <- var_id
564
 
565
#lf_var <- names(r_mosaic)
566
### Not get the data from the time series
567
#data_pixel <- df_ts_pix[id_selected,]
568
#data_pixel <- as.data.frame(data_pixel)
569
#pix_ts <- t(as.data.frame(subset(data_pixel,select=r_ts_name))) #can subset to range later
570
#pix_ts <- subset(as.data.frame(pix_ts),select=r_ts_name)
571

  
572
combine_measurements_and_predictions_df <- function(i,dates_val,df_ts_pix,data_var,list_selected_ID,r_ts_name,var_name,dates_str,plot_fig=T){
556
head(data_stations_var_pred)
557

  
558

  
559
#           id     date        x      y dailyTmax     mod1  res_mod1          id     date training testing
560
#1 71238099999 19930703 -112.867 53.683      20.2 18.19909 -2.000915 71238099999 19930703        4       0
561
#2 71238099999 19930704 -112.867 53.683      23.4 17.74476 -5.655237 71238099999 19930704        3       1
562
#3 71238099999 19930705 -112.867 53.683      21.7 19.22313 -2.476870 71238099999 19930705        3       1
563
#4 71238099999 19930706 -112.867 53.683      22.0 19.53294 -2.467061 71238099999 19930706        3       1
564
#5 71238099999 19930707 -112.867 53.683      20.9 17.84168 -3.058324 71238099999 19930707        2       2
565
#6 71238099999 19930708 -112.867 53.683      21.2 16.50887 -4.691128 71238099999 19930708        1       3
566

  
567
#need to combine:
568
#data_stations_var_pred: constains id, x, y, dailyTmax, mod1, res_mod1, date, training, testing
569
#df_time_series
570
#df_points_extracted #contains id,x,y of stations and extracted values from raster stack
571
dim(df_time_series)
572
#dim(data_stations_var_pred)
573
df_points_extracted_tmp <- df_points_extracted
574
#df_points_extracted <- cbind(df_points,df_points_extracted)
575

  
576
df_points_extracted$id <- df_points$id #this should have been done earlier in the extraction function
577
df_points_extracted$x <- df_points$x #this should have been done earlier in the extraction function
578
df_points_extracted$y <- df_points$y #this should have been done earlier in the extraction function
579

  
580
dim(df_time_series)
581

  
582
list_selected_ID <- unique(data_stations_var_pred$id) #11 stations selected
583
data_var <- data_stations_var_pred
584
df_ts_pix <- df_points_extracted
585
r_ts_name <- sub(extension(lf_raster),"",basename(lf_raster))
586
var_name <- "dailyTmax" #observed measurements
587
var_pred <- "mod1" #predictions
588
#dates_str <-
589
#dates_val <-
590
df_raster
591
df_time_series
592
plot_fig <- false
593

  
594
combine_measurements_and_predictions_df <- function(i,df_raster,df_time_series,df_ts_pix,data_var,list_selected_ID,r_ts_name,var_name,plot_fig=T){
573 595
  
574 596
  # Input arguments:
575 597
  # i : selected station
......
583 605
  ##### START FUNCTION ############
584 606
  
585 607
  #get the relevant station
586
  id_name <- list_selected_ID[i] # e.g. WS037.00
608
  id_name <- list_selected_ID[i] # e.g. WS037.00,1238099999
587 609
  #id_selected <- df_ts_pix[[var_ID]]==id_name
588
  id_selected <- df_ts_pix[["ID_stat"]]==id_name
610
  id_selected <- df_ts_pix[["id"]]== id_name
589 611
  
590 612
  ### Not get the data from the time series
591
  data_pixel <- df_ts_pix[id_selected,]
613
  data_pixel <- df_ts_pix[id_selected,] #this should be a unique row!!!
614
  #data_pixel <- data_pixel[1,]
592 615
  data_pixel <- as.data.frame(data_pixel)
593 616
  
617
  ##Transpose data to have rows as date and one unique column
594 618
  pix_ts <- t(as.data.frame(subset(data_pixel,select=r_ts_name))) #can subset to range later
595 619
  #pix_ts <- subset(as.data.frame(pix_ts),select=r_ts_name)
596 620
  pix_ts <- (as.data.frame(pix_ts))
597
  ## Process the coliform data
621
  names(pix_ts) <- paste(var_pred,"_mosaic",sep="")
622
  #add scaling option
623
  #!is.null(scaling)
624
  ## Process the measurements data (with tmax/tmin/precip)
598 625
  
599 626
  #there are several measurements per day for some stations !!!
600 627
  #id_name <- data_pixel[[var_ID]]
601 628
  
602 629
  #df_tmp  <-data_var[data_var$LOCATION_ID==id_name,]
603
  df_tmp <- subset(data_var,data_var$ID_stat==id_name)
630
  df_tmp <- subset(data_var,data_var$id==id_name)
604 631
  #if(da)
605 632
  #aggregate(df_tmp
606
  if(nrow(df_tmp)>1){
607
    
608
    formula_str <- paste(var_name," ~ ","TRIP_START_DATE_f",sep="")
609
    #var_pix <- aggregate(COL_SCORE ~ TRIP_START_DATE_f, data = df_tmp, mean) #aggregate by date
610
    var_pix <- try(aggregate(as.formula(formula_str), data = df_tmp, FUN=mean)) #aggregate by date
611
    #length(unique(test$TRIP_START_DATE_f))
612
    #var_pix_ts <- t(as.data.frame(subset(data_pixel,select=var_name)))
613
    #pix <- t(data_pixel[1,24:388])#can subset to range later
614
  }else{
615
    var_pix <- as.data.frame(df_tmp) #select only dates and var_name!!!
616
  }
633
  #if(nrow(df_tmp)>1){
634
  #  
635
  #  formula_str <- paste(var_name," ~ ","TRIP_START_DATE_f",sep="")
636
  #  #var_pix <- aggregate(COL_SCORE ~ TRIP_START_DATE_f, data = df_tmp, mean) #aggregate by date
637
  #  var_pix <- try(aggregate(as.formula(formula_str), data = df_tmp, FUN=mean)) #aggregate by date
638
  #  #length(unique(test$TRIP_START_DATE_f))
639
  #  #var_pix_ts <- t(as.data.frame(subset(data_pixel,select=var_name)))
640
  #  #pix <- t(data_pixel[1,24:388])#can subset to range later
641
  #}else{
642
  #  var_pix <- as.data.frame(df_tmp) #select only dates and var_name!!!
643
  #}
617 644
  #var_pix <- subset(as.data.frame(data_id_selected,c(var_name,"TRIP_START_DATE_f")])) #,select=var_name)
645
  var_pix <- as.data.frame(df_tmp) #select only dates and var_name!!!
646
  var_pix$date_str <- as.character(var_pix$date)
647
  #match from 20011231 to 2001-12-31 to date format
648
  var_pix$date <- as.character(as.Date(var_pix$date_str,"%Y%m%d")) #format back to the relevant date format for files
618 649
  
619
  #Create time series object from extract pixel time series
620
  d_z <- zoo(pix_ts,dates_val) #make a time series ...
621
  names(d_z)<- "rainfall"
622
  #Create date object for data from stations
623
  
624
  d_var <- zoo(var_pix,var_pix$TRIP_START_DATE_f)
625
  #plot(d_var,pch=10)
626
  
627
  d_z2 <- merge(d_z,d_var)
628
  ##Now subset?
629
  d_z2 <- window(d_z2,start=dates_val[1],end=dates_val[length(dates_val)])
630
  
631
  d_z2$TRIP_START_DATE_f <- NULL
650
  #dates_val <- df_time_series$date
651
  dates_val <- df_raster$date
652
  pix_ts$date <- dates_val 
653
  #pix_ts <- merge(df_raster,pix_ts,by="date")
632 654
  
633
  df2 <- as.data.frame(d_z2)
634
  df2$date <- rownames(df2)
635
  rownames(df2) <- NULL
636
  df2[[var_name]] <- as.numeric(as.character(df2[[var_name]]))
637
  
638
  #df2$COL_SCORE <- as.numeric(as.character(df2$COL_SCORE))
639
  df2$rainfall <- as.numeric(as.character(df2$rainfall))
640
  df2$ID_stat <- id_name
641
    
642
  #plot(df2$rainfall)
643
  #list_pix[[i]] <- pix_ts
655
  pix_ts$lf <- df_raster$lf
656
  #pix_ts$
657
  pix_ts <- merge(df_time_series,pix_ts,by="date",all=T)
644 658
  
645
  if(plot_fig==T){
646
    
647
    res_pix <- 480
648
    col_mfrow <- 2
649
    row_mfrow <- 1
650
    
651
    ###
652
    #Figure 3b
653
    png(filename=paste("Figure3b_","pixel_profile_var_combined_",id_name,"_",out_suffix,".png",sep=""),
654
        width=col_mfrow*res_pix,height=row_mfrow*res_pix)
655
    
656
    #plot(d_z,lty=2,ylab="rainfall",xlab="Time",main="")
657
    #points(d_z2$COL_SCORE,col="red",pch=10,cex=2)
658
    plot(d_z,lty=2,ylab="rainfall",xlab="Time",main="")
659
    abline(h=threshold_val,col="green")
660
    
661
    par(new=TRUE)              # key: ask for new plot without erasing old
662
    #plot(x,y,type="l",col=t_col[k],xlab="",ylab="",lty="dotted",axes=F) #plotting fusion profile
663
    plot(df2[[var_name]],pch=10,cex=2.5,col="red", axes=F,ylab="",xlab="")
664
    #points(d_z2$COL_SCORE,col="red",pch=10,cex=2)
665
    legend("topleft",legend=c("stations"), 
666
           cex=1.2,col="red",pch =10,bty="n")
667
    
668
    axis(4,cex=1.2)
669
    mtext(4, text = "coliform scores", line = 3)
670
    
671
    title(paste("Station time series",id_name,sep=" "))
672
    
673
    dev.off()
674
    
675
    #Figure 3c
676
    png(filename=paste("Figure3c_","pixel_profile_var_combined_log_scale_",id_name,"_",out_suffix,".png",sep=""),
677
        width=col_mfrow*res_pix,height=row_mfrow*res_pix)
678
    
679
    #plot(d_z,lty=2,ylab="rainfall",xlab="Time",main="")
680
    #points(d_z2$COL_SCORE,col="red",pch=10,cex=2)
681
    plot(d_z,lty=2,ylab="rainfall",xlab="Time",main="")
682
    abline(h=threshold_val,col="green")
683
    par(new=TRUE)              # key: ask for new plot without erasing old
684
    #plot(x,y,type="l",col=t_col[k],xlab="",ylab="",lty="dotted",axes=F) #plotting fusion profile
685
    #plot(log(df2$COL_SCORE),pch=10,cex=2.5,col="red", axes=F,ylab="",xlab="")
686
    plot(log(df2[[var_name]]),pch=10,cex=2.5,col="red", axes=F,ylab="",xlab="")
687
    
688
    #points(d_z2$COL_SCORE,col="red",pch=10,cex=2)
689
    legend("topleft",legend=c("stations"), 
690
           cex=1.2,col="red",pch =10,bty="n")
691
    
692
    axis(4,cex=1.2)
693
    mtext(4, text = "coliform scores", line = 3)
694
    
695
    title(paste("Station time series",id_name,sep=" "))
696
    
697
    dev.off()
698
    
699
    ####Histogram of values
700
    
701
    res_pix <- 480
702
    col_mfrow <- 2
703
    row_mfrow <- 1
704
    
705
    png(filename=paste("Figure4_","histogram_measurements_",year_processed,"_",id_name,"_",out_suffix,".png",sep=""),
706
        width=col_mfrow*res_pix,height=row_mfrow*res_pix)
707
    
708
    hist_val <- hist(df2[[var_name]],main="",xlab="COLIFORM SCORES")
709
    #hist_val <- hist(df2$COL_SCORE,main="",xlab="COLIFORM SCORES")
710
    title(paste("Histrogram of coliform scores for station",id_name,"in",year_processed,sep=" "))
711
    #abline(v=threshold_val,col="green" )
712
    legend("topright",legend=c("treshold val"), 
713
           cex=1.2, col="green",lty =1,bty="n")  
714
    
715
    y_loc <- max(hist_val$counts)/2
716
    
717
    #text(threshold_val,y_loc,paste(as.character(threshold_val)),pos=1,offset=0.1)
718
    
719
    dev.off()
720
    
721
    #res_pix <- 480
722
    #col_mfrow <- 2
723
    #row_mfrow <- 1
724
    
725
    #png(filename=paste("Figure4_","histogram_coliform_measurements_",year_processed,"_",id_name,"_",out_suffix,".png",sep=""),
726
    #    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
727
    
728
    plot(df2$rainfall)
729
    #plot(df2$rainfall,df2$COL_SCORE)
730
    #plot(log(df2$rainfall),log(df2$COL_SCORE))
731
    plot(df2$rainfall,df2[[var_name]])
732
    plot(df2$rainfall,log(df2[[var_name]]))
659
  #check for duplicates in extracted values (this can happen if there is a test layer or repetition
660
  if(nrow(pix_ts)!=length(unique(pix_ts$date))){
661
    var_pred_tmp <- paste0(var_pred,"_mosaic")
662
    md <- melt(pix_ts, id=(c("date")),measure.vars=c(var_pred_tmp, "missing")) #c("x","y","dailyTmax","mod1","res_mod1"))
663
    #formula_str <- "id + date ~ x + y + dailyTmax + mod1 + res_mod1"
664
    pix_ts <- cast(md, date ~ variable, fun.aggregate = mean, 
665
    na.rm = TRUE)
733 666

  
734
    
735
    
736 667
  }
737 668
  
738
  ## Now correlation.
739
  #sum(is.na(df2$rainfall))
740
  #[1] 0
741
  nb_zero <- sum((df2$rainfall==0)) #203
669
  #if(nrow(var_pix)!=length(unique(var_pix$date))){
670
  #
671
  #  md <- melt(var_pix, id=(c("date")),measure.vars=c(var_pred, "missing")) #c("x","y","dailyTmax","mod1","res_mod1"))
672
  #  #formula_str <- "id + date ~ x + y + dailyTmax + mod1 + res_mod1"
673
  #  test <- cast(md, date ~ variable, fun.aggregate = mean, 
674
  #  na.rm = TRUE)
675
  #
676
  #  
677
  #}
678
  df_pix_ts <- merge(pix_ts,var_pix,by="date",all=T)
679
  #Create time series object from extract pixel time series
680

  
681
  nb_zero <- sum( df_pix_ts[[var_pred_tmp]]==0) #relevant for precip
742 682
  #nb_NA <- sum(is.na(df2$COL_SCORE))
743
  nb_NA <- sum(is.na(df2[[var_name]])) #for ID 394 DMR it is 361 missing values for 2012!!
683
  nb_NA <- sum(is.na( df_pix_ts[[var_pred_tmp]])) #for ID 394 DMR it is 361 missing values for 2012!!
744 684
  ## Cumulated precip and lag?
745 685
  #Keep number of  0 for every year for rainfall
746 686
  #summarize by month
747 687
  #Kepp number of NA for scores... 
748 688
  #Summarize by season...
749 689
  ## Threshold?
750
  station_summary_obj <- list(nb_zero,nb_NA,df2)
751
  names(station_summary_obj) <- c("nb_zero_precip","nb_NA_var","df_combined")
690
  station_summary_obj <- list(nb_zero,nb_NA, df_pix_ts)
691
  names(station_summary_obj) <- c("nb_zero_precip","nb_NA_var"," df_pix_ts")
752 692
  return(station_summary_obj)
753 693
}
754 694

  
......
771 711
  
772 712
#head(pix_ts)
773 713

  
714

  
715
##################### plot time series: make this a function!!! ####
774 716
###start of plotting
775 717
### makes this a function:
776 718
id_selected <- "82111099999"

Also available in: Unified diff