Project

General

Profile

« Previous | Next » 

Revision 0892e770

Added by Benoit Parmentier over 8 years ago

major changes adding function to extract from raster, combine extracted data with assessmment and cleaning up

View differences:

climate/research/oregon/interpolation/global_product_assessment_part1.R
4 4
#Combining tables and figures for individual runs for years and tiles.
5 5
#AUTHOR: Benoit Parmentier 
6 6
#CREATED ON: 05/15/2016  
7
#MODIFIED ON: 09/06/2016            
7
#MODIFIED ON: 09/11/2016            
8 8
#Version: 1
9 9
#PROJECT: Environmental Layers project     
10 10
#COMMENTS: Initial commit, script based on part NASA biodiversity conferenc 
......
85 85
source(file.path(script_path,function_assessment_part3)) #source all functions used in this script 
86 86

  
87 87
#Product assessment
88
function_product_assessment_part1_functions <- "global_product_assessment_part1_functions_09062016.R"
88
function_product_assessment_part1_functions <- "global_product_assessment_part1_functions_09112016.R"
89 89
source(file.path(script_path,function_product_assessment_part1_functions)) #source all functions used in this script 
90 90

  
91 91
###############################
......
381 381
c( -78.396011,35.696143)) 
382 382

  
383 383

  
384
query_for_station_lat_long <- function(point_val,df_points_spdf,step_x=0.25,step_y=0.25){
385
  #make this function better using gbuffer later!!!, works for now 
386
  #Improve: circular query + random within it
387
  y_val <- point_val[2]
388
  x_val <- point_val[1]
389
  
390
  y_val_tmp <- y_val + step_y
391
  if(x_val>0){
392
    x_val_tmp <- x_val - step_x
393
  }else{
394
    x_val_tmp <- x_val + step_x
395
  }
396

  
397

  
398
  test_df <- subset(df_points_spdf,(y_val < df_points_spdf$y & df_points_spdf$y < y_val_tmp))
399
  test_df2 <- subset(test_df,(x_val < test_df$x & test_df$x < x_val_tmp))
400
  #plot(test_df2)
401
 if(nrow(test_df2)>0){
402
   df_selected <- test_df2[1,]
403
 }else{
404
   df_selected <- NULL
405
 }
406
 
407
 return(df_selected)
408
}
409

  
410 384
test_day_query2 <- lapply(list_lat_long,FUN=query_for_station_lat_long,df_points_spdf=df_point_day,step_x=1,step_y=1)
411 385
#test_day_query <-query_for_station_lat_long(c(-72,42),df_points_spdf=df_point_day,step_x=1,step_y=0.25)
412 386
df_stations_selected <- do.call(rbind,test_day_query2)
......
485 459
data_stations_var_pred <- cast(md, id + date ~ variable, fun.aggregate = mean, 
486 460
  na.rm = TRUE)
487 461

  
462
#write.table(data_stations_var_pred,
463
#            file=file.path(out_dir,paste0("data_stations_var_pred_tmp_",out_suffix,".txt",
464
#                                                                 sep=",")))
488 465
write.table(data_stations_var_pred,
489
            file=file.path(out_dir,paste0("data_stations_var_pred_tmp_",out_suffix,".txt",
490
                                                                 sep=",")))
491
write.table(data_stations_var_pred_training_testing,"data_stations_var_pred_training_testing.txt")
466
            file=file.path(out_dir,paste0("data_stations_var_pred_tmp_",out_suffix,".txt")),
467
            sep=",")
468

  
469
#data_stations_var_pred <- read.table(
470
#            file=file.path(out_dir,paste0("data_stations_var_pred_tmp_",out_suffix,".txt",
471
#                                                                 sep=",")))
492 472

  
493 473
md <- melt(data_stations, id=(c("id", "date")),measure.vars=c("training","testing"))
494 474
data_stations_training_testing <- cast(md, id + date ~ variable, fun.aggregate = sum, 
495 475
  na.rm = TRUE)
496 476

  
477
#write.table(data_stations_training_testing,
478
#            file=file.path(out_dir,paste0("data_stations_training_testing_",out_suffix,".txt",
479
#                                                                 sep=",")))
497 480
write.table(data_stations_training_testing,
498
            file=file.path(out_dir,paste0("data_stations_training_testing_",out_suffix,".txt",
499
                                                                 sep=",")))
500

  
481
            file=file.path(out_dir,paste0("data_stations_training_testing_",out_suffix,".txt")),
482
            sep=",")
501 483
#data_stations_var_pred <- aggregate(id2 ~ x + y + date + dailyTmax + mod1 + res_mod1 ,data = data_stations, mean ) #+ mod1 + res_mod1 , data = data_stations, min)
502 484
#data_stations$id2 <- as.numeric(data_stations$id)
503 485
#data_stations$date <- as.character(data_stations$date)
......
516 498
#data_stations_var_pred2 <- aggregate(id ~ training,data = data_stations, sum ) #+ mod1 + res_mod1 , data = data_stations, min)
517 499
#data_stations_var_pred2 <- aggregate(date ~ training,data = data_stations, sum ) #+ mod1 + res_mod1 , data = data_stations, min)
518 500

  
519
data_stations_var_pred <- cbind(data_stations_var_pred,data_stations_var_pred_training_testing)
520

  
521
write.table(data_stations_var_pred,"data_stations_var_pred.txt")
522
#started at 16.51, 09/07
523

  
524
############### PART3: Make raster stack and display maps #############
525
#### Extract corresponding raster for given dates and plot stations used
526

  
527

  
528
#start_date <- day_to_mosaic_range[1]
529
#end_date <- day_to_mosaic_range[2]
530
#start_date <- day_start #PARAM 12 arg 12
531
#end_date <- day_end #PARAM 13 arg 13
532

  
533
#date_to_plot <- seq(as.Date(strptime(start_date,"%Y%m%d")), as.Date(strptime(end_date,"%Y%m%d")), 'day')
534
#l_dates <- format(date_to_plot,"%Y%m%d") #format back to the relevant date format for files
535
#mask_pred <- FALSE
536
#matching <- FALSE #to be added after mask_pred option
537
#list_param_pre_process <- list(raster_name_lf,python_bin,infile_mask,scaling,mask_pred,NA_flag_val,out_suffix,out_dir) 
538
#names(list_param_pre_process) <- c("lf","python_bin","infile_mask","scaling","mask_pred","NA_flag_val","out_suffix","out_dir") 
539
  
540
#debug(pre_process_raster_mosaic_fun)
501
#data_stations_var_pred <- merge(data_stations_var_pred, data_stations_training_testing , by="id") #this is slow maybe do cbind?
502
#data_stations_var_pred_test <- data_stations_var_pred 
541 503

  
542
#lf_mosaic_scaled <- mclapply(1:length(raster_name_lf),FUN=pre_process_raster_mosaic_fun,list_param=list_param_pre_process,mc.preschedule=FALSE,mc.cores = num_cores)                         
543
#lf_mosaic_scaled <- mclapply(1:length(raster_name_lf),FUN=pre_process_raster_mosaic_fun,list_param=list_param_pre_process,mc.preschedule=FALSE,mc.cores = num_cores)                         
504
data_stations_var_pred <- cbind(data_stations_var_pred,data_stations_training_testing)
544 505

  
545
#test <- pre_process_raster_mosaic_fun(2,list_param_pre_process)
546
#lf_mosaic_scaled <- unlist(lf_mosaic_scaled)
547

  
548
r_mosaic_scaled <- stack(lf_mosaic_scaled)
549
NAvalue(r_mosaic_scaled)<- -3399999901438340239948148078125514752.000
550
plot(r_mosaic_scaled,y=6,zlim=c(-50,50))
551
plot(r_mosaic_scaled,zlim=c(-50,50),col=matlab.like(255))
552

  
553
#layout_m<-c(1,3) #one row two columns
554
#levelplot(r_mosaic_scaled,zlim=c(-50,50),col.regions=matlab.like(255))
555
#levelplot(r_mosaic_scaled,zlim=c(-50,50),col.regions=matlab.like(255))
556

  
557
#png(paste("Figure7a__spatial_pattern_tmax_prediction_levelplot_",date_selected,out_prefix,".png", sep=""),
558
#    height=480*layout_m[1],width=480*layout_m[2])
559
#plot(r_pred,col=temp.colors(255),zlim=c(-3500,4500))
560
#plot(r_pred,col=matlab.like(255),zlim=c(-40,50))
561
#paste(raster_name[1:7],collapse="_")
562
#add filename option later
563

  
564
#NA_flag_val_mosaic <- -3399999901438340239948148078125514752.000
506
write.table(data_stations_var_pred,
507
            file=file.path(out_dir,paste0("data_stations_var_pred_",out_suffix,".txt")),
508
            sep=",")
565 509

  
566
list_param_plot_raster_mosaic <- list(l_dates,r_mosaic_scaled,NA_flag_val_mosaic,out_dir,out_suffix,
567
                                      region_name,variable_name)
568
names(list_param_plot_raster_mosaic) <- c("l_dates","r_mosaic_scaled","NA_flag_val_mosaic","out_dir","out_suffix",
569
                                          "region_name","variable_name")
510
#started at 16.51, 09/07
570 511

  
571
lf_mosaic_plot_fig <- mclapply(1:length(lf_mosaic_scaled),FUN=plot_raster_mosaic,list_param=list_param_plot_raster_mosaic,mc.preschedule=FALSE,mc.cores = num_cores)                         
572 512

  
573 513
###############  PART4: Checking for mosaic produced for given region ##############
574 514
## From list of mosaic files predicted extract dates
......
577 517

  
578 518
##Now grab year year 1992 or matching year...maybe put this in a data.frame??
579 519

  
580
pattern_str <- "r_m_use_edge_weights_weighted_mean_mask_gam_CAI_dailyTmax_.*._reg4_.*.tif"
581
searchStr = paste(in_dir_mosaic,"/output_reg4_2014",year_processed,"/gam_CAI_dailyTmax_predicted_",pred_mod_name,"*",day_to_mosaic[i],"*.tif",sep="")
582
pattern_str <- ".*.tif"
583
in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg1/mosaics/mosaic"
584
#lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern="*.tif",recursive=T)
585

  
586
if(is.null(r_mosaic_fname)){
587
  pattern_str <-"*.tif"
588
  lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=F,full.names=T)
589
  r_mosaic <- stack(lf_mosaic_list,quick=T)
590
  save(r_mosaic,file="r_mosaic.RData")
591
  
592
}else{
593
  r_mosaic <- load_obj(r_mosaic_fname) #load raster stack of images
594
}
595

  
596

  
597
#r_mosaic_ts <- stack(lf_mosaic_list)
598
#df_centroids <- extract(df_centroids,r_mosaic_ts)
599

  
600
df_points$files <- lf_mosaic_list
520
#pattern_str <- "r_m_use_edge_weights_weighted_mean_mask_gam_CAI_dailyTmax_.*._reg4_.*.tif"
521
#searchStr = paste(in_dir_mosaic,"/output_reg4_2014",year_processed,"/gam_CAI_dailyTmax_predicted_",pred_mod_name,"*",day_to_mosaic[i],"*.tif",sep="")
601 522

  
602 523
#debug(extract_date)
603 524
#test <- extract_date(6431,lf_mosaic_list,12) #extract item number 12 from the name of files to get the data
604
list_dates_produced <- unlist(mclapply(1:length(lf_mosaic_list),FUN=extract_date,x=lf_mosaic_list,item_no=14,mc.preschedule=FALSE,mc.cores = num_cores))                         
605
#list_dates_produced <-  mclapply(6400:6431,FUN=extract_date,x=lf_mosaic_list,item_no=12,mc.preschedule=FALSE,mc.cores = num_cores)                         
525
list_dates_produced <- unlist(mclapply(1:length(lf_mosaic_list),FUN=extract_date,x=lf_mosaic_list,item_no=13,mc.preschedule=FALSE,mc.cores = num_cores))                         
526
#list_dates_produced <-  mclapply(1:2,FUN=extract_date,x=lf_mosaic_list,item_no=13,mc.preschedule=FALSE,mc.cores = 2)                         
606 527

  
607 528
list_dates_produced_date_val <- as.Date(strptime(list_dates_produced,"%Y%m%d"))
608 529
month_str <- format(list_dates_produced_date_val, "%b") ## Month, char, abbreviated
609 530
year_str <- format(list_dates_produced_date_val, "%Y") ## Year with century
610 531
day_str <- as.numeric(format(list_dates_produced_date_val, "%d")) ## numeric month
611 532

  
612
df_produced <- data.frame(lf_mosaic_list,list_dates_produced_date_val,month_str,year_str,day_str)
533
df_produced <- data.frame(basename(lf_mosaic_list),list_dates_produced_date_val,month_str,year_str,day_str,dirname(lf_mosaic_list))
613 534

  
614
date_start <- "19840101"
615
date_end <- "19991231"
616
date1 <- as.Date(strptime(date_start,"%Y%m%d"))
617
date2 <- as.Date(strptime(date_end,"%Y%m%d"))
618
dates_range <- seq.Date(date1, date2, by="1 day") #sequence of dates
535
finding_missing_dates <- function(date_start,date_end,list_dates){
536

  
537
  date_start <- "19840101"
538
  date_end <- "19991231"
539
  date1 <- as.Date(strptime(date_start,"%Y%m%d"))
540
  date2 <- as.Date(strptime(date_end,"%Y%m%d"))
541
  dates_range <- seq.Date(date1, date2, by="1 day") #sequence of dates
542

  
543
  missing_dates <- setdiff(as.character(dates_range),as.character(list_dates_produced_date_val))
544

  
545
  return(missing_dates)
546
}
619 547

  
620
missing_dates <- setdiff(as.character(dates_range),as.character(list_dates_produced_date_val))
621 548

  
622
month_str <- format(list_dates_produced_date_val, "%b") ## Month, char, abbreviated
623
year_str <- format(list_dates_produced_date_val, "%Y") ## Year with century
624
day_str <- as.numeric(format(list_dates_produced_date_val, "%d")) ## numeric month
625 549

  
550

  
551
####
626 552
df_points$date <- list_dates_produced_date_val
627 553
df_points$month <- month_str
628 554
df_points$year <- year_str
629 555
df_points$day <- day_str
630 556

  
557
in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg1/mosaics/mosaic"
558
in_dir_mosaic_rmse <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg1/mosaicsRMSE/mosaic"
559
#pattern_str <- ".*.tif"
560

  
561
extract_from_time_series_raster_stack <- function(df_points,date_start,date_end,lf_raster,item_no=13,num_cores=4,pattern_str=NULL,in_dir=NULL,out_dir=".",out_suffix=""){
562
  #
563
  #This function extract value given from a raster stack time series given a spatial data.frame and a list of files
564
  #
565
  #INPUTS
566
  #1) df_points
567
  #2) date_start,num_cores=4,pattern_str=NULL,in_dir=NULL,out_dir=".",out_suffix=
568
  #3) date_end
569
  #3) lf_raster
570
  #4) item_no=13
571
  #5) num_cores=4,
572
  #6) pattern_str=NULL
573
  #7) in_dir=NULL,
574
  #8) out_dir="."
575
  #9) out_suffix=""
576
  #OUTPUTS
577
  #
578
  #
579
  
580
  #### Start script ####
581
  
582
  if(is.null(lf_raster)){
583
    #pattern_str <- ".*.tif"
584
    pattern_str <-"*.tif"
585
    lf_raster <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=F,full.names=T)
586
    r_stack <- stack(lf_raster,quick=T) #this is very fast now with the quick option!
587
    #save(r_mosaic,file="r_mosaic.RData")
588
    
589
  }else{
590
    r_stack <- stack(lf_raster,quick=T) #this is very fast now with the quick option!
591
  }
631 592

  
593
  #df_points$files <- lf_mosaic_list
594
  #Use the global output??
632 595

  
633
#Use the global output??
596
  ##23.09 (on 05/22)
597
  #df_points_day_extracted <- extract(r_mosaic,data_stations,df=T)
598
  #df_points_day_extracted_fname <- paste0("df_points_day_extracted_fname",".txt") 
599
  #write.table(df_points_day_extracted,file=df_points_day_extracted_fname) #5.1 Giga
600
  #4.51 (on 05/23)
601
  #df_points_day <- data_stations_var_pred_data_s
634 602

  
635
##23.09 (on 05/22)
636
#df_points_day_extracted <- extract(r_mosaic,data_stations,df=T)
637
#df_points_day_extracted_fname <- paste0("df_points_day_extracted_fname",".txt") 
638
#write.table(df_points_day_extracted,file=df_points_day_extracted_fname) #5.1 Giga
639
#4.51 (on 05/23)
640
df_points_day <- data_stations_var_pred_data_s
641
if(is.null(df_points_extracted_fname)){
642 603
  #15.17 (on 09/08)
643 604
  ##10.41 (on 05/22)
644
  df_points_day_extracted <- extract(r_mosaic,df_points_day,df=T)
605
  #took about 7h for 5262 layers, maybe can be sped up later
606
  df_points_extracted <- extract(r_stack,df_points,df=T,sp=T) #attach back to the original data...
607

  
645 608
  #17.19 (on 05/23)
646 609
  #22.27 (on 09/08)
647
  df_points_day_extracted_fname <- paste0("df_points_day_extracted_fname2",".txt")
610
  #df_points_extracted_fname <- paste0("df_points_day_extracted_fname2",".txt")
648 611
  #17.27 (on 05/23)
649
  write.table(df_points_day_extracted,file=df_points_day_extracted_fname,sep=",",row.names = F) 
612
  
613
  df_points_extracted_fname <- file.path(out_dir,paste0("df_points_extracted_",out_suffix,".txt"))
614
  write.table(df_points_extracted,file= df_points_extracted_fname,sep=",",row.names = F) 
650 615
  #17.19 (on 05/23)
616
  
617
  #### Now check for missing dates
618
  
619
  #debug(extract_date)
620
  #test <- extract_date(6431,lf_mosaic_list,12) #extract item number 12 from the name of files to get the data
621
  #list_dates_produced <- unlist(mclapply(1:length(lf_raster),FUN=extract_date,x=lf_raster,item_no=13,mc.preschedule=FALSE,mc.cores = num_cores))                         
622
  #list_dates_produced <-  mclapply(1:2,FUN=extract_date,x=lf_mosaic_list,item_no=13,mc.preschedule=FALSE,mc.cores = 2)                         
623
  list_dates_produced <- unlist(mclapply(1:length(lf_raster),FUN=extract_date,x=lf_raster,item_no=item_no,
624
                                         mc.preschedule=FALSE,mc.cores = num_cores))                         
625

  
626
  list_dates_produced_date_val <- as.Date(strptime(list_dates_produced,"%Y%m%d"))
627
  month_str <- format(list_dates_produced_date_val, "%b") ## Month, char, abbreviated
628
  year_str <- format(list_dates_produced_date_val, "%Y") ## Year with century
629
  day_str <- as.numeric(format(list_dates_produced_date_val, "%d")) ## numeric month
630

  
631
  df_raster <- data.frame(basename(lf_mosaic_list),list_dates_produced_date_val,month_str,year_str,day_str,dirname(lf_mosaic_list))
632

  
633
  df_raster_fname <- file.path(out_dir,paste0("df_raster_",out_suffix,".txt"))
634
  write.table(df_raster,file= df_raster_fname,sep=",",row.names = F) 
635

  
636
  df_points_extracted_fname
637
  df_raster_fname
638
  
639
  extract_obj <- list(df_points_extracted_fname,df_raster_fname)
640
  names(extract_obj) <- c("df_points_extracted_fname","df_raster_fname")
641
  
642
  return(extract_obj)
643
}
644

  
645

  
646

  
647

  
651 648

  
652 649
}else{
653 650
  df_points_day_extracted <- read.table(df_points_extracted_fname,sep=",")
654 651
}
655 652

  
656
head(df_points_day) #contains ID, dates and coordinates
657
df_points_day$id
658

  
659
max_idst <- 0.009*5 #5km in degree
660
min_dist <- 0    #minimum distance to start with
653
df_points_day_extracted 
654
names(df_points_day_extracted)[1:10]
655
(df_points_day_extracted$ID)[1:10]
661 656

  
662
###this needs be modified
663
## target number
664
target_max_nb <- 1 #this is not actually used yet in the current implementation
665
target_min_nb <- 5 #this is the target number of stations we would like
666
max_dist <- 1000 # the maximum distance used for pruning ie removes stations that are closer than 1000m 
667
min_dist <- 0    #minimum distance to start with
668
step_dist <- 1000 #iteration step to remove the stations
669
target_range_nb <- c(target_min_nb,target_max_nb) #target range of number of stations
670
#First increase distance till 5km
671
#then use random sampling...to get the extact target?
672
#First test with maximum distance of 100m
673
test1 <- sub_sampling_by_dist(target_range_nb,dist=min_dist,max_dist=max_dist,step=step_dist,data_in=df_points_day)
657
df_points_day_extracted_tmp <- df_points_day_extracted
658
df_points_extracted <- cbind(df_points_day,df_points_day_extracted_tmp)
659
#df_points_extracted$id <- df_points_day$id
674 660

  
661
#### Now combined with the station data extracted from the assessment stage
662
combine
663
data_stations_var_pred
675 664

  
665
##write function to combine data!!!
676 666

  
677 667
pix_ts <- as.data.frame(t(df_points_day_extracted))
678 668
pix_ts <- pix_ts[-1,]
......
688 678
#pix_ts <- t(as.data.frame(subset(data_pixel,select=r_ts_name))) #can subset to range later
689 679
#pix_ts <- subset(as.data.frame(pix_ts),select=r_ts_name)
690 680

  
681
combine_measurements_and_predictions_df <- function(i,dates_val,df_ts_pix,data_var,list_selected_ID,r_ts_name,var_name,dates_str,plot_fig=T){
682
  
683
  # Input arguments:
684
  # i : selected station
685
  # df_ts_pix_data : data extracted from raster layer
686
  # data_var : data with station measurements (tmin,tmax or precip)
687
  # list_selected_ID : list of selected station
688
  # plot_fig : if T, figures are plotted
689
  # Output
690
  #
691
  
692
  ##### START FUNCTION ############
693
  
694
  #get the relevant station
695
  id_name <- list_selected_ID[i] # e.g. WS037.00
696
  #id_selected <- df_ts_pix[[var_ID]]==id_name
697
  id_selected <- df_ts_pix[["ID_stat"]]==id_name
698
  
699
  ### Not get the data from the time series
700
  data_pixel <- df_ts_pix[id_selected,]
701
  data_pixel <- as.data.frame(data_pixel)
702
  
703
  pix_ts <- t(as.data.frame(subset(data_pixel,select=r_ts_name))) #can subset to range later
704
  #pix_ts <- subset(as.data.frame(pix_ts),select=r_ts_name)
705
  pix_ts <- (as.data.frame(pix_ts))
706
  ## Process the coliform data
707
  
708
  #there are several measurements per day for some stations !!!
709
  #id_name <- data_pixel[[var_ID]]
710
  
711
  #df_tmp  <-data_var[data_var$LOCATION_ID==id_name,]
712
  df_tmp <- subset(data_var,data_var$ID_stat==id_name)
713
  #if(da)
714
  #aggregate(df_tmp
715
  if(nrow(df_tmp)>1){
716
    
717
    formula_str <- paste(var_name," ~ ","TRIP_START_DATE_f",sep="")
718
    #var_pix <- aggregate(COL_SCORE ~ TRIP_START_DATE_f, data = df_tmp, mean) #aggregate by date
719
    var_pix <- try(aggregate(as.formula(formula_str), data = df_tmp, FUN=mean)) #aggregate by date
720
    #length(unique(test$TRIP_START_DATE_f))
721
    #var_pix_ts <- t(as.data.frame(subset(data_pixel,select=var_name)))
722
    #pix <- t(data_pixel[1,24:388])#can subset to range later
723
  }else{
724
    var_pix <- as.data.frame(df_tmp) #select only dates and var_name!!!
725
  }
726
  #var_pix <- subset(as.data.frame(data_id_selected,c(var_name,"TRIP_START_DATE_f")])) #,select=var_name)
727
  
728
  #Create time series object from extract pixel time series
729
  d_z <- zoo(pix_ts,dates_val) #make a time series ...
730
  names(d_z)<- "rainfall"
731
  #Create date object for data from stations
732
  
733
  d_var <- zoo(var_pix,var_pix$TRIP_START_DATE_f)
734
  #plot(d_var,pch=10)
735
  
736
  d_z2 <- merge(d_z,d_var)
737
  ##Now subset?
738
  d_z2 <- window(d_z2,start=dates_val[1],end=dates_val[length(dates_val)])
739
  
740
  d_z2$TRIP_START_DATE_f <- NULL
741
  
742
  df2 <- as.data.frame(d_z2)
743
  df2$date <- rownames(df2)
744
  rownames(df2) <- NULL
745
  df2[[var_name]] <- as.numeric(as.character(df2[[var_name]]))
746
  
747
  #df2$COL_SCORE <- as.numeric(as.character(df2$COL_SCORE))
748
  df2$rainfall <- as.numeric(as.character(df2$rainfall))
749
  df2$ID_stat <- id_name
750
    
751
  #plot(df2$rainfall)
752
  #list_pix[[i]] <- pix_ts
753
  
754
  if(plot_fig==T){
755
    
756
    res_pix <- 480
757
    col_mfrow <- 2
758
    row_mfrow <- 1
759
    
760
    ###
761
    #Figure 3b
762
    png(filename=paste("Figure3b_","pixel_profile_var_combined_",id_name,"_",out_suffix,".png",sep=""),
763
        width=col_mfrow*res_pix,height=row_mfrow*res_pix)
764
    
765
    #plot(d_z,lty=2,ylab="rainfall",xlab="Time",main="")
766
    #points(d_z2$COL_SCORE,col="red",pch=10,cex=2)
767
    plot(d_z,lty=2,ylab="rainfall",xlab="Time",main="")
768
    abline(h=threshold_val,col="green")
769
    
770
    par(new=TRUE)              # key: ask for new plot without erasing old
771
    #plot(x,y,type="l",col=t_col[k],xlab="",ylab="",lty="dotted",axes=F) #plotting fusion profile
772
    plot(df2[[var_name]],pch=10,cex=2.5,col="red", axes=F,ylab="",xlab="")
773
    #points(d_z2$COL_SCORE,col="red",pch=10,cex=2)
774
    legend("topleft",legend=c("stations"), 
775
           cex=1.2,col="red",pch =10,bty="n")
776
    
777
    axis(4,cex=1.2)
778
    mtext(4, text = "coliform scores", line = 3)
779
    
780
    title(paste("Station time series",id_name,sep=" "))
781
    
782
    dev.off()
783
    
784
    #Figure 3c
785
    png(filename=paste("Figure3c_","pixel_profile_var_combined_log_scale_",id_name,"_",out_suffix,".png",sep=""),
786
        width=col_mfrow*res_pix,height=row_mfrow*res_pix)
787
    
788
    #plot(d_z,lty=2,ylab="rainfall",xlab="Time",main="")
789
    #points(d_z2$COL_SCORE,col="red",pch=10,cex=2)
790
    plot(d_z,lty=2,ylab="rainfall",xlab="Time",main="")
791
    abline(h=threshold_val,col="green")
792
    par(new=TRUE)              # key: ask for new plot without erasing old
793
    #plot(x,y,type="l",col=t_col[k],xlab="",ylab="",lty="dotted",axes=F) #plotting fusion profile
794
    #plot(log(df2$COL_SCORE),pch=10,cex=2.5,col="red", axes=F,ylab="",xlab="")
795
    plot(log(df2[[var_name]]),pch=10,cex=2.5,col="red", axes=F,ylab="",xlab="")
796
    
797
    #points(d_z2$COL_SCORE,col="red",pch=10,cex=2)
798
    legend("topleft",legend=c("stations"), 
799
           cex=1.2,col="red",pch =10,bty="n")
800
    
801
    axis(4,cex=1.2)
802
    mtext(4, text = "coliform scores", line = 3)
803
    
804
    title(paste("Station time series",id_name,sep=" "))
805
    
806
    dev.off()
807
    
808
    ####Histogram of values
809
    
810
    res_pix <- 480
811
    col_mfrow <- 2
812
    row_mfrow <- 1
813
    
814
    png(filename=paste("Figure4_","histogram_measurements_",year_processed,"_",id_name,"_",out_suffix,".png",sep=""),
815
        width=col_mfrow*res_pix,height=row_mfrow*res_pix)
816
    
817
    hist_val <- hist(df2[[var_name]],main="",xlab="COLIFORM SCORES")
818
    #hist_val <- hist(df2$COL_SCORE,main="",xlab="COLIFORM SCORES")
819
    title(paste("Histrogram of coliform scores for station",id_name,"in",year_processed,sep=" "))
820
    #abline(v=threshold_val,col="green" )
821
    legend("topright",legend=c("treshold val"), 
822
           cex=1.2, col="green",lty =1,bty="n")  
823
    
824
    y_loc <- max(hist_val$counts)/2
825
    
826
    #text(threshold_val,y_loc,paste(as.character(threshold_val)),pos=1,offset=0.1)
827
    
828
    dev.off()
829
    
830
    #res_pix <- 480
831
    #col_mfrow <- 2
832
    #row_mfrow <- 1
833
    
834
    #png(filename=paste("Figure4_","histogram_coliform_measurements_",year_processed,"_",id_name,"_",out_suffix,".png",sep=""),
835
    #    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
836
    
837
    plot(df2$rainfall)
838
    #plot(df2$rainfall,df2$COL_SCORE)
839
    #plot(log(df2$rainfall),log(df2$COL_SCORE))
840
    plot(df2$rainfall,df2[[var_name]])
841
    plot(df2$rainfall,log(df2[[var_name]]))
842

  
843
    
844
    
845
  }
846
  
847
  ## Now correlation.
848
  #sum(is.na(df2$rainfall))
849
  #[1] 0
850
  nb_zero <- sum((df2$rainfall==0)) #203
851
  #nb_NA <- sum(is.na(df2$COL_SCORE))
852
  nb_NA <- sum(is.na(df2[[var_name]])) #for ID 394 DMR it is 361 missing values for 2012!!
853
  ## Cumulated precip and lag?
854
  #Keep number of  0 for every year for rainfall
855
  #summarize by month
856
  #Kepp number of NA for scores... 
857
  #Summarize by season...
858
  ## Threshold?
859
  station_summary_obj <- list(nb_zero,nb_NA,df2)
860
  names(station_summary_obj) <- c("nb_zero_precip","nb_NA_var","df_combined")
861
  return(station_summary_obj)
862
}
863

  
691 864
list_dates_produced <- unlist(mclapply(1:length(var_names),
692 865
                                       FUN=extract_date,
693 866
                                       x=var_names,
......
846 1019

  
847 1020
dev.off()
848 1021

  
1022
############### PART5: Make raster stack and display maps #############
1023
#### Extract corresponding raster for given dates and plot stations used
1024

  
1025

  
1026
#start_date <- day_to_mosaic_range[1]
1027
#end_date <- day_to_mosaic_range[2]
1028
#start_date <- day_start #PARAM 12 arg 12
1029
#end_date <- day_end #PARAM 13 arg 13
1030

  
1031
#date_to_plot <- seq(as.Date(strptime(start_date,"%Y%m%d")), as.Date(strptime(end_date,"%Y%m%d")), 'day')
1032
#l_dates <- format(date_to_plot,"%Y%m%d") #format back to the relevant date format for files
1033
#mask_pred <- FALSE
1034
#matching <- FALSE #to be added after mask_pred option
1035
#list_param_pre_process <- list(raster_name_lf,python_bin,infile_mask,scaling,mask_pred,NA_flag_val,out_suffix,out_dir) 
1036
#names(list_param_pre_process) <- c("lf","python_bin","infile_mask","scaling","mask_pred","NA_flag_val","out_suffix","out_dir") 
1037
  
1038
#debug(pre_process_raster_mosaic_fun)
1039

  
1040
#lf_mosaic_scaled <- mclapply(1:length(raster_name_lf),FUN=pre_process_raster_mosaic_fun,list_param=list_param_pre_process,mc.preschedule=FALSE,mc.cores = num_cores)                         
1041
#lf_mosaic_scaled <- mclapply(1:length(raster_name_lf),FUN=pre_process_raster_mosaic_fun,list_param=list_param_pre_process,mc.preschedule=FALSE,mc.cores = num_cores)                         
1042

  
1043
#test <- pre_process_raster_mosaic_fun(2,list_param_pre_process)
1044
#lf_mosaic_scaled <- unlist(lf_mosaic_scaled)
1045

  
1046
##################################### PART 5  ######
1047
##### Plotting specific days for the mosaics
1048

  
1049
r_mosaic_scaled <- stack(lf_mosaic_scaled)
1050
NAvalue(r_mosaic_scaled)<- -3399999901438340239948148078125514752.000
1051
plot(r_mosaic_scaled,y=6,zlim=c(-50,50))
1052
plot(r_mosaic_scaled,zlim=c(-50,50),col=matlab.like(255))
1053

  
1054
#layout_m<-c(1,3) #one row two columns
1055
#levelplot(r_mosaic_scaled,zlim=c(-50,50),col.regions=matlab.like(255))
1056
#levelplot(r_mosaic_scaled,zlim=c(-50,50),col.regions=matlab.like(255))
1057

  
1058
#png(paste("Figure7a__spatial_pattern_tmax_prediction_levelplot_",date_selected,out_prefix,".png", sep=""),
1059
#    height=480*layout_m[1],width=480*layout_m[2])
1060
#plot(r_pred,col=temp.colors(255),zlim=c(-3500,4500))
1061
#plot(r_pred,col=matlab.like(255),zlim=c(-40,50))
1062
#paste(raster_name[1:7],collapse="_")
1063
#add filename option later
1064

  
1065
#NA_flag_val_mosaic <- -3399999901438340239948148078125514752.000
1066

  
1067
list_param_plot_raster_mosaic <- list(l_dates,r_mosaic_scaled,NA_flag_val_mosaic,out_dir,out_suffix,
1068
                                      region_name,variable_name)
1069
names(list_param_plot_raster_mosaic) <- c("l_dates","r_mosaic_scaled","NA_flag_val_mosaic","out_dir","out_suffix",
1070
                                          "region_name","variable_name")
1071

  
1072
lf_mosaic_plot_fig <- mclapply(1:length(lf_mosaic_scaled),FUN=plot_raster_mosaic,list_param=list_param_plot_raster_mosaic,mc.preschedule=FALSE,mc.cores = num_cores)                         
1073

  
849 1074
####################
850 1075
###### Now add figures with additional met stations?
851 1076

  

Also available in: Unified diff