Project

General

Profile

« Previous | Next » 

Revision afa482cc

Added by Benoit Parmentier about 8 years ago

adding predictions_tiles_missing_fun from main script for tile assessment

View differences:

climate/research/oregon/interpolation/global_product_assessment_part0_functions.R
350 350
  obj_number_day_predicted <- list(raster_name_number_prediction,raster_name_missing)
351 351
  names(obj_number_day_predicted) <- c("raster_name_number_prediction","raster_name_missing")
352 352
    
353
  return(r_day_predicted)
353
  return(obj_number_day_predicted)
354 354
}
355 355

  
356
predictions_tiles_missing_fun <- function(i,list_param){
357
  #Add documentation
358

  
359
  ##############################
360
  #### Parameters and constants  
361
  
362

  
363
  in_dir1 <- list_param$in_dir1 
364
  region_name <- list_param$region_name #e.g. c("reg23","reg4") #run only for one region
365
  y_var_name <- list_param$y_var_name # e.g. dailyTmax" #PARAM3
366
  interpolation_method <- list_param_run_assessment_prediction$interpolation_method #c("gam_CAI") #PARAM4
367
  out_suffix <- list_param_run$out_suffix #output suffix e.g."run_global_analyses_pred_12282015" #PARAM5
368
  out_dir <- list_param$out_dir #<- "/nobackupp8/bparmen1/" #PARAM6
369
  create_out_dir_param <-list_param$create_out_dir_param #if TRUE output dir created #PARAM7
370
  proj_str <- list_param$proj_str # CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84, #PARAM8
371
  list_year_predicted <- list_param$list_year_predicted # 1984:2004
372
  file_format <- list_param$file_format #<- ".tif" #format for mosaiced files #PARAM10
373
  NA_flag_val <- list_param$NA_flag_val #<- -9999  #No data value, #PARAM11
374
  num_cores <- list_param$num_cores #<- 6 #number of cores used #PARAM13
375
  plotting_figures <- list_param$plotting_figures #if true run generate png for missing date
376
  
377
  ##for plotting assessment function
378
  
379
  item_no <- list_param_run_assessment_prediction$mosaic_plot  #PARAM14
380
  day_to_mosaic_range <- list_param$day_to_mosaic_range #PARAM15
381
  countries_shp <- list_param$countries_shp #PARAM17
382
  plotting_figures <- list_param$plotting_figures #PARAM18
383
  #threshold_missing_day <- list_param$threshold_missing_day #PARM20
384
  pred_mod_name <- list_param$pred_mod_name
385
  metric_name <- list_param$metric_name
386
  
387
  ########################## START SCRIPT #########################################
388
  
389
  #system("ls /nobackup/bparmen1")
390
  out_dir <- in_dir
391
  if(create_out_dir_param==TRUE){
392
    out_dir <- create_dir_fun(out_dir,out_suffix)
393
    setwd(out_dir)
394
  }else{
395
    setwd(out_dir) #use previoulsy defined directory
396
  }
397
  
398
  setwd(out_dir)
399
  
400
  #list_outfiles <- vector("list", length=35) #collect names of output files, this should be dynamic?
401
  #list_outfiles_names <- vector("list", length=35) #collect names of output files
402

  
403
  year_predicted <- list_param_run_assessment_prediction$list_year_predicted[i] 
404

  
405
  in_dir1_reg <- file.path(in_dir1,region_name)
406
  
407
  list_outfiles <- vector("list", length=14) #collect names of output files
408
  
409
  in_dir_list <- list.dirs(path=in_dir1_reg,recursive=FALSE) #get the list regions processed for this run
410

  
411
  #in_dir_list_all  <- unlist(lapply(in_dir_list,function(x){list.dirs(path=x,recursive=F)}))
412
  in_dir_list_all <- in_dir_list
413
  in_dir_subset <- in_dir_list_all[grep("subset",basename(in_dir_list_all),invert=FALSE)] #select directory with shapefiles...
414
  in_dir_shp <- file.path(in_dir_subset,"shapefiles")
415
  
416
  #select only directories used for predictions
417
  #nested structure, we need to go to higher level to obtain the tiles...
418
  in_dir_reg <- in_dir_list[grep(".*._.*.",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles...
419
  #in_dir_reg <- in_dir_list[grep("july_tiffs",basename(in_dir_reg),invert=TRUE)] #select directory with shapefiles...
420
  in_dir_list <- in_dir_reg
421
  
422
  in_dir_list <- in_dir_list[grep("bak",basename(basename(in_dir_list)),invert=TRUE)] #the first one is the in_dir1
423
  #list of shapefiles used to define tiles
424
  in_dir_shp_list <- list.files(in_dir_shp,".shp",full.names=T)
425
  
426
  ## load problematic tiles or additional runs
427
  #modify later...
428

  
429
  ##raster_prediction object : contains testing and training stations with RMSE and model object
430
  in_dir_list_tmp <- file.path(in_dir_list,year_predicted)
431
  list_raster_obj_files <- mclapply(in_dir_list_tmp,
432
                                    FUN=function(x){list.files(path=x,pattern="^raster_prediction_obj.*.RData",full.names=T)},
433
                                    mc.preschedule=FALSE,mc.cores = num_cores)
434
  
435
  #list_raster_obj_files <- try(lapply(in_dir_list_tmp,FUN=function(x){list.files(path=x,pattern="^raster_prediction_obj.*.RData",full.names=T)}))
436
  #Add stop message here...if no raster object in any tiles then break from the function
437
  
438
  list_names_tile_coord <- lapply(list_raster_obj_files,FUN=function(x){basename(dirname(x))})
439
  list_names_tile_id <- paste("tile",1:length(list_raster_obj_files),sep="_")
440
  names(list_raster_obj_files)<- list_names_tile_id
441
  
442
  #pred_mod_name <- "mod1"
443
  list_lf_raster_tif_tiles <- mclapply(in_dir_list_tmp,
444
                                    FUN=function(x){list.files(path=x,pattern=paste0("gam_CAI_dailyTmax_predicted_",pred_mod_name,".*.tif"),full.names=T)},
445
                                    mc.preschedule=FALSE,mc.cores = num_cores)
446
  list_names_tile_coord <- lapply(list_lf_raster_tif_tiles,FUN=function(x){basename(dirname(dirname(x)))})
447
  list_names_tile_id <- paste("tile",1:length(list_lf_raster_tif_tiles),sep="_")
448
  names(list_lf_raster_tif_tiles)<- list_names_tile_id
449
  
450
  #one level up
451
  #lf_covar_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="covar_obj.*.RData",full.names=T)})
452
  #lf_covar_tif <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="covar.*.tif",full.names=T)})
453
  
454
  #lf_sub_sampling_obj_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern=paste("^sub_sampling_obj_",interpolation_method,".*.RData",sep=""),full.names=T)})
455
  #lf_sub_sampling_obj_daily_files <- lapply(in_dir_list_tmp,FUN=function(x){list.files(path=x,pattern="^sub_sampling_obj_daily.*.RData",full.names=T)})
456
  year_processed <- year_predicted
457
  if(is.null(day_to_mosaic_range)){
458
  #  start_date <- #first date
459
     start_date <- paste0(year_processed,"0101") #change this later!!
460
     end_date <-   paste0(year_processed,"1231") #change this later!!
461
     day_to_mosaic <- seq(as.Date(strptime(start_date,"%Y%m%d")), as.Date(strptime(end_date,"%Y%m%d")), 'day')
462
     day_to_mosaic <- format(day_to_mosaic,"%Y%m%d") #format back to the relevant date format for files
463
  }else{
464
    start_date <- day_to_mosaic_range[1]
465
    end_date <- day_to_mosaic_range[2]
466
    day_to_mosaic <- seq(as.Date(strptime(start_date,"%Y%m%d")), as.Date(strptime(end_date,"%Y%m%d")), 'day')
467
    day_to_mosaic <- format(day_to_mosaic,"%Y%m%d") #format back to the relevant date format for files
468
  }
469
  
470
  in_dir_tiles_tmp <- in_dir1 #
471
  #in_dir_tiles_tmp <- in_dir_reg
472
  
473
  ### Do this by tile!!!
474
  
475
  #gam_CAI_dailyTmax_predicted_mod1_0_1_20001231_30_1-39.7_165.1.tif
476
  
477
  #undebug(check_missing)
478
  test_missing <- try(lapply(1:length(list_lf_raster_tif_tiles),function(i){check_missing(lf=list_lf_raster_tif_tiles[[i]], 
479
                                                                      pattern_str=NULL,
480
                                                                      in_dir=out_dir,
481
                                                                      date_start=start_date,
482
                                                                      date_end=end_date,
483
                                                                      item_no=item_no, #9 for predicted tiles
484
                                                                      out_suffix=out_suffix,
485
                                                                      num_cores=num_cores,
486
                                                                      out_dir=".")}))
487

  
488
 
489
  df_time_series <- test_missing[[1]]$df_time_series
490
  head(df_time_series)
491

  
492
  table(df_time_series$missing)
493
  table(df_time_series$year)
494
  
495
  #####
496
  #Now combine df_time_series in one table
497
  
498
  dim(test_missing[[1]]$df_time_series)
499
  list_lf <- lapply(1:length(test_missing),FUN=function(i){df_time_series <- as.character(test_missing[[i]]$df_time_series$lf)})
500
  df_lf_tiles_time_series <- as.data.frame(do.call(cbind,list_lf))
501
  #http://stackoverflow.com/questions/26220913/replace-na-with-na
502
  #Use dfr[dfr=="<NA>"]=NA where dfr is your dataframe.
503
  names(df_lf_tiles_time_series) <- unlist(basename(in_dir_reg))
504
  filename_df_lf_tiles <- paste0("df_files_by_tiles_predicted_tif_",region_name,"_",pred_mod_name,"_",out_suffix)
505
  write.table(df_lf_tiles_time_series,file=filename_df_lf_tiles)
506

  
507
  ###Now combined missing in one table?
508
  
509
  list_missing <- lapply(1:length(test_missing),FUN=function(i){df_time_series <- test_missing[[i]]$df_time_series$missing})
510
  
511
  df_missing <- as.data.frame(do.call(cbind,list_missing))
512
  names(df_missing) <- unlist(basename(in_dir_reg))
513
  df_missing$tot_missing <- rowSums (df_missing, na.rm = FALSE, dims = 1)
514
  df_missing$reg <- region_name
515
  df_missing$date <- day_to_mosaic
516

  
517
  filename_df_missing <- paste0("df_missing_by_tiles_predicted_tif_",region_name,"_",pred_mod_name,"_",out_suffix)
518
  write.table(df_missing,file=filename_df_missing)
519
  
520
  ########################
521
  #### Step 2: examine overlap
522
  
523
  path_to_shp <- dirname(countries_shp)
524
  layer_name <- sub(".shp","",basename(countries_shp))
525
  reg_layer <- readOGR(path_to_shp, layer_name)
526
  
527
  #collect info: read in all shapefiles
528
  
529
  obj_centroids_shp <- centroids_shp_fun(1,list_shp_reg_files=in_dir_shp_list)
530
                                         
531
  obj_centroids_shp <- mclapply(1:length(in_dir_shp_list),
532
                                FUN=centroids_shp_fun,
533
                                list_shp_reg_files=in_dir_shp_list,
534
                                mc.preschedule=FALSE,
535
                                mc.cores = num_cores)
536

  
537
  centroids_pts <- lapply(obj_centroids_shp, FUN=function(x){x$centroid})
538
  shps_tiles <-   lapply(obj_centroids_shp, FUN=function(x){x$spdf})
539

  
540
  #remove try-error polygons...we loose three tiles because they extend beyond -180 deg
541
  tmp <- shps_tiles
542
  shps_tiles <- remove_errors_list(shps_tiles) #[[!inherits(shps_tiles,"try-error")]]
543
  #shps_tiles <- tmp
544
  length(tmp)-length(shps_tiles) #number of tiles with error message
545
  
546
  tmp_pts <- centroids_pts 
547
  centroids_pts <- remove_errors_list(centroids_pts) #[[!inherits(shps_tiles,"try-error")]]
548
  #centroids_pts <- tmp_pts 
549
  
550
  r_mask <- raster(infile_mask)
551
  plot(r)
552
  plot(shps_tiles[[1]],add=T,border="blue",usePolypath = FALSE) #added usePolypath following error on brige and NEX
553

  
554
  ## find overlap
555
  #http://gis.stackexchange.com/questions/156660/loop-to-check-multiple-polygons-overlap-in-r
556
  
557
  matrix_overlap <- matrix(data=NA,nrow=length(shps_tiles),ncol=length(shps_tiles))
558
  for(i in 1:length(shps_tiles)){
559
     for(j in 1:length(shps_tiles)){
560
      overlap_val <- as.numeric(over(shps_tiles[[i]],shps_tiles[[j]]))
561
      matrix_overlap[i,j]<- overlap_val
562
    }
563
    #
564
  }
565
  
566
  names(shps_tiles) <- basename(unlist(in_dir_reg))
567
  r_ref <- raster(list_lf_raster_tif_tiles[[1]][1])
568
  list_r_ref <- lapply(1:length(in_dir_reg), function(i){raster(list_lf_raster_tif_tiles[[i]][1])})
569
  tile_spdf <- shps_tiles[[1]]
570
  tile_coord <- basename(in_dir_reg[1])
571
  date_val <- df_missing$date[1]
572
  
573
  ### use rasterize
574
  spdf_tiles <- do.call(bind, shps_tiles) #bind all tiles together in one shapefile
575

  
576
  #function_product_assessment_part0_functions <- "global_product_assessment_part0_functions_11052016.R"
577
  #source(file.path(script_path,function_product_assessment_part0_functions)) #source all functions used in this script 
578

  
579
  #undebug(rasterize_tile_day)
580
  list_predicted <- rasterize_tile_day(1,
581
           list_spdf=shps_tiles,
582
           df_missing=df_missing,
583
           list_r_ref=list_r_ref,
584
           col_name="overlap",
585
           date_val=df_missing$date[1])
586
  #list_predicted <- mclapply(1:6,
587
  #         FUN=rasterize_tile_day,
588
  #         list_spdf=shps_tiles,
589
  #         df_missing=df_missing,
590
  #         list_r_ref=list_r_ref,
591
  #         col_name = "overlap",
592
  #         date_val=df_missing$date[1],
593
  #          mc.preschedule=FALSE,
594
  #         mc.cores = num_cores)
595
  
596
  list_predicted <- mclapply(1:length(shps_tiles),
597
           FUN=rasterize_tile_day,
598
           list_spdf=shps_tiles,
599
           df_missing=df_missing,
600
           list_r_ref=list_r_ref,
601
           col_name = "overlap",
602
           date_val=df_missing$date[1],
603
            mc.preschedule=FALSE,
604
           mc.cores = num_cores)
605

  
606
  ##check that everything is correct:
607
  plot(r_mask)
608
  plot(raster(list_predicted[[1]]),add=T)
609
  plot(spdf_tiles_test,add=T,border="green",usePolypath = FALSE) #added usePolypath following error on brige and NEX
610

  
611
  ### Make a list of file
612
  out_suffix_str_tmp <- paste0(region_name,"_",out_suffix)
613
  out_dir_str <- out_dir
614
  filename_list_predicted <- file.path(out_dir_str,paste("list_to_mosaics_",out_suffix_str_tmp,".txt",sep=""))
615

  
616
  #writeLines(unlist(list_weights_m),con=filename_list_mosaics_weights_m) #weights files to mosaic 
617
  #writeLines(unlist(list_weights_prod_m),con=filename_list_mosaics_prod_weights_m) #prod weights files to mosaic
618
      
619
  writeLines(unlist(list_predicted),con=filename_list_predicted) #weights files to mosaic 
620

  
621
  #out_mosaic_name_weights_m <- r_weights_sum_raster_name <- file.path(out_dir,paste("r_weights_sum_m_",mosaic_method,"_weighted_mean_",out_suffix,".tif",sep=""))
622
  #out_mosaic_name_prod_weights_m <- r_weights_sum_raster_name <- file.path(out_dir,paste("r_prod_weights_sum_m_",mosaic_method,"_weighted_mean_",out_suffix,".tif",sep=""))
623
  out_mosaic_name_predicted_m  <- file.path(out_dir_str,paste("r_overlap_sum_m_",out_suffix_str_tmp,".tif",sep=""))
624

  
625
  rast_ref_name <- infile_mask
626
  mosaic_python <- "/nobackupp6/aguzman4/climateLayers/sharedCode/"
627
  rast_ref_name <- infile_mask
628
  #python /nobackupp6/aguzman4/climateLayers/sharedCode//gdal_merge_sum.py --config GDAL_CACHEMAX=1500 --overwrite=TRUE -o /nobackupp8/bparmen1/climateLayers/out
629
  mosaic_overlap_tiles_obj <- mosaic_python_merge(NA_flag_val=NA_flag_val,
630
                                                module_path=mosaic_python,
631
                                                module_name="gdal_merge_sum.py",
632
                                                input_file=filename_list_predicted,
633
                                                out_mosaic_name=out_mosaic_name_predicted_m,
634
                                                raster_ref_name = rast_ref_name) ##if NA, not take into account
635
  r_overlap_raster_name <- mosaic_overlap_tiles_obj$out_mosaic_name
636
  cmd_str1 <-   mosaic_overlap_tiles_obj$cmd_str
637

  
638
  r_overlap <- raster(r_overlap_raster_name)
639
  r_mask <- raster(infile_mask)
640
  
641
  out_mosaic_name_overlap_masked  <- file.path(out_dir_str,paste("r_overlap_sum_masked_",out_suffix_str_tmp,".tif",sep=""))
642

  
643
  r_overlap_m <- mask(r_overlap,r_mask,filename=out_mosaic_name_overlap_masked,overwrite=T)
644
  #plot(r_overlap_m)
645
  #plot(spdf_tiles_test,add=T,border="green",usePolypath = FALSE) #added usePolypath following error on brige and NEX
646
  
647
  r_table <- ratify(r_overlap_m) # build the Raster Attibute table
648
  rat <- levels(r_table)[[1]]#get the values of the unique cell frot the attribute table
649
  #rat$legend <- paste0("tile_",1:26)
650
  tb_freq <- as.data.frame(freq(r_table))
651
  rat$legend <- tb_freq$value
652
  levels(r_table) <- rat
653
  
654
  res_pix <- 800
655
  #res_pix <- 480
656
  col_mfrow <- 1
657
  row_mfrow <- 1
658
  
659
  png_filename <-  file.path(out_dir,paste("Figure_maximum_overlap_",region_name,"_",out_suffix,".png",sep =""))
660
    
661
  title_str <-  paste("Maximum overlap: Number of predicted pixels for ",variable_name, sep = "")
662
  
663
  png(filename=png_filename,width = col_mfrow * res_pix,height = row_mfrow * res_pix)
664
    #my_col=c('blue','red','green')
665
  my_col <- rainbow(length(tb_freq$value))
666

  
667
  plot(r_table,col=my_col,legend=F,box=F,axes=F,main=title_str)
668
  legend(x='topright', legend =rat$legend,fill = my_col,cex=0.8)
669
  
670
  dev.off()
671

  
672
  ### now assign id and match extent for tiles
673
  
674
  lf_files <- unlist(list_predicted)
675
  rast_ref_name <- infile_mask
676
  rast_ref <- rast_ref_name
677
  
678
  ##Maching resolution is probably only necessary for the r mosaic function
679
  #Modify later to take into account option R or python...
680
  list_param_raster_match <- list(lf_files,rast_ref,file_format,python_bin,out_suffix_str_tmp,out_dir_str)
681
  names(list_param_raster_match) <- c("lf_files","rast_ref","file_format","python_bin","out_suffix","out_dir_str")
682

  
683
  #undebug(raster_match)
684
  #r_test <- raster_match(1,list_param_raster_match)
685
  #r_test <- raster(raster_match(1,list_param_raster_match))
686

  
687
  list_tiles_predicted_m <- unlist(mclapply(1:length(lf_files),
688
                                            FUN=raster_match,list_param=list_param_raster_match,
689
                                            mc.preschedule=FALSE,mc.cores = num_cores))                           
690

  
691
  extension_str <- extension(lf_files)
692
  raster_name_tmp <- gsub(extension_str,"",basename(lf_files))
693
  out_suffix_str <- paste0(region_name,"_",out_suffix)
694
  raster_name <- file.path(out_dir_str,paste(raster_name_tmp,"_","masked_",out_suffix_str,file_format,sep=""))
695
  
696
  #writeRaster(r, NAflag=NA_flag_val,filename=raster_name,overwrite=TRUE)  
697

  
698
  #r_stack <- stack(list_tiles_predicted_m)
699
  list_mask_out_file_name <- raster_name
700
  list_tiles_predicted_masked <- unlist(mclapply(1:length(list_tiles_predicted_m),
701
                                                 FUN=function(i){mask(raster(list_tiles_predicted_m[i]),r_mask,filename=list_mask_out_file_name[i])},
702
                                                       mc.preschedule=FALSE,mc.cores = num_cores))                         
703
  #r_stack_masked <- mask(r, m2) #, maskvalue=TRUE)
704
  
705
  ########################
706
  #### Step 3: combine overlap information and number of predictions by day
707
  ##Now loop through every day if missing then generate are raster showing map of number of prediction
708
  
709
  #r_tiles_stack <- stack(list_tiles_predicted_masked)
710
  #names(r_tiles_stack) <- basename(in_dir_reg) #this does not work, X. is added to the name, use list instead
711
  
712
  names(list_tiles_predicted_masked) <- basename(in_dir_reg)
713
  df_missing_tiles_day <- subset(df_missing,tot_missing > 0)
714
  #r_tiles_s <- r_tiles_stack
715
  names_tiles <- basename(in_dir_reg)
716
  
717
  
718
  list_param_generate_raster_number_pred <- list(list_tiles_predicted_masked,df_missing_tiles_day,r_overlap_m,
719
                                                 num_cores,region_name,
720
                                                 NA_flag_val,out_suffix,out_dir)
721
  
722
  names(list_param_generate_raster_number_pred) <- c("list_tiles_predicted_masked","df_missing_tiles_day","r_overlap_m",
723
                                                     "num_cores","region_name",
724
                                                      "NA_flag_val","out_suffix","out_dir")
725
  
726
  #function_product_assessment_part0_functions <- "global_product_assessment_part0_functions_11152016b.R"
727
  #source(file.path(script_path,function_product_assessment_part0_functions)) #source all functions used in this script 
728

  
729
  #debug(generate_raster_number_of_prediction_by_day)
730
  
731
  #obj_number_pix_predictions <- generate_raster_number_of_prediction_by_day(1,list_param_generate_raster_number_pred)
732
  
733
  obj_number_pix_predictions <- mcapply(1:nrow(df_missing_tiles_day),
734
                                        FUN=generate_raster_number_of_prediction_by_day,
735
                                        list_param=list_param_generate_raster_number_pred,
736
                                        mc.preschedule=FALSE,
737
                                        mc.cores = num_cores)
738
  
739
  ## Make a function,
740
  #for specifi i (day) select tile with missing info, load it and subsetract to overlap raster, save it.
741
  
742
  #http://stackoverflow.com/questions/19586945/how-to-legend-a-raster-using-directly-the-raster-attribute-table-and-displaying
743
  #
744
  #http://gis.stackexchange.com/questions/148398/how-does-spatial-polygon-over-polygon-work-to-when-aggregating-values-in-r
745
  #ok other way of doing this:
746
  #1. find overlap assuming all predictions!
747
  #2. Us raster image with number of overlaps in the mosaic tile
748
  #3. for every pixel generate and ID (tile ID) as integer, there should  be 26 layers at the mosaic extent
749
  #4. generate a table? for each pixel it can say if is part of a specific tile
750
  #5. workout a formula to generate the number of predictions for each pixel based on tile predicted for each date!!
751

  
752
  return()
753
}
356 754

  
357 755

  
358 756
############################ END OF SCRIPT ##################################

Also available in: Unified diff