Project

General

Profile

« Previous | Next » 

Revision 41528b05

Added by Benoit Parmentier over 8 years ago

testing subsampling by distance function to select specific given stations from points

View differences:

climate/research/oregon/interpolation/global_product_assessment_part1_functions.R
453 453
  return(data_subset)
454 454
}
455 455

  
456
sub_sampling_by_dist <- function(target_range_nb=c(10000,10000),dist_val=0.0,max_dist=NULL,step_val,data_in){
457
  #Function to select stations data that are outside a specific spatial range from each other
458
  #Parameters:
459
  #max_dist: maximum spatial distance at which to stop the pruning
460
  #min_dist: minimum distance to start pruning the data
461
  #step_val: spatial distance increment
462
  #Note that we are assuming that the first columns contains ID with name col of "id".
463
  #Note that the selection is based on unique id of original SPDF so that replicates screened.
464
  
465
  data_in$id <- as.character(data_in$id)
466
  data <- data_in
467
  
468
  #Now only take unique id in the shapefile!!!
469
  #This step is necessary to avoid the large calculation of matrix distance with replicates
470
  #unique(data$id)
471
  data <- aggregate(id ~ x + y , data=data,min)
472
  coordinates(data) <- cbind(data$x,data$y)
473
  proj4string(data) <- proj4string(data_in)
474
  
475
  target_min_nb <- target_range_nb[1]
476
  #target_min_nb <- target_range_day_nb[1]
477
  
478
  #station_nb <- nrow(data_in)
479
  station_nb <- nrow(data)
480
  if(is.null(max_dist)){
481
    while(station_nb > target_min_nb){
482
      data <- remove.duplicates(data, zero = dist_val) #spatially sub sample...
483
      dist_val <- dist_val + step_val
484
      station_nb <- nrow(data)
485
    }
486
    #setdiff(as.character(data$id),as.character(data_in$id))
487
    #ind.selected <-match(as.character(data$id),as.character(data_in$id)) #index of stations row selected
488
    #ind.removed  <- setdiff(1:nrow(data_in), ind.selected) #index of stations rows removed 
489
    id_selected <- as.character(data$id)
490
    id_removed <- setdiff(unique(as.character(data_in$id)),as.character(data$id))
491

  
492
  }
493
  if(!is.null(max_dist)){
494
    
495
    while(station_nb > target_min_nb & dist_val < max_dist){ 
496
      data <- remove.duplicates(data, zero = dist_val) #spatially sub sample...
497
      #id_rm <- zerodist(data, zero = dist_val, unique.ID = FALSE)
498
      #data_rm <- data[id_rm,]
499
      dist_val <- dist_val + step_val
500
      station_nb <- nrow(data)
501
    }
502
    #ind.selected <- match(as.character(data$id),as.character(data_in$id))
503
    id_selected <- as.character(data$id)
504
    id_removed <- setdiff(unique(as.character(data_in$id)),as.character(data$id))
505
  #  ind.removed  <- setdiff(1:nrow(data_in), ind.selected)
506
  }
507
    
508
  #data_rm <- data_in[ind.removed,]
509
  data_rm <- subset(data_in, id %in% id_removed)
510
  data_tmp <- data #store the reduced dataset with only id, for debugging purpose
511
  
512
  #data <- subset(data_in, id %in% data$id) #select based on id
513
  data <-subset(data_in, id %in% id_selected) #select based on id
514
  
515
  #data <- data_in[ind.selected,]
516
  obj_sub_sampling <- list(data,dist_val,data_rm) #data.frame selected, minimum distance, data.frame stations removed
517
  names(obj_sub_sampling) <- c("data","dist","data_rm")
518
  return(obj_sub_sampling)
519
}
520

  
521

  
456 522
############################ END OF SCRIPT ##################################

Also available in: Unified diff