Project

General

Profile

« Previous | Next » 

Revision 4f635318

Added by Benoit Parmentier about 10 years ago

adding documentation and debugging subsampling function for NEX fun

View differences:

climate/research/oregon/interpolation/subsampling_data.R
87 87
  return(obj_sub_sampling)
88 88
}
89 89

  
90
sub_sampling_by_dist_nb_stat <- function(target_range_nb,dist_range,data_in,sampling=T,combined=F){
90
sub_sampling_by_dist_nb_stat <- function(target_range_nb,dist_range,step_dist,data_in,sampling=T,combined=F){
91
  ##This function performs subsampling for tiles/regions with a high density of stations.
92
  ##Sub-sampling can be done through spatial pruning by providing  a range of distance and step or
93
  ##by using random sampling in addition  to spatial pruning.
94
  #Input parameters:
95
  #sampling: if  TRUE use random sampling  in addition to spatial  sub-sampling
96
  #target_range_nb : number of stations desired as min and max, convergence to  min  for  now
97
  #dist_range : spatial distance range  for pruning,  usually (0,5) in km or (0,0.009*5) for  degrees
98
  #step_dist : stepping distance used in pruning  spatially, use 1km or 0.009 for degree data
99
  #data_in : input data to be resampled (data.frame or spatial point df.)
100
  #combined: if FALSE, combined, add variable to  show which  data rows  were removed (not currently in use)
101
  #
102
  #Output parameters:
103
  #data: subsampled data
104
  #dist: distance at which spatial sub-sampling  ended
105
  #data_removed: data that was removed from the input data frame
106
  #data_dist: data item/stations after using spatial pruning, only appears if sampling = T
107

  
108
  #### START PROGRAM BODY #####
91 109
  
92 110
  data <- data_in
93 111
  min_dist <- dist_range[1]
......
191 209
#Now use the other function to sample the station data points:
192 210

  
193 211
#### 
194
dist_range <- c(0,10000) 
212
#dist_range <- c(0,10000) 
195 213
max_dist <- 10000# the maximum distance used for pruning ie removes stations that are closer than 1000m 
214
target_range_nb <- c(target_min_nb,target_max_nb) #target range of number of stations
215
step_dist <- 1000 #iteration step to remove the stations, 1000 meters
196 216

  
197 217
#debug(sub_sampling_by_dist_nb_stat)
198
test4 <- sub_sampling_by_dist_nb_stat(target_range_nb=c(100,200),dist_range=c(0,10000),data_in=data_month,sampling=T,combined=F)
218
#test4 <- sub_sampling_by_dist_nb_stat(target_range_nb=c(100,200),dist_range=c(0,10000),data_in=data_month,sampling=T,combined=F)
219
test4 <- sub_sampling_by_dist_nb_stat(target_range_nb=c(100,200),dist_range=c(0,10000),step_dist=step_dist,data_in=data_month,sampling=T,combined=F)
199 220
dim(test4$data) #we get exactly 100 stations as asked...first the 178 stations were selected using the spatial criteria
200 221
                #then 100 stations were selected using the sampling function
222

  
223
### for NEX, most likely settings:
224

  
225
target_max_nb <- 100,000 #this is not actually used yet in the current implementation,can be set to very high value...
226
target_min_nb <- 8,000 #this is the target number of stations we would like: to be set by Alberto...
227
#max_dist <- 1000 # the maximum distance used for pruning ie removes stations that are closer than 1000m, this in degree...? 
228
max_idst <- 0.009*5 #5km in degree
229
min_dist <- 0    #minimum distance to start with
230
step_dist <- 0.009 #iteration step to remove the stations
231

  
232
test5 <- sub_sampling_by_dist_nb_stat(target_range_nb=target_range_nb,dist_range=dist_range,step_dist=step_dist,data_in=data_month,sampling=T,combined=F)
233

  
201 234
  
202 235
############ END OF SCRIPT #########

Also available in: Unified diff