Revision 4f635318
Added by Benoit Parmentier about 10 years ago
climate/research/oregon/interpolation/subsampling_data.R | ||
---|---|---|
87 | 87 |
return(obj_sub_sampling) |
88 | 88 |
} |
89 | 89 |
|
90 |
sub_sampling_by_dist_nb_stat <- function(target_range_nb,dist_range,data_in,sampling=T,combined=F){ |
|
90 |
sub_sampling_by_dist_nb_stat <- function(target_range_nb,dist_range,step_dist,data_in,sampling=T,combined=F){ |
|
91 |
##This function performs subsampling for tiles/regions with a high density of stations. |
|
92 |
##Sub-sampling can be done through spatial pruning by providing a range of distance and step or |
|
93 |
##by using random sampling in addition to spatial pruning. |
|
94 |
#Input parameters: |
|
95 |
#sampling: if TRUE use random sampling in addition to spatial sub-sampling |
|
96 |
#target_range_nb : number of stations desired as min and max, convergence to min for now |
|
97 |
#dist_range : spatial distance range for pruning, usually (0,5) in km or (0,0.009*5) for degrees |
|
98 |
#step_dist : stepping distance used in pruning spatially, use 1km or 0.009 for degree data |
|
99 |
#data_in : input data to be resampled (data.frame or spatial point df.) |
|
100 |
#combined: if FALSE, combined, add variable to show which data rows were removed (not currently in use) |
|
101 |
# |
|
102 |
#Output parameters: |
|
103 |
#data: subsampled data |
|
104 |
#dist: distance at which spatial sub-sampling ended |
|
105 |
#data_removed: data that was removed from the input data frame |
|
106 |
#data_dist: data item/stations after using spatial pruning, only appears if sampling = T |
|
107 |
|
|
108 |
#### START PROGRAM BODY ##### |
|
91 | 109 |
|
92 | 110 |
data <- data_in |
93 | 111 |
min_dist <- dist_range[1] |
... | ... | |
191 | 209 |
#Now use the other function to sample the station data points: |
192 | 210 |
|
193 | 211 |
#### |
194 |
dist_range <- c(0,10000) |
|
212 |
#dist_range <- c(0,10000)
|
|
195 | 213 |
max_dist <- 10000# the maximum distance used for pruning ie removes stations that are closer than 1000m |
214 |
target_range_nb <- c(target_min_nb,target_max_nb) #target range of number of stations |
|
215 |
step_dist <- 1000 #iteration step to remove the stations, 1000 meters |
|
196 | 216 |
|
197 | 217 |
#debug(sub_sampling_by_dist_nb_stat) |
198 |
test4 <- sub_sampling_by_dist_nb_stat(target_range_nb=c(100,200),dist_range=c(0,10000),data_in=data_month,sampling=T,combined=F) |
|
218 |
#test4 <- sub_sampling_by_dist_nb_stat(target_range_nb=c(100,200),dist_range=c(0,10000),data_in=data_month,sampling=T,combined=F) |
|
219 |
test4 <- sub_sampling_by_dist_nb_stat(target_range_nb=c(100,200),dist_range=c(0,10000),step_dist=step_dist,data_in=data_month,sampling=T,combined=F) |
|
199 | 220 |
dim(test4$data) #we get exactly 100 stations as asked...first the 178 stations were selected using the spatial criteria |
200 | 221 |
#then 100 stations were selected using the sampling function |
222 |
|
|
223 |
### for NEX, most likely settings: |
|
224 |
|
|
225 |
target_max_nb <- 100,000 #this is not actually used yet in the current implementation,can be set to very high value... |
|
226 |
target_min_nb <- 8,000 #this is the target number of stations we would like: to be set by Alberto... |
|
227 |
#max_dist <- 1000 # the maximum distance used for pruning ie removes stations that are closer than 1000m, this in degree...? |
|
228 |
max_idst <- 0.009*5 #5km in degree |
|
229 |
min_dist <- 0 #minimum distance to start with |
|
230 |
step_dist <- 0.009 #iteration step to remove the stations |
|
231 |
|
|
232 |
test5 <- sub_sampling_by_dist_nb_stat(target_range_nb=target_range_nb,dist_range=dist_range,step_dist=step_dist,data_in=data_month,sampling=T,combined=F) |
|
233 |
|
|
201 | 234 |
|
202 | 235 |
############ END OF SCRIPT ######### |
Also available in: Unified diff
adding documentation and debugging subsampling function for NEX fun