Revision 8af8ed42
Added by Benoit Parmentier about 10 years ago
climate/research/oregon/interpolation/subsampling_data.R | ||
---|---|---|
65 | 65 |
|
66 | 66 |
target_max_nb <- 200 |
67 | 67 |
target_min_nb <- 100 |
68 |
max_dist <- 10000
|
|
68 |
max_dist <- 10 |
|
69 | 69 |
min_dist <- 0 |
70 | 70 |
step_dist <- 1000 |
71 | 71 |
target_range_nb <- c(target_min_nb,target_max_nb) |
72 | 72 |
#debug(sub_sampling_by_dist) |
73 | 73 |
#First increase distance till 5km |
74 | 74 |
#then use random sampling...to get the extact target? |
75 |
test <- sub_sampling_by_dist(target_range_nb,dist=min_dist,step=step_dist,data_in=data_month) |
|
75 |
test <- sub_sampling_by_dist(target_range_nb,dist=min_dist,max_dist=max_dist,step=step_dist,data_in=data_month) |
|
76 |
test <- sub_sampling_by_dist(target_range_nb,dist=min_dist,max_dist=NULL,step=step_dist,data_in=data_month) |
|
76 | 77 |
|
77 | 78 |
dist_range <- c(0,5000) |
78 | 79 |
|
80 |
sub_sampling_by_dist_nb_stat(target_range_nb=c(10000,10000),dist_range,data_in,sampling=T){ |
|
79 | 81 |
|
80 |
sub_sampling_stat <- function(target_range_nb=,sampling=T) |
|
81 |
|
|
82 |
sub_sampling_by_dist <- function(target_range_nb=c(10000,10000),dist=0.0,step,data_in){ |
|
82 |
|
|
83 |
sub_sampling_by_dist <- function(target_range_nb=c(10000,10000),dist=0.0,max_dist=NULL,step,data_in){ |
|
83 | 84 |
data <- data_in |
84 | 85 |
target_min_nb <- target_range_nb[1] |
85 | 86 |
station_nb <- nrow(data_in) |
86 |
while(station_nb > target_min_nb){ #} #|| nrow > 0){ |
|
87 |
if(is.null(max_dist)){ |
|
88 |
while(station_nb > target_min_nb){ |
|
89 |
data <- remove.duplicates(data, zero = dist) #spatially sub sample... |
|
90 |
dist <- dist + step |
|
91 |
station_nb <- nrow(data) |
|
92 |
} |
|
93 |
} |
|
94 |
if(!is.null(max_dist)){ |
|
95 |
while(station_nb > target_min_nb || dist < max_dist){ |
|
96 |
d#} #|| nrow > 0){ |
|
87 | 97 |
#test <- zerodist(data, zero = 0.0, unique.ID = FALSE) |
88 | 98 |
#test <- remove.duplicates(data_month, zero = 5000) |
89 | 99 |
data <- remove.duplicates(data, zero = dist) #spatially sub sample... |
90 | 100 |
dist <- dist + step |
91 | 101 |
station_nb <- nrow(data) |
102 |
} |
|
92 | 103 |
} |
104 |
|
|
93 | 105 |
obj_sub_sampling <- list(data,dist) |
94 | 106 |
names(obj_sub_sampling) <- c("data","dist") |
95 | 107 |
return(obj_sub_sampling) |
96 | 108 |
} |
97 | 109 |
|
110 |
sub_sampling_by_dist_nb_stat <- function(target_range_nb=c(10000,10000),dist_range,data_in,sampling=T){ |
|
111 |
|
|
112 |
data <- data_in |
|
113 |
min_dist <- dist_range[1] |
|
114 |
max_dist <- dist_range[2] |
|
115 |
|
|
116 |
if(sampling==T){ |
|
117 |
dat <- sub_sampling_by_dist(target_range_nb,dist=min_dist,max_dist=max_dist,step=step_dist,data_in=data_month) |
|
118 |
data_out <-sample(dat$data, target_range_nb[2], replace = FALSE, prob = NULL) |
|
119 |
data_out <- list(data_out,dat$dist,dat$data) |
|
120 |
data_out <- c("data","dist","data_dist") |
|
121 |
} |
|
122 |
if(sampling!=T){ |
|
123 |
data_out <- sub_sampling_by_dist(target_range_nb,dist=min_dist,max_dist=NULL,step=step_dist,data_in=data_month) |
|
124 |
} |
|
125 |
return(data_out) |
|
126 |
} |
|
127 |
|
|
128 |
#sub_sampling_by_dist <- function(target_range_nb=c(10000,10000),dist=0.0,step,data_in){ |
|
129 |
# data <- data_in |
|
130 |
# target_min_nb <- target_range_nb[1] |
|
131 |
# station_nb <- nrow(data_in) |
|
132 |
# while(station_nb > target_min_nb){ #} #|| nrow > 0){ |
|
133 |
# #test <- zerodist(data, zero = 0.0, unique.ID = FALSE) |
|
134 |
# #test <- remove.duplicates(data_month, zero = 5000) |
|
135 |
# data <- remove.duplicates(data, zero = dist) #spatially sub sample... |
|
136 |
# dist <- dist + step |
|
137 |
# station_nb <- nrow(data) |
|
138 |
# } |
|
139 |
# obj_sub_sampling <- list(data,dist) |
|
140 |
# names(obj_sub_sampling) <- c("data","dist") |
|
141 |
# return(obj_sub_sampling) |
|
142 |
#} |
|
143 |
|
|
144 |
############ END OF SCRIPT ######### |
Also available in: Unified diff
spatial subsampling function and random sampling