Revision 6992f12e
Added by Benoit Parmentier almost 9 years ago
climate/research/oregon/interpolation/Database_stations_covariates_processing_function.R | ||
---|---|---|
1 |
################## Data preparation for interpolation #######################################
|
|
2 |
############################ Extraction of station data ##########################################
|
|
1 |
################## Data preparation for interpolation ####################################### |
|
2 |
############################ Extraction of station data########################################## |
|
3 | 3 |
|
4 | 4 |
|
5 | 5 |
database_covariates_preparation<-function(list_param_prep){ |
... | ... | |
20 | 20 |
# 13) out_prefix: output suffix added to output names--it is the same in the interpolation script |
21 | 21 |
# |
22 | 22 |
# |
23 |
# 14) subampling:
|
|
23 |
# 14) |
|
24 | 24 |
# 15).. |
25 |
# |
|
26 |
# |
|
27 |
|
|
28 | 25 |
#The output is a list of four shapefile names produced by the function: |
29 |
|
|
30 |
#1) loc_stations: locations of stations as shapefile in EPSG 4326 |
|
31 |
#2) loc_stations_ghcn: ghcn daily data for the year range of interpolation (locally projected) |
|
32 |
#3) daily_query_ghcn_data: ghcn daily data from daily query before application of quality flag |
|
33 |
#4) daily_covar_ghcn_data: ghcn daily data with covariates for the year range of interpolation (locally projected) |
|
34 |
#5) monthly_query_ghcn_data: ghcn daily data from monthly query before application of quality flag |
|
35 |
#6) monthly_covar_ghcn_data: ghcn monthly averaged data with covariates for the year range of interpolation (locally projected) |
|
26 |
#1) loc_stations: outfile1, locations of stations as shapefile in EPSG 4326 |
|
27 |
#2) loc_stations_ghcn: outfile2, ghcn daily data for the year range of interpolation (locally projected) |
|
28 |
#3) daily_query_ghcn_data: outfile3, ghcn daily data from daily query before application of quality flag |
|
29 |
#4) daily_covar_ghcn_data: outfile4, ghcn daily data with covariates for the year range of interpolation (locally projected) |
|
30 |
#5) monthly_query_ghcn_data: outfile5, ghcn daily data from monthly query before application of quality flag |
|
31 |
#6) monthly_covar_ghcn_data: outfile6, ghcn monthly averaged data with covariates for the year range of interpolation (locally projected) |
|
36 | 32 |
|
37 | 33 |
#AUTHOR: Benoit Parmentier |
38 |
#DATE: 05/21/2013 |
|
34 |
#CREATED ON: 02/22/2013 |
|
35 |
#MODIFIED ON: 12/21/2015 |
|
39 | 36 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363-- |
40 |
#Comments and TODO |
|
41 |
#-Add buffer option... |
|
37 |
#Comments: |
|
38 |
#Modifications to take into account the existence of clim query, to avoid querying many times the database at the climatology step. |
|
39 |
#TODO |
|
40 |
#-Add buffer option: solved by overlapping tiles |
|
42 | 41 |
#-Add screening for value predicted: var |
42 |
# |
|
43 | 43 |
################################################################################################## |
44 | 44 |
|
45 | 45 |
###Loading R library and packages: should it be read in before??? |
... | ... | |
71 | 71 |
return(sf_ID) |
72 | 72 |
} |
73 | 73 |
|
74 |
create_dir_fun <- function(out_dir,out_suffix){ |
|
75 |
if(!is.null(out_suffix)){ |
|
76 |
out_name <- paste("output_",out_suffix,sep="") |
|
77 |
out_dir <- file.path(out_dir,out_name) |
|
78 |
} |
|
79 |
#create if does not exists: create the output dir as defined |
|
80 |
if(!file.exists(out_dir)){ |
|
81 |
dir.create(out_dir) |
|
82 |
} |
|
83 |
return(out_dir) |
|
84 |
} |
|
85 |
|
|
74 | 86 |
#parsing input arguments |
75 | 87 |
|
76 | 88 |
db.name <- list_param_prep$db.name #name of the Postgres database |
... | ... | |
89 | 101 |
qc_flags_stations <- list_param_prep$qc_flags_stations #flags allowed for the query from the GHCND?? |
90 | 102 |
out_prefix<-list_param_prep$out_prefix #"_365d_GAM_fus5_all_lstd_03012013" #User defined output prefix |
91 | 103 |
|
92 |
## New parameters added for sub sampling in areas with important density of meteo stations |
|
93 |
|
|
94 |
sub_sampling <- list_param$sub_sampling #if TRUE then monthly stations data are resampled
|
|
95 |
sub_sample_rnd <- list_param$sub_sample_rnd #if TRUE use random sampling in addition to spatial sub-sampling
|
|
104 |
## New parameters added for sub samplineg in areas with important density of meteo stations
|
|
105 |
sub_sampling <- list_param_prep$sub_sampling #if TRUE then monthly stations data are resampled |
|
106 |
sub_sample_rnd <- list_param_prep$sub_sample_rnd #if TRUE use random sampling in addition to spatial sub-sampling
|
|
107 |
target_range_nb <- list_param_prep$target_range_nb # number of stations desired as min and max, convergence to min for now
|
|
96 | 108 |
|
97 |
target_range_nb <- list_param$target_range_nb # number of stations desired as min and max, convergence to min for now
|
|
98 |
#needs to be added in master script!!
|
|
99 |
target_range_daily_nb <- list_param$target_range_daily_nb #desired number range of daily stations |
|
100 |
|
|
101 |
dist_range <- list_param$dist_range #distance range for pruning, usually (0,5) in km or 0,0.009*5 for degreee |
|
102 |
step_dist <- list_param$step_dist #stepping distance used in pruning spatially, use 1km or 0.009 for degree data |
|
109 |
#needs to be added in master script!!
|
|
110 |
sub_sampling_day <- list_param_prep$sub_sampling_day
|
|
111 |
target_range_daily_nb <- list_param_prep$target_range_daily_nb #desired number range of daily stations
|
|
112 |
|
|
113 |
dist_range <- list_param_prep$dist_range #distance range for pruning, usually (0,5) in km or 0,0.009*5 for degreee
|
|
114 |
step_dist <- list_param_prep$step_dist #stepping distance used in pruning spatially, use 1km or 0.009 for degree data
|
|
103 | 115 |
|
104 | 116 |
## working directory is the same for input and output for this function |
105 | 117 |
#setwd(in_path) |
106 |
setwd(out_path) |
|
118 |
##### Create additional output dir for clim datasets |
|
119 |
|
|
120 |
out_path_clim <- file.path(out_path,paste("clim_",year_start_clim,"_",year_end_clim,sep="")) |
|
121 |
create_dir_fun(out_dir=out_path_clim,out_suffix=NULL) #create dir if it does not exists |
|
122 |
setwd(out_path) #thisvaries for clim datasets in the code |
|
123 |
|
|
124 |
out_path <- file.path(out_path,year_start) #if predictions for year 1992, then a 1992 dir is created |
|
125 |
create_dir_fun(out_dir=out_path,out_suffix=NULL) #create dir if it does not exists |
|
126 |
setwd(out_path) #thisvaries for clim datasets in the code |
|
107 | 127 |
|
108 | 128 |
##### STEP 1: Select station in the study area |
109 | 129 |
|
... | ... | |
127 | 147 |
stat_reg<-dat_stat[inside,] #Selecting stations contained in the current interpolation area |
128 | 148 |
|
129 | 149 |
#### |
130 |
##TODO: Add buffer option? |
|
150 |
##TODO: Add buffer option? Not needed right now as tiles overlap
|
|
131 | 151 |
#### |
132 | 152 |
|
133 | 153 |
#### STEP 2: Connecting to the database and query for relevant data |
134 |
|
|
154 |
|
|
155 |
##This is the query for daily time step predictions |
|
135 | 156 |
drv <- dbDriver("PostgreSQL") |
136 | 157 |
db <- dbConnect(drv, dbname=db.name) |
137 | 158 |
|
... | ... | |
145 | 166 |
"AND station IN ",list_s,";",sep="")) #Selecting station using a SQL query |
146 | 167 |
time_duration<-proc.time()-time1 #Time for the query may be long given the size of the database |
147 | 168 |
time_minutes<-time_duration[3]/60 |
148 |
print(time_minutes) |
|
169 |
|
|
149 | 170 |
dbDisconnect(db) |
150 | 171 |
|
151 | 172 |
data_table<-merge(data2,as.data.frame(stat_reg), by.x = "station", by.y = "STAT_ID") |
... | ... | |
176 | 197 |
outfile1_rds<-sub(".shp",".rds",outfile1) |
177 | 198 |
saveRDS(stat_reg,outfile1) |
178 | 199 |
|
200 |
#Also save in clim data |
|
201 |
outfile1_clim <-file.path(out_path_clim,paste("stations","_",out_prefix,".shp",sep="")) |
|
202 |
writeOGR(stat_reg,dsn= dirname(outfile1),layer= sub(".shp","",basename(outfile1)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
203 |
#writeOGR(dst,dsn= ".",layer= sub(".shp","",outfile4), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
204 |
#Also save as rds |
|
205 |
outfile1_rds<-sub(".shp",".rds",outfile1) |
|
206 |
saveRDS(stat_reg,outfile1) |
|
207 |
|
|
208 |
##This goes in the daily output dir (e.g. for year 1992) |
|
209 |
#/nobackupp6/aguzman4/climateLayers/out/reg4/20_-60/1992/ |
|
210 |
|
|
179 | 211 |
outfile2<-file.path(out_path,paste("ghcn_data_query_",var,"_",year_start,"_",year_end,out_prefix,".shp",sep="")) #Name of the file |
180 | 212 |
#writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension |
181 | 213 |
writeOGR(data_d,dsn= dirname(outfile2),layer= sub(".shp","",basename(outfile2)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
... | ... | |
192 | 224 |
### STEP 4: Extract values at stations from covariates stack of raster images |
193 | 225 |
#Eventually this step may be skipped if the covariates information is stored in the database... |
194 | 226 |
#s_raster<-stack(file.path(in_path,infile_covariates)) #read in the data stack |
227 |
|
|
195 | 228 |
s_raster<-brick(infile_covariates) #read in the data stack |
196 | 229 |
names(s_raster)<-covar_names #Assigning names to the raster layers: making sure it is included in the extraction |
197 | 230 |
stat_val<- extract(s_raster, data_reg) #Extracting values from the raster stack for every point location in coords data frame. |
... | ... | |
222 | 255 |
data_RST_SDF$value<-data_RST_SDF$value/10 #TMax is the average max temp for monthy data |
223 | 256 |
} |
224 | 257 |
|
225 |
|
|
226 |
## Adding subsampling for daily stations... |
|
258 |
## Adding subsampling for daily stations... |
|
227 | 259 |
|
228 | 260 |
#This must be set up in master script |
229 | 261 |
#target_max_nb <- 100,000 #this is not actually used yet in the current implementation,can be set to very high value... |
... | ... | |
240 | 272 |
#Daily range set at the begining...line 199! |
241 | 273 |
|
242 | 274 |
#target_range_daily_nb <- list_param$target_range_daily_nb #desired number range of daily stations |
243 |
|
|
275 |
|
|
244 | 276 |
if(sub_sampling_day==TRUE){ |
245 |
|
|
246 |
sub_sampling_obj <- sub_sampling_by_dist_nb_stat(target_range_nb=target_range_day_nb,dist_range=dist_range,step_dist=step_dist,data_in=data_RST_SDF,sampling=T,combined=F)
|
|
277 |
print("daily subsample") |
|
278 |
sub_sampling_obj <- sub_sampling_by_dist_nb_stat(target_range_nb=target_range_nb,dist_range=dist_range,step_dist=step_dist,data_in=data_RST_SDF,sampling=T,combined=F) |
|
247 | 279 |
data_RST_SDF <- sub_sampling_obj$data #get sub-sampled data...for monhtly stations |
248 | 280 |
#save the information for later use (validation at monthly step!!) |
249 | 281 |
save(sub_sampling_obj,file= file.path(out_path,paste("sub_sampling_obj_","daily_",interpolation_method,"_", out_prefix,".RData",sep=""))) |
250 | 282 |
} |
251 |
|
|
283 |
|
|
252 | 284 |
#Make sure this is still a shapefile...!! This might need to be uncommented... |
253 |
|
|
285 |
|
|
254 | 286 |
#coordinates(data_RST_SDF)<-cbind(data_RST_SDF$x,data_RST_SDF$y) #Transforming data_RST_SDF into a spatial point dataframe |
255 | 287 |
#CRS_reg<-proj4string(data_RST_SDF) |
256 | 288 |
#proj4string(data_RST_SDF)<-CRS_reg #Need to assign coordinates... |
... | ... | |
263 | 295 |
|
264 | 296 |
############################################################### |
265 | 297 |
######## STEP 5: Preparing monthly averages from the ProstGres database |
266 |
drv <- dbDriver("PostgreSQL") |
|
267 |
db <- dbConnect(drv, dbname=db.name) |
|
268 | 298 |
|
269 |
#year_start_clim: set at the start of the script |
|
270 |
time1<-proc.time() #Start stop watch |
|
271 |
list_s<-format_s(stat_reg$STAT_ID) |
|
299 |
## Only query for monthly climatology data if data does not exist in the directory |
|
300 |
|
|
301 |
#This file contains the input data for climatology predictions |
|
302 |
outfile6 <- file.path(out_path_clim,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep="")) #Name of the file |
|
303 |
|
|
304 |
#Could also be replaced by run_clim_query parameter. |
|
305 |
if(!(file.exists(outfile6))){ |
|
306 |
|
|
307 |
drv <- dbDriver("PostgreSQL") |
|
308 |
db <- dbConnect(drv, dbname=db.name) |
|
309 |
|
|
310 |
#year_start_clim: set at the start of the script |
|
311 |
time1<-proc.time() #Start stop watch |
|
312 |
list_s<-format_s(stat_reg$STAT_ID) |
|
272 | 313 |
|
273 |
data_m<-dbGetQuery(db, paste("SELECT *
|
|
314 |
data_m <- dbGetQuery(db, paste("SELECT *
|
|
274 | 315 |
FROM ghcn |
275 | 316 |
WHERE element=",shQuote(var), |
276 | 317 |
"AND year>=",year_start_clim, |
277 | 318 |
"AND station IN ",list_s,";",sep="")) #Selecting station using a SQL query |
278 |
time_duration<-proc.time()-time1 #Time for the query may be long given the size of the database |
|
279 |
time_minutes<-time_duration[3]/60 |
|
280 |
print(time_minutes) |
|
281 |
dbDisconnect(db) |
|
282 |
|
|
283 |
#Clean out this section!! |
|
284 |
date1<-ISOdate(data_m$year,data_m$month,data_m$day) #Creating a date object from 3 separate column |
|
285 |
date2<-as.POSIXlt(as.Date(date1)) |
|
286 |
data_m$date<-date2 |
|
287 |
#Save the query data here... |
|
288 |
data_m<-merge(data_m, stat_reg, by.x="station", by.y="STAT_ID") #Inner join all columns are retained |
|
289 |
#Extracting covariates from stack for the monthly dataset... |
|
290 |
coords<- data_m[c('longitude','latitude')] #Define coordinates in a data frame |
|
291 |
coordinates(data_m)<-coords #Assign coordinates to the data frame |
|
292 |
proj4string(data_m)<-CRS_locs_WGS84 #Assign coordinates reference system in PROJ4 format |
|
293 |
data_m<-spTransform(data_m,CRS(CRS_interp)) #Project from WGS84 to new coord. system |
|
294 |
outfile5<-file.path(out_path,paste("monthly_query_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep="")) #Name of the file |
|
295 |
writeOGR(data_m,dsn= dirname(outfile5),layer= sub(".shp","",basename(outfile5)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
296 |
|
|
297 |
outfile5_rds<-sub(".shp",".rds",outfile5) |
|
298 |
saveRDS(data_m,outfile5_rds) |
|
319 |
time_duration<-proc.time()-time1 #Time for the query may be long given the size of the database |
|
320 |
time_minutes<-time_duration[3]/60 |
|
321 |
print(time_minutes) |
|
322 |
dbDisconnect(db) |
|
323 |
|
|
324 |
#Clean out this section!! |
|
325 |
date1<-ISOdate(data_m$year,data_m$month,data_m$day) #Creating a date object from 3 separate column |
|
326 |
date2<-as.POSIXlt(as.Date(date1)) |
|
327 |
data_m$date<-date2 |
|
328 |
#Save the query data here... |
|
329 |
data_m<-merge(data_m, stat_reg, by.x="station", by.y="STAT_ID") #Inner join all columns are retained |
|
330 |
#Extracting covariates from stack for the monthly dataset... |
|
331 |
coords<- data_m[c('longitude','latitude')] #Define coordinates in a data frame |
|
332 |
coordinates(data_m)<-coords #Assign coordinates to the data frame |
|
333 |
proj4string(data_m)<-CRS_locs_WGS84 #Assign coordinates reference system in PROJ4 format |
|
334 |
data_m<-spTransform(data_m,CRS(CRS_interp)) #Project from WGS84 to new coord. system |
|
335 |
|
|
336 |
outfile5<-file.path(out_path_clim,paste("monthly_query_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep="")) #Name of the file |
|
337 |
|
|
338 |
#outfile5<-file.path(out_path,paste("monthly_query_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep="")) #Name of the file |
|
339 |
writeOGR(data_m,dsn= dirname(outfile5),layer= sub(".shp","",basename(outfile5)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
340 |
|
|
341 |
outfile5_rds<-sub(".shp",".rds",outfile5) |
|
342 |
saveRDS(data_m,outfile5_rds) |
|
299 | 343 |
|
300 |
#In Venezuela and other regions where there are not many stations...mflag==S should be added..see Durenne etal.2010. |
|
344 |
#In Venezuela and other regions where there are not many stations...mflag==S should be added..see Durenne etal.2010.
|
|
301 | 345 |
|
302 |
#d<-subset(data_m,mflag==qc_flags_stations[1] | mflag==qc_flags_stations[2]) |
|
303 |
d<-subset(data_m,mflag %in% qc_flags_stations) |
|
304 |
|
|
305 |
#Add screening here ...May need some screeing??? i.e. range of temp and elevation... |
|
306 |
|
|
307 |
d1<-aggregate(value~station+month, data=d, mean) #Calculate monthly mean for every station in OR |
|
308 |
#d2<-aggregate(value~station+month, data=d, length) #Calculate monthly mean for every station in OR |
|
309 |
is_not_na_fun<-function(x) sum(!is.na(x)) #count the number of available observation |
|
310 |
d2<-aggregate(value~station+month, data=d, is_not_na_fun) #Calculate monthly mean for every station in OR |
|
311 |
#names(d2)[names(d2)=="value"] <-"nobs_station" |
|
312 |
d1$nobs_station<-d2$value |
|
313 |
dst<-merge(d1, stat_reg, by.x="station", by.y="STAT_ID") #Inner join all columns are retained |
|
314 |
|
|
315 |
#This allows to change only one name of the data.frame |
|
316 |
pos<-match("value",names(dst)) #Find column with name "value" |
|
317 |
if (var=="TMAX"){ |
|
318 |
names(dst)[pos]<-c("TMax") #renaming the column "value" extracted from the Postgres database |
|
319 |
dst$TMax<-dst$TMax/10 #TMax is the average max temp for monthy data |
|
320 |
} |
|
321 |
|
|
322 |
if (var=="TMIN"){ |
|
323 |
names(dst)[pos]<-c("TMin") |
|
324 |
dst$TMin<-dst$TMin/10 #TMin is the average min temp for monthy data |
|
325 |
} |
|
326 |
|
|
327 |
#Extracting covariates from stack for the monthly dataset... |
|
328 |
#names(dst)[5:6] <-c('latitude','longitude') |
|
329 |
coords<- dst[c('longitude','latitude')] #Define coordinates in a data frame |
|
346 |
#d<-subset(data_m,mflag==qc_flags_stations[1] | mflag==qc_flags_stations[2])
|
|
347 |
d<-subset(data_m,mflag %in% qc_flags_stations)
|
|
348 |
|
|
349 |
#Add screening here ...May need some screeing??? i.e. range of temp and elevation...
|
|
350 |
|
|
351 |
d1<-aggregate(value~station+month, data=d, mean) #Calculate monthly mean for every station in OR
|
|
352 |
#d2<-aggregate(value~station+month, data=d, length) #Calculate monthly mean for every station in OR
|
|
353 |
is_not_na_fun<-function(x) sum(!is.na(x)) #count the number of available observation
|
|
354 |
d2<-aggregate(value~station+month, data=d, is_not_na_fun) #Calculate monthly mean for every station in OR
|
|
355 |
#names(d2)[names(d2)=="value"] <-"nobs_station"
|
|
356 |
d1$nobs_station<-d2$value
|
|
357 |
dst<-merge(d1, stat_reg, by.x="station", by.y="STAT_ID") #Inner join all columns are retained
|
|
358 |
|
|
359 |
#This allows to change only one name of the data.frame
|
|
360 |
pos<-match("value",names(dst)) #Find column with name "value"
|
|
361 |
if (var=="TMAX"){
|
|
362 |
names(dst)[pos]<-c("TMax") #renaming the column "value" extracted from the Postgres database
|
|
363 |
dst$TMax<-dst$TMax/10 #TMax is the average max temp for monthy data
|
|
364 |
}
|
|
365 |
|
|
366 |
if (var=="TMIN"){
|
|
367 |
names(dst)[pos]<-c("TMin")
|
|
368 |
dst$TMin<-dst$TMin/10 #TMin is the average min temp for monthy data
|
|
369 |
}
|
|
370 |
|
|
371 |
#Extracting covariates from stack for the monthly dataset...
|
|
372 |
#names(dst)[5:6] <-c('latitude','longitude')
|
|
373 |
coords<- dst[c('longitude','latitude')] #Define coordinates in a data frame
|
|
330 | 374 |
|
331 |
coordinates(dst)<-coords #Assign coordinates to the data frame |
|
332 |
proj4string(dst)<-CRS_locs_WGS84 #Assign coordinates reference system in PROJ4 format |
|
333 |
dst_month<-spTransform(dst,CRS(CRS_interp)) #Project from WGS84 to new coord. system |
|
334 |
|
|
335 |
stations_val<-extract(s_raster,dst_month,df=TRUE) #extraction of the information at station location in a data frame |
|
336 |
#dst_extract<-spCbind(dst_month,stations_val) #this is in sinusoidal from the raster stack |
|
337 |
dst_extract<-cbind(dst_month,stations_val) #this is in sinusoidal from the raster stack |
|
338 |
dst<-dst_extract #problem!!! two column named elev!!! use elev_s?? |
|
339 |
|
|
340 |
#browser() |
|
341 |
coords<- dst[c('x','y')] #Define coordinates in a data frame, this is the local x,y |
|
342 |
index<-!is.na(coords$x) #remove if NA, may need to revisit at a later stage |
|
343 |
dst<-dst[index,] |
|
344 |
coords<- dst[c('x','y')] #Define coordinates in a data frame, this is the local x,y |
|
345 |
coordinates(dst)<-coords #Assign coordinates to the data frame |
|
346 |
proj4string(dst)<-CRS_interp #Assign coordinates reference system in PROJ4 format |
|
347 |
|
|
348 |
##Added 01-07-2015 |
|
349 |
dst$id <- dst$station #the id field is needed for possible subsampling |
|
375 |
coordinates(dst)<-coords #Assign coordinates to the data frame
|
|
376 |
proj4string(dst)<-CRS_locs_WGS84 #Assign coordinates reference system in PROJ4 format
|
|
377 |
dst_month<-spTransform(dst,CRS(CRS_interp)) #Project from WGS84 to new coord. system
|
|
378 |
|
|
379 |
stations_val<-extract(s_raster,dst_month,df=TRUE) #extraction of the information at station location in a data frame
|
|
380 |
#dst_extract<-spCbind(dst_month,stations_val) #this is in sinusoidal from the raster stack
|
|
381 |
dst_extract<-cbind(dst_month,stations_val) #this is in sinusoidal from the raster stack
|
|
382 |
dst<-dst_extract #problem!!! two column named elev!!! use elev_s??
|
|
383 |
|
|
384 |
#browser()
|
|
385 |
coords<- dst[c('x','y')] #Define coordinates in a data frame, this is the local x,y
|
|
386 |
index<-!is.na(coords$x) #remove if NA, may need to revisit at a later stage
|
|
387 |
dst<-dst[index,]
|
|
388 |
coords<- dst[c('x','y')] #Define coordinates in a data frame, this is the local x,y
|
|
389 |
coordinates(dst)<-coords #Assign coordinates to the data frame
|
|
390 |
proj4string(dst)<-CRS_interp #Assign coordinates reference system in PROJ4 format
|
|
391 |
|
|
392 |
##Added 01-07-2015
|
|
393 |
dst$id <- dst$station #the id field is needed for possible subsampling
|
|
350 | 394 |
|
351 |
### ADD SCREENING HERE BEFORE WRITING OUT DATA |
|
352 |
#Covariates ok since screening done in covariate script |
|
353 |
#screening on var i.e. value, TMIN, TMAX... |
|
395 |
### ADD SCREENING HERE BEFORE WRITING OUT DATA
|
|
396 |
#Covariates ok since screening done in covariate script
|
|
397 |
#screening on var i.e. value, TMIN, TMAX...
|
|
354 | 398 |
|
355 |
#### Adding subsampling for regions that have too many stations...This is for monthly stations...
|
|
399 |
#### Adding subsampling for regions that have too many stations...
|
|
356 | 400 |
|
357 |
#This must be set up in master script |
|
358 |
#target_max_nb <- 100,000 #this is not actually used yet in the current implementation,can be set to very high value... |
|
359 |
#target_min_nb <- 8,000 #this is the target number of stations we would like: to be set by Alberto... |
|
360 |
##max_dist <- 1000 # the maximum distance used for pruning ie removes stations that are closer than 1000m, this in degree...? |
|
361 |
#max_dist <- 0.009*5 #5km in degree |
|
362 |
#min_dist <- 0 #minimum distance to start with |
|
363 |
#step_dist <- 0.009 #iteration step to remove the stations |
|
401 |
#This must be set up in master script
|
|
402 |
#target_max_nb <- 100,000 #this is not actually used yet in the current implementation,can be set to very high value...
|
|
403 |
#target_min_nb <- 8,000 #this is the target number of stations we would like: to be set by Alberto...
|
|
404 |
##max_dist <- 1000 # the maximum distance used for pruning ie removes stations that are closer than 1000m, this in degree...?
|
|
405 |
#max_dist <- 0.009*5 #5km in degree
|
|
406 |
#min_dist <- 0 #minimum distance to start with
|
|
407 |
#step_dist <- 0.009 #iteration step to remove the stations
|
|
364 | 408 |
|
365 |
#test5 <- sub_sampling_by_dist_nb_stat(target_range_nb=target_range_nb,dist_range=dist_range,step_dist=step_dist,data_in=data_month,sampling=T,combined=F) |
|
366 |
#note that this is for monthly stations. |
|
367 |
|
|
368 |
if(sub_sampling==TRUE){ #sub_sampling is an option for the monthly station |
|
369 |
sub_sampling_obj <- sub_sampling_by_dist_nb_stat(target_range_nb=target_range_nb,dist_range=dist_range,step_dist=step_dist,data_in=dst,sampling=T,combined=F) |
|
370 |
dst <- sub_sampling_obj$data #get sub-sampled data...for monhtly stations |
|
371 |
#save the information for later use (validation at monthly step!!) |
|
372 |
save(sub_sampling_obj,file= file.path(out_path,paste("sub_sampling_obj_",interpolation_method,"_", out_prefix,".RData",sep=""))) |
|
373 |
} |
|
409 |
#test5 <- sub_sampling_by_dist_nb_stat(target_range_nb=target_range_nb,dist_range=dist_range,step_dist=step_dist,data_in=data_month,sampling=T,combined=F) |
|
410 |
#note that this is for monthly stations. |
|
411 |
|
|
412 |
if(sub_sampling==TRUE){ |
|
413 |
print("monthly subsample") |
|
414 |
sub_sampling_obj <- sub_sampling_by_dist_nb_stat(target_range_nb=target_range_nb,dist_range=dist_range,step_dist=step_dist,data_in=dst,sampling=T,combined=F) |
|
415 |
dst <- sub_sampling_obj$data #get sub-sampled data...for monhtly stations |
|
416 |
#save the information for later use (validation at monthly step!!) |
|
417 |
#save(sub_sampling_obj,file= file.path(out_path,paste("sub_sampling_obj_",interpolation_method,"_", out_prefix,".RData",sep=""))) |
|
418 |
save(sub_sampling_obj,file= file.path(out_path_clim,paste("sub_sampling_obj_",interpolation_method,"_", out_prefix,".RData",sep=""))) |
|
419 |
} |
|
374 | 420 |
|
375 |
#Make sure this is still a shapefile...!! This might need to be uncommented... |
|
376 |
|
|
377 |
#coordinates(dst)<-cbind(dst$x,dst$y) #Transforming data_RST_SDF into a spatial point dataframe |
|
378 |
#CRS_reg<-proj4string(data_reg) |
|
379 |
#proj4string(dst)<-CRS_reg #Need to assign coordinates... |
|
421 |
#Make sure this is still a shapefile...!! This might need to be uncommented... |
|
422 |
#coordinates(dst)<-cbind(dst$x,dst$y) #Transforming data_RST_SDF into a spatial point dataframe |
|
423 |
#CRS_reg<-proj4string(data_reg) |
|
424 |
#proj4string(dst)<-CRS_reg #Need to assign coordinates... |
|
380 | 425 |
|
381 |
#### |
|
382 |
#write out a new shapefile (including .prj component) |
|
383 |
dst$OID<-1:nrow(dst) #need a unique ID? |
|
384 |
outfile6<-file.path(out_path,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep="")) #Name of the file |
|
385 |
writeOGR(dst,dsn= dirname(outfile6),layer= sub(".shp","",basename(outfile6)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
426 |
#### |
|
427 |
#write out a new shapefile (including .prj component) |
|
428 |
dst$OID<-1:nrow(dst) #need a unique ID? |
|
429 |
#outfile6<-file.path(out_path,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep="")) #Name of the file |
|
430 |
outfile6<-file.path(out_path_clim,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep="")) #Name of the file |
|
431 |
|
|
432 |
writeOGR(dst,dsn= dirname(outfile6),layer= sub(".shp","",basename(outfile6)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
386 | 433 |
|
387 |
outfile6_rds<-sub(".shp",".rds",outfile6) |
|
388 |
saveRDS(dst,outfile6_rds) |
|
434 |
outfile6_rds<-sub(".shp",".rds",outfile6) |
|
435 |
saveRDS(dst,outfile6_rds) |
|
436 |
|
|
437 |
} |
|
438 |
|
|
439 |
###If clim exists, just create name for outfile5 to object used later... |
|
440 |
#outfile6 <- file.path(out_path_clim,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep="")) #Name of the file |
|
441 |
#Could also be replaced by run_clim_query parameter. |
|
442 |
if(file.exists(outfile6)){ |
|
443 |
outfile5<-file.path(out_path_clim,paste("monthly_query_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep="")) #Name of the file |
|
444 |
} |
|
389 | 445 |
|
390 | 446 |
outfiles_obj<-list(outfile1,outfile2,outfile3,outfile4,outfile5,outfile6) |
391 | 447 |
names(outfiles_obj)<- c("loc_stations","loc_stations_ghcn","daily_query_ghcn_data","daily_covar_ghcn_data","monthly_query_ghcn_data","monthly_covar_ghcn_data") |
Also available in: Unified diff
database preparation function stage2 and 3, modifications to avoid multiple queries for climatologies by tile to speed up production