Project

General

Profile

« Previous | Next » 

Revision 6992f12e

Added by Benoit Parmentier almost 9 years ago

Database preparation function, stages 2 and 3: modifications to avoid multiple queries for climatologies per tile, in order to speed up production.

View differences:

climate/research/oregon/interpolation/Database_stations_covariates_processing_function.R
1
##################    Data preparation for interpolation   #######################################
2
############################ Extraction of station data ##########################################
1
##################    Data preparation for interpolation  #######################################
2
############################ Extraction of station data##########################################
3 3

  
4 4

  
5 5
database_covariates_preparation<-function(list_param_prep){
......
20 20
  # 13) out_prefix: output suffix added to output names--it is the same in the interpolation script
21 21
  #
22 22
  # 
23
  # 14) subampling:
23
  # 14)
24 24
  # 15)..
25
  #
26
  #
27
  
28 25
  #The output is a list of four shapefile names produced by the function:
29
  
30
  #1) loc_stations: locations of stations as shapefile in EPSG 4326
31
  #2) loc_stations_ghcn: ghcn daily data for the year range of interpolation (locally projected)
32
  #3) daily_query_ghcn_data: ghcn daily data from daily query before application of quality flag
33
  #4) daily_covar_ghcn_data: ghcn daily data with covariates for the year range of interpolation (locally projected)
34
  #5) monthly_query_ghcn_data: ghcn daily data from monthly query before application of quality flag
35
  #6) monthly_covar_ghcn_data: ghcn monthly averaged data with covariates for the year range of interpolation (locally projected)
26
  #1) loc_stations: outfile1, locations of stations as shapefile in EPSG 4326 
27
  #2) loc_stations_ghcn: outfile2, ghcn daily data for the year range of interpolation (locally projected) 
28
  #3) daily_query_ghcn_data: outfile3, ghcn daily data from daily query before application of quality flag
29
  #4) daily_covar_ghcn_data: outfile4, ghcn daily data with covariates for the year range of interpolation (locally projected)
30
  #5) monthly_query_ghcn_data: outfile5, ghcn daily data from monthly query before application of quality flag
31
  #6) monthly_covar_ghcn_data: outfile6, ghcn monthly averaged data with covariates for the year range of interpolation (locally projected)
36 32
  
37 33
  #AUTHOR: Benoit Parmentier                                                                       
38
  #DATE: 05/21/2013                                                                                 
34
  #CREATED ON: 02/22/2013
35
  #MODIFIED ON: 12/21/2015
39 36
  #PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--     
40
  #Comments and TODO
41
  #-Add buffer option...
37
  #Comments:
38
  #Modifications to take into account the existence of clim query, to avoid querying many times the database at the climatology step.
39
  #TODO
40
  #-Add buffer option: solved by overlapping tiles
42 41
  #-Add screening for value predicted: var
42
  # 
43 43
  ##################################################################################################
44 44
  
45 45
  ###Loading R library and packages: should it be read in before???   
......
71 71
    return(sf_ID)
72 72
  }
73 73
  
74
  create_dir_fun <- function(out_dir,out_suffix){
75
    if(!is.null(out_suffix)){
76
      out_name <- paste("output_",out_suffix,sep="")
77
      out_dir <- file.path(out_dir,out_name)
78
    }
79
    #create if does not exists: create the output dir as defined 
80
    if(!file.exists(out_dir)){
81
      dir.create(out_dir)
82
    }
83
    return(out_dir)
84
  }
85
  
74 86
  #parsing input arguments
75 87
  
76 88
  db.name <- list_param_prep$db.name             #name of the Postgres database
......
89 101
  qc_flags_stations <- list_param_prep$qc_flags_stations #flags allowed for the query from the GHCND??
90 102
  out_prefix<-list_param_prep$out_prefix #"_365d_GAM_fus5_all_lstd_03012013"                #User defined output prefix
91 103
  
92
  ## New parameters added for  sub sampling in areas with important density of meteo stations
93
  
94
  sub_sampling <- list_param$sub_sampling  #if TRUE then monthly stations data are resampled
95
  sub_sample_rnd <- list_param$sub_sample_rnd #if  TRUE use random sampling  in addition to spatial  sub-sampling
104
  ## New parameters added for  sub samplineg in areas with important density of meteo stations
105
  sub_sampling <- list_param_prep$sub_sampling  #if TRUE then monthly stations data are resampled
106
  sub_sample_rnd <- list_param_prep$sub_sample_rnd #if  TRUE use random sampling  in addition to spatial  sub-sampling
107
  target_range_nb <- list_param_prep$target_range_nb # number of stations desired as min and max, convergence to  min  for  now
96 108
  
97
  target_range_nb <- list_param$target_range_nb # number of stations desired as min and max, convergence to  min  for  now
98
  #needs to be added in master script!!
99
  target_range_daily_nb <- list_param$target_range_daily_nb #desired number range of daily stations
100
    
101
  dist_range <- list_param$dist_range #distance range  for pruning,  usually (0,5) in km or 0,0.009*5 for  degreee
102
  step_dist <- list_param$step_dist #stepping distance used in pruning  spatially, use 1km or 0.009 for degree data
109
  #needs to be added in master script!!	
110
  sub_sampling_day <- list_param_prep$sub_sampling_day
111
  target_range_daily_nb <- list_param_prep$target_range_daily_nb #desired number range of daily stations
112

  
113
  dist_range <- list_param_prep$dist_range #distance range  for pruning,  usually (0,5) in km or 0,0.009*5 for  degreee
114
  step_dist <- list_param_prep$step_dist #stepping distance used in pruning  spatially, use 1km or 0.009 for degree data
103 115

  
104 116
  ## working directory is the same for input and output for this function  
105 117
  #setwd(in_path) 
106
  setwd(out_path)
118
  ##### Create additional output dir for clim datasets
119
  
120
  out_path_clim <- file.path(out_path,paste("clim_",year_start_clim,"_",year_end_clim,sep=""))
121
  create_dir_fun(out_dir=out_path_clim,out_suffix=NULL) #create dir if it does not exists
122
  setwd(out_path) #thisvaries for clim datasets in the code
123
  
124
  out_path <- file.path(out_path,year_start) #if predictions for year 1992, then a 1992 dir is created
125
  create_dir_fun(out_dir=out_path,out_suffix=NULL) #create dir if it does not exists
126
  setwd(out_path) #thisvaries for clim datasets in the code
107 127
  
108 128
  ##### STEP 1: Select station in the study area
109 129
    
......
127 147
  stat_reg<-dat_stat[inside,]              #Selecting stations contained in the current interpolation area
128 148
  
129 149
  ####
130
  ##TODO: Add buffer option? 
150
  ##TODO: Add buffer option? Not needed right now as tiles overlap
131 151
  ####
132 152
  
133 153
  #### STEP 2: Connecting to the database and query for relevant data 
134

  
154
  
155
  ##This is the query for daily time step predictions
135 156
  drv <- dbDriver("PostgreSQL")
136 157
  db <- dbConnect(drv, dbname=db.name)
137 158

  
......
145 166
                              "AND station IN ",list_s,";",sep=""))  #Selecting station using a SQL query
146 167
  time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
147 168
  time_minutes<-time_duration[3]/60
148
  print(time_minutes)
169
  
149 170
  dbDisconnect(db)
150 171
 
151 172
  data_table<-merge(data2,as.data.frame(stat_reg), by.x = "station", by.y = "STAT_ID")
......
176 197
  outfile1_rds<-sub(".shp",".rds",outfile1)
177 198
  saveRDS(stat_reg,outfile1)
178 199

  
200
  #Also save in clim data
201
  outfile1_clim <-file.path(out_path_clim,paste("stations","_",out_prefix,".shp",sep=""))
202
  writeOGR(stat_reg,dsn= dirname(outfile1),layer= sub(".shp","",basename(outfile1)), driver="ESRI Shapefile",overwrite_layer=TRUE)
203
  #writeOGR(dst,dsn= ".",layer= sub(".shp","",outfile4), driver="ESRI Shapefile",overwrite_layer=TRUE)
204
  #Also save as rds
205
  outfile1_rds<-sub(".shp",".rds",outfile1)
206
  saveRDS(stat_reg,outfile1)
207
  
208
  ##This goes in the daily output dir (e.g. for year 1992)
209
  #/nobackupp6/aguzman4/climateLayers/out/reg4/20_-60/1992/
210
  
179 211
  outfile2<-file.path(out_path,paste("ghcn_data_query_",var,"_",year_start,"_",year_end,out_prefix,".shp",sep=""))         #Name of the file
180 212
  #writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
181 213
  writeOGR(data_d,dsn= dirname(outfile2),layer= sub(".shp","",basename(outfile2)), driver="ESRI Shapefile",overwrite_layer=TRUE)
......
192 224
  ### STEP 4: Extract values at stations from covariates stack of raster images
193 225
  #Eventually this step may be skipped if the covariates information is stored in the database...
194 226
  #s_raster<-stack(file.path(in_path,infile_covariates))                   #read in the data stack
227
  
195 228
  s_raster<-brick(infile_covariates)                   #read in the data stack
196 229
  names(s_raster)<-covar_names               #Assigning names to the raster layers: making sure it is included in the extraction
197 230
  stat_val<- extract(s_raster, data_reg)        #Extracting values from the raster stack for every point location in coords data frame.
......
222 255
    data_RST_SDF$value<-data_RST_SDF$value/10                #TMax is the average max temp for monthy data
223 256
  }
224 257
  
225

  
226
  ## Adding subsampling for daily stations...
258
   ## Adding subsampling for daily stations...
227 259
  
228 260
    #This must be set up in master script
229 261
  #target_max_nb <- 100,000 #this is not actually used yet in the current implementation,can be set to very high value...
......
240 272
  #Daily range set at the begining...line 199!
241 273
  
242 274
  #target_range_daily_nb <- list_param$target_range_daily_nb #desired number range of daily stations
243

  
275
  
244 276
  if(sub_sampling_day==TRUE){
245
    
246
    sub_sampling_obj <- sub_sampling_by_dist_nb_stat(target_range_nb=target_range_day_nb,dist_range=dist_range,step_dist=step_dist,data_in=data_RST_SDF,sampling=T,combined=F)
277
    print("daily subsample")
278
    sub_sampling_obj <- sub_sampling_by_dist_nb_stat(target_range_nb=target_range_nb,dist_range=dist_range,step_dist=step_dist,data_in=data_RST_SDF,sampling=T,combined=F)
247 279
    data_RST_SDF <- sub_sampling_obj$data #get sub-sampled data...for monhtly stations
248 280
    #save the information for later use (validation at monthly step!!)
249 281
    save(sub_sampling_obj,file= file.path(out_path,paste("sub_sampling_obj_","daily_",interpolation_method,"_", out_prefix,".RData",sep="")))
250 282
  }
251
  
283

  
252 284
  #Make sure this is still a shapefile...!! This might need to be uncommented...
253
  
285

  
254 286
  #coordinates(data_RST_SDF)<-cbind(data_RST_SDF$x,data_RST_SDF$y) #Transforming data_RST_SDF into a spatial point dataframe
255 287
  #CRS_reg<-proj4string(data_RST_SDF)
256 288
  #proj4string(data_RST_SDF)<-CRS_reg  #Need to assign coordinates...
......
263 295

  
264 296
  ###############################################################
265 297
  ######## STEP 5: Preparing monthly averages from the ProstGres database
266
  drv <- dbDriver("PostgreSQL")
267
  db <- dbConnect(drv, dbname=db.name)
268 298
  
269
  #year_start_clim: set at the start of the script
270
  time1<-proc.time()    #Start stop watch
271
  list_s<-format_s(stat_reg$STAT_ID)
299
  ## Only query for monthly climatology data if data does not exist in the directory
300
  
301
  #This file contains the input data for climatology predictions
302
  outfile6 <- file.path(out_path_clim,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep=""))  #Name of the file
303

  
304
  #Could also be replaced by run_clim_query parameter.
305
  if(!(file.exists(outfile6))){
306
    
307
    drv <- dbDriver("PostgreSQL")
308
    db <- dbConnect(drv, dbname=db.name)
309
  
310
    #year_start_clim: set at the start of the script
311
    time1<-proc.time()    #Start stop watch
312
    list_s<-format_s(stat_reg$STAT_ID)
272 313
 
273
  data_m<-dbGetQuery(db, paste("SELECT *
314
    data_m <- dbGetQuery(db, paste("SELECT *
274 315
                               FROM ghcn
275 316
                               WHERE element=",shQuote(var),
276 317
                               "AND year>=",year_start_clim,
277 318
                               "AND station IN ",list_s,";",sep=""))  #Selecting station using a SQL query
278
  time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
279
  time_minutes<-time_duration[3]/60
280
  print(time_minutes)
281
  dbDisconnect(db)
282
  
283
  #Clean out this section!!
284
  date1<-ISOdate(data_m$year,data_m$month,data_m$day) #Creating a date object from 3 separate column
285
  date2<-as.POSIXlt(as.Date(date1))
286
  data_m$date<-date2
287
  #Save the query data here...
288
  data_m<-merge(data_m, stat_reg, by.x="station", by.y="STAT_ID")   #Inner join all columns are retained
289
  #Extracting covariates from stack for the monthly dataset...
290
  coords<- data_m[c('longitude','latitude')]              #Define coordinates in a data frame
291
  coordinates(data_m)<-coords                      #Assign coordinates to the data frame
292
  proj4string(data_m)<-CRS_locs_WGS84                  #Assign coordinates reference system in PROJ4 format
293
  data_m<-spTransform(data_m,CRS(CRS_interp))     #Project from WGS84 to new coord. system
294
  outfile5<-file.path(out_path,paste("monthly_query_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep=""))  #Name of the file
295
  writeOGR(data_m,dsn= dirname(outfile5),layer= sub(".shp","",basename(outfile5)), driver="ESRI Shapefile",overwrite_layer=TRUE)
296
  
297
  outfile5_rds<-sub(".shp",".rds",outfile5)
298
  saveRDS(data_m,outfile5_rds) 
319
    time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
320
    time_minutes<-time_duration[3]/60
321
    print(time_minutes)
322
    dbDisconnect(db)
323
  
324
    #Clean out this section!!
325
    date1<-ISOdate(data_m$year,data_m$month,data_m$day) #Creating a date object from 3 separate column
326
    date2<-as.POSIXlt(as.Date(date1))
327
    data_m$date<-date2
328
    #Save the query data here...
329
    data_m<-merge(data_m, stat_reg, by.x="station", by.y="STAT_ID")   #Inner join all columns are retained
330
    #Extracting covariates from stack for the monthly dataset...
331
    coords<- data_m[c('longitude','latitude')]              #Define coordinates in a data frame
332
    coordinates(data_m)<-coords                      #Assign coordinates to the data frame
333
    proj4string(data_m)<-CRS_locs_WGS84                  #Assign coordinates reference system in PROJ4 format
334
    data_m<-spTransform(data_m,CRS(CRS_interp))     #Project from WGS84 to new coord. system
335
  
336
    outfile5<-file.path(out_path_clim,paste("monthly_query_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep=""))  #Name of the file
337
  
338
    #outfile5<-file.path(out_path,paste("monthly_query_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep=""))  #Name of the file
339
    writeOGR(data_m,dsn= dirname(outfile5),layer= sub(".shp","",basename(outfile5)), driver="ESRI Shapefile",overwrite_layer=TRUE)
340
  
341
    outfile5_rds<-sub(".shp",".rds",outfile5)
342
    saveRDS(data_m,outfile5_rds) 
299 343

  
300
  #In Venezuela and other regions where there are not many stations...mflag==S should be added..see Durenne etal.2010.
344
    #In Venezuela and other regions where there are not many stations...mflag==S should be added..see Durenne etal.2010.
301 345

  
302
  #d<-subset(data_m,mflag==qc_flags_stations[1] | mflag==qc_flags_stations[2])
303
  d<-subset(data_m,mflag %in% qc_flags_stations) 
304
  
305
  #Add screening here ...May need some screeing??? i.e. range of temp and elevation...
306
  
307
  d1<-aggregate(value~station+month, data=d, mean)  #Calculate monthly mean for every station in OR
308
  #d2<-aggregate(value~station+month, data=d, length)  #Calculate monthly mean for every station in OR
309
  is_not_na_fun<-function(x) sum(!is.na(x)) #count the number of available observation
310
  d2<-aggregate(value~station+month, data=d, is_not_na_fun)  #Calculate monthly mean for every station in OR
311
  #names(d2)[names(d2)=="value"] <-"nobs_station"
312
  d1$nobs_station<-d2$value
313
  dst<-merge(d1, stat_reg, by.x="station", by.y="STAT_ID")   #Inner join all columns are retained
314
  
315
  #This allows to change only one name of the data.frame
316
  pos<-match("value",names(dst)) #Find column with name "value"
317
  if (var=="TMAX"){
318
    names(dst)[pos]<-c("TMax")           #renaming the column "value" extracted from the Postgres database
319
    dst$TMax<-dst$TMax/10                #TMax is the average max temp for monthy data
320
  }
321
  
322
  if (var=="TMIN"){
323
    names(dst)[pos]<-c("TMin")
324
    dst$TMin<-dst$TMin/10                #TMin is the average min temp for monthy data
325
  }
326
  
327
  #Extracting covariates from stack for the monthly dataset...
328
  #names(dst)[5:6] <-c('latitude','longitude')
329
  coords<- dst[c('longitude','latitude')]              #Define coordinates in a data frame
346
    #d<-subset(data_m,mflag==qc_flags_stations[1] | mflag==qc_flags_stations[2])
347
    d<-subset(data_m,mflag %in% qc_flags_stations) 
348
  
349
    #Add screening here ...May need some screeing??? i.e. range of temp and elevation...
350
  
351
    d1<-aggregate(value~station+month, data=d, mean)  #Calculate monthly mean for every station in OR
352
    #d2<-aggregate(value~station+month, data=d, length)  #Calculate monthly mean for every station in OR
353
    is_not_na_fun<-function(x) sum(!is.na(x)) #count the number of available observation
354
    d2<-aggregate(value~station+month, data=d, is_not_na_fun)  #Calculate monthly mean for every station in OR
355
    #names(d2)[names(d2)=="value"] <-"nobs_station"
356
    d1$nobs_station<-d2$value
357
    dst<-merge(d1, stat_reg, by.x="station", by.y="STAT_ID")   #Inner join all columns are retained
358
  
359
    #This allows to change only one name of the data.frame
360
    pos<-match("value",names(dst)) #Find column with name "value"
361
    if (var=="TMAX"){
362
      names(dst)[pos]<-c("TMax")           #renaming the column "value" extracted from the Postgres database
363
      dst$TMax<-dst$TMax/10                #TMax is the average max temp for monthy data
364
    }
365
  
366
    if (var=="TMIN"){
367
      names(dst)[pos]<-c("TMin")
368
      dst$TMin<-dst$TMin/10                #TMin is the average min temp for monthy data
369
    }
370
  
371
    #Extracting covariates from stack for the monthly dataset...
372
    #names(dst)[5:6] <-c('latitude','longitude')
373
    coords<- dst[c('longitude','latitude')]              #Define coordinates in a data frame
330 374

  
331
  coordinates(dst)<-coords                      #Assign coordinates to the data frame
332
  proj4string(dst)<-CRS_locs_WGS84                  #Assign coordinates reference system in PROJ4 format
333
  dst_month<-spTransform(dst,CRS(CRS_interp))     #Project from WGS84 to new coord. system
334
  
335
  stations_val<-extract(s_raster,dst_month,df=TRUE)  #extraction of the information at station location in a data frame
336
  #dst_extract<-spCbind(dst_month,stations_val) #this is in sinusoidal from the raster stack
337
  dst_extract<-cbind(dst_month,stations_val) #this is in sinusoidal from the raster stack
338
  dst<-dst_extract #problem!!! two column named elev!!! use elev_s??
339
  
340
  #browser()
341
  coords<- dst[c('x','y')]              #Define coordinates in a data frame, this is the local x,y
342
  index<-!is.na(coords$x)               #remove if NA, may need to revisit at a later stage
343
  dst<-dst[index,]
344
  coords<- dst[c('x','y')]              #Define coordinates in a data frame, this is the local x,y
345
  coordinates(dst)<-coords                    #Assign coordinates to the data frame
346
  proj4string(dst)<-CRS_interp        #Assign coordinates reference system in PROJ4 format
347
  
348
  ##Added 01-07-2015
349
  dst$id <- dst$station #the id field is needed for possible subsampling
375
    coordinates(dst)<-coords                      #Assign coordinates to the data frame
376
    proj4string(dst)<-CRS_locs_WGS84                  #Assign coordinates reference system in PROJ4 format
377
    dst_month<-spTransform(dst,CRS(CRS_interp))     #Project from WGS84 to new coord. system
378
  
379
    stations_val<-extract(s_raster,dst_month,df=TRUE)  #extraction of the information at station location in a data frame
380
    #dst_extract<-spCbind(dst_month,stations_val) #this is in sinusoidal from the raster stack
381
    dst_extract<-cbind(dst_month,stations_val) #this is in sinusoidal from the raster stack
382
    dst<-dst_extract #problem!!! two column named elev!!! use elev_s??
383
  
384
    #browser()
385
    coords<- dst[c('x','y')]              #Define coordinates in a data frame, this is the local x,y
386
    index<-!is.na(coords$x)               #remove if NA, may need to revisit at a later stage
387
    dst<-dst[index,]
388
    coords<- dst[c('x','y')]              #Define coordinates in a data frame, this is the local x,y
389
    coordinates(dst)<-coords                    #Assign coordinates to the data frame
390
    proj4string(dst)<-CRS_interp        #Assign coordinates reference system in PROJ4 format
391

  
392
    ##Added 01-07-2015
393
    dst$id <- dst$station #the id field is needed for possible subsampling
350 394
  
351
  ### ADD SCREENING HERE BEFORE WRITING OUT DATA
352
  #Covariates ok since screening done in covariate script
353
  #screening on var i.e. value, TMIN, TMAX...
395
    ### ADD SCREENING HERE BEFORE WRITING OUT DATA
396
    #Covariates ok since screening done in covariate script
397
    #screening on var i.e. value, TMIN, TMAX...
354 398
  
355
  #### Adding  subsampling for regions  that  have  too  many stations...This is for monthly stations...
399
    #### Adding  subsampling for regions  that  have  too  many stations...
356 400
  
357
  #This must be set up in master script
358
  #target_max_nb <- 100,000 #this is not actually used yet in the current implementation,can be set to very high value...
359
  #target_min_nb <- 8,000 #this is the target number of stations we would like: to be set by Alberto...
360
  ##max_dist <- 1000 # the maximum distance used for pruning ie removes stations that are closer than 1000m, this in degree...? 
361
  #max_dist <- 0.009*5 #5km in degree
362
  #min_dist <- 0    #minimum distance to start with
363
  #step_dist <- 0.009 #iteration step to remove the stations
401
    #This must be set up in master script
402
    #target_max_nb <- 100,000 #this is not actually used yet in the current implementation,can be set to very high value...
403
    #target_min_nb <- 8,000 #this is the target number of stations we would like: to be set by Alberto...
404
    ##max_dist <- 1000 # the maximum distance used for pruning ie removes stations that are closer than 1000m, this in degree...? 
405
    #max_dist <- 0.009*5 #5km in degree
406
    #min_dist <- 0    #minimum distance to start with
407
    #step_dist <- 0.009 #iteration step to remove the stations
364 408

  
365
  #test5 <- sub_sampling_by_dist_nb_stat(target_range_nb=target_range_nb,dist_range=dist_range,step_dist=step_dist,data_in=data_month,sampling=T,combined=F)
366
  #note that  this is for monthly stations.
367
  
368
  if(sub_sampling==TRUE){ #sub_sampling is an option for the monthly station
369
    sub_sampling_obj <- sub_sampling_by_dist_nb_stat(target_range_nb=target_range_nb,dist_range=dist_range,step_dist=step_dist,data_in=dst,sampling=T,combined=F)
370
    dst <- sub_sampling_obj$data #get sub-sampled data...for monhtly stations
371
    #save the information for later use (validation at monthly step!!)
372
    save(sub_sampling_obj,file= file.path(out_path,paste("sub_sampling_obj_",interpolation_method,"_", out_prefix,".RData",sep="")))
373
  }
409
    #test5 <- sub_sampling_by_dist_nb_stat(target_range_nb=target_range_nb,dist_range=dist_range,step_dist=step_dist,data_in=data_month,sampling=T,combined=F)
410
    #note that  this is for monthly stations.
411

  
412
    if(sub_sampling==TRUE){
413
      print("monthly subsample")
414
      sub_sampling_obj <- sub_sampling_by_dist_nb_stat(target_range_nb=target_range_nb,dist_range=dist_range,step_dist=step_dist,data_in=dst,sampling=T,combined=F)
415
      dst <- sub_sampling_obj$data #get sub-sampled data...for monhtly stations
416
      #save the information for later use (validation at monthly step!!)
417
      #save(sub_sampling_obj,file= file.path(out_path,paste("sub_sampling_obj_",interpolation_method,"_", out_prefix,".RData",sep="")))
418
      save(sub_sampling_obj,file= file.path(out_path_clim,paste("sub_sampling_obj_",interpolation_method,"_", out_prefix,".RData",sep="")))
419
    }
374 420
 
375
  #Make sure this is still a shapefile...!! This might need to be uncommented...
376
  
377
  #coordinates(dst)<-cbind(dst$x,dst$y) #Transforming data_RST_SDF into a spatial point dataframe
378
  #CRS_reg<-proj4string(data_reg)
379
  #proj4string(dst)<-CRS_reg  #Need to assign coordinates...
421
    #Make sure this is still a shapefile...!! This might need to be uncommented...
422
    #coordinates(dst)<-cbind(dst$x,dst$y) #Transforming data_RST_SDF into a spatial point dataframe
423
    #CRS_reg<-proj4string(data_reg)
424
    #proj4string(dst)<-CRS_reg  #Need to assign coordinates...
380 425

  
381
  ####
382
  #write out a new shapefile (including .prj component)
383
  dst$OID<-1:nrow(dst) #need a unique ID?
384
  outfile6<-file.path(out_path,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep=""))  #Name of the file
385
  writeOGR(dst,dsn= dirname(outfile6),layer= sub(".shp","",basename(outfile6)), driver="ESRI Shapefile",overwrite_layer=TRUE)
426
    ####
427
    #write out a new shapefile (including .prj component)
428
    dst$OID<-1:nrow(dst) #need a unique ID?
429
    #outfile6<-file.path(out_path,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep=""))  #Name of the file
430
    outfile6<-file.path(out_path_clim,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep=""))  #Name of the file
431

  
432
    writeOGR(dst,dsn= dirname(outfile6),layer= sub(".shp","",basename(outfile6)), driver="ESRI Shapefile",overwrite_layer=TRUE)
386 433
  
387
  outfile6_rds<-sub(".shp",".rds",outfile6)
388
  saveRDS(dst,outfile6_rds)
434
    outfile6_rds<-sub(".shp",".rds",outfile6)
435
    saveRDS(dst,outfile6_rds)
436

  
437
  }
438
    
439
  ###If clim exists, just create name for outfile5 to object used later...
440
  #outfile6 <- file.path(out_path_clim,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep=""))  #Name of the file
441
  #Could also be replaced by run_clim_query parameter.
442
  if(file.exists(outfile6)){
443
    outfile5<-file.path(out_path_clim,paste("monthly_query_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep=""))  #Name of the file
444
  }
389 445

  
390 446
  outfiles_obj<-list(outfile1,outfile2,outfile3,outfile4,outfile5,outfile6)
391 447
  names(outfiles_obj)<- c("loc_stations","loc_stations_ghcn","daily_query_ghcn_data","daily_covar_ghcn_data","monthly_query_ghcn_data","monthly_covar_ghcn_data")

Also available in: Unified diff