Project

General

Profile

« Previous | Next » 

Revision 0709a332

Added by Benoit Parmentier over 11 years ago

data preparation following up modifications related to flags and output from functions

View differences:

climate/research/oregon/interpolation/Database_stations_covariates_processing_function.R
5 5
database_covariates_preparation<-function(list_param_prep){
6 6
  #This function performs queries on the Postgres ghcnd database for stations matching the             
7 7
  #interpolation area. It requires 11 inputs:                                           
8
  # 1) db.name :  Postgres database name containing the meteorological stations
9
  # 2) var: the variable of interest - "TMAX","TMIN" or "PRCP" 
10
  # 3) range_years: range of records used in the daily interpolation, note that upper bound year is not included               
11
  # 4) range_years_clim: range of records used in the monthly climatology interpolation, note that upper bound is not included
12
  # 5) infile1: region outline as a shape file - used in the interpolation  stage too                              
13
  # 6) infile2: ghcnd stations locations as a textfile name with lat-long fields                                                                                   
14
  # 7) infile_covarariates: tif file of raser covariates for the interpolation area: it should have a local projection                                                                                           
15
  # 8) CRS_locs_WGS84: longlat EPSG 4326 used as coordinates reference system (proj4)for stations locations
16
  # 9) in_path: input path for covariates data and other files, this is also the output?
8
  # 1)  db.name :  Postgres database name containing the meteorological stations
9
  # 2)  var: the variable of interest - "TMAX","TMIN" or "PRCP" 
10
  # 3)  range_years: range of records used in the daily interpolation, note that upper bound year is not included               
11
  # 4)  range_years_clim: range of records used in the monthly climatology interpolation, note that upper bound is not included
12
  # 5)  infile1: region outline as a shape file - used in the interpolation  stage too                              
13
  # 6)  infile2: ghcnd stations locations as a textfile name with lat-long fields                                                                                   
14
  # 7)  infile_covarariates: tif file of raser covariates for the interpolation area: it should have a local projection                                                                                           
15
  # 8)  CRS_locs_WGS84: longlat EPSG 4326 used as coordinates reference system (proj4)for stations locations
16
  # 9)  in_path: input path for covariates data and other files, this is also the output?
17 17
  # 10) covar_names: names of covariates used for the interpolation --may be removed later? (should be stored in the brick)
18
  # 11) out_prefix: output suffix added to output names--it is the same in the interpolation script
18
  # 11) qc_flags_stations: flag used to screen out at this stage only two values...
19
  # 12) out_prefix: output suffix added to output names--it is the same in the interpolation script
19 20
  #
20 21
  #The output is a list of four shapefile names produced by the function:
21 22
  #1) loc_stations: locations of stations as shapefile in EPSG 4326
22 23
  #2) loc_stations_ghcn: ghcn daily data for the year range of interpolation (locally projected)
23
  #3) daily_covar_ghcn_data: ghcn daily data with covariates for the year range of interpolation (locally projected)
24
  #4) monthly_query_ghcn_data: ghcn daily data from monthly query before application of quality flag
25
  #5) monthly_covar_ghcn_data: ghcn monthly averaged data with covariates for the year range of interpolation (locally projected)
24
  #3) daily_query_ghcn_data: ghcn daily data from daily query before application of quality flag
25
  #4) daily_covar_ghcn_data: ghcn daily data with covariates for the year range of interpolation (locally projected)
26
  #5) monthly_query_ghcn_data: ghcn daily data from monthly query before application of quality flag
27
  #6) monthly_covar_ghcn_data: ghcn monthly averaged data with covariates for the year range of interpolation (locally projected)
26 28
  
27 29
  #AUTHOR: Benoit Parmentier                                                                       
28
  #DATE: 03/24/2013                                                                                 
30
  #DATE: 03/28/2013                                                                                 
29 31
  #PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--     
30 32
  #Comments and TODO
31 33
  #-Add buffer option...
......
75 77
  infile3<-list_param_prep$infile_covariates #"covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script
76 78
  CRS_locs_WGS84<-list_param_prep$CRS_locs_WGS84 #Station coords WGS84: same as earlier
77 79
  in_path <- list_param_prep$in_path #CRS_locs_WGS84"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data/"
78
  out_prefix<-list_param_prep$out_prefix #"_365d_GAM_fus5_all_lstd_03012013"                #User defined output prefix
79
  #qc_flags<-list_param_prep$qc_flags    flags allowed for the query from the GHCND??
80 80
  covar_names<-list_param_prep$covar_names # names should be written in the tif file!!!
81
  qc_flags_stations <- list_param_prep$qc_flags_stations #flags allowed for the query from the GHCND??
82
  out_prefix<-list_param_prep$out_prefix #"_365d_GAM_fus5_all_lstd_03012013"                #User defined output prefix
81 83
  
82 84
  ## working directory is the same for input and output for this function  
83 85
  setwd(in_path) 
......
122 124
  time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
123 125
  time_minutes<-time_duration[3]/60
124 126
  dbDisconnect(db)
125
  ###
126
  #Add month query and averages here...
127
  ###
128
  
129
  #data2 contains only 46 stations for Venezueal area??
127

  
130 128
  data_table<-merge(data2,as.data.frame(stat_reg), by.x = "station", by.y = "STAT_ID")
131 129
  
132 130
  #Transform the subset data frame in a spatial data frame and reproject
133 131
  data_reg<-data_table                               #Make a copy of the data frame
134 132
  coords<- data_reg[c('lon','lat')]              #Define coordinates in a data frame: clean up here!!
135
  #Wrong label...it is in fact projected...
136 133
  coordinates(data_reg)<-coords                      #Assign coordinates to the data frame
137
  #proj4string(data3)<-locs_coord                  #Assign coordinates reference system in PROJ4 format
138 134
  proj4string(data_reg)<-CRS_locs_WGS84                #Assign coordinates reference system in PROJ4 format
139 135
  data_reg<-spTransform(data_reg,CRS(CRS_interp))     #Project from WGS84 to new coord. system
140 136
  
141 137
  data_d <-data_reg  #data_d: daily data containing the query without screening
142
  data_reg <-subset(data_d,mflag=="0" | mflag=="S") #should be input arguments!!
143

  
138
  #data_reg <-subset(data_d,mflag=="0" | mflag=="S") #should be input arguments!!
139
  data_reg <-subset(data_d,mflag==qc_flags_stations[1] | mflag==qc_flags_stations[2]) #screening using flags
140
  
144 141
  ##################################################################
145 142
  ### STEP 3: Save results and outuput in textfile and a shape file
146 143
  #browser()
......
149 146
  writeOGR(stat_reg,dsn= dirname(outfile1),layer= sub(".shp","",basename(outfile1)), driver="ESRI Shapefile",overwrite_layer=TRUE)
150 147
  #writeOGR(dst,dsn= ".",layer= sub(".shp","",outfile4), driver="ESRI Shapefile",overwrite_layer=TRUE)
151 148
  
152
  outfile2<-file.path(in_path,paste("ghcn_data_",var,"_",year_start,"_",year_end,out_prefix,".shp",sep=""))         #Name of the file
149
  outfile2<-file.path(in_path,paste("ghcn_data_query_",var,"_",year_start,"_",year_end,out_prefix,".shp",sep=""))         #Name of the file
150
  #writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
151
  writeOGR(data_d,dsn= dirname(outfile2),layer= sub(".shp","",basename(outfile2)), driver="ESRI Shapefile",overwrite_layer=TRUE)
152
  
153
  outfile3<-file.path(in_path,paste("ghcn_data_",var,"_",year_start,"_",year_end,out_prefix,".shp",sep=""))         #Name of the file
153 154
  #writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
154
  writeOGR(data_reg,dsn= dirname(outfile2),layer= sub(".shp","",basename(outfile2)), driver="ESRI Shapefile",overwrite_layer=TRUE)
155
  writeOGR(data_reg,dsn= dirname(outfile3),layer= sub(".shp","",basename(outfile3)), driver="ESRI Shapefile",overwrite_layer=TRUE)
155 156
  
156 157
  ###################################################################
157 158
  ### STEP 4: Extract values at stations from covariates stack of raster images
......
187 188
  }
188 189
  
189 190
  #write out a new shapefile (including .prj component)
190
  outfile3<-file.path(in_path,paste("daily_covariates_ghcn_data_",var,"_",range_years[1],"_",range_years[2],out_prefix,".shp",sep=""))         #Name of the file
191
  writeOGR(data_RST_SDF,dsn= dirname(outfile3),layer= sub(".shp","",basename(outfile3)), driver="ESRI Shapefile",overwrite_layer=TRUE)
191
  outfile4<-file.path(in_path,paste("daily_covariates_ghcn_data_",var,"_",range_years[1],"_",range_years[2],out_prefix,".shp",sep=""))         #Name of the file
192
  writeOGR(data_RST_SDF,dsn= dirname(outfile4),layer= sub(".shp","",basename(outfile4)), driver="ESRI Shapefile",overwrite_layer=TRUE)
192 193
  
193 194
  ###############################################################
194 195
  ######## STEP 5: Preparing monthly averages from the ProstGres database
......
197 198
  db <- dbConnect(drv, dbname=db.name)
198 199
  
199 200
  #year_start_clim: set at the start of the script
200
  #year_end<-2011
201 201
  time1<-proc.time()    #Start stop watch
202 202
  list_s<-format_s(stat_reg$STAT_ID)
203 203
  data_m<-dbGetQuery(db, paste("SELECT *
......
220 220
  coordinates(data_m)<-coords                      #Assign coordinates to the data frame
221 221
  proj4string(data_m)<-CRS_locs_WGS84                  #Assign coordinates reference system in PROJ4 format
222 222
  data_m<-spTransform(data_m,CRS(CRS_interp))     #Project from WGS84 to new coord. system
223
  outfile4<-file.path(in_path,paste("monthly_query_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep=""))  #Name of the file
224
  writeOGR(data_m,dsn= dirname(outfile4),layer= sub(".shp","",basename(outfile4)), driver="ESRI Shapefile",overwrite_layer=TRUE)
223
  outfile5<-file.path(in_path,paste("monthly_query_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep=""))  #Name of the file
224
  writeOGR(data_m,dsn= dirname(outfile5),layer= sub(".shp","",basename(outfile5)), driver="ESRI Shapefile",overwrite_layer=TRUE)
225 225
  
226 226
  #In Venezuela and other regions where there are not many stations...mflag==S should be added..see Durenne etal.2010.
227
  #d<-subset(data_m,year>=2000 & mflag=="0" ) #Selecting dataset 2000-2010 with good quality: 193 stations
228 227

  
229
  d<-subset(data_m,mflag=="0" | mflag=="S") #should be input arguments!!
228
  #d<-subset(data_m,mflag=="0" | mflag=="S") #should be input arguments!!
229
  d<-subset(data_m,mflag==qc_flags_stations[1] | mflag==qc_flags_stations[2])
230 230
  #May need some screeing??? i.e. range of temp and elevation...
231 231
  d1<-aggregate(value~station+month, data=d, mean)  #Calculate monthly mean for every station in OR
232 232
  #d2<-aggregate(value~station+month, data=d, length)  #Calculate monthly mean for every station in OR
......
270 270
  ####
271 271
  #write out a new shapefile (including .prj component)
272 272
  dst$OID<-1:nrow(dst) #need a unique ID?
273
  outfile5<-file.path(in_path,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep=""))  #Name of the file
274
  writeOGR(dst,dsn= dirname(outfile5),layer= sub(".shp","",basename(outfile5)), driver="ESRI Shapefile",overwrite_layer=TRUE)
273
  outfile6<-file.path(in_path,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep=""))  #Name of the file
274
  writeOGR(dst,dsn= dirname(outfile6),layer= sub(".shp","",basename(outfile6)), driver="ESRI Shapefile",overwrite_layer=TRUE)
275 275
  
276 276
  ### list of outputs return
277 277
  
278
  outfiles_obj<-list(outfile1,outfile2,outfile3,outfile4,outfile5)
279
  names(outfiles_obj)<- c("loc_stations","loc_stations_ghcn","daily_covar_ghcn_data","monthly_query_ghcn_data_","monthly_covar_ghcn_data")
278
  outfiles_obj<-list(outfile1,outfile2,outfile3,outfile4,outfile5,outfile6)
279
  names(outfiles_obj)<- c("loc_stations","loc_stations_ghcn","daily_query_ghcn_data","daily_covar_ghcn_data","monthly_query_ghcn_data","monthly_covar_ghcn_data")
280 280
  return(outfiles_obj)
281 281
  
282 282
  #END OF FUNCTION # 

Also available in: Unified diff