Revision 0709a332
Added by Benoit Parmentier over 11 years ago
climate/research/oregon/interpolation/Database_stations_covariates_processing_function.R | ||
---|---|---|
5 | 5 |
database_covariates_preparation<-function(list_param_prep){ |
6 | 6 |
#This function performs queries on the Postgres ghcnd database for stations matching the |
7 | 7 |
#interpolation area. It requires 11 inputs: |
8 |
# 1) db.name : Postgres database name containing the meteorological stations |
|
9 |
# 2) var: the variable of interest - "TMAX","TMIN" or "PRCP" |
|
10 |
# 3) range_years: range of records used in the daily interpolation, note that upper bound year is not included |
|
11 |
# 4) range_years_clim: range of records used in the monthly climatology interpolation, note that upper bound is not included |
|
12 |
# 5) infile1: region outline as a shape file - used in the interpolation stage too |
|
13 |
# 6) infile2: ghcnd stations locations as a textfile name with lat-long fields |
|
14 |
# 7) infile_covarariates: tif file of raser covariates for the interpolation area: it should have a local projection |
|
15 |
# 8) CRS_locs_WGS84: longlat EPSG 4326 used as coordinates reference system (proj4)for stations locations |
|
16 |
# 9) in_path: input path for covariates data and other files, this is also the output? |
|
8 |
# 1) db.name : Postgres database name containing the meteorological stations
|
|
9 |
# 2) var: the variable of interest - "TMAX","TMIN" or "PRCP"
|
|
10 |
# 3) range_years: range of records used in the daily interpolation, note that upper bound year is not included
|
|
11 |
# 4) range_years_clim: range of records used in the monthly climatology interpolation, note that upper bound is not included
|
|
12 |
# 5) infile1: region outline as a shape file - used in the interpolation stage too
|
|
13 |
# 6) infile2: ghcnd stations locations as a textfile name with lat-long fields
|
|
14 |
# 7) infile_covarariates: tif file of raser covariates for the interpolation area: it should have a local projection
|
|
15 |
# 8) CRS_locs_WGS84: longlat EPSG 4326 used as coordinates reference system (proj4)for stations locations
|
|
16 |
# 9) in_path: input path for covariates data and other files, this is also the output?
|
|
17 | 17 |
# 10) covar_names: names of covariates used for the interpolation --may be removed later? (should be stored in the brick) |
18 |
# 11) out_prefix: output suffix added to output names--it is the same in the interpolation script |
|
18 |
# 11) qc_flags_stations: flag used to screen out at this stage only two values... |
|
19 |
# 12) out_prefix: output suffix added to output names--it is the same in the interpolation script |
|
19 | 20 |
# |
20 | 21 |
#The output is a list of four shapefile names produced by the function: |
21 | 22 |
#1) loc_stations: locations of stations as shapefile in EPSG 4326 |
22 | 23 |
#2) loc_stations_ghcn: ghcn daily data for the year range of interpolation (locally projected) |
23 |
#3) daily_covar_ghcn_data: ghcn daily data with covariates for the year range of interpolation (locally projected) |
|
24 |
#4) monthly_query_ghcn_data: ghcn daily data from monthly query before application of quality flag |
|
25 |
#5) monthly_covar_ghcn_data: ghcn monthly averaged data with covariates for the year range of interpolation (locally projected) |
|
24 |
#3) daily_query_ghcn_data: ghcn daily data from daily query before application of quality flag |
|
25 |
#4) daily_covar_ghcn_data: ghcn daily data with covariates for the year range of interpolation (locally projected) |
|
26 |
#5) monthly_query_ghcn_data: ghcn daily data from monthly query before application of quality flag |
|
27 |
#6) monthly_covar_ghcn_data: ghcn monthly averaged data with covariates for the year range of interpolation (locally projected) |
|
26 | 28 |
|
27 | 29 |
#AUTHOR: Benoit Parmentier |
28 |
#DATE: 03/24/2013
|
|
30 |
#DATE: 03/28/2013
|
|
29 | 31 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363-- |
30 | 32 |
#Comments and TODO |
31 | 33 |
#-Add buffer option... |
... | ... | |
75 | 77 |
infile3<-list_param_prep$infile_covariates #"covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script |
76 | 78 |
CRS_locs_WGS84<-list_param_prep$CRS_locs_WGS84 #Station coords WGS84: same as earlier |
77 | 79 |
in_path <- list_param_prep$in_path #CRS_locs_WGS84"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data/" |
78 |
out_prefix<-list_param_prep$out_prefix #"_365d_GAM_fus5_all_lstd_03012013" #User defined output prefix |
|
79 |
#qc_flags<-list_param_prep$qc_flags flags allowed for the query from the GHCND?? |
|
80 | 80 |
covar_names<-list_param_prep$covar_names # names should be written in the tif file!!! |
81 |
qc_flags_stations <- list_param_prep$qc_flags_stations #flags allowed for the query from the GHCND?? |
|
82 |
out_prefix<-list_param_prep$out_prefix #"_365d_GAM_fus5_all_lstd_03012013" #User defined output prefix |
|
81 | 83 |
|
82 | 84 |
## working directory is the same for input and output for this function |
83 | 85 |
setwd(in_path) |
... | ... | |
122 | 124 |
time_duration<-proc.time()-time1 #Time for the query may be long given the size of the database |
123 | 125 |
time_minutes<-time_duration[3]/60 |
124 | 126 |
dbDisconnect(db) |
125 |
### |
|
126 |
#Add month query and averages here... |
|
127 |
### |
|
128 |
|
|
129 |
#data2 contains only 46 stations for Venezueal area?? |
|
127 |
|
|
130 | 128 |
data_table<-merge(data2,as.data.frame(stat_reg), by.x = "station", by.y = "STAT_ID") |
131 | 129 |
|
132 | 130 |
#Transform the subset data frame in a spatial data frame and reproject |
133 | 131 |
data_reg<-data_table #Make a copy of the data frame |
134 | 132 |
coords<- data_reg[c('lon','lat')] #Define coordinates in a data frame: clean up here!! |
135 |
#Wrong label...it is in fact projected... |
|
136 | 133 |
coordinates(data_reg)<-coords #Assign coordinates to the data frame |
137 |
#proj4string(data3)<-locs_coord #Assign coordinates reference system in PROJ4 format |
|
138 | 134 |
proj4string(data_reg)<-CRS_locs_WGS84 #Assign coordinates reference system in PROJ4 format |
139 | 135 |
data_reg<-spTransform(data_reg,CRS(CRS_interp)) #Project from WGS84 to new coord. system |
140 | 136 |
|
141 | 137 |
data_d <-data_reg #data_d: daily data containing the query without screening |
142 |
data_reg <-subset(data_d,mflag=="0" | mflag=="S") #should be input arguments!! |
|
143 |
|
|
138 |
#data_reg <-subset(data_d,mflag=="0" | mflag=="S") #should be input arguments!! |
|
139 |
data_reg <-subset(data_d,mflag==qc_flags_stations[1] | mflag==qc_flags_stations[2]) #screening using flags |
|
140 |
|
|
144 | 141 |
################################################################## |
145 | 142 |
### STEP 3: Save results and outuput in textfile and a shape file |
146 | 143 |
#browser() |
... | ... | |
149 | 146 |
writeOGR(stat_reg,dsn= dirname(outfile1),layer= sub(".shp","",basename(outfile1)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
150 | 147 |
#writeOGR(dst,dsn= ".",layer= sub(".shp","",outfile4), driver="ESRI Shapefile",overwrite_layer=TRUE) |
151 | 148 |
|
152 |
outfile2<-file.path(in_path,paste("ghcn_data_",var,"_",year_start,"_",year_end,out_prefix,".shp",sep="")) #Name of the file |
|
149 |
outfile2<-file.path(in_path,paste("ghcn_data_query_",var,"_",year_start,"_",year_end,out_prefix,".shp",sep="")) #Name of the file |
|
150 |
#writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension |
|
151 |
writeOGR(data_d,dsn= dirname(outfile2),layer= sub(".shp","",basename(outfile2)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
152 |
|
|
153 |
outfile3<-file.path(in_path,paste("ghcn_data_",var,"_",year_start,"_",year_end,out_prefix,".shp",sep="")) #Name of the file |
|
153 | 154 |
#writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension |
154 |
writeOGR(data_reg,dsn= dirname(outfile2),layer= sub(".shp","",basename(outfile2)), driver="ESRI Shapefile",overwrite_layer=TRUE)
|
|
155 |
writeOGR(data_reg,dsn= dirname(outfile3),layer= sub(".shp","",basename(outfile3)), driver="ESRI Shapefile",overwrite_layer=TRUE)
|
|
155 | 156 |
|
156 | 157 |
################################################################### |
157 | 158 |
### STEP 4: Extract values at stations from covariates stack of raster images |
... | ... | |
187 | 188 |
} |
188 | 189 |
|
189 | 190 |
#write out a new shapefile (including .prj component) |
190 |
outfile3<-file.path(in_path,paste("daily_covariates_ghcn_data_",var,"_",range_years[1],"_",range_years[2],out_prefix,".shp",sep="")) #Name of the file
|
|
191 |
writeOGR(data_RST_SDF,dsn= dirname(outfile3),layer= sub(".shp","",basename(outfile3)), driver="ESRI Shapefile",overwrite_layer=TRUE)
|
|
191 |
outfile4<-file.path(in_path,paste("daily_covariates_ghcn_data_",var,"_",range_years[1],"_",range_years[2],out_prefix,".shp",sep="")) #Name of the file
|
|
192 |
writeOGR(data_RST_SDF,dsn= dirname(outfile4),layer= sub(".shp","",basename(outfile4)), driver="ESRI Shapefile",overwrite_layer=TRUE)
|
|
192 | 193 |
|
193 | 194 |
############################################################### |
194 | 195 |
######## STEP 5: Preparing monthly averages from the ProstGres database |
... | ... | |
197 | 198 |
db <- dbConnect(drv, dbname=db.name) |
198 | 199 |
|
199 | 200 |
#year_start_clim: set at the start of the script |
200 |
#year_end<-2011 |
|
201 | 201 |
time1<-proc.time() #Start stop watch |
202 | 202 |
list_s<-format_s(stat_reg$STAT_ID) |
203 | 203 |
data_m<-dbGetQuery(db, paste("SELECT * |
... | ... | |
220 | 220 |
coordinates(data_m)<-coords #Assign coordinates to the data frame |
221 | 221 |
proj4string(data_m)<-CRS_locs_WGS84 #Assign coordinates reference system in PROJ4 format |
222 | 222 |
data_m<-spTransform(data_m,CRS(CRS_interp)) #Project from WGS84 to new coord. system |
223 |
outfile4<-file.path(in_path,paste("monthly_query_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep="")) #Name of the file
|
|
224 |
writeOGR(data_m,dsn= dirname(outfile4),layer= sub(".shp","",basename(outfile4)), driver="ESRI Shapefile",overwrite_layer=TRUE)
|
|
223 |
outfile5<-file.path(in_path,paste("monthly_query_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep="")) #Name of the file
|
|
224 |
writeOGR(data_m,dsn= dirname(outfile5),layer= sub(".shp","",basename(outfile5)), driver="ESRI Shapefile",overwrite_layer=TRUE)
|
|
225 | 225 |
|
226 | 226 |
#In Venezuela and other regions where there are not many stations...mflag==S should be added..see Durenne etal.2010. |
227 |
#d<-subset(data_m,year>=2000 & mflag=="0" ) #Selecting dataset 2000-2010 with good quality: 193 stations |
|
228 | 227 |
|
229 |
d<-subset(data_m,mflag=="0" | mflag=="S") #should be input arguments!! |
|
228 |
#d<-subset(data_m,mflag=="0" | mflag=="S") #should be input arguments!! |
|
229 |
d<-subset(data_m,mflag==qc_flags_stations[1] | mflag==qc_flags_stations[2]) |
|
230 | 230 |
#May need some screeing??? i.e. range of temp and elevation... |
231 | 231 |
d1<-aggregate(value~station+month, data=d, mean) #Calculate monthly mean for every station in OR |
232 | 232 |
#d2<-aggregate(value~station+month, data=d, length) #Calculate monthly mean for every station in OR |
... | ... | |
270 | 270 |
#### |
271 | 271 |
#write out a new shapefile (including .prj component) |
272 | 272 |
dst$OID<-1:nrow(dst) #need a unique ID? |
273 |
outfile5<-file.path(in_path,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep="")) #Name of the file
|
|
274 |
writeOGR(dst,dsn= dirname(outfile5),layer= sub(".shp","",basename(outfile5)), driver="ESRI Shapefile",overwrite_layer=TRUE)
|
|
273 |
outfile6<-file.path(in_path,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep="")) #Name of the file
|
|
274 |
writeOGR(dst,dsn= dirname(outfile6),layer= sub(".shp","",basename(outfile6)), driver="ESRI Shapefile",overwrite_layer=TRUE)
|
|
275 | 275 |
|
276 | 276 |
### list of outputs return |
277 | 277 |
|
278 |
outfiles_obj<-list(outfile1,outfile2,outfile3,outfile4,outfile5) |
|
279 |
names(outfiles_obj)<- c("loc_stations","loc_stations_ghcn","daily_covar_ghcn_data","monthly_query_ghcn_data_","monthly_covar_ghcn_data")
|
|
278 |
outfiles_obj<-list(outfile1,outfile2,outfile3,outfile4,outfile5,outfile6)
|
|
279 |
names(outfiles_obj)<- c("loc_stations","loc_stations_ghcn","daily_query_ghcn_data","daily_covar_ghcn_data","monthly_query_ghcn_data","monthly_covar_ghcn_data")
|
|
280 | 280 |
return(outfiles_obj) |
281 | 281 |
|
282 | 282 |
#END OF FUNCTION # |
Also available in: Unified diff
data preparation following up modifications related to flags and output from functions