Revision 5c5e50b9
Added by Benoit Parmentier over 11 years ago
climate/research/oregon/interpolation/Database_stations_covariates_processing_function.R | ||
---|---|---|
21 | 21 |
#1) loc_stations: locations of stations as shapefile in EPSG 4326 |
22 | 22 |
#2) loc_stations_ghcn: ghcn daily data for the year range of interpolation (locally projected) |
23 | 23 |
#3) daily_covar_ghcn_data: ghcn daily data with covariates for the year range of interpolation (locally projected) |
24 |
#4) monthly_covar_ghcn_data: ghcn daily data with covariates for the year range of interpolation (locally projected) |
|
24 |
#4) monthly_query_ghcn_data: ghcn daily data from monthly query before application of quality flag |
|
25 |
#5) monthly_covar_ghcn_data: ghcn monthly averaged data with covariates for the year range of interpolation (locally projected) |
|
25 | 26 |
|
26 | 27 |
#AUTHOR: Benoit Parmentier |
27 |
#DATE: 03/13/2013
|
|
28 |
#DATE: 03/24/2013
|
|
28 | 29 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363-- |
29 | 30 |
#Comments and TODO |
30 | 31 |
#-Add buffer option... |
... | ... | |
68 | 69 |
year_start <-list_param_prep$range_years[1] #"2010" #starting year for the query (included) |
69 | 70 |
year_end <-list_param_prep$range_years[2] #"2011" #end year for the query (excluded) |
70 | 71 |
year_start_clim <-list_param_prep$range_years_clim[1] #right bound not included in the range!! starting year for monthly query to calculate clime |
72 |
year_end_clim <-list_param_prep$range_years_clim[2] #right bound not included in the range!! starting year for monthly query to calculate clime |
|
71 | 73 |
infile1<- list_param_prep$infile1 #This is the shape file of outline of the study area #It is an input/output of the covariate script |
72 | 74 |
infile2<-list_param_prep$infile2 #"/home/layers/data/climate/ghcn/v2.92-upd-2012052822/ghcnd-stations.txt" #This is the textfile of station locations from GHCND |
73 | 75 |
infile3<-list_param_prep$infile_covariates #"covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script |
... | ... | |
136 | 138 |
proj4string(data_reg)<-CRS_locs_WGS84 #Assign coordinates reference system in PROJ4 format |
137 | 139 |
data_reg<-spTransform(data_reg,CRS(CRS_interp)) #Project from WGS84 to new coord. system |
138 | 140 |
|
141 |
data_d <-data_reg #data_d: daily data containing the query without screening |
|
142 |
data_reg <-subset(data_d,mflag=="0" | mflag=="S") #should be input arguments!! |
|
143 |
|
|
139 | 144 |
################################################################## |
140 | 145 |
### STEP 3: Save results and outuput in textfile and a shape file |
141 | 146 |
#browser() |
... | ... | |
144 | 149 |
writeOGR(stat_reg,dsn= dirname(outfile1),layer= sub(".shp","",basename(outfile1)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
145 | 150 |
#writeOGR(dst,dsn= ".",layer= sub(".shp","",outfile4), driver="ESRI Shapefile",overwrite_layer=TRUE) |
146 | 151 |
|
147 |
outfile2<-file.path(in_path,paste("ghcn_data_",var,"_",year_start_clim,"_",year_end,out_prefix,".shp",sep="")) #Name of the file
|
|
152 |
outfile2<-file.path(in_path,paste("ghcn_data_",var,"_",year_start,"_",year_end,out_prefix,".shp",sep="")) #Name of the file |
|
148 | 153 |
#writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension |
149 | 154 |
writeOGR(data_reg,dsn= dirname(outfile2),layer= sub(".shp","",basename(outfile2)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
150 | 155 |
|
... | ... | |
199 | 204 |
FROM ghcn |
200 | 205 |
WHERE element=",shQuote(var), |
201 | 206 |
"AND year>=",year_start_clim, |
202 |
"AND year<",year_end, |
|
207 |
"AND year<",year_end_clim,
|
|
203 | 208 |
"AND station IN ",list_s,";",sep="")) #Selecting station using a SQL query |
204 | 209 |
time_duration<-proc.time()-time1 #Time for the query may be long given the size of the database |
205 | 210 |
time_minutes<-time_duration[3]/60 |
... | ... | |
208 | 213 |
date1<-ISOdate(data_m$year,data_m$month,data_m$day) #Creating a date object from 3 separate column |
209 | 214 |
date2<-as.POSIXlt(as.Date(date1)) |
210 | 215 |
data_m$date<-date2 |
216 |
#Save the query data here... |
|
217 |
data_m<-merge(data_m, stat_reg, by.x="station", by.y="STAT_ID") #Inner join all columns are retained |
|
218 |
#Extracting covariates from stack for the monthly dataset... |
|
219 |
coords<- data_m[c('lon','lat')] #Define coordinates in a data frame |
|
220 |
coordinates(data_m)<-coords #Assign coordinates to the data frame |
|
221 |
proj4string(data_m)<-CRS_locs_WGS84 #Assign coordinates reference system in PROJ4 format |
|
222 |
data_m<-spTransform(data_m,CRS(CRS_interp)) #Project from WGS84 to new coord. system |
|
223 |
outfile4<-file.path(in_path,paste("monthly_query_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep="")) #Name of the file |
|
224 |
writeOGR(data_m,dsn= dirname(outfile4),layer= sub(".shp","",basename(outfile4)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
225 |
|
|
211 | 226 |
#In Venezuela and other regions where there are not many stations...mflag==S should be added..see Durenne etal.2010. |
212 | 227 |
#d<-subset(data_m,year>=2000 & mflag=="0" ) #Selecting dataset 2000-2010 with good quality: 193 stations |
228 |
|
|
213 | 229 |
d<-subset(data_m,mflag=="0" | mflag=="S") #should be input arguments!! |
214 | 230 |
#May need some screeing??? i.e. range of temp and elevation... |
215 | 231 |
d1<-aggregate(value~station+month, data=d, mean) #Calculate monthly mean for every station in OR |
216 |
id<-as.data.frame(unique(d1$station)) #Unique station in OR for year 2000-2010: 193 but 7 loss of monthly avg |
|
217 |
|
|
232 |
#d2<-aggregate(value~station+month, data=d, length) #Calculate monthly mean for every station in OR |
|
233 |
is_not_na_fun<-function(x) sum(!is.na(x)) #count the number of available observation |
|
234 |
d2<-aggregate(value~station+month, data=d, is_not_na_fun) #Calculate monthly mean for every station in OR |
|
235 |
#names(d2)[names(d2)=="value"] <-"nobs_station" |
|
236 |
d1$nobs_station<-d2$value |
|
218 | 237 |
dst<-merge(d1, stat_reg, by.x="station", by.y="STAT_ID") #Inner join all columns are retained |
219 | 238 |
|
220 | 239 |
#This allows to change only one name of the data.frame |
... | ... | |
251 | 270 |
#### |
252 | 271 |
#write out a new shapefile (including .prj component) |
253 | 272 |
dst$OID<-1:nrow(dst) #need a unique ID? |
254 |
outfile4<-file.path(in_path,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end,out_prefix,".shp",sep="")) #Name of the file
|
|
255 |
writeOGR(dst,dsn= dirname(outfile4),layer= sub(".shp","",basename(outfile4)), driver="ESRI Shapefile",overwrite_layer=TRUE)
|
|
273 |
outfile5<-file.path(in_path,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep="")) #Name of the file
|
|
274 |
writeOGR(dst,dsn= dirname(outfile5),layer= sub(".shp","",basename(outfile5)), driver="ESRI Shapefile",overwrite_layer=TRUE)
|
|
256 | 275 |
|
257 | 276 |
### list of outputs return |
258 | 277 |
|
259 |
outfiles_obj<-list(outfile1,outfile2,outfile3,outfile4) |
|
260 |
names(outfiles_obj)<- c("loc_stations","loc_stations_ghcn","daily_covar_ghcn_data","monthly_covar_ghcn_data") |
|
278 |
outfiles_obj<-list(outfile1,outfile2,outfile3,outfile4,outfile5)
|
|
279 |
names(outfiles_obj)<- c("loc_stations","loc_stations_ghcn","daily_covar_ghcn_data","monthly_query_ghcn_data_","monthly_covar_ghcn_data")
|
|
261 | 280 |
return(outfiles_obj) |
262 | 281 |
|
263 | 282 |
#END OF FUNCTION # |
264 | 283 |
} |
265 | 284 |
|
266 |
##### END OF SCRIPT ########## |
|
285 |
########## END OF SCRIPT ########## |
Also available in: Unified diff
Data preparation, degugging query year period and adding other improvements to keep track of datasets used in interpolation