Revision d8a3533b
Added by Benoit Parmentier over 11 years ago
climate/research/oregon/interpolation/Database_stations_extraction_raster_covariates_processing.R | ||
---|---|---|
1 | 1 |
################## Data preparation for interpolation ####################################### |
2 | 2 |
############################ Extraction of station data ########################################## |
3 |
#This script perform queries on the Postgres database ghcn for stations matching the |
|
4 |
#interpolation area. It requires the following inputs: |
|
5 |
# 1)the text file ofGHCND stations from NCDC matching the database version release |
|
6 |
# 2)a shape file of the study area with geographic coordinates: lonlat WGS84 # |
|
7 |
# 3)a new coordinate system can be provided as an argument |
|
8 |
# 4)the variable of interest: "TMAX","TMIN" or "PRCP" |
|
9 |
# 5)the location of raser covariate stack. |
|
10 |
#The outputs are text files and a shape file of a time subset of the database |
|
11 |
#AUTHOR: Benoit Parmentier |
|
12 |
#DATE: 03/01/2013 |
|
13 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363-- |
|
14 |
#Comments and TODO |
|
15 |
#-Add buffer option... |
|
16 |
#-Add calculation of monthly mean... |
|
17 |
#Outputs are: |
|
18 |
# |
|
19 |
################################################################################################## |
|
20 | 3 |
|
21 |
###Loading R library and packages |
|
22 |
|
|
23 |
library(RPostgreSQL) |
|
24 |
library(sp) # Spatial pacakge with class definition by Bivand et al. |
|
25 |
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al. |
|
26 |
library(rgdal) # GDAL wrapper for R, spatial utilities |
|
27 |
library(rgeos) |
|
28 |
library(rgdal) |
|
29 |
library(raster) |
|
30 |
library(rasterVis) |
|
31 |
|
|
32 |
### Parameters and arguments |
|
4 |
### Parameters and arguments for the function |
|
33 | 5 |
|
34 | 6 |
db.name <- "ghcn" #name of the Postgres database |
35 | 7 |
var <- "TMAX" #name of the variables to keep: TMIN, TMAX or PRCP |
36 |
year_start<-"2010" #starting year for the query (included) |
|
37 |
year_end<-"2011" #end year for the query (excluded) |
|
38 |
year_start_clim<-"2000" #starting year for monthly query to calculate clime |
|
39 |
infile1<- "outline_venezuela_region__VE_01292013.shp" #This is the shape file of outline of the study area |
|
40 |
#It is an input/output of the covariate script |
|
8 |
range_years<-c("2010","2011") #right bound not included in the range!! |
|
9 |
range_years_clim<-c("2000","2011") #right bound not included in the range!! |
|
10 |
infile1<- "outline_venezuela_region__VE_01292013.shp" #This is the shape file of outline of the study area #It is an input/output of the covariate script |
|
41 | 11 |
infile2<-"/home/layers/data/climate/ghcn/v2.92-upd-2012052822/ghcnd-stations.txt" #This is the textfile of station locations from GHCND |
42 | 12 |
infile3<-"covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script |
43 |
|
|
44 |
new_proj<-"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs" |
|
45 |
#CRS_locs_WGS84<-"+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0" |
|
46 | 13 |
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84: same as earlier |
47 |
|
|
48 |
##Paths to inputs and output |
|
49 | 14 |
in_path <- "/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data/" |
50 |
|
|
51 |
setwd(in_path) |
|
52 |
|
|
53 |
out_prefix<-"_365d_GAM_fus5_all_lstd_02202013" #User defined output prefix |
|
54 |
|
|
55 |
#PUT ALL THE INPUT ARGUMENTS IN ONE OBJECT??? read the object as input!!! |
|
56 |
|
|
57 |
### Functions used in the script |
|
58 |
|
|
59 |
format_s <-function(s_ID){ |
|
60 |
#Format station ID in a vector format/tuple that is used in a psql query. |
|
61 |
# Argument 1: vector of station ID |
|
62 |
# Return: character of station ID |
|
63 |
tx2<-s_ID |
|
64 |
tx2<-as.character(tx2) |
|
65 |
stat_list<-tx2 |
|
66 |
temp<-shQuote(stat_list) |
|
67 |
t<-paste(temp, collapse= " ") |
|
68 |
t1<-gsub(" ", ",",t) |
|
69 |
sf_ID<-paste("(",t1,")",sep="") #vector containing the station ID to query |
|
70 |
return(sf_ID) |
|
71 |
} |
|
72 |
|
|
73 |
############ BEGIN: START OF THE SCRIPT ################# |
|
74 |
|
|
75 |
##### STEP 1: Select station in the study area |
|
76 |
|
|
77 |
filename<-sub(".shp","",infile1) #Removing the extension from file. |
|
78 |
interp_area <- readOGR(".",filename) |
|
79 |
CRS_interp<-proj4string(interp_area) #Storing the coordinate information: geographic coordinates longlat WGS84 |
|
80 |
|
|
81 |
#infile2<-"ghcnd-stations.txt" #This is the textfile of station locations from GHCND |
|
82 |
dat_stat <- read.fwf(infile2, |
|
83 |
widths = c(11,9,10,7,3,31,4,4,6),fill=TRUE) |
|
84 |
colnames(dat_stat)<-c("STAT_ID","lat","lon","elev","state","name","GSNF","HCNF","WMOID") |
|
85 |
coords<- dat_stat[,c('lon','lat')] |
|
86 |
coordinates(dat_stat)<-coords |
|
87 |
proj4string(dat_stat)<-CRS_locs_WGS84 #this is the WGS84 projection |
|
88 |
#proj4string(dat_stat)<-CRS_interp |
|
89 |
dat_stat2<-spTransform(dat_stat,CRS(new_proj)) # Project from WGS84 to new coord. system |
|
90 |
|
|
91 |
# Spatial query to find relevant stations |
|
92 |
inside <- !is.na(over(dat_stat2, as(interp_area, "SpatialPolygons"))) #Finding stations contained in the current interpolation area |
|
93 |
stat_reg<-dat_stat2[inside,] #Selecting stations contained in the current interpolation area |
|
94 |
|
|
95 |
#Quick visualization of station locations |
|
96 |
plot(interp_area, axes =TRUE) |
|
97 |
plot(stat_reg, pch=1, col="red", cex= 0.7, add=TRUE) |
|
98 |
#plot(data3,pch=1,col="blue",cex=3,add=TRUE) |
|
99 |
#legend("topleft", pch=1,col="red",bty="n",title= "Stations",cex=1.6) |
|
100 |
#only 357 station for Venezuela?? |
|
101 |
|
|
102 |
#### |
|
103 |
##TODO: Add buffer option? |
|
104 |
#### |
|
105 |
|
|
106 |
#### STEP 2: Connecting to the database and query for relevant data |
|
107 |
|
|
108 |
drv <- dbDriver("PostgreSQL") |
|
109 |
db <- dbConnect(drv, dbname=db.name) |
|
110 |
|
|
111 |
time1<-proc.time() #Start stop watch |
|
112 |
list_s<-format_s(stat_reg$STAT_ID) |
|
113 |
data2<-dbGetQuery(db, paste("SELECT * |
|
114 |
FROM ghcn |
|
115 |
WHERE element=",shQuote(var), |
|
116 |
"AND year>=",year_start, |
|
117 |
"AND year<",year_end, |
|
118 |
"AND station IN ",list_s,";",sep="")) #Selecting station using a SQL query |
|
119 |
time_duration<-proc.time()-time1 #Time for the query may be long given the size of the database |
|
120 |
time_minutes<-time_duration[3]/60 |
|
121 |
|
|
122 |
### |
|
123 |
#Add month query and averages here... |
|
124 |
### |
|
125 |
|
|
126 |
#data2 contains only 46 stations for Venezueal area?? |
|
127 |
data_table<-merge(data2,as.data.frame(stat_reg), by.x = "station", by.y = "STAT_ID") |
|
128 |
|
|
129 |
#Transform the subset data frame in a spatial data frame and reproject |
|
130 |
data_reg<-data_table #Make a copy of the data frame |
|
131 |
coords<- data_reg[c('lon','lat')] #Define coordinates in a data frame: clean up here!! |
|
132 |
#Wrong label...it is in fact projected... |
|
133 |
coordinates(data_reg)<-coords #Assign coordinates to the data frame |
|
134 |
#proj4string(data3)<-locs_coord #Assign coordinates reference system in PROJ4 format |
|
135 |
proj4string(data_reg)<-CRS_locs_WGS84 #Assign coordinates reference system in PROJ4 format |
|
136 |
data_reg<-spTransform(data_reg,CRS(new_proj)) #Project from WGS84 to new coord. system |
|
137 |
|
|
138 |
#png...output? |
|
139 |
plot(interp_area, axes =TRUE) |
|
140 |
plot(stat_reg, pch=1, col="red", cex= 0.7, add=TRUE) |
|
141 |
plot(data_reg,pch=2,col="blue",cex=2,add=TRUE) |
|
142 |
|
|
143 |
################################################################## |
|
144 |
### STEP 3: Save results and outuput in textfile and a shape file |
|
145 |
|
|
146 |
#Save shape files of the locations of meteorological stations in the study area |
|
147 |
outfile1<-file.path(in_path,paste("stations","_",out_prefix,".shp",sep="")) |
|
148 |
writeOGR(stat_reg,dsn= ".",layer= sub(".shp","",outfile1), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
149 |
|
|
150 |
outfile2<-file.path(in_path,paste("ghcn_data_",var,"_",year_start_clim,"_",year_end,out_prefix,".shp",sep="")) #Name of the file |
|
151 |
#writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension |
|
152 |
writeOGR(data_reg,dsn= ".",layer= sub(".shp","",outfile2), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
153 |
|
|
154 |
################################################################### |
|
155 |
### STEP 4: Extract values at stations from covariates stack of raster images |
|
156 |
#Eventually this step may be skipped if the covariates information is stored in the database... |
|
15 |
out_prefix<-"_365d_GAM_fus5_all_lstd_03012013" #User defined output prefix |
|
16 |
#qc_flags<- flags allowe for the query from the GHCND?? |
|
157 | 17 |
|
158 | 18 |
#The names of covariates can be changed...these names should be output/input from covar script!!! |
159 | 19 |
rnames<-c("x","y","lon","lat","N","E","N_w","E_w","elev","slope","aspect","CANHEIGHT","DISTOC") |
... | ... | |
161 | 21 |
lst_names<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12", |
162 | 22 |
"nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08", |
163 | 23 |
"nobs_09","nobs_10","nobs_11","nobs_12") |
164 |
|
|
165 | 24 |
covar_names<-c(rnames,lc_names,lst_names) |
166 | 25 |
|
167 |
s_raster<-stack(infile3) #read in the data stack |
|
168 |
names(s_raster)<-covar_names #Assigning names to the raster layers: making sure it is included in the extraction |
|
169 |
stat_val<- extract(s_raster, data_reg) #Extracting values from the raster stack for every point location in coords data frame. |
|
170 |
|
|
171 |
#create a shape file and data_frame with names |
|
172 |
|
|
173 |
data_RST<-as.data.frame(stat_val) #This creates a data frame with the values extracted |
|
174 |
data_RST_SDF<-cbind(data_reg,data_RST) |
|
175 |
coordinates(data_RST_SDF)<-coordinates(data_reg) #Transforming data_RST_SDF into a spatial point dataframe |
|
176 |
CRS_reg<-proj4string(data_reg) |
|
177 |
proj4string(data_RST_SDF)<-CRS_reg #Need to assign coordinates... |
|
178 |
|
|
179 |
#Creating a date column |
|
180 |
date1<-ISOdate(data_RST_SDF$year,data_RST_SDF$month,data_RST_SDF$day) #Creating a date object from 3 separate column |
|
181 |
date2<-gsub("-","",as.character(as.Date(date1))) |
|
182 |
data_RST_SDF$date<-date2 #Date format (year,month,day) is the following: "20100627" |
|
183 |
|
|
184 |
#This allows to change only one name of the data.frame |
|
185 |
pos<-match("value",names(data_RST_SDF)) #Find column with name "value" |
|
186 |
if (var=="TMAX"){ |
|
187 |
#names(data_RST_SDF)[pos]<-c("TMax") |
|
188 |
data_RST_SDF$value<-data_RST_SDF$value/10 #TMax is the average max temp for monthy data |
|
26 |
#list of 11 parameters for input in the function... |
|
27 |
|
|
28 |
list_param_prep<-list(db.name,var,range_years,range_years_clim,infile1,infile2,infile3,CRS_locs_WGS84,in_path,covar_names,out_prefix) |
|
29 |
cnames<-c("db.name","var","range_years","range_years_clim","infile1","infile2","infile3","CRS_locs_WGS84","in_path","covar_names","out_prefix") |
|
30 |
names(list_param_prep)<-cnames |
|
31 |
|
|
32 |
database_covaratiates_preparation<-function(list_param_prep){ |
|
33 |
#This function performs queries on the Postgres ghcnd database for stations matching the |
|
34 |
#interpolation area. It requires 11 inputs: |
|
35 |
# 1) db.name : Postgres database name containing the meteorological stations |
|
36 |
# 2) var: the variable of interest - "TMAX","TMIN" or "PRCP" |
|
37 |
# 3) range_years: range of records used in the daily interpolation, note that upper bound year is not included |
|
38 |
# 4) range_years_clim: range of records used in the monthly climatology interpolation, note that upper bound is not included |
|
39 |
# 5) infile1: region outline as a shape file - used in the interpolation stage too |
|
40 |
# 6) infile2: ghcnd stations locations as a textfile name with lat-long fields |
|
41 |
# 7) infile3: tif file of raser covariates for the interpolation area: it should have a local projection |
|
42 |
# 8) CRS_locs_WGS84: longlat EPSG 4326 used as coordinates reference system (proj4)for stations locations |
|
43 |
# 9) in_path: input path for covariates data and other files, this is also the output? |
|
44 |
# 10) covar_names: names of covariates used for the interpolation --may be removed later? (should be stored in the brick) |
|
45 |
# 11) out_prefix: output suffix added to output names--it is the same in the interpolation script |
|
46 |
# |
|
47 |
#The output is a list of four shapefile names produced by the function: |
|
48 |
#1) loc_stations: locations of stations as shapefile in EPSG 4326 |
|
49 |
#2) loc_stations_ghcn: ghcn daily data for the year range of interpolation (locally projected) |
|
50 |
#3) daily_covar_ghcn_data: ghcn daily data with covariates for the year range of interpolation (locally projected) |
|
51 |
#4) monthly_covar_ghcn_data: ghcn daily data with covariates for the year range of interpolation (locally projected) |
|
52 |
|
|
53 |
#AUTHOR: Benoit Parmentier |
|
54 |
#DATE: 03/01/2013 |
|
55 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363-- |
|
56 |
#Comments and TODO |
|
57 |
#-Add buffer option... |
|
58 |
#-Add output path argument option |
|
59 |
#-Add qc flag options |
|
60 |
################################################################################################## |
|
61 |
|
|
62 |
###Loading R library and packages: should it be read in before??? |
|
63 |
|
|
64 |
library(RPostgreSQL) |
|
65 |
library(sp) # Spatial pacakge with class definition by Bivand et al. |
|
66 |
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al. |
|
67 |
library(rgdal) # GDAL wrapper for R, spatial utilities |
|
68 |
library(rgeos) |
|
69 |
library(rgdal) |
|
70 |
library(raster) |
|
71 |
library(rasterVis) |
|
72 |
|
|
73 |
### Functions used in the script |
|
74 |
|
|
75 |
format_s <-function(s_ID){ |
|
76 |
#Format station ID in a vector format/tuple that is used in a psql query. |
|
77 |
# Argument 1: vector of station ID |
|
78 |
# Return: character of station ID |
|
79 |
tx2<-s_ID |
|
80 |
tx2<-as.character(tx2) |
|
81 |
stat_list<-tx2 |
|
82 |
temp<-shQuote(stat_list) |
|
83 |
t<-paste(temp, collapse= " ") |
|
84 |
t1<-gsub(" ", ",",t) |
|
85 |
sf_ID<-paste("(",t1,")",sep="") #vector containing the station ID to query |
|
86 |
return(sf_ID) |
|
87 |
} |
|
88 |
|
|
89 |
#parsing input arguments |
|
90 |
|
|
91 |
db.name <- list_param_prep$db.name #name of the Postgres database |
|
92 |
var <- list_param_prep$var #name of the variables to keep: TMIN, TMAX or PRCP |
|
93 |
year_start <-list_param_prep$range_years[1] #"2010" #starting year for the query (included) |
|
94 |
year_end <-list_param_prep$range_years[2] #"2011" #end year for the query (excluded) |
|
95 |
year_start_clim <-list_param_prep$range_years_clim[1] #right bound not included in the range!! starting year for monthly query to calculate clime |
|
96 |
infile1<- list_param_prep$infile1 #This is the shape file of outline of the study area #It is an input/output of the covariate script |
|
97 |
infile2<-list_param_prep$infile2 #"/home/layers/data/climate/ghcn/v2.92-upd-2012052822/ghcnd-stations.txt" #This is the textfile of station locations from GHCND |
|
98 |
infile3<-list_param_prep$infile3 #"covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script |
|
99 |
CRS_locs_WGS84<-list_param_prep$CRS_locs_WGS84 #Station coords WGS84: same as earlier |
|
100 |
in_path <- list_param_prep$in_path #CRS_locs_WGS84"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data/" |
|
101 |
out_prefix<-list_param_prep$out_prefix #"_365d_GAM_fus5_all_lstd_03012013" #User defined output prefix |
|
102 |
#qc_flags<-list_param_prep$qc_flags flags allowed for the query from the GHCND?? |
|
103 |
covar_names<-list_param_prep$covar_names # names should be written in the tif file!!! |
|
104 |
|
|
105 |
## working directory is the same for input and output for this function |
|
106 |
setwd(in_path) |
|
107 |
|
|
108 |
##### STEP 1: Select station in the study area |
|
109 |
|
|
110 |
filename<-sub(".shp","",infile1) #Removing the extension from file. |
|
111 |
interp_area <- readOGR(".",filename) |
|
112 |
CRS_interp<-proj4string(interp_area) #Storing the coordinate information: geographic coordinates longlat WGS84 |
|
113 |
|
|
114 |
#Read in GHCND database station locations |
|
115 |
dat_stat <- read.fwf(infile2, |
|
116 |
widths = c(11,9,10,7,3,31,4,4,6),fill=TRUE) |
|
117 |
colnames(dat_stat)<-c("STAT_ID","lat","lon","elev","state","name","GSNF","HCNF","WMOID") |
|
118 |
coords<- dat_stat[,c('lon','lat')] |
|
119 |
coordinates(dat_stat)<-coords |
|
120 |
proj4string(dat_stat)<-CRS_locs_WGS84 #this is the WGS84 projection |
|
121 |
#proj4string(dat_stat)<-CRS_interp |
|
122 |
dat_stat2<-spTransform(dat_stat,CRS(CRS_interp)) # Project from WGS84 to new coord. system |
|
123 |
|
|
124 |
# Spatial query to find relevant stations |
|
125 |
inside <- !is.na(over(dat_stat2, as(interp_area, "SpatialPolygons"))) #Finding stations contained in the current interpolation area |
|
126 |
stat_reg<-dat_stat2[inside,] #Selecting stations contained in the current interpolation area |
|
127 |
|
|
128 |
#### |
|
129 |
##TODO: Add buffer option? |
|
130 |
#### |
|
131 |
|
|
132 |
#### STEP 2: Connecting to the database and query for relevant data |
|
133 |
|
|
134 |
drv <- dbDriver("PostgreSQL") |
|
135 |
db <- dbConnect(drv, dbname=db.name) |
|
136 |
|
|
137 |
time1<-proc.time() #Start stop watch |
|
138 |
list_s<-format_s(stat_reg$STAT_ID) |
|
139 |
data2<-dbGetQuery(db, paste("SELECT * |
|
140 |
FROM ghcn |
|
141 |
WHERE element=",shQuote(var), |
|
142 |
"AND year>=",year_start, |
|
143 |
"AND year<",year_end, |
|
144 |
"AND station IN ",list_s,";",sep="")) #Selecting station using a SQL query |
|
145 |
time_duration<-proc.time()-time1 #Time for the query may be long given the size of the database |
|
146 |
time_minutes<-time_duration[3]/60 |
|
147 |
dbDisconnect(db) |
|
148 |
### |
|
149 |
#Add month query and averages here... |
|
150 |
### |
|
151 |
|
|
152 |
#data2 contains only 46 stations for Venezueal area?? |
|
153 |
data_table<-merge(data2,as.data.frame(stat_reg), by.x = "station", by.y = "STAT_ID") |
|
154 |
|
|
155 |
#Transform the subset data frame in a spatial data frame and reproject |
|
156 |
data_reg<-data_table #Make a copy of the data frame |
|
157 |
coords<- data_reg[c('lon','lat')] #Define coordinates in a data frame: clean up here!! |
|
158 |
#Wrong label...it is in fact projected... |
|
159 |
coordinates(data_reg)<-coords #Assign coordinates to the data frame |
|
160 |
#proj4string(data3)<-locs_coord #Assign coordinates reference system in PROJ4 format |
|
161 |
proj4string(data_reg)<-CRS_locs_WGS84 #Assign coordinates reference system in PROJ4 format |
|
162 |
data_reg<-spTransform(data_reg,CRS(CRS_interp)) #Project from WGS84 to new coord. system |
|
163 |
|
|
164 |
################################################################## |
|
165 |
### STEP 3: Save results and outuput in textfile and a shape file |
|
166 |
#browser() |
|
167 |
#Save shape files of the locations of meteorological stations in the study area |
|
168 |
outfile1<-file.path(in_path,paste("stations","_",out_prefix,".shp",sep="")) |
|
169 |
writeOGR(stat_reg,dsn= dirname(outfile1),layer= sub(".shp","",basename(outfile1)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
170 |
#writeOGR(dst,dsn= ".",layer= sub(".shp","",outfile4), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
171 |
|
|
172 |
outfile2<-file.path(in_path,paste("ghcn_data_",var,"_",year_start_clim,"_",year_end,out_prefix,".shp",sep="")) #Name of the file |
|
173 |
#writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension |
|
174 |
writeOGR(data_reg,dsn= dirname(outfile2),layer= sub(".shp","",basename(outfile2)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
175 |
|
|
176 |
################################################################### |
|
177 |
### STEP 4: Extract values at stations from covariates stack of raster images |
|
178 |
#Eventually this step may be skipped if the covariates information is stored in the database... |
|
179 |
|
|
180 |
s_raster<-stack(infile3) #read in the data stack |
|
181 |
names(s_raster)<-covar_names #Assigning names to the raster layers: making sure it is included in the extraction |
|
182 |
stat_val<- extract(s_raster, data_reg) #Extracting values from the raster stack for every point location in coords data frame. |
|
183 |
|
|
184 |
#create a shape file and data_frame with names |
|
185 |
|
|
186 |
data_RST<-as.data.frame(stat_val) #This creates a data frame with the values extracted |
|
187 |
data_RST_SDF<-cbind(data_reg,data_RST) |
|
188 |
coordinates(data_RST_SDF)<-coordinates(data_reg) #Transforming data_RST_SDF into a spatial point dataframe |
|
189 |
CRS_reg<-proj4string(data_reg) |
|
190 |
proj4string(data_RST_SDF)<-CRS_reg #Need to assign coordinates... |
|
191 |
|
|
192 |
#Creating a date column |
|
193 |
date1<-ISOdate(data_RST_SDF$year,data_RST_SDF$month,data_RST_SDF$day) #Creating a date object from 3 separate column |
|
194 |
date2<-gsub("-","",as.character(as.Date(date1))) |
|
195 |
data_RST_SDF$date<-date2 #Date format (year,month,day) is the following: "20100627" |
|
196 |
|
|
197 |
#This allows to change only one name of the data.frame |
|
198 |
pos<-match("value",names(data_RST_SDF)) #Find column with name "value" |
|
199 |
if (var=="TMAX"){ |
|
200 |
#names(data_RST_SDF)[pos]<-c("TMax") |
|
201 |
data_RST_SDF$value<-data_RST_SDF$value/10 #TMax is the average max temp for monthy data |
|
202 |
} |
|
203 |
|
|
204 |
#write out a new shapefile (including .prj component) |
|
205 |
outfile3<-file.path(in_path,paste("daily_covariates_ghcn_data_",var,"_",range_years[1],"_",range_years[2],out_prefix,".shp",sep="")) #Name of the file |
|
206 |
writeOGR(data_RST_SDF,dsn= dirname(outfile3),layer= sub(".shp","",basename(outfile3)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
207 |
|
|
208 |
############################################################### |
|
209 |
######## STEP 5: Preparing monthly averages from the ProstGres database |
|
210 |
|
|
211 |
drv <- dbDriver("PostgreSQL") |
|
212 |
db <- dbConnect(drv, dbname=db.name) |
|
213 |
|
|
214 |
#year_start_clim: set at the start of the script |
|
215 |
year_end<-2011 |
|
216 |
time1<-proc.time() #Start stop watch |
|
217 |
list_s<-format_s(stat_reg$STAT_ID) |
|
218 |
data_m<-dbGetQuery(db, paste("SELECT * |
|
219 |
FROM ghcn |
|
220 |
WHERE element=",shQuote(var), |
|
221 |
"AND year>=",year_start_clim, |
|
222 |
"AND year<",year_end, |
|
223 |
"AND station IN ",list_s,";",sep="")) #Selecting station using a SQL query |
|
224 |
time_duration<-proc.time()-time1 #Time for the query may be long given the size of the database |
|
225 |
time_minutes<-time_duration[3]/60 |
|
226 |
dbDisconnect(db) |
|
227 |
#Clean out this section!! |
|
228 |
date1<-ISOdate(data_m$year,data_m$month,data_m$day) #Creating a date object from 3 separate column |
|
229 |
date2<-as.POSIXlt(as.Date(date1)) |
|
230 |
data_m$date<-date2 |
|
231 |
#In Venezuela and other regions where there are not many stations...mflag==S should be added..see Durenne etal.2010. |
|
232 |
#d<-subset(data_m,year>=2000 & mflag=="0" ) #Selecting dataset 2000-2010 with good quality: 193 stations |
|
233 |
d<-subset(data_m,mflag=="0" | mflag=="S") #should be input arguments!! |
|
234 |
#May need some screeing??? i.e. range of temp and elevation... |
|
235 |
d1<-aggregate(value~station+month, data=d, mean) #Calculate monthly mean for every station in OR |
|
236 |
id<-as.data.frame(unique(d1$station)) #Unique station in OR for year 2000-2010: 193 but 7 loss of monthly avg |
|
237 |
|
|
238 |
dst<-merge(d1, stat_reg, by.x="station", by.y="STAT_ID") #Inner join all columns are retained |
|
239 |
|
|
240 |
#This allows to change only one name of the data.frame |
|
241 |
pos<-match("value",names(dst)) #Find column with name "value" |
|
242 |
if (var=="TMAX"){ |
|
243 |
names(dst)[pos]<-c("TMax") |
|
244 |
dst$TMax<-dst$TMax/10 #TMax is the average max temp for monthy data |
|
245 |
} |
|
246 |
|
|
247 |
#Extracting covariates from stack for the monthly dataset... |
|
248 |
coords<- dst[c('lon','lat')] #Define coordinates in a data frame |
|
249 |
coordinates(dst)<-coords #Assign coordinates to the data frame |
|
250 |
proj4string(dst)<-CRS_locs_WGS84 #Assign coordinates reference system in PROJ4 format |
|
251 |
dst_month<-spTransform(dst,CRS(CRS_interp)) #Project from WGS84 to new coord. system |
|
252 |
|
|
253 |
stations_val<-extract(s_raster,dst_month) #extraction of the infomration at station location |
|
254 |
stations_val<-as.data.frame(stations_val) |
|
255 |
dst_extract<-cbind(dst_month,stations_val) #this is in sinusoidal from the raster stack |
|
256 |
dst<-dst_extract |
|
257 |
|
|
258 |
coords<- dst[c('x','y')] #Define coordinates in a data frame, this is the local x,y |
|
259 |
coordinates(dst)<-coords #Assign coordinates to the data frame |
|
260 |
proj4string(dst)<-projection(s_raster) #Assign coordinates reference system in PROJ4 format |
|
261 |
|
|
262 |
#### |
|
263 |
#write out a new shapefile (including .prj component) |
|
264 |
dst$OID<-1:nrow(dst) #need a unique ID? |
|
265 |
outfile4<-file.path(in_path,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end,out_prefix,".shp",sep="")) #Name of the file |
|
266 |
writeOGR(dst,dsn= dirname(outfile4),layer= sub(".shp","",basename(outfile4)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
267 |
|
|
268 |
### list of outputs return |
|
269 |
|
|
270 |
outfiles_obj<-list(outfile1,outfile2,outfile3,outfile4) |
|
271 |
names(outfiles_obj)<- c("loc_stations","loc_stations_ghcn","daily_covar_ghcn_data","monthly_covar_ghcn_data") |
|
272 |
return(outfiles_obj) |
|
273 |
|
|
274 |
#END OF FUNCTION # |
|
189 | 275 |
} |
190 | 276 |
|
191 |
#write out a new shapefile (including .prj component) |
|
192 |
outfile3<-file.path(in_path,paste("daily_covariates_ghcn_data_",var,out_prefix,".shp",sep="")) #Name of the file |
|
193 |
writeOGR(data_RST_SDF,dsn= ".",layer= sub(".shp","",outfile3), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
194 |
|
|
195 |
############################################################### |
|
196 |
######## STEP 5: Preparing monthly averages from the ProstGres database |
|
197 |
|
|
198 |
drv <- dbDriver("PostgreSQL") |
|
199 |
db <- dbConnect(drv, dbname=db.name) |
|
200 |
|
|
201 |
#year_start_clim: set at the start of the script |
|
202 |
year_end<-2011 |
|
203 |
time1<-proc.time() #Start stop watch |
|
204 |
list_s<-format_s(stat_reg$STAT_ID) |
|
205 |
data_m<-dbGetQuery(db, paste("SELECT * |
|
206 |
FROM ghcn |
|
207 |
WHERE element=",shQuote(var), |
|
208 |
"AND year>=",year_start_clim, |
|
209 |
"AND year<",year_end, |
|
210 |
"AND station IN ",list_s,";",sep="")) #Selecting station using a SQL query |
|
211 |
time_duration<-proc.time()-time1 #Time for the query may be long given the size of the database |
|
212 |
time_minutes<-time_duration[3]/60 |
|
213 |
|
|
214 |
# do this work outside of (before) this function |
|
215 |
# to avoid making a copy of the data frame inside the function call |
|
216 |
date1<-ISOdate(data_m$year,data_m$month,data_m$day) #Creating a date object from 3 separate column |
|
217 |
date2<-as.POSIXlt(as.Date(date1)) |
|
218 |
data_m$date<-date2 |
|
219 |
#In Venezuela and other regions where there are not many stations...mflag==S should be added..see Durenne etal.2010. |
|
220 |
#d<-subset(data_m,year>=2000 & mflag=="0" ) #Selecting dataset 2000-2010 with good quality: 193 stations |
|
221 |
d<-subset(data_m,mflag=="0" | mflag=="S") |
|
222 |
#May need some screeing??? i.e. range of temp and elevation... |
|
223 |
d1<-aggregate(value~station+month, data=d, mean) #Calculate monthly mean for every station in OR |
|
224 |
id<-as.data.frame(unique(d1$station)) #Unique station in OR for year 2000-2010: 193 but 7 loss of monthly avg |
|
225 |
|
|
226 |
dst<-merge(d1, stat_reg, by.x="station", by.y="STAT_ID") #Inner join all columns are retained |
|
227 |
|
|
228 |
#This allows to change only one name of the data.frame |
|
229 |
pos<-match("value",names(dst)) #Find column with name "value" |
|
230 |
if (var=="TMAX"){ |
|
231 |
names(dst)[pos]<-c("TMax") |
|
232 |
dst$TMax<-dst$TMax/10 #TMax is the average max temp for monthy data |
|
233 |
} |
|
234 |
#dstjan=dst[dst$month==9,] #dst contains the monthly averages for tmax for every station over 2000-2010 |
|
235 |
|
|
236 |
#Extracting covariates from stack for the monthly dataset... |
|
237 |
coords<- dst[c('lon','lat')] #Define coordinates in a data frame |
|
238 |
coordinates(dst)<-coords #Assign coordinates to the data frame |
|
239 |
proj4string(dst)<-CRS_locs_WGS84 #Assign coordinates reference system in PROJ4 format |
|
240 |
dst_month<-spTransform(dst,CRS(CRS_interp)) #Project from WGS84 to new coord. system |
|
241 |
|
|
242 |
stations_val<-extract(s_raster,dst_month) #extraction of the infomration at station location |
|
243 |
stations_val<-as.data.frame(stations_val) |
|
244 |
dst_extract<-cbind(dst_month,stations_val) #this is in sinusoidal from the raster stack |
|
245 |
dst<-dst_extract |
|
246 |
|
|
247 |
coords<- dst[c('x','y')] #Define coordinates in a data frame, this is the local x,y |
|
248 |
coordinates(dst)<-coords #Assign coordinates to the data frame |
|
249 |
proj4string(dst)<-projection(s_raster) #Assign coordinates reference system in PROJ4 format |
|
250 |
|
|
251 |
#### |
|
252 |
#write out a new shapefile (including .prj component) |
|
253 |
dst$OID<-1:nrow(dst) #need a unique ID? |
|
254 |
outfile4<-file.path(in_path,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end,out_prefix,".shp",sep="")) #Name of the file |
|
255 |
writeOGR(dst,dsn= ".",layer= sub(".shp","",outfile4), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
256 |
|
|
257 |
### list of output return |
|
258 |
|
|
259 |
outfiles_obj<-list(outfile1,outfile2,outfile3,outfile4,outfile5) |
|
260 |
outfiles_obj<cname("loc_stations","loc_stations_ghcn","daily_covar_ghcn_data","monthly_covar_ghcn_data") |
|
261 |
#return(outfiles_obj) |
|
262 | 277 |
##### END OF SCRIPT ########## |
Also available in: Unified diff
Data preparation, ghcnd station queries and processing now a function with 11 parameters and four outputs