Project

General

Profile

« Previous | Next » 

Revision d8a3533b

Added by Benoit Parmentier over 11 years ago

Data preparation, ghcnd station queries and processing now a function with 11 parameters and four outputs

View differences:

climate/research/oregon/interpolation/Database_stations_extraction_raster_covariates_processing.R
1 1
##################    Data preparation for interpolation   #######################################
2 2
############################ Extraction of station data ##########################################
3
#This script perform queries on the Postgres database ghcn for stations matching the             
4
#interpolation area. It requires the following inputs:                                           
5
# 1)the text file ofGHCND  stations from NCDC matching the database version release              
6
# 2)a shape file of the study area with geographic coordinates: lonlat WGS84                                                                          #       
7
# 3)a new coordinate system can be provided as an argument                                       
8
# 4)the variable of interest: "TMAX","TMIN" or "PRCP"                                            
9
# 5)the location of raser covariate stack.                                                                                             
10
#The outputs are text files and a shape file of a time subset of the database                    
11
#AUTHOR: Benoit Parmentier                                                                       
12
#DATE: 03/01/2013                                                                                 
13
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--     
14
#Comments and TODO
15
#-Add buffer option...
16
#-Add calculation of monthly mean...
17
#Outputs are:
18
#
19
##################################################################################################
20 3

  
21
###Loading R library and packages   
22

  
23
library(RPostgreSQL)
24
library(sp)                                           # Spatial pacakge with class definition by Bivand et al.
25
library(spdep)                                          # Spatial pacakge with methods and spatial stat. by Bivand et al.
26
library(rgdal)                                          # GDAL wrapper for R, spatial utilities
27
library(rgeos)
28
library(rgdal)
29
library(raster)
30
library(rasterVis)
31

  
32
### Parameters and arguments
4
### Parameters and arguments for the function
33 5

  
34 6
db.name <- "ghcn"                #name of the Postgres database
35 7
var <- "TMAX"                    #name of the variables to keep: TMIN, TMAX or PRCP
36
year_start<-"2010"               #starting year for the query (included)
37
year_end<-"2011"                 #end year for the query (excluded)
38
year_start_clim<-"2000"          #starting year for monthly query to calculate clime
39
infile1<- "outline_venezuela_region__VE_01292013.shp"      #This is the shape file of outline of the study area 
40
                                                           #It is an input/output of the covariate script
8
range_years<-c("2010","2011") #right bound not included in the range!!
9
range_years_clim<-c("2000","2011") #right bound not included in the range!!
10
infile1<- "outline_venezuela_region__VE_01292013.shp"      #This is the shape file of outline of the study area                                                      #It is an input/output of the covariate script
41 11
infile2<-"/home/layers/data/climate/ghcn/v2.92-upd-2012052822/ghcnd-stations.txt"                              #This is the textfile of station locations from GHCND
42 12
infile3<-"covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script
43

  
44
new_proj<-"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs"
45
#CRS_locs_WGS84<-"+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0"
46 13
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84: same as earlier
47

  
48
##Paths to inputs and output
49 14
in_path <- "/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data/"
50

  
51
setwd(in_path) 
52

  
53
out_prefix<-"_365d_GAM_fus5_all_lstd_02202013"                #User defined output prefix
54

  
55
#PUT ALL THE INPUT ARGUMENTS IN ONE OBJECT??? read the object as input!!!
56

  
57
### Functions used in the script
58

  
59
format_s <-function(s_ID){
60
  #Format station ID in a vector format/tuple that is used in a psql query.
61
  # Argument 1: vector of station ID
62
  # Return: character of station ID
63
  tx2<-s_ID
64
  tx2<-as.character(tx2)
65
  stat_list<-tx2
66
  temp<-shQuote(stat_list)
67
  t<-paste(temp, collapse= " ")
68
  t1<-gsub(" ", ",",t)
69
  sf_ID<-paste("(",t1,")",sep="") #vector containing the station ID to query
70
  return(sf_ID)
71
}
72

  
73
############ BEGIN: START OF THE SCRIPT #################
74

  
75
##### STEP 1: Select station in the study area
76

  
77
filename<-sub(".shp","",infile1)             #Removing the extension from file.
78
interp_area <- readOGR(".",filename)
79
CRS_interp<-proj4string(interp_area)         #Storing the coordinate information: geographic coordinates longlat WGS84
80

  
81
#infile2<-"ghcnd-stations.txt"                              #This is the textfile of station locations from GHCND
82
dat_stat <- read.fwf(infile2, 
83
                     widths = c(11,9,10,7,3,31,4,4,6),fill=TRUE)
84
colnames(dat_stat)<-c("STAT_ID","lat","lon","elev","state","name","GSNF","HCNF","WMOID")
85
coords<- dat_stat[,c('lon','lat')]
86
coordinates(dat_stat)<-coords
87
proj4string(dat_stat)<-CRS_locs_WGS84 #this is the WGS84 projection
88
#proj4string(dat_stat)<-CRS_interp
89
dat_stat2<-spTransform(dat_stat,CRS(new_proj))         # Project from WGS84 to new coord. system
90

  
91
# Spatial query to find relevant stations
92
inside <- !is.na(over(dat_stat2, as(interp_area, "SpatialPolygons")))  #Finding stations contained in the current interpolation area
93
stat_reg<-dat_stat2[inside,]              #Selecting stations contained in the current interpolation area
94

  
95
#Quick visualization of station locations
96
plot(interp_area, axes =TRUE)
97
plot(stat_reg, pch=1, col="red", cex= 0.7, add=TRUE)
98
#plot(data3,pch=1,col="blue",cex=3,add=TRUE)
99
#legend("topleft", pch=1,col="red",bty="n",title= "Stations",cex=1.6)
100
#only 357 station for Venezuela??
101

  
102
####
103
##TODO: Add buffer option? 
104
####
105

  
106
#### STEP 2: Connecting to the database and query for relevant data 
107

  
108
drv <- dbDriver("PostgreSQL")
109
db <- dbConnect(drv, dbname=db.name)
110

  
111
time1<-proc.time()    #Start stop watch
112
list_s<-format_s(stat_reg$STAT_ID)
113
data2<-dbGetQuery(db, paste("SELECT *
114
      FROM ghcn
115
      WHERE element=",shQuote(var),
116
      "AND year>=",year_start,
117
      "AND year<",year_end,
118
      "AND station IN ",list_s,";",sep=""))  #Selecting station using a SQL query
119
time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
120
time_minutes<-time_duration[3]/60
121

  
122
###
123
#Add month query and averages here...
124
###
125

  
126
#data2 contains only 46 stations for Venezueal area??
127
data_table<-merge(data2,as.data.frame(stat_reg), by.x = "station", by.y = "STAT_ID")
128

  
129
#Transform the subset data frame in a spatial data frame and reproject
130
data_reg<-data_table                               #Make a copy of the data frame
131
coords<- data_reg[c('lon','lat')]              #Define coordinates in a data frame: clean up here!!
132
                                                   #Wrong label...it is in fact projected...
133
coordinates(data_reg)<-coords                      #Assign coordinates to the data frame
134
#proj4string(data3)<-locs_coord                  #Assign coordinates reference system in PROJ4 format
135
proj4string(data_reg)<-CRS_locs_WGS84                #Assign coordinates reference system in PROJ4 format
136
data_reg<-spTransform(data_reg,CRS(new_proj))     #Project from WGS84 to new coord. system
137

  
138
#png...output?
139
plot(interp_area, axes =TRUE)
140
plot(stat_reg, pch=1, col="red", cex= 0.7, add=TRUE)
141
plot(data_reg,pch=2,col="blue",cex=2,add=TRUE)
142

  
143
##################################################################
144
### STEP 3: Save results and outuput in textfile and a shape file
145

  
146
#Save shape files of the locations of meteorological stations in the study area
147
outfile1<-file.path(in_path,paste("stations","_",out_prefix,".shp",sep=""))
148
writeOGR(stat_reg,dsn= ".",layer= sub(".shp","",outfile1), driver="ESRI Shapefile",overwrite_layer=TRUE)
149

  
150
outfile2<-file.path(in_path,paste("ghcn_data_",var,"_",year_start_clim,"_",year_end,out_prefix,".shp",sep=""))         #Name of the file
151
#writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
152
writeOGR(data_reg,dsn= ".",layer= sub(".shp","",outfile2), driver="ESRI Shapefile",overwrite_layer=TRUE)
153

  
154
###################################################################
155
### STEP 4: Extract values at stations from covariates stack of raster images
156
#Eventually this step may be skipped if the covariates information is stored in the database...
15
out_prefix<-"_365d_GAM_fus5_all_lstd_03012013"                #User defined output prefix
16
#qc_flags<-    flags allowe for the query from the GHCND??
157 17

  
158 18
#The names of covariates can be changed...these names should be output/input from covar script!!!
159 19
rnames<-c("x","y","lon","lat","N","E","N_w","E_w","elev","slope","aspect","CANHEIGHT","DISTOC")
......
161 21
lst_names<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12",
162 22
             "nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08",
163 23
             "nobs_09","nobs_10","nobs_11","nobs_12")
164

  
165 24
covar_names<-c(rnames,lc_names,lst_names)
166 25

  
167
s_raster<-stack(infile3)                   #read in the data stack
168
names(s_raster)<-covar_names               #Assigning names to the raster layers: making sure it is included in the extraction
169
stat_val<- extract(s_raster, data_reg)        #Extracting values from the raster stack for every point location in coords data frame.
170

  
171
#create a shape file and data_frame with names
172

  
173
data_RST<-as.data.frame(stat_val)                                            #This creates a data frame with the values extracted
174
data_RST_SDF<-cbind(data_reg,data_RST)
175
coordinates(data_RST_SDF)<-coordinates(data_reg) #Transforming data_RST_SDF into a spatial point dataframe
176
CRS_reg<-proj4string(data_reg)
177
proj4string(data_RST_SDF)<-CRS_reg  #Need to assign coordinates...
178

  
179
#Creating a date column
180
date1<-ISOdate(data_RST_SDF$year,data_RST_SDF$month,data_RST_SDF$day) #Creating a date object from 3 separate column
181
date2<-gsub("-","",as.character(as.Date(date1)))
182
data_RST_SDF$date<-date2                                              #Date format (year,month,day) is the following: "20100627"
183

  
184
#This allows to change only one name of the data.frame
185
pos<-match("value",names(data_RST_SDF)) #Find column with name "value"
186
if (var=="TMAX"){
187
  #names(data_RST_SDF)[pos]<-c("TMax")
188
  data_RST_SDF$value<-data_RST_SDF$value/10                #TMax is the average max temp for monthy data
26
#list of 11 parameters for input in the function...
27

  
28
list_param_prep<-list(db.name,var,range_years,range_years_clim,infile1,infile2,infile3,CRS_locs_WGS84,in_path,covar_names,out_prefix)
29
cnames<-c("db.name","var","range_years","range_years_clim","infile1","infile2","infile3","CRS_locs_WGS84","in_path","covar_names","out_prefix")
30
names(list_param_prep)<-cnames
31

  
32
database_covaratiates_preparation<-function(list_param_prep){
33
  #This function performs queries on the Postgres ghcnd database for stations matching the             
34
  #interpolation area. It requires 11 inputs:                                           
35
  # 1) db.name :  Postgres database name containing the meteorological stations
36
  # 2) var: the variable of interest - "TMAX","TMIN" or "PRCP" 
37
  # 3) range_years: range of records used in the daily interpolation, note that upper bound year is not included               
38
  # 4) range_years_clim: range of records used in the monthly climatology interpolation, note that upper bound is not included
39
  # 5) infile1: region outline as a shape file - used in the interpolation  stage too                              
40
  # 6) infile2: ghcnd stations locations as a textfile name with lat-long fields                                                                                   
41
  # 7) infile3: tif file of raser covariates for the interpolation area: it should have a local projection                                                                                           
42
  # 8) CRS_locs_WGS84: longlat EPSG 4326 used as coordinates reference system (proj4)for stations locations
43
  # 9) in_path: input path for covariates data and other files, this is also the output?
44
  # 10) covar_names: names of covariates used for the interpolation --may be removed later? (should be stored in the brick)
45
  # 11) out_prefix: output suffix added to output names--it is the same in the interpolation script
46
  #
47
  #The output is a list of four shapefile names produced by the function:
48
  #1) loc_stations: locations of stations as shapefile in EPSG 4326
49
  #2) loc_stations_ghcn: ghcn daily data for the year range of interpolation (locally projected)
50
  #3) daily_covar_ghcn_data: ghcn daily data with covariates for the year range of interpolation (locally projected)
51
  #4) monthly_covar_ghcn_data: ghcn daily data with covariates for the year range of interpolation (locally projected)
52
  
53
  #AUTHOR: Benoit Parmentier                                                                       
54
  #DATE: 03/01/2013                                                                                 
55
  #PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--     
56
  #Comments and TODO
57
  #-Add buffer option...
58
  #-Add output path argument option
59
  #-Add qc flag options
60
  ##################################################################################################
61
  
62
  ###Loading R library and packages: should it be read in before???   
63
  
64
  library(RPostgreSQL)
65
  library(sp)                                           # Spatial pacakge with class definition by Bivand et al.
66
  library(spdep)                                          # Spatial pacakge with methods and spatial stat. by Bivand et al.
67
  library(rgdal)                                          # GDAL wrapper for R, spatial utilities
68
  library(rgeos)
69
  library(rgdal)
70
  library(raster)
71
  library(rasterVis)
72
  
73
  ### Functions used in the script
74
  
75
  format_s <-function(s_ID){
76
    #Format station ID in a vector format/tuple that is used in a psql query.
77
    # Argument 1: vector of station ID
78
    # Return: character of station ID
79
    tx2<-s_ID
80
    tx2<-as.character(tx2)
81
    stat_list<-tx2
82
    temp<-shQuote(stat_list)
83
    t<-paste(temp, collapse= " ")
84
    t1<-gsub(" ", ",",t)
85
    sf_ID<-paste("(",t1,")",sep="") #vector containing the station ID to query
86
    return(sf_ID)
87
  }
88
  
89
  #parsing input arguments
90
  
91
  db.name <- list_param_prep$db.name             #name of the Postgres database
92
  var <- list_param_prep$var                     #name of the variables to keep: TMIN, TMAX or PRCP
93
  year_start <-list_param_prep$range_years[1] #"2010"               #starting year for the query (included)
94
  year_end <-list_param_prep$range_years[2] #"2011"                 #end year for the query (excluded)
95
  year_start_clim <-list_param_prep$range_years_clim[1] #right bound not included in the range!! starting year for monthly query to calculate clime
96
  infile1<- list_param_prep$infile1  #This is the shape file of outline of the study area                                                      #It is an input/output of the covariate script
97
  infile2<-list_param_prep$infile2      #"/home/layers/data/climate/ghcn/v2.92-upd-2012052822/ghcnd-stations.txt"                              #This is the textfile of station locations from GHCND
98
  infile3<-list_param_prep$infile3 #"covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script
99
  CRS_locs_WGS84<-list_param_prep$CRS_locs_WGS84 #Station coords WGS84: same as earlier
100
  in_path <- list_param_prep$in_path #CRS_locs_WGS84"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data/"
101
  out_prefix<-list_param_prep$out_prefix #"_365d_GAM_fus5_all_lstd_03012013"                #User defined output prefix
102
  #qc_flags<-list_param_prep$qc_flags    flags allowed for the query from the GHCND??
103
  covar_names<-list_param_prep$covar_names # names should be written in the tif file!!!
104
  
105
  ## working directory is the same for input and output for this function  
106
  setwd(in_path) 
107
  
108
  ##### STEP 1: Select station in the study area
109
  
110
  filename<-sub(".shp","",infile1)             #Removing the extension from file.
111
  interp_area <- readOGR(".",filename)
112
  CRS_interp<-proj4string(interp_area)         #Storing the coordinate information: geographic coordinates longlat WGS84
113
  
114
  #Read in GHCND database station locations
115
  dat_stat <- read.fwf(infile2, 
116
                       widths = c(11,9,10,7,3,31,4,4,6),fill=TRUE)
117
  colnames(dat_stat)<-c("STAT_ID","lat","lon","elev","state","name","GSNF","HCNF","WMOID")
118
  coords<- dat_stat[,c('lon','lat')]
119
  coordinates(dat_stat)<-coords
120
  proj4string(dat_stat)<-CRS_locs_WGS84 #this is the WGS84 projection
121
  #proj4string(dat_stat)<-CRS_interp
122
  dat_stat2<-spTransform(dat_stat,CRS(CRS_interp))         # Project from WGS84 to new coord. system
123
  
124
  # Spatial query to find relevant stations
125
  inside <- !is.na(over(dat_stat2, as(interp_area, "SpatialPolygons")))  #Finding stations contained in the current interpolation area
126
  stat_reg<-dat_stat2[inside,]              #Selecting stations contained in the current interpolation area
127
  
128
  ####
129
  ##TODO: Add buffer option? 
130
  ####
131
  
132
  #### STEP 2: Connecting to the database and query for relevant data 
133
  
134
  drv <- dbDriver("PostgreSQL")
135
  db <- dbConnect(drv, dbname=db.name)
136
  
137
  time1<-proc.time()    #Start stop watch
138
  list_s<-format_s(stat_reg$STAT_ID)
139
  data2<-dbGetQuery(db, paste("SELECT *
140
                              FROM ghcn
141
                              WHERE element=",shQuote(var),
142
                              "AND year>=",year_start,
143
                              "AND year<",year_end,
144
                              "AND station IN ",list_s,";",sep=""))  #Selecting station using a SQL query
145
  time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
146
  time_minutes<-time_duration[3]/60
147
  dbDisconnect(db)
148
  ###
149
  #Add month query and averages here...
150
  ###
151
  
152
  #data2 contains only 46 stations for Venezueal area??
153
  data_table<-merge(data2,as.data.frame(stat_reg), by.x = "station", by.y = "STAT_ID")
154
  
155
  #Transform the subset data frame in a spatial data frame and reproject
156
  data_reg<-data_table                               #Make a copy of the data frame
157
  coords<- data_reg[c('lon','lat')]              #Define coordinates in a data frame: clean up here!!
158
  #Wrong label...it is in fact projected...
159
  coordinates(data_reg)<-coords                      #Assign coordinates to the data frame
160
  #proj4string(data3)<-locs_coord                  #Assign coordinates reference system in PROJ4 format
161
  proj4string(data_reg)<-CRS_locs_WGS84                #Assign coordinates reference system in PROJ4 format
162
  data_reg<-spTransform(data_reg,CRS(CRS_interp))     #Project from WGS84 to new coord. system
163
  
164
  ##################################################################
165
  ### STEP 3: Save results and outuput in textfile and a shape file
166
  #browser()
167
  #Save shape files of the locations of meteorological stations in the study area
168
  outfile1<-file.path(in_path,paste("stations","_",out_prefix,".shp",sep=""))
169
  writeOGR(stat_reg,dsn= dirname(outfile1),layer= sub(".shp","",basename(outfile1)), driver="ESRI Shapefile",overwrite_layer=TRUE)
170
  #writeOGR(dst,dsn= ".",layer= sub(".shp","",outfile4), driver="ESRI Shapefile",overwrite_layer=TRUE)
171
  
172
  outfile2<-file.path(in_path,paste("ghcn_data_",var,"_",year_start_clim,"_",year_end,out_prefix,".shp",sep=""))         #Name of the file
173
  #writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
174
  writeOGR(data_reg,dsn= dirname(outfile2),layer= sub(".shp","",basename(outfile2)), driver="ESRI Shapefile",overwrite_layer=TRUE)
175
  
176
  ###################################################################
177
  ### STEP 4: Extract values at stations from covariates stack of raster images
178
  #Eventually this step may be skipped if the covariates information is stored in the database...
179
  
180
  s_raster<-stack(infile3)                   #read in the data stack
181
  names(s_raster)<-covar_names               #Assigning names to the raster layers: making sure it is included in the extraction
182
  stat_val<- extract(s_raster, data_reg)        #Extracting values from the raster stack for every point location in coords data frame.
183
  
184
  #create a shape file and data_frame with names
185
  
186
  data_RST<-as.data.frame(stat_val)                                            #This creates a data frame with the values extracted
187
  data_RST_SDF<-cbind(data_reg,data_RST)
188
  coordinates(data_RST_SDF)<-coordinates(data_reg) #Transforming data_RST_SDF into a spatial point dataframe
189
  CRS_reg<-proj4string(data_reg)
190
  proj4string(data_RST_SDF)<-CRS_reg  #Need to assign coordinates...
191
  
192
  #Creating a date column
193
  date1<-ISOdate(data_RST_SDF$year,data_RST_SDF$month,data_RST_SDF$day) #Creating a date object from 3 separate column
194
  date2<-gsub("-","",as.character(as.Date(date1)))
195
  data_RST_SDF$date<-date2                                              #Date format (year,month,day) is the following: "20100627"
196
  
197
  #This allows to change only one name of the data.frame
198
  pos<-match("value",names(data_RST_SDF)) #Find column with name "value"
199
  if (var=="TMAX"){
200
    #names(data_RST_SDF)[pos]<-c("TMax")
201
    data_RST_SDF$value<-data_RST_SDF$value/10                #TMax is the average max temp for monthy data
202
  }
203
  
204
  #write out a new shapefile (including .prj component)
205
  outfile3<-file.path(in_path,paste("daily_covariates_ghcn_data_",var,"_",range_years[1],"_",range_years[2],out_prefix,".shp",sep=""))         #Name of the file
206
  writeOGR(data_RST_SDF,dsn= dirname(outfile3),layer= sub(".shp","",basename(outfile3)), driver="ESRI Shapefile",overwrite_layer=TRUE)
207
  
208
  ###############################################################
209
  ######## STEP 5: Preparing monthly averages from the ProstGres database
210
  
211
  drv <- dbDriver("PostgreSQL")
212
  db <- dbConnect(drv, dbname=db.name)
213
  
214
  #year_start_clim: set at the start of the script
215
  year_end<-2011
216
  time1<-proc.time()    #Start stop watch
217
  list_s<-format_s(stat_reg$STAT_ID)
218
  data_m<-dbGetQuery(db, paste("SELECT *
219
                               FROM ghcn
220
                               WHERE element=",shQuote(var),
221
                               "AND year>=",year_start_clim,
222
                               "AND year<",year_end,
223
                               "AND station IN ",list_s,";",sep=""))  #Selecting station using a SQL query
224
  time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
225
  time_minutes<-time_duration[3]/60
226
  dbDisconnect(db)
227
  #Clean out this section!!
228
  date1<-ISOdate(data_m$year,data_m$month,data_m$day) #Creating a date object from 3 separate column
229
  date2<-as.POSIXlt(as.Date(date1))
230
  data_m$date<-date2
231
  #In Venezuela and other regions where there are not many stations...mflag==S should be added..see Durenne etal.2010.
232
  #d<-subset(data_m,year>=2000 & mflag=="0" ) #Selecting dataset 2000-2010 with good quality: 193 stations
233
  d<-subset(data_m,mflag=="0" | mflag=="S") #should be input arguments!!
234
  #May need some screeing??? i.e. range of temp and elevation...
235
  d1<-aggregate(value~station+month, data=d, mean)  #Calculate monthly mean for every station in OR
236
  id<-as.data.frame(unique(d1$station))     #Unique station in OR for year 2000-2010: 193 but 7 loss of monthly avg    
237
  
238
  dst<-merge(d1, stat_reg, by.x="station", by.y="STAT_ID")   #Inner join all columns are retained
239
  
240
  #This allows to change only one name of the data.frame
241
  pos<-match("value",names(dst)) #Find column with name "value"
242
  if (var=="TMAX"){
243
    names(dst)[pos]<-c("TMax")
244
    dst$TMax<-dst$TMax/10                #TMax is the average max temp for monthy data
245
  }
246
  
247
  #Extracting covariates from stack for the monthly dataset...
248
  coords<- dst[c('lon','lat')]              #Define coordinates in a data frame
249
  coordinates(dst)<-coords                      #Assign coordinates to the data frame
250
  proj4string(dst)<-CRS_locs_WGS84                  #Assign coordinates reference system in PROJ4 format
251
  dst_month<-spTransform(dst,CRS(CRS_interp))     #Project from WGS84 to new coord. system
252
  
253
  stations_val<-extract(s_raster,dst_month)  #extraction of the infomration at station location
254
  stations_val<-as.data.frame(stations_val)
255
  dst_extract<-cbind(dst_month,stations_val) #this is in sinusoidal from the raster stack
256
  dst<-dst_extract
257
  
258
  coords<- dst[c('x','y')]              #Define coordinates in a data frame, this is the local x,y
259
  coordinates(dst)<-coords                    #Assign coordinates to the data frame
260
  proj4string(dst)<-projection(s_raster)        #Assign coordinates reference system in PROJ4 format
261
  
262
  ####
263
  #write out a new shapefile (including .prj component)
264
  dst$OID<-1:nrow(dst) #need a unique ID?
265
  outfile4<-file.path(in_path,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end,out_prefix,".shp",sep=""))  #Name of the file
266
  writeOGR(dst,dsn= dirname(outfile4),layer= sub(".shp","",basename(outfile4)), driver="ESRI Shapefile",overwrite_layer=TRUE)
267
  
268
  ### list of outputs return
269
  
270
  outfiles_obj<-list(outfile1,outfile2,outfile3,outfile4)
271
  names(outfiles_obj)<- c("loc_stations","loc_stations_ghcn","daily_covar_ghcn_data","monthly_covar_ghcn_data")
272
  return(outfiles_obj)
273
  
274
  #END OF FUNCTION # 
189 275
}
190 276

  
191
#write out a new shapefile (including .prj component)
192
outfile3<-file.path(in_path,paste("daily_covariates_ghcn_data_",var,out_prefix,".shp",sep=""))         #Name of the file
193
writeOGR(data_RST_SDF,dsn= ".",layer= sub(".shp","",outfile3), driver="ESRI Shapefile",overwrite_layer=TRUE)
194

  
195
###############################################################
196
######## STEP 5: Preparing monthly averages from the ProstGres database
197

  
198
drv <- dbDriver("PostgreSQL")
199
db <- dbConnect(drv, dbname=db.name)
200

  
201
#year_start_clim: set at the start of the script
202
year_end<-2011
203
time1<-proc.time()    #Start stop watch
204
list_s<-format_s(stat_reg$STAT_ID)
205
data_m<-dbGetQuery(db, paste("SELECT *
206
                            FROM ghcn
207
                            WHERE element=",shQuote(var),
208
                            "AND year>=",year_start_clim,
209
                            "AND year<",year_end,
210
                            "AND station IN ",list_s,";",sep=""))  #Selecting station using a SQL query
211
time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
212
time_minutes<-time_duration[3]/60
213

  
214
# do this work outside of (before) this function
215
# to avoid making a copy of the data frame inside the function call
216
date1<-ISOdate(data_m$year,data_m$month,data_m$day) #Creating a date object from 3 separate column
217
date2<-as.POSIXlt(as.Date(date1))
218
data_m$date<-date2
219
#In Venezuela and other regions where there are not many stations...mflag==S should be added..see Durenne etal.2010.
220
#d<-subset(data_m,year>=2000 & mflag=="0" ) #Selecting dataset 2000-2010 with good quality: 193 stations
221
d<-subset(data_m,mflag=="0" | mflag=="S")
222
#May need some screeing??? i.e. range of temp and elevation...
223
d1<-aggregate(value~station+month, data=d, mean)  #Calculate monthly mean for every station in OR
224
id<-as.data.frame(unique(d1$station))     #Unique station in OR for year 2000-2010: 193 but 7 loss of monthly avg    
225

  
226
dst<-merge(d1, stat_reg, by.x="station", by.y="STAT_ID")   #Inner join all columns are retained
227

  
228
#This allows to change only one name of the data.frame
229
pos<-match("value",names(dst)) #Find column with name "value"
230
if (var=="TMAX"){
231
  names(dst)[pos]<-c("TMax")
232
  dst$TMax<-dst$TMax/10                #TMax is the average max temp for monthy data
233
}
234
#dstjan=dst[dst$month==9,]  #dst contains the monthly averages for tmax for every station over 2000-2010
235

  
236
#Extracting covariates from stack for the monthly dataset...
237
coords<- dst[c('lon','lat')]              #Define coordinates in a data frame
238
coordinates(dst)<-coords                      #Assign coordinates to the data frame
239
proj4string(dst)<-CRS_locs_WGS84                  #Assign coordinates reference system in PROJ4 format
240
dst_month<-spTransform(dst,CRS(CRS_interp))     #Project from WGS84 to new coord. system
241

  
242
stations_val<-extract(s_raster,dst_month)  #extraction of the infomration at station location
243
stations_val<-as.data.frame(stations_val)
244
dst_extract<-cbind(dst_month,stations_val) #this is in sinusoidal from the raster stack
245
dst<-dst_extract
246

  
247
coords<- dst[c('x','y')]              #Define coordinates in a data frame, this is the local x,y
248
coordinates(dst)<-coords                    #Assign coordinates to the data frame
249
proj4string(dst)<-projection(s_raster)        #Assign coordinates reference system in PROJ4 format
250

  
251
####
252
#write out a new shapefile (including .prj component)
253
dst$OID<-1:nrow(dst) #need a unique ID?
254
outfile4<-file.path(in_path,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end,out_prefix,".shp",sep=""))  #Name of the file
255
writeOGR(dst,dsn= ".",layer= sub(".shp","",outfile4), driver="ESRI Shapefile",overwrite_layer=TRUE)
256

  
257
### list of output return
258

  
259
outfiles_obj<-list(outfile1,outfile2,outfile3,outfile4,outfile5)
260
outfiles_obj<cname("loc_stations","loc_stations_ghcn","daily_covar_ghcn_data","monthly_covar_ghcn_data")
261
#return(outfiles_obj)
262 277
##### END OF SCRIPT ##########

Also available in: Unified diff