Project

General

Profile

« Previous | Next » 

Revision 6a5b56da

Added by Benoit Parmentier over 11 years ago

Data preparation Venezuela, added monthly query from Postgres station database

View differences:

climate/research/oregon/interpolation/Database_stations_extraction_raster_covariates_processing.R
1 1
##################    Data preparation for interpolation   #######################################
2 2
############################ Extraction of station data ##########################################
3
#This script perform queries on the Postgres database ghcn for stations matching the             #
4
#interpolation area. It requires the following inputs:                                           #
5
# 1)the text file ofGHCND  stations from NCDC matching the database version release              #
6
# 2)a shape file of the study area with geographic coordinates: lonlat WGS84                     #                                                     #       
7
# 3)a new coordinate system can be provided as an argument                                       #
8
# 4)the variable of interest: "TMAX","TMIN" or "PRCP"                                            #
9
#                                                                                                #
10
#The outputs are text files and a shape file of a time subset of the database                    #
11
#AUTHOR: Benoit Parmentier                                                                       #
12
#DATE: 01/31/2013                                                                                 #
3
#This script perform queries on the Postgres database ghcn for stations matching the             
4
#interpolation area. It requires the following inputs:                                           
5
# 1)the text file ofGHCND  stations from NCDC matching the database version release              
6
# 2)a shape file of the study area with geographic coordinates: lonlat WGS84                                                                          #       
7
# 3)a new coordinate system can be provided as an argument                                       
8
# 4)the variable of interest: "TMAX","TMIN" or "PRCP"                                            
9
# 5)the location of raser covariate stack.                                                                                             
10
#The outputs are text files and a shape file of a time subset of the database                    
11
#AUTHOR: Benoit Parmentier                                                                       
12
#DATE: 02/08/2013                                                                                 
13 13
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--     
14 14
#Comments and TODO
15 15
#-Add buffer option...
......
33 33
var <- "TMAX"                    #name of the variables to keep: TMIN, TMAX or PRCP
34 34
year_start<-"2010"               #starting year for the query (included)
35 35
year_end<-"2011"                 #end year for the query (excluded)
36
infile1<- "_venezuela_region__VE_01292013.shp"      #This is the shape file of outline of the study area. 
37
                                                    #It is projected alreaday
36
infile1<- "outline_venezuela_region__VE_01292013.shp"      #This is the shape file of outline of the study area.                                              #It is projected alreaday
38 37
infile2<-"ghcnd-stations.txt"                             #This is the textfile of station locations from GHCND
39
new_proj<-"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs"
40 38
infile3<-"covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script
41 39

  
40
new_proj<-"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs"
41
locs_coord<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0")
42
CRS_locs_WGS84<-"+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0"
42 43
##Paths to inputs and output
43 44
in_path <- "/home/parmentier/Data/benoit_test"
44 45
in_path <- "/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data/"
45 46
out_path<- "/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/output_data/"
46 47
ghcnd_path<- "/home/layers/data/climate/ghcn/v2.92-upd-2012052822"
47 48
setwd(in_path) 
48
out_suffix<-"y2010_2010_VE_01292013"                                                 #User defined output prefix
49
out_suffix<-"y2010_2010_VE_02082013"                                                 #User defined output prefix
49 50
out_region_name<-"_venezuela_region"
50 51
#out_suffix<-"_VE_01292013"
51 52

  
......
77 78
colnames(dat_stat)<-c("STAT_ID","lat","lon","elev","state","name","GSNF","HCNF","WMOID")
78 79
coords<- dat_stat[,c('lon','lat')]
79 80
coordinates(dat_stat)<-coords
80
locs_coord<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0")
81
proj4string(dat_stat)<-locs_coord
81
proj4string(dat_stat)<-locs_coord #this is the WGS84 projection
82 82
#proj4string(dat_stat)<-CRS_interp
83 83
dat_stat2<-spTransform(dat_stat,CRS(new_proj))         # Project from WGS84 to new coord. system
84 84

  
85 85
# Spatial query to find relevant stations
86 86
inside <- !is.na(over(dat_stat2, as(interp_area, "SpatialPolygons")))  #Finding stations contained in the current interpolation area
87
stat_reg<-dat_stat2[inside,]                                            #Finding stations contained in the current interpolation area
87
stat_reg<-dat_stat2[inside,]              #Selecting stations contained in the current interpolation area
88 88

  
89 89
#Quick visualization of station locations
90 90
plot(interp_area, axes =TRUE)
......
122 122

  
123 123
#Transform the subset data frame in a spatial data frame and reproject
124 124
data_reg<-data_table                               #Make a copy of the data frame
125
coords<- data_reg[c('lon.1','lat.1')]              #Define coordinates in a data frame: clean up here!!
125
coords<- data_reg[c('lon','lat')]              #Define coordinates in a data frame: clean up here!!
126 126
                                                   #Wrong label...it is in fact projected...
127 127
coordinates(data_reg)<-coords                      #Assign coordinates to the data frame
128 128
#proj4string(data3)<-locs_coord                  #Assign coordinates reference system in PROJ4 format
129
proj4string(data_reg)<-new_proj                 #Assign coordinates reference system in PROJ4 format
130
#data_proj<-spTransform(data3,CRS(new_proj))     #Project from WGS84 to new coord. system
129
proj4string(data_reg)<-locs_coord                #Assign coordinates reference system in PROJ4 format
130
data_reg<-spTransform(data_reg,CRS(new_proj))     #Project from WGS84 to new coord. system
131 131

  
132 132
plot(interp_area, axes =TRUE)
133 133
plot(stat_reg, pch=1, col="red", cex= 0.7, add=TRUE)
......
137 137
### STEP 3: Save results and outuput in textfile and a shape file
138 138

  
139 139
#Save a textfile of the locations of meteorological stations in the study area
140
write.table(as.data.frame(stat_reg), file=file.path(in_path,paste(out_region_name,"_",
140
write.table(as.data.frame(stat_reg), file=file.path(in_path,paste("stations",out_region_name,"_",
141 141
                                                          out_suffix,".txt",sep="")),sep=",")
142
#Save a textfile and shape file of all the subset data
143
#write.table(data_table,file= paste(path,"/","ghcn_data_",var,out_suffix,".txt",sep=""), sep=",")
144
#outfile<-paste(path,"ghcn_data_",var,out_prefix,sep="")   #Removing extension if it is present
142
outfile<-paste("stations",out_region_name,"_",
143
               out_suffix,sep="")
144
writeOGR(stat_reg,dsn= ".",layer= outfile, driver="ESRI Shapefile",overwrite_layer=TRUE)
145

  
145 146
outfile<-paste("ghcn_data_",var,out_suffix,sep="")         #Name of the file
146 147
#writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
147 148
writeOGR(data_reg,dsn= ".",layer= outfile, driver="ESRI Shapefile",overwrite_layer=TRUE)
148
outfile<-paste(out_region_name,"_",
149
               out_suffix,sep="")
150
writeOGR(stat_reg,dsn= ".",layer= outfile, driver="ESRI Shapefile",overwrite_layer=TRUE)
151 149

  
152 150
###################################################################
153 151
### STEP 4: Extract values at stations from covariates stack of raster images
......
171 169
data_RST<-as.data.frame(stat_val)                                            #This creates a data frame with the values extracted
172 170
data_RST_SDF<-cbind(data_reg,data_RST)
173 171
coordinates(data_RST_SDF)<-coordinates(data_reg) #Transforming data_RST_SDF into a spatial point dataframe
174
CRS<-proj4string(data_reg)
175
proj4string(data_RST_SDF)<-CRS  #Need to assign coordinates...
172
CRS_reg<-proj4string(data_reg)
173
proj4string(data_RST_SDF)<-CRS_reg  #Need to assign coordinates...
176 174

  
177 175
#Creating a date column
178 176
date1<-ISOdate(data_RST_SDF$year,data_RST_SDF$month,data_RST_SDF$day) #Creating a date object from 3 separate column
179 177
date2<-gsub("-","",as.character(as.Date(date1)))
180 178
data_RST_SDF$date<-date2                                              #Date format (year,month,day) is the following: "20100627"
181 179

  
180
#This allows to change only one name of the data.frame
181
pos<-match("value",names(data_RST_SDF)) #Find column with name "value"
182
if (var=="TMAX"){
183
  #names(data_RST_SDF)[pos]<-c("TMax")
184
  data_RST_SDF$value<-data_RST_SDF$value/10                #TMax is the average max temp for monthy data
185
}
186

  
187
#write out a new shapefile (including .prj component)
188
outfile<-paste("daily_covariates_ghcn_data_",var,out_suffix,sep="")         #Name of the file
189
writeOGR(data_RST_SDF,dsn= ".",layer= outfile, driver="ESRI Shapefile",overwrite_layer=TRUE)
190

  
191
###############################################################
192
######## STEP 5: Preparing monthly averages from the ProstGres database
193

  
194
drv <- dbDriver("PostgreSQL")
195
db <- dbConnect(drv, dbname=db.name)
196

  
197
year_start<-2000
198
year_end<-2011
199
time1<-proc.time()    #Start stop watch
200
list_s<-format_s(stat_reg$STAT_ID)
201
data_m<-dbGetQuery(db, paste("SELECT *
202
                            FROM ghcn
203
                            WHERE element=",shQuote(var),
204
                            "AND year>=",year_start,
205
                            "AND year<",year_end,
206
                            "AND station IN ",list_s,";",sep=""))  #Selecting station using a SQL query
207
time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
208
time_minutes<-time_duration[3]/60
209

  
210
# do this work outside of (before) this function
211
# to avoid making a copy of the data frame inside the function call
212
date1<-ISOdate(data_m$year,data_m$month,data_m$day) #Creating a date object from 3 separate column
213
date2<-as.POSIXlt(as.Date(date1))
214
data_m$date<-date2
215
#In Venezuela and other regions where there are not many stations...mflag==S should be added..see Durenne etal.2010.
216
#d<-subset(data_m,year>=2000 & mflag=="0" ) #Selecting dataset 2000-2010 with good quality: 193 stations
217
d<-subset(data_m,mflag=="0" | mflag=="S")
218
#May need some screeing??? i.e. range of temp and elevation...
219
d1<-aggregate(value~station+month, data=d, mean)  #Calculate monthly mean for every station in OR
220
id<-as.data.frame(unique(d1$station))     #Unique station in OR for year 2000-2010: 193 but 7 loss of monthly avg    
221

  
222
dst<-merge(d1, stat_reg, by.x="station", by.y="STAT_ID")   #Inner join all columns are retained
223

  
224
#This allows to change only one name of the data.frame
225
pos<-match("value",names(dst)) #Find column with name "value"
226
if (var=="TMAX"){
227
  names(dst)[pos]<-c("TMax")
228
  dst$TMax<-dst$TMax/10                #TMax is the average max temp for monthy data
229
}
230
#dstjan=dst[dst$month==9,]  #dst contains the monthly averages for tmax for every station over 2000-2010
231

  
232
#Extracting covariates from stack for the monthly dataset...
233
coords<- dst[c('lon','lat')]              #Define coordinates in a data frame
234
coordinates(dst)<-coords                      #Assign coordinates to the data frame
235
proj4string(dst)<-CRS_locs_WGS84                  #Assign coordinates reference system in PROJ4 format
236
dst_month<-spTransform(dst,CRS(CRS_interp))     #Project from WGS84 to new coord. system
237

  
238
stations_val<-extract(s_raster,dst_month)  #extraction of the infomration at station location
239
stations_val<-as.data.frame(stations_val)
240
dst_extract<-cbind(dst_month,stations_val) #this is in sinusoidal from the raster stack
241
dst<-dst_extract
242

  
243
coords<- dst[c('x','y')]              #Define coordinates in a data frame, this is the local x,y
244
coordinates(dst)<-coords                    #Assign coordinates to the data frame
245
proj4string(dst)<-projection(s_raster)        #Assign coordinates reference system in PROJ4 format
246

  
247
####
182 248
#write out a new shapefile (including .prj component)
183
outfile<-paste("covariates_ghcn_data_",var,out_suffix,sep="")         #Name of the file
184
writeOGR(data_RST_SDF,,dsn= ".",layer= outfile, driver="ESRI Shapefile",overwrite_layer=TRUE)
249
outfile<-paste("monthly_covariates_ghcn_data_",var,out_suffix,sep="")         #Name of the file
250
dst$OID<-1:nrow(dst) #need a unique ID?
251
writeOGR(dst,dsn= ".",layer= outfile, driver="ESRI Shapefile",overwrite_layer=TRUE)
185 252

  
186 253
##### END OF SCRIPT ##########
climate/research/oregon/interpolation/Database_stations_extraction_raster_covariates_processing_region.R
1
##################    Data preparation for interpolation   #######################################
2
############################ Extraction of station data ##########################################
3
#This script perform queries on the Postgres database ghcn for stations matching the             
4
#interpolation area. It requires the following inputs:                                           
5
# 1)the text file ofGHCND  stations from NCDC matching the database version release              
6
# 2)a shape file of the study area with geographic coordinates: lonlat WGS84                                                                          #       
7
# 3)a new coordinate system can be provided as an argument                                       
8
# 4)the variable of interest: "TMAX","TMIN" or "PRCP"                                            
9
# 5)the location of raser covariate stack.                                                                                             
10
#The outputs are text files and a shape file of a time subset of the database                    
11
#AUTHOR: Benoit Parmentier                                                                       
12
#DATE: 02/08/2013                                                                                 
13
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--     
14
#Comments and TODO
15
#-Add buffer option...
16
#-Add calculation of monthly mean...
17
##################################################################################################
18

  
19
###Loading R library and packages   
20

  
21
library(RPostgreSQL)
22
library(sp)                                           # Spatial pacakge with class definition by Bivand et al.
23
library(spdep)                                          # Spatial pacakge with methods and spatial stat. by Bivand et al.
24
library(rgdal)                                          # GDAL wrapper for R, spatial utilities
25
library(rgeos)
26
library(rgdal)
27
library(raster)
28
library(rasterVis)
29

  
30
### Parameters and arguments
31

  
32
db.name <- "ghcn"                #name of the Postgres database
33
var <- "TMAX"                    #name of the variables to keep: TMIN, TMAX or PRCP
34
year_start<-"2010"               #starting year for the query (included)
35
year_end<-"2011"                 #end year for the query (excluded)
36
infile1<- "outline_venezuela_region__VE_01292013.shp"      #This is the shape file of outline of the study area.                                              #It is projected alreaday
37
infile2<-"ghcnd-stations.txt"                             #This is the textfile of station locations from GHCND
38
infile3<-"covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script
39

  
40
new_proj<-"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs"
41
locs_coord<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0")
42
CRS_locs_WGS84<-"+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0"
43
##Paths to inputs and output
44
in_path <- "/home/parmentier/Data/benoit_test"
45
in_path <- "/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data/"
46
out_path<- "/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/output_data/"
47
ghcnd_path<- "/home/layers/data/climate/ghcn/v2.92-upd-2012052822"
48
setwd(in_path) 
49
out_suffix<-"y2010_2010_VE_02082013"                                                 #User defined output prefix
50
out_region_name<-"_venezuela_region"
51
#out_suffix<-"_VE_01292013"
52

  
53
### Functions used in the script
54

  
55
format_s <-function(s_ID){
56
  #Format station ID in a vector format/tuple that is used in a psql query.
57
  # Argument 1: vector of station ID
58
  # Return: character of station ID
59
  tx2<-s_ID
60
  tx2<-as.character(tx2)
61
  stat_list<-tx2
62
  temp<-shQuote(stat_list)
63
  t<-paste(temp, collapse= " ")
64
  t1<-gsub(" ", ",",t)
65
  sf_ID<-paste("(",t1,")",sep="") #vector containing the station ID to query
66
  return(sf_ID)
67
}
68

  
69
############ BEGIN: START OF THE SCRIPT #################
70

  
71
##### STEP 1: Select station in the study area
72

  
73
filename<-sub(".shp","",infile1)             #Removing the extension from file.
74
interp_area <- readOGR(".",filename)
75
CRS_interp<-proj4string(interp_area)         #Storing the coordinate information: geographic coordinates longlat WGS84
76

  
77
dat_stat <- read.fwf(file.path(ghcnd_path,"ghcnd-stations.txt"), widths = c(11,9,10,7,3,31,4,4,6),fill=TRUE)
78
colnames(dat_stat)<-c("STAT_ID","lat","lon","elev","state","name","GSNF","HCNF","WMOID")
79
coords<- dat_stat[,c('lon','lat')]
80
coordinates(dat_stat)<-coords
81
proj4string(dat_stat)<-locs_coord #this is the WGS84 projection
82
#proj4string(dat_stat)<-CRS_interp
83
dat_stat2<-spTransform(dat_stat,CRS(new_proj))         # Project from WGS84 to new coord. system
84

  
85
# Spatial query to find relevant stations
86
inside <- !is.na(over(dat_stat2, as(interp_area, "SpatialPolygons")))  #Finding stations contained in the current interpolation area
87
stat_reg<-dat_stat2[inside,]              #Selecting stations contained in the current interpolation area
88

  
89
#Quick visualization of station locations
90
plot(interp_area, axes =TRUE)
91
plot(stat_reg, pch=1, col="red", cex= 0.7, add=TRUE)
92
#plot(data3,pch=1,col="blue",cex=3,add=TRUE)
93
#legend("topleft", pch=1,col="red",bty="n",title= "Stations",cex=1.6)
94
#only 357 station for Venezuela??
95

  
96
####
97
##Add buffer option? 
98
####
99

  
100
#### STEP 2: Connecting to the database and query for relevant data 
101

  
102
drv <- dbDriver("PostgreSQL")
103
db <- dbConnect(drv, dbname=db.name)
104

  
105
time1<-proc.time()    #Start stop watch
106
list_s<-format_s(stat_reg$STAT_ID)
107
data2<-dbGetQuery(db, paste("SELECT *
108
      FROM ghcn
109
      WHERE element=",shQuote(var),
110
      "AND year>=",year_start,
111
      "AND year<",year_end,
112
      "AND station IN ",list_s,";",sep=""))  #Selecting station using a SQL query
113
time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
114
time_minutes<-time_duration[3]/60
115

  
116
###
117
#Add month query and averages here...
118
###
119

  
120
#data2 contains only 46 stations for Venezueal area??
121
data_table<-merge(data2,as.data.frame(stat_reg), by.x = "station", by.y = "STAT_ID")
122

  
123
#Transform the subset data frame in a spatial data frame and reproject
124
data_reg<-data_table                               #Make a copy of the data frame
125
coords<- data_reg[c('lon','lat')]              #Define coordinates in a data frame: clean up here!!
126
                                                   #Wrong label...it is in fact projected...
127
coordinates(data_reg)<-coords                      #Assign coordinates to the data frame
128
#proj4string(data3)<-locs_coord                  #Assign coordinates reference system in PROJ4 format
129
proj4string(data_reg)<-locs_coord                #Assign coordinates reference system in PROJ4 format
130
data_reg<-spTransform(data_reg,CRS(new_proj))     #Project from WGS84 to new coord. system
131

  
132
plot(interp_area, axes =TRUE)
133
plot(stat_reg, pch=1, col="red", cex= 0.7, add=TRUE)
134
plot(data_reg,pch=2,col="blue",cex=2,add=TRUE)
135

  
136
##################################################################
137
### STEP 3: Save results and outuput in textfile and a shape file
138

  
139
#Save a textfile of the locations of meteorological stations in the study area
140
write.table(as.data.frame(stat_reg), file=file.path(in_path,paste("stations",out_region_name,"_",
141
                                                          out_suffix,".txt",sep="")),sep=",")
142
outfile<-paste("stations",out_region_name,"_",
143
               out_suffix,sep="")
144
writeOGR(stat_reg,dsn= ".",layer= outfile, driver="ESRI Shapefile",overwrite_layer=TRUE)
145

  
146
outfile<-paste("ghcn_data_",var,out_suffix,sep="")         #Name of the file
147
#writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
148
writeOGR(data_reg,dsn= ".",layer= outfile, driver="ESRI Shapefile",overwrite_layer=TRUE)
149

  
150
###################################################################
151
### STEP 4: Extract values at stations from covariates stack of raster images
152
#Eventually this step may be skipped if the covariates information is stored in the database...
153

  
154
#The names of covariates can be changed...
155
rnames<-c("x","y","lon","lat","N","E","N_w","E_w","elev","slope","aspect","CANHEIGHT","DISTOC")
156
lc_names<-c("LC1","LC2","LC3","LC4","LC5","LC6","LC7","LC8","LC9","LC10","LC11","LC12")
157
lst_names<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12",
158
             "nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08",
159
             "nobs_09","nobs_10","nobs_11","nobs_12")
160

  
161
covar_names<-c(rnames,lc_names,lst_names)
162

  
163
s_raster<-stack(infile3)                   #read in the data stack
164
names(s_raster)<-covar_names               #Assigning names to the raster layers: making sure it is included in the extraction
165
stat_val<- extract(s_raster, data_reg)        #Extracting values from the raster stack for every point location in coords data frame.
166

  
167
#create a shape file and data_frame with names
168

  
169
data_RST<-as.data.frame(stat_val)                                            #This creates a data frame with the values extracted
170
data_RST_SDF<-cbind(data_reg,data_RST)
171
coordinates(data_RST_SDF)<-coordinates(data_reg) #Transforming data_RST_SDF into a spatial point dataframe
172
CRS_reg<-proj4string(data_reg)
173
proj4string(data_RST_SDF)<-CRS_reg  #Need to assign coordinates...
174

  
175
#Creating a date column
176
date1<-ISOdate(data_RST_SDF$year,data_RST_SDF$month,data_RST_SDF$day) #Creating a date object from 3 separate column
177
date2<-gsub("-","",as.character(as.Date(date1)))
178
data_RST_SDF$date<-date2                                              #Date format (year,month,day) is the following: "20100627"
179

  
180
#This allows to change only one name of the data.frame
181
pos<-match("value",names(data_RST_SDF)) #Find column with name "value"
182
if (var=="TMAX"){
183
  #names(data_RST_SDF)[pos]<-c("TMax")
184
  data_RST_SDF$value<-data_RST_SDF$value/10                #TMax is the average max temp for monthy data
185
}
186

  
187
#write out a new shapefile (including .prj component)
188
outfile<-paste("daily_covariates_ghcn_data_",var,out_suffix,sep="")         #Name of the file
189
writeOGR(data_RST_SDF,dsn= ".",layer= outfile, driver="ESRI Shapefile",overwrite_layer=TRUE)
190

  
191
###############################################################
192
######## STEP 5: Preparing monthly averages from the ProstGres database
193

  
194
drv <- dbDriver("PostgreSQL")
195
db <- dbConnect(drv, dbname=db.name)
196

  
197
year_start<-2000
198
year_end<-2011
199
time1<-proc.time()    #Start stop watch
200
list_s<-format_s(stat_reg$STAT_ID)
201
data_m<-dbGetQuery(db, paste("SELECT *
202
                            FROM ghcn
203
                            WHERE element=",shQuote(var),
204
                            "AND year>=",year_start,
205
                            "AND year<",year_end,
206
                            "AND station IN ",list_s,";",sep=""))  #Selecting station using a SQL query
207
time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
208
time_minutes<-time_duration[3]/60
209

  
210
# do this work outside of (before) this function
211
# to avoid making a copy of the data frame inside the function call
212
date1<-ISOdate(data_m$year,data_m$month,data_m$day) #Creating a date object from 3 separate column
213
date2<-as.POSIXlt(as.Date(date1))
214
data_m$date<-date2
215
#In Venezuela and other regions where there are not many stations...mflag==S should be added..see Durenne etal.2010.
216
#d<-subset(data_m,year>=2000 & mflag=="0" ) #Selecting dataset 2000-2010 with good quality: 193 stations
217
d<-subset(data_m,mflag=="0" | mflag=="S")
218
#May need some screeing??? i.e. range of temp and elevation...
219
d1<-aggregate(value~station+month, data=d, mean)  #Calculate monthly mean for every station in OR
220
id<-as.data.frame(unique(d1$station))     #Unique station in OR for year 2000-2010: 193 but 7 loss of monthly avg    
221

  
222
dst<-merge(d1, stat_reg, by.x="station", by.y="STAT_ID")   #Inner join all columns are retained
223

  
224
#This allows to change only one name of the data.frame
225
pos<-match("value",names(dst)) #Find column with name "value"
226
if (var=="TMAX"){
227
  names(dst)[pos]<-c("TMax")
228
  dst$TMax<-dst$TMax/10                #TMax is the average max temp for monthy data
229
}
230
#dstjan=dst[dst$month==9,]  #dst contains the monthly averages for tmax for every station over 2000-2010
231

  
232
#Extracting covariates from stack for the monthly dataset...
233
coords<- dst[c('lon','lat')]              #Define coordinates in a data frame
234
coordinates(dst)<-coords                      #Assign coordinates to the data frame
235
proj4string(dst)<-CRS_locs_WGS84                  #Assign coordinates reference system in PROJ4 format
236
dst_month<-spTransform(dst,CRS(CRS_interp))     #Project from WGS84 to new coord. system
237

  
238
stations_val<-extract(s_raster,dst_month)  #extraction of the infomration at station location
239
stations_val<-as.data.frame(stations_val)
240
dst_extract<-cbind(dst_month,stations_val) #this is in sinusoidal from the raster stack
241
dst<-dst_extract
242

  
243
coords<- dst[c('x','y')]              #Define coordinates in a data frame, this is the local x,y
244
coordinates(dst)<-coords                    #Assign coordinates to the data frame
245
proj4string(dst)<-projection(s_raster)        #Assign coordinates reference system in PROJ4 format
246

  
247
####
248
#write out a new shapefile (including .prj component)
249
outfile<-paste("monthly_covariates_ghcn_data_",var,out_suffix,sep="")         #Name of the file
250
dst$OID<-1:nrow(dst) #need a unique ID?
251
writeOGR(dst,dsn= ".",layer= outfile, driver="ESRI Shapefile",overwrite_layer=TRUE)
252

  
253
##### END OF SCRIPT ##########

Also available in: Unified diff