Project

General

Profile

« Previous | Next » 

Revision 95d521f9

Added by Benoit Parmentier over 12 years ago

GHNCD station data selection using Postgres database, initial commit, task #363

View differences:

climate/research/oregon/interpolation/GHCND_stations_extraction_study_area.R
1
##################    Data preparation for interpolation   #######################################
2
###########################  TWO-STAGE REGRESSION  ###############################################
3
#This script perform queries on the Postgres database ghcn for stations matching the             #
4
#interpolation area. It requires the text file of stations and a shape file of the study area.   #       
5
#Note that the projection for both GHCND and study area is lonlat WGS84.                         #
6
#AUTHOR: Benoit Parmentier                                                                       #
7
#DATE: 06/02/212                                                                                 #
8
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--                                  #
9
##################################################################################################
10

  
11
###Loading R library and packages   
12
library(RPostgreSQL)
13
library(sp)                                             # Spatial pacakge with class definition by Bivand et al.
14
library(spdep)                                          # Spatial pacakge with methods and spatial stat. by Bivand et al.
15
library(rgdal)                                          # GDAL wrapper for R, spatial utilities
16

  
17
### Parameters and arguments
18

  
19
db.name <- "ghcn"                #name of the Postgres database
20
var <- "TMAX"                    #name of the variables to keep: TMIN, TMAX or PRCP
21
year_start<-"2010"               #starting year for the query (included)
22
year_end<-"2011"                 #end year for the query (excluded)
23
infile1<- "ORWGS84_state_outline.shp"                     #This is the shape file of outline of the study area.                
24
infile2<-"ghcnd-stations.txt"                             #This is the textfile of station locations from GHCND
25

  
26
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations/"        #Jupiter LOCATION on EOS/Atlas
27
#path<-"H:/Data/IPLANT_project/data_Oregon_stations"                      #Jupiter Location on XANDERS
28
setwd(path) 
29
out_prefix<-"y2010_2010_OR_0602012"                                                 #User defined output prefix
30

  
31
### Functions
32

  
33
format_s <-function(s_ID){
34
  #Format station ID in a vector format/tuple that is used in a psql query.
35
  # Argument 1: vector of station ID
36
  # Return: character of station ID
37
  tx2<-s_ID
38
  tx2<-as.character(tx2)
39
  stat_list<-tx2
40
  temp<-shQuote(stat_list)
41
  t<-paste(temp, collapse= " ")
42
  t1<-gsub(" ", ",",t)
43
  sf_ID<-paste("(",t1,")",sep="") #vector containing the station ID to query
44
  return(sf_ID)
45
}
46

  
47
############ START OF THE SCRIPT #################
48

  
49
##### STEP 1: Select station in Oregon
50

  
51
infile1<- "ORWGS84_state_outline.shp"        #This is the shape file of outline of the study area.
52
filename<-sub(".shp","",infile1)             #Removing the extension from file.
53
interp_area <- readOGR(".",filename)
54
CRS_interp<-proj4string(interp_area)         #Storing the coordinate information: geographic coordinates longlat WGS84
55

  
56
dat_stat <- read.fwf("ghcnd-stations.txt", widths = c(11,9,10,7,3,31,4,4,6),fill=TRUE)
57
colnames(dat_stat)<-c("STAT_ID","lat","lon","elev","state","name","GSNF","HCNF","WMOID")
58
coords<- dat_stat[,c('lon','lat')]
59
coordinates(dat_stat)<-coords
60
locs_coord<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0")
61
#proj4string(dat_stat)<-locs_coord
62
proj4string(dat_stat)<-CRS_interp
63

  
64
# Spatial query to find relevant stations
65
inside <- !is.na(over(dat_stat, as(interp_area, "SpatialPolygons")))  #Finding stations contained in the current interpolation area
66
stat_OR<-dat_stat[inside,]                                            #Finding stations contained in the current interpolation area
67

  
68
#Quick visualization of station locations
69
plot(interp_area, axes =TRUE)
70
plot(stat_OR, pch=1, col="red", cex= 0.7, add=TRUE)
71
#legend("topleft", pch=1,col="red",bty="n",title= "Stations",cex=1.6)
72

  
73
#### STEP 2: Connecting to the database and query for relevant data 
74

  
75
drv <- dbDriver("PostgreSQL")
76
db <- dbConnect(drv, dbname=db.name)
77

  
78
z1<-stat_OR$STAT_ID[999:1002] # Selecting three station to check the query and processing time
79
l2<-format_s(z1)  
80

  
81
d1<-dbGetQuery(db, paste("SELECT *
82
      FROM ghcn
83
      WHERE element=",shQuote(var),
84
      "AND year>=",year_start,
85
      "AND year<",year_end,
86
      "AND station IN ",l2,";",sep=""))  #Selecting one station
87

  
88
time1<-proc.time()    #Start stop watch
89
list_s<-format_s(stat_OR$STAT_ID)
90
data<-dbGetQuery(db, paste("SELECT *
91
      FROM ghcn
92
      WHERE element=",shQuote(var),
93
      "AND year>=",year_start,
94
      "AND year<",year_end,
95
      "AND station IN ",list_s,";",sep=""))  #Selecting one station 
96
time_duration<-proc.time()-time1 #time for the query?
97

  
98
write.table(data, file= paste(path,"/","ghcn_data_",var,out_prefix,".txt",sep=""), sep=",")
99
#write.table(d1, file= paste(path,"/","ghcn_data_",var,out_prefix,".txt",sep=""), sep=",")
100
outfile<-file= paste(path,"/","ghcn_data_",var,out_prefix,".shp",sep="")   #Removing extension if it is present
101
#writeOGR(data_SDF,".", outfile, driver ="ESRI Shapefile")
102

  
103
##### END OF SCRIPT ##########
104

  
105
# data<-dbGetQuery(db, paste("SELECT *
106
#       FROM ghcn
107
#       WHERE element='TMAX'
108
#       AND station IN ",t1,";",sep=""))  #Selecting one station

Also available in: Unified diff