Project

General

Profile

Download (6.4 KB) Statistics
| Branch: | Revision:
1
##################    Data preparation for interpolation   #######################################
2
############################ Extraction of station data ##########################################
3
#This script perform queries on the Postgres database ghcn for stations matching the             #
4
#interpolation area. It requires the following inputs:                                           #
5
# 1)the text file ofGHCND  stations from NCDC matching the database version release              #
6
# 2)a shape file of the study area with geographic coordinates: lonlat WGS84                     #                                                     #       
7
# 3)a new coordinate system can be provided as an argument                                       #
8
# 4)the variable of interest: "TMAX","TMIN" or "PRCP"                                            #
9
#                                                                                                #
10
#The outputs are text files and a shape file of a time subset of the database                    #
11
#AUTHOR: Benoit Parmentier                                                                       #
12
#DATE: 06/02/212                                                                                 #
13
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--                                  #
14
##################################################################################################
15

    
16
###Loading R library and packages   
17
library(RPostgreSQL)
18
library(sp)                                             # Spatial pacakge with class definition by Bivand et al.
19
library(spdep)                                          # Spatial pacakge with methods and spatial stat. by Bivand et al.
20
library(rgdal)                                          # GDAL wrapper for R, spatial utilities
21

    
22
### Parameters and arguments
23

    
24
db.name <- "ghcn"                #name of the Postgres database
25
var <- "TMAX"                    #name of the variables to keep: TMIN, TMAX or PRCP
26
year_start<-"2010"               #starting year for the query (included)
27
year_end<-"2011"                 #end year for the query (excluded)
28
infile1<- "ORWGS84_state_outline.shp"                     #This is the shape file of outline of the study area.                
29
infile2<-"ghcnd-stations.txt"                             #This is the textfile of station locations from GHCND
30
new_proj<-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
31

    
32
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations/"        #Jupiter LOCATION on EOS/Atlas
33
#path<-"H:/Data/IPLANT_project/data_Oregon_stations"                      #Jupiter Location on XANDERS
34
setwd(path) 
35
out_prefix<-"y2010_2010_OR_0626012"                                                 #User defined output prefix
36

    
37
### Functions used in the script
38

    
39
format_s <-function(s_ID){
40
  #Format station ID in a vector format/tuple that is used in a psql query.
41
  # Argument 1: vector of station ID
42
  # Return: character of station ID
43
  tx2<-s_ID
44
  tx2<-as.character(tx2)
45
  stat_list<-tx2
46
  temp<-shQuote(stat_list)
47
  t<-paste(temp, collapse= " ")
48
  t1<-gsub(" ", ",",t)
49
  sf_ID<-paste("(",t1,")",sep="") #vector containing the station ID to query
50
  return(sf_ID)
51
}
52

    
53
############ START OF THE SCRIPT #################
54

    
55
##### STEP 1: Select station in the study area
56

    
57
infile1<- "ORWGS84_state_outline.shp"        #This is the shape file of outline of the study area.
58
filename<-sub(".shp","",infile1)             #Removing the extension from file.
59
interp_area <- readOGR(".",filename)
60
CRS_interp<-proj4string(interp_area)         #Storing the coordinate information: geographic coordinates longlat WGS84
61

    
62
dat_stat <- read.fwf("ghcnd-stations.txt", widths = c(11,9,10,7,3,31,4,4,6),fill=TRUE)
63
colnames(dat_stat)<-c("STAT_ID","lat","lon","elev","state","name","GSNF","HCNF","WMOID")
64
coords<- dat_stat[,c('lon','lat')]
65
coordinates(dat_stat)<-coords
66
locs_coord<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0")
67
#proj4string(dat_stat)<-locs_coord
68
proj4string(dat_stat)<-CRS_interp
69

    
70
# Spatial query to find relevant stations
71
inside <- !is.na(over(dat_stat, as(interp_area, "SpatialPolygons")))  #Finding stations contained in the current interpolation area
72
stat_OR<-dat_stat[inside,]                                            #Finding stations contained in the current interpolation area
73

    
74
#Quick visualization of station locations
75
plot(interp_area, axes =TRUE)
76
plot(stat_OR, pch=1, col="red", cex= 0.7, add=TRUE)
77
#legend("topleft", pch=1,col="red",bty="n",title= "Stations",cex=1.6)
78

    
79
#### STEP 2: Connecting to the database and query for relevant data 
80

    
81
drv <- dbDriver("PostgreSQL")
82
db <- dbConnect(drv, dbname=db.name)
83

    
84
time1<-proc.time()    #Start stop watch
85
list_s<-format_s(stat_OR$STAT_ID)
86
data2<-dbGetQuery(db, paste("SELECT *
87
      FROM ghcn
88
      WHERE element=",shQuote(var),
89
      "AND year>=",year_start,
90
      "AND year<",year_end,
91
      "AND station IN ",list_s,";",sep=""))  #Selecting station using a SQL query
92
time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
93
time_minutes<-time_duration[3]/60
94
  
95
data_table<-merge(data2,stat_OR, by.x = "station", by.y = "STAT_ID")
96

    
97
#Transform the subset data frame in a spatial data frame and reproject
98
data3<-data_table                               #Make a copy of the data frame
99
coords<- data3[c('lon.1','lat.1')]              #Define coordinates in a data frame
100
coordinates(data3)<-coords                      #Assign coordinates to the data frame
101
proj4string(data3)<-CRS_interp                  #Assign coordinates reference system in PROJ4 format
102
data_proj<-spTransform(data3,CRS(new_proj))     #Project from WGS84 to new coord. system
103

    
104
### STEP 3: Save results and outuput in textfile and a shape file
105

    
106
#Save a textfile of the locations of meteorological stations in the study area
107
write.table(as.data.frame(stat_OR), file=paste(path,"/","location_study_area",out_prefix,".txt",sep=""),sep=",")
108

    
109
#Save a textfile and shape file of all the subset data
110
write.table(data_table, file= paste(path,"/","ghcn_data_",var,out_prefix,".txt",sep=""), sep=",")
111
#outfile<-paste(path,"ghcn_data_",var,out_prefix,sep="")   #Removing extension if it is present
112
outfile<-paste("ghcn_data_",var,out_prefix,sep="")         #Name of the file
113
writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
114

    
115
##### END OF SCRIPT ##########
(10-10/34)