1 |
95d521f9
|
Benoit Parmentier
|
################## Data preparation for interpolation #######################################
|
2 |
b8bf4042
|
Benoit Parmentier
|
############################ Extraction of station data ##########################################
|
3 |
95d521f9
|
Benoit Parmentier
|
#This script perform queries on the Postgres database ghcn for stations matching the #
|
4 |
b8bf4042
|
Benoit Parmentier
|
#interpolation area. It requires the following inputs: #
|
5 |
|
|
# 1)the text file ofGHCND stations from NCDC matching the database version release #
|
6 |
|
|
# 2)a shape file of the study area with geographic coordinates: lonlat WGS84 # #
|
7 |
|
|
# 3)a new coordinate system can be provided as an argument #
|
8 |
|
|
# 4)the variable of interest: "TMAX","TMIN" or "PRCP" #
|
9 |
|
|
# #
|
10 |
|
|
#The outputs are text files and a shape file of a time subset of the database #
|
11 |
95d521f9
|
Benoit Parmentier
|
#AUTHOR: Benoit Parmentier #
|
12 |
|
|
#DATE: 06/02/212 #
|
13 |
|
|
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363-- #
|
14 |
|
|
##################################################################################################
|
15 |
|
|
|
16 |
|
|
###Loading R library and packages
|
17 |
|
|
library(RPostgreSQL)
|
18 |
|
|
library(sp) # Spatial pacakge with class definition by Bivand et al.
|
19 |
|
|
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al.
|
20 |
|
|
library(rgdal) # GDAL wrapper for R, spatial utilities
|
21 |
|
|
|
22 |
|
|
### Parameters and arguments
|
23 |
|
|
|
24 |
|
|
db.name <- "ghcn" #name of the Postgres database
|
25 |
|
|
var <- "TMAX" #name of the variables to keep: TMIN, TMAX or PRCP
|
26 |
|
|
year_start<-"2010" #starting year for the query (included)
|
27 |
|
|
year_end<-"2011" #end year for the query (excluded)
|
28 |
|
|
infile1<- "ORWGS84_state_outline.shp" #This is the shape file of outline of the study area.
|
29 |
|
|
infile2<-"ghcnd-stations.txt" #This is the textfile of station locations from GHCND
|
30 |
b8bf4042
|
Benoit Parmentier
|
new_proj<-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
|
31 |
95d521f9
|
Benoit Parmentier
|
|
32 |
|
|
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations/" #Jupiter LOCATION on EOS/Atlas
|
33 |
|
|
#path<-"H:/Data/IPLANT_project/data_Oregon_stations" #Jupiter Location on XANDERS
|
34 |
|
|
setwd(path)
|
35 |
b8bf4042
|
Benoit Parmentier
|
out_prefix<-"y2010_2010_OR_0626012" #User defined output prefix
|
36 |
95d521f9
|
Benoit Parmentier
|
|
37 |
b8bf4042
|
Benoit Parmentier
|
### Functions used in the script
|
38 |
95d521f9
|
Benoit Parmentier
|
|
39 |
|
|
format_s <-function(s_ID){
|
40 |
|
|
#Format station ID in a vector format/tuple that is used in a psql query.
|
41 |
|
|
# Argument 1: vector of station ID
|
42 |
|
|
# Return: character of station ID
|
43 |
|
|
tx2<-s_ID
|
44 |
|
|
tx2<-as.character(tx2)
|
45 |
|
|
stat_list<-tx2
|
46 |
|
|
temp<-shQuote(stat_list)
|
47 |
|
|
t<-paste(temp, collapse= " ")
|
48 |
|
|
t1<-gsub(" ", ",",t)
|
49 |
|
|
sf_ID<-paste("(",t1,")",sep="") #vector containing the station ID to query
|
50 |
|
|
return(sf_ID)
|
51 |
|
|
}
|
52 |
|
|
|
53 |
|
|
############ START OF THE SCRIPT #################
|
54 |
|
|
|
55 |
b8bf4042
|
Benoit Parmentier
|
##### STEP 1: Select station in the study area
|
56 |
95d521f9
|
Benoit Parmentier
|
|
57 |
|
|
infile1<- "ORWGS84_state_outline.shp" #This is the shape file of outline of the study area.
|
58 |
|
|
filename<-sub(".shp","",infile1) #Removing the extension from file.
|
59 |
|
|
interp_area <- readOGR(".",filename)
|
60 |
|
|
CRS_interp<-proj4string(interp_area) #Storing the coordinate information: geographic coordinates longlat WGS84
|
61 |
|
|
|
62 |
|
|
dat_stat <- read.fwf("ghcnd-stations.txt", widths = c(11,9,10,7,3,31,4,4,6),fill=TRUE)
|
63 |
|
|
colnames(dat_stat)<-c("STAT_ID","lat","lon","elev","state","name","GSNF","HCNF","WMOID")
|
64 |
|
|
coords<- dat_stat[,c('lon','lat')]
|
65 |
|
|
coordinates(dat_stat)<-coords
|
66 |
|
|
locs_coord<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0")
|
67 |
|
|
#proj4string(dat_stat)<-locs_coord
|
68 |
|
|
proj4string(dat_stat)<-CRS_interp
|
69 |
|
|
|
70 |
|
|
# Spatial query to find relevant stations
|
71 |
|
|
inside <- !is.na(over(dat_stat, as(interp_area, "SpatialPolygons"))) #Finding stations contained in the current interpolation area
|
72 |
|
|
stat_OR<-dat_stat[inside,] #Finding stations contained in the current interpolation area
|
73 |
|
|
|
74 |
|
|
#Quick visualization of station locations
|
75 |
|
|
plot(interp_area, axes =TRUE)
|
76 |
|
|
plot(stat_OR, pch=1, col="red", cex= 0.7, add=TRUE)
|
77 |
|
|
#legend("topleft", pch=1,col="red",bty="n",title= "Stations",cex=1.6)
|
78 |
|
|
|
79 |
|
|
#### STEP 2: Connecting to the database and query for relevant data
|
80 |
|
|
|
81 |
|
|
drv <- dbDriver("PostgreSQL")
|
82 |
|
|
db <- dbConnect(drv, dbname=db.name)
|
83 |
|
|
|
84 |
|
|
time1<-proc.time() #Start stop watch
|
85 |
|
|
list_s<-format_s(stat_OR$STAT_ID)
|
86 |
b8bf4042
|
Benoit Parmentier
|
data2<-dbGetQuery(db, paste("SELECT *
|
87 |
95d521f9
|
Benoit Parmentier
|
FROM ghcn
|
88 |
|
|
WHERE element=",shQuote(var),
|
89 |
|
|
"AND year>=",year_start,
|
90 |
|
|
"AND year<",year_end,
|
91 |
b8bf4042
|
Benoit Parmentier
|
"AND station IN ",list_s,";",sep="")) #Selecting station using a SQL query
|
92 |
|
|
time_duration<-proc.time()-time1 #Time for the query may be long given the size of the database
|
93 |
|
|
time_minutes<-time_duration[3]/60
|
94 |
|
|
|
95 |
|
|
data_table<-merge(data2,stat_OR, by.x = "station", by.y = "STAT_ID")
|
96 |
|
|
|
97 |
|
|
#Transform the subset data frame in a spatial data frame and reproject
|
98 |
|
|
data3<-data_table #Make a copy of the data frame
|
99 |
|
|
coords<- data3[c('lon.1','lat.1')] #Define coordinates in a data frame
|
100 |
|
|
coordinates(data3)<-coords #Assign coordinates to the data frame
|
101 |
|
|
proj4string(data3)<-CRS_interp #Assign coordinates reference system in PROJ4 format
|
102 |
|
|
data_proj<-spTransform(data3,CRS(new_proj)) #Project from WGS84 to new coord. system
|
103 |
|
|
|
104 |
|
|
### STEP 3: Save results and outuput in textfile and a shape file
|
105 |
|
|
|
106 |
|
|
#Save a textfile of the locations of meteorological stations in the study area
|
107 |
|
|
write.table(as.data.frame(stat_OR), file=paste(path,"/","location_study_area",out_prefix,".txt",sep=""),sep=",")
|
108 |
|
|
|
109 |
|
|
#Save a textfile and shape file of all the subset data
|
110 |
|
|
write.table(data_table, file= paste(path,"/","ghcn_data_",var,out_prefix,".txt",sep=""), sep=",")
|
111 |
|
|
#outfile<-paste(path,"ghcn_data_",var,out_prefix,sep="") #Removing extension if it is present
|
112 |
|
|
outfile<-paste("ghcn_data_",var,out_prefix,sep="") #Name of the file
|
113 |
|
|
writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
|
114 |
95d521f9
|
Benoit Parmentier
|
|
115 |
|
|
##### END OF SCRIPT ##########
|