################## Data preparation for interpolation ###########################################
############################ Extraction of station data #########################################
#This script performs queries on the Postgres database ghcn for stations matching the           #
#interpolation area. It requires the following inputs:                                          #
# 1)the text file of GHCND stations from NCDC matching the database version release             #
# 2)a shape file of the study area with geographic coordinates: lonlat WGS84                    #
# 3)a new coordinate system can be provided as an argument                                      #
# 4)the variable of interest: "TMAX","TMIN" or "PRCP"                                           #
#                                                                                                #
#The outputs are text files and a shape file of a time subset of the database                   #
#AUTHOR: Benoit Parmentier                                                                       #
#DATE: 06/02/2012                                                                                #
#PROJECT: NCEAS IPLANT: Environment and Organisms --TASK#363--                                   #
##################################################################################################

###Loading R library and packages
library(RPostgreSQL)
library(sp)      # Spatial package with class definitions by Bivand et al.
library(spdep)   # Spatial package with methods and spatial stat. by Bivand et al.
library(rgdal)   # GDAL wrapper for R, spatial utilities
library(rgeos)   # Polygon buffering and other vector operations
library(reshape)

### Parameters and arguments

db.name <- "ghcn"               #name of the Postgres database
var <- c("TMAX","TMIN","PRCP")  #name of the variables to keep: TMIN, TMAX or PRCP
year_start<-"1970"              #starting year for the query (included)
year_end<-"2011"                #end year for the query (excluded)

path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations/" #Jupiter LOCATION on EOS/Atlas
#path<-"H:/Data/IPLANT_project/data_Oregon_stations"               #Jupiter Location on XANDERS

outpath=path                    # create a different output path because we don't have write access to others' home dirs
setwd(path)
out_prefix<-"stationarity"      #User-defined output prefix
buffer=100                      #buffer distance around the study area, in km (0 = no buffer)

#for Adam
outpath="/home/wilson/data/"


############ START OF THE SCRIPT #################

##### Connect to Station database
drv <- dbDriver("PostgreSQL")
db <- dbConnect(drv, dbname=db.name)#,options="statement_timeout = 1m")
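
## Optional sanity check (a minimal sketch, not part of the original workflow): confirm the
## connection works and that the tables used below are visible. Assumes the ghcn database
## exposes the "stations" and "ghcn" tables queried later in this script.
#print(dbListTables(db))                                   # should include "stations" and "ghcn"
#print(dbGetQuery(db, "SELECT count(*) FROM stations;"))   # quick row count to verify access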

##### STEP 1: Select stations in the study area

infile1<- "ORWGS84_state_outline.shp"  #This is the shape file of the outline of the study area.
filename<-sub(".shp","",infile1)       #Removing the extension from the file name.
interp_area <- readOGR(".",filename)
CRS_interp<-proj4string(interp_area)   #Storing the coordinate information: geographic coordinates longlat WGS84

##### Buffer shapefile if desired
## This is done to include stations from outside the region in the interpolation fitting process and to reduce edge effects when stitching regions
if(buffer>0){                             #only apply buffer if buffer > 0
  interp_area=gUnionCascaded(interp_area) #dissolve any subparts of roi (if there are islands, lakes, etc.)
  interp_areaC=gCentroid(interp_area)     # get centroid of region
  interp_areaB=spTransform(               # buffer roi (transform to azimuthal equidistant centered on the region centroid for most (?) accurate buffering, add buffer, then transform back to WGS84)
    gBuffer(
      spTransform(interp_area,
                  CRS(paste("+proj=aeqd +lat_0=",interp_areaC@coords[2]," +lon_0=",interp_areaC@coords[1]," +ellps=WGS84 +datum=WGS84 +units=m +no_defs ",sep=""))),
      width=buffer*1000),                 # convert buffer (km) to meters
    CRS(CRS_interp))                      # reproject back to original projection
  # interp_area=interp_areaB              # replace original region with buffered region
}else{
  interp_areaB=interp_area                # no buffer requested: later steps use interp_areaB, so keep the original region
}

## get bounding box of study area
bbox=bbox(interp_areaB)

### read in station location information from database
### use the bbox of the region to include only stations in the rectangular region to speed up the overlay
dat_stat=dbGetQuery(db, paste("SELECT id,name,latitude,longitude
                               FROM stations
                               WHERE latitude>=",bbox[2,1]," AND latitude<=",bbox[2,2],"
                               AND longitude>=",bbox[1,1]," AND longitude<=",bbox[1,2],"
                               ;",sep=""))
coordinates(dat_stat)<-c("longitude","latitude")
proj4string(dat_stat)<-CRS_interp


# Spatial query to find relevant stations
inside <- !is.na(over(dat_stat, as(interp_areaB, "SpatialPolygons")))  #Finding stations contained in the current interpolation area
stat_roi<-dat_stat[inside,]                                            #Keeping only those stations
#stat_roi<-spTransform(stat_roi,CRS(new_proj))                         # Project from WGS84 to new coord. system

#Quick visualization of station locations
plot(interp_area, axes =TRUE)
plot(stat_roi, pch=1, col="red", cex= 0.7, add=TRUE)
#legend("topleft", pch=1,col="red",bty="n",title= "Stations",cex=1.6)
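
## The header lists a shape file of the selected stations among the outputs; below is a minimal,
## optional sketch of that export. The layer name built from out_prefix is an assumption, not
## taken from the original script.
#writeOGR(stat_roi, dsn=outpath, layer=paste(out_prefix,"_stations",sep=""),
#         driver="ESRI Shapefile", overwrite_layer=TRUE)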


#################################################################
### STEP 2: generate monthly means for climate-aided interpolation
## Query to link station location information and observations
## Concatenate date columns into a single field for easy conversion to a date
## Divide value by 10 to convert to degrees C and mm
## Subset to the two comparison periods used below (1970-2000 and 2000-2010)
## Drop missing values (-9999)
## Drop observations that failed quality control (keep only qflag==NA)

### first extract average daily values by month.
system.time(
  d<<-dbGetQuery(db,   # create d, a table of monthly mean daily values per station and element
       paste("SELECT station,month,element,count30,value30,count10,value10,latitude,longitude,elevation
              FROM
              (SELECT station,month,element,count(value) as count30,avg(value)/10.0 as value30,latitude,longitude,elevation
               FROM ghcn, stations
               WHERE station = id
               AND id IN ('",paste(stat_roi$id,collapse="','"),"')
               AND element IN ('",paste(var,collapse="','"),"')
               AND year>=",1970,"
               AND year<",2000,"
               AND value<>-9999
               AND qflag IS NULL
               GROUP BY station, month,latitude,longitude,elevation,element
              ) as a30
              INNER JOIN
              (SELECT station,month,element,count(value) as count10,avg(value)/10.0 as value10
               FROM ghcn, stations
               WHERE station = id
               AND id IN ('",paste(stat_roi$id,collapse="','"),"')
               AND element IN ('",paste(var,collapse="','"),"')
               AND year>=",2000,"
               AND year<",2010,"
               AND value<>-9999
               AND qflag IS NULL
               GROUP BY station, month,element
              ) as a10
              USING (station,element,month)
              ;",sep=""))
)  ### prints elapsed time in seconds (~10 minutes)

save(d,file=paste(outpath,"stationarity.Rdata",sep=""))
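
## The header also mentions text-file outputs; a hedged sketch of one such export (a CSV of the
## monthly means) is below. The file name is an assumption, not from the original script.
#write.csv(d, file=paste(outpath,"station_monthly_means.csv",sep=""), row.names=FALSE)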

######################################################################
#### Explore it
load(paste(outpath,"stationarity.Rdata",sep=""))

## subset by # of observations?
thresh=.75   #minimum fraction of possible daily observations required to keep a station-month
d$keep=d$count30/900>thresh&d$count10/300>thresh   # ~900 possible daily obs per calendar month over 30 years, ~300 over 10 years
table(d$keep)

## create month factor
d$monthname=factor(d$month,labels=format(as.Date(paste(2000,1:12,1,sep="-")),"%B"),ordered=T)
### start PDF
pdf(paste(outpath,"ClimateStationarity.pdf",sep=""),width=11,height=8.5)

library(latticeExtra)
#combineLimits(useOuterStrips(xyplot(value10~value30|monthname+element,data=d[d$keep,],scales=list(relation="free",rot=0),cex=.5,pch=16,
#                                    ylab="2000-2010 Mean Daily Value",xlab="1970-2000 Mean Daily Value",
#                                    main="Comparison of Mean Daily Values",asp=1)))+
#  layer(panel.abline(0,1,col="red"))+
#  layer(panel.text(max(x),min(y),paste("R^2=",round(summary(lm(y~x))$r.squared,2)),cex=.5,pos=2))

for(v in unique(d$element)){
  print(xyplot(value10~value30|monthname,data=d[d$keep&d$element==v,],scales=list(relation="free",rot=0),cex=.5,pch=16,
               ylab="2000-2010 Mean Daily Value",xlab="1970-2000 Mean Daily Value",
               main=paste("Comparison of Mean Daily Values for",v),asp=1)+
          layer(panel.abline(0,1,col="red"))+
          layer(panel.text(max(x),min(y),paste("R^2=",round(summary(lm(y~x))$r.squared,2)),cex=1,pos=2)))
}

## look at differences between the two periods
d$dif=d$value10-d$value30

trellis.par.set(superpose.symbol = list(col=c("blue","grey","green","red"),cex=.5,pch=16))

for(v in unique(d$element)){
  print(xyplot(latitude~longitude|monthname,groups=cut(dif,quantile(d$dif[d$keep&d$element==v],seq(0,1,len=5))),
               data=d[d$keep&d$element==v,],auto.key=list(space="right"),
               main=paste("Current-Past anomalies for",v,"(2000-2010 Daily Means Minus 1970-2000 Daily Means)"),
               sub="Positive values indicate stations that were warmer/wetter in 2000-2010 than in 1970-2000")+
          layer(sp.lines(as(interp_area,"SpatialLines"),col="black")))
}

dev.off()