1 |
0924578a
|
Adam M. Wilson
|
################## Data preparation for interpolation #######################################
|
2 |
|
|
############################ Extraction of station data ##########################################
|
3 |
|
|
#This script perform queries on the Postgres database ghcn for stations matching the #
|
4 |
|
|
#interpolation area. It requires the following inputs: #
|
5 |
|
|
# 1)the text file ofGHCND stations from NCDC matching the database version release #
|
6 |
|
|
# 2)a shape file of the study area with geographic coordinates: lonlat WGS84 # #
|
7 |
|
|
# 3)a new coordinate system can be provided as an argument #
|
8 |
|
|
# 4)the variable of interest: "TMAX","TMIN" or "PRCP" #
|
9 |
|
|
# #
|
10 |
|
|
#The outputs are text files and a shape file of a time subset of the database #
|
11 |
|
|
#AUTHOR: Benoit Parmentier #
|
12 |
|
|
#DATE: 06/02/212 #
|
13 |
|
|
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363-- #
|
14 |
|
|
##################################################################################################
|
15 |
|
|
|
16 |
|
|
###Loading R library and packages
|
17 |
|
|
library(RPostgreSQL)
|
18 |
|
|
library(sp) # Spatial pacakge with class definition by Bivand et al.
|
19 |
|
|
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al.
|
20 |
|
|
library(rgdal) # GDAL wrapper for R, spatial utilities
|
21 |
|
|
library(rgeos) # Polygon buffering and other vector operations
|
22 |
|
|
library(reshape)
|
23 |
|
|
|
24 |
|
|
### Parameters and arguments
|
25 |
|
|
|
26 |
|
|
db.name <- "ghcn" #name of the Postgres database
|
27 |
|
|
var <- c("TMAX","TMIN","PRCP") #name of the variables to keep: TMIN, TMAX or PRCP
|
28 |
|
|
year_start<-"1970" #starting year for the query (included)
|
29 |
|
|
year_end<-"2011" #end year for the query (excluded)
|
30 |
|
|
|
31 |
|
|
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations/" #Jupiter LOCATION on EOS/Atlas
|
32 |
|
|
#path<-"H:/Data/IPLANT_project/data_Oregon_stations" #Jupiter Location on XANDERS
|
33 |
|
|
|
34 |
|
|
outpath=path # create different output path because we don't have write access to other's home dirs
|
35 |
|
|
setwd(path)
|
36 |
|
|
out_prefix<-"stationarity" #User defined output prefix
|
37 |
|
|
buffer=100
|
38 |
|
|
|
39 |
|
|
#for Adam
|
40 |
|
|
outpath="/home/wilson/data/"
|
41 |
|
|
|
42 |
|
|
|
43 |
|
|
############ START OF THE SCRIPT #################
|
44 |
|
|
|
45 |
|
|
##### Connect to Station database
|
46 |
|
|
drv <- dbDriver("PostgreSQL")
|
47 |
|
|
db <- dbConnect(drv, dbname=db.name)#,options="statement_timeout = 1m")
|
48 |
|
|
|
49 |
|
|
##### STEP 1: Select station in the study area
|
50 |
|
|
|
51 |
|
|
infile1<- "ORWGS84_state_outline.shp" #This is the shape file of outline of the study area.
|
52 |
|
|
filename<-sub(".shp","",infile1) #Removing the extension from file.
|
53 |
|
|
interp_area <- readOGR(".",filename)
|
54 |
|
|
CRS_interp<-proj4string(interp_area) #Storing the coordinate information: geographic coordinates longlat WGS84
|
55 |
|
|
|
56 |
|
|
##### Buffer shapefile if desired
|
57 |
|
|
## This is done to include stations from outside the region in the interpolation fitting process and reduce edge effects when stiching regions
|
58 |
|
|
if(buffer>0){ #only apply buffer if buffer >0
|
59 |
|
|
interp_area=gUnionCascaded(interp_area) #dissolve any subparts of roi (if there are islands, lakes, etc.)
|
60 |
|
|
interp_areaC=gCentroid(interp_area) # get centroid of region
|
61 |
|
|
interp_areaB=spTransform( # buffer roi (transform to azimuthal equidistant with centroid of region for most (?) accurate buffering, add buffer, then transform to WGS84)
|
62 |
|
|
gBuffer(
|
63 |
|
|
spTransform(interp_area,
|
64 |
|
|
CRS(paste("+proj=aeqd +lat_0=",interp_areaC@coords[2]," +lon_0=",interp_areaC@coords[1]," +ellps=WGS84 +datum=WGS84 +units=m +no_defs ",sep=""))),
|
65 |
|
|
width=buffer*1000), # convert buffer (km) to meters
|
66 |
|
|
CRS(CRS_interp)) # reproject back to original projection
|
67 |
|
|
# interp_area=interp_areaB # replace original region with buffered region
|
68 |
|
|
}
|
69 |
|
|
|
70 |
|
|
## get bounding box of study area
|
71 |
|
|
bbox=bbox(interp_areab)
|
72 |
|
|
|
73 |
|
|
### read in station location information from database
|
74 |
|
|
### use the bbox of the region to include only station in rectangular region to speed up overlay
|
75 |
|
|
dat_stat=dbGetQuery(db, paste("SELECT id,name,latitude,longitude
|
76 |
|
|
FROM stations
|
77 |
|
|
WHERE latitude>=",bbox[2,1]," AND latitude<=",bbox[2,2],"
|
78 |
|
|
AND longitude>=",bbox[1,1]," AND longitude<=",bbox[1,2],"
|
79 |
|
|
;",sep=""))
|
80 |
|
|
coordinates(dat_stat)<-c("longitude","latitude")
|
81 |
|
|
proj4string(dat_stat)<-CRS_interp
|
82 |
|
|
|
83 |
|
|
|
84 |
|
|
# Spatial query to find relevant stations
|
85 |
|
|
inside <- !is.na(over(dat_stat, as(interp_areab, "SpatialPolygons"))) #Finding stations contained in the current interpolation area
|
86 |
|
|
stat_roi<-dat_stat[inside,] #Finding stations contained in the current interpolation area
|
87 |
|
|
#stat_roi<-spTransform(stat_roi,CRS(new_proj)) # Project from WGS84 to new coord. system
|
88 |
|
|
|
89 |
|
|
#Quick visualization of station locations
|
90 |
|
|
plot(interp_area, axes =TRUE)
|
91 |
|
|
plot(stat_roi, pch=1, col="red", cex= 0.7, add=TRUE)
|
92 |
|
|
#legend("topleft", pch=1,col="red",bty="n",title= "Stations",cex=1.6)
|
93 |
|
|
|
94 |
|
|
|
95 |
|
|
#################################################################
|
96 |
|
|
### STEP 2: generate monthly means for climate-aided interpolation
|
97 |
|
|
## Query to link station location information and observations
|
98 |
|
|
## Concatenate date columns into single field for easy convert to date
|
99 |
|
|
## Divide value by 10 to convert to degrees C and mm
|
100 |
|
|
## Subset to years in year_start -> year_end
|
101 |
|
|
## Drop missing values (-9999)
|
102 |
|
|
## Drop observations that failed quality control (keep only qflag==NA)
|
103 |
|
|
|
104 |
|
|
### first extract average daily values by month.
|
105 |
|
|
system.time(
|
106 |
|
|
d<<-dbGetQuery(db, # create dm object (data monthly)
|
107 |
|
|
paste("SELECT station,month,element,count30,value30,count10,value10,latitude,longitude,elevation
|
108 |
|
|
FROM
|
109 |
|
|
(SELECT station,month,element,count(value) as count30,avg(value)/10.0 as value30,latitude,longitude,elevation
|
110 |
|
|
FROM ghcn, stations
|
111 |
|
|
WHERE station = id
|
112 |
|
|
AND id IN ('",paste(stat_roi$id,collapse="','"),"')
|
113 |
|
|
AND element IN ('",paste(var,collapse="','"),"')
|
114 |
|
|
AND year>=",1970,"
|
115 |
|
|
AND year<",2000,"
|
116 |
|
|
AND value<>-9999
|
117 |
|
|
AND qflag IS NULL
|
118 |
|
|
GROUP BY station, month,latitude,longitude,elevation,element
|
119 |
|
|
) as a30
|
120 |
|
|
INNER JOIN
|
121 |
|
|
(SELECT station,month,element,count(value) as count10,avg(value)/10.0 as value10
|
122 |
|
|
FROM ghcn, stations
|
123 |
|
|
WHERE station = id
|
124 |
|
|
AND id IN ('",paste(stat_roi$id,collapse="','"),"')
|
125 |
|
|
AND element IN ('",paste(var,collapse="','"),"')
|
126 |
|
|
AND year>=",2000,"
|
127 |
|
|
AND year<",2010,"
|
128 |
|
|
AND value<>-9999
|
129 |
|
|
AND qflag IS NULL
|
130 |
|
|
GROUP BY station, month,element
|
131 |
|
|
) as a10
|
132 |
|
|
USING (station,element,month)
|
133 |
|
|
;",sep=""))
|
134 |
|
|
) ### print used time in seconds ~ 10 minutes
|
135 |
|
|
|
136 |
|
|
save(d,file=paste(outpath,"stationarity.Rdata"))
|
137 |
|
|
|
138 |
|
|
#####################################################################33
|
139 |
|
|
#### Explore it
|
140 |
|
|
load(paste(outpath,"stationarity.Rdata"))
|
141 |
|
|
|
142 |
|
|
## subset by # of observations?
|
143 |
|
|
thresh=.75 #threshold % to keep
|
144 |
|
|
d$keep=d$count30/900>thresh&d$count10/300>thresh
|
145 |
|
|
table(d$keep)
|
146 |
|
|
|
147 |
|
|
## create month factor
|
148 |
|
|
d$monthname=factor(d$month,labels=format(as.Date(paste(2000,1:12,1,sep="-")),"%B"),ordered=T)
|
149 |
|
|
### start PDF
|
150 |
|
|
pdf(paste(outpath,"ClimateStationarity.pdf",sep=""),width=11,height=8.5)
|
151 |
|
|
|
152 |
|
|
library(latticeExtra)
|
153 |
|
|
#combineLimits(useOuterStrips(xyplot(value10~value30|monthname+element,data=d[d$keep,],scales=list(relation="free",rot=0),cex=.5,pch=16,
|
154 |
|
|
# ylab="2000-2010 Mean Daily Value",xlab="1970-2000 Mean Daily Value",
|
155 |
|
|
# main="Comparison of Mean Daily Values",asp=1)))+
|
156 |
|
|
# layer(panel.abline(0,1,col="red"))+
|
157 |
|
|
# layer(panel.text(max(x),min(y),paste("R^2=",round(summary(lm(y~x))$r.squared,2)),cex=.5,pos=2))
|
158 |
|
|
|
159 |
|
|
for(v in unique(d$element)){
|
160 |
|
|
print(xyplot(value10~value30|monthname,data=d[d$keep&d$element==v,],scales=list(relation="free",rot=0),cex=.5,pch=16,
|
161 |
|
|
ylab="2000-2010 Mean Daily Value",xlab="1970-2000 Mean Daily Value",
|
162 |
|
|
main=paste("Comparison of Mean Daily Values for",v),asp=1)+
|
163 |
|
|
layer(panel.abline(0,1,col="red"))+
|
164 |
|
|
layer(panel.text(max(x),min(y),paste("R^2=",round(summary(lm(y~x))$r.squared,2)),cex=1,pos=2)))
|
165 |
|
|
}
|
166 |
|
|
|
167 |
|
|
## look at deviances
|
168 |
|
|
d$dif=d$value10-d$value30
|
169 |
|
|
|
170 |
|
|
trellis.par.set(superpose.symbol = list(col=c("blue","grey","green","red"),cex=.5,pch=16))
|
171 |
|
|
|
172 |
|
|
for(v in unique(d$element)){
|
173 |
|
|
print(xyplot(latitude~longitude|monthname,group=cut(dif,quantile(d$dif[d$keep&d$element==v],seq(0,1,len=5))),
|
174 |
|
|
data=d[d$keep&d$element==v,],auto.key=list(space="right"),
|
175 |
|
|
main=paste("Current-Past anomolies for",v," (2000-2010 Daily Means Minus 1970-2000 Daily Means)"),
|
176 |
|
|
sub="Positive values indicate stations that were warmer/wetter in 2000-2010 than 1970-2000")+
|
177 |
|
|
layer(sp.lines(as(interp_area,"SpatialLines"),col="black")))
|
178 |
|
|
}
|
179 |
|
|
|
180 |
|
|
dev.off()
|