Project

General

Profile

Download (10.1 KB) Statistics
| Branch: | Revision:
1
##################    Data preparation for interpolation   #######################################
2
############################ Extraction of station data ##########################################
3
#This script perform queries on the Postgres database ghcn for stations matching the             #
4
#interpolation area. It requires the following inputs:                                           #
5
# 1)the text file ofGHCND  stations from NCDC matching the database version release              #
6
# 2)a shape file of the study area with geographic coordinates: lonlat WGS84                     #                                                     #       
7
# 3)a new coordinate system can be provided as an argument                                       #
8
# 4)the variable of interest: "TMAX","TMIN" or "PRCP"                                            #
9
#                                                                                                #
10
#The outputs are text files and a shape file of a time subset of the database                    #
11
#AUTHOR: Benoit Parmentier                                                                       #
12
#DATE: 06/02/212                                                                                 #
13
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--                                  #
14
##################################################################################################
15

    
16
###Loading R library and packages   
17
library(RPostgreSQL)
18
library(sp)                                             # Spatial pacakge with class definition by Bivand et al.
19
library(spdep)                                          # Spatial pacakge with methods and spatial stat. by Bivand et al.
20
library(rgdal)                                          # GDAL wrapper for R, spatial utilities
21
library(rgeos)                                          # Polygon buffering and other vector operations
22
library(reshape)
23

    
24
### Parameters and arguments
25

    
26
db.name <- "ghcn"                #name of the Postgres database
27
var <- c("TMAX","TMIN","PRCP")                    #name of the variables to keep: TMIN, TMAX or PRCP
28
year_start<-"1970"               #starting year for the query (included)
29
year_end<-"2011"                 #end year for the query (excluded)
30

    
31
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations/"        #Jupiter LOCATION on EOS/Atlas
32
#path<-"H:/Data/IPLANT_project/data_Oregon_stations"                      #Jupiter Location on XANDERS
33

    
34
outpath=path                                                              # create different output path because we don't have write access to other's home dirs
35
setwd(path) 
36
out_prefix<-"stationarity"                                                 #User defined output prefix
37
buffer=100
38

    
39
#for Adam
40
outpath="/home/wilson/data/"
41

    
42

    
43
############ START OF THE SCRIPT #################
44

    
45
#####  Connect to Station database
46
drv <- dbDriver("PostgreSQL")
47
db <- dbConnect(drv, dbname=db.name)#,options="statement_timeout = 1m")
48

    
49
##### STEP 1: Select station in the study area
50

    
51
infile1<- "ORWGS84_state_outline.shp"        #This is the shape file of outline of the study area.
52
filename<-sub(".shp","",infile1)             #Removing the extension from file.
53
interp_area <- readOGR(".",filename)
54
CRS_interp<-proj4string(interp_area)         #Storing the coordinate information: geographic coordinates longlat WGS84
55

    
56
#####  Buffer shapefile if desired
57
##     This is done to include stations from outside the region in the interpolation fitting process and reduce edge effects when stiching regions
58
if(buffer>0){  #only apply buffer if buffer >0
59
  interp_area=gUnionCascaded(interp_area)  #dissolve any subparts of roi (if there are islands, lakes, etc.)
60
  interp_areaC=gCentroid(interp_area)       # get centroid of region
61
  interp_areaB=spTransform(                # buffer roi (transform to azimuthal equidistant with centroid of region for most (?) accurate buffering, add buffer, then transform to WGS84)
62
    gBuffer(
63
      spTransform(interp_area,
64
                  CRS(paste("+proj=aeqd +lat_0=",interp_areaC@coords[2]," +lon_0=",interp_areaC@coords[1]," +ellps=WGS84 +datum=WGS84 +units=m +no_defs ",sep=""))),
65
      width=buffer*1000),                  # convert buffer (km) to meters
66
    CRS(CRS_interp))                       # reproject back to original projection
67
#  interp_area=interp_areaB                 # replace original region with buffered region
68
}
69

    
70
## get bounding box of study area
71
bbox=bbox(interp_areab)
72

    
73
### read in station location information from database
74
### use the bbox of the region to include only station in rectangular region to speed up overlay
75
dat_stat=dbGetQuery(db, paste("SELECT id,name,latitude,longitude
76
                  FROM stations
77
                  WHERE latitude>=",bbox[2,1]," AND latitude<=",bbox[2,2],"
78
                  AND longitude>=",bbox[1,1]," AND longitude<=",bbox[1,2],"
79
                  ;",sep=""))
80
coordinates(dat_stat)<-c("longitude","latitude")
81
proj4string(dat_stat)<-CRS_interp
82

    
83

    
84
# Spatial query to find relevant stations
85
inside <- !is.na(over(dat_stat, as(interp_areab, "SpatialPolygons")))  #Finding stations contained in the current interpolation area
86
stat_roi<-dat_stat[inside,]                                            #Finding stations contained in the current interpolation area
87
#stat_roi<-spTransform(stat_roi,CRS(new_proj))         # Project from WGS84 to new coord. system
88

    
89
#Quick visualization of station locations
90
plot(interp_area, axes =TRUE)
91
plot(stat_roi, pch=1, col="red", cex= 0.7, add=TRUE)
92
#legend("topleft", pch=1,col="red",bty="n",title= "Stations",cex=1.6)
93

    
94

    
95
#################################################################
96
### STEP 2: generate monthly means for climate-aided interpolation
97
##  Query to link station location information and observations
98
##  Concatenate date columns into single field for easy convert to date
99
##  Divide value by 10 to convert to degrees C and mm
100
##  Subset to years in year_start -> year_end
101
##  Drop missing values (-9999)
102
##  Drop observations that failed quality control (keep only qflag==NA)
103

    
104
### first extract average daily values by month.  
105
system.time(
106
           d<<-dbGetQuery(db,  # create dm object (data monthly)
107
                          paste("SELECT station,month,element,count30,value30,count10,value10,latitude,longitude,elevation
108
                                 FROM 
109
                                          (SELECT station,month,element,count(value) as count30,avg(value)/10.0 as value30,latitude,longitude,elevation
110
                                           FROM ghcn, stations
111
                                           WHERE station = id
112
                                           AND id IN ('",paste(stat_roi$id,collapse="','"),"')
113
                                           AND element IN ('",paste(var,collapse="','"),"')
114
                                           AND year>=",1970,"
115
                                           AND year<",2000,"
116
                                           AND value<>-9999
117
                                           AND qflag IS NULL
118
                                           GROUP BY station, month,latitude,longitude,elevation,element
119
                                           ) as a30
120
                                  INNER JOIN 
121
                                           (SELECT station,month,element,count(value) as count10,avg(value)/10.0 as value10
122
                                           FROM ghcn, stations
123
                                           WHERE station = id
124
                                           AND id IN ('",paste(stat_roi$id,collapse="','"),"')
125
                                           AND element IN ('",paste(var,collapse="','"),"')
126
                                           AND year>=",2000,"
127
                                           AND year<",2010,"
128
                                           AND value<>-9999
129
                                           AND qflag IS NULL
130
                                           GROUP BY station, month,element
131
                                           ) as a10
132
                                      USING (station,element,month)
133
                                 ;",sep=""))
134
            )  ### print used time in seconds  ~ 10 minutes
135

    
136
save(d,file=paste(outpath,"stationarity.Rdata"))
137

    
138
#####################################################################33
139
#### Explore it
140
load(paste(outpath,"stationarity.Rdata"))
141

    
142
## subset by # of observations?
143
thresh=.75 #threshold % to keep
144
d$keep=d$count30/900>thresh&d$count10/300>thresh
145
table(d$keep)
146

    
147
## create month factor
148
d$monthname=factor(d$month,labels=format(as.Date(paste(2000,1:12,1,sep="-")),"%B"),ordered=T)
149
### start PDF
150
pdf(paste(outpath,"ClimateStationarity.pdf",sep=""),width=11,height=8.5)
151

    
152
library(latticeExtra)
153
#combineLimits(useOuterStrips(xyplot(value10~value30|monthname+element,data=d[d$keep,],scales=list(relation="free",rot=0),cex=.5,pch=16,
154
#                      ylab="2000-2010 Mean Daily Value",xlab="1970-2000 Mean Daily Value",
155
#                      main="Comparison of Mean Daily Values",asp=1)))+
156
#  layer(panel.abline(0,1,col="red"))+
157
#  layer(panel.text(max(x),min(y),paste("R^2=",round(summary(lm(y~x))$r.squared,2)),cex=.5,pos=2))
158

    
159
for(v in unique(d$element)){
160
print(xyplot(value10~value30|monthname,data=d[d$keep&d$element==v,],scales=list(relation="free",rot=0),cex=.5,pch=16,
161
                      ylab="2000-2010 Mean Daily Value",xlab="1970-2000 Mean Daily Value",
162
                      main=paste("Comparison of Mean Daily Values for",v),asp=1)+
163
  layer(panel.abline(0,1,col="red"))+
164
  layer(panel.text(max(x),min(y),paste("R^2=",round(summary(lm(y~x))$r.squared,2)),cex=1,pos=2)))
165
}
166

    
167
## look at deviances
168
d$dif=d$value10-d$value30
169

    
170
trellis.par.set(superpose.symbol = list(col=c("blue","grey","green","red"),cex=.5,pch=16))
171

    
172
 for(v in unique(d$element)){
173
    print(xyplot(latitude~longitude|monthname,group=cut(dif,quantile(d$dif[d$keep&d$element==v],seq(0,1,len=5))),
174
       data=d[d$keep&d$element==v,],auto.key=list(space="right"),
175
                 main=paste("Current-Past anomolies for",v," (2000-2010 Daily Means Minus 1970-2000 Daily Means)"),
176
                 sub="Positive values indicate stations that were warmer/wetter in 2000-2010 than 1970-2000")+
177
          layer(sp.lines(as(interp_area,"SpatialLines"),col="black")))
178
}
179

    
180
dev.off()
(11-11/11)