1
|
################## Data preparation for interpolation #######################################
|
2
|
############################ Extraction of station data ##########################################
|
3
|
#This script perform queries on the Postgres database ghcn for stations matching the #
|
4
|
#interpolation area. It requires the following inputs: #
|
5
|
# 1)the text file ofGHCND stations from NCDC matching the database version release #
|
6
|
# 2)a shape file of the study area with geographic coordinates: lonlat WGS84 # #
|
7
|
# 3)a new coordinate system can be provided as an argument #
|
8
|
# 4)the variable of interest: "TMAX","TMIN" or "PRCP" #
|
9
|
# #
|
10
|
#The outputs are text files and a shape file of a time subset of the database #
|
11
|
#AUTHOR: Benoit Parmentier #
|
12
|
#DATE: 06/02/212 #
|
13
|
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363-- #
|
14
|
##################################################################################################
|
15
|
|
16
|
###Loading R library and packages
|
17
|
library(RPostgreSQL)
|
18
|
library(sp) # Spatial pacakge with class definition by Bivand et al.
|
19
|
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al.
|
20
|
library(rgdal) # GDAL wrapper for R, spatial utilities
|
21
|
library(rgeos) # Polygon buffering and other vector operations
|
22
|
library(reshape)
|
23
|
library(raster)
|
24
|
|
25
|
### Parameters and arguments
|
26
|
|
27
|
db.name <- "ghcn" #name of the Postgres database
|
28
|
var <- "PRCP" #name of the variables to keep: TMIN, TMAX or PRCP
|
29
|
year_start<-"1970" #starting year for the query (included)
|
30
|
year_end<-"2011" #end year for the query (excluded)
|
31
|
buffer=500 # size of buffer around shapefile to include in spatial query (in km), set to 0 to use no buffer
|
32
|
infile2<-"ghcnd-stations.txt" #This is the textfile of station locations from GHCND
|
33
|
new_proj<-"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs"
|
34
|
#tile="h11v08"
|
35
|
tile="h09v04"
|
36
|
tile="h21v09"
|
37
|
|
38
|
#out_prefix<-"20121212" #User defined output prefix
|
39
|
|
40
|
#for Adam
|
41
|
outpath="/home/wilson/data"
|
42
|
path="/home/wilson/data"
|
43
|
setwd(path)
|
44
|
|
45
|
|
46
|
############ START OF THE SCRIPT #################
|
47
|
|
48
|
##### Connect to Station database
|
49
|
drv <- dbDriver("PostgreSQL")
|
50
|
db <- dbConnect(drv, dbname=db.name)#,options="statement_timeout = 1m")
|
51
|
|
52
|
##### STEP 1: Select station in the study area
|
53
|
## get MODLAND tile information
|
54
|
#tb=read.table("http://landweb.nascom.nasa.gov/developers/sn_tiles/sn_bound_10deg.txt",skip=6,nrows=648,header=T)
|
55
|
#tb$tile=paste("h",sprintf("%02d",tb$ih),"v",sprintf("%02d",tb$iv),sep="")
|
56
|
#tile_bb=tb[tb$tile==tile,] ## identify tile of interest
|
57
|
#roi_ll=extent(tile_bb$lon_min,tile_bb$lon_max,tile_bb$lat_min,tile_bb$lat_max)
|
58
|
|
59
|
modgrid=readOGR("/home/wilson/data/modisgrid","modis_sinusoidal_grid_world")
|
60
|
roi=modgrid[modgrid$h==as.numeric(substr(tile,2,3))&modgrid$v==as.numeric(substr(tile,5,6)),]
|
61
|
CRS_interp="+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
|
62
|
interp_area=spTransform(roi,CRS(CRS_interp))
|
63
|
|
64
|
##### Buffer shapefile if desired
|
65
|
## This is done to include stations from outside the region in the interpolation fitting process and reduce edge effects when stiching regions
|
66
|
if(buffer>0){ #only apply buffer if buffer >0
|
67
|
interp_area=gUnionCascaded(interp_area) #dissolve any subparts of roi (if there are islands, lakes, etc.)
|
68
|
interp_areaC=gCentroid(interp_area) # get centroid of region
|
69
|
interp_areaB=spTransform( # buffer roi (transform to azimuthal equidistant with centroid of region for most (?) accurate buffering, add buffer, then transform to WGS84)
|
70
|
gBuffer(
|
71
|
spTransform(interp_area,
|
72
|
CRS(paste("+proj=aeqd +lat_0=",interp_areaC@coords[2]," +lon_0=",interp_areaC@coords[1]," +ellps=WGS84 +datum=WGS84 +units=m +no_defs ",sep=""))),
|
73
|
width=buffer*1000), # convert buffer (km) to meters
|
74
|
CRS(CRS_interp)) # reproject back to original projection
|
75
|
interp_area=interp_areaB # replace original region with buffered region
|
76
|
}
|
77
|
|
78
|
## get bounding box of study area
|
79
|
bbox=bbox(interp_area)
|
80
|
|
81
|
### read in station location information from database
|
82
|
### use the bbox of the region to include only station in rectangular region to speed up overlay
|
83
|
dat_stat=dbGetQuery(db, paste("SELECT id,name,latitude,longitude
|
84
|
FROM stations
|
85
|
WHERE latitude>=",bbox[2,1]," AND latitude<=",bbox[2,2],"
|
86
|
AND longitude>=",bbox[1,1]," AND longitude<=",bbox[1,2],"
|
87
|
;",sep=""))
|
88
|
coordinates(dat_stat)<-c("longitude","latitude")
|
89
|
proj4string(dat_stat)<-CRS_interp
|
90
|
|
91
|
|
92
|
# Spatial query to find relevant stations
|
93
|
inside <- !is.na(over(dat_stat, as(interp_area, "SpatialPolygons"))) #Finding stations contained in the current interpolation area
|
94
|
stat_roi<-dat_stat[inside,] #Finding stations contained in the current interpolation area
|
95
|
stat_roi<-spTransform(stat_roi,CRS(new_proj)) # Project from WGS84 to new coord. system
|
96
|
|
97
|
#Quick visualization of station locations
|
98
|
#plot(interp_area, axes =TRUE)
|
99
|
#plot(stat_roi, pch=1, col="red", cex= 0.7, add=TRUE)
|
100
|
#legend("topleft", pch=1,col="red",bty="n",title= "Stations",cex=1.6)
|
101
|
|
102
|
#### STEP 2: Connecting to the database and query for relevant data
|
103
|
|
104
|
## Query to link station location information and observations
|
105
|
## Concatenate date columns into single field for easy convert to date
|
106
|
## Divide value by 10 to convert to degrees C and mm
|
107
|
## Subset to years in year_start -> year_end
|
108
|
## Drop missing values (-9999)
|
109
|
## Drop observations that failed quality control (keep only qflag==NA)
|
110
|
system.time(
|
111
|
dd<<-dbGetQuery(db, #save object dd (data daily)
|
112
|
paste("SELECT station,year||'-'||month||'-'||day AS date,value / 10.0 as value,latitude,longitude,elevation
|
113
|
FROM ghcn, stations
|
114
|
WHERE station = id
|
115
|
AND id IN ('",paste(stat_roi$id,collapse="','"),"')
|
116
|
AND element='",var,"'
|
117
|
AND year>=",year_start,"
|
118
|
AND year<",year_end,"
|
119
|
AND value<>-9999
|
120
|
AND qflag IS NULL
|
121
|
;",sep=""))
|
122
|
) ### print used time in seconds - only taking ~30 seconds....
|
123
|
|
124
|
dd=dd[!is.na(dd$value),] #drop any NA values
|
125
|
|
126
|
#################################################################
|
127
|
### STEP 3: generate monthly means for climate-aided interpolation
|
128
|
|
129
|
### first extract average daily values by year.
|
130
|
system.time(
|
131
|
dm<<-dbGetQuery(db, # create dm object (data monthly)
|
132
|
paste("SELECT station,month,count(value) as count,avg(value)/10.0 as value,latitude,longitude,elevation
|
133
|
FROM ghcn, stations
|
134
|
WHERE station = id
|
135
|
AND id IN ('",paste(stat_roi$id,collapse="','"),"')
|
136
|
AND element='",var,"'
|
137
|
AND year>=",year_start,"
|
138
|
AND year<",year_end,"
|
139
|
AND value<>-9999
|
140
|
AND qflag IS NULL
|
141
|
GROUP BY station, month,latitude,longitude,elevation
|
142
|
;",sep=""))
|
143
|
) ### print used time in seconds - only taking ~30 seconds....
|
144
|
|
145
|
### drop months with fewer than 75% of the data observations
|
146
|
### is 75% the right number?
|
147
|
#dm=dm[dm$count>=(.75*10*30),]
|
148
|
|
149
|
## add the monthly data to the daily table
|
150
|
dd$month=as.numeric(as.character(format(as.Date(dd$date),"%m")))
|
151
|
dd$avgvalue=dm$value[match(paste(dd$station,dd$month),paste(dm$station,dm$month))]
|
152
|
dd$avgcount=dm$count[match(paste(dd$station,dd$month),paste(dm$station,dm$month))]
|
153
|
|
154
|
### Generate log-transformed ratio of daily:monthly ppt and add it to daily data
|
155
|
if(var=="PRCP"){
|
156
|
dd$anom=log((1+dd$value)/(1+dd$avgvalue))
|
157
|
dd$anom[dd$anom==Inf|dd$anom2==-Inf]=NA
|
158
|
}
|
159
|
|
160
|
### Generate temperature anomolies for each daily value
|
161
|
if(var!="PRCP"){
|
162
|
dd$anom=dd$value-dd$avgvalue
|
163
|
}
|
164
|
|
165
|
|
166
|
### Transform the subset data frame in a spatial data frame and reproject
|
167
|
dd_sp<-SpatialPointsDataFrame (dd[,c('longitude','latitude')],data=dd,proj=CRS(CRS_interp))
|
168
|
dd_sp<-spTransform(dd_sp,CRS(new_proj)) # Project from WGS84 to new coord. system
|
169
|
|
170
|
### Transform the subset data frame in a spatial data frame and reproject
|
171
|
dm_sp<-SpatialPointsDataFrame (dm[,c('longitude','latitude')],data=dm,proj=CRS(CRS_interp))
|
172
|
dm_sp<-spTransform(dm_sp,CRS(new_proj)) # Project from WGS84 to new coord. system
|
173
|
|
174
|
################################################################
|
175
|
### STEP 4: Save results and outuput in textfile and a shape file
|
176
|
|
177
|
## Save shapefile of station locations
|
178
|
writeOGR(stat_roi,outpath,paste("station_location_",tile,"_",var,sep=""),driver ="ESRI Shapefile")
|
179
|
|
180
|
## save shapefile of daily observations
|
181
|
writeOGR(dd_sp,outpath,paste("/station_daily_",tile,"_",var,sep=""), driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
|
182
|
|
183
|
### write monthly data
|
184
|
writeOGR(dm_sp, outpath, paste("/station_monthly_",tile,"_",var,sep=""), driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
|
185
|
|
186
|
##### END OF SCRIPT ##########
|