Project

General

Profile

Download (9.91 KB) Statistics
| Branch: | Revision:
1
##################    Data preparation for interpolation   #######################################
2
############################ Extraction of station data ##########################################
3
#This script perform queries on the Postgres database ghcn for stations matching the             #
4
#interpolation area. It requires the following inputs:                                           #
5
# 1)the text file ofGHCND  stations from NCDC matching the database version release              #
6
# 2)a shape file of the study area with geographic coordinates: lonlat WGS84                     #                                                     #       
7
# 3)a new coordinate system can be provided as an argument                                       #
8
# 4)the variable of interest: "TMAX","TMIN" or "PRCP"                                            #
9
#                                                                                                #
10
#The outputs are text files and a shape file of a time subset of the database                    #
11
#AUTHOR: Benoit Parmentier                                                                       #
12
#DATE: 06/02/212                                                                                 #
13
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--                                  #
14
##################################################################################################
15

    
16
###Loading R library and packages   
17
library(RPostgreSQL)
18
library(sp)                                             # Spatial pacakge with class definition by Bivand et al.
19
library(spdep)                                          # Spatial pacakge with methods and spatial stat. by Bivand et al.
20
library(rgdal)                                          # GDAL wrapper for R, spatial utilities
21
library(rgeos)                                          # Polygon buffering and other vector operations
22
library(reshape)
23
library(raster)
24

    
25
### Parameters and arguments
26

    
27
db.name <- "ghcn"                #name of the Postgres database
28
var <- "PRCP"                    #name of the variables to keep: TMIN, TMAX or PRCP
29
year_start<-"1970"               #starting year for the query (included)
30
year_end<-"2011"                 #end year for the query (excluded)
31
buffer=500                      #  size of buffer around shapefile to include in spatial query (in km), set to 0 to use no buffer
32
infile2<-"ghcnd-stations.txt"                             #This is the textfile of station locations from GHCND
33
new_proj<-"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs"
34
#tile="h11v08" 
35
tile="h09v04"
36
tile="h21v09"
37

    
38
#out_prefix<-"20121212"                                                 #User defined output prefix
39

    
40
#for Adam
41
outpath="/home/wilson/data"
42
path="/home/wilson/data"
43
setwd(path) 
44

    
45

    
46
############ START OF THE SCRIPT #################
47

    
48
#####  Connect to Station database
49
drv <- dbDriver("PostgreSQL")
50
db <- dbConnect(drv, dbname=db.name)#,options="statement_timeout = 1m")
51

    
52
##### STEP 1: Select station in the study area
53
## get MODLAND tile information
54
#tb=read.table("http://landweb.nascom.nasa.gov/developers/sn_tiles/sn_bound_10deg.txt",skip=6,nrows=648,header=T)
55
#tb$tile=paste("h",sprintf("%02d",tb$ih),"v",sprintf("%02d",tb$iv),sep="")
56
#tile_bb=tb[tb$tile==tile,] ## identify tile of interest
57
#roi_ll=extent(tile_bb$lon_min,tile_bb$lon_max,tile_bb$lat_min,tile_bb$lat_max)
58

    
59
modgrid=readOGR("/home/wilson/data/modisgrid","modis_sinusoidal_grid_world")
60
roi=modgrid[modgrid$h==as.numeric(substr(tile,2,3))&modgrid$v==as.numeric(substr(tile,5,6)),]
61
CRS_interp="+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
62
interp_area=spTransform(roi,CRS(CRS_interp))
63

    
64
#####  Buffer shapefile if desired
65
##     This is done to include stations from outside the region in the interpolation fitting process and reduce edge effects when stiching regions
66
if(buffer>0){  #only apply buffer if buffer >0
67
  interp_area=gUnionCascaded(interp_area)  #dissolve any subparts of roi (if there are islands, lakes, etc.)
68
  interp_areaC=gCentroid(interp_area)       # get centroid of region
69
  interp_areaB=spTransform(                # buffer roi (transform to azimuthal equidistant with centroid of region for most (?) accurate buffering, add buffer, then transform to WGS84)
70
    gBuffer(
71
      spTransform(interp_area,
72
                  CRS(paste("+proj=aeqd +lat_0=",interp_areaC@coords[2]," +lon_0=",interp_areaC@coords[1]," +ellps=WGS84 +datum=WGS84 +units=m +no_defs ",sep=""))),
73
      width=buffer*1000),                  # convert buffer (km) to meters
74
    CRS(CRS_interp))                       # reproject back to original projection
75
  interp_area=interp_areaB                 # replace original region with buffered region
76
}
77

    
78
## get bounding box of study area
79
bbox=bbox(interp_area)
80

    
81
### read in station location information from database
82
### use the bbox of the region to include only station in rectangular region to speed up overlay
83
dat_stat=dbGetQuery(db, paste("SELECT id,name,latitude,longitude
84
                  FROM stations
85
                  WHERE latitude>=",bbox[2,1]," AND latitude<=",bbox[2,2],"
86
                  AND longitude>=",bbox[1,1]," AND longitude<=",bbox[1,2],"
87
                  ;",sep=""))
88
coordinates(dat_stat)<-c("longitude","latitude")
89
proj4string(dat_stat)<-CRS_interp
90

    
91

    
92
# Spatial query to find relevant stations
93
inside <- !is.na(over(dat_stat, as(interp_area, "SpatialPolygons")))  #Finding stations contained in the current interpolation area
94
stat_roi<-dat_stat[inside,]                                            #Finding stations contained in the current interpolation area
95
stat_roi<-spTransform(stat_roi,CRS(new_proj))         # Project from WGS84 to new coord. system
96

    
97
#Quick visualization of station locations
98
#plot(interp_area, axes =TRUE)
99
#plot(stat_roi, pch=1, col="red", cex= 0.7, add=TRUE)
100
#legend("topleft", pch=1,col="red",bty="n",title= "Stations",cex=1.6)
101

    
102
#### STEP 2: Connecting to the database and query for relevant data 
103

    
104
##  Query to link station location information and observations
105
##  Concatenate date columns into single field for easy convert to date
106
##  Divide value by 10 to convert to degrees C and mm
107
##  Subset to years in year_start -> year_end
108
##  Drop missing values (-9999)
109
##  Drop observations that failed quality control (keep only qflag==NA)
110
system.time(
111
  dd<<-dbGetQuery(db,  #save object dd (data daily)
112
                          paste("SELECT station,year||'-'||month||'-'||day AS date,value / 10.0 as value,latitude,longitude,elevation
113
                                 FROM ghcn, stations
114
                                 WHERE station = id
115
                                 AND id IN ('",paste(stat_roi$id,collapse="','"),"')
116
                                 AND element='",var,"'
117
                                 AND year>=",year_start,"
118
                                 AND year<",year_end,"
119
                                 AND value<>-9999
120
                                 AND qflag IS NULL
121
                                 ;",sep=""))
122
  )  ### print used time in seconds  - only taking ~30 seconds....
123

    
124
dd=dd[!is.na(dd$value),]  #drop any NA values
125

    
126
#################################################################
127
### STEP 3: generate monthly means for climate-aided interpolation
128

    
129
### first extract average daily values by year.  
130
system.time(
131
           dm<<-dbGetQuery(db,  # create dm object (data monthly)
132
                          paste("SELECT station,month,count(value) as count,avg(value)/10.0 as value,latitude,longitude,elevation
133
                                 FROM ghcn, stations
134
                                 WHERE station = id
135
                                 AND id IN ('",paste(stat_roi$id,collapse="','"),"')
136
                                 AND element='",var,"'
137
                                 AND year>=",year_start,"
138
                                 AND year<",year_end,"
139
                                 AND value<>-9999
140
                                 AND qflag IS NULL
141
                                 GROUP BY station, month,latitude,longitude,elevation
142
                                 ;",sep=""))
143
            )  ### print used time in seconds  - only taking ~30 seconds....
144

    
145
### drop months with fewer than 75% of the data observations
146
### is 75% the right number?
147
#dm=dm[dm$count>=(.75*10*30),]
148

    
149
## add the monthly data to the daily table
150
dd$month=as.numeric(as.character(format(as.Date(dd$date),"%m")))
151
dd$avgvalue=dm$value[match(paste(dd$station,dd$month),paste(dm$station,dm$month))]
152
dd$avgcount=dm$count[match(paste(dd$station,dd$month),paste(dm$station,dm$month))]
153

    
154
### Generate log-transformed ratio of daily:monthly ppt and add it to daily data
155
if(var=="PRCP"){
156
  dd$anom=log((1+dd$value)/(1+dd$avgvalue))
157
  dd$anom[dd$anom==Inf|dd$anom2==-Inf]=NA
158
}
159

    
160
### Generate temperature anomolies for each daily value
161
if(var!="PRCP"){
162
  dd$anom=dd$value-dd$avgvalue
163
}
164

    
165

    
166
###  Transform the subset data frame in a spatial data frame and reproject
167
dd_sp<-SpatialPointsDataFrame (dd[,c('longitude','latitude')],data=dd,proj=CRS(CRS_interp)) 
168
dd_sp<-spTransform(dd_sp,CRS(new_proj))         # Project from WGS84 to new coord. system
169

    
170
###  Transform the subset data frame in a spatial data frame and reproject
171
dm_sp<-SpatialPointsDataFrame (dm[,c('longitude','latitude')],data=dm,proj=CRS(CRS_interp)) 
172
dm_sp<-spTransform(dm_sp,CRS(new_proj))         # Project from WGS84 to new coord. system
173

    
174
################################################################
175
### STEP 4: Save results and outuput in textfile and a shape file
176

    
177
##  Save shapefile of station locations
178
writeOGR(stat_roi,outpath,paste("station_location_",tile,"_",var,sep=""),driver ="ESRI Shapefile")
179

    
180
## save shapefile of daily observations
181
writeOGR(dd_sp,outpath,paste("/station_daily_",tile,"_",var,sep=""), driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
182

    
183
### write monthly data
184
writeOGR(dm_sp, outpath, paste("/station_monthly_",tile,"_",var,sep=""), driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
185

    
186
##### END OF SCRIPT ##########
(1-1/18)