Revision 1c15fc49

Added by Adam M. Wilson over 10 years ago

Revert "Merge branch 'ag/interp' of code.nceas.ucsb.edu:environmental-layers into aw/precip"

This reverts commit f9c712987ba814b83b2c2cc058c6c1f9b07933c1, reversing
changes made to f0375becc9fb9e13e55c5b7a48be5c5f98064f87.

View differences:

climate/research/oregon/interpolation/AAG2013_conference_Oregon_interpolation.R
####################################  INTERPOLATION TEMPERATURES  #######################################
############################  AAG 2013 and OREGON transition script #######################################
#This script reads information concerning the Oregon case study to adapt data for the revised 
# interpolation code.
#Figures and data for the AAG conference are also produced.
#AUTHOR: Benoit Parmentier                                                                      #
#DATE: 04/08/2013            
#Version: 1
#PROJECT: Environmental Layers project                                       #
#################################################################################################

###Loading R library and packages                                                      
library(gtools)                              # loading some useful tools 
library(mgcv)                                # GAM package by Simon Wood
library(sp)                                  # Spatial package with class definition by Bivand et al.
library(spdep)                               # Spatial package with methods and spatial stat. by Bivand et al.
library(rgdal)                               # GDAL wrapper for R, spatial utilities
library(gstat)                               # Kriging and co-kriging by Pebesma et al.
library(fields)                              # NCAR Spatial Interpolation methods such as kriging, splines
library(raster)                              # Hijmans et al. package for raster processing
library(gdata)                               # various tools, including xls reading
library(rasterVis)
library(parallel)
library(maptools)
library(maps)
library(reshape)
library(plotrix)

#### FUNCTIONS USED IN SCRIPT

create_modis_tiles_region<-function(tiles,modis_sp){
  #This function returns a subset of tiles from the MODIS grid.
  #Arguments: list of tiles, MODIS grid (spatial polygons data frame)
  #Output: spatial polygons data frame of the subset of tiles
  
  h_list<-lapply(tiles,substr,start=2,stop=3) #passing multiple arguments
  v_list<-lapply(tiles,substr,start=5,stop=6) #passing multiple arguments
  selected_tiles<-subset(subset(modis_sp,subset = h %in% as.numeric (h_list) ),
                         subset = v %in% as.numeric(v_list)) 
  return(selected_tiles)
}
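
#Illustrative example (not part of the original script): how the substr() calls in
#create_modis_tiles_region parse MODIS tile IDs of the form "hXXvYY" into numeric
#horizontal (h) and vertical (v) indices. The tile IDs below are the Oregon tiles used later.
as.numeric(sapply(c("h08v04","h09v04"),substr,start=2,stop=3)) #horizontal indices: 8 9
as.numeric(sapply(c("h08v04","h09v04"),substr,start=5,stop=6)) #vertical indices: 4 4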

### Parameters and arguments

lc_path<-"/home/layers/data/land-cover/lc-consensus-global"
infile_modis_grid<-"modis_sinusoidal_grid_world.shp"
infile_elev<-"/home/layers/data/terrain/dem-cgiar-srtm-1km-tif/srtm_1km.tif"  #this is the global file: replace later with the input produced by the DEM team
infile_canheight<-"Simard_Pinto_3DGlobalVeg_JGR.tif"              #Canopy height
#list_tiles_modis = c('h11v08','h11v07','h12v07','h12v08','h10v07','h10v08') #tiles for Venezuela and surrounding area
list_tiles_modis = c('h08v04','h09v04')

#infile_reg_outline=""  #input region outline defined by polygon
infile_reg_outline= "OR83M_state_outline.shp"
infile_countries_sinusoidal<-"countries_sinusoidal_world.shp"

#CRS_interp<-"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs";
CRS_interp <-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
out_region_name<-"Oregon_region" #generated on the fly
out_prefix<-"_OR_04052013"
ref_rast_name<- "mean_day244_rescaled.rst"                     #local raster name defining resolution, extent, local projection -- set on the fly??
infile_covariates<-"covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script and used in stage 3 and stage 4

#The names of covariates can be changed...these names should be output/input from covar script!!!
rnames<-c("x","y","lon","lat","N","E","N_w","E_w","elev","slope","aspect","CANHEIGHT","DISTOC")
lc_names<-c("LC1","LC2","LC3","LC4","LC5","LC6","LC7","LC8","LC9","LC10","LC11","LC12")
lst_names<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12",
             "nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08",
             "nobs_09","nobs_10","nobs_11","nobs_12")
covar_names<-c(rnames,lc_names,lst_names)
infile2<-"/home/layers/data/climate/ghcn/v2.92-upd-2012052822/ghcnd-stations.txt"                              #This is the textfile of station locations from GHCND

in_path<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_covariates"

#c("Oregon", c("h08v04","h09v04","h08v05","h09v05"))
study_area_list_tiles <- vector("list",6)
study_area_list_tiles[[1]] <-list("Oregon", c("h08v04","h09v04"))
study_area_list_tiles[[2]] <-list("Venezuela",c("h10v07", "h10v08", "h11v7", "h11v08", "h12v07", "h12v08"))
study_area_list_tiles[[3]] <-list("Norway",c("h18v02","h18v03", "h19v02", "h19v03"))
study_area_list_tiles[[4]] <-list("East_Africa",c("h20v08", "h21v08", "h22v08", "h20v09", "h21v09", "h22v09", "h21v10"))
study_area_list_tiles[[5]] <-list("South_Africa",c("h19v11", "h20v11", "h19v12", "h20v12"))
study_area_list_tiles[[6]] <-list("Queensland",c("h31v10", "h31v10", "h32v10", "h30v11", "h31v11"))

#######################################################################################
###########################      BEGIN SCRIPT    ######################################


setwd(in_path)


####### PART I: Prepare data for figures and for Oregon interpolation ##########

### Read in Venezuela covariate stack

s_raster_Ven<-brick(infile_covariates) #read brick 
names(s_raster_Ven)<-covar_names #assign names
mm_01_Ven<-subset(s_raster_Ven,"mm_01") #select LST January month average

### Read in world map to show study areas. 

world_sp <- getData("countries")  # different resolutions available
outfile2<-file.path(in_path,paste("word_countries.shp",sep=""))  #Name of the file
writeOGR(world_sp,dsn= dirname(outfile2),layer= sub(".shp","",basename(outfile2)), driver="ESRI Shapefile",overwrite_layer=TRUE)

### Read in sinusoidal grid and world countries
filename<-sub(".shp","",infile_modis_grid)       #Removing the extension from file.
modis_grid<-readOGR(".", filename)     #Reading shape file using rgdal library

### Create list of ALL STUDY AREAS/TEST SITES

## Create list of study area regions:
list_tiles<-lapply(1:length(study_area_list_tiles),function(k) study_area_list_tiles[[k]][[2]])
modis_reg_outlines<-lapply(list_tiles,FUN=create_modis_tiles_region,modis_sp=modis_grid) #problem...this does not 

#writeOGR(modis_reg_outline,dsn= ".",layer= paste("outline",out_region_name,"_",out_suffix,sep=""), 
#         driver="ESRI Shapefile",overwrite_layer="TRUE")

####################################################
#Read in GHCND database station locations

dat_stat <- read.fwf(infile2, 
                     widths = c(11,9,10,7,3,31,4,4,6),fill=TRUE)  
colnames(dat_stat)<-c("STAT_ID","lat","lon","elev","state","name","GSNF","HCNF","WMOID")
coords<- dat_stat[,c('lon','lat')]
coordinates(dat_stat)<-coords
proj4string(dat_stat)<-CRS_locs_WGS84 #this is the WGS84 projection
#Save shapefile for later
outfile1<-file.path(in_path,paste("ghcnd_stations.shp",sep=""))  #Name of the file
writeOGR(dat_stat,dsn= dirname(outfile1),layer= sub(".shp","",basename(outfile1)), driver="ESRI Shapefile",overwrite_layer=TRUE)

interp_area <- readOGR(dsn=in_path,sub(".shp","",infile_reg_outline))
interp_area_WGS84 <-spTransform(interp_area,CRS_locs_WGS84)         # Project the region outline to WGS84 (lat/long)

# Spatial query to find relevant stations

inside <- !is.na(over(dat_stat, as(interp_area_WGS84, "SpatialPolygons")))  #Finding stations contained in the current interpolation area
stat_reg_OR<-dat_stat[inside,]              #Selecting stations contained in the current interpolation area

stat_reg_OR <-spTransform(stat_reg_OR,CRS(proj4string(interp_area)))         # Project from WGS84 to the local coord. system

#Now Venezuela
interp_area_Ven_WGS84 <-spTransform(modis_reg_outlines[[2]],CRS_locs_WGS84)         # Project the region outline to WGS84 (lat/long)
inside <- !is.na(over(dat_stat, as(interp_area_Ven_WGS84, "SpatialPolygons")))  #Finding stations contained in the current interpolation area
stat_reg_Ven <-dat_stat[inside,]              #Selecting stations contained in the current interpolation area

## Get the data in the local projection
stat_reg_Ven <-spTransform(stat_reg_Ven,CRS(proj4string(modis_reg_outlines[[2]])))         # Project from WGS84 to the local coord. system

### READ IN COVARIATES FILES FOR OREGON AND MAKE IT A MULTI-BAND FILE

inlistf<-"list_files_covariates_04032013.txt"
lines<-read.table(paste(inlistf,sep=""), sep=" ")                  #Column 1 contains the names of raster files
inlistvar<-lines[,1]
inlistvar<-as.character(inlistvar)
covar_names_OR<-as.character(lines[,2])                                         #Column two contains short names for covariates

s_raster_OR<- stack(inlistvar)                                                  #Creating a stack of raster images from the list of variables.
layerNames(s_raster_OR)<-covar_names_OR                                            #Assigning names to the raster layers

aspect<-subset(s_raster_OR,"aspect")             #Select layer from stack
slope<-subset(s_raster_OR,"slope")             #Select layer from stack
distoc<-subset(s_raster_OR,"DISTOC")  

N<-cos(aspect*pi/180)
E<-sin(aspect*pi/180)
Nw<-sin(slope*pi/180)*cos(aspect*pi/180)   #Adding a variable to the dataframe
Ew<-sin(slope*pi/180)*sin(aspect*pi/180)   #Adding variable to the dataframe.
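
#Illustrative check (not part of the original script): the transforms above convert
#aspect/slope in degrees into "northness" (N), "eastness" (E) and their slope-weighted
#versions (N_w, E_w). For example, an east-facing cell (aspect=90) on a 30-degree slope:
cos(90*pi/180)                  #northness N, approximately 0
sin(90*pi/180)                  #eastness E, equal to 1
sin(30*pi/180)*sin(90*pi/180)   #slope-weighted eastness E_w, equal to 0.5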

xy <-coordinates(slope)  #get x and y projected coordinates...
xy_latlon<-project(xy, CRS_interp, inv=TRUE) # find lat long for projected coordinates (or pixels...)

x <-init(slope,v="x")
y <-init(slope,v="y")
lon<-x
lat<-y
lon <-setValues(lon,xy_latlon[,1]) #longitude for every pixel in the processing tile/region
lat <-setValues(lat,xy_latlon[,2]) #latitude for every pixel in the processing tile/region
CANHEIGHT<-subset(s_raster_OR,"CANHEIGHT")
CANHEIGHT[is.na(CANHEIGHT)]<-0

elev<-subset(s_raster_OR,"elev")
elev[elev==-9999] <- NA

r<-stack(x,y,lon,lat,N,E,Nw,Ew,elev,slope,aspect,CANHEIGHT,distoc)
rnames<-c("x","y","lon","lat","N","E","N_w","E_w","elev","slope","aspect","CANHEIGHT","DISTOC")
s_raster<-r
#Add landcover layers
lc_names<-c("LC1","LC2","LC3","LC4","LC5","LC6","LC7","LC8","LC9","LC10")
lc_reg_s<-subset(s_raster_OR,lc_names)
#Should be a function...ok for now??
test<-vector("list",nlayers(lc_reg_s))
for (k in 1:nlayers(lc_reg_s)){
  LC<-raster(lc_reg_s,layer=k)             #Select layer from stack
  LC[is.na(LC)]<-0
  test[[k]]<-LC
}
#tmp_df<-freq(lc_reg_s,merge=TRUE) #check to see if it worked
#head(tmp_df)
lc_reg_s<-stack(test)
s_raster<-addLayer(s_raster, lc_reg_s)

lst_names<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12",
             "nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08",
             "nobs_09","nobs_10","nobs_11","nobs_12")
lst_mm_names<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12")
lst_mm_s<-subset(s_raster_OR,lst_mm_names)
lst_mm_s <- lst_mm_s - 273.16 
lst_nobs_names<-c("nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08",
                "nobs_09","nobs_10","nobs_11","nobs_12")
lst_nobs_s<-subset(s_raster_OR,lst_nobs_names)

s_raster<-addLayer(s_raster, lst_mm_s)
s_raster<-addLayer(s_raster, lst_nobs_s)
covar_names<-c(rnames,lc_names,lst_names)
names(s_raster)<-covar_names

# create mask!!! Should combine with mask of elev
LC10<-subset(s_raster,"LC10")

LC10_mask<-LC10
LC10_mask[is.na(LC10_mask)]<- 0
LC10_mask[LC10==100]<- NA
LC10_mask[LC10_mask<100]<- 1
LC10_mask[is.na(LC10_mask)]<- 0
mask_land_NA<-LC10_mask
mask_land_NA[mask_land_NA==0]<-NA


##### SAVE AS MULTIBAND...make this a function...

#list of files...
file_format<-".tif"
NA_val<- -9999
band_order<- "BSQ"
var<-"TMAX"
data_name<-paste("covariates_",out_region_name,"_",sep="")
raster_name<-paste(data_name,var,"_",out_prefix,file_format, sep="")
#writeRaster(s_raster, filename=raster_name,NAflag=-999,bylayer=FALSE,bandorder="BSQ",overwrite=TRUE)  #Writing the data in a raster file format...
#if mask
#stat_val<- extract(s_raster, ghcn3)                                          #Extracting values from the raster stack for every point location in coords data frame.
s_raster_m<-mask(s_raster,mask_land_NA,filename=raster_name,
                 overwrite=TRUE,NAflag=NA_val,bylayer=FALSE,bandorder=band_order)
#if no mask
#writeRaster(s_raster, filename=raster_name,NAflag=-999,bylayer=FALSE,bandorder="BSQ",overwrite=TRUE)  #Writing the data in a raster file format...

############# PART II: PRODUCE FIGURES #######

### CREATE FIGURE TEST SITES

dat_stat_sinusoidal <- spTransform(dat_stat,CRS(proj4string(modis_grid)))
world_sinusoidal <- readOGR(dsn=".",sub(".shp","",infile_countries_sinusoidal))

png(paste("Study_area_modis_grid",out_prefix,".png",sep=""))
plot(world_sinusoidal)
plot(dat_stat_sinusoidal,cex=0.2,pch=16,col=c("blue"),add=TRUE)
plot(modis_grid,add=TRUE)
for (k in 1:length(modis_reg_outlines)){
  plot(modis_reg_outlines[[k]],border=c("red"),lwd=2.5,add=TRUE)
}
title("Study area for temperature and precipitation predictions")
#legend
dev.off()

### CREATE FIGURE MEAN DAILY AND MEAN MONTHLY: AAG 2013  ####

lst_md<-raster(ref_rast_name)
lst_mm_09<-subset(s_raster,"mm_09")
plot(stack(lst_md,lst_mm_09))

lst_md<-raster("mean_day001_rescaled.rst")
lst_md<- lst_md - 273.16
lst_mm_01<-subset(s_raster,"mm_01")

png(filename=paste("Comparison_daily_monthly_mean_lst",out_prefix,".png",sep=""),width=960,height=480)
par(mfrow=c(1,2))
plot(lst_md)
plot(interp_area,add=TRUE)
title("Mean January 1")
plot(lst_mm_01)
plot(interp_area,add=TRUE)
title("Mean for month of January")
dev.off()

### CREATE FIGURE NUMBER OF STATIONS PER SITE 

png(paste("stations_for_Venezuela_Oregon_areas",out_prefix,".png",sep=""),width=960,height=480)
par(mfrow=c(1,2))
#Oregon data
plot(lst_mm_01)
plot(interp_area,add=TRUE)
plot(stat_reg_OR,add=TRUE)
title("Stations located in Oregon from GHCND")
plot(mm_01_Ven)
plot(modis_reg_outlines[[2]],add=TRUE)
plot(stat_reg_Ven,add=TRUE)
title("Stations located in Venezuela from GHCND")
dev.off()

### CREATE FIGURE NUMBER OF STATIONS PER SITE AND SPECIFIC MONTH... 

#png(paste("stations_for_Venezuela_Oregon_areas_per_month",out_prefix,".png",sep=""))
#par(mfrow=c(1,2))
#plot(interp_area_WGS84)
#plot(stat_reg_OR,add=TRUE)
#plot(modis_reg_outlines[[2]])
#plot(stat_reg_Ven,add=TRUE)
#dev.off()

############ PART III: SCREENING OF COVARIATES #############

### SCREENING FUNCTION for covariate stack and GHCND data base to add later in the functions

#Screen for extreme values: this needs more thought, min and max val vary with regions
#min_val<-(-15+273.16) #if values less than -15C then screen out (note the Kelvin units that will need to be changed later in all datasets)
#r1[r1 < (min_val)]<-NA
#s_raster<-addLayer(s_raster,LST)            #Adding current month

nel<-25 #number of valid-range entries defined below
#tab_range<-data.frame(varname=character(nel),varterm=character(nel),vmin=numeric(nel),vmax=numeric(nel))
val_range<-vector("list",nel) #list of one-row data.frames
val_rst<-vector("list",nel) #list of screened raster layers
val_range[[1]]<-data.frame(varname="lon",vmin=-180,vmax=180)
val_range[[2]]<-data.frame(varname="lat",vmin=-90,vmax=90)
val_range[[3]]<-data.frame(varname="N",vmin=-1,vmax=1)
val_range[[4]]<-data.frame(varname="E",vmin=-1,vmax=1)
val_range[[5]]<-data.frame(varname="N_w",vmin=-1,vmax=1)
val_range[[6]]<-data.frame(varname="E_w",vmin=-1,vmax=1)
val_range[[7]]<-data.frame(varname="elev",vmin=0,vmax=6000)
val_range[[8]]<-data.frame(varname="slope",vmin=0,vmax=90)
val_range[[9]]<-data.frame(varname="aspect",vmin=0,vmax=360)
val_range[[10]]<-data.frame(varname="DISTOC",vmin=-0,vmax=10000000)
val_range[[11]]<-data.frame(varname="CANHEIGHT",vmin=0,vmax=255)
val_range[[12]]<-data.frame(varname="LC1",vmin=0,vmax=100)
val_range[[13]]<-data.frame(varname="LC3",vmin=0,vmax=100)
val_range[[14]]<-data.frame(varname="mm_01",vmin=-15,vmax=50)
val_range[[15]]<-data.frame(varname="mm_02",vmin=-15,vmax=50)
val_range[[16]]<-data.frame(varname="mm_03",vmin=-15,vmax=50)
val_range[[17]]<-data.frame(varname="mm_04",vmin=-15,vmax=50)
val_range[[18]]<-data.frame(varname="mm_05",vmin=-15,vmax=50)
val_range[[19]]<-data.frame(varname="mm_06",vmin=-15,vmax=50)
val_range[[20]]<-data.frame(varname="mm_07",vmin=-15,vmax=50)
val_range[[21]]<-data.frame(varname="mm_08",vmin=-15,vmax=50)
val_range[[22]]<-data.frame(varname="mm_09",vmin=-15,vmax=50)
val_range[[23]]<-data.frame(varname="mm_10",vmin=-15,vmax=50)
val_range[[24]]<-data.frame(varname="mm_11",vmin=-15,vmax=50)
val_range[[25]]<-data.frame(varname="mm_12",vmin=-15,vmax=50)

tab_range<-do.call(rbind,val_range) #entries must share the same columns for rbind to work

#pos<-match("ELEV_SRTM",layerNames(s_raster)) #Find column with the current month for instance mm12
#ELEV_SRTM<-raster(s_raster,pos)

screening_val_covariates_fun<-function(tab_range,r_stack){
  #Screening of a raster stack: values outside [vmin,vmax] are set to NA for each listed layer
  #
  val_rst<-vector("list",nrow(tab_range)) #list holding the screened layers
  for (k in 1:nrow(tab_range)){
    avl<-c(-Inf,tab_range$vmin[k],NA, tab_range$vmax[k],+Inf,NA)   #Reclassification vector: values below vmin or above vmax become NA
    rclmat<-matrix(avl,ncol=3,byrow=TRUE)
    #s_raster_r<-raster(r_stack,match(tab_range$varterm[k],names(r_stack))) #select relevant layer from stack
    s_raster_r<-raster(r_stack,match(tab_range$varname[k],names(r_stack)))
    s_raster_r<-reclassify(s_raster_r,rclmat)  #now reclassify values 
    r_stack<-dropLayer(r_stack,match(tab_range$varname[k],names(r_stack))) #drop the unscreened layer before adding the screened one back
    names(s_raster_r)<-tab_range$varname[k] #Layer name is lost when reclassifying
    val_rst[[k]]<-s_raster_r
  }
  s_rst_m<-stack(val_rst) #This is a raster stack with the valid range of values
  r_stack<-addLayer(r_stack,s_rst_m) #add back the layers that were screened
  return(r_stack)
}
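
#Illustrative sketch (not part of the original script): a tiny, self-contained check of
#the reclassification used above. For the "elev" entry (vmin=0, vmax=6000) the matrix
#maps values outside [0,6000] to NA; in the toy raster below only 150 and 500 survive.
rclmat_demo<-matrix(c(-Inf,0,NA, 6000,Inf,NA),ncol=3,byrow=TRUE) #rows are (from,to,becomes)
r_demo<-reclassify(raster(matrix(c(-9999,150,7000,500),nrow=2,ncol=2)),rclmat_demo)
values(r_demo) #out-of-range cells are NA; 150 and 500 remain
#The screening function itself would be applied as, e.g.:
#s_raster_screened<-screening_val_covariates_fun(tab_range,s_raster)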

#### ADD SCREENING FUNCTION FOR GHCND extracted!!!

#Remove NA for LC and CANHEIGHT
#ghcn$LC1[is.na(ghcn$LC1)]<-0
#ghcn$LC3[is.na(ghcn$LC3)]<-0
#ghcn$CANHEIGHT[is.na(ghcn$CANHEIGHT)]<-0
#ghcn$LC4[is.na(ghcn$LC4)]<-0
#ghcn$LC6[is.na(ghcn$LC6)]<-0
##ghcn$ELEV_SRTM[ghcn$ELEV_SRTM==-9999]<-NA
#dst<-subset(dst,dst$TMax>-15 & dst$TMax<40)
#dst<-subset(dst,dst$ELEV_SRTM>0) #This will drop two stations...or 24 rows
climate/research/oregon/interpolation/Database_stations_covariates_processing_function.R
##################    Data preparation for interpolation   #######################################
############################ Extraction of station data ##########################################


database_covariates_preparation<-function(list_param_prep){
  #This function performs queries on the Postgres ghcnd database for stations matching the             
  #interpolation area. It requires 13 inputs:                                           
  # 1)  db.name :  Postgres database name containing the meteorological stations
  # 2)  var: the variable of interest - "TMAX","TMIN" or "PRCP" 
  # 3)  range_years: range of records used in the daily interpolation, note that upper bound year is not included               
  # 4)  range_years_clim: range of records used in the monthly climatology interpolation, note that upper bound is not included
  # 5)  infile_reg_outline: region outline as a shape file - used in the interpolation  stage too                              
  # 6)  infile_ghncd_data: ghcnd stations locations as a textfile name with lat-long fields                                                                                   
  # 7)  infile_covariates: tif file of raster covariates for the interpolation area: it should have a local projection                                                                                           
  # 8)  CRS_locs_WGS84: longlat EPSG 4326 used as coordinates reference system (proj4) for stations locations
  # 9)  in_path: input path for covariates data and other files, this is also the output?
  # 10) out_path: output path created in master script
  # 11) covar_names: names of covariates used for the interpolation --may be removed later? (should be stored in the brick)
  # 12) qc_flags_stations: flags used to screen the data; at this stage only two values...
  # 13) out_prefix: output suffix added to output names--it is the same in the interpolation script
  #
  #The output is a list of six shapefile names produced by the function:
  #1) loc_stations: locations of stations as shapefile in EPSG 4326
  #2) loc_stations_ghcn: ghcn daily data for the year range of interpolation (locally projected)
  #3) daily_query_ghcn_data: ghcn daily data from daily query before application of quality flag
  #4) daily_covar_ghcn_data: ghcn daily data with covariates for the year range of interpolation (locally projected)
  #5) monthly_query_ghcn_data: ghcn daily data from monthly query before application of quality flag
  #6) monthly_covar_ghcn_data: ghcn monthly averaged data with covariates for the year range of interpolation (locally projected)
  
  #AUTHOR: Benoit Parmentier                                                                       
  #DATE: 05/21/2013                                                                                 
  #PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--     
  #Comments and TODO
  #-Add buffer option...
  #-Add screening for value predicted: var
  ##################################################################################################
  
  ###Loading R library and packages: should it be read in before???   
  
  library(RPostgreSQL)
  library(sp)                                           # Spatial package with class definition by Bivand et al.
  library(spdep)                                          # Spatial package with methods and spatial stat. by Bivand et al.
  library(rgdal)                                          # GDAL wrapper for R, spatial utilities
  library(rgeos)
  library(rgdal)
  library(raster)
  library(rasterVis)
  library(maps)
  library(maptools)
  
  ### Functions used in the script
  
  format_s <-function(s_ID){
    #Format station IDs into a parenthesized tuple string that is used in a psql query.
    # Argument 1: vector of station IDs
    # Return: character string of quoted, comma-separated station IDs
    tx2<-s_ID
    tx2<-as.character(tx2)
    stat_list<-tx2
    temp<-shQuote(stat_list)
    t<-paste(temp, collapse= " ")
    t1<-gsub(" ", ",",t)
    sf_ID<-paste("(",t1,")",sep="") #string containing the station IDs to query
    return(sf_ID)
  }
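  
  #Illustrative example (not part of the original code): the string produced by format_s
  #for two hypothetical station IDs, ready to be used in the SQL "IN" clause below.
  #format_s(c("STATION_0001","STATION_0002"))  #-> "('STATION_0001','STATION_0002')"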
  
  #parsing input arguments
  
  db.name <- list_param_prep$db.name             #name of the Postgres database
  var <- list_param_prep$var                     #name of the variables to keep: TMIN, TMAX or PRCP
  year_start <-list_param_prep$range_years[1] #"2010"               #starting year for the query (included)
  year_end <-list_param_prep$range_years[2] #"2011"                 #end year for the query (excluded)
  year_start_clim <-list_param_prep$range_years_clim[1] #starting year for the monthly query to calculate the climatology (right bound not included in the range!!)
  year_end_clim <-list_param_prep$range_years_clim[2] #end year for the monthly query to calculate the climatology (right bound not included in the range!!)
  infile_reg_outline<- list_param_prep$infile_reg_outline  #This is the shape file of outline of the study area                                                      #It is an input/output of the covariate script
  infile_ghncd_data<-list_param_prep$infile_ghncd_data      #"/home/layers/data/climate/ghcn/v2.92-upd-2012052822/ghcnd-stations.txt"                              #This is the textfile of station locations from GHCND
  infile_covariates<-list_param_prep$infile_covariates #"covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script
  CRS_locs_WGS84<-list_param_prep$CRS_locs_WGS84 #Station coords WGS84: same as earlier
  in_path <- list_param_prep$in_path #e.g. "/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data/"
  out_path <- list_param_prep$out_path #e.g. "/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data/"
  covar_names<-list_param_prep$covar_names # names should be written in the tif file!!!
  qc_flags_stations <- list_param_prep$qc_flags_stations #flags allowed for the query from the GHCND??
  out_prefix<-list_param_prep$out_prefix #"_365d_GAM_fus5_all_lstd_03012013"                #User defined output prefix
  
  ## working directory is the same for input and output for this function  
  #setwd(in_path) 
  setwd(out_path)
  ##### STEP 1: Select stations in the study area
  
  filename<-sub(".shp","",infile_reg_outline)             #Removing the extension from file.
  interp_area <- readOGR(dsn=dirname(filename),basename(filename))
  CRS_interp<-proj4string(interp_area)         #Storing the coordinate information: geographic coordinates longlat WGS84
  
  #Read in GHCND database station locations
  dat_stat <- read.fwf(infile_ghncd_data, 
                       widths = c(11,9,10,7,3,31,4,4,6),fill=TRUE)
  colnames(dat_stat)<-c("STAT_ID","latitude","longitude","elev","state","name","GSNF","HCNF","WMOID")
  coords<- dat_stat[,c('longitude','latitude')]
  coordinates(dat_stat)<-coords
  proj4string(dat_stat)<-CRS_locs_WGS84 #this is the WGS84 projection
  #proj4string(dat_stat)<-CRS_interp
  interp_area_WGS84 <-spTransform(interp_area,CRS_locs_WGS84)         # Project the region outline to WGS84 (lat/long)
  
  # Spatial query to find relevant stations
  
  inside <- !is.na(over(dat_stat, as(interp_area_WGS84, "SpatialPolygons")))  #Finding stations contained in the current interpolation area
  stat_reg<-dat_stat[inside,]              #Selecting stations contained in the current interpolation area
  
  ####
  ##TODO: Add buffer option? 
  ####
  
  #### STEP 2: Connecting to the database and querying for relevant data 
  
  drv <- dbDriver("PostgreSQL")
  db <- dbConnect(drv, dbname=db.name)
  
  time1<-proc.time()    #Start stopwatch
  list_s<-format_s(stat_reg$STAT_ID)
  data2<-dbGetQuery(db, paste("SELECT *
                              FROM ghcn
                              WHERE element=",shQuote(var),
                              "AND year>=",year_start,
                              "AND year<",year_end,
                              "AND station IN ",list_s,";",sep=""))  #Selecting stations using a SQL query
  time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
  time_minutes<-time_duration[3]/60
  dbDisconnect(db)
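
  #For illustration (not part of the original code): with var="TMAX", year_start=2010 and
  #year_end=2011, the paste() call above assembles a query equivalent to
  #  SELECT * FROM ghcn WHERE element='TMAX' AND year>=2010 AND year<2011 AND station IN ('STATION_0001',...);
  #where the IN list comes from format_s() and the station IDs shown are hypothetical.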

  data_table<-merge(data2,as.data.frame(stat_reg), by.x = "station", by.y = "STAT_ID")
  
  #Transform the subset data frame into a spatial data frame and reproject
  data_reg<-data_table                               #Make a copy of the data frame
  coords<- data_reg[c('longitude','latitude')]              #Define coordinates in a data frame: clean up here!!
  coordinates(data_reg)<-coords                      #Assign coordinates to the data frame
  proj4string(data_reg)<-CRS_locs_WGS84                #Assign coordinates reference system in PROJ4 format
  data_reg<-spTransform(data_reg,CRS(CRS_interp))     #Project from WGS84 to new coord. system
  
  data_d <-data_reg  #data_d: daily data containing the query without screening
  #data_reg <-subset(data_d,mflag=="0" | mflag=="S") #should be input arguments!!
  #Make the screening depend on the quality flags supplied as arguments
  
  data_reg <-subset(data_d, mflag %in% qc_flags_stations) #screening using flags
  #data_reg2 <-subset(data_d,mflag==qc_flags_stations[1] | mflag==qc_flags_stations[2]) #screening using flags
  
  ##################################################################
  ### STEP 3: Save results and output in text files and shapefiles
  #browser()
  #Save shapefiles of the locations of meteorological stations in the study area
  #outfile1<-file.path(in_path,paste("stations","_",out_prefix,".shp",sep=""))
  outfile1<-file.path(out_path,paste("stations","_",out_prefix,".shp",sep=""))
  writeOGR(stat_reg,dsn= dirname(outfile1),layer= sub(".shp","",basename(outfile1)), driver="ESRI Shapefile",overwrite_layer=TRUE)
  #writeOGR(dst,dsn= ".",layer= sub(".shp","",outfile4), driver="ESRI Shapefile",overwrite_layer=TRUE)
  
  outfile2<-file.path(out_path,paste("ghcn_data_query_",var,"_",year_start,"_",year_end,out_prefix,".shp",sep=""))         #Name of the file
  #writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
  writeOGR(data_d,dsn= dirname(outfile2),layer= sub(".shp","",basename(outfile2)), driver="ESRI Shapefile",overwrite_layer=TRUE)
  
  outfile3<-file.path(out_path,paste("ghcn_data_",var,"_",year_start,"_",year_end,out_prefix,".shp",sep=""))         #Name of the file
  #writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
  writeOGR(data_reg,dsn= dirname(outfile3),layer= sub(".shp","",basename(outfile3)), driver="ESRI Shapefile",overwrite_layer=TRUE)
  
  ###################################################################
  ### STEP 4: Extract values at stations from the covariate stack of raster images
  #Eventually this step may be skipped if the covariates information is stored in the database...
  
  #s_raster<-stack(file.path(in_path,infile_covariates))                   #read in the data stack
  s_raster<-brick(infile_covariates)                   #read in the data stack
  names(s_raster)<-covar_names               #Assigning names to the raster layers: making sure it is included in the extraction
  stat_val<- extract(s_raster, data_reg)        #Extracting values from the raster stack for every point location in coords data frame.
  #stat_val_test<- extract(s_raster, data_reg,def=TRUE)
  
  #create a shape file and data_frame with names
  
  data_RST<-as.data.frame(stat_val)                                            #This creates a data frame with the values extracted
  data_RST_SDF<-cbind(data_reg,data_RST)
  coordinates(data_RST_SDF)<-coordinates(data_reg) #Transforming data_RST_SDF into a spatial point dataframe
  CRS_reg<-proj4string(data_reg)
  proj4string(data_RST_SDF)<-CRS_reg  #Need to assign coordinates...
  
  #Creating a date column
  date1<-ISOdate(data_RST_SDF$year,data_RST_SDF$month,data_RST_SDF$day) #Creating a date object from 3 separate columns
  date2<-gsub("-","",as.character(as.Date(date1)))
  data_RST_SDF$date<-date2                                              #Date format (year,month,day) is the following: "20100627"
  
  #This allows changing only one column name of the data.frame
  pos<-match("value",names(data_RST_SDF)) #Find column with name "value"
  if (var=="TMAX"){
    #names(data_RST_SDF)[pos]<-c("TMax")
    data_RST_SDF$value<-data_RST_SDF$value/10                #GHCN daily TMAX is stored in tenths of degrees C
  }
  
  if (var=="TMIN"){
    #names(data_RST_SDF)[pos]<-c("TMin")
    data_RST_SDF$value<-data_RST_SDF$value/10                #GHCN daily TMIN is stored in tenths of degrees C
  }
  
  #write out a new shapefile (including .prj component)
  outfile4<-file.path(out_path,paste("daily_covariates_ghcn_data_",var,"_",year_start,"_",year_end,out_prefix,".shp",sep=""))         #Name of the file (year range from the daily query)
  writeOGR(data_RST_SDF,dsn= dirname(outfile4),layer= sub(".shp","",basename(outfile4)), driver="ESRI Shapefile",overwrite_layer=TRUE)
201
  
202
  ###############################################################
203
  ######## STEP 5: Preparing monthly averages from the ProstGres database
204
  
205
  drv <- dbDriver("PostgreSQL")
206
  db <- dbConnect(drv, dbname=db.name)
207
  
208
  #year_start_clim: set at the start of the script
209
  time1<-proc.time()    #Start stop watch
210
  list_s<-format_s(stat_reg$STAT_ID)
211
  data_m<-dbGetQuery(db, paste("SELECT *
212
                               FROM ghcn
213
                               WHERE element=",shQuote(var),
214
                               "AND year>=",year_start_clim,
215
                               "AND year<",year_end_clim,
216
                               "AND station IN ",list_s,";",sep=""))  #Selecting station using a SQL query
217
  time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
218
  time_minutes<-time_duration[3]/60
219
  dbDisconnect(db)
220
  #Clean out this section!!
221
  date1<-ISOdate(data_m$year,data_m$month,data_m$day) #Creating a date object from 3 separate column
222
  date2<-as.POSIXlt(as.Date(date1))
223
  data_m$date<-date2
224
  #Save the query data here...
225
  data_m<-merge(data_m, stat_reg, by.x="station", by.y="STAT_ID")   #Inner join all columns are retained
226
  #Extracting covariates from stack for the monthly dataset...
227
  coords<- data_m[c('longitude','latitude')]              #Define coordinates in a data frame
228
  coordinates(data_m)<-coords                      #Assign coordinates to the data frame
229
  proj4string(data_m)<-CRS_locs_WGS84                  #Assign coordinates reference system in PROJ4 format
230
  data_m<-spTransform(data_m,CRS(CRS_interp))     #Project from WGS84 to new coord. system
231
  outfile5<-file.path(out_path,paste("monthly_query_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep=""))  #Name of the file
232
  writeOGR(data_m,dsn= dirname(outfile5),layer= sub(".shp","",basename(outfile5)), driver="ESRI Shapefile",overwrite_layer=TRUE)
233
  
234
  #In Venezuela and other regions where there are not many stations...mflag==S should be added..see Durenne etal.2010.
235

  
236
  #d<-subset(data_m,mflag==qc_flags_stations[1] | mflag==qc_flags_stations[2])
237
  d<-subset(data_m,mflag %in% qc_flags_stations)
238
  
239
  #Add screening here ...May need some screeing??? i.e. range of temp and elevation...
240
  
241
  d1<-aggregate(value~station+month, data=d, mean)  #Calculate monthly mean for every station in OR
242
  #d2<-aggregate(value~station+month, data=d, length)  #Calculate monthly mean for every station in OR
243
  is_not_na_fun<-function(x) sum(!is.na(x)) #count the number of available observation
244
  d2<-aggregate(value~station+month, data=d, is_not_na_fun)  #Calculate monthly mean for every station in OR
245
  #names(d2)[names(d2)=="value"] <-"nobs_station"
246
  d1$nobs_station<-d2$value
247
  dst<-merge(d1, stat_reg, by.x="station", by.y="STAT_ID")   #Inner join all columns are retained
248
  
249
  #This allows to change only one name of the data.frame
250
  pos<-match("value",names(dst)) #Find column with name "value"
251
  if (var=="TMAX"){
252
    names(dst)[pos]<-c("TMax")           #renaming the column "value" extracted from the Postgres database
253
    dst$TMax<-dst$TMax/10                #TMax is the average max temp for monthy data
254
  }
255
  
256
  if (var=="TMIN"){
257
    names(dst)[pos]<-c("TMin")
258
    dst$TMin<-dst$TMin/10                #TMin is the average min temp for monthy data
259
  }
260
  
261
  #Extracting covariates from stack for the monthly dataset...
262
  #names(dst)[5:6] <-c('latitude','longitude')
263
  coords<- dst[c('longitude','latitude')]              #Define coordinates in a data frame
264
  
265
  coordinates(dst)<-coords                      #Assign coordinates to the data frame
266
  proj4string(dst)<-CRS_locs_WGS84                  #Assign coordinates reference system in PROJ4 format
267
  dst_month<-spTransform(dst,CRS(CRS_interp))     #Project from WGS84 to new coord. system
268
  
269
  stations_val<-extract(s_raster,dst_month,df=TRUE)  #extraction of the information at station location in a data frame
270
  #dst_extract<-spCbind(dst_month,stations_val) #this is in sinusoidal from the raster stack
271
  dst_extract<-cbind(dst_month,stations_val) #this is in sinusoidal from the raster stack
272
  dst<-dst_extract #problem!!! two column named elev!!! use elev_s??
273
  
274
  #browser()
275
  coords<- dst[c('x','y')]              #Define coordinates in a data frame, this is the local x,y
276
  index<-!is.na(coords$x)               #remove if NA, may need to revisit at a later stage
277
  dst<-dst[index,]
278
  coords<- dst[c('x','y')]              #Define coordinates in a data frame, this is the local x,y
279
  coordinates(dst)<-coords                    #Assign coordinates to the data frame
280
  proj4string(dst)<-CRS_interp        #Assign coordinates reference system in PROJ4 format
281
  
282
  ### ADD SCREENING HERE BEFORE WRITING OUT DATA
283
  #Covariates ok since screening done in covariate script
284
  #screening on var i.e. value, TMIN, TMAX...
285
  
286
  ####
287
  
288
  ####
289
  #write out a new shapefile (including .prj component)
290
  dst$OID<-1:nrow(dst) #need a unique ID?
291
  outfile6<-file.path(out_path,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep=""))  #Name of the file
292
  writeOGR(dst,dsn= dirname(outfile6),layer= sub(".shp","",basename(outfile6)), driver="ESRI Shapefile",overwrite_layer=TRUE)
293
  
294
  ### list of outputs return
295
  
296
  outfiles_obj<-list(outfile1,outfile2,outfile3,outfile4,outfile5,outfile6)
297
  names(outfiles_obj)<- c("loc_stations","loc_stations_ghcn","daily_query_ghcn_data","daily_covar_ghcn_data","monthly_query_ghcn_data","monthly_covar_ghcn_data")
298
  save(outfiles_obj,file= file.path(out_path,paste("met_stations_outfiles_obj_",interpolation_method,"_", out_prefix,".RData",sep="")))
299
  
300
  return(outfiles_obj)
301
  
302
  #END OF FUNCTION # 
303
}
304

  
305
########## END OF SCRIPT ##########
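
#Illustrative sketch (not part of the original code): how a master script might assemble
#list_param_prep and call the function above. All values shown are hypothetical
#placeholders; only the element names follow the documented inputs 1-13.
#list_param_prep <- list(
#  db.name="ghcn", var="TMAX",
#  range_years=c("2010","2011"), range_years_clim=c("2000","2011"),
#  infile_reg_outline="OR83M_state_outline.shp",
#  infile_ghncd_data="/home/layers/data/climate/ghcn/v2.92-upd-2012052822/ghcnd-stations.txt",
#  infile_covariates="covariates__venezuela_region__VE_01292013.tif",
#  CRS_locs_WGS84=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0"),
#  in_path=getwd(), out_path=getwd(),
#  covar_names=covar_names, qc_flags_stations=c("0","S"),
#  out_prefix="_test_run")
#met_obj <- database_covariates_preparation(list_param_prep)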
climate/research/oregon/interpolation/Database_stations_extraction_raster_covariates_processing.R
##################    Data preparation for interpolation   #######################################
############################ Extraction of station data ##########################################


database_covaratiates_preparation<-function(list_param_prep){
  #This function performs queries on the Postgres ghcnd database for stations matching the             
  #interpolation area. It requires 11 inputs:                                           
  # 1) db.name :  Postgres database name containing the meteorological stations
  # 2) var: the variable of interest - "TMAX","TMIN" or "PRCP" 
  # 3) range_years: range of records used in the daily interpolation, note that upper bound year is not included               
  # 4) range_years_clim: range of records used in the monthly climatology interpolation, note that upper bound is not included
  # 5) infile1: region outline as a shape file - used in the interpolation  stage too                              
  # 6) infile2: ghcnd stations locations as a textfile name with lat-long fields                                                                                   
  # 7) infile_covariates: tif file of raster covariates for the interpolation area: it should have a local projection                                                                                           
  # 8) CRS_locs_WGS84: longlat EPSG 4326 used as coordinates reference system (proj4) for stations locations
  # 9) in_path: input path for covariates data and other files, this is also the output?
  # 10) covar_names: names of covariates used for the interpolation --may be removed later? (should be stored in the brick)
  # 11) out_prefix: output suffix added to output names--it is the same in the interpolation script
  #
  #The output is a list of four shapefile names produced by the function:
  #1) loc_stations: locations of stations as shapefile in EPSG 4326
  #2) loc_stations_ghcn: ghcn daily data for the year range of interpolation (locally projected)
  #3) daily_covar_ghcn_data: ghcn daily data with covariates for the year range of interpolation (locally projected)
  #4) monthly_covar_ghcn_data: ghcn monthly data with covariates for the year range of interpolation (locally projected)
  
  #AUTHOR: Benoit Parmentier                                                                       
  #DATE: 03/01/2013                                                                                 
  #PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--     
  #Comments and TODO
  #-Add buffer option...
  #-Add output path argument option
  #-Add qc flag options
  ##################################################################################################
  
  ###Loading R library and packages: should it be read in before???   
  
  library(RPostgreSQL)
  library(sp)                                           # Spatial package with class definition by Bivand et al.
  library(spdep)                                          # Spatial package with methods and spatial stat. by Bivand et al.
  library(rgdal)                                          # GDAL wrapper for R, spatial utilities
  library(rgeos)
  library(rgdal)
  library(raster)
  library(rasterVis)
  
  ### Functions used in the script
  
  format_s <-function(s_ID){
    #Format station IDs into a parenthesized tuple string that is used in a psql query.
    # Argument 1: vector of station IDs
    # Return: character string of quoted, comma-separated station IDs
    tx2<-s_ID
    tx2<-as.character(tx2)
    stat_list<-tx2
    temp<-shQuote(stat_list)
    t<-paste(temp, collapse= " ")
    t1<-gsub(" ", ",",t)
    sf_ID<-paste("(",t1,")",sep="") #string containing the station IDs to query
    return(sf_ID)
  }
  
  #parsing input arguments
  
  db.name <- list_param_prep$db.name             #name of the Postgres database
  var <- list_param_prep$var                     #name of the variables to keep: TMIN, TMAX or PRCP
  year_start <-list_param_prep$range_years[1] #"2010"               #starting year for the query (included)
  year_end <-list_param_prep$range_years[2] #"2011"                 #end year for the query (excluded)
  year_start_clim <-list_param_prep$range_years_clim[1] #starting year for the monthly query to calculate the climatology (right bound not included in the range!!)
  infile1<- list_param_prep$infile1  #This is the shape file of outline of the study area                                                      #It is an input/output of the covariate script
  infile2<-list_param_prep$infile2      #"/home/layers/data/climate/ghcn/v2.92-upd-2012052822/ghcnd-stations.txt"                              #This is the textfile of station locations from GHCND
  infile3<-list_param_prep$infile_covariates #"covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script
  CRS_locs_WGS84<-list_param_prep$CRS_locs_WGS84 #Station coords WGS84: same as earlier
  in_path <- list_param_prep$in_path #e.g. "/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data/"
  out_prefix<-list_param_prep$out_prefix #"_365d_GAM_fus5_all_lstd_03012013"                #User defined output prefix
  #qc_flags<-list_param_prep$qc_flags    #flags allowed for the query from the GHCND??
  covar_names<-list_param_prep$covar_names # names should be written in the tif file!!!
  
  ## working directory is the same for input and output for this function  
  setwd(in_path) 
  
  ##### STEP 1: Select stations in the study area
  
  filename<-sub(".shp","",infile1)             #Removing the extension from file.
  interp_area <- readOGR(".",filename)
  CRS_interp<-proj4string(interp_area)         #Storing the coordinate information: geographic coordinates longlat WGS84
  
  #Read in GHCND database station locations
  dat_stat <- read.fwf(infile2, 
                       widths = c(11,9,10,7,3,31,4,4,6),fill=TRUE)
  colnames(dat_stat)<-c("STAT_ID","lat","lon","elev","state","name","GSNF","HCNF","WMOID")
  coords<- dat_stat[,c('lon','lat')]
  coordinates(dat_stat)<-coords
  proj4string(dat_stat)<-CRS_locs_WGS84 #this is the WGS84 projection
  #proj4string(dat_stat)<-CRS_interp
  dat_stat2<-spTransform(dat_stat,CRS(CRS_interp))         # Project from WGS84 to new coord. system
  
  # Spatial query to find relevant stations
  inside <- !is.na(over(dat_stat2, as(interp_area, "SpatialPolygons")))  #Finding stations contained in the current interpolation area
  stat_reg<-dat_stat2[inside,]              #Selecting stations contained in the current interpolation area
  
  ####
  ##TODO: Add buffer option? 
  ####
  
  #### STEP 2: Connecting to the database and querying for relevant data 
  
  drv <- dbDriver("PostgreSQL")
  db <- dbConnect(drv, dbname=db.name)
  
  time1<-proc.time()    #Start stopwatch
  list_s<-format_s(stat_reg$STAT_ID)
  data2<-dbGetQuery(db, paste("SELECT *
                              FROM ghcn
                              WHERE element=",shQuote(var),
                              "AND year>=",year_start,
                              "AND year<",year_end,
                              "AND station IN ",list_s,";",sep=""))  #Selecting stations using a SQL query
  time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
  time_minutes<-time_duration[3]/60
  dbDisconnect(db)
  ###
  #Add month query and averages here...
  ###
  
  #data2 contains only 46 stations for the Venezuela area??
  data_table<-merge(data2,as.data.frame(stat_reg), by.x = "station", by.y = "STAT_ID")
  
  #Transform the subset data frame into a spatial data frame and reproject
  data_reg<-data_table                               #Make a copy of the data frame
  coords<- data_reg[c('lon','lat')]              #Define coordinates in a data frame: clean up here!!
  #Wrong label...it is in fact projected...
  coordinates(data_reg)<-coords                      #Assign coordinates to the data frame
  #proj4string(data3)<-locs_coord                  #Assign coordinates reference system in PROJ4 format
  proj4string(data_reg)<-CRS_locs_WGS84                #Assign coordinates reference system in PROJ4 format
  data_reg<-spTransform(data_reg,CRS(CRS_interp))     #Project from WGS84 to new coord. system
  
  ##################################################################
  ### STEP 3: Save results and output in text files and shapefiles
  #browser()
  #Save shapefiles of the locations of meteorological stations in the study area
  outfile1<-file.path(in_path,paste("stations","_",out_prefix,".shp",sep=""))
  writeOGR(stat_reg,dsn= dirname(outfile1),layer= sub(".shp","",basename(outfile1)), driver="ESRI Shapefile",overwrite_layer=TRUE)
  #writeOGR(dst,dsn= ".",layer= sub(".shp","",outfile4), driver="ESRI Shapefile",overwrite_layer=TRUE)
  
  outfile2<-file.path(in_path,paste("ghcn_data_",var,"_",year_start_clim,"_",year_end,out_prefix,".shp",sep=""))         #Name of the file
  #writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
  writeOGR(data_reg,dsn= dirname(outfile2),layer= sub(".shp","",basename(outfile2)), driver="ESRI Shapefile",overwrite_layer=TRUE)
  
  ###################################################################
  ### STEP 4: Extract values at stations from the covariate stack of raster images
  #Eventually this step may be skipped if the covariates information is stored in the database...
  
  s_raster<-stack(infile3)                   #read in the data stack
  names(s_raster)<-covar_names               #Assigning names to the raster layers: making sure it is included in the extraction
  stat_val<- extract(s_raster, data_reg)        #Extracting values from the raster stack for every point location in coords data frame.
  
  #create a shape file and data_frame with names
  
  data_RST<-as.data.frame(stat_val)                                            #This creates a data frame with the values extracted
  data_RST_SDF<-cbind(data_reg,data_RST)
  coordinates(data_RST_SDF)<-coordinates(data_reg) #Transforming data_RST_SDF into a spatial point dataframe
  CRS_reg<-proj4string(data_reg)
  proj4string(data_RST_SDF)<-CRS_reg  #Need to assign coordinates...
  
  #Creating a date column
  date1<-ISOdate(data_RST_SDF$year,data_RST_SDF$month,data_RST_SDF$day) #Creating a date object from 3 separate columns
  date2<-gsub("-","",as.character(as.Date(date1)))
  data_RST_SDF$date<-date2                                              #Date format (year,month,day) is the following: "20100627"
  
  #This allows changing only one column name of the data.frame
  pos<-match("value",names(data_RST_SDF)) #Find column with name "value"
  if (var=="TMAX"){
    #names(data_RST_SDF)[pos]<-c("TMax")
    data_RST_SDF$value<-data_RST_SDF$value/10                #GHCN daily TMAX is stored in tenths of degrees C
  }
  
  #write out a new shapefile (including .prj component)
  outfile3<-file.path(in_path,paste("daily_covariates_ghcn_data_",var,"_",year_start,"_",year_end,out_prefix,".shp",sep=""))         #Name of the file (year range from the daily query)
  writeOGR(data_RST_SDF,dsn= dirname(outfile3),layer= sub(".shp","",basename(outfile3)), driver="ESRI Shapefile",overwrite_layer=TRUE)
  
  ###############################################################
  ######## STEP 5: Preparing monthly averages from the Postgres database
  
  drv <- dbDriver("PostgreSQL")
  db <- dbConnect(drv, dbname=db.name)
  
  #year_start_clim: set at the start of the script
  year_end<-2011
  time1<-proc.time()    #Start stopwatch
  list_s<-format_s(stat_reg$STAT_ID)
  data_m<-dbGetQuery(db, paste("SELECT *
                               FROM ghcn
                               WHERE element=",shQuote(var),
                               "AND year>=",year_start_clim,
                               "AND year<",year_end,
                               "AND station IN ",list_s,";",sep=""))  #Selecting stations using a SQL query
  time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
  time_minutes<-time_duration[3]/60
  dbDisconnect(db)
  #Clean out this section!!
  date1<-ISOdate(data_m$year,data_m$month,data_m$day) #Creating a date object from 3 separate columns
  date2<-as.POSIXlt(as.Date(date1))
  data_m$date<-date2
  #In Venezuela and other regions where there are not many stations...mflag==S should be added...see Durenne et al. 2010.
  #d<-subset(data_m,year>=2000 & mflag=="0" ) #Selecting dataset 2000-2010 with good quality: 193 stations
  d<-subset(data_m,mflag=="0" | mflag=="S") #should be input arguments!!
  #May need some screening??? i.e. range of temp and elevation...
  d1<-aggregate(value~station+month, data=d, mean)  #Calculate the monthly mean for every station
  id<-as.data.frame(unique(d1$station))     #Unique stations for 2000-2010: 193, but 7 lose their monthly avg    
  
  dst<-merge(d1, stat_reg, by.x="station", by.y="STAT_ID")   #Inner join; all columns are retained
  
  #This allows changing only one column name of the data.frame
  pos<-match("value",names(dst)) #Find column with name "value"
  if (var=="TMAX"){
    names(dst)[pos]<-c("TMax")
    dst$TMax<-dst$TMax/10                #TMax is the average max temp for monthly data
  }
  
  #Extracting covariates from stack for the monthly dataset...
  coords<- dst[c('lon','lat')]              #Define coordinates in a data frame
  coordinates(dst)<-coords                      #Assign coordinates to the data frame
  proj4string(dst)<-CRS_locs_WGS84                  #Assign coordinates reference system in PROJ4 format
  dst_month<-spTransform(dst,CRS(CRS_interp))     #Project from WGS84 to new coord. system
  
  stations_val<-extract(s_raster,dst_month)  #extraction of the information at station locations
  stations_val<-as.data.frame(stations_val)
  dst_extract<-cbind(dst_month,stations_val) #this is in the local projection of the raster stack
  dst<-dst_extract
  
  coords<- dst[c('x','y')]              #Define coordinates in a data frame, this is the local x,y
  coordinates(dst)<-coords                    #Assign coordinates to the data frame
  proj4string(dst)<-projection(s_raster)        #Assign coordinates reference system in PROJ4 format
  
  ####
  #write out a new shapefile (including .prj component)
  dst$OID<-1:nrow(dst) #need a unique ID?
  outfile4<-file.path(in_path,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end,out_prefix,".shp",sep=""))  #Name of the file
  writeOGR(dst,dsn= dirname(outfile4),layer= sub(".shp","",basename(outfile4)), driver="ESRI Shapefile",overwrite_layer=TRUE)
  
  ### List of outputs returned
  
  outfiles_obj<-list(outfile1,outfile2,outfile3,outfile4)
  names(outfiles_obj)<- c("loc_stations","loc_stations_ghcn","daily_covar_ghcn_data","monthly_covar_ghcn_data")
  return(outfiles_obj)
  
  #END OF FUNCTION # 
}

##### END OF SCRIPT ##########
climate/research/oregon/interpolation/Database_stations_extraction_raster_covariates_processing_region.R
1
##################    Data preparation for interpolation   #######################################
2
############################ Extraction of station data ##########################################
3
#This script perform queries on the Postgres database ghcn for stations matching the             
4
#interpolation area. It requires the following inputs:                                           
5
# 1)the text file ofGHCND  stations from NCDC matching the database version release              
6
# 2) a shape file of the study area with geographic coordinates: lonlat WGS84
7
# 3) a new coordinate system, which can be provided as an argument
8
# 4) the variable of interest: "TMAX", "TMIN" or "PRCP"
9
# 5) the location of the raster covariate stack.
10
#The outputs are text files and a shape file of a time subset of the database                    
11
#AUTHOR: Benoit Parmentier                                                                       
12
#DATE: 02/08/2013                                                                                 
13
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--     
14
#Comments and TODO
15
#-Add buffer option...
16
#-Add calculation of monthly mean...
17
##################################################################################################
18

  
19
###Loading R library and packages   
20

  
21
library(RPostgreSQL)
22
library(sp)                                           # Spatial package with class definitions by Bivand et al.
23
library(spdep)                                          # Spatial package with methods and spatial statistics by Bivand et al.
24
library(rgdal)                                          # GDAL wrapper for R, spatial utilities
25
library(rgeos)
26
library(rgdal)
27
library(raster)
28
library(rasterVis)
29

  
30
### Parameters and arguments
31

  
32
db.name <- "ghcn"                #name of the Postgres database
33
var <- "TMAX"                    #name of the variables to keep: TMIN, TMAX or PRCP
34
year_start<-"2010"               #starting year for the query (included)
35
year_end<-"2011"                 #end year for the query (excluded)
36
infile1<- "outline_venezuela_region__VE_01292013.shp"      #This is the shape file of outline of the study area.                                              #It is projected alreaday
37
infile2<-"ghcnd-stations.txt"                             #This is the textfile of station locations from GHCND
38
infile3<-"covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script
39

  
40
new_proj<-"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs"
41
locs_coord<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0")
42
CRS_locs_WGS84<-"+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0"
43
##Paths to inputs and output
44
in_path <- "/home/parmentier/Data/benoit_test"
45
in_path <- "/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data/"
46
out_path<- "/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/output_data/"
47
ghcnd_path<- "/home/layers/data/climate/ghcn/v2.92-upd-2012052822"
48
setwd(in_path) 
49
out_suffix<-"y2010_2010_VE_02082013"                                                 #User defined output prefix
50
out_region_name<-"_venezuela_region"
51
#out_suffix<-"_VE_01292013"
52

  
53
### Functions used in the script
54

  
55
format_s <-function(s_ID){
56
  #Format station IDs into a parenthesized tuple string that is used in a psql query.
57
  # Argument 1: vector of station IDs
58
  # Return: character string of station IDs formatted as a SQL tuple
59
  tx2<-s_ID
60
  tx2<-as.character(tx2)
61
  stat_list<-tx2
62
  temp<-shQuote(stat_list)
63
  t<-paste(temp, collapse= " ")
64
  t1<-gsub(" ", ",",t)
65
  sf_ID<-paste("(",t1,")",sep="") #string containing the station IDs to query
66
  return(sf_ID)
67
}
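#Example of the returned format (hypothetical station IDs):
#format_s(c("USC00351234","USC00355678"))   #returns "('USC00351234','USC00355678')"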
68

  
69
############ BEGIN: START OF THE SCRIPT #################
70

  
71
##### STEP 1: Select station in the study area
72

  
73
filename<-sub(".shp","",infile1)             #Removing the extension from file.
74
interp_area <- readOGR(".",filename)
75
CRS_interp<-proj4string(interp_area)         #Storing the coordinate reference system of the study area outline
76

  
77
dat_stat <- read.fwf(file.path(ghcnd_path,"ghcnd-stations.txt"), widths = c(11,9,10,7,3,31,4,4,6),fill=TRUE)
78
colnames(dat_stat)<-c("STAT_ID","lat","lon","elev","state","name","GSNF","HCNF","WMOID")
79
coords<- dat_stat[,c('lon','lat')]
80
coordinates(dat_stat)<-coords
81
proj4string(dat_stat)<-locs_coord #this is the WGS84 projection
82
#proj4string(dat_stat)<-CRS_interp
83
dat_stat2<-spTransform(dat_stat,CRS(new_proj))         # Project from WGS84 to new coord. system
84

  
85
# Spatial query to find relevant stations
86
inside <- !is.na(over(dat_stat2, as(interp_area, "SpatialPolygons")))  #Finding stations contained in the current interpolation area
87
stat_reg<-dat_stat2[inside,]              #Selecting stations contained in the current interpolation area
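#Quick check (a sketch, not in the original script):
#sum(inside)        #number of stations retained; equivalently nrow(stat_reg)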
88

  
89
#Quick visualization of station locations
90
plot(interp_area, axes =TRUE)
91
plot(stat_reg, pch=1, col="red", cex= 0.7, add=TRUE)
92
#plot(data3,pch=1,col="blue",cex=3,add=TRUE)
93
#legend("topleft", pch=1,col="red",bty="n",title= "Stations",cex=1.6)
94
#only 357 stations for Venezuela??
95

  
96
####
97
##Add buffer option? 
98
####
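#Hedged sketch of the buffer option flagged above (not in the original script). It assumes
#interp_area is in the projected system (new_proj, metres), as the over() call above implies,
#and uses a hypothetical buffer distance 'buffer_dist':
#buffer_dist <- 100000                                       #e.g. 100 km, an assumed value
#interp_area_buf <- gBuffer(interp_area, width=buffer_dist)  #rgeos is loaded above
#inside_buf <- !is.na(over(dat_stat2, interp_area_buf))      #stations within the buffered area
#stat_reg_buf <- dat_stat2[inside_buf,]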
99

  
100
#### STEP 2: Connecting to the database and query for relevant data 
101

  
102
drv <- dbDriver("PostgreSQL")
103
db <- dbConnect(drv, dbname=db.name)
104

  
105
time1<-proc.time()    #Start stop watch
106
list_s<-format_s(stat_reg$STAT_ID)
107
data2<-dbGetQuery(db, paste("SELECT *
108
      FROM ghcn
109
      WHERE element=",shQuote(var),
110
      "AND year>=",year_start,
111
      "AND year<",year_end,
112
      "AND station IN ",list_s,";",sep=""))  #Selecting station using a SQL query
113
time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
114
time_minutes<-time_duration[3]/60
115

  
116
###
117
#Add month query and averages here...
118
###
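#Hedged sketch of the monthly query suggested above (assumes the same ghcn table and columns as in STEP 2;
#the averaging is pushed into the database with an aggregate query):
#data_month <- dbGetQuery(db, paste("SELECT station, month, AVG(value) AS value FROM ghcn",
#                                   " WHERE element=", shQuote(var),
#                                   " AND year>=", year_start, " AND year<", year_end,
#                                   " AND station IN ", list_s,
#                                   " GROUP BY station, month;", sep=""))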
119

  
120
#data2 contains only 46 stations for the Venezuela area??
121
data_table<-merge(data2,as.data.frame(stat_reg), by.x = "station", by.y = "STAT_ID")
122

  
123
#Transform the subset data frame in a spatial data frame and reproject
124
data_reg<-data_table                               #Make a copy of the data frame
125
coords<- data_reg[c('lon','lat')]              #Define coordinates in a data frame: clean up here!!
126
                                                   #Note: despite the 'lon','lat' names, these columns are in fact projected coordinates...
127
coordinates(data_reg)<-coords                      #Assign coordinates to the data frame
128
#proj4string(data3)<-locs_coord                  #Assign coordinates reference system in PROJ4 format
129
proj4string(data_reg)<-locs_coord                #Assign coordinates reference system in PROJ4 format
130
data_reg<-spTransform(data_reg,CRS(new_proj))     #Project from WGS84 to new coord. system
131

  
132
plot(interp_area, axes =TRUE)
133
plot(stat_reg, pch=1, col="red", cex= 0.7, add=TRUE)
134
plot(data_reg,pch=2,col="blue",cex=2,add=TRUE)
135

  
136
##################################################################
137
### STEP 3: Save results and output to a text file and a shape file
138

  
139
#Save a textfile of the locations of meteorological stations in the study area
140
write.table(as.data.frame(stat_reg), file=file.path(in_path,paste("stations",out_region_name,"_",
141
                                                          out_suffix,".txt",sep="")),sep=",")
142
outfile<-paste("stations",out_region_name,"_",
143
               out_suffix,sep="")
144
writeOGR(stat_reg,dsn= ".",layer= outfile, driver="ESRI Shapefile",overwrite_layer=TRUE)
145

  
146
outfile<-paste("ghcn_data_",var,out_suffix,sep="")         #Name of the file
147
#writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
148
writeOGR(data_reg,dsn= ".",layer= outfile, driver="ESRI Shapefile",overwrite_layer=TRUE)
149

  
150
###################################################################
151
### STEP 4: Extract values at stations from covariates stack of raster images
152
#Eventually this step may be skipped if the covariates information is stored in the database...
153

  
154
#The names of covariates can be changed...
155
rnames<-c("x","y","lon","lat","N","E","N_w","E_w","elev","slope","aspect","CANHEIGHT","DISTOC")
156
lc_names<-c("LC1","LC2","LC3","LC4","LC5","LC6","LC7","LC8","LC9","LC10","LC11","LC12")
157
lst_names<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12",
158
             "nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08",
159
             "nobs_09","nobs_10","nobs_11","nobs_12")
160

  
161
covar_names<-c(rnames,lc_names,lst_names)
162

  
163
s_raster<-stack(infile3)                   #read in the data stack
164
names(s_raster)<-covar_names               #Assigning names to the raster layers so they are included in the extraction output
165
stat_val<- extract(s_raster, data_reg)        #Extracting values from the raster stack for every point location in coords data frame.
166

  
167
#create a shape file and data_frame with names
168

  
169
data_RST<-as.data.frame(stat_val)                                            #This creates a data frame with the values extracted
170
data_RST_SDF<-cbind(data_reg,data_RST)
171
coordinates(data_RST_SDF)<-coordinates(data_reg) #Transforming data_RST_SDF into a spatial point dataframe
172
CRS_reg<-proj4string(data_reg)
173
proj4string(data_RST_SDF)<-CRS_reg  #Need to assign the coordinate reference system...
174

  
175
#Creating a date column
176
date1<-ISOdate(data_RST_SDF$year,data_RST_SDF$month,data_RST_SDF$day) #Creating a date object from 3 separate columns
177
date2<-gsub("-","",as.character(as.Date(date1)))
178
data_RST_SDF$date<-date2                                              #Date format (year,month,day) is the following: "20100627"
179

  
180
#This allows to change only one name of the data.frame
181
pos<-match("value",names(data_RST_SDF)) #Find column with name "value"
182
if (var=="TMAX"){
183
  #names(data_RST_SDF)[pos]<-c("TMax")
184
  data_RST_SDF$value<-data_RST_SDF$value/10                #GHCN TMAX values are stored in tenths of degrees C; convert to degrees C
185
}
186

  
187
#write out a new shapefile (including .prj component)
188
outfile<-paste("daily_covariates_ghcn_data_",var,out_suffix,sep="")         #Name of the file
189
writeOGR(data_RST_SDF,dsn= ".",layer= outfile, driver="ESRI Shapefile",overwrite_layer=TRUE)
190

  
191
###############################################################
192
######## STEP 5: Preparing monthly averages from the Postgres database
193

  
194
drv <- dbDriver("PostgreSQL")
195
db <- dbConnect(drv, dbname=db.name)
196

  
197
year_start<-2000
198
year_end<-2011
199
time1<-proc.time()    #Start stop watch
200
list_s<-format_s(stat_reg$STAT_ID)
201
data_m<-dbGetQuery(db, paste("SELECT *
202
                            FROM ghcn
203
                            WHERE element=",shQuote(var),
204
                            "AND year>=",year_start,
205
                            "AND year<",year_end,
206
                            "AND station IN ",list_s,";",sep=""))  #Selecting station using a SQL query
207
time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
208
time_minutes<-time_duration[3]/60
209

  
210
# do this work outside of (before) this function
211
# to avoid making a copy of the data frame inside the function call
212
date1<-ISOdate(data_m$year,data_m$month,data_m$day) #Creating a date object from 3 separate columns
213
date2<-as.POSIXlt(as.Date(date1))
214
data_m$date<-date2
215
#In Venezuela and other regions where there are not many stations, mflag=="S" should also be included; see Durenne et al. 2010.
216
#d<-subset(data_m,year>=2000 & mflag=="0" ) #Selecting dataset 2000-2010 with good quality: 193 stations
217
d<-subset(data_m,mflag=="0" | mflag=="S")
218
#May need some screening, e.g. on the range of temperature and elevation...
219
d1<-aggregate(value~station+month, data=d, mean)  #Calculate monthly mean for every station in the region
220
id<-as.data.frame(unique(d1$station))     #Unique stations for years 2000-2010: 193 in OR, but 7 are lost in the monthly averaging
221

  
222
dst<-merge(d1, stat_reg, by.x="station", by.y="STAT_ID")   #Inner join; all columns are retained
223

  
224
#This allows changing only one column name of the data.frame
225
pos<-match("value",names(dst)) #Find column with name "value"
226
if (var=="TMAX"){
227
  names(dst)[pos]<-c("TMax")
228
  dst$TMax<-dst$TMax/10                #TMax is the monthly average of daily max temp; GHCN values are in tenths of degrees C
229
}
230
#dstjan=dst[dst$month==9,]  #dst contains the monthly averages for tmax for every station over 2000-2010
231

  
232
#Extracting covariates from stack for the monthly dataset...
233
coords<- dst[c('lon','lat')]              #Define coordinates in a data frame
234
coordinates(dst)<-coords                      #Assign coordinates to the data frame
235
proj4string(dst)<-CRS_locs_WGS84                  #Assign coordinates reference system in PROJ4 format
236
dst_month<-spTransform(dst,CRS(CRS_interp))     #Project from WGS84 to new coord. system
237

  
238
stations_val<-extract(s_raster,dst_month)  #extraction of the information at station locations
239
stations_val<-as.data.frame(stations_val)
240
dst_extract<-cbind(dst_month,stations_val) #the extracted values are in sinusoidal, the projection of the raster stack
241
dst<-dst_extract
242

  
243
coords<- dst[c('x','y')]              #Define coordinates in a data frame, this is the local x,y
244
coordinates(dst)<-coords                    #Assign coordinates to the data frame
245
proj4string(dst)<-projection(s_raster)        #Assign coordinates reference system in PROJ4 format
246

  
247
####
248
#write out a new shapefile (including .prj component)
249
outfile<-paste("monthly_covariates_ghcn_data_",var,out_suffix,sep="")         #Name of the file
250
dst$OID<-1:nrow(dst) #need a unique ID?
251
writeOGR(dst,dsn= ".",layer= outfile, driver="ESRI Shapefile",overwrite_layer=TRUE)
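#Hedged check (not in the original script): the shapefile just written can be read back to verify it, e.g.
#dst_check <- readOGR(".", outfile)    #'outfile' is the layer name defined above, without the .shp extension
#summary(dst_check)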
252

  
253
##### END OF SCRIPT ##########
climate/research/oregon/interpolation/GAM_CAI_analysis_raster_prediction_multisampling.R
1
######################################## METHOD COMPARISON #######################################
2
############################ Constant sampling for GAM CAI method #####################################
3
#This script interpolates tmax values using MODIS LST and GHCND station data                     #
4
#for the interpolation area. It requires the text file of stations and a shape file of the study area.   #
5
#Note that the projection for both GHCND and study area is lonlat WGS84.                         #
6
#The method is assessed using constant sampling, with variation of the validation sample under different  #
7
#hold out proportions.                                                                           #
8
#AUTHOR: Benoit Parmentier                                                                       #
9
#DATE: 12/27/2012                                                                                #
10
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#491--                                  #
11
###################################################################################################
12

  
13
###Loading R library and packages                                                      
14
library(gtools)                                         # loading some useful tools 
15
library(mgcv)                                           # GAM package by Simon Wood
16
library(sp)                                             # Spatial package with class definitions by Bivand et al.
17
library(spdep)                               # Spatial package with methods and spatial statistics by Bivand et al.
18
library(rgdal)                               # GDAL wrapper for R, spatial utilities
19
library(gstat)                               # Kriging and co-kriging by Pebesma et al.
20
library(fields)                              # NCAR Spatial Interpolation methods such as kriging, splines
21
library(raster)                              # Hijmans et al. package for raster processing
22
library(rasterVis)
23
library(parallel)                            # Urbanek S. and Ripley B., package for multi-core & parallel processing
24
library(reshape)
25

  
26
### Parameters and argument
27

  
28
infile1<- "ghcn_or_tmax_covariates_06262012_OR83M.shp"             #GHCN shapefile containing variables for modeling 2010                 
29
#infile2<-"list_10_dates_04212012.txt"                     #List of 10 dates for the regression
30
infile2<-"list_365_dates_04212012.txt"
31
infile3<-"LST_dates_var_names.txt"                        #LST dates name
32
infile4<-"models_interpolation_05142012.txt"              #Interpolation model names
33
infile5<-"mean_day244_rescaled.rst"                       #Raster or grid for the locations of predictions
34
#infile6<-"lst_climatology.txt"
35
infile6<-"LST_files_monthly_climatology.txt"
36
inlistf<-"list_files_05032012.txt"                        #Stack of images containing the Covariates
37

  
38
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_10242012_CAI" #Atlas location
39
setwd(path)
40

  
41
#Station location for the study area
42
stat_loc<-read.table(paste(path,"/","location_study_area_OR_0602012.txt",sep=""),sep=",", header=TRUE)
43
#GHCN Database for 1980-2010 for study area (OR) 
44
data3<-read.table(paste(path,"/","ghcn_data_TMAXy1980_2010_OR_0602012.txt",sep=""),sep=",", header=TRUE)
45

  
46
nmodels<-9                #number of models running
47
y_var_name<-"dailyTmax"   #climate variable interpolated
48
climgam<-1                                             #if 1, then GAM is run on the climatology rather than the daily deviation surface...
49
predval<-1                                              #if 1, produce raster prediction
50
prop<-0.3                                               #Proportion of data withheld for validation
51

  
52
seed_number<- 100                                             #Seed number for random sampling; if seed_number<0, no seed is used
53
#out_prefix<-"_365d_GAM_CAI2_const_10222012_"                 #User defined output prefix
54
#out_prefix<-"_365d_GAM_CAI2_const_all_lstd_10272012"         #User defined output prefix
55
out_prefix<-"_365d_GAM_CAI4_all_12272012"               #User defined output prefix
56

  
57
bias_val<-0            #if value 1 then daily training data is used in the bias surface rather than all monthly stations (added on 07/11/2012)
58
bias_prediction<-1     #if value 1 then use GAM for the BIAS prediction, otherwise GAM predicts y_var (daily tmax) directly
59
nb_sample<-1           #number of times random sampling is repeated for every hold-out proportion
60
prop_min<-0.3          #if prop_min=prop_max and step=0 then predictions are done for the number of dates...
61
prop_max<-0.3
62
step<-0         
63
constant<-0            #if value 1 then use the same sample used in the first date for interpolation over the set of dates
64
#projection used in the interpolation of the study area
65
CRS_interp<-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
66
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
67

  
68
#This can be entered as a text file or option later; OK for running now (12/07/2012)
69
list_formulas<-vector("list",nmodels)
70

  
71
list_formulas[[1]] <- as.formula("y_var~ s(ELEV_SRTM)", env=.GlobalEnv)
72
list_formulas[[2]] <- as.formula("y_var~ s(LST)", env=.GlobalEnv)
73
list_formulas[[3]] <- as.formula("y_var~ s(ELEV_SRTM,LST)", env=.GlobalEnv)
74
list_formulas[[4]] <- as.formula("y_var~ s(lat)+s(lon)+s(ELEV_SRTM)", env=.GlobalEnv)
75
list_formulas[[5]] <- as.formula("y_var~ s(lat,lon,ELEV_SRTM)", env=.GlobalEnv)
76
list_formulas[[6]] <- as.formula("y_var~ s(lat,lon)+s(ELEV_SRTM)+s(Northness_w,Eastness_w)+s(LST)", env=.GlobalEnv)
77
list_formulas[[7]] <- as.formula("y_var~ s(lat,lon)+s(ELEV_SRTM)+s(Northness_w,Eastness_w)+s(LST)+s(LC1)", env=.GlobalEnv)
78
list_formulas[[8]] <- as.formula("y_var~ s(lat,lon)+s(ELEV_SRTM)+s(Northness_w,Eastness_w)+s(LST)+s(LC3)", env=.GlobalEnv)
79
list_formulas[[9]] <- as.formula("y_var~ s(x_OR83M,y_OR83M)", env=.GlobalEnv)
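#Hedged sketch (not in the original script): each formula can be fitted with mgcv::gam on a
#training subset; 'data_s' is a hypothetical data frame holding y_var and the covariates named above.
#mod1 <- gam(list_formulas[[1]], data=data_s)
#summary(mod1)$r.sq      #adjusted R-squared of the fit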
80

  
81
#source("GAM_CAI_function_multisampling_10252012.R")
82
source("GAM_CAI_function_multisampling_12072012.R")
83

  
84
############ START OF THE SCRIPT ##################
85

  
86
###Reading the station data and setting up for models' comparison
87
filename<-sub(".shp","",infile1)             #Removing the extension from file.
88
ghcn<-readOGR(".", filename)                 #reading shapefile 
89

  
90
CRS<-proj4string(ghcn)                       #Storing projection information (ellipsoid, datum,etc.)
91

  
92
mean_LST<- readGDAL(infile5)                 #Reading the whole raster in memory. This provides a grid for kriging
93
proj4string(mean_LST)<-CRS                   #Assigning coordinate information to prediction grid.
94

  
95
ghcn <- transform(ghcn,Northness = cos(ASPECT*pi/180)) #Adding a variable to the dataframe
96
ghcn <- transform(ghcn,Eastness = sin(ASPECT*pi/180))  #adding variable to the dataframe.
97
ghcn <- transform(ghcn,Northness_w = sin(slope*pi/180)*cos(ASPECT*pi/180)) #Adding a variable to the dataframe
98
ghcn <- transform(ghcn,Eastness_w = sin(slope*pi/180)*sin(ASPECT*pi/180))  #adding variable to the dataframe.
99

  
100
#Remove NA for LC and CANHEIGHT
101
ghcn$LC1[is.na(ghcn$LC1)]<-0
102
ghcn$LC3[is.na(ghcn$LC3)]<-0
103
ghcn$CANHEIGHT[is.na(ghcn$CANHEIGHT)]<-0
104
ghcn$LC4[is.na(ghcn$LC4)]<-0
105
ghcn$LC6[is.na(ghcn$LC6)]<-0
106

  
107
#Use file.path to construct paths for OS-independent code? !!!
108
dates<-readLines(file.path(path,infile2))
109
#dates <-readLines(paste(path,"/",infile2, sep=""))
110
LST_dates <-readLines(paste(path,"/",infile3, sep=""))
111
models <-readLines(paste(path,"/",infile4, sep=""))
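#Hedged sketch of the file.path idea noted above (equivalent, OS-independent path construction):
#LST_dates <- readLines(file.path(path, infile3))
#models    <- readLines(file.path(path, infile4))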
112

  
113
##Extracting the variables values from the raster files                                             
114

  
115
lines<-read.table(paste(path,"/",inlistf,sep=""), sep=" ")                  #Column 1 contains the names of raster files
116
inlistvar<-lines[,1]
117
inlistvar<-paste(path,"/",as.character(inlistvar),sep="")
118
covar_names<-as.character(lines[,2])                                         #Column two contains short names for covariates
119

  
120
s_raster<- stack(inlistvar)                                                  #Creating a stack of raster images from the list of variables.
121
layerNames(s_raster)<-covar_names                                            #Assigning names to the raster layers
122
projection(s_raster)<-CRS
123

  
124
#stat_val<- extract(s_raster, ghcn3)                                          #Extracting values from the raster stack for every point location in coords data frame.
125
pos<-match("ASPECT",layerNames(s_raster)) #Find column with name "value"
126
r1<-raster(s_raster,layer=pos)             #Select layer from stack
127
pos<-match("slope",layerNames(s_raster)) #Find column with name "value"
128
r2<-raster(s_raster,layer=pos)             #Select layer from stack
129
N<-cos(r1*pi/180)
130
E<-sin(r1*pi/180)
131
Nw<-sin(r2*pi/180)*cos(r1*pi/180)   #Adding a variable to the dataframe
132
Ew<-sin(r2*pi/180)*sin(r1*pi/180)   #Adding variable to the dataframe.
133

  
134
pos<-match("LC1",layerNames(s_raster)) #Find column with name "value"
135
LC1<-raster(s_raster,layer=pos)             #Select layer from stack
136
s_raster<-dropLayer(s_raster,pos)
137
LC1[is.na(LC1)]<-0                      #NA must be set to zero.
138
pos<-match("LC3",layerNames(s_raster)) #Find column with name "value"
139
LC3<-raster(s_raster,layer=pos)             #Select layer from stack
140
s_raster<-dropLayer(s_raster,pos)
141
LC3[is.na(LC3)]<-0
142

  
143
#Modification added to account for other land cover
144

  
145
pos<-match("LC4",layerNames(s_raster)) #Find column with name "value"
146
LC4<-raster(s_raster,layer=pos)             #Select layer from stack
147
s_raster<-dropLayer(s_raster,pos)
148
LC4[is.na(LC4)]<-0
149

  
150
pos<-match("LC6",layerNames(s_raster)) #Find column with name "value"
151
LC6<-raster(s_raster,layer=pos)             #Select layer from stack
152
s_raster<-dropLayer(s_raster,pos)
153
LC6[is.na(LC6)]<-0
154

  
155
LC_s<-stack(LC1,LC3,LC4,LC6)
156
layerNames(LC_s)<-c("LC1_forest","LC3_grass","LC4_crop","LC6_urban")
157
#plot(LC_s)
158

  
159
pos<-match("CANHEIGHT",layerNames(s_raster)) #Find column with name "value"
160
CANHEIGHT<-raster(s_raster,layer=pos)             #Select layer from stack
161
s_raster<-dropLayer(s_raster,pos)
162
CANHEIGHT[is.na(CANHEIGHT)]<-0
163
pos<-match("ELEV_SRTM",layerNames(s_raster)) #Find column with name "ELEV_SRTM"
164
ELEV_SRTM<-raster(s_raster,layer=pos)             #Select layer from stack on 10/30
165
s_raster<-dropLayer(s_raster,pos)
166
ELEV_SRTM[ELEV_SRTM <0]<-NA
167

  
168
xy<-coordinates(r1)  #get x and y projected coordinates...
169
xy_latlon<-project(xy, CRS, inv=TRUE) # find lat/long for projected coordinates (or pixels...)
170
lon<-raster(xy_latlon) #Transform the matrix into a raster object; ncol, nrow and extent are set from r1 below
171
ncol(lon)<-ncol(r1)
172
nrow(lon)<-nrow(r1)
173
extent(lon)<-extent(r1)
174
projection(lon)<-CRS  #At this stage this is still an empty raster with nrow=536 and ncol=745
175
lat<-lon
176
values(lon)<-xy_latlon[,1]
177
values(lat)<-xy_latlon[,2]
178

  
179
r<-stack(N,E,Nw,Ew,lon,lat,LC1,LC3,LC4,LC6, CANHEIGHT,ELEV_SRTM)
180
rnames<-c("Northness","Eastness","Northness_w","Eastness_w", "lon","lat","LC1","LC3","LC4","LC6","CANHEIGHT","ELEV_SRTM")
181
layerNames(r)<-rnames
182
s_raster<-addLayer(s_raster, r)
183

  
184
#s_sgdf<-as(s_raster,"SpatialGridDataFrame") #Conversion to spatial grid data frame
185

  
186
####### Preparing LST stack of climatology...
187

  
188
#l=list.files(pattern="mean_month.*rescaled.rst")
189
l <-readLines(paste(path,"/",infile6, sep=""))
190
molst<-stack(l)  #Creating a raster stack...
191
#setwd(old)
192
molst<-molst-273.15  #K->C (0 deg C = 273.15 K)          #LST stack of monthly averages...
193
idx <- seq(as.Date('2010-01-15'), as.Date('2010-12-15'), 'month')
194
molst <- setZ(molst, idx)
195
layerNames(molst) <- month.abb
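#Hedged sketch (not in the original script): the monthly LST climatology can be inspected with rasterVis, e.g.
#levelplot(molst)    #one panel per month, using the month.abb layer names set above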
196

  
197
######  Preparing tables for model assessment: specific diagnostic/metrics
198

  
199
#Model assessment: specific diagnostics/metrics
200
results_m1<- matrix(1,1,nmodels+3)  
201
results_m2<- matrix(1,1,nmodels+3)
202
results_m3<- matrix(1,1,nmodels+3)
203
#results_RMSE_f<- matrix(1,length(models)+3)
204

  
205
#Model assessment: general diagnostic/metrics 
206
results_RMSE <- matrix(1,1,nmodels+4)
207
results_MAE <- matrix(1,1,nmodels+4)
208
results_ME <- matrix(1,1,nmodels+4)       #There are 8+1 models
209
results_R2 <- matrix(1,1,nmodels+4)       #Coef. of determination for the validation dataset
210

  
211
results_RMSE_f<- matrix(1,1,nmodels+4)    #RMSE fit, RMSE for the training dataset
212
results_MAE_f <- matrix(1,1,nmodels+4)
213
results_R2_f<-matrix(1,1,nmodels+4)
214
######## Preparing monthly averages from the Postgres database
215

  
216
# do this work outside of (before) this function
217
# to avoid making a copy of the data frame inside the function call
218
date1<-ISOdate(data3$year,data3$month,data3$day) #Creating a date object from 3 separate columns
219
date2<-as.POSIXlt(as.Date(date1))
220
data3$date<-date2
221
d<-subset(data3,year>=2000 & mflag=="0" ) #Selecting dataset 2000-2010 with good quality: 193 stations
222
#May need some screening, e.g. on the range of temperature and elevation...
223
d1<-aggregate(value~station+month, data=d, mean)  #Calculate monthly mean for every station in OR
224
id<-as.data.frame(unique(d1$station))     #Unique stations in OR for years 2000-2010: 193, but 7 are lost in the monthly averaging
225

  
226
dst<-merge(d1, stat_loc, by.x="station", by.y="STAT_ID")   #Inner join; all columns are retained
227

  
228
#This allows changing only one column name of the data.frame
229
pos<-match("value",names(dst)) #Find column with name "value"
230
names(dst)[pos]<-c("TMax")
231
dst$TMax<-dst$TMax/10                #TMax is the monthly average of daily max temp; GHCN values are in tenths of degrees C
232
#dstjan=dst[dst$month==9,]  #dst contains the monthly averages for tmax for every station over 2000-2010
233

  
234
#Extracting covariates from stack for the monthly dataset...
235
coords<- dst[c('lon','lat')]              #Define coordinates in a data frame
236
coordinates(dst)<-coords                      #Assign coordinates to the data frame
237
proj4string(dst)<-CRS_locs_WGS84                  #Assign coordinates reference system in PROJ4 format
238
dst_month<-spTransform(dst,CRS(CRS_interp))     #Project from WGS84 to new coord. system
239

  
240
stations_val<-extract(s_raster,dst_month)  #extraction of the information at station locations
241
stations_val<-as.data.frame(stations_val)
242
dst_extract<-cbind(dst_month,stations_val)
243
dst<-dst_extract
244
#Now clean and screen monthly values
245
dst_all<-dst
246
dst<-subset(dst,dst$TMax>-15 & dst$TMax<40)
247
dst<-subset(dst,dst$ELEV_SRTM>0) #This will drop two stations...or 24 rows
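#Hedged check (not in the original script): nrow(dst_all)-nrow(dst) gives the number of monthly
#records dropped by the two screening steps above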
248

  
249
######### Preparing daily values for training and testing
250

  
251
#Screening for bad values: value is tmax in this case
252
#ghcn$value<-as.numeric(ghcn$value)
253
ghcn_all<-ghcn
254
ghcn_test<-subset(ghcn,ghcn$value>-150 & ghcn$value<400)
255
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0)
256
ghcn<-ghcn_test2
257
#coords<- ghcn[,c('x_OR83M','y_OR83M')]
258

  
259
##Sampling: training and testing sites...
260

  
261
if (seed_number>0) {
262
  set.seed(seed_number)                        #Using a seed number allows results based on random numbers to be compared...
263
}
264
nel<-length(dates)
265
dates_list<-vector("list",nel) #list of one row data.frame
266

  
267
prop_range<-(seq(from=prop_min,to=prop_max,by=step))*100
268
sn<-length(dates)*nb_sample*length(prop_range)
269

  
270
for(i in 1:length(dates)){
271
  d_tmp<-rep(dates[i],nb_sample*length(prop_range)) #repeating same date
272
  s_nb<-rep(1:nb_sample,length(prop_range))         #number of random samples per proportion
273
  prop_tmp<-sort(rep(prop_range, nb_sample))
... This diff was truncated because it exceeds the maximum size that can be displayed.
