Project

General

Profile

Download (8.19 KB) Statistics
| Branch: | Revision:
1
####################GWR of Tmax for 10 dates.#####################
2
#This script generates predicted values from station values for the Oregon case study. This program loads the station data from a shp file 
3
#and performs a GWR regression and a kriged surface of residuals.
4
#Script created by Benoit Parmentier on April 3, 2012. 
5

    
6
###Loading r library and packages
7
library(sp)
8
library(spdep)
9
library(rgdal)
10
library(spgwr)
11
library(gpclib)
12
data
13
library(maptools)
14
library(gstat)
15
###Parameters and arguments
16

    
17
infile1<-"ghcn_or_tmax_b_03032012_OR83M.shp" 
18
path<- "/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations/"
19
setwd(path)
20
#infile2<-"dates_interpolation_03012012.txt"  # list of 10 dates for the regression
21
infile2<-"dates_interpolation_03052012.txt"
22
prop<-0.3
23
out_prefix<-"_03272012_Res_fit"
24

    
25
###Reading the shapefile and raster image from the local directory
26

    
27
mean_LST<- readGDAL("mean_day244_rescaled.rst")  #This reads the whole raster in memory and provide a grid for kriging
28
ghcn<-readOGR(".", "ghcn_or_tmax_b_03032012_OR83M") 
29
proj4string(ghcn) #This retrieves the coordinate system for the SDF
30
CRS_ghcn<-proj4string(ghcn) #this can be assigned to mean_LST!!!
31
proj4string(mean_LST)<-CRS_ghcn #Assigning coordinates information
32

    
33
# Creating state outline from county
34

    
35
orcnty<-readOGR(".", "orcnty24_OR83M")
36
proj4string(orcnty) #This retrieves the coordinate system for the SDF
37
lps <-getSpPPolygonsLabptSlots(orcnty)  #Getting centroids county labels
38
IDOneBin <- cut(lps[,1], range(lps[,1]), include.lowest=TRUE)  #Creating one bin var
39
gpclibPermit() #Set the gpclib to True to allow union
40
OR_state <- unionSpatialPolygons(orcnty ,IDOneBin) #Dissolve based on bin var
41

    
42
# Adding variables for the regression
43

    
44
ghcn$Northness<- cos(ghcn$ASPECT) #Adding a variable to the dataframe
45
ghcn$Eastness <- sin(ghcn$ASPECT)  #adding variable to the dataframe.
46

    
47
ghcn$Northness_w <- sin(ghcn$slope)*cos(ghcn$ASPECT) #Adding a variable to the dataframe
48
ghcn$Eastness_w  <- sin(ghcn$slope)*sin(ghcn$ASPECT)  #adding variable to the dataframe.
49

    
50
set.seed(100)
51

    
52
dates <-readLines(paste(path,"/",infile2, sep=""))
53

    
54
results <- matrix(1,length(dates),3)            #This is a matrix containing the diagnostic measures from the GAM models.
55

    
56
#Screening for bad values
57
#tmax range: min max)
58
ghcn_test<-subset(ghcn,ghcn$tmax>-150 & ghcn$tmax<400)
59
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0)
60
ghcn<-ghcn_test2
61

    
62
#ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==as.numeric(d)))#this creates a list of 10 subsets data
63
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, ghcn$date==as.numeric(d)))
64

    
65
###Regression part 1: Creating a validation dataset by creating training and testing datasets
66
for(i in 1:length(dates)){            # start of the for loop #1
67
  
68
  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
69
    
70
  n<-nrow(ghcn.subsets[[i]])
71
  ns<-n-round(n*prop)  #Create a sample from the data frame with 70% of the rows
72
  nv<-n-ns             #create a sample for validation with prop of the rows
73
  #ns<-n-round(n*prop)  #Create a sample from the data frame with 70% of the rows
74
  ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly
75
  ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training)
76
  data_s <- ghcn.subsets[[i]][ind.training, ]
77
  data_v <- ghcn.subsets[[i]][ind.testing, ]
78
  bwG <- gwr.sel(tmax~ lon + lat + ELEV_SRTM + Eastness + Northness + DISTOC,data=data_s,gweight=gwr.Gauss, verbose = FALSE)
79
  gwrG<- gwr(tmax~ lon + lat + ELEV_SRTM + Eastness + Northness + DISTOC, data=data_s, bandwidth=bwG, gweight=gwr.Gauss, hatmatrix=TRUE)
80

    
81
  Res_fit<-gwrG$lm$residuals
82
  RMSE_f<-sqrt(sum(Res_fit^2)/ns)
83
  t<- data_s$tmax-gwrG$lm$fitted.values #Checking output
84
  t2<-t-Res_fit #This should be zero
85
  data_s$residuals <- Res_fit #adding field to the data 
86
  
87
  #Saving the subset in a dataframe
88
  data_name<-paste("ghcn_v_",dates[[i]],sep="")
89
  assign(data_name,data_v)
90
  data_name<-paste("ghcn_s_",dates[[i]],sep="")
91
  assign(data_name,data_s)
92
  
93
  results[i,1]<- dates[i]  #storing the interpolation dates in the first column
94
  results[i,2]<- ns        #number of stations used in the training stage
95
  results[i,3]<- RMSE_f
96
  
97
  #Kriging residuals!!
98
  X11()
99
  hscat(residuals~1,data_s,(0:9)*20000) # 9 lag classes with 20,000m width
100
  v<-variogram(residuals~1, data_s)
101
  plot(v)
102
  tryCatch(v.fit<-fit.variogram(v,vgm(1,"Sph", 150000,1)),error=function()next)
103
  gwr_res_krige<-krige(residuals~1, data_s,mean_LST, v.fit)#mean_LST provides the data grid/raster image for the kriging locations.
104
 
105
  # GWR visualization of Residuals using histograms and over space
106
  X11()
107
  title=paste("Histogram of residuals of ",data_name, sep="")
108
  hist(data_s$residuals,main=title)
109
  savePlot(paste("Histogram_",data_name,out_prefix,".png", sep=""), type="png")
110
  dev.off()  
111

    
112
  X11(width=20,height=20)
113
  topo = cm.colors(9)
114
  image(gwr_res_krige,col=topo) #needs to change to have a bipolar palette !!! 
115

    
116
  plot(OR_state, axes = TRUE, add=TRUE)
117
  plot(data_s, pch=1, col="red", cex= abs(data_s$residuals)/10, add=TRUE) #Taking the absolute values because residuals are 
118
  LegVals<- c(0,10,20,30,40,50,110)
119
  legend(-98000,510000, legend=LegVals,pch=1,col="red",pt.cex=LegVals/10,bty="n",title= "residuals",cex=1.6)
120
  #legend("left", legend=c("275-285","285-295","295-305", "305-315","315-325"),fill=grays, bty="n", title= "LST mean DOY=244")
121
  legend(-98000,290000, legend=c("-60 -30","-30 -20","-20 -10", "-10 0"," 0  10"," 10  20", " 20  30"," 30  60"),fill=topo, bty="n", title= "Kriged RMSE",cex=1.6)
122
  title(paste("Kriging of residuals of ",data_name, sep=""),cex=2)
123

    
124
  krig_raster_name<-paste("Kriged_res_",data_name,out_prefix,".tif", sep="")
125
  #writeGDAL(gwr_res_krige,fname=krig_raster_name, driver="GTiff", type="Float32",options ="INTERLEAVE=PIXEL")
126

    
127
  savePlot(paste("Kriged_res_",data_name,out_prefix,".png", sep=""), type="png")
128
  dev.off()  
129

    
130
  }
131
  
132
## Plotting and saving diagnostic measures
133
results_num <-results
134
mode(results_num)<- "numeric"
135
# Make it numeric first
136
# Now turn it into a data.frame...
137

    
138
results_table<-as.data.frame(results_num)
139
colnames(results_table)<-c("dates","ns","RMSE_gwr1")
140

    
141
write.csv(results_table, file= paste(path,"/","results_GWR_Assessment",out_prefix,".txt",sep=""))
142

    
143

    
144
# End of script##########
145

    
146
# ###############################
147

    
148
#  # GWR visualization of Residuals fit over space
149
# X11()
150
# title=paste("Histogram of residuals of ",data_name, sep="")
151
# hist(data_s$residuals,main=title)
152
# savePlot(paste("Histogram_",data_name,out_prefix,".png", sep=""), type="png")
153
# dev.off()  
154
# 
155
# 
156
data_s<-ghcn_s20100901
157
X11(width=20,height=20)
158
topo = cm.colors(9)
159
image(gwr_res_krige,col=topo) #needs to change to have a bipolar palette !!!
160
  
161
#image(mean_LST, col=grays,breaks = c(185,245,255,275,315,325))
162

    
163
plot(OR_state, axes = TRUE, add=TRUE)
164
plot(data_s, pch=1, col="red", cex= abs(data_s$residuals)/10, add=TRUE) #Taking the absolute values because residuals are 
165
LegVals<- c(0,10,20,30,40,50,110)
166
legend(-98000,510000, legend=LegVals,pch=1,col="red",pt.cex=LegVals/10,bty="n",title= "residuals",cex=1.6)
167
#legend("left", legend=c("275-285","285-295","295-305", "305-315","315-325"),fill=grays, bty="n", title= "LST mean DOY=244")
168
legend(-98000,290000, legend=c("-60 -30","-30 -20","-20 -10", "-10 0"," 0  10"," 10  20", " 20  30"," 30  60"),fill=topo, bty="n", title= "Kriged RMSE",cex=1.6)
169
title(paste("Kriging of residuals of ",data_name, sep=""),cex=2)
170

    
171
krig_raster_name<-paste("Kriged_res_",data_name,out_prefix,".rst", sep="")
172
writeGDAL(gwr_res_krige,fname="test_krige.tif", driver="GTiff", type="Float32",options ="INTERLEAVE=PIXEL")
173

    
174
savePlot(paste("Kriged_res_",data_name,out_prefix,".png", sep=""), type="png")
175
dev.off()  
176

    
177

    
178

    
179

    
180
#Compare the coefficients and residuals using both 30 and 100%
181
#coefficients are stored in gwrG$SDF$lon
182
#write out a new shapefile (including .prj component)
183
#writeOGR(data_s,".", "ghcn_1507_s", driver ="ESRI Shapefile")
184
#ogrInfo(".", "ghcn_1507_s") #This will check the file...
185
#plot(ghcn_1507, axes=TRUE, border="gray")
186

    
187
#library(foreign)
188
#dbfdata<-read.dbf("file.dbf", as.is=TRUE)
189
##Add new attribute data (just the numbers of 1 to the numbers of objects)
190
#dbfdata$new.att <- 1:nrow(shp)
191
##overwrite the file with this new copy
192
#write.dbf(dbfdata, "file.dbf")
(21-21/32)