Project

General

Profile

Download (6.19 KB) Statistics
| Branch: | Revision:
1
####################GWR of Tmax for 10 dates.#####################
2
#This script generate station values for the Oregon case study. This program loads the station data from a shp file 
3
#and performs a GWR regression. 
4
#Script created by Benoit Parmentier on March 13, 2012. 
5

    
6
###Loading r library and packages
7
library(sp)
8
library(spdep)
9
library(rgdal)
10
library(spgwr)
11
library(gpclib)
12
library(PBSmapping)
13
library(maptools)
14
library(gstat)
15
###Parameters and arguments
16

    
17
infile1<-"ghcn_or_tmax_b_03032012_OR83M.shp" 
18
path<- "/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations/"
19
setwd(path)
20
#infile2<-"dates_interpolation_03012012.txt"  # list of 10 dates for the regression
21
infile2<-"dates_interpolation_03052012.txt"
22
prop<-0.3
23
out_prefix<-"_03272012_Res_fit"
24

    
25
###Reading the shapefile and raster image from the local directory
26

    
27
mean_LST<- readGDAL("mean_day244_rescaled.rst")  #This reads the whole raster in memory and provide a grid for kriging
28
ghcn<-readOGR(".", "ghcn_or_tmax_b_03032012_OR83M") 
29
proj4string(ghcn) #This retrieves the coordinate system for the SDF
30
CRS_ghcn<-proj4string(ghcn) #this can be assigned to mean_LST!!!
31
proj4string(mean_LST)<-CRS_ghcn #Assigning coordinates information
32

    
33
# Creating state outline from county
34

    
35
orcnty<-readOGR(".", "orcnty24_OR83M")
36
proj4string(orcnty) #This retrieves the coordinate system for the SDF
37
lps <-getSpPPolygonsLabptSlots(orcnty)  #Getting centroids county labels
38
IDOneBin <- cut(lps[,1], range(lps[,1]), include.lowest=TRUE)  #Creating one bin var
39
gpclibPermit() #Set the gpclib to True to allow union
40
OR_state <- unionSpatialPolygons(orcnty ,IDOneBin) #Dissolve based on bin var
41

    
42
# Adding variables for the regression
43

    
44
ghcn$Northness<- cos(ghcn$ASPECT) #Adding a variable to the dataframe
45
ghcn$Eastness <- sin(ghcn$ASPECT)  #adding variable to the dataframe.
46

    
47
ghcn$Northness_w <- sin(ghcn$slope)*cos(ghcn$ASPECT) #Adding a variable to the dataframe
48
ghcn$Eastness_w  <- sin(ghcn$slope)*sin(ghcn$ASPECT)  #adding variable to the dataframe.
49

    
50
set.seed(100)
51

    
52
dates <-readLines(paste(path,"/",infile2, sep=""))
53

    
54
results <- matrix(1,length(dates),3)            #This is a matrix containing the diagnostic measures from the GAM models.
55

    
56
#Screening for bad values
57
#tmax range: min max)
58
ghcn_test<-subset(ghcn,ghcn$tmax>-150 & ghcn$tmax<400)
59
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0)
60
ghcn<-ghcn_test2
61

    
62
#ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==as.numeric(d)))#this creates a list of 10 subsets data
63
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, ghcn$date==as.numeric(d)))
64

    
65
###Regression part 1: Creating a validation dataset by creating training and testing datasets
66
for(i in 1:length(dates)){            # start of the for loop #1
67
  
68
  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
69
    
70
  n<-nrow(ghcn.subsets[[i]])
71
  ns<-n-round(n*prop)  #Create a sample from the data frame with 70% of the rows
72
  nv<-n-ns             #create a sample for validation with prop of the rows
73
  #ns<-n-round(n*prop)  #Create a sample from the data frame with 70% of the rows
74
  ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly
75
  ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training)
76
  data_s <- ghcn.subsets[[i]][ind.training, ]
77
  data_v <- ghcn.subsets[[i]][ind.testing, ]
78
  bwG <- gwr.sel(tmax~ lon + lat + ELEV_SRTM + Eastness + Northness + DISTOC,data=data_s,gweight=gwr.Gauss, verbose = FALSE)
79
  gwrG<- gwr(tmax~ lon + lat + ELEV_SRTM + Eastness + Northness + DISTOC, data=data_s, bandwidth=bwG, gweight=gwr.Gauss, hatmatrix=TRUE)
80

    
81
  Res_fit<-gwrG$lm$residuals
82
  RMSE_f<-sqrt(sum(Res_fit^2)/ns)
83
  t<- data_s$tmax-gwrG$lm$fitted.values #Checking output
84
  t2<-t-Res_fit #This should be zero
85
  data_s$residuals <- Res_fit #adding field to the data 
86
  
87
  #Saving the subset in a dataframe
88
  data_name<-paste("ghcn_v_",dates[[i]],sep="")
89
  assign(data_name,data_v)
90
  data_name<-paste("ghcn_s_",dates[[i]],sep="")
91
  assign(data_name,data_s)
92
  
93
  results[i,1]<- dates[i]  #storing the interpolation dates in the first column
94
  results[i,2]<- ns        #number of stations used in the training stage
95
  results[i,3]<- RMSE_f
96
  
97
  #Kriging residuals!!
98
  X11()
99
  hscat(residuals~1,data_s,(0:9)*20000) # 9 lag classes with 20,000m width
100
  v<-variogram(residuals~1, data_s)
101
  plot(v)
102
  tryCatch(v.fit<-fit.variogram(v,vgm(1,"Sph", 150000,1)),error=function()next)
103
  gwr_res_krige<-krige(residuals~1, data_s,mean_LST, v.fit)#mean_LST provides the data grid/raster image for the kriging locations.
104

    
105
  # GWR visualization of Residuals fit over space
106
  grays = gray.colors(5,0.45, 0.95)
107
  image(gwr_res_krige,col=grays) #needs to change to have a bipolar palette !!!
108
  
109
  #image(mean_LST, col=grays,breaks = c(185,245,255,275,315,325))
110

    
111
  plot(OR_state, axes = TRUE, add=TRUE)
112
  plot(data_s, pch=1, col="red", cex= abs(data_s$residuals)/10, add=TRUE) #Taking the absolute values because residuals are 
113
  LegVals<- c(0,20,40,80,110)
114
  legend("topleft", legend=LegVals,pch=1,col="red",pt.cex=LegVals/10,bty="n",title= "residuals")
115
  legend("left", legend=c("275-285","285-295","295-305", "305-315","315-325"),fill=grays, bty="n", title= "LST mean DOY=244")
116

    
117
  savePlot(paste(data_name,out_prefix,".png", sep=""), type="png")
118
  dev.off()
119
  }
120
  
121
## Plotting and saving diagnostic measures
122
results_num <-results
123
mode(results_num)<- "numeric"
124
# Make it numeric first
125
# Now turn it into a data.frame...
126

    
127
results_table<-as.data.frame(results_num)
128
colnames(results_table)<-c("dates","ns","RMSE_gwr1")
129

    
130
write.csv(results_table, file= paste(path,"/","results_GWR_Assessment",out_prefix,".txt",sep=""))
131

    
132

    
133
# End of script##########
134

    
135
# ###############################
136

    
137
  
138
  
139
#Compare the coefficients and residuals using both 30 and 100%
140
#coefficients are stored in gwrG$SDF$lon
141
#write out a new shapefile (including .prj component)
142
#writeOGR(data_s,".", "ghcn_1507_s", driver ="ESRI Shapefile")
143
#ogrInfo(".", "ghcn_1507_s") #This will check the file...
144
#plot(ghcn_1507, axes=TRUE, border="gray")
145

    
146
#library(foreign)
147
#dbfdata<-read.dbf("file.dbf", as.is=TRUE)
148
##Add new attribute data (just the numbers of 1 to the numbers of objects)
149
#dbfdata$new.att <- 1:nrow(shp)
150
##overwrite the file with this new copy
151
#write.dbf(dbfdata, "file.dbf")
(2-2/2)