Project

General

Profile

« Previous | Next » 

Revision 7da7872a

Added by Benoit Parmentier over 12 years ago

Kriging for OR, this is the first script, task #364

View differences:

climate/research/oregon/interpolation/kriging_reg.R
1
####################GWR of Tmax for 10 dates.#####################
2
#This script generates predicted values from station values for the Oregon case study. This program loads the station data from a shp file 
3
#and performs Kriging and co-kriging on tmax regression.
4
#Script created by Benoit Parmentier on April 3, 2012. 
5

  
6
###Loading r library and packages
7
library(sp)
8
library(spdep)
9
library(rgdal)
10
library(spgwr)
11
library(gpclib)
12
library(maptools)
13
library(gstat)
14
###Parameters and arguments
15

  
16
infile1<-"ghcn_or_tmax_b_03032012_OR83M.shp" 
17
path<- "/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations/"
18
setwd(path)
19
#infile2<-"dates_interpolation_03012012.txt"  # list of 10 dates for the regression
20
infile2<-"dates_interpolation_03052012.txt"
21
prop<-0.3
22
out_prefix<-"_03272012_Res_fit"
23

  
24
###Reading the shapefile and raster image from the local directory
25

  
26
mean_LST<- readGDAL("mean_day244_rescaled.rst")  #This reads the whole raster in memory and provide a grid for kriging
27
ghcn<-readOGR(".", "ghcn_or_tmax_b_03032012_OR83M") 
28
proj4string(ghcn) #This retrieves the coordinate system for the SDF
29
CRS_ghcn<-proj4string(ghcn) #this can be assigned to mean_LST!!!
30
proj4string(mean_LST)<-CRS_ghcn #Assigning coordinates information
31

  
32
# Creating state outline from county
33

  
34
orcnty<-readOGR(".", "orcnty24_OR83M")
35
proj4string(orcnty) #This retrieves the coordinate system for the SDF
36
lps <-getSpPPolygonsLabptSlots(orcnty)  #Getting centroids county labels
37
IDOneBin <- cut(lps[,1], range(lps[,1]), include.lowest=TRUE)  #Creating one bin var
38
gpclibPermit() #Set the gpclib to True to allow union
39
OR_state <- unionSpatialPolygons(orcnty ,IDOneBin) #Dissolve based on bin var
40

  
41
# Adding variables for the regression
42

  
43
ghcn$Northness<- cos(ghcn$ASPECT) #Adding a variable to the dataframe
44
ghcn$Eastness <- sin(ghcn$ASPECT)  #adding variable to the dataframe.
45

  
46
ghcn$Northness_w <- sin(ghcn$slope)*cos(ghcn$ASPECT) #Adding a variable to the dataframe
47
ghcn$Eastness_w  <- sin(ghcn$slope)*sin(ghcn$ASPECT)  #adding variable to the dataframe.
48

  
49
set.seed(100)
50

  
51
dates <-readLines(paste(path,"/",infile2, sep=""))
52

  
53
results <- matrix(1,length(dates),3)            #This is a matrix containing the diagnostic measures from the GAM models.
54

  
55
#Screening for bad values
56
#tmax range: min max)
57
ghcn_test<-subset(ghcn,ghcn$tmax>-150 & ghcn$tmax<400)
58
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0)
59
ghcn<-ghcn_test2
60

  
61
#ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==as.numeric(d)))#this creates a list of 10 subsets data
62
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, ghcn$date==as.numeric(d)))
63

  
64
###Regression part 1: Creating a validation dataset by creating training and testing datasets
65
for(i in 1:length(dates)){            # start of the for loop #1
66
  
67
  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
68
    
69
  n<-nrow(ghcn.subsets[[i]])
70
  ns<-n-round(n*prop)  #Create a sample from the data frame with 70% of the rows
71
  nv<-n-ns             #create a sample for validation with prop of the rows
72
  #ns<-n-round(n*prop)  #Create a sample from the data frame with 70% of the rows
73
  ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly
74
  ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training)
75
  data_s <- ghcn.subsets[[i]][ind.training, ]
76
  data_v <- ghcn.subsets[[i]][ind.testing, ]
77
  
78
  #Saving the subset in a dataframe
79
  data_name<-paste("ghcn_v_",dates[[i]],sep="")
80
  assign(data_name,data_v)
81
  data_name<-paste("ghcn_s_",dates[[i]],sep="")
82
  assign(data_name,data_s)
83
  
84
  results[i,1]<- dates[i]  #storing the interpolation dates in the first column
85
  results[i,2]<- ns        #number of stations used in the training stage
86
  results[i,3]<- RMSE_f
87
  
88
  #Kriging residuals!!
89
  X11()
90
  hscat(residuals~1,data_s,(0:9)*20000) # 9 lag classes with 20,000m width
91
  v<-variogram(tmax~1, data_s)
92
  plot(v)
93
  tryCatch(v.fit<-fit.variogram(v,vgm(1,"Sph", 150000,1)),error=function()next)
94
  tmax_krige1<-krige(tmax~1, data_s,mean_LST, v.fit)#mean_LST provides the data grid/raster image for the kriging locations.
95
  #Find residual of kriged surface...
96
  
97
  # GWR visualization of Residuals using histograms and over space
98
  X11()
99
  title=paste("Histogram of residuals of ",data_name, sep="")
100
  hist(data_s$tmax_krige1,main=title)
101
  savePlot(paste("Histogram_",data_name,out_prefix,".png", sep=""), type="png")
102
  dev.off()  
103

  
104
  X11(width=20,height=20)
105
  topo = cm.colors(9)
106
  image(gwr_res_krige,col=topo) #needs to change to have a bipolar palette !!! 
107

  
108
  plot(OR_state, axes = TRUE, add=TRUE)
109
  plot(data_s, pch=1, col="red", cex= abs(data_s$residuals)/10, add=TRUE) #Taking the absolute values because residuals are 
110
  LegVals<- c(0,10,20,30,40,50,110)
111
  legend(-98000,510000, legend=LegVals,pch=1,col="red",pt.cex=LegVals/10,bty="n",title= "residuals",cex=1.6)
112
  #legend("left", legend=c("275-285","285-295","295-305", "305-315","315-325"),fill=grays, bty="n", title= "LST mean DOY=244")
113
  legend(-98000,290000, legend=c("-60 -30","-30 -20","-20 -10", "-10 0"," 0  10"," 10  20", " 20  30"," 30  60"),fill=topo, bty="n", title= "Kriged RMSE",cex=1.6)
114
  title(paste("Kriging of residuals of ",data_name, sep=""),cex=2)
115

  
116
  krig_raster_name<-paste("Kriged_res_",data_name,out_prefix,".tif", sep="")
117
  #writeGDAL(gwr_res_krige,fname=krig_raster_name, driver="GTiff", type="Float32",options ="INTERLEAVE=PIXEL")
118

  
119
  savePlot(paste("Kriged_res_",data_name,out_prefix,".png", sep=""), type="png")
120
  dev.off()  
121

  
122
  }
123
  
124
## Plotting and saving diagnostic measures
125
results_num <-results
126
mode(results_num)<- "numeric"
127
# Make it numeric first
128
# Now turn it into a data.frame...
129

  
130
results_table<-as.data.frame(results_num)
131
colnames(results_table)<-c("dates","ns","RMSE_gwr1")
132

  
133
write.csv(results_table, file= paste(path,"/","results_GWR_Assessment",out_prefix,".txt",sep=""))
134

  
135

  
136
# End of script##########
137

  
138
# ###############################
139

  
140
#  # GWR visualization of Residuals fit over space
141
# X11()
142
# title=paste("Histogram of residuals of ",data_name, sep="")
143
# hist(data_s$residuals,main=title)
144
# savePlot(paste("Histogram_",data_name,out_prefix,".png", sep=""), type="png")
145
# dev.off()  
146
# 
147
# 
148
data_s<-ghcn_s20100901
149
X11(width=20,height=20)
150
topo = cm.colors(9)
151
image(gwr_res_krige,col=topo) #needs to change to have a bipolar palette !!!
152
  
153
#image(mean_LST, col=grays,breaks = c(185,245,255,275,315,325))
154

  
155
plot(OR_state, axes = TRUE, add=TRUE)
156
plot(data_s, pch=1, col="red", cex= abs(data_s$residuals)/10, add=TRUE) #Taking the absolute values because residuals are 
157
LegVals<- c(0,10,20,30,40,50,110)
158
legend(-98000,510000, legend=LegVals,pch=1,col="red",pt.cex=LegVals/10,bty="n",title= "residuals",cex=1.6)
159

  
160
legend(-98000,290000, legend=c("-60 -30","-30 -20","-20 -10", "-10 0"," 0  10"," 10  20", " 20  30"," 30  60"),fill=topo, bty="n", title= "Kriged RMSE",cex=1.6)
161
title(paste("Kriging of residuals of ",data_name, sep=""),cex=2)
162

  
163
krig_raster_name<-paste("Kriged_res_",data_name,out_prefix,".rst", sep="")
164
writeGDAL(gwr_res_krige,fname="test_krige.tif", driver="GTiff", type="Float32",options ="INTERLEAVE=PIXEL")
165

  
166
savePlot(paste("Kriged_res_",data_name,out_prefix,".png", sep=""), type="png")
167
dev.off()  
168

  
169

  
170

  
171

  
172
#Compare the coefficients and residuals using both 30 and 100%
173
#coefficients are stored in gwrG$SDF$lon
174
#write out a new shapefile (including .prj component)
175
#writeOGR(data_s,".", "ghcn_1507_s", driver ="ESRI Shapefile")
176
#ogrInfo(".", "ghcn_1507_s") #This will check the file...
177
#plot(ghcn_1507, axes=TRUE, border="gray")
178

  
179
#library(foreign)
180
#dbfdata<-read.dbf("file.dbf", as.is=TRUE)
181
##Add new attribute data (just the numbers of 1 to the numbers of objects)
182
#dbfdata$new.att <- 1:nrow(shp)
183
##overwrite the file with this new copy
184
#write.dbf(dbfdata, "file.dbf")

Also available in: Unified diff