Project

General

Profile

« Previous | Next » 

Revision 3be0f72b

Added by Benoit Parmentier over 12 years ago

Kriging, modifying code to save training and testing samples in shapefiles: task #364

View differences:

climate/research/oregon/interpolation/kriging_reg.R
1
####################GWR of Tmax for 10 dates.#####################
1
####################GWR of Tmax for one Date#####################
2 2
#This script generates predicted values from station values for the Oregon case study. This program loads the station data from a shp file 
3 3
#and performs Kriging and co-kriging on tmax regression.
4
#Script created by Benoit Parmentier on April 3, 2012. 
4
#Script created by Benoit Parmentier on April 10, 2012. 
5 5

  
6 6
###Loading r library and packages
7 7
library(sp)
......
11 11
library(gpclib)
12 12
library(maptools)
13 13
library(gstat)
14
library(graphics)
14 15
###Parameters and arguments
15 16

  
16
infile1<-"ghcn_or_tmax_b_03032012_OR83M.shp" 
17
path<- "/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations/"
17
path<- "/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations/"         #Path to all datasets
18 18
setwd(path)
19
#infile2<-"dates_interpolation_03012012.txt"  # list of 10 dates for the regression
20
infile2<-"dates_interpolation_03052012.txt"
21
prop<-0.3
22
out_prefix<-"_03272012_Res_fit"
19
infile1<-"ghcn_or_tmax_b_03032012_OR83M.shp" #Weather station location in Oregon with input variables
20
infile2<-"dates_interpolation_03052012.txt"  # list of 10 dates for the regression, more thatn 10 dates may be used
21
infile3<-"mean_day244_rescaled.rst"          #This image serves as the reference grid for kriging
22
infile4<- "orcnty24_OR83M.shp"               #Vector file defining the study area: Oregon state and its counties.
23
prop<-0.3                                    #Propotion of weather stations retained for validation/testing
24
out_prefix<-"_04102012_RMSE"                 #output name used in the text file result
25

  
26
###STEP 1 DATA PREPARATION AND PROCESSING#####
23 27

  
24 28
###Reading the shapefile and raster image from the local directory
25 29

  
26
mean_LST<- readGDAL("mean_day244_rescaled.rst")  #This reads the whole raster in memory and provide a grid for kriging
27
ghcn<-readOGR(".", "ghcn_or_tmax_b_03032012_OR83M") 
28
proj4string(ghcn) #This retrieves the coordinate system for the SDF
29
CRS_ghcn<-proj4string(ghcn) #this can be assigned to mean_LST!!!
30
proj4string(mean_LST)<-CRS_ghcn #Assigning coordinates information
30
mean_LST<- readGDAL(infile3)                  #This reads the whole raster in memory and provide a grid for kriging in a SpatialGridDataFrame object
31
filename<-sub(".shp","",infile1)              #Removing the extension from file.
32
ghcn<-readOGR(".", filename)                  #Reading station locations from vector file using rgdal and creating a SpatialPointDataFrame
33
CRS_ghcn<-proj4string(ghcn)                   #This retrieves the coordinate system information for the SDF object (PROJ4 format)
34
proj4string(mean_LST)<-CRS_ghcn               #Assigning coordinates information to SpatialGridDataFrame object
31 35

  
32 36
# Creating state outline from county
33 37

  
34 38
orcnty<-readOGR(".", "orcnty24_OR83M")
35
proj4string(orcnty) #This retrieves the coordinate system for the SDF
36
lps <-getSpPPolygonsLabptSlots(orcnty)  #Getting centroids county labels
39
proj4string(orcnty)                           #This retrieves the coordinate system for the SDF
40
lps <-getSpPPolygonsLabptSlots(orcnty)        #Getting centroids county labels
37 41
IDOneBin <- cut(lps[,1], range(lps[,1]), include.lowest=TRUE)  #Creating one bin var
38
gpclibPermit() #Set the gpclib to True to allow union
42
gpclibPermit()                                #Set the gpclib to True to allow union
39 43
OR_state <- unionSpatialPolygons(orcnty ,IDOneBin) #Dissolve based on bin var
40 44

  
41
# Adding variables for the regression
42

  
43
ghcn$Northness<- cos(ghcn$ASPECT) #Adding a variable to the dataframe
44
ghcn$Eastness <- sin(ghcn$ASPECT)  #adding variable to the dataframe.
45
# Adding variables for the regressions
45 46

  
46
ghcn$Northness_w <- sin(ghcn$slope)*cos(ghcn$ASPECT) #Adding a variable to the dataframe
47
ghcn$Eastness_w  <- sin(ghcn$slope)*sin(ghcn$ASPECT)  #adding variable to the dataframe.
47
ghcn$Northness<- cos(ghcn$ASPECT)             #Adding a variable to the dataframe by calculating the cosine of Aspect
48
ghcn$Eastness <- sin(ghcn$ASPECT)             #Adding variable to the dataframe.
49
ghcn$Northness_w <- sin(ghcn$slope)*cos(ghcn$ASPECT)  #Adding a variable to the dataframe
50
ghcn$Eastness_w  <- sin(ghcn$slope)*sin(ghcn$ASPECT)  #Adding variable to the dataframe.
48 51

  
49
set.seed(100)
52
set.seed(100)                                 #This set a seed number for the random sampling to make results reproducible.
50 53

  
51
dates <-readLines(paste(path,"/",infile2, sep=""))
54
dates <-readLines(paste(path,"/",infile2, sep=""))  #Reading dates in a list from the textile.
55
#results <- matrix(1,length(dates),4)            #This is a matrix containing the diagnostic measures from the GAM models.
56
#results_mod_n<-matrix(1,length(dates),3)
52 57

  
53
results <- matrix(1,length(dates),3)            #This is a matrix containing the diagnostic measures from the GAM models.
58
#Screening for bad values and setting the valid range
54 59

  
55
#Screening for bad values
56
#tmax range: min max)
57
ghcn_test<-subset(ghcn,ghcn$tmax>-150 & ghcn$tmax<400)
58
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0)
60
ghcn_test<-subset(ghcn,ghcn$tmax>-150 & ghcn$tmax<400) #Values are in tenth of degrees C
61
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0)    #No elevation below sea leve is allowed.
59 62
ghcn<-ghcn_test2
60 63

  
61
#ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==as.numeric(d)))#this creates a list of 10 subsets data
62
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, ghcn$date==as.numeric(d)))
63

  
64
###Regression part 1: Creating a validation dataset by creating training and testing datasets
65
for(i in 1:length(dates)){            # start of the for loop #1
66
  
67
  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
68
    
69
  n<-nrow(ghcn.subsets[[i]])
70
  ns<-n-round(n*prop)  #Create a sample from the data frame with 70% of the rows
71
  nv<-n-ns             #create a sample for validation with prop of the rows
72
  #ns<-n-round(n*prop)  #Create a sample from the data frame with 70% of the rows
73
  ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly
74
  ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training)
75
  data_s <- ghcn.subsets[[i]][ind.training, ]
76
  data_v <- ghcn.subsets[[i]][ind.testing, ]
77
  
78
  #Saving the subset in a dataframe
79
  data_name<-paste("ghcn_v_",dates[[i]],sep="")
80
  assign(data_name,data_v)
81
  data_name<-paste("ghcn_s_",dates[[i]],sep="")
82
  assign(data_name,data_s)
83
  
84
  results[i,1]<- dates[i]  #storing the interpolation dates in the first column
85
  results[i,2]<- ns        #number of stations used in the training stage
86
  results[i,3]<- RMSE_f
64
###CREATING SUBSETS BY INPUT DATES AND SAMPLING
65
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, ghcn$date==as.numeric(d)))   #Producing a list of data frame, one data frame per date.
66

  
67
i<-3                                           #Date 10 is used to test kriging
68
n<-nrow(ghcn.subsets[[i]])
69
ns<-n-round(n*prop)                             #Create a sample from the data frame with 70% of the rows
70
nv<-n-ns                                        #create a sample for validation with prop of the rows
71
ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE)  #This selects the index position for 70% of the rows taken randomly
72
ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training)         #This selects the index position for testing subset stations.
73
data_s <- ghcn.subsets[[i]][ind.training, ]
74
data_v <- ghcn.subsets[[i]][ind.testing, ]
75

  
76
###STEP 2 KRIGING###
77

  
78
#Kriging tmax
79

  
80
hscat(tmax~1,data_s,(0:9)*20000)                       # 9 lag classes with 20,000m width
81
v<-variogram(tmax~1, data_s)                           # This plots a sample varigram for date 10 fir the testing dataset
82
plot(v)
83
v.fit<-fit.variogram(v,vgm(2000,"Sph", 150000,1000))   #Model variogram: sill is 2000, spherical, range 15000 and nugget 1000
84
plot(v, v.fit)                                         #Compare model and sample variogram via a graphical plot
85
tmax_krige<-krige(tmax~1, data_s,mean_LST, v.fit)      #mean_LST provides the data grid/raster image for the kriging locations to be predicted.
86

  
87
#Cokriging tmax
88
g<-gstat(NULL,"tmax", tmax~1, data_s)                   #This creates a gstat object "g" that acts as container for kriging specifications.
89
g<-gstat(g, "SRTM_elev",ELEV_SRTM~1,data_s)            #Adding variables to gstat object g
90
g<-gstat(g, "Eastness", Eastness~1,data_s)
91
g<-gstat(g, "Northness", Northness~1, data_s)
92

  
93
vm_g<-variogram(g)                                     #Visualizing multivariate sample variogram.
94
vm_g.fit<-fit.lmc(vm_g,g,vgm(2000,"Sph", 100000,1000)) #Fitting variogram for all variables at once.
95
plot(vm_g,vm_g.fit)                                    #Visualizing variogram fit and sample
96
vm_g.fit$set <-list(nocheck=1)                         #Avoid checking and allow for different range in variogram
97
co_kriged_surf<-predict(vm_g.fit,mean_LST) #Prediction using co-kriging with grid location defined from input raster image.
98
#co_kriged_surf$tmax.pred                              #Results stored in SpatialGridDataFrame with tmax prediction accessible in dataframe.
87 99
  
88
  #Kriging residuals!!
89
  X11()
90
  hscat(residuals~1,data_s,(0:9)*20000) # 9 lag classes with 20,000m width
91
  v<-variogram(tmax~1, data_s)
92
  plot(v)
93
  tryCatch(v.fit<-fit.variogram(v,vgm(1,"Sph", 150000,1)),error=function()next)
94
  tmax_krige1<-krige(tmax~1, data_s,mean_LST, v.fit)#mean_LST provides the data grid/raster image for the kriging locations.
95
  #Find residual of kriged surface...
96
  
97
  # GWR visualization of Residuals using histograms and over space
98
  X11()
99
  title=paste("Histogram of residuals of ",data_name, sep="")
100
  hist(data_s$tmax_krige1,main=title)
101
  savePlot(paste("Histogram_",data_name,out_prefix,".png", sep=""), type="png")
102
  dev.off()  
103

  
104
  X11(width=20,height=20)
105
  topo = cm.colors(9)
106
  image(gwr_res_krige,col=topo) #needs to change to have a bipolar palette !!! 
107

  
108
  plot(OR_state, axes = TRUE, add=TRUE)
109
  plot(data_s, pch=1, col="red", cex= abs(data_s$residuals)/10, add=TRUE) #Taking the absolute values because residuals are 
110
  LegVals<- c(0,10,20,30,40,50,110)
111
  legend(-98000,510000, legend=LegVals,pch=1,col="red",pt.cex=LegVals/10,bty="n",title= "residuals",cex=1.6)
112
  #legend("left", legend=c("275-285","285-295","295-305", "305-315","315-325"),fill=grays, bty="n", title= "LST mean DOY=244")
113
  legend(-98000,290000, legend=c("-60 -30","-30 -20","-20 -10", "-10 0"," 0  10"," 10  20", " 20  30"," 30  60"),fill=topo, bty="n", title= "Kriged RMSE",cex=1.6)
114
  title(paste("Kriging of residuals of ",data_name, sep=""),cex=2)
115

  
116
  krig_raster_name<-paste("Kriged_res_",data_name,out_prefix,".tif", sep="")
117
  #writeGDAL(gwr_res_krige,fname=krig_raster_name, driver="GTiff", type="Float32",options ="INTERLEAVE=PIXEL")
118

  
119
  savePlot(paste("Kriged_res_",data_name,out_prefix,".png", sep=""), type="png")
120
  dev.off()  
121

  
122
  }
123
  
124
## Plotting and saving diagnostic measures
125
results_num <-results
126
mode(results_num)<- "numeric"
127
# Make it numeric first
128
# Now turn it into a data.frame...
129

  
130
results_table<-as.data.frame(results_num)
131
colnames(results_table)<-c("dates","ns","RMSE_gwr1")
132

  
133
write.csv(results_table, file= paste(path,"/","results_GWR_Assessment",out_prefix,".txt",sep=""))
134

  
135 100

  
136
# End of script##########
137

  
138
# ###############################
139

  
140
#  # GWR visualization of Residuals fit over space
141
# X11()
142
# title=paste("Histogram of residuals of ",data_name, sep="")
143
# hist(data_s$residuals,main=title)
144
# savePlot(paste("Histogram_",data_name,out_prefix,".png", sep=""), type="png")
145
# dev.off()  
146
# 
101
#spplot.vcov(co_kriged_surf)                           #Visualizing the covariance structure
102
         
103
tmax_krig1_s <- overlay(tmax_krige,data_s)             #This overlays the kriged surface tmax and the location of weather stations
104
tmax_cokrig1_s<- overlay(co_kriged_surf,data_s)        #This overalys the cokriged surface tmax and the location of weather stations
105
tmax_krig1_v <- overlay(tmax_krige,data_v)             #This overlays the kriged surface tmax and the location of weather stations
106
tmax_cokrig1_v<- overlay(co_kriged_surf,data_v)
107
            
108
data_s$tmax_kr<-tmax_krig1_s$var1.pred                 #Adding the results back into the original dataframes.
109
data_v$tmax_kr<-tmax_krig1_v$var1.pred  
110
data_s$tmax_cokr<-tmax_cokrig1_s$tmax.pred    
111
data_v$tmax_cokr<-tmax_cokrig1_v$tmax.pred
112

  
113
# #Co-kriging only on the validation sites for faster computing
147 114
# 
148
data_s<-ghcn_s20100901
149
X11(width=20,height=20)
150
topo = cm.colors(9)
151
image(gwr_res_krige,col=topo) #needs to change to have a bipolar palette !!!
152
  
153
#image(mean_LST, col=grays,breaks = c(185,245,255,275,315,325))
154

  
155
plot(OR_state, axes = TRUE, add=TRUE)
156
plot(data_s, pch=1, col="red", cex= abs(data_s$residuals)/10, add=TRUE) #Taking the absolute values because residuals are 
157
LegVals<- c(0,10,20,30,40,50,110)
158
legend(-98000,510000, legend=LegVals,pch=1,col="red",pt.cex=LegVals/10,bty="n",title= "residuals",cex=1.6)
159

  
160
legend(-98000,290000, legend=c("-60 -30","-30 -20","-20 -10", "-10 0"," 0  10"," 10  20", " 20  30"," 30  60"),fill=topo, bty="n", title= "Kriged RMSE",cex=1.6)
161
title(paste("Kriging of residuals of ",data_name, sep=""),cex=2)
115
# cokrig1_dv<-predict(vm_g.fit,data_v)
116
# cokrig1_ds<-predict(vm_g.fit,data_s)
117
# data_s$tmax_cokr<-cokrig1_ds$tmax.pred    
118
# data_v$tmax_cokr<-cokrig1_dv$tmax.pred
162 119

  
163
krig_raster_name<-paste("Kriged_res_",data_name,out_prefix,".rst", sep="")
164
writeGDAL(gwr_res_krige,fname="test_krige.tif", driver="GTiff", type="Float32",options ="INTERLEAVE=PIXEL")
120
#Calculate RMSE and then krig the residuals....!
165 121

  
166
savePlot(paste("Kriged_res_",data_name,out_prefix,".png", sep=""), type="png")
167
dev.off()  
122
res_mod1<- data_v$tmax - data_v$tmax_kr              #Residuals from kriging.
123
res_mod2<- data_v$tmax - data_v$tmax_cokr                #Residuals from cokriging.
168 124

  
169

  
170

  
171

  
172
#Compare the coefficients and residuals using both 30 and 100%
173
#coefficients are stored in gwrG$SDF$lon
174
#write out a new shapefile (including .prj component)
175
#writeOGR(data_s,".", "ghcn_1507_s", driver ="ESRI Shapefile")
176
#ogrInfo(".", "ghcn_1507_s") #This will check the file...
177
#plot(ghcn_1507, axes=TRUE, border="gray")
178

  
179
#library(foreign)
180
#dbfdata<-read.dbf("file.dbf", as.is=TRUE)
181
##Add new attribute data (just the numbers of 1 to the numbers of objects)
182
#dbfdata$new.att <- 1:nrow(shp)
183
##overwrite the file with this new copy
184
#write.dbf(dbfdata, "file.dbf")
125
RMSE_mod1 <- sqrt(sum(res_mod1^2,na.rm=TRUE)/(nv-sum(is.na(res_mod1))))                  #RMSE from kriged surface.
126
RMSE_mod2 <- sqrt(sum(res_mod2^2,na.rm=TRUE)/(nv-sum(is.na(res_mod2))))                  #RMSE from co-kriged surface.
127
                  #(nv-sum(is.na(res_mod2)))       
128
# #######
129
# #Kriging residuals!!
130
# 
131
# hscat(residuals~1,data_s,(0:9)*20000) # 9 lag classes with 20,000m width
132
# v<-variogram(residuals~1, data_s)
133
# plot(v)
134
# v.fit<-fit.variogram(v,vgm(1,"Sph", 150000,1))
135
# gwr_res_krige<-krige(residuals~1, data_s,mean_LST, v.fit)#mean_LST provides the data grid/raster image for the kriging locations.
136
# image(gwr_res_krige,col=grays) #needs to change to have a bipolar palette !!!
137
# grays = gray.colors(5,0.45, 0.95)
138
# #image(mean_LST, col=grays,breaks = c(185,245,255,275,315,325))
139

  
140
#Saving the subset in a dataframe
141
data_name<-paste("ghcn_v_",dates[[i]],sep="")
142
assign(data_name,data_v)
143
data_name<-paste("ghcn_s_",dates[[i]],sep="")
144
assign(data_name,data_s)
145
         
146
results[i,1]<- dates[i]  #storing the interpolation dates in the first column
147
results[i,2]<- ns     #number of stations in training
148
results[i,3]<- RMSE_mod1
149
results[i,4]<- RMSE_mod2  
150
         
151
results_mod_n[i,1]<-dates[i]
152
results_mod_n[i,2]<-(nv-sum(is.na(res_mod1)))
153
results_mod_n[i,3]<-(nv-sum(is.na(res_mod2)))

Also available in: Unified diff