Revision 3be0f72b
Added by Benoit Parmentier over 12 years ago
climate/research/oregon/interpolation/kriging_reg.R | ||
---|---|---|
1 |
####################GWR of Tmax for 10 dates.#####################
|
|
1 |
####################GWR of Tmax for one Date#####################
|
|
2 | 2 |
#This script generates predicted values from station values for the Oregon case study. This program loads the station data from a shp file |
3 | 3 |
#and performs Kriging and co-kriging on tmax regression. |
4 |
#Script created by Benoit Parmentier on April 3, 2012.
|
|
4 |
#Script created by Benoit Parmentier on April 10, 2012.
|
|
5 | 5 |
|
6 | 6 |
###Loading r library and packages |
7 | 7 |
library(sp) |
... | ... | |
11 | 11 |
library(gpclib) |
12 | 12 |
library(maptools) |
13 | 13 |
library(gstat) |
14 |
library(graphics) |
|
14 | 15 |
###Parameters and arguments |
15 | 16 |
|
16 |
infile1<-"ghcn_or_tmax_b_03032012_OR83M.shp" |
|
17 |
path<- "/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations/" |
|
17 |
path<- "/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations/" #Path to all datasets |
|
18 | 18 |
setwd(path) |
19 |
#infile2<-"dates_interpolation_03012012.txt" # list of 10 dates for the regression |
|
20 |
infile2<-"dates_interpolation_03052012.txt" |
|
21 |
prop<-0.3 |
|
22 |
out_prefix<-"_03272012_Res_fit" |
|
19 |
infile1<-"ghcn_or_tmax_b_03032012_OR83M.shp" #Weather station location in Oregon with input variables |
|
20 |
infile2<-"dates_interpolation_03052012.txt" # list of 10 dates for the regression, more thatn 10 dates may be used |
|
21 |
infile3<-"mean_day244_rescaled.rst" #This image serves as the reference grid for kriging |
|
22 |
infile4<- "orcnty24_OR83M.shp" #Vector file defining the study area: Oregon state and its counties. |
|
23 |
prop<-0.3 #Propotion of weather stations retained for validation/testing |
|
24 |
out_prefix<-"_04102012_RMSE" #output name used in the text file result |
|
25 |
|
|
26 |
###STEP 1 DATA PREPARATION AND PROCESSING##### |
|
23 | 27 |
|
24 | 28 |
###Reading the shapefile and raster image from the local directory |
25 | 29 |
|
26 |
mean_LST<- readGDAL("mean_day244_rescaled.rst") #This reads the whole raster in memory and provide a grid for kriging
|
|
27 |
ghcn<-readOGR(".", "ghcn_or_tmax_b_03032012_OR83M")
|
|
28 |
proj4string(ghcn) #This retrieves the coordinate system for the SDF
|
|
29 |
CRS_ghcn<-proj4string(ghcn) #this can be assigned to mean_LST!!!
|
|
30 |
proj4string(mean_LST)<-CRS_ghcn #Assigning coordinates information
|
|
30 |
mean_LST<- readGDAL(infile3) #This reads the whole raster in memory and provide a grid for kriging in a SpatialGridDataFrame object
|
|
31 |
filename<-sub(".shp","",infile1) #Removing the extension from file.
|
|
32 |
ghcn<-readOGR(".", filename) #Reading station locations from vector file using rgdal and creating a SpatialPointDataFrame
|
|
33 |
CRS_ghcn<-proj4string(ghcn) #This retrieves the coordinate system information for the SDF object (PROJ4 format)
|
|
34 |
proj4string(mean_LST)<-CRS_ghcn #Assigning coordinates information to SpatialGridDataFrame object
|
|
31 | 35 |
|
32 | 36 |
# Creating state outline from county |
33 | 37 |
|
34 | 38 |
orcnty<-readOGR(".", "orcnty24_OR83M") |
35 |
proj4string(orcnty) #This retrieves the coordinate system for the SDF |
|
36 |
lps <-getSpPPolygonsLabptSlots(orcnty) #Getting centroids county labels |
|
39 |
proj4string(orcnty) #This retrieves the coordinate system for the SDF
|
|
40 |
lps <-getSpPPolygonsLabptSlots(orcnty) #Getting centroids county labels
|
|
37 | 41 |
IDOneBin <- cut(lps[,1], range(lps[,1]), include.lowest=TRUE) #Creating one bin var |
38 |
gpclibPermit() #Set the gpclib to True to allow union |
|
42 |
gpclibPermit() #Set the gpclib to True to allow union
|
|
39 | 43 |
OR_state <- unionSpatialPolygons(orcnty ,IDOneBin) #Dissolve based on bin var |
40 | 44 |
|
41 |
# Adding variables for the regression |
|
42 |
|
|
43 |
ghcn$Northness<- cos(ghcn$ASPECT) #Adding a variable to the dataframe |
|
44 |
ghcn$Eastness <- sin(ghcn$ASPECT) #adding variable to the dataframe. |
|
45 |
# Adding variables for the regressions |
|
45 | 46 |
|
46 |
ghcn$Northness_w <- sin(ghcn$slope)*cos(ghcn$ASPECT) #Adding a variable to the dataframe |
|
47 |
ghcn$Eastness_w <- sin(ghcn$slope)*sin(ghcn$ASPECT) #adding variable to the dataframe. |
|
47 |
ghcn$Northness<- cos(ghcn$ASPECT) #Adding a variable to the dataframe by calculating the cosine of Aspect |
|
48 |
ghcn$Eastness <- sin(ghcn$ASPECT) #Adding variable to the dataframe. |
|
49 |
ghcn$Northness_w <- sin(ghcn$slope)*cos(ghcn$ASPECT) #Adding a variable to the dataframe |
|
50 |
ghcn$Eastness_w <- sin(ghcn$slope)*sin(ghcn$ASPECT) #Adding variable to the dataframe. |
|
48 | 51 |
|
49 |
set.seed(100) |
|
52 |
set.seed(100) #This set a seed number for the random sampling to make results reproducible.
|
|
50 | 53 |
|
51 |
dates <-readLines(paste(path,"/",infile2, sep="")) |
|
54 |
dates <-readLines(paste(path,"/",infile2, sep="")) #Reading dates in a list from the textile. |
|
55 |
#results <- matrix(1,length(dates),4) #This is a matrix containing the diagnostic measures from the GAM models. |
|
56 |
#results_mod_n<-matrix(1,length(dates),3) |
|
52 | 57 |
|
53 |
results <- matrix(1,length(dates),3) #This is a matrix containing the diagnostic measures from the GAM models.
|
|
58 |
#Screening for bad values and setting the valid range
|
|
54 | 59 |
|
55 |
#Screening for bad values |
|
56 |
#tmax range: min max) |
|
57 |
ghcn_test<-subset(ghcn,ghcn$tmax>-150 & ghcn$tmax<400) |
|
58 |
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0) |
|
60 |
ghcn_test<-subset(ghcn,ghcn$tmax>-150 & ghcn$tmax<400) #Values are in tenth of degrees C |
|
61 |
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0) #No elevation below sea leve is allowed. |
|
59 | 62 |
ghcn<-ghcn_test2 |
60 | 63 |
|
61 |
#ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==as.numeric(d)))#this creates a list of 10 subsets data |
|
62 |
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, ghcn$date==as.numeric(d))) |
|
63 |
|
|
64 |
###Regression part 1: Creating a validation dataset by creating training and testing datasets |
|
65 |
for(i in 1:length(dates)){ # start of the for loop #1 |
|
66 |
|
|
67 |
###Regression part 1: Creating a validation dataset by creating training and testing datasets |
|
68 |
|
|
69 |
n<-nrow(ghcn.subsets[[i]]) |
|
70 |
ns<-n-round(n*prop) #Create a sample from the data frame with 70% of the rows |
|
71 |
nv<-n-ns #create a sample for validation with prop of the rows |
|
72 |
#ns<-n-round(n*prop) #Create a sample from the data frame with 70% of the rows |
|
73 |
ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly |
|
74 |
ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training) |
|
75 |
data_s <- ghcn.subsets[[i]][ind.training, ] |
|
76 |
data_v <- ghcn.subsets[[i]][ind.testing, ] |
|
77 |
|
|
78 |
#Saving the subset in a dataframe |
|
79 |
data_name<-paste("ghcn_v_",dates[[i]],sep="") |
|
80 |
assign(data_name,data_v) |
|
81 |
data_name<-paste("ghcn_s_",dates[[i]],sep="") |
|
82 |
assign(data_name,data_s) |
|
83 |
|
|
84 |
results[i,1]<- dates[i] #storing the interpolation dates in the first column |
|
85 |
results[i,2]<- ns #number of stations used in the training stage |
|
86 |
results[i,3]<- RMSE_f |
|
64 |
###CREATING SUBSETS BY INPUT DATES AND SAMPLING |
|
65 |
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, ghcn$date==as.numeric(d))) #Producing a list of data frame, one data frame per date. |
|
66 |
|
|
67 |
i<-3 #Date 10 is used to test kriging |
|
68 |
n<-nrow(ghcn.subsets[[i]]) |
|
69 |
ns<-n-round(n*prop) #Create a sample from the data frame with 70% of the rows |
|
70 |
nv<-n-ns #create a sample for validation with prop of the rows |
|
71 |
ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly |
|
72 |
ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training) #This selects the index position for testing subset stations. |
|
73 |
data_s <- ghcn.subsets[[i]][ind.training, ] |
|
74 |
data_v <- ghcn.subsets[[i]][ind.testing, ] |
|
75 |
|
|
76 |
###STEP 2 KRIGING### |
|
77 |
|
|
78 |
#Kriging tmax |
|
79 |
|
|
80 |
hscat(tmax~1,data_s,(0:9)*20000) # 9 lag classes with 20,000m width |
|
81 |
v<-variogram(tmax~1, data_s) # This plots a sample varigram for date 10 fir the testing dataset |
|
82 |
plot(v) |
|
83 |
v.fit<-fit.variogram(v,vgm(2000,"Sph", 150000,1000)) #Model variogram: sill is 2000, spherical, range 15000 and nugget 1000 |
|
84 |
plot(v, v.fit) #Compare model and sample variogram via a graphical plot |
|
85 |
tmax_krige<-krige(tmax~1, data_s,mean_LST, v.fit) #mean_LST provides the data grid/raster image for the kriging locations to be predicted. |
|
86 |
|
|
87 |
#Cokriging tmax |
|
88 |
g<-gstat(NULL,"tmax", tmax~1, data_s) #This creates a gstat object "g" that acts as container for kriging specifications. |
|
89 |
g<-gstat(g, "SRTM_elev",ELEV_SRTM~1,data_s) #Adding variables to gstat object g |
|
90 |
g<-gstat(g, "Eastness", Eastness~1,data_s) |
|
91 |
g<-gstat(g, "Northness", Northness~1, data_s) |
|
92 |
|
|
93 |
vm_g<-variogram(g) #Visualizing multivariate sample variogram. |
|
94 |
vm_g.fit<-fit.lmc(vm_g,g,vgm(2000,"Sph", 100000,1000)) #Fitting variogram for all variables at once. |
|
95 |
plot(vm_g,vm_g.fit) #Visualizing variogram fit and sample |
|
96 |
vm_g.fit$set <-list(nocheck=1) #Avoid checking and allow for different range in variogram |
|
97 |
co_kriged_surf<-predict(vm_g.fit,mean_LST) #Prediction using co-kriging with grid location defined from input raster image. |
|
98 |
#co_kriged_surf$tmax.pred #Results stored in SpatialGridDataFrame with tmax prediction accessible in dataframe. |
|
87 | 99 |
|
88 |
#Kriging residuals!! |
|
89 |
X11() |
|
90 |
hscat(residuals~1,data_s,(0:9)*20000) # 9 lag classes with 20,000m width |
|
91 |
v<-variogram(tmax~1, data_s) |
|
92 |
plot(v) |
|
93 |
tryCatch(v.fit<-fit.variogram(v,vgm(1,"Sph", 150000,1)),error=function()next) |
|
94 |
tmax_krige1<-krige(tmax~1, data_s,mean_LST, v.fit)#mean_LST provides the data grid/raster image for the kriging locations. |
|
95 |
#Find residual of kriged surface... |
|
96 |
|
|
97 |
# GWR visualization of Residuals using histograms and over space |
|
98 |
X11() |
|
99 |
title=paste("Histogram of residuals of ",data_name, sep="") |
|
100 |
hist(data_s$tmax_krige1,main=title) |
|
101 |
savePlot(paste("Histogram_",data_name,out_prefix,".png", sep=""), type="png") |
|
102 |
dev.off() |
|
103 |
|
|
104 |
X11(width=20,height=20) |
|
105 |
topo = cm.colors(9) |
|
106 |
image(gwr_res_krige,col=topo) #needs to change to have a bipolar palette !!! |
|
107 |
|
|
108 |
plot(OR_state, axes = TRUE, add=TRUE) |
|
109 |
plot(data_s, pch=1, col="red", cex= abs(data_s$residuals)/10, add=TRUE) #Taking the absolute values because residuals are |
|
110 |
LegVals<- c(0,10,20,30,40,50,110) |
|
111 |
legend(-98000,510000, legend=LegVals,pch=1,col="red",pt.cex=LegVals/10,bty="n",title= "residuals",cex=1.6) |
|
112 |
#legend("left", legend=c("275-285","285-295","295-305", "305-315","315-325"),fill=grays, bty="n", title= "LST mean DOY=244") |
|
113 |
legend(-98000,290000, legend=c("-60 -30","-30 -20","-20 -10", "-10 0"," 0 10"," 10 20", " 20 30"," 30 60"),fill=topo, bty="n", title= "Kriged RMSE",cex=1.6) |
|
114 |
title(paste("Kriging of residuals of ",data_name, sep=""),cex=2) |
|
115 |
|
|
116 |
krig_raster_name<-paste("Kriged_res_",data_name,out_prefix,".tif", sep="") |
|
117 |
#writeGDAL(gwr_res_krige,fname=krig_raster_name, driver="GTiff", type="Float32",options ="INTERLEAVE=PIXEL") |
|
118 |
|
|
119 |
savePlot(paste("Kriged_res_",data_name,out_prefix,".png", sep=""), type="png") |
|
120 |
dev.off() |
|
121 |
|
|
122 |
} |
|
123 |
|
|
124 |
## Plotting and saving diagnostic measures |
|
125 |
results_num <-results |
|
126 |
mode(results_num)<- "numeric" |
|
127 |
# Make it numeric first |
|
128 |
# Now turn it into a data.frame... |
|
129 |
|
|
130 |
results_table<-as.data.frame(results_num) |
|
131 |
colnames(results_table)<-c("dates","ns","RMSE_gwr1") |
|
132 |
|
|
133 |
write.csv(results_table, file= paste(path,"/","results_GWR_Assessment",out_prefix,".txt",sep="")) |
|
134 |
|
|
135 | 100 |
|
136 |
# End of script########## |
|
137 |
|
|
138 |
# ############################### |
|
139 |
|
|
140 |
# # GWR visualization of Residuals fit over space |
|
141 |
# X11() |
|
142 |
# title=paste("Histogram of residuals of ",data_name, sep="") |
|
143 |
# hist(data_s$residuals,main=title) |
|
144 |
# savePlot(paste("Histogram_",data_name,out_prefix,".png", sep=""), type="png") |
|
145 |
# dev.off() |
|
146 |
# |
|
101 |
#spplot.vcov(co_kriged_surf) #Visualizing the covariance structure |
|
102 |
|
|
103 |
tmax_krig1_s <- overlay(tmax_krige,data_s) #This overlays the kriged surface tmax and the location of weather stations |
|
104 |
tmax_cokrig1_s<- overlay(co_kriged_surf,data_s) #This overalys the cokriged surface tmax and the location of weather stations |
|
105 |
tmax_krig1_v <- overlay(tmax_krige,data_v) #This overlays the kriged surface tmax and the location of weather stations |
|
106 |
tmax_cokrig1_v<- overlay(co_kriged_surf,data_v) |
|
107 |
|
|
108 |
data_s$tmax_kr<-tmax_krig1_s$var1.pred #Adding the results back into the original dataframes. |
|
109 |
data_v$tmax_kr<-tmax_krig1_v$var1.pred |
|
110 |
data_s$tmax_cokr<-tmax_cokrig1_s$tmax.pred |
|
111 |
data_v$tmax_cokr<-tmax_cokrig1_v$tmax.pred |
|
112 |
|
|
113 |
# #Co-kriging only on the validation sites for faster computing |
|
147 | 114 |
# |
148 |
data_s<-ghcn_s20100901 |
|
149 |
X11(width=20,height=20) |
|
150 |
topo = cm.colors(9) |
|
151 |
image(gwr_res_krige,col=topo) #needs to change to have a bipolar palette !!! |
|
152 |
|
|
153 |
#image(mean_LST, col=grays,breaks = c(185,245,255,275,315,325)) |
|
154 |
|
|
155 |
plot(OR_state, axes = TRUE, add=TRUE) |
|
156 |
plot(data_s, pch=1, col="red", cex= abs(data_s$residuals)/10, add=TRUE) #Taking the absolute values because residuals are |
|
157 |
LegVals<- c(0,10,20,30,40,50,110) |
|
158 |
legend(-98000,510000, legend=LegVals,pch=1,col="red",pt.cex=LegVals/10,bty="n",title= "residuals",cex=1.6) |
|
159 |
|
|
160 |
legend(-98000,290000, legend=c("-60 -30","-30 -20","-20 -10", "-10 0"," 0 10"," 10 20", " 20 30"," 30 60"),fill=topo, bty="n", title= "Kriged RMSE",cex=1.6) |
|
161 |
title(paste("Kriging of residuals of ",data_name, sep=""),cex=2) |
|
115 |
# cokrig1_dv<-predict(vm_g.fit,data_v) |
|
116 |
# cokrig1_ds<-predict(vm_g.fit,data_s) |
|
117 |
# data_s$tmax_cokr<-cokrig1_ds$tmax.pred |
|
118 |
# data_v$tmax_cokr<-cokrig1_dv$tmax.pred |
|
162 | 119 |
|
163 |
krig_raster_name<-paste("Kriged_res_",data_name,out_prefix,".rst", sep="") |
|
164 |
writeGDAL(gwr_res_krige,fname="test_krige.tif", driver="GTiff", type="Float32",options ="INTERLEAVE=PIXEL") |
|
120 |
#Calculate RMSE and then krig the residuals....! |
|
165 | 121 |
|
166 |
savePlot(paste("Kriged_res_",data_name,out_prefix,".png", sep=""), type="png")
|
|
167 |
dev.off()
|
|
122 |
res_mod1<- data_v$tmax - data_v$tmax_kr #Residuals from kriging.
|
|
123 |
res_mod2<- data_v$tmax - data_v$tmax_cokr #Residuals from cokriging.
|
|
168 | 124 |
|
169 |
|
|
170 |
|
|
171 |
|
|
172 |
#Compare the coefficients and residuals using both 30 and 100% |
|
173 |
#coefficients are stored in gwrG$SDF$lon |
|
174 |
#write out a new shapefile (including .prj component) |
|
175 |
#writeOGR(data_s,".", "ghcn_1507_s", driver ="ESRI Shapefile") |
|
176 |
#ogrInfo(".", "ghcn_1507_s") #This will check the file... |
|
177 |
#plot(ghcn_1507, axes=TRUE, border="gray") |
|
178 |
|
|
179 |
#library(foreign) |
|
180 |
#dbfdata<-read.dbf("file.dbf", as.is=TRUE) |
|
181 |
##Add new attribute data (just the numbers of 1 to the numbers of objects) |
|
182 |
#dbfdata$new.att <- 1:nrow(shp) |
|
183 |
##overwrite the file with this new copy |
|
184 |
#write.dbf(dbfdata, "file.dbf") |
|
125 |
RMSE_mod1 <- sqrt(sum(res_mod1^2,na.rm=TRUE)/(nv-sum(is.na(res_mod1)))) #RMSE from kriged surface. |
|
126 |
RMSE_mod2 <- sqrt(sum(res_mod2^2,na.rm=TRUE)/(nv-sum(is.na(res_mod2)))) #RMSE from co-kriged surface. |
|
127 |
#(nv-sum(is.na(res_mod2))) |
|
128 |
# ####### |
|
129 |
# #Kriging residuals!! |
|
130 |
# |
|
131 |
# hscat(residuals~1,data_s,(0:9)*20000) # 9 lag classes with 20,000m width |
|
132 |
# v<-variogram(residuals~1, data_s) |
|
133 |
# plot(v) |
|
134 |
# v.fit<-fit.variogram(v,vgm(1,"Sph", 150000,1)) |
|
135 |
# gwr_res_krige<-krige(residuals~1, data_s,mean_LST, v.fit)#mean_LST provides the data grid/raster image for the kriging locations. |
|
136 |
# image(gwr_res_krige,col=grays) #needs to change to have a bipolar palette !!! |
|
137 |
# grays = gray.colors(5,0.45, 0.95) |
|
138 |
# #image(mean_LST, col=grays,breaks = c(185,245,255,275,315,325)) |
|
139 |
|
|
140 |
#Saving the subset in a dataframe |
|
141 |
data_name<-paste("ghcn_v_",dates[[i]],sep="") |
|
142 |
assign(data_name,data_v) |
|
143 |
data_name<-paste("ghcn_s_",dates[[i]],sep="") |
|
144 |
assign(data_name,data_s) |
|
145 |
|
|
146 |
results[i,1]<- dates[i] #storing the interpolation dates in the first column |
|
147 |
results[i,2]<- ns #number of stations in training |
|
148 |
results[i,3]<- RMSE_mod1 |
|
149 |
results[i,4]<- RMSE_mod2 |
|
150 |
|
|
151 |
results_mod_n[i,1]<-dates[i] |
|
152 |
results_mod_n[i,2]<-(nv-sum(is.na(res_mod1))) |
|
153 |
results_mod_n[i,3]<-(nv-sum(is.na(res_mod2))) |
Also available in: Unified diff
Kriging, modifying code to save training and testing samples in shapefiles: task #364