Revision 7da7872a
Added by Benoit Parmentier over 12 years ago
climate/research/oregon/interpolation/kriging_reg.R | ||
---|---|---|
1 |
####################GWR of Tmax for 10 dates.##################### |
|
2 |
#This script generates predicted values from station values for the Oregon case study. This program loads the station data from a shp file |
|
3 |
#and performs Kriging and co-kriging on tmax regression. |
|
4 |
#Script created by Benoit Parmentier on April 3, 2012. |
|
5 |
|
|
6 |
###Loading r library and packages |
|
7 |
library(sp) |
|
8 |
library(spdep) |
|
9 |
library(rgdal) |
|
10 |
library(spgwr) |
|
11 |
library(gpclib) |
|
12 |
library(maptools) |
|
13 |
library(gstat) |
|
14 |
###Parameters and arguments |
|
15 |
|
|
16 |
infile1<-"ghcn_or_tmax_b_03032012_OR83M.shp" |
|
17 |
path<- "/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations/" |
|
18 |
setwd(path) |
|
19 |
#infile2<-"dates_interpolation_03012012.txt" # list of 10 dates for the regression |
|
20 |
infile2<-"dates_interpolation_03052012.txt" |
|
21 |
prop<-0.3 |
|
22 |
out_prefix<-"_03272012_Res_fit" |
|
23 |
|
|
24 |
###Reading the shapefile and raster image from the local directory |
|
25 |
|
|
26 |
mean_LST<- readGDAL("mean_day244_rescaled.rst") #This reads the whole raster in memory and provide a grid for kriging |
|
27 |
ghcn<-readOGR(".", "ghcn_or_tmax_b_03032012_OR83M") |
|
28 |
proj4string(ghcn) #This retrieves the coordinate system for the SDF |
|
29 |
CRS_ghcn<-proj4string(ghcn) #this can be assigned to mean_LST!!! |
|
30 |
proj4string(mean_LST)<-CRS_ghcn #Assigning coordinates information |
|
31 |
|
|
32 |
# Creating state outline from county |
|
33 |
|
|
34 |
orcnty<-readOGR(".", "orcnty24_OR83M") |
|
35 |
proj4string(orcnty) #This retrieves the coordinate system for the SDF |
|
36 |
lps <-getSpPPolygonsLabptSlots(orcnty) #Getting centroids county labels |
|
37 |
IDOneBin <- cut(lps[,1], range(lps[,1]), include.lowest=TRUE) #Creating one bin var |
|
38 |
gpclibPermit() #Set the gpclib to True to allow union |
|
39 |
OR_state <- unionSpatialPolygons(orcnty ,IDOneBin) #Dissolve based on bin var |
|
40 |
|
|
41 |
# Adding variables for the regression |
|
42 |
|
|
43 |
ghcn$Northness<- cos(ghcn$ASPECT) #Adding a variable to the dataframe |
|
44 |
ghcn$Eastness <- sin(ghcn$ASPECT) #adding variable to the dataframe. |
|
45 |
|
|
46 |
ghcn$Northness_w <- sin(ghcn$slope)*cos(ghcn$ASPECT) #Adding a variable to the dataframe |
|
47 |
ghcn$Eastness_w <- sin(ghcn$slope)*sin(ghcn$ASPECT) #adding variable to the dataframe. |
|
48 |
|
|
49 |
set.seed(100) |
|
50 |
|
|
51 |
dates <-readLines(paste(path,"/",infile2, sep="")) |
|
52 |
|
|
53 |
results <- matrix(1,length(dates),3) #This is a matrix containing the diagnostic measures from the GAM models. |
|
54 |
|
|
55 |
#Screening for bad values |
|
56 |
#tmax range: min max) |
|
57 |
ghcn_test<-subset(ghcn,ghcn$tmax>-150 & ghcn$tmax<400) |
|
58 |
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0) |
|
59 |
ghcn<-ghcn_test2 |
|
60 |
|
|
61 |
#ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==as.numeric(d)))#this creates a list of 10 subsets data |
|
62 |
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, ghcn$date==as.numeric(d))) |
|
63 |
|
|
64 |
###Regression part 1: Creating a validation dataset by creating training and testing datasets |
|
65 |
for(i in 1:length(dates)){ # start of the for loop #1 |
|
66 |
|
|
67 |
###Regression part 1: Creating a validation dataset by creating training and testing datasets |
|
68 |
|
|
69 |
n<-nrow(ghcn.subsets[[i]]) |
|
70 |
ns<-n-round(n*prop) #Create a sample from the data frame with 70% of the rows |
|
71 |
nv<-n-ns #create a sample for validation with prop of the rows |
|
72 |
#ns<-n-round(n*prop) #Create a sample from the data frame with 70% of the rows |
|
73 |
ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly |
|
74 |
ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training) |
|
75 |
data_s <- ghcn.subsets[[i]][ind.training, ] |
|
76 |
data_v <- ghcn.subsets[[i]][ind.testing, ] |
|
77 |
|
|
78 |
#Saving the subset in a dataframe |
|
79 |
data_name<-paste("ghcn_v_",dates[[i]],sep="") |
|
80 |
assign(data_name,data_v) |
|
81 |
data_name<-paste("ghcn_s_",dates[[i]],sep="") |
|
82 |
assign(data_name,data_s) |
|
83 |
|
|
84 |
results[i,1]<- dates[i] #storing the interpolation dates in the first column |
|
85 |
results[i,2]<- ns #number of stations used in the training stage |
|
86 |
results[i,3]<- RMSE_f |
|
87 |
|
|
88 |
#Kriging residuals!! |
|
89 |
X11() |
|
90 |
hscat(residuals~1,data_s,(0:9)*20000) # 9 lag classes with 20,000m width |
|
91 |
v<-variogram(tmax~1, data_s) |
|
92 |
plot(v) |
|
93 |
tryCatch(v.fit<-fit.variogram(v,vgm(1,"Sph", 150000,1)),error=function()next) |
|
94 |
tmax_krige1<-krige(tmax~1, data_s,mean_LST, v.fit)#mean_LST provides the data grid/raster image for the kriging locations. |
|
95 |
#Find residual of kriged surface... |
|
96 |
|
|
97 |
# GWR visualization of Residuals using histograms and over space |
|
98 |
X11() |
|
99 |
title=paste("Histogram of residuals of ",data_name, sep="") |
|
100 |
hist(data_s$tmax_krige1,main=title) |
|
101 |
savePlot(paste("Histogram_",data_name,out_prefix,".png", sep=""), type="png") |
|
102 |
dev.off() |
|
103 |
|
|
104 |
X11(width=20,height=20) |
|
105 |
topo = cm.colors(9) |
|
106 |
image(gwr_res_krige,col=topo) #needs to change to have a bipolar palette !!! |
|
107 |
|
|
108 |
plot(OR_state, axes = TRUE, add=TRUE) |
|
109 |
plot(data_s, pch=1, col="red", cex= abs(data_s$residuals)/10, add=TRUE) #Taking the absolute values because residuals are |
|
110 |
LegVals<- c(0,10,20,30,40,50,110) |
|
111 |
legend(-98000,510000, legend=LegVals,pch=1,col="red",pt.cex=LegVals/10,bty="n",title= "residuals",cex=1.6) |
|
112 |
#legend("left", legend=c("275-285","285-295","295-305", "305-315","315-325"),fill=grays, bty="n", title= "LST mean DOY=244") |
|
113 |
legend(-98000,290000, legend=c("-60 -30","-30 -20","-20 -10", "-10 0"," 0 10"," 10 20", " 20 30"," 30 60"),fill=topo, bty="n", title= "Kriged RMSE",cex=1.6) |
|
114 |
title(paste("Kriging of residuals of ",data_name, sep=""),cex=2) |
|
115 |
|
|
116 |
krig_raster_name<-paste("Kriged_res_",data_name,out_prefix,".tif", sep="") |
|
117 |
#writeGDAL(gwr_res_krige,fname=krig_raster_name, driver="GTiff", type="Float32",options ="INTERLEAVE=PIXEL") |
|
118 |
|
|
119 |
savePlot(paste("Kriged_res_",data_name,out_prefix,".png", sep=""), type="png") |
|
120 |
dev.off() |
|
121 |
|
|
122 |
} |
|
123 |
|
|
124 |
## Plotting and saving diagnostic measures |
|
125 |
results_num <-results |
|
126 |
mode(results_num)<- "numeric" |
|
127 |
# Make it numeric first |
|
128 |
# Now turn it into a data.frame... |
|
129 |
|
|
130 |
results_table<-as.data.frame(results_num) |
|
131 |
colnames(results_table)<-c("dates","ns","RMSE_gwr1") |
|
132 |
|
|
133 |
write.csv(results_table, file= paste(path,"/","results_GWR_Assessment",out_prefix,".txt",sep="")) |
|
134 |
|
|
135 |
|
|
136 |
# End of script########## |
|
137 |
|
|
138 |
# ############################### |
|
139 |
|
|
140 |
# # GWR visualization of Residuals fit over space |
|
141 |
# X11() |
|
142 |
# title=paste("Histogram of residuals of ",data_name, sep="") |
|
143 |
# hist(data_s$residuals,main=title) |
|
144 |
# savePlot(paste("Histogram_",data_name,out_prefix,".png", sep=""), type="png") |
|
145 |
# dev.off() |
|
146 |
# |
|
147 |
# |
|
148 |
data_s<-ghcn_s20100901 |
|
149 |
X11(width=20,height=20) |
|
150 |
topo = cm.colors(9) |
|
151 |
image(gwr_res_krige,col=topo) #needs to change to have a bipolar palette !!! |
|
152 |
|
|
153 |
#image(mean_LST, col=grays,breaks = c(185,245,255,275,315,325)) |
|
154 |
|
|
155 |
plot(OR_state, axes = TRUE, add=TRUE) |
|
156 |
plot(data_s, pch=1, col="red", cex= abs(data_s$residuals)/10, add=TRUE) #Taking the absolute values because residuals are |
|
157 |
LegVals<- c(0,10,20,30,40,50,110) |
|
158 |
legend(-98000,510000, legend=LegVals,pch=1,col="red",pt.cex=LegVals/10,bty="n",title= "residuals",cex=1.6) |
|
159 |
|
|
160 |
legend(-98000,290000, legend=c("-60 -30","-30 -20","-20 -10", "-10 0"," 0 10"," 10 20", " 20 30"," 30 60"),fill=topo, bty="n", title= "Kriged RMSE",cex=1.6) |
|
161 |
title(paste("Kriging of residuals of ",data_name, sep=""),cex=2) |
|
162 |
|
|
163 |
krig_raster_name<-paste("Kriged_res_",data_name,out_prefix,".rst", sep="") |
|
164 |
writeGDAL(gwr_res_krige,fname="test_krige.tif", driver="GTiff", type="Float32",options ="INTERLEAVE=PIXEL") |
|
165 |
|
|
166 |
savePlot(paste("Kriged_res_",data_name,out_prefix,".png", sep=""), type="png") |
|
167 |
dev.off() |
|
168 |
|
|
169 |
|
|
170 |
|
|
171 |
|
|
172 |
#Compare the coefficients and residuals using both 30 and 100% |
|
173 |
#coefficients are stored in gwrG$SDF$lon |
|
174 |
#write out a new shapefile (including .prj component) |
|
175 |
#writeOGR(data_s,".", "ghcn_1507_s", driver ="ESRI Shapefile") |
|
176 |
#ogrInfo(".", "ghcn_1507_s") #This will check the file... |
|
177 |
#plot(ghcn_1507, axes=TRUE, border="gray") |
|
178 |
|
|
179 |
#library(foreign) |
|
180 |
#dbfdata<-read.dbf("file.dbf", as.is=TRUE) |
|
181 |
##Add new attribute data (just the numbers of 1 to the numbers of objects) |
|
182 |
#dbfdata$new.att <- 1:nrow(shp) |
|
183 |
##overwrite the file with this new copy |
|
184 |
#write.dbf(dbfdata, "file.dbf") |
Also available in: Unified diff
Kriging for OR, this is the first script, task #364