Revision 88248d6c
Added by Benoit Parmentier over 12 years ago
climate/research/oregon/interpolation/kriging_reg.R | ||
---|---|---|
52 | 52 |
set.seed(100) #This set a seed number for the random sampling to make results reproducible. |
53 | 53 |
|
54 | 54 |
dates <-readLines(paste(path,"/",infile2, sep="")) #Reading dates in a list from the textile. |
55 |
#results <- matrix(1,length(dates),4) #This is a matrix containing the diagnostic measures from the GAM models.
|
|
56 |
#results_mod_n<-matrix(1,length(dates),3)
|
|
55 |
results <- matrix(1,length(dates),4) #This is a matrix containing the diagnostic measures from the GAM models. |
|
56 |
results_mod_n<-matrix(1,length(dates),3) |
|
57 | 57 |
|
58 | 58 |
#Screening for bad values and setting the valid range |
59 | 59 |
|
... | ... | |
64 | 64 |
###CREATING SUBSETS BY INPUT DATES AND SAMPLING |
65 | 65 |
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, ghcn$date==as.numeric(d))) #Producing a list of data frame, one data frame per date. |
66 | 66 |
|
67 |
i<-3 #Date 10 is used to test kriging |
|
68 |
n<-nrow(ghcn.subsets[[i]]) |
|
69 |
ns<-n-round(n*prop) #Create a sample from the data frame with 70% of the rows |
|
70 |
nv<-n-ns #create a sample for validation with prop of the rows |
|
71 |
ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly |
|
72 |
ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training) #This selects the index position for testing subset stations. |
|
73 |
data_s <- ghcn.subsets[[i]][ind.training, ] |
|
74 |
data_v <- ghcn.subsets[[i]][ind.testing, ] |
|
75 |
|
|
76 |
###STEP 2 KRIGING### |
|
77 |
|
|
78 |
#Kriging tmax |
|
79 |
|
|
80 |
hscat(tmax~1,data_s,(0:9)*20000) # 9 lag classes with 20,000m width |
|
81 |
v<-variogram(tmax~1, data_s) # This plots a sample varigram for date 10 fir the testing dataset |
|
82 |
plot(v) |
|
83 |
v.fit<-fit.variogram(v,vgm(2000,"Sph", 150000,1000)) #Model variogram: sill is 2000, spherical, range 15000 and nugget 1000 |
|
84 |
plot(v, v.fit) #Compare model and sample variogram via a graphical plot |
|
85 |
tmax_krige<-krige(tmax~1, data_s,mean_LST, v.fit) #mean_LST provides the data grid/raster image for the kriging locations to be predicted. |
|
86 |
|
|
87 |
#Cokriging tmax |
|
88 |
g<-gstat(NULL,"tmax", tmax~1, data_s) #This creates a gstat object "g" that acts as container for kriging specifications. |
|
89 |
g<-gstat(g, "SRTM_elev",ELEV_SRTM~1,data_s) #Adding variables to gstat object g |
|
90 |
g<-gstat(g, "Eastness", Eastness~1,data_s) |
|
91 |
g<-gstat(g, "Northness", Northness~1, data_s) |
|
92 |
|
|
93 |
vm_g<-variogram(g) #Visualizing multivariate sample variogram. |
|
94 |
vm_g.fit<-fit.lmc(vm_g,g,vgm(2000,"Sph", 100000,1000)) #Fitting variogram for all variables at once. |
|
95 |
plot(vm_g,vm_g.fit) #Visualizing variogram fit and sample |
|
96 |
vm_g.fit$set <-list(nocheck=1) #Avoid checking and allow for different range in variogram |
|
97 |
co_kriged_surf<-predict(vm_g.fit,mean_LST) #Prediction using co-kriging with grid location defined from input raster image. |
|
98 |
#co_kriged_surf$tmax.pred #Results stored in SpatialGridDataFrame with tmax prediction accessible in dataframe. |
|
99 |
|
|
100 |
|
|
101 |
#spplot.vcov(co_kriged_surf) #Visualizing the covariance structure |
|
102 |
|
|
103 |
tmax_krig1_s <- overlay(tmax_krige,data_s) #This overlays the kriged surface tmax and the location of weather stations |
|
104 |
tmax_cokrig1_s<- overlay(co_kriged_surf,data_s) #This overalys the cokriged surface tmax and the location of weather stations |
|
105 |
tmax_krig1_v <- overlay(tmax_krige,data_v) #This overlays the kriged surface tmax and the location of weather stations |
|
106 |
tmax_cokrig1_v<- overlay(co_kriged_surf,data_v) |
|
107 |
|
|
108 |
data_s$tmax_kr<-tmax_krig1_s$var1.pred #Adding the results back into the original dataframes. |
|
109 |
data_v$tmax_kr<-tmax_krig1_v$var1.pred |
|
110 |
data_s$tmax_cokr<-tmax_cokrig1_s$tmax.pred |
|
111 |
data_v$tmax_cokr<-tmax_cokrig1_v$tmax.pred |
|
112 |
|
|
113 |
# #Co-kriging only on the validation sites for faster computing |
|
114 |
# |
|
115 |
# cokrig1_dv<-predict(vm_g.fit,data_v) |
|
116 |
# cokrig1_ds<-predict(vm_g.fit,data_s) |
|
117 |
# data_s$tmax_cokr<-cokrig1_ds$tmax.pred |
|
118 |
# data_v$tmax_cokr<-cokrig1_dv$tmax.pred |
|
119 |
|
|
120 |
#Calculate RMSE and then krig the residuals....! |
|
121 |
|
|
122 |
res_mod1<- data_v$tmax - data_v$tmax_kr #Residuals from kriging. |
|
123 |
res_mod2<- data_v$tmax - data_v$tmax_cokr #Residuals from cokriging. |
|
124 |
|
|
125 |
RMSE_mod1 <- sqrt(sum(res_mod1^2,na.rm=TRUE)/(nv-sum(is.na(res_mod1)))) #RMSE from kriged surface. |
|
126 |
RMSE_mod2 <- sqrt(sum(res_mod2^2,na.rm=TRUE)/(nv-sum(is.na(res_mod2)))) #RMSE from co-kriged surface. |
|
127 |
#(nv-sum(is.na(res_mod2))) |
|
128 |
# ####### |
|
129 |
# #Kriging residuals!! |
|
130 |
# |
|
131 |
# hscat(residuals~1,data_s,(0:9)*20000) # 9 lag classes with 20,000m width |
|
132 |
# v<-variogram(residuals~1, data_s) |
|
133 |
# plot(v) |
|
134 |
# v.fit<-fit.variogram(v,vgm(1,"Sph", 150000,1)) |
|
135 |
# gwr_res_krige<-krige(residuals~1, data_s,mean_LST, v.fit)#mean_LST provides the data grid/raster image for the kriging locations. |
|
136 |
# image(gwr_res_krige,col=grays) #needs to change to have a bipolar palette !!! |
|
137 |
# grays = gray.colors(5,0.45, 0.95) |
|
138 |
# #image(mean_LST, col=grays,breaks = c(185,245,255,275,315,325)) |
|
139 |
|
|
140 |
#Saving the subset in a dataframe |
|
141 |
data_name<-paste("ghcn_v_",dates[[i]],sep="") |
|
142 |
assign(data_name,data_v) |
|
143 |
data_name<-paste("ghcn_s_",dates[[i]],sep="") |
|
144 |
assign(data_name,data_s) |
|
145 |
|
|
146 |
results[i,1]<- dates[i] #storing the interpolation dates in the first column |
|
147 |
results[i,2]<- ns #number of stations in training |
|
148 |
results[i,3]<- RMSE_mod1 |
|
149 |
results[i,4]<- RMSE_mod2 |
|
150 |
|
|
151 |
results_mod_n[i,1]<-dates[i] |
|
152 |
results_mod_n[i,2]<-(nv-sum(is.na(res_mod1))) |
|
153 |
results_mod_n[i,3]<-(nv-sum(is.na(res_mod2))) |
|
67 |
for(i in 1:length(dates)){ # start of the for loop #1 |
|
68 |
#i<-3 #Date 10 is used to test kriging |
|
69 |
n<-nrow(ghcn.subsets[[i]]) |
|
70 |
ns<-n-round(n*prop) #Create a sample from the data frame with 70% of the rows |
|
71 |
nv<-n-ns #create a sample for validation with prop of the rows |
|
72 |
ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly |
|
73 |
ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training) #This selects the index position for testing subset stations. |
|
74 |
data_s <- ghcn.subsets[[i]][ind.training, ] |
|
75 |
data_v <- ghcn.subsets[[i]][ind.testing, ] |
|
76 |
|
|
77 |
###STEP 2 KRIGING### |
|
78 |
|
|
79 |
#Kriging tmax |
|
80 |
|
|
81 |
hscat(tmax~1,data_s,(0:9)*20000) # 9 lag classes with 20,000m width |
|
82 |
v<-variogram(tmax~1, data_s) # This plots a sample varigram for date 10 fir the testing dataset |
|
83 |
plot(v) |
|
84 |
v.fit<-fit.variogram(v,vgm(2000,"Sph", 150000,1000)) #Model variogram: sill is 2000, spherical, range 15000 and nugget 1000 |
|
85 |
plot(v, v.fit) #Compare model and sample variogram via a graphical plot |
|
86 |
tmax_krige<-krige(tmax~1, data_s,mean_LST, v.fit) #mean_LST provides the data grid/raster image for the kriging locations to be predicted. |
|
87 |
|
|
88 |
#Cokriging tmax |
|
89 |
g<-gstat(NULL,"tmax", tmax~1, data_s) #This creates a gstat object "g" that acts as container for kriging specifications. |
|
90 |
g<-gstat(g, "SRTM_elev",ELEV_SRTM~1,data_s) #Adding variables to gstat object g |
|
91 |
g<-gstat(g, "Eastness", Eastness~1,data_s) |
|
92 |
g<-gstat(g, "Northness", Northness~1, data_s) |
|
93 |
|
|
94 |
vm_g<-variogram(g) #Visualizing multivariate sample variogram. |
|
95 |
vm_g.fit<-fit.lmc(vm_g,g,vgm(2000,"Sph", 100000,1000)) #Fitting variogram for all variables at once. |
|
96 |
plot(vm_g,vm_g.fit) #Visualizing variogram fit and sample |
|
97 |
vm_g.fit$set <-list(nocheck=1) #Avoid checking and allow for different range in variogram |
|
98 |
co_kriged_surf<-predict(vm_g.fit,mean_LST) #Prediction using co-kriging with grid location defined from input raster image. |
|
99 |
#co_kriged_surf$tmax.pred #Results stored in SpatialGridDataFrame with tmax prediction accessible in dataframe. |
|
100 |
|
|
101 |
|
|
102 |
#spplot.vcov(co_kriged_surf) #Visualizing the covariance structure |
|
103 |
|
|
104 |
tmax_krig1_s <- overlay(tmax_krige,data_s) #This overlays the kriged surface tmax and the location of weather stations |
|
105 |
tmax_cokrig1_s<- overlay(co_kriged_surf,data_s) #This overalys the cokriged surface tmax and the location of weather stations |
|
106 |
tmax_krig1_v <- overlay(tmax_krige,data_v) #This overlays the kriged surface tmax and the location of weather stations |
|
107 |
tmax_cokrig1_v<- overlay(co_kriged_surf,data_v) |
|
108 |
|
|
109 |
data_s$tmax_kr<-tmax_krig1_s$var1.pred #Adding the results back into the original dataframes. |
|
110 |
data_v$tmax_kr<-tmax_krig1_v$var1.pred |
|
111 |
data_s$tmax_cokr<-tmax_cokrig1_s$tmax.pred |
|
112 |
data_v$tmax_cokr<-tmax_cokrig1_v$tmax.pred |
|
113 |
|
|
114 |
#Co-kriging only on the validation sites for faster computing |
|
115 |
|
|
116 |
cokrig1_dv<-predict(vm_g.fit,data_v) |
|
117 |
cokrig1_ds<-predict(vm_g.fit,data_s) |
|
118 |
data_s$tmax_cokr<-cokrig1_ds$tmax.pred |
|
119 |
data_v$tmax_cokr<-cokrig1_dv$tmax.pred |
|
120 |
|
|
121 |
#Calculate RMSE and then krig the residuals....! |
|
122 |
|
|
123 |
res_mod1<- data_v$tmax - data_v$tmax_kr #Residuals from kriging. |
|
124 |
res_mod2<- data_v$tmax - data_v$tmax_cokr #Residuals from cokriging. |
|
125 |
|
|
126 |
RMSE_mod1 <- sqrt(sum(res_mod1^2,na.rm=TRUE)/(nv-sum(is.na(res_mod1)))) #RMSE from kriged surface. |
|
127 |
RMSE_mod2 <- sqrt(sum(res_mod2^2,na.rm=TRUE)/(nv-sum(is.na(res_mod2)))) #RMSE from co-kriged surface. |
|
128 |
#(nv-sum(is.na(res_mod2))) |
|
129 |
|
|
130 |
#Saving the subset in a dataframe |
|
131 |
data_name<-paste("ghcn_v_",dates[[i]],sep="") |
|
132 |
assign(data_name,data_v) |
|
133 |
data_name<-paste("ghcn_s_",dates[[i]],sep="") |
|
134 |
assign(data_name,data_s) |
|
135 |
|
|
136 |
krig_raster_name<-paste("coKriged_tmax_",data_name,out_prefix,".tif", sep="") |
|
137 |
writeGDAL(co_kriged_surf,fname=krig_raster_name, driver="GTiff", type="Float32",options ="INTERLEAVE=PIXEL") |
|
138 |
krig_raster_name<-paste("Kriged_tmax_",data_name,out_prefix,".tif", sep="") |
|
139 |
writeGDAL(tmax_krige,fname=krig_raster_name, driver="GTiff", type="Float32",options ="INTERLEAVE=PIXEL") |
|
140 |
X11() |
|
141 |
plot(raster(co_kriged_surf)) |
|
142 |
title(paste("Tmax cokriging for date ",dates[[i]],sep="")) |
|
143 |
savePlot(paste("Cokriged_tmax",data_name,out_prefix,".png", sep=""), type="png") |
|
144 |
dev.off() |
|
145 |
X11() |
|
146 |
plot(raster(tmax_krige)) |
|
147 |
title(paste("Tmax Kriging for date ",dates[[i]],sep="")) |
|
148 |
savePlot(paste("Kriged_res_",data_name,out_prefix,".png", sep=""), type="png") |
|
149 |
dev.off() |
|
150 |
|
|
151 |
results[i,1]<- dates[i] #storing the interpolation dates in the first column |
|
152 |
results[i,2]<- ns #number of stations in training |
|
153 |
results[i,3]<- RMSE_mod1 |
|
154 |
results[i,4]<- RMSE_mod2 |
|
155 |
|
|
156 |
results_mod_n[i,1]<-dates[i] |
|
157 |
results_mod_n[i,2]<-(nv-sum(is.na(res_mod1))) |
|
158 |
results_mod_n[i,3]<-(nv-sum(is.na(res_mod2))) |
|
159 |
} |
|
160 |
|
|
161 |
## Plotting and saving diagnostic measures |
|
162 |
results_num <-results |
|
163 |
mode(results_num)<- "numeric" |
|
164 |
# Make it numeric first |
|
165 |
# Now turn it into a data.frame... |
|
166 |
|
|
167 |
results_table<-as.data.frame(results_num) |
|
168 |
colnames(results_table)<-c("dates","ns","RMSE_gwr1") |
|
169 |
|
|
170 |
write.csv(results_table, file= paste(path,"/","results_GWR_Assessment",out_prefix,".txt",sep="")) |
Also available in: Unified diff
Kriging, modification to save kriged surfaces in geotiff and png files, task #364