1
|
####################GWR of Tmax for one Date#####################
|
2
|
#This script generates predicted values from station values for the Oregon case study. This program loads the station data from a shp file
|
3
|
#and performs Kriging and co-kriging on tmax regression.
|
4
|
#Script created by Benoit Parmentier on April 17, 2012.
|
5
|
|
6
|
###Loading r library and packages
|
7
|
library(sp)
|
8
|
library(spdep)
|
9
|
library(rgdal)
|
10
|
library(spgwr)
|
11
|
library(gpclib)
|
12
|
library(maptools)
|
13
|
library(gstat)
|
14
|
library(graphics)
|
15
|
###Parameters and arguments
|
16
|
|
17
|
path<- "/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations/" #Path to all datasets
|
18
|
setwd(path)
|
19
|
infile1<-"ghcn_or_tmax_b_04142012_OR83M.shp" #Weather station location in Oregon with input variables
|
20
|
infile2<-"dates_interpolation_03052012.txt" # list of 10 dates for the regression, more thatn 10 dates may be used
|
21
|
infile3<-"mean_day244_rescaled.rst" #This image serves as the reference grid for kriging
|
22
|
infile4<- "orcnty24_OR83M.shp" #Vector file defining the study area: Oregon state and its counties.
|
23
|
prop<-0.3 #Propotion of weather stations retained for validation/testing
|
24
|
out_prefix<-"_LST_04172012_RMSE" #output name used in the text file result
|
25
|
|
26
|
###STEP 1 DATA PREPARATION AND PROCESSING#####
|
27
|
|
28
|
###Reading the shapefile and raster image from the local directory
|
29
|
|
30
|
mean_LST<- readGDAL(infile3) #This reads the whole raster in memory and provide a grid for kriging in a SpatialGridDataFrame object
|
31
|
filename<-sub(".shp","",infile1) #Removing the extension from file.
|
32
|
ghcn<-readOGR(".", filename) #Reading station locations from vector file using rgdal and creating a SpatialPointDataFrame
|
33
|
CRS_ghcn<-proj4string(ghcn) #This retrieves the coordinate system information for the SDF object (PROJ4 format)
|
34
|
proj4string(mean_LST)<-CRS_ghcn #Assigning coordinates information to SpatialGridDataFrame object
|
35
|
|
36
|
# Creating state outline from county
|
37
|
|
38
|
orcnty<-readOGR(".", "orcnty24_OR83M")
|
39
|
proj4string(orcnty) #This retrieves the coordinate system for the SDF
|
40
|
lps <-getSpPPolygonsLabptSlots(orcnty) #Getting centroids county labels
|
41
|
IDOneBin <- cut(lps[,1], range(lps[,1]), include.lowest=TRUE) #Creating one bin var
|
42
|
gpclibPermit() #Set the gpclib to True to allow union
|
43
|
OR_state <- unionSpatialPolygons(orcnty ,IDOneBin) #Dissolve based on bin var
|
44
|
|
45
|
# Adding variables for the regressions
|
46
|
|
47
|
ghcn$Northness<- cos(ghcn$ASPECT) #Adding a variable to the dataframe by calculating the cosine of Aspect
|
48
|
ghcn$Eastness <- sin(ghcn$ASPECT) #Adding variable to the dataframe.
|
49
|
ghcn$Northness_w <- sin(ghcn$slope)*cos(ghcn$ASPECT) #Adding a variable to the dataframe
|
50
|
ghcn$Eastness_w <- sin(ghcn$slope)*sin(ghcn$ASPECT) #Adding variable to the dataframe.
|
51
|
|
52
|
set.seed(100) #This set a seed number for the random sampling to make results reproducible.
|
53
|
|
54
|
dates <-readLines(paste(path,"/",infile2, sep="")) #Reading dates in a list from the textile.
|
55
|
results <- matrix(1,length(dates),4) #This is a matrix containing the diagnostic measures from the GAM models.
|
56
|
results_mod_n<-matrix(1,length(dates),3)
|
57
|
|
58
|
#Screening for bad values and setting the valid range
|
59
|
|
60
|
ghcn_test<-subset(ghcn,ghcn$tmax>-150 & ghcn$tmax<400) #Values are in tenth of degrees C
|
61
|
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0) #No elevation below sea leve is allowed.
|
62
|
ghcn<-ghcn_test2
|
63
|
|
64
|
###CREATING SUBSETS BY INPUT DATES AND SAMPLING
|
65
|
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, ghcn$date==as.numeric(d))) #Producing a list of data frame, one data frame per date.
|
66
|
|
67
|
for(i in 1:length(dates)){ # start of the for loop #1
|
68
|
#i<-3 #Date 10 is used to test kriging
|
69
|
date<-strptime(dates[i], "%Y%m%d")
|
70
|
month<-strftime(date, "%m")
|
71
|
LST_month<-paste("mm_",month,sep="")
|
72
|
mod <-ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))]
|
73
|
ghcn.subsets[[i]]$LST <-mod[[1]]
|
74
|
|
75
|
n<-nrow(ghcn.subsets[[i]])
|
76
|
ns<-n-round(n*prop) #Create a sample from the data frame with 70% of the rows
|
77
|
nv<-n-ns #create a sample for validation with prop of the rows
|
78
|
ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly
|
79
|
ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training) #This selects the index position for testing subset stations.
|
80
|
data_s <- ghcn.subsets[[i]][ind.training, ]
|
81
|
data_v <- ghcn.subsets[[i]][ind.testing, ]
|
82
|
|
83
|
###STEP 2 KRIGING###
|
84
|
|
85
|
#Kriging tmax
|
86
|
|
87
|
hscat(tmax~1,data_s,(0:9)*20000) # 9 lag classes with 20,000m width
|
88
|
v<-variogram(tmax~1, data_s) # This plots a sample varigram for date 10 fir the testing dataset
|
89
|
plot(v)
|
90
|
v.fit<-fit.variogram(v,vgm(2000,"Sph", 150000,1000)) #Model variogram: sill is 2000, spherical, range 15000 and nugget 1000
|
91
|
plot(v, v.fit) #Compare model and sample variogram via a graphical plot
|
92
|
tmax_krige<-krige(tmax~1, data_s,mean_LST, v.fit) #mean_LST provides the data grid/raster image for the kriging locations to be predicted.
|
93
|
|
94
|
#Cokriging tmax
|
95
|
g<-gstat(NULL,"tmax", tmax~1, data_s) #This creates a gstat object "g" that acts as container for kriging specifications.
|
96
|
g<-gstat(g, "SRTM_elev",ELEV_SRTM~1,data_s) #Adding variables to gstat object g
|
97
|
g<-gstat(g, "LST", LST~1,data_s)
|
98
|
|
99
|
vm_g<-variogram(g) #Visualizing multivariate sample variogram.
|
100
|
vm_g.fit<-fit.lmc(vm_g,g,vgm(2000,"Sph", 100000,1000)) #Fitting variogram for all variables at once.
|
101
|
plot(vm_g,vm_g.fit) #Visualizing variogram fit and sample
|
102
|
vm_g.fit$set <-list(nocheck=1) #Avoid checking and allow for different range in variogram
|
103
|
co_kriged_surf<-predict(vm_g.fit,mean_LST) #Prediction using co-kriging with grid location defined from input raster image.
|
104
|
#co_kriged_surf$tmax.pred #Results stored in SpatialGridDataFrame with tmax prediction accessible in dataframe.
|
105
|
|
106
|
|
107
|
#spplot.vcov(co_kriged_surf) #Visualizing the covariance structure
|
108
|
|
109
|
tmax_krig1_s <- overlay(tmax_krige,data_s) #This overlays the kriged surface tmax and the location of weather stations
|
110
|
tmax_cokrig1_s<- overlay(co_kriged_surf,data_s) #This overalys the cokriged surface tmax and the location of weather stations
|
111
|
tmax_krig1_v <- overlay(tmax_krige,data_v) #This overlays the kriged surface tmax and the location of weather stations
|
112
|
tmax_cokrig1_v<- overlay(co_kriged_surf,data_v)
|
113
|
|
114
|
data_s$tmax_kr<-tmax_krig1_s$var1.pred #Adding the results back into the original dataframes.
|
115
|
data_v$tmax_kr<-tmax_krig1_v$var1.pred
|
116
|
data_s$tmax_cokr<-tmax_cokrig1_s$tmax.pred
|
117
|
data_v$tmax_cokr<-tmax_cokrig1_v$tmax.pred
|
118
|
|
119
|
#Co-kriging only on the validation sites for faster computing
|
120
|
|
121
|
cokrig1_dv<-predict(vm_g.fit,data_v)
|
122
|
cokrig1_ds<-predict(vm_g.fit,data_s)
|
123
|
data_s$tmax_cokr<-cokrig1_ds$tmax.pred
|
124
|
data_v$tmax_cokr<-cokrig1_dv$tmax.pred
|
125
|
|
126
|
#Calculate RMSE and then krig the residuals....!
|
127
|
|
128
|
res_mod1<- data_v$tmax - data_v$tmax_kr #Residuals from kriging.
|
129
|
res_mod2<- data_v$tmax - data_v$tmax_cokr #Residuals from cokriging.
|
130
|
|
131
|
RMSE_mod1 <- sqrt(sum(res_mod1^2,na.rm=TRUE)/(nv-sum(is.na(res_mod1)))) #RMSE from kriged surface.
|
132
|
RMSE_mod2 <- sqrt(sum(res_mod2^2,na.rm=TRUE)/(nv-sum(is.na(res_mod2)))) #RMSE from co-kriged surface.
|
133
|
#(nv-sum(is.na(res_mod2)))
|
134
|
|
135
|
#Saving the subset in a dataframe
|
136
|
data_name<-paste("ghcn_v_",dates[[i]],sep="")
|
137
|
assign(data_name,data_v)
|
138
|
data_name<-paste("ghcn_s_",dates[[i]],sep="")
|
139
|
assign(data_name,data_s)
|
140
|
|
141
|
krig_raster_name<-paste("coKriged_tmax_",data_name,out_prefix,".tif", sep="")
|
142
|
writeGDAL(co_kriged_surf,fname=krig_raster_name, driver="GTiff", type="Float32",options ="INTERLEAVE=PIXEL")
|
143
|
krig_raster_name<-paste("Kriged_tmax_",data_name,out_prefix,".tif", sep="")
|
144
|
writeGDAL(tmax_krige,fname=krig_raster_name, driver="GTiff", type="Float32",options ="INTERLEAVE=PIXEL")
|
145
|
X11()
|
146
|
plot(raster(co_kriged_surf))
|
147
|
title(paste("Tmax cokriging for date ",dates[[i]],sep=""))
|
148
|
savePlot(paste("Cokriged_tmax",data_name,out_prefix,".png", sep=""), type="png")
|
149
|
dev.off()
|
150
|
X11()
|
151
|
plot(raster(tmax_krige))
|
152
|
title(paste("Tmax Kriging for date ",dates[[i]],sep=""))
|
153
|
savePlot(paste("Kriged_res_",data_name,out_prefix,".png", sep=""), type="png")
|
154
|
dev.off()
|
155
|
|
156
|
results[i,1]<- dates[i] #storing the interpolation dates in the first column
|
157
|
results[i,2]<- ns #number of stations in training
|
158
|
results[i,3]<- RMSE_mod1
|
159
|
results[i,4]<- RMSE_mod2
|
160
|
|
161
|
results_mod_n[i,1]<-dates[i]
|
162
|
results_mod_n[i,2]<-(nv-sum(is.na(res_mod1)))
|
163
|
results_mod_n[i,3]<-(nv-sum(is.na(res_mod2)))
|
164
|
}
|
165
|
|
166
|
## Plotting and saving diagnostic measures
|
167
|
results_num <-results
|
168
|
mode(results_num)<- "numeric"
|
169
|
# Make it numeric first
|
170
|
# Now turn it into a data.frame...
|
171
|
|
172
|
results_table<-as.data.frame(results_num)
|
173
|
colnames(results_table)<-c("dates","ns","RMSE")
|
174
|
|
175
|
write.csv(results_table, file= paste(path,"/","results_Kriging_Assessment",out_prefix,".txt",sep=""))
|