Revision 3b657271
Added by Benoit Parmentier over 12 years ago
climate/research/oregon/interpolation/kriging_reg.R | ||
---|---|---|
11 | 11 |
#also included and assessed using the RMSE,MAE,ME and R2 from validation dataset. # |
12 | 12 |
#TThe dates must be provided as a textfile. # |
13 | 13 |
#AUTHOR: Benoit Parmentier # |
14 |
#DATE: 07/07/2012 #
|
|
14 |
#DATE: 07/15/2012 #
|
|
15 | 15 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#364-- # |
16 | 16 |
################################################################################################## |
17 | 17 |
|
... | ... | |
35 | 35 |
#infile2<-"list_365_dates_04212012.txt" |
36 | 36 |
infile3<-"LST_dates_var_names.txt" #LST dates name |
37 | 37 |
infile4<-"models_interpolation_05142012.txt" #Interpolation model names |
38 |
infile5<-"mean_day244_rescaled.rst" |
|
38 |
infile5<-"mean_day244_rescaled.rst" |
|
39 |
inlistf<-"list_files_05032012.txt" #Stack of images containing the Covariates |
|
39 | 40 |
|
40 |
# infile1<- "ghcn_or_tmax_b_04142012_OR83M.shp" #GHCN shapefile containing variables |
|
41 |
# infile2<-"list_10_dates_04212012.txt" #List of 10 dates for the regression |
|
42 |
# #infile2<-"list_365_dates_04212012.txt" |
|
43 |
# infile3<-"mean_day244_rescaled.rst" #This image serves as the reference grid for kriging |
|
44 |
# infile4<- "orcnty24_OR83M.shp" #Vector file defining the study area: Oregon state and its counties. |
|
45 |
|
|
46 |
path<-"/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations_07152012" #Jupiter LOCATION on EOS |
|
47 |
#path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations" #Jupiter LOCATION on EOS/Atlas |
|
41 |
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_07152012" #Jupiter LOCATION on Atlas for kriging |
|
48 | 42 |
#path<-"H:/Data/IPLANT_project/data_Oregon_stations" #Jupiter Location on XANDERS |
43 |
|
|
49 | 44 |
setwd(path) |
50 | 45 |
prop<-0.3 #Proportion of testing retained for validation |
51 | 46 |
seed_number<- 100 #Seed number for random sampling |
52 |
models<-5 #Number of kriging model
|
|
47 |
models<-7 #Number of kriging model
|
|
53 | 48 |
out_prefix<-"_07132012_auto_krig_" #User defined output prefix |
54 | 49 |
|
55 | 50 |
###STEP 1 DATA PREPARATION AND PROCESSING##### |
... | ... | |
63 | 58 |
mean_LST<- readGDAL(infile5) #Reading the whole raster in memory. This provides a grid for kriging |
64 | 59 |
proj4string(mean_LST)<-CRS #Assigning coordinate information to prediction grid. |
65 | 60 |
|
61 |
##Extracting the variables values from the raster files |
|
62 |
|
|
63 |
lines<-read.table(paste(path,"/",inlistf,sep=""), sep=" ") #Column 1 contains the names of raster files |
|
64 |
inlistvar<-lines[,1] |
|
65 |
inlistvar<-paste(path,"/",as.character(inlistvar),sep="") |
|
66 |
covar_names<-as.character(lines[,2]) #Column two contains short names for covaraites |
|
67 |
|
|
68 |
s_raster<- stack(inlistvar) #Creating a stack of raster images from the list of variables. |
|
69 |
layerNames(s_raster)<-covar_names #Assigning names to the raster layers |
|
70 |
projection(s_raster)<-CRS |
|
71 |
|
|
72 |
#stat_val<- extract(s_raster, ghcn3) #Extracting values from the raster stack for every point location in coords data frame. |
|
73 |
pos<-match("ASPECT",layerNames(s_raster)) #Find column with name "value" |
|
74 |
r1<-raster(s_raster,layer=pos) #Select layer from stack |
|
75 |
pos<-match("slope",layerNames(s_raster)) #Find column with name "value" |
|
76 |
r2<-raster(s_raster,layer=pos) #Select layer from stack |
|
77 |
N<-cos(r1*pi/180) |
|
78 |
E<-sin(r1*pi/180) |
|
79 |
Nw<-sin(r2*pi/180)*cos(r1*pi/180) #Adding a variable to the dataframe |
|
80 |
Ew<-sin(r2*pi/180)*sin(r1*pi/180) #Adding variable to the dataframe. |
|
81 |
r<-stack(N,E,Nw,Ew) |
|
82 |
rnames<-c("Northness","Eastness","Northness_w","Eastness_w") |
|
83 |
layerNames(r)<-rnames |
|
84 |
s_raster<-addLayer(s_raster, r) |
|
85 |
s_sgdf<-as(s_raster,"SpatialGridDataFrame") #Conversion to spatial grid data frame |
|
86 |
|
|
87 |
### adding var |
|
66 | 88 |
ghcn = transform(ghcn,Northness = cos(ASPECT*pi/180)) #Adding a variable to the dataframe |
67 | 89 |
ghcn = transform(ghcn,Eastness = sin(ASPECT*pi/180)) #adding variable to the dataframe. |
68 | 90 |
ghcn = transform(ghcn,Northness_w = sin(slope*pi/180)*cos(ASPECT*pi/180)) #Adding a variable to the dataframe |
... | ... | |
77 | 99 |
|
78 | 100 |
dates <-readLines(paste(path,"/",infile2, sep="")) |
79 | 101 |
LST_dates <-readLines(paste(path,"/",infile3, sep="")) |
80 |
models <-readLines(paste(path,"/",infile4, sep="")) |
|
102 |
#models <-readLines(paste(path,"/",infile4, sep="")) |
|
103 |
|
|
104 |
#models<-5 |
|
81 | 105 |
#Model assessment: specific diagnostic/metrics for GAM |
82 | 106 |
results_AIC<- matrix(1,length(dates),models+3) |
83 | 107 |
results_GCV<- matrix(1,length(dates),models+3) |
... | ... | |
89 | 113 |
results_R2 <- matrix(1,length(dates),models+3) #Coef. of determination for the validation dataset |
90 | 114 |
results_RMSE_f<- matrix(1,length(dates),models+3) |
91 | 115 |
|
92 |
###Reading the shapefile and raster image from the local directory |
|
93 |
|
|
94 |
mean_LST<- readGDAL(infile3) #This reads the whole raster in memory and provide a grid for kriging in a SpatialGridDataFrame object |
|
95 |
filename<-sub(".shp","",infile1) #Removing the extension from file. |
|
96 |
ghcn<-readOGR(".", filename) #Reading station locations from vector file using rgdal and creating a SpatialPointDataFrame |
|
97 |
CRS_ghcn<-proj4string(ghcn) #This retrieves the coordinate system information for the SDF object (PROJ4 format) |
|
98 |
proj4string(mean_LST)<-CRS_ghcn #Assigning coordinates information to SpatialGridDataFrame object |
|
99 |
|
|
100 |
# Adding variables for the regressions |
|
101 |
|
|
102 |
ghcn$Northness<- cos(ghcn$ASPECT*pi/180) #Adding a variable to the dataframe by calculating the cosine of Aspect |
|
103 |
ghcn$Eastness <- sin(ghcn$ASPECT*pi/180) #Adding variable to the dataframe. |
|
104 |
ghcn$Northness_w <- sin(ghcn$slope*pi/180)*cos(ghcn$ASPECT*pi/180) #Adding a variable to the dataframe |
|
105 |
ghcn$Eastness_w <- sin(ghcn$slope*pi/180)*sin(ghcn$ASPECT*pi/180) #Adding variable to the dataframe. |
|
106 |
|
|
107 |
set.seed(seed_number) #This set a seed number for the random sampling to make results reproducible. |
|
108 |
|
|
109 |
dates <-readLines(paste(path,"/",infile2, sep="")) #Reading dates in a list from the textile. |
|
110 | 116 |
|
117 |
#Screening for bad values: value is tmax in this case |
|
118 |
#ghcn$value<-as.numeric(ghcn$value) |
|
119 |
ghcn_all<-ghcn |
|
120 |
ghcn_test<-subset(ghcn,ghcn$value>-150 & ghcn$value<400) |
|
121 |
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0) |
|
122 |
ghcn<-ghcn_test2 |
|
123 |
#coords<- ghcn[,c('x_OR83M','y_OR83M')] |
|
111 | 124 |
|
112 |
#Screening for bad values and setting the valid range |
|
113 | 125 |
|
114 |
ghcn_test<-subset(ghcn,ghcn$tmax>-150 & ghcn$tmax<400) #Values are in tenth of degrees C |
|
115 |
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0) #No elevation below sea leve is allowed. |
|
116 |
ghcn<-ghcn_test2 |
|
117 | 126 |
|
118 | 127 |
###CREATING SUBSETS BY INPUT DATES AND SAMPLING |
119 | 128 |
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, ghcn$date==as.numeric(d))) #Producing a list of data frame, one data frame per date. |
120 | 129 |
|
121 | 130 |
for(i in 1:length(dates)){ # start of the for loop #1 |
122 | 131 |
#i<-3 #Date 10 is used to test kriging |
132 |
|
|
133 |
#This allows to change only one name of the |
|
134 |
|
|
123 | 135 |
date<-strptime(dates[i], "%Y%m%d") |
124 | 136 |
month<-strftime(date, "%m") |
125 | 137 |
LST_month<-paste("mm_",month,sep="") |
138 |
#adding to SpatialGridDataFrame |
|
139 |
#t<-s_sgdf[,match(LST_month, names(s_sgdf))] |
|
140 |
#s_sgdf$LST<-s_sgdf[c(LST_month)] |
|
126 | 141 |
mod <-ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))] |
127 | 142 |
ghcn.subsets[[i]]$LST <-mod[[1]] |
128 | 143 |
|
... | ... | |
134 | 149 |
data_s <- ghcn.subsets[[i]][ind.training, ] |
135 | 150 |
data_v <- ghcn.subsets[[i]][ind.testing, ] |
136 | 151 |
|
152 |
|
|
153 |
###BEFORE Kringing the data object must be transformed to SDF |
|
154 |
|
|
155 |
coords<- data_v[,c('x_OR83M','y_OR83M')] |
|
156 |
coordinates(data_v)<-coords |
|
157 |
proj4string(data_v)<-CRS #Need to assign coordinates... |
|
158 |
coords<- data_s[,c('x_OR83M','y_OR83M')] |
|
159 |
coordinates(data_s)<-coords |
|
160 |
proj4string(data_s)<-CRS #Need to assign coordinates.. |
|
161 |
|
|
162 |
#This allows to change only one name of the data.frame |
|
163 |
pos<-match("value",names(data_s)) #Find column with name "value" |
|
164 |
names(data_s)[pos]<-c("tmax") |
|
165 |
data_s$tmax<-data_s$tmax/10 #TMax is the average max temp for months |
|
166 |
pos<-match("value",names(data_v)) #Find column with name "value" |
|
167 |
names(data_v)[pos]<-c("tmax") |
|
168 |
data_v$tmax<-data_v$tmax/10 |
|
169 |
#dstjan=dst[dst$month==9,] #dst contains the monthly averages for tmax for every station over 2000-2010 |
|
170 |
############## |
|
137 | 171 |
###STEP 2 KRIGING### |
138 | 172 |
|
139 | 173 |
#Kriging tmax |
... | ... | |
145 | 179 |
# plot(v, v.fit) #Compare model and sample variogram via a graphical plot |
146 | 180 |
# tmax_krige<-krige(tmax~1, data_s,mean_LST, v.fit) #mean_LST provides the data grid/raster image for the kriging locations to be predicted. |
147 | 181 |
|
148 |
krmod1<-autoKrige(tmax~1, data_s,mean_LST,data_s) #Use autoKrige instead of krige: with data_s for fitting on a grid |
|
149 |
krmod2<-autoKrige(tmax~lat+lon,input_data=data_s,new_data=mean_LST,data_variogram=data_s) |
|
150 |
krmod2<-autoKrige(tmax~lat+lon,data_s,mean_LST, verbose=TRUE) |
|
151 |
|
|
152 |
krmod3<-autoKrige(tmax~LST, data_s,mean_LST,data_s) |
|
153 |
krmod4<-autoKrige(tmax~LST+ELEV_SRTM, data_s,mean_LST,data_s) |
|
154 |
krmod5<-autoKrige(tmax~LST+ELEV_SRTM+DISTOC, data_s,mean_LST,data_s) |
|
182 |
krmod1<-autoKrige(tmax~1, data_s,s_sgdf,data_s) #Use autoKrige instead of krige: with data_s for fitting on a grid |
|
183 |
krmod2<-autoKrige(tmax~x_OR83M+y_OR83M,input_data=data_s,new_data=s_sgdf,data_variogram=data_s) |
|
184 |
krmod3<-autoKrige(tmax~x_OR83M+y_OR83M+ELEV_SRTM,input_data=data_s,new_data=s_sgdf,data_variogram=data_s) |
|
185 |
krmod4<-autoKrige(tmax~x_OR83M+y_OR83M+DISTOC,input_data=data_s,new_data=s_sgdf,data_variogram=data_s) |
|
186 |
krmod5<-autoKrige(tmax~x_OR83M+y_OR83M+ELEV_SRTM+DISTOC,input_data=data_s,new_data=s_sgdf,data_variogram=data_s) |
|
187 |
krmod6<-autoKrige(tmax~x_OR83M+y_OR83M+Northness+Eastness,input_data=data_s,new_data=s_sgdf,data_variogram=data_s) |
|
188 |
krmod7<-autoKrige(tmax~x_OR83M+y_OR83M+Northness+Eastness,input_data=data_s,new_data=s_sgdf,data_variogram=data_s) |
|
189 |
#krmod8<-autoKrige(tmax~LST,input_data=data_s,new_data=s_sgdf,data_variogram=data_s) |
|
190 |
#krmod9<-autoKrige(tmax~x_OR83M+y_OR83M+LST,input_data=data_s,new_data=s_sgdf,data_variogram=data_s) |
|
155 | 191 |
|
156 | 192 |
krig1<-krmod1$krige_output #Extracting Spatial Grid Data frame |
157 | 193 |
krig2<-krmod2$krige_output |
158 | 194 |
krig3<-krmod3$krige_outpu |
159 | 195 |
krig4<-krmod4$krige_output |
160 | 196 |
krig5<-krmod5$krige_output |
197 |
krig6<-krmod6$krige_output #Extracting Spatial Grid Data frame |
|
198 |
krig7<-krmod7$krige_output |
|
199 |
#krig8<-krmod8$krige_outpu |
|
200 |
#krig9<-krmod9$krige_output |
|
201 |
|
|
161 | 202 |
#tmax_krig1_s <- overlay(krige,data_s) #This overlays the kriged surface tmax and the location of weather stations |
162 | 203 |
#tmax_krig1_v <- overlay(krige,data_v) |
163 | 204 |
# |
... | ... | |
180 | 221 |
|
181 | 222 |
for (j in 1:models){ |
182 | 223 |
|
183 |
krmod<-paste("krig",j,sep="")
|
|
184 |
|
|
224 |
mod<-paste("krig",j,sep="") |
|
225 |
krmod<-get(mod) |
|
185 | 226 |
krig_val_s <- overlay(krmod,data_s) #This overlays the kriged surface tmax and the location of weather stations |
186 | 227 |
krig_val_v <- overlay(krmod,data_v) #This overlays the kriged surface tmax and the location of weather stations |
187 | 228 |
|
... | ... | |
201 | 242 |
MAE_mod_kr_v<- sum(abs(res_mod_kr_v),na.rm=TRUE)/(nv-sum(is.na(res_mod_kr_v))) #MAE from kriged surface validation |
202 | 243 |
ME_mod_kr_s<- sum(res_mod_kr_s,na.rm=TRUE)/(nv-sum(is.na(res_mod_kr_s))) #ME, Mean Error or bias FOR REGRESSION STEP 1: GAM |
203 | 244 |
ME_mod_kr_v<- sum(res_mod_kr_v,na.rm=TRUE)/(nv-sum(is.na(res_mod_kr_v))) #ME, Mean Error or bias FOR REGRESSION STEP 1: GAM |
204 |
R2_mod_kr_s<- cor(data_s$tmax,data_s[[gam_kr]],use="complete.obs")^2 #R2, coef. of determination FOR REGRESSION STEP 1: GAM
|
|
205 |
R2_mod_kr_v<- cor(data_v$tmax,data_v[[gam_kr]],use="complete.obs")^2 #R2, coef. of determinationFOR REGRESSION STEP 1: GAM
|
|
245 |
R2_mod_kr_s<- cor(data_s$tmax,data_s[[pred_krmod]],use="complete.obs")^2 #R2, coef. of determination FOR REGRESSION STEP 1: GAM
|
|
246 |
R2_mod_kr_v<- cor(data_v$tmax,data_v[[pred_krmod]],use="complete.obs")^2 #R2, coef. of determinationFOR REGRESSION STEP 1: GAM
|
|
206 | 247 |
#(nv-sum(is.na(res_mod2))) |
207 | 248 |
#Writing out results |
208 | 249 |
|
... | ... | |
238 | 279 |
data_v[[name3]]<-as.numeric(res_mod_kr_v) |
239 | 280 |
#Writing residuals from kriging |
240 | 281 |
|
282 |
#Saving kriged surface in raster images |
|
283 |
data_name<-paste("mod",j,"_",dates[[i]],sep="") |
|
284 |
krig_raster_name<-paste("krmod_",data_name,out_prefix,".tif", sep="") |
|
285 |
writeGDAL(krmod,fname=krig_raster_name, driver="GTiff", type="Float32",options ="INTERLEAVE=PIXEL") |
|
286 |
krig_raster_name<-paste("krmod_",data_name,out_prefix,".rst", sep="") |
|
287 |
writeRaster(raster(krmod), filename=krig_raster_name) #Writing the data in a raster file format...(IDRISI) |
|
288 |
|
|
289 |
#krig_raster_name<-paste("Kriged_tmax_",data_name,out_prefix,".tif", sep="") |
|
290 |
#writeGDAL(tmax_krige,fname=krig_raster_name, driver="GTiff", type="Float32",options ="INTERLEAVE=PIXEL") |
|
291 |
#X11() |
|
292 |
#plot(raster(co_kriged_surf)) |
|
293 |
#title(paste("Tmax cokriging for date ",dates[[i]],sep="")) |
|
294 |
#savePlot(paste("Cokriged_tmax",data_name,out_prefix,".png", sep=""), type="png") |
|
295 |
#dev.off() |
|
296 |
#X11() |
|
297 |
#plot(raster(tmax_krige)) |
|
298 |
#title(paste("Tmax Kriging for date ",dates[[i]],sep="")) |
|
299 |
#savePlot(paste("Kriged_res_",data_name,out_prefix,".png", sep=""), type="png") |
|
300 |
#dev.off() |
|
301 |
# |
|
302 |
|
|
241 | 303 |
} |
242 | 304 |
|
243 | 305 |
# #Co-kriging only on the validation sites for faster computing |
... | ... | |
261 | 323 |
assign(data_name,data_v) |
262 | 324 |
data_name<-paste("ghcn_s_",dates[[i]],sep="") |
263 | 325 |
assign(data_name,data_s) |
264 |
|
|
265 |
#Saving kriged surface in raster images |
|
266 |
|
|
267 |
#krig_raster_name<-paste("coKriged_tmax_",data_name,out_prefix,".tif", sep="") |
|
268 |
#writeGDAL(co_kriged_surf,fname=krig_raster_name, driver="GTiff", type="Float32",options ="INTERLEAVE=PIXEL") |
|
269 |
#krig_raster_name<-paste("Kriged_tmax_",data_name,out_prefix,".tif", sep="") |
|
270 |
#writeGDAL(tmax_krige,fname=krig_raster_name, driver="GTiff", type="Float32",options ="INTERLEAVE=PIXEL") |
|
271 |
#X11() |
|
272 |
#plot(raster(co_kriged_surf)) |
|
273 |
#title(paste("Tmax cokriging for date ",dates[[i]],sep="")) |
|
274 |
#savePlot(paste("Cokriged_tmax",data_name,out_prefix,".png", sep=""), type="png") |
|
275 |
#dev.off() |
|
276 |
#X11() |
|
277 |
#plot(raster(tmax_krige)) |
|
278 |
#title(paste("Tmax Kriging for date ",dates[[i]],sep="")) |
|
279 |
#savePlot(paste("Kriged_res_",data_name,out_prefix,".png", sep=""), type="png") |
|
280 |
#dev.off() |
|
281 |
# |
|
326 |
|
|
282 | 327 |
# results[i,1]<- dates[i] #storing the interpolation dates in the first column |
283 | 328 |
# results[i,2]<- ns #number of stations in training |
284 | 329 |
# results[i,3]<- RMSE_mod1 |
Also available in: Unified diff
KRIGING, raster prediction-major changes for spatially explicit interp.