Revision b52ed033
Added by Benoit Parmentier over 12 years ago
climate/research/oregon/interpolation/kriging_prediction_reg_function.R | ||
---|---|---|
1 |
runKriging <- function(i) { # loop over dates |
|
2 |
|
|
3 |
date<-strptime(dates[i], "%Y%m%d") # interpolation date being processed |
|
4 |
month<-strftime(date, "%m") # current month of the date being processed |
|
5 |
LST_month<-paste("mm_",month,sep="") # name of LST month to be matched |
|
6 |
|
|
7 |
#i=1 |
|
8 |
date_proc<-dates[i] |
|
9 |
date_proc<-strptime(dates[i], "%Y%m%d") # interpolation date being processed |
|
10 |
mo<-as.integer(strftime(date_proc, "%m")) # current month of the date being processed |
|
11 |
day<-as.integer(strftime(date_proc, "%d")) |
|
12 |
year<-as.integer(strftime(date_proc, "%Y")) |
|
13 |
|
|
14 |
|
|
15 |
#Adding layer LST to the raster stack |
|
16 |
|
|
17 |
pos<-match(LST_month,layerNames(s_raster)) #Find column with the current month for instance mm12 |
|
18 |
r1<-raster(s_raster,layer=pos) #Select layer from stack |
|
19 |
layerNames(r1)<-"LST" |
|
20 |
s_raster<-addLayer(s_raster,r1) #Adding current month |
|
21 |
s_sgdf<-as(s_raster,"SpatialGridDataFrame") #Conversion to spatial grid data frame |
|
22 |
|
|
23 |
###Regression part 1: Creating a validation dataset by creating training and testing datasets |
|
24 |
|
|
25 |
mod_LST <-ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))] #Match interpolation date and monthly LST average |
|
26 |
ghcn.subsets[[i]] = transform(ghcn.subsets[[i]],LST = mod_LST) #Add the variable LST to the subset dataset |
|
27 |
#n<-nrow(ghcn.subsets[[i]]) |
|
28 |
#ns<-n-round(n*prop) #Create a sample from the data frame with 70% of the rows |
|
29 |
#nv<-n-ns #create a sample for validation with prop of the rows |
|
30 |
#ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly |
|
31 |
ind.training<-sampling[[i]] |
|
32 |
ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training) |
|
33 |
data_s <- ghcn.subsets[[i]][ind.training, ] #Training dataset currently used in the modeling |
|
34 |
data_v <- ghcn.subsets[[i]][ind.testing, ] #Testing/validation dataset using input sampling |
|
35 |
|
|
36 |
ns<-nrow(data_s) |
|
37 |
nv<-nrow(data_v) |
|
38 |
|
|
39 |
###BEFORE model prediction the data object must be transformed to SDF |
|
40 |
|
|
41 |
coords<- data_v[,c('x_OR83M','y_OR83M')] |
|
42 |
coordinates(data_v)<-coords |
|
43 |
proj4string(data_v)<-CRS #Need to assign coordinates... |
|
44 |
coords<- data_s[,c('x_OR83M','y_OR83M')] |
|
45 |
coordinates(data_s)<-coords |
|
46 |
proj4string(data_s)<-CRS #Need to assign coordinates.. |
|
47 |
|
|
48 |
ns<-nrow(data_s) #This is added to because some loss of data might have happened because of the averaging... |
|
49 |
nv<-nrow(data_v) |
|
50 |
|
|
51 |
### PREDICTION/ Interpolation |
|
52 |
|
|
53 |
pos<-match("value",names(data_s)) #Find column with name "value" |
|
54 |
names(data_s)[pos]<-y_var_name |
|
55 |
pos<-match("value",names(data_v)) #Find column with name "value" |
|
56 |
names(data_v)[pos]<-y_var_name |
|
57 |
|
|
58 |
#if y_var_name=="dailyTmax" |
|
59 |
data_v$y_var<-data_v[[y_var_name]]/10 #Note that values are divided by 10 because the var is temp |
|
60 |
data_s$y_var<-data_s[[y_var_name]]/10 |
|
61 |
|
|
62 |
#Model and response variable can be changed without affecting the script |
|
63 |
|
|
64 |
formula1 <- as.formula("y_var ~1", env=.GlobalEnv) |
|
65 |
formula2 <- as.formula("y_var~ x_OR83M+y_OR83M", env=.GlobalEnv) |
|
66 |
formula3 <- as.formula("y_var~ x_OR83M+y_OR83M+ELEV_SRTM", env=.GlobalEnv) |
|
67 |
formula4 <- as.formula("y_var~ x_OR83M+y_OR83M+DISTOC", env=.GlobalEnv) |
|
68 |
formula5 <- as.formula("y_var~ x_OR83M+y_OR83M+ELEV_SRTM+DISTOC", env=.GlobalEnv) |
|
69 |
formula6 <- as.formula("y_var~ x_OR83M+y_OR83M+Northness+Eastness", env=.GlobalEnv) |
|
70 |
formula7 <- as.formula("y_var~ LST", env=.GlobalEnv) |
|
71 |
formula8 <- as.formula("y_var~ x_OR83M+y_OR83M+LST", env=.GlobalEnv) |
|
72 |
formula9 <- as.formula("y_var~ x_OR83M+y_OR83M+ELEV_SRTM+LST", env=.GlobalEnv) |
|
73 |
|
|
74 |
mod1<- try(autoKrige(formula1, input_data=data_s,new_data=s_sgdf,data_variogram=data_s)) |
|
75 |
mod2<- try(autoKrige(formula2, input_data=data_s,new_data=s_sgdf,data_variogram=data_s)) |
|
76 |
mod3<- try(autoKrige(formula3, input_data=data_s,new_data=s_sgdf,data_variogram=data_s)) |
|
77 |
mod4<- try(autoKrige(formula4, input_data=data_s,new_data=s_sgdf,data_variogram=data_s)) |
|
78 |
mod5<- try(autoKrige(formula5, input_data=data_s,new_data=s_sgdf,data_variogram=data_s)) |
|
79 |
mod6<- try(autoKrige(formula6, input_data=data_s,new_data=s_sgdf,data_variogram=data_s)) |
|
80 |
mod7<- try(autoKrige(formula7, input_data=data_s,new_data=s_sgdf,data_variogram=data_s)) |
|
81 |
mod8<- try(autoKrige(formula8, input_data=data_s,new_data=s_sgdf,data_variogram=data_s)) |
|
82 |
mod9<- try(autoKrige(formula9, input_data=data_s,new_data=s_sgdf,data_variogram=data_s)) |
|
83 |
|
|
84 |
#tmax_predicted=themolst+daily_delta_rast-bias_rast #Final surface?? but daily_rst |
|
85 |
|
|
86 |
### Model assessment |
|
87 |
|
|
88 |
for (j in 1:nmodels){ |
|
89 |
|
|
90 |
##Model assessment: specific diagnostic/metrics for GAM |
|
91 |
|
|
92 |
name<-paste("mod",j,sep="") #modj is the name of The "j" model (mod1 if j=1) |
|
93 |
mod<-get(name) #accessing GAM model ojbect "j" |
|
94 |
#krmod_auto<-get(mod) |
|
95 |
|
|
96 |
#If mod "j" is not a model object |
|
97 |
if (inherits(mod,"try-error")) { |
|
98 |
|
|
99 |
results_m1[1,1]<- dates[i] #storing the interpolation dates in the first column |
|
100 |
results_m1[1,2]<- ns #number of stations used in the training stage |
|
101 |
results_m1[1,3]<- "SSERR" |
|
102 |
results_m1[1,j+3]<- NA |
|
103 |
|
|
104 |
results_m2[1,1]<- dates[i] #storing the interpolation dates in the first column |
|
105 |
results_m2[1,2]<- ns #number of stations used in the training |
|
106 |
results_m2[1,3]<- "GCV" |
|
107 |
results_m2[1,j+3]<- NA |
|
108 |
|
|
109 |
results_m3[1,1]<- dates[i] #storing the interpolation dates in the first column |
|
110 |
results_m3[1,2]<- ns #number of stations used in the training stage |
|
111 |
results_m3[1,3]<- "DEV" |
|
112 |
results_m3[1,j+3]<- NA |
|
113 |
|
|
114 |
results_RMSE_f[1,1]<- dates[i] #storing the interpolation dates in the first column |
|
115 |
results_RMSE_f[1,2]<- ns #number of stations used in the training stage |
|
116 |
results_RMSE_f[1,3]<- "RSME_f" |
|
117 |
results_RMSE_f[1,j+3]<- NA |
|
118 |
|
|
119 |
results_MAE_f[1,1]<- dates[i] #storing the interpolation dates in the first column |
|
120 |
results_MAE_f[1,2]<- ns #number of stations used in the training stage |
|
121 |
results_MAE_f[1,3]<- "MAE_f" |
|
122 |
results_MAE_f[1,j+3]<-NA |
|
123 |
|
|
124 |
results_RMSE[1,1]<- dates[i] #storing the interpolation dates in the first column |
|
125 |
results_RMSE[1,2]<- ns #number of stations used in the training stage |
|
126 |
results_RMSE[1,3]<- "RMSE" |
|
127 |
results_RMSE[1,j+3]<- NA #Storing RMSE for the model j |
|
128 |
results_MAE[1,1]<- dates[i] #storing the interpolation dates in the first column |
|
129 |
results_MAE[1,2]<- ns #number of stations used in the training stage |
|
130 |
results_MAE[1,3]<- "MAE" |
|
131 |
results_MAE[1,j+3]<- NA #Storing MAE for the model j |
|
132 |
results_ME[1,1]<- dates[i] #storing the interpolation dates in the first column |
|
133 |
results_ME[1,2]<- ns #number of stations used in the training stage |
|
134 |
results_ME[1,3]<- "ME" |
|
135 |
results_ME[1,j+3]<- NA #Storing ME for the model j |
|
136 |
results_R2[1,1]<- dates[i] #storing the interpolation dates in the first column |
|
137 |
results_R2[1,2]<- ns #number of stations used in the training stage |
|
138 |
results_R2[1,3]<- "R2" |
|
139 |
results_R2[1,j+3]<- NA #Storing R2 for the model j |
|
140 |
|
|
141 |
} |
|
142 |
|
|
143 |
#If mod is a modelobject |
|
144 |
|
|
145 |
#If mod "j" is not a model object |
|
146 |
if (inherits(mod,"autoKrige")) { |
|
147 |
|
|
148 |
rpred<-mod$krige_output #Extracting the SptialGriDataFrame from the autokrige object |
|
149 |
|
|
150 |
#rpred<- predict(mod, newdata=s_sgdf, se.fit = TRUE) #Using the coeff to predict new values. |
|
151 |
y_pred<-rpred$var1.pred #is the order the same? |
|
152 |
#y_prederr<-rpred$var1.var |
|
153 |
raster_pred<-r1 |
|
154 |
layerNames(raster_pred)<-"y_pred" |
|
155 |
clearValues(raster_pred) #Clear values in memory, just in case... |
|
156 |
values(raster_pred)<-as.numeric(y_pred) #Assign values to every pixels |
|
157 |
|
|
158 |
data_name<-paste("predicted_mod",j,"_",dates[[i]],sep="") |
|
159 |
raster_name<-paste("Kriging_",data_name,out_prefix,".rst", sep="") |
|
160 |
writeRaster(raster_pred, filename=raster_name,overwrite=TRUE) #Writing the data in a raster file format...(IDRISI) |
|
161 |
#writeRaster(r2, filename=raster_name,overwrite=TRUE) #Writing the data in a raster file format...(IDRISI) |
|
162 |
|
|
163 |
#Save png plot here... |
|
164 |
data_name<-paste("predicted_mod",j,"_",dates[[i]],sep="") |
|
165 |
png_name<-paste("Kriging_plot_",data_name,out_prefix,".png", sep="") |
|
166 |
png(png_name) #Create file to write a plot |
|
167 |
#datelabel2=format(ISOdate(year,mo,day),"%B ") #Plot label |
|
168 |
plot(mod) #Plot to file the autokrige object |
|
169 |
#savePlot(paste("Bias_surface_LST_TMax_",dates[i],out_prefix,".png", sep=""), type="png") |
|
170 |
dev.off() #Release the hold to the file |
|
171 |
|
|
172 |
pred_sgdf<-as(raster_pred,"SpatialGridDataFrame" ) #Conversion to spatial grid data frame |
|
173 |
#rpred_val_s <- overlay(raster_pred,data_s) #This overlays the kriged surface tmax and the location of weather stations |
|
174 |
|
|
175 |
rpred_val_s <- overlay(pred_sgdf,data_s) #This overlays the kriged surface tmax and the location of weather stations |
|
176 |
rpred_val_v <- overlay(pred_sgdf,data_v) #This overlays the kriged surface tmax and the location of weather stations |
|
177 |
|
|
178 |
pred_mod<-paste("pred_mod",j,sep="") |
|
179 |
#Adding the results back into the original dataframes. |
|
180 |
data_s[[pred_mod]]<-rpred_val_s$y_pred |
|
181 |
data_v[[pred_mod]]<-rpred_val_v$y_pred |
|
182 |
|
|
183 |
#Model assessment: RMSE and then krig the residuals....! |
|
184 |
|
|
185 |
res_mod_s<- data_s$y_var - data_s[[pred_mod]] #Residuals from kriging training |
|
186 |
res_mod_v<- data_v$y_var - data_v[[pred_mod]] #Residuals from kriging validation |
|
187 |
|
|
188 |
####ADDED ON JULY 20th |
|
189 |
res_mod<-res_mod_v |
|
190 |
|
|
191 |
#RMSE_mod <- sqrt(sum(res_mod^2)/nv) #RMSE FOR REGRESSION STEP 1: GAM |
|
192 |
RMSE_mod<- sqrt(mean(res_mod^2,na.rm=TRUE)) |
|
193 |
#MAE_mod<- sum(abs(res_mod),na.rm=TRUE)/(nv-sum(is.na(res_mod))) #MAE from kriged surface validation |
|
194 |
MAE_mod<- mean(abs(res_mod), na.rm=TRUE) |
|
195 |
#ME_mod<- sum(res_mod,na.rm=TRUE)/(nv-sum(is.na(res_mod))) #ME, Mean Error or bias FOR REGRESSION STEP 1: GAM |
|
196 |
ME_mod<- mean(res_mod,na.rm=TRUE) #ME, Mean Error or bias FOR REGRESSION STEP 1: GAM |
|
197 |
#R2_mod<- cor(data_v$y_var,data_v[[pred_mod]])^2 #R2, coef. of var FOR REGRESSION STEP 1: GAM |
|
198 |
R2_mod<- cor(data_v$y_var,data_v[[pred_mod]], use="complete")^2 |
|
199 |
|
|
200 |
R2_mod_f<- cor(data_s$y_var,data_s[[pred_mod]], use="complete")^2 |
|
201 |
RMSE_mod_f<- sqrt(mean(res_mod_s^2,na.rm=TRUE)) |
|
202 |
#MAE_mod<- sum(abs(res_mod),na.rm=TRUE)/(nv-sum(is.na(res_mod))) #MAE from kriged surface validation |
|
203 |
MAE_mod_f<- mean(abs(res_mod_s), na.rm=TRUE) |
|
204 |
|
|
205 |
results_m1[1,1]<- dates[i] #storing the interpolation dates in the first column |
|
206 |
results_m1[1,2]<- ns #number of stations used in the training stage |
|
207 |
results_m1[1,3]<- "SSERR" |
|
208 |
results_m1[1,j+3]<- mod$sserr |
|
209 |
|
|
210 |
results_m2[1,1]<- dates[i] #storing the interpolation dates in the first column |
|
211 |
results_m2[1,2]<- ns #number of stations used in the training |
|
212 |
results_m2[1,3]<- "GCV" |
|
213 |
results_m2[1,j+3]<- NA |
|
214 |
|
|
215 |
results_m3[1,1]<- dates[i] #storing the interpolation dates in the first column |
|
216 |
results_m3[1,2]<- ns #number of stations used in the training stage |
|
217 |
results_m3[1,3]<- "DEV" |
|
218 |
results_m3[1,j+3]<- NA |
|
219 |
|
|
220 |
results_RMSE_f[1,1]<- dates[i] #storing the interpolation dates in the first column |
|
221 |
results_RMSE_f[1,2]<- ns #number of stations used in the training stage |
|
222 |
results_RMSE_f[1,3]<- "RSME_f" |
|
223 |
results_RMSE_f[1,j+3]<-RMSE_mod_f |
|
224 |
|
|
225 |
results_MAE_f[1,1]<- dates[i] #storing the interpolation dates in the first column |
|
226 |
results_MAE_f[1,2]<- ns #number of stations used in the training stage |
|
227 |
results_MAE_f[1,3]<- "MAE_f" |
|
228 |
results_MAE_f[1,j+3]<-MAE_mod_f |
|
229 |
|
|
230 |
results_R2_f[1,1]<- dates[i] #storing the interpolation dates in the first column |
|
231 |
results_R2_f[1,2]<- ns #number of stations used in the training stage |
|
232 |
results_R2_f[1,3]<- "R2_f" |
|
233 |
results_R2_f[1,j+3]<- R2_mod_f #Storing R2 for the model j |
|
234 |
|
|
235 |
results_RMSE[1,1]<- dates[i] #storing the interpolation dates in the first column |
|
236 |
results_RMSE[1,2]<- ns #number of stations used in the training stage |
|
237 |
results_RMSE[1,3]<- "RMSE" |
|
238 |
results_RMSE[1,j+3]<- RMSE_mod #Storing RMSE for the model j |
|
239 |
results_MAE[1,1]<- dates[i] #storing the interpolation dates in the first column |
|
240 |
results_MAE[1,2]<- ns #number of stations used in the training stage |
|
241 |
results_MAE[1,3]<- "MAE" |
|
242 |
results_MAE[1,j+3]<- MAE_mod #Storing MAE for the model j |
|
243 |
results_ME[1,1]<- dates[i] #storing the interpolation dates in the first column |
|
244 |
results_ME[1,2]<- ns #number of stations used in the training stage |
|
245 |
results_ME[1,3]<- "ME" |
|
246 |
results_ME[1,j+3]<- ME_mod #Storing ME for the model j |
|
247 |
results_R2[1,1]<- dates[i] #storing the interpolation dates in the first column |
|
248 |
results_R2[1,2]<- ns #number of stations used in the training stage |
|
249 |
results_R2[1,3]<- "R2" |
|
250 |
results_R2[1,j+3]<- R2_mod #Storing R2 for the model j |
|
251 |
|
|
252 |
#Saving residuals and prediction in the dataframes: tmax predicted from GAM |
|
253 |
|
|
254 |
name2<-paste("res_mod",j,sep="") |
|
255 |
data_v[[name2]]<-as.numeric(res_mod_v) |
|
256 |
data_s[[name2]]<-as.numeric(res_mod_s) |
|
257 |
#end of loop calculating RMSE |
|
258 |
} |
|
259 |
} |
|
260 |
|
|
261 |
#if (i==length(dates)){ |
|
262 |
|
|
263 |
#Specific diagnostic measures related to the testing datasets |
|
264 |
|
|
265 |
results_table_RMSE<-as.data.frame(results_RMSE) |
|
266 |
results_table_MAE<-as.data.frame(results_MAE) |
|
267 |
results_table_ME<-as.data.frame(results_ME) |
|
268 |
results_table_R2<-as.data.frame(results_R2) |
|
269 |
results_table_RMSE_f<-as.data.frame(results_RMSE_f) |
|
270 |
results_table_MAE_f<-as.data.frame(results_MAE_f) |
|
271 |
results_table_R2_f<-as.data.frame(results_R2_f) |
|
272 |
|
|
273 |
results_table_m1<-as.data.frame(results_m1) |
|
274 |
results_table_m2<-as.data.frame(results_m2) |
|
275 |
results_table_m3<-as.data.frame(results_m3) |
|
276 |
|
|
277 |
tb_metrics1<-rbind(results_table_RMSE,results_table_MAE, results_table_ME, |
|
278 |
results_table_R2,results_table_RMSE_f,results_table_MAE_f,results_table_R2_f) # |
|
279 |
tb_metrics2<-rbind(results_table_m1,results_table_m2, results_table_m3) |
|
280 |
cname<-c("dates","ns","metric","mod1", "mod2","mod3", "mod4", "mod5", "mod6", "mod7","mod8","mod9") |
|
281 |
colnames(tb_metrics1)<-cname |
|
282 |
cname<-c("dates","ns","metric","mod1", "mod2","mod3", "mod4", "mod5", "mod6", "mod7","mod8","mod9") |
|
283 |
colnames(tb_metrics2)<-cname |
|
284 |
#colnames(results_table_RMSE)<-cname |
|
285 |
#colnames(results_table_RMSE_f)<-cname |
|
286 |
#tb_diagnostic1<-results_table_RMSE #measures of validation |
|
287 |
#tb_diagnostic2<-results_table_RMSE_f #measures of fit |
|
288 |
|
|
289 |
#write.table(tb_diagnostic1, file= paste(path,"/","results_fusion_Assessment_measure1",out_prefix,".txt",sep=""), sep=",") |
|
290 |
|
|
291 |
#} |
|
292 |
print(paste(dates[i],"processed")) |
|
293 |
# Kriging object may need to be modified...because it contains the full image of prediction!! |
|
294 |
##loop through model objects data frame and set field to zero... |
|
295 |
|
|
296 |
mod_obj<-list(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8,mod9) |
|
297 |
names(mod_obj)<-c("mod1","mod2","mod3","mod4","mod5","mod6","mod7","mod8","mod9") #generate names automatically?? |
|
298 |
#results_list<-list(data_s,data_v,tb_metrics1,tb_metrics2) |
|
299 |
#save(mod_obj,file= paste(path,"/","results_list_mod_objects_",dates[i],out_prefix,".RData",sep="")) |
|
300 |
|
|
301 |
for (j in 1:nmodels){ |
|
302 |
if (inherits(mod_obj[[j]],"autoKrige")){ |
|
303 |
mod_obj[[j]]$krige_output<-NULL |
|
304 |
} |
|
305 |
} |
|
306 |
results_list<-list(data_s,data_v,tb_metrics1,tb_metrics2,mod_obj) |
|
307 |
names(results_list)<-c("data_s","data_v","tb_metrics1","tb_metrics2","mod_obj") |
|
308 |
save(results_list,file= paste(path,"/","results_list_metrics_objects_",dates[i],out_prefix,".RData",sep="")) |
|
309 |
return(results_list) |
|
310 |
#return(tb_diagnostic1) |
|
311 |
} |
Also available in: Unified diff
KRIGING, modified function for memory clean up and other improvements