Revision 729a2357
Added by Benoit Parmentier over 12 years ago
climate/research/oregon/interpolation/fusion_gam_prediction_reg_function.R | ||
---|---|---|
1 |
runFusion <- function(i) { # loop over dates |
|
1 |
runGAMFusion <- function(i) { # loop over dates
|
|
2 | 2 |
|
3 | 3 |
date<-strptime(dates[i], "%Y%m%d") # interpolation date being processed |
4 | 4 |
month<-strftime(date, "%m") # current month of the date being processed |
5 | 5 |
LST_month<-paste("mm_",month,sep="") # name of LST month to be matched |
6 | 6 |
|
7 |
#Adding layer LST to the raster stack |
|
8 |
|
|
9 |
pos<-match(LST_month,layerNames(s_raster)) #Find column with the current month for instance mm12 |
|
10 |
r1<-raster(s_raster,layer=pos) #Select layer from stack |
|
11 |
layerNames(r1)<-"LST" |
|
12 |
s_raster<-addLayer(s_raster,r1) #Adding current month |
|
13 |
|
|
7 | 14 |
###Regression part 1: Creating a validation dataset by creating training and testing datasets |
8 | 15 |
|
9 | 16 |
mod_LST <-ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))] #Match interpolation date and monthly LST average |
... | ... | |
15 | 22 |
ind.training<-sampling[[i]] |
16 | 23 |
ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training) |
17 | 24 |
data_s <- ghcn.subsets[[i]][ind.training, ] #Training dataset currently used in the modeling |
18 |
data_v <- ghcn.subsets[[i]][ind.testing, ] #Testing/validation dataset |
|
25 |
data_v <- ghcn.subsets[[i]][ind.testing, ] #Testing/validation dataset using input sampling
|
|
19 | 26 |
|
20 | 27 |
ns<-nrow(data_s) |
21 | 28 |
nv<-nrow(data_v) |
... | ... | |
29 | 36 |
datelabel=format(ISOdate(year,mo,day),"%b %d, %Y") |
30 | 37 |
|
31 | 38 |
########### |
32 |
# STEP 1 - 10 year monthly averages |
|
39 |
# STEP 1 - LST 10 year monthly averages
|
|
33 | 40 |
########### |
34 |
|
|
35 |
#l=list.files(pattern="mean_month.*rescaled.rst") |
|
36 |
l <-readLines(paste(path,"/",infile6, sep="")) |
|
37 |
molst<-stack(l) #Creating a raster stack... |
|
38 |
#setwd(old) |
|
39 |
molst=molst-273.16 #K->C |
|
40 |
idx <- seq(as.Date('2010-01-15'), as.Date('2010-12-15'), 'month') |
|
41 |
molst <- setZ(molst, idx) |
|
42 |
layerNames(molst) <- month.abb |
|
41 |
|
|
43 | 42 |
themolst<-raster(molst,mo) #current month being processed saved in a raster image |
44 | 43 |
plot(themolst) |
45 | 44 |
|
... | ... | |
47 | 46 |
# STEP 2 - Weather station means across same days: Monthly mean calculation |
48 | 47 |
########### |
49 | 48 |
|
50 |
##Added by Benoit ###### |
|
51 |
date1<-ISOdate(data3$year,data3$month,data3$day) #Creating a date object from 3 separate column |
|
52 |
date2<-as.POSIXlt(as.Date(date1)) |
|
53 |
data3$date<-date2 |
|
54 |
d<-subset(data3,year>=2000 & mflag=="0" ) #Selecting dataset 2000-2010 with good quality: 193 stations |
|
55 |
#May need some screeing??? i.e. range of temp and elevation... |
|
56 |
d1<-aggregate(value~station+month, data=d, mean) #Calculate monthly mean for every station in OR |
|
57 |
id<-as.data.frame(unique(d1$station)) #Unique station in OR for year 2000-2010: 193 but 7 loss of monthly avg |
|
58 |
|
|
59 |
dst<-merge(d1, stat_loc, by.x="station", by.y="STAT_ID") #Inner join all columns are retained |
|
60 |
|
|
61 |
#This allows to change only one name of the data.frame |
|
62 |
pos<-match("value",names(dst)) #Find column with name "value" |
|
63 |
names(dst)[pos]<-c("TMax") |
|
64 |
dst$TMax<-dst$TMax/10 #TMax is the average max temp for months |
|
65 |
#dstjan=dst[dst$month==9,] #dst contains the monthly averages for tmax for every station over 2000-2010 |
|
66 |
############## |
|
67 |
|
|
68 | 49 |
modst=dst[dst$month==mo,] #Subsetting dataset for the relevant month of the date being processed |
69 | 50 |
|
70 | 51 |
########## |
... | ... | |
90 | 71 |
modst$LSTD_bias<-sta_bias #Adding bias to data frame modst containning the monthly average for 10 years |
91 | 72 |
|
92 | 73 |
bias_xy=project(as.matrix(sta_lola),proj_str) |
93 |
# windows() |
|
94 | 74 |
png(paste("LST_TMax_scatterplot_",dates[i],out_prefix,".png", sep="")) |
95 | 75 |
plot(modst$TMax,sta_tmax_from_lst,xlab="Station mo Tmax",ylab="LST mo Tmax",main=paste("LST vs TMax for",datelabel,sep=" ")) |
96 | 76 |
abline(0,1) |
... | ... | |
102 | 82 |
d<-data_s |
103 | 83 |
|
104 | 84 |
pos<-match("value",names(d)) #Find column with name "value" |
105 |
names(d)[pos]<-c("dailyTmax") |
|
106 |
names(x)[pos]<-c("dailyTmax") |
|
85 |
#names(d)[pos]<-c("dailyTmax") |
|
86 |
names(d)[pos]<-y_var_name |
|
87 |
names(x)[pos]<-y_var_name |
|
88 |
#names(x)[pos]<-c("dailyTmax") |
|
107 | 89 |
d$dailyTmax=(as.numeric(d$dailyTmax))/10 #stored as 1/10 degree C to allow integer storage |
108 | 90 |
x$dailyTmax=(as.numeric(x$dailyTmax))/10 #stored as 1/10 degree C to allow integer storage |
109 | 91 |
pos<-match("station",names(d)) #Find column with name "value" |
... | ... | |
190 | 172 |
data_s<-dmoday #put the |
191 | 173 |
data_s$daily_delta<-daily_delta |
192 | 174 |
|
193 |
|
|
194 | 175 |
#data_s$y_var<-daily_delta #y_var is the variable currently being modeled, may be better with BIAS!! |
195 |
data_s$y_var<-data_s$LSTD_bias |
|
196 |
#data_s$y_var<-(data_s$dailyTmax)*10 |
|
197 |
#Model and response variable can be changed without affecting the script |
|
198 |
|
|
199 |
mod1<- try(gam(y_var~ s(lat) + s (lon) + s (ELEV_SRTM), data=data_s)) |
|
200 |
mod2<- try(gam(y_var~ s(lat,lon)+ s(ELEV_SRTM), data=data_s)) #modified nesting....from 3 to 2 |
|
201 |
mod3<- try(gam(y_var~ s(lat) + s (lon) + s (ELEV_SRTM) + s (Northness)+ s (Eastness) + s(DISTOC), data=data_s)) |
|
202 |
mod4<- try(gam(y_var~ s(lat) + s (lon) + s(ELEV_SRTM) + s(Northness) + s (Eastness) + s(DISTOC) + s(LST), data=data_s)) |
|
203 |
mod5<- try(gam(y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST), data=data_s)) |
|
204 |
mod6<- try(gam(y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST)+s(LC1), data=data_s)) |
|
205 |
mod7<- try(gam(y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST)+s(LC3), data=data_s)) |
|
206 |
mod8<- try(gam(y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST) + s(LC1), data=data_s)) |
|
207 |
|
|
208 |
#Added |
|
209 |
#tmax_predicted=themolst+daily_delta_rast-bias_rast #Final surface?? but daily_rst |
|
210 |
|
|
176 |
#data_s$y_var<-data_s$LSTD_bias |
|
211 | 177 |
#### Added by Benoit ends |
212 | 178 |
|
213 | 179 |
######### |
214 | 180 |
# STEP 8 - assemble final answer - T=LST+Bias(interpolated)+delta(interpolated) |
215 | 181 |
######### |
216 | 182 |
|
217 |
|
|
218 | 183 |
bias_rast=interpolate(themolst,fitbias) #interpolation using function from raster package |
219 | 184 |
#themolst is raster layer, fitbias is "Krig" object from bias surface |
220 | 185 |
#plot(bias_rast,main="Raster bias") #This not displaying... |
... | ... | |
279 | 244 |
### END OF BRIAN's code |
280 | 245 |
|
281 | 246 |
### Added by benoit |
282 |
#Store results using TPS |
|
283 |
# j=9 |
|
284 |
# results_RMSE[i,1]<- dates[i] #storing the interpolation dates in the first column |
|
285 |
# results_RMSE[i,2]<- ns #number of stations used in the training stage |
|
286 |
# results_RMSE[i,3]<- "RMSE" |
|
287 |
# results_RMSE[i,j+3]<- rmse #Storing RMSE for the model j |
|
288 |
# |
|
289 |
# results_RMSE_f[i,1]<- dates[i] #storing the interpolation dates in the first column |
|
290 |
# results_RMSE_f[i,2]<- ns #number of stations used in the training stage |
|
291 |
# results_RMSE_f[i,3]<- "RMSE" |
|
292 |
# results_RMSE_f[i,j+3]<- rmse_fit #Storing RMSE for the model j |
|
293 |
# |
|
247 |
|
|
248 |
###BEFORE GAM prediction the data object must be transformed to SDF |
|
249 |
|
|
250 |
coords<- data_v[,c('x_OR83M','y_OR83M')] |
|
251 |
coordinates(data_v)<-coords |
|
252 |
proj4string(data_v)<-CRS #Need to assign coordinates... |
|
253 |
coords<- data_s[,c('x_OR83M','y_OR83M')] |
|
254 |
coordinates(data_s)<-coords |
|
255 |
proj4string(data_s)<-CRS #Need to assign coordinates.. |
|
256 |
|
|
294 | 257 |
ns<-nrow(data_s) #This is added to because some loss of data might have happened because of the averaging... |
295 |
nv<-nrow(data_v) |
|
258 |
nv<-nrow(data_v) |
|
259 |
|
|
260 |
###GAM PREDICTION |
|
261 |
|
|
262 |
#data_s$y_var<-data_s$dailyTmax #This shoudl be changed for any variable!!! |
|
263 |
#data_v$y_var<-data_v$dailyTmax |
|
264 |
data_v$y_var<-data_v[[y_var_name]] |
|
265 |
data_s$y_var<-data_s[[y_var_name]] |
|
266 |
|
|
267 |
#Model and response variable can be changed without affecting the script |
|
268 |
|
|
269 |
formula1 <- as.formula("y_var ~ s(lat) + s(lon) + s(ELEV_SRTM)", env=.GlobalEnv) |
|
270 |
formula2 <- as.formula("y_var~ s(lat,lon)+ s(ELEV_SRTM)", env=.GlobalEnv) |
|
271 |
formula3 <- as.formula("y_var~ s(lat) + s (lon) + s (ELEV_SRTM) + s (Northness)+ s (Eastness) + s(DISTOC)", env=.GlobalEnv) |
|
272 |
formula4 <- as.formula("y_var~ s(lat) + s (lon) + s(ELEV_SRTM) + s(Northness) + s (Eastness) + s(DISTOC) + s(LST)", env=.GlobalEnv) |
|
273 |
formula5 <- as.formula("y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST)", env=.GlobalEnv) |
|
274 |
formula6 <- as.formula("y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST)+s(LC1)", env=.GlobalEnv) |
|
275 |
formula7 <- as.formula("y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST)+s(LC3)", env=.GlobalEnv) |
|
276 |
formula8 <- as.formula("y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST) + s(LC1,LC3)", env=.GlobalEnv) |
|
277 |
|
|
278 |
mod1<- try(gam(formula1, data=data_s)) |
|
279 |
mod2<- try(gam(formula2, data=data_s)) #modified nesting....from 3 to 2 |
|
280 |
mod3<- try(gam(formula3, data=data_s)) |
|
281 |
mod4<- try(gam(formula4, data=data_s)) |
|
282 |
mod5<- try(gam(formula5, data=data_s)) |
|
283 |
mod6<- try(gam(formula6, data=data_s)) |
|
284 |
mod7<- try(gam(formula7, data=data_s)) |
|
285 |
mod8<- try(gam(formula8, data=data_s)) |
|
286 |
|
|
287 |
# mod1<- try(gam(formula1, data=data_s)) |
|
288 |
# mod2<- try(gam(formula2, data=data_s)) #modified nesting....from 3 to 2 |
|
289 |
# mod3<- try(gam(y_var~ s(lat) + s (lon) + s (ELEV_SRTM) + s (Northness)+ s (Eastness) + s(DISTOC), data=data_s)) |
|
290 |
# mod4<- try(gam(y_var~ s(lat) + s (lon) + s(ELEV_SRTM) + s(Northness) + s (Eastness) + s(DISTOC) + s(LST), data=data_s)) |
|
291 |
# mod5<- try(gam(y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST), data=data_s)) |
|
292 |
# mod6<- try(gam(y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST)+s(LC1), data=data_s)) |
|
293 |
# mod7<- try(gam(y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST)+s(LC3), data=data_s)) |
|
294 |
# mod8<- try(gam(y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST) + s(LC1,LC3), data=data_s)) |
|
295 |
# |
|
296 |
#Added |
|
297 |
#tmax_predicted=themolst+daily_delta_rast-bias_rast #Final surface?? but daily_rst |
|
296 | 298 |
|
297 | 299 |
### Added by benoit |
298 | 300 |
#Store results using TPS |
299 |
j=9
|
|
301 |
j=nmodels+1
|
|
300 | 302 |
results_RMSE[1]<- dates[i] #storing the interpolation dates in the first column |
301 | 303 |
results_RMSE[2]<- ns #number of stations used in the training stage |
302 | 304 |
results_RMSE[3]<- "RMSE" |
305 |
|
|
303 | 306 |
results_RMSE[j+3]<- rmse #Storing RMSE for the model j |
304 | 307 |
|
305 | 308 |
results_RMSE_f[1]<- dates[i] #storing the interpolation dates in the first column |
... | ... | |
330 | 333 |
#ns<-nrow(data_s) #This is added to because some loss of data might have happened because of the averaging... |
331 | 334 |
#nv<-nrow(data_v) |
332 | 335 |
|
333 |
for (j in 1:length(models)){ |
|
336 |
|
|
337 |
for (j in 1:nmodels){ |
|
334 | 338 |
|
335 | 339 |
##Model assessment: specific diagnostic/metrics for GAM |
336 | 340 |
|
... | ... | |
403 | 407 |
results_DEV[3]<- "DEV" |
404 | 408 |
results_DEV[j+3]<- mod$deviance |
405 | 409 |
|
406 |
sta_LST_s=lookup(themolst,data_s$lat,data_s$lon) |
|
407 |
sta_delta_s=lookup(daily_delta_rast,data_s$lat,data_s$lon) #delta surface has been calculated before!! |
|
408 |
sta_bias_s= mod$fit |
|
409 |
#Need to extract values from the kriged delta surface... |
|
410 |
#sta_delta= lookup(delta_surface,data_v$lat,data_v$lon) |
|
411 |
#tmax_predicted=sta_LST+sta_bias-y_mod$fit |
|
412 |
tmax_predicted_s= sta_LST_s-sta_bias_s+sta_delta_s |
|
413 |
|
|
410 |
y_var_fit= mod$fit |
|
411 |
|
|
414 | 412 |
results_RMSE_f[1]<- dates[i] #storing the interpolation dates in the first column |
415 | 413 |
results_RMSE_f[2]<- ns #number of stations used in the training stage |
416 | 414 |
results_RMSE_f[3]<- "RSME_f" |
417 |
results_RMSE_f[j+3]<- sqrt(sum((tmax_predicted_s-data_s$dailyTmax)^2)/ns) |
|
415 |
#results_RMSE_f[j+3]<- sqrt(sum((y_var_fit-data_s$y_var)^2)/ns) |
|
416 |
results_RMSE_f[j+3]<-sqrt(mean(mod$residuals^2,na.rm=TRUE)) |
|
418 | 417 |
|
419 | 418 |
results_MAE_f[1]<- dates[i] #storing the interpolation dates in the first column |
420 | 419 |
results_MAE_f[2]<- ns #number of stations used in the training stage |
421 | 420 |
results_MAE_f[3]<- "MAE_f" |
422 |
results_MAE_f[j+3]<-sum(abs(tmax_predicted_s-data_s$dailyTmax))/ns |
|
421 |
#results_MAE_f[j+3]<-sum(abs(y_var_fit-data_s$y_var))/ns |
|
422 |
results_MAE_f[j+3]<-mean(abs(mod$residuals),na.rm=TRUE) |
|
423 | 423 |
|
424 | 424 |
##Model assessment: general diagnostic/metrics |
425 | 425 |
##validation: using the testing data |
426 |
if (predval==1) { |
|
426 | 427 |
|
427 |
#data_v$y_var<-data_v$LSTD_bias |
|
428 |
#data_v$y_var<-tmax |
|
429 |
y_mod<- predict(mod, newdata=data_v, se.fit = TRUE) #Using the coeff to predict new values. |
|
428 |
##Model assessment: specific diagnostic/metrics for GAM |
|
429 |
|
|
430 |
name<-paste("mod",j,sep="") #modj is the name of The "j" model (mod1 if j=1) |
|
431 |
mod<-get(name) #accessing GAM model ojbect "j" |
|
432 |
|
|
433 |
s_sgdf<-as(s_raster,"SpatialGridDataFrame") #Conversion to spatial grid data frame |
|
434 |
|
|
435 |
rpred<- predict(mod, newdata=s_sgdf, se.fit = TRUE) #Using the coeff to predict new values. |
|
436 |
y_pred<-rpred$fit |
|
437 |
raster_pred<-r1 |
|
438 |
layerNames(raster_pred)<-"y_pred" |
|
439 |
values(raster_pred)<-as.numeric(y_pred) |
|
440 |
data_name<-paste("predicted_mod",j,"_",dates[[i]],sep="") |
|
441 |
raster_name<-paste("GAM_",data_name,out_prefix,".rst", sep="") |
|
442 |
writeRaster(raster_pred, filename=raster_name,overwrite=TRUE) #Writing the data in a raster file format...(IDRISI) |
|
443 |
#writeRaster(r2, filename=raster_name,overwrite=TRUE) #Writing the data in a raster file format...(IDRISI) |
|
444 |
|
|
445 |
pred_sgdf<-as(raster_pred,"SpatialGridDataFrame") #Conversion to spatial grid data frame |
|
446 |
#rpred_val_s <- overlay(raster_pred,data_s) #This overlays the kriged surface tmax and the location of weather stations |
|
447 |
|
|
448 |
rpred_val_s <- overlay(pred_sgdf,data_s) #This overlays the kriged surface tmax and the location of weather stations |
|
449 |
rpred_val_v <- overlay(pred_sgdf,data_v) #This overlays the kriged surface tmax and the location of weather stations |
|
450 |
|
|
451 |
pred_mod<-paste("pred_mod",j,sep="") |
|
452 |
#Adding the results back into the original dataframes. |
|
453 |
data_s[[pred_mod]]<-rpred_val_s$y_pred |
|
454 |
data_v[[pred_mod]]<-rpred_val_v$y_pred |
|
455 |
|
|
456 |
#Model assessment: RMSE and then krig the residuals....! |
|
457 |
|
|
458 |
res_mod_s<- data_s$y_var - data_s[[pred_mod]] #Residuals from kriging training |
|
459 |
res_mod_v<- data_v$y_var - data_v[[pred_mod]] #Residuals from kriging validation |
|
460 |
|
|
461 |
} |
|
430 | 462 |
|
431 |
####ADDED ON JULY 5th |
|
432 |
sta_LST_v=lookup(themolst,data_v$lat,data_v$lon) |
|
433 |
sta_delta_v=lookup(daily_delta_rast,data_v$lat,data_v$lon) #delta surface has been calculated before!! |
|
434 |
sta_bias_v= y_mod$fit |
|
435 |
#Need to extract values from the kriged delta surface... |
|
436 |
#sta_delta= lookup(delta_surface,data_v$lat,data_v$lon) |
|
437 |
#tmax_predicted=sta_LST+sta_bias-y_mod$fit |
|
438 |
tmax_predicted_v= sta_LST_v-sta_bias_v+sta_delta_v |
|
463 |
if (predval==0) { |
|
439 | 464 |
|
440 |
#data_v$tmax<-(data_v$tmax)/10 |
|
441 |
res_mod<- data_v$dailyTmax - tmax_predicted_v #Residuals for the model for fusion |
|
442 |
#res_mod<- data_v$y_var - y_mod$fit #Residuals for the model |
|
443 |
|
|
444 |
RMSE_mod <- sqrt(sum(res_mod^2)/nv) #RMSE FOR REGRESSION STEP 1: GAM |
|
445 |
MAE_mod<- sum(abs(res_mod))/nv #MAE, Mean abs. Error FOR REGRESSION STEP 1: GAM |
|
446 |
ME_mod<- sum(res_mod)/nv #ME, Mean Error or bias FOR REGRESSION STEP 1: GAM |
|
447 |
R2_mod<- cor(data_v$dailyTmax,tmax_predicted_v)^2 #R2, coef. of var FOR REGRESSION STEP 1: GAM |
|
465 |
y_mod<- predict(mod, newdata=data_v, se.fit = TRUE) #Using the coeff to predict new values. |
|
466 |
|
|
467 |
pred_mod<-paste("pred_mod",j,sep="") |
|
468 |
#Adding the results back into the original dataframes. |
|
469 |
data_s[[pred_mod]]<-as.numeric(mod$fit) |
|
470 |
data_v[[pred_mod]]<-as.numeric(y_mod$fit) |
|
471 |
|
|
472 |
#Model assessment: RMSE and then krig the residuals....! |
|
473 |
|
|
474 |
res_mod_s<- data_s$y_var - data_s[[pred_mod]] #Residuals from kriging training |
|
475 |
res_mod_v<- data_v$y_var - data_v[[pred_mod]] #Residuals from kriging validation |
|
476 |
} |
|
477 |
|
|
478 |
####ADDED ON JULY 20th |
|
479 |
res_mod<-res_mod_v |
|
448 | 480 |
|
481 |
#RMSE_mod <- sqrt(sum(res_mod^2)/nv) #RMSE FOR REGRESSION STEP 1: GAM |
|
482 |
RMSE_mod<- sqrt(mean(res_mod^2,na.rm=TRUE)) |
|
483 |
#MAE_mod<- sum(abs(res_mod),na.rm=TRUE)/(nv-sum(is.na(res_mod))) #MAE from kriged surface validation |
|
484 |
MAE_mod<- mean(abs(res_mod), na.rm=TRUE) |
|
485 |
#ME_mod<- sum(res_mod,na.rm=TRUE)/(nv-sum(is.na(res_mod))) #ME, Mean Error or bias FOR REGRESSION STEP 1: GAM |
|
486 |
ME_mod<- mean(res_mod,na.rm=TRUE) #ME, Mean Error or bias FOR REGRESSION STEP 1: GAM |
|
487 |
#R2_mod<- cor(data_v$y_var,data_v[[pred_mod]])^2 #R2, coef. of var FOR REGRESSION STEP 1: GAM |
|
488 |
R2_mod<- cor(data_v$y_var,data_v[[pred_mod]], use="complete")^2 |
|
449 | 489 |
results_RMSE[1]<- dates[i] #storing the interpolation dates in the first column |
450 | 490 |
results_RMSE[2]<- ns #number of stations used in the training stage |
451 | 491 |
results_RMSE[3]<- "RMSE" |
... | ... | |
464 | 504 |
results_R2[j+3]<- R2_mod #Storing R2 for the model j |
465 | 505 |
|
466 | 506 |
#Saving residuals and prediction in the dataframes: tmax predicted from GAM |
467 |
pred<-paste("pred_mod",j,sep="") |
|
468 |
#data_v[[pred]]<-as.numeric(y_mod$fit) |
|
469 |
data_v[[pred]]<-as.numeric(tmax_predicted_v) |
|
470 |
data_s[[pred]]<-as.numeric(tmax_predicted_s) #Storing model fit values (predicted on training sample) |
|
471 |
#data_s[[pred]]<-as.numeric(mod$fit) #Storing model fit values (predicted on training sample) |
|
472 |
|
|
507 |
|
|
473 | 508 |
name2<-paste("res_mod",j,sep="") |
474 |
data_v[[name2]]<-as.numeric(res_mod) |
|
475 |
temp<-tmax_predicted_s-data_s$dailyTmax |
|
476 |
data_s[[name2]]<-as.numeric(temp) |
|
509 |
data_v[[name2]]<-as.numeric(res_mod_v) |
|
510 |
data_s[[name2]]<-as.numeric(res_mod_s) |
|
477 | 511 |
#end of loop calculating RMSE |
478 | 512 |
} |
479 | 513 |
} |
... | ... | |
508 | 542 |
|
509 | 543 |
#} |
510 | 544 |
print(paste(dates[i],"processed")) |
511 |
#mod_obj<-list(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8,mod9a,mod9b) |
|
512 | 545 |
# end of the for loop1 |
513 |
results_list<-list(data_s,data_v,tb_metrics1,tb_metrics2) |
|
514 |
#results_list<-list(data_s,data_v,tb_metrics1,tb_metrics2,mod_obj) |
|
546 |
mod_obj<-list(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8,mod9a,mod9b) |
|
547 |
names(mod_obj)<-c("mod1","mod2","mod3","mod4","mod5","mod6","mod7","mod8","mod9a","mod9b") |
|
548 |
#results_list<-list(data_s,data_v,tb_metrics1,tb_metrics2) |
|
549 |
results_list<-list(data_s,data_v,tb_metrics1,tb_metrics2,mod_obj) |
|
550 |
names(results_list)<-c("data_s","data_v","tb_metrics1","tb_metrics2","mod_obj") |
|
551 |
save(results_list,file= paste(path,"/","results_list_metrics_objects_",dates[i],out_prefix,".RData",sep="")) |
|
515 | 552 |
return(results_list) |
516 | 553 |
#return(tb_diagnostic1) |
517 | 554 |
} |
Also available in: Unified diff
FUSION function to predict raster, clean up of mod object using as.formula