Revision 42a1c3b5
Added by Benoit Parmentier over 10 years ago
climate/research/oregon/interpolation/nex_run_gam_fitting.R | ||
---|---|---|
1 | 1 |
#################################### INTERPOLATION OF TEMPERATURES ####################################### |
2 | 2 |
############################ Script for experimentation of GAM model fitting ############################## |
3 | 3 |
#This script uses the worklfow code applied to the globe. Results currently reside on NEX/PLEIADES NASA. |
4 |
#The purpose is to test different parameters for the GAM function to allow . |
|
4 |
#The purpose is to test different parameters for the GAM function to allow . |
|
5 |
## - experiment with gamma |
|
6 |
# - experiment with k |
|
7 |
# A table of diagnostic is created. |
|
5 | 8 |
#AUTHOR: Benoit Parmentier |
6 | 9 |
#CREATED ON: 07/30/2014 |
7 |
#MODIFIED ON: 07/31/2014
|
|
10 |
#MODIFIED ON: 08/06/2014
|
|
8 | 11 |
#Version: 1 |
9 | 12 |
#PROJECT: Environmental Layers project |
10 | 13 |
#TO DO: |
11 |
# - experiment with gamma |
|
12 |
# - experiment with k |
|
13 | 14 |
# |
14 | 15 |
################################################################################################# |
15 | 16 |
|
... | ... | |
67 | 68 |
formula<-list_formulas[[k]] |
68 | 69 |
mod<- try(gam(formula, data=data_training)) #change to any model!! |
69 | 70 |
#mod<- try(autoKrige(formula, input_data=data_s,new_data=s_sgdf,data_variogram=data_s)) |
70 |
|
|
71 |
#This is where to change the function?? |
|
71 | 72 |
model_name<-paste("mod",k,sep="") |
72 | 73 |
assign(model_name,mod) |
73 | 74 |
list_fitted_models[[k]]<-mod |
... | ... | |
75 | 76 |
return(list_fitted_models) |
76 | 77 |
} |
77 | 78 |
|
79 |
## Maybe write a new function to experiment gam fitting based on the above results?? |
|
80 |
|
|
78 | 81 |
##Function to predict using a mod object from the workflow... |
79 | 82 |
predict_raster_model<-function(in_models,r_stack,out_filename){ |
80 | 83 |
|
... | ... | |
265 | 268 |
return(df_val) |
266 | 269 |
} |
267 | 270 |
|
271 |
## Run whole process of diagnostic creation for given formula (without k), training data and model name |
|
272 |
fit_gam_model_with_diagnostics <- function(l_k,data_training,formula,names_mod){ |
|
273 |
#Input parameters: |
|
274 |
#l_k: vector of k value for each smooth term |
|
275 |
#data_training: data.frame containing data for fitting of gam |
|
276 |
#names_mod: character (string) with name of the model fitted. |
|
277 |
|
|
278 |
library(mgcv) #to avoid error below?? Maybe an indepent graphic devices needs to be opened?? |
|
279 |
#Error in get(name, envir = asNamespace(pkg), inherits = FALSE) : |
|
280 |
#object 'rversion' not found |
|
281 |
#Graphics error: Error in get(name, envir = asNamespace(pkg), inherits = FALSE) : |
|
282 |
#object 'rversion' not found |
|
283 |
|
|
284 |
#STEP 1: fit with a range for k values |
|
285 |
|
|
286 |
#Fit models using given k values, formula and training dataset |
|
287 |
#This is done starting at l_k/2 e.g. c(30,10,10) becomes c(15,5,5) where k is assigned to each term in the order given |
|
288 |
l_k_obj <- test_k_gam(formula,l_k,data_training) |
|
289 |
|
|
290 |
#STEP 2: get k-index diagnostic for ech model |
|
291 |
|
|
292 |
#Now get k_index for each model and store it in a table. |
|
293 |
#function gam.check is sued |
|
294 |
|
|
295 |
list_df_k <- create_gam_check_table(l_k_obj) |
|
296 |
|
|
297 |
#STEP 3: produce additional metrics for diagnostis of fit |
|
298 |
#this includes the calculation of RMSE, MAE, bias, gcv,aic for fitted model |
|
299 |
list_mod <- l_k_obj$list_mod |
|
300 |
#remove try-error model!! |
|
301 |
list_mod<- list_mod[unlist(lapply(list_mod,FUN=function(x){!inherits(x,"try-error")}))] |
|
302 |
|
|
303 |
list_mod_gam_stat <- lapply(list_mod,FUN=extract_fitting_mod_gam_stat) |
|
304 |
|
|
305 |
#STEP 4: combine tables of diagnostics, format and add additional information |
|
306 |
|
|
307 |
#combine tables... |
|
308 |
l_df_diagnostics <- lapply(1:length(list_df_k),FUN=function(i,df_1,df_2){cbind(df_1[[i]],df_2[[i]])},df_1=list_df_k,df_2=list_mod_gam_stat) |
|
309 |
df_diagnostics <- do.call(rbind,l_df_diagnostics) |
|
310 |
df_diagnostics$date <- rep(unique(data_training$date),nrow(df_diagnostics)) |
|
311 |
df_diagnostics$month <- rep(unique(data_training$month)) |
|
312 |
df_diagnostics$pred_mod <-names_mod #defined earlier... (input argutment ) |
|
313 |
|
|
314 |
#Return the diagnostic table and model object |
|
315 |
names(list_mod) <- paste("t_",1:length(list_mod),sep="") |
|
316 |
diagnostics_obj <- list(df_diagnostics,list_mod) |
|
317 |
names(diagnostics_obj) <- c("df_diagnostics","list_mod") |
|
318 |
|
|
319 |
return(diagnostics_obj) |
|
320 |
} |
|
321 |
|
|
268 | 322 |
##############################s |
269 | 323 |
#### Parameters and constants |
270 | 324 |
|
... | ... | |
279 | 333 |
|
280 | 334 |
########################## START SCRIPT ############################## |
281 | 335 |
|
282 |
#### PART I: Explore fitting of GAM #### |
|
336 |
######################################## |
|
337 |
#### PART I: Explore fitting of GAM with k and gamma parameters #### |
|
283 | 338 |
|
284 | 339 |
raster_obj<- load_obj(raster_obj_infile) |
285 | 340 |
|
... | ... | |
318 | 373 |
formula <-list_formulas[[k]] |
319 | 374 |
mod<- try(gam(formula, data=data_training)) #does not fit!! as expected |
320 | 375 |
|
376 |
### TRY with different gamma |
|
377 |
|
|
378 |
mod_g1<- try(gam(y_var ~ s(lat, lon) + s(elev_s),gamma=1.4, data=data_training)) #change to any model!! |
|
379 |
gam.check(mod_g1) |
|
380 |
# k' edf k-index p-value |
|
381 |
#s(lat,lon) 29.000 9.470 0.967 0.32 |
|
382 |
#s(elev_s) 9.000 2.020 0.732 0.02 |
|
383 |
|
|
384 |
mod_g2<- try(gam(y_var ~ s(lat, lon) + s(elev_s),gamma=10, data=data_training)) #change to any model!! |
|
385 |
gam.check(mod_g2) #increase in k-index!! |
|
386 |
|
|
387 |
# k' edf k-index p-value |
|
388 |
#s(lat,lon) 29.00 29.00 1.50 1.00 |
|
389 |
#s(elev_s) 9.00 9.00 1.11 0.74 |
|
390 |
|
|
391 |
mod<- try(gam(y_var ~ s(lat, lon) + s(elev_s) + s(LST),gamma=1.4, data=data_training)) #does not fit! |
|
392 |
mod<- try(gam(y_var ~ s(lat, lon) + s(elev_s) + s(LST),gamma=10, data=data_training)) #does not fitl!! |
|
393 |
|
|
394 |
### TRY with different k |
|
395 |
|
|
321 | 396 |
mod_t1<- try(gam(y_var ~ s(lat, lon,k=14) + s(elev_s) + s(LST), data=data_training)) #change to any model!! |
322 | 397 |
gam.check(mod_t1) |
323 | 398 |
mod_t2<- try(gam(y_var ~ s(lat, lon,k=5) + s(elev_s) + s(LST), data=data_training)) #change to any model!! |
... | ... | |
338 | 413 |
mod_t1$aic |
339 | 414 |
mod_t1$edf |
340 | 415 |
|
416 |
######################################## |
|
341 | 417 |
#### PART II: Use the new functions to explore fitting with k-dimension in GAM #### |
342 | 418 |
|
343 | 419 |
j <- 7 # July |
... | ... | |
348 | 424 |
nb_models <- length((raster_obj$clim_method_mod_obj[[1]]$formulas)) |
349 | 425 |
list_formulas <- (raster_obj$clim_method_mod_obj[[1]]$formulas) |
350 | 426 |
pred_mod <- paste("mod",c(1:nb_models,"_kr"),sep="") |
427 |
|
|
428 |
#model_name<-paste("mod",k,sep="") |
|
351 | 429 |
#we are assuming no monthly hold out... |
352 | 430 |
#we are assuming only one specific daily prop? |
353 | 431 |
nb_models <- length(pred_mod) |
... | ... | |
357 | 435 |
#list_fitted_models<-vector("list",length(list_formulas)) |
358 | 436 |
k <-2 #select model 3 with LST |
359 | 437 |
names_mod <- paste("mod_",k,sep="") |
438 |
model_name<-paste("mod",k,sep="") |
|
439 |
|
|
360 | 440 |
formula <-list_formulas[[k]] |
361 | 441 |
|
362 | 442 |
l_k <- c(30,10,10) #default values |
... | ... | |
388 | 468 |
#choice of the highest k for fit and k_index > 1 |
389 | 469 |
#then use model 7 |
390 | 470 |
|
391 |
########### Use loop to fit model for for 1 to 12 ?##############
|
|
392 |
#Now function to run mod depending on conditions
|
|
471 |
########################################
|
|
472 |
#### PART III: Use the general functions to explore fitting with k-dimension in GAM ####
|
|
393 | 473 |
|
474 |
#This can be done accross different months ... |
|
475 |
#The general function provides a quick call to all function and formatting of diagnostic table |
|
394 | 476 |
#first make a function for one model (could be month) |
477 |
|
|
395 | 478 |
j <- 7 # July |
396 | 479 |
clim_method_mod_obj <- raster_obj$clim_method_mod_obj[[j]] |
397 | 480 |
#this is made of "clim",data_month, data_month_v , sampling_month_dat, mod and formulas |
... | ... | |
422 | 505 |
data_training <- clim_method_mod_obj$data_month |
423 | 506 |
|
424 | 507 |
#list_fitted_models<-vector("list",length(list_formulas)) |
425 |
k <-2 #select model 3 with LST |
|
426 |
names_mod <- paste("mod_",k,sep="") |
|
508 |
k <-2 #select model 2 with LST |
|
509 |
#names_mod <- paste("mod_",k,sep="") |
|
510 |
model_name<-paste("mod",k,sep="") |
|
511 |
|
|
427 | 512 |
formula <-list_formulas[[k]] |
428 | 513 |
|
429 | 514 |
l_k <- c(30,10,10) #default values |
430 | 515 |
|
431 |
test_df <- fit_gam_model_with_diagnostics(l_k,data_training,formula,names_mod) |
|
432 |
|
|
433 |
fit_gam_model_with_diagnostics <- function(l_k,data_training,formula,names_mod){ |
|
434 |
|
|
435 |
|
|
436 |
#STEP 1: fit with a range for k values |
|
437 |
|
|
438 |
#Fit models using given k values, formula and training dataset |
|
439 |
#This is done starting at l_k/2 e.g. c(30,10,10) becomes c(15,5,5) where k is assigned to each term in the order given |
|
440 |
l_k_obj <- test_k_gam(formula,l_k,data_training) |
|
441 |
|
|
442 |
#STEP 2: get k-index diagnostic for ech model |
|
443 |
|
|
444 |
#Now get k_index for each model and store it in a table. |
|
445 |
#function gam.check is sued |
|
446 |
|
|
447 |
list_df_k <- create_gam_check_table(l_k_obj) |
|
448 |
|
|
449 |
#STEP 3: produce additional metrics for diagnostis of fit |
|
450 |
#this includes the calculation of RMSE, MAE, bias, gcv,aic for fitted model |
|
451 |
list_mod <- l_k_obj$list_mod |
|
452 |
list_mod_gam_stat <- lapply(list_mod,FUN=extract_fitting_mod_gam_stat) |
|
453 |
|
|
454 |
#STEP 4: combine tables of diagnostics, format and add additional information |
|
516 |
#test_df <- fit_gam_model_with_diagnostics(l_k,data_training,formula,model_name) |
|
517 |
test_obj <- fit_gam_model_with_diagnostics(l_k,data_training,formula,model_name) |
|
455 | 518 |
|
456 |
#combine tables... |
|
457 |
l_df_diagnostics <- lapply(1:length(list_df_k),FUN=function(i,df_1,df_2){cbind(df_1[[i]],df_2[[2]])},df_1=list_df_k,df_2=list_mod_gam_stat) |
|
458 |
df_diagnostics <- do.call(rbind,l_df_diagnostics) |
|
459 |
df_diagnostics$date <- rep(unique(data_training$date),nrow(df_diagnostics)) |
|
460 |
df_diagnostics$month <- rep(unique(data_training$month)) |
|
461 |
df_diagnostics$pred_mod <-names_mod #defined earlier... (input argutment ) |
|
462 |
|
|
463 |
#Return the diagnostic table |
|
464 |
return(df_diagnostics) |
|
465 |
} |
|
519 |
#Now do this over 12 months?? |
|
466 | 520 |
|
467 | 521 |
################## END OF SCRIPT ############### |
Also available in: Unified diff
gam fitting diagnostic, degug functions, cleaning up of code