Project

General

Profile

« Previous | Next » 

Revision 42a1c3b5

Added by Benoit Parmentier over 10 years ago

gam fitting diagnostic, degug functions, cleaning up of code

View differences:

climate/research/oregon/interpolation/nex_run_gam_fitting.R
1 1
####################################  INTERPOLATION OF TEMPERATURES  #######################################
2 2
############################  Script for experimentation of GAM model fitting ##############################
3 3
#This script uses the worklfow code applied to the globe. Results currently reside on NEX/PLEIADES NASA.
4
#The purpose is to test different parameters for the GAM function to allow . 
4
#The purpose is to test different parameters for the GAM function to allow .
5
## - experiment with gamma
6
# - experiment with k
7
# A table of diagnostic is created.
5 8
#AUTHOR: Benoit Parmentier 
6 9
#CREATED ON: 07/30/2014  
7
#MODIFIED ON: 07/31/2014            
10
#MODIFIED ON: 08/06/2014            
8 11
#Version: 1
9 12
#PROJECT: Environmental Layers project  
10 13
#TO DO:
11
# - experiment with gamma
12
# - experiment with k
13 14
#
14 15
#################################################################################################
15 16

  
......
67 68
    formula<-list_formulas[[k]]
68 69
    mod<- try(gam(formula, data=data_training)) #change to any model!!
69 70
    #mod<- try(autoKrige(formula, input_data=data_s,new_data=s_sgdf,data_variogram=data_s))
70

  
71
    #This is where to change the function??
71 72
    model_name<-paste("mod",k,sep="")
72 73
    assign(model_name,mod) 
73 74
    list_fitted_models[[k]]<-mod
......
75 76
  return(list_fitted_models) 
76 77
}
77 78

  
79
## Maybe write a new function to experiment gam fitting based on the above results??
80

  
78 81
##Function to predict using a mod object from the workflow...
79 82
predict_raster_model<-function(in_models,r_stack,out_filename){
80 83

  
......
265 268
  return(df_val)
266 269
} 
267 270

  
271
## Run whole process of diagnostic creation for given formula (without k), training data and model name
272
fit_gam_model_with_diagnostics <- function(l_k,data_training,formula,names_mod){
273
  #Input parameters:
274
  #l_k: vector of k value for each smooth term
275
  #data_training: data.frame containing  data for fitting of  gam
276
  #names_mod: character (string) with name of the model fitted.
277
  
278
  library(mgcv) #to avoid error below?? Maybe an indepent graphic devices needs to be opened??
279
  #Error in get(name, envir = asNamespace(pkg), inherits = FALSE) : 
280
  #object 'rversion' not found
281
  #Graphics error: Error in get(name, envir = asNamespace(pkg), inherits = FALSE) : 
282
  #object 'rversion' not found
283
  
284
  #STEP 1: fit with a range for k values
285
  
286
  #Fit models using given k values, formula and training dataset   
287
  #This is done starting at l_k/2 e.g. c(30,10,10) becomes c(15,5,5) where k is assigned to each term in the order given
288
  l_k_obj <- test_k_gam(formula,l_k,data_training)
289
  
290
  #STEP 2: get  k-index diagnostic for ech model
291
  
292
  #Now get k_index for each model and store it in a table.
293
  #function gam.check is sued
294
  
295
  list_df_k <- create_gam_check_table(l_k_obj)
296
  
297
  #STEP 3: produce additional metrics for diagnostis of fit
298
  #this includes the calculation of RMSE, MAE, bias, gcv,aic for fitted model
299
  list_mod <- l_k_obj$list_mod
300
  #remove try-error model!!
301
  list_mod<- list_mod[unlist(lapply(list_mod,FUN=function(x){!inherits(x,"try-error")}))]
302

  
303
  list_mod_gam_stat <- lapply(list_mod,FUN=extract_fitting_mod_gam_stat)
304
  
305
  #STEP 4: combine tables of diagnostics, format and add additional information  
306

  
307
  #combine tables...
308
  l_df_diagnostics <- lapply(1:length(list_df_k),FUN=function(i,df_1,df_2){cbind(df_1[[i]],df_2[[i]])},df_1=list_df_k,df_2=list_mod_gam_stat)
309
  df_diagnostics <- do.call(rbind,l_df_diagnostics)
310
  df_diagnostics$date <- rep(unique(data_training$date),nrow(df_diagnostics))
311
  df_diagnostics$month <- rep(unique(data_training$month))
312
  df_diagnostics$pred_mod <-names_mod #defined earlier... (input argutment )
313
  
314
  #Return the diagnostic table and model object
315
  names(list_mod) <- paste("t_",1:length(list_mod),sep="")
316
  diagnostics_obj <- list(df_diagnostics,list_mod)
317
  names(diagnostics_obj) <- c("df_diagnostics","list_mod")
318
  
319
  return(diagnostics_obj)
320
}
321

  
268 322
##############################s
269 323
#### Parameters and constants  
270 324

  
......
279 333

  
280 334
########################## START SCRIPT ##############################
281 335

  
282
#### PART I: Explore fitting of GAM  ####
336
########################################
337
#### PART I: Explore fitting of GAM with k and gamma parameters ####
283 338

  
284 339
raster_obj<- load_obj(raster_obj_infile)
285 340

  
......
318 373
formula <-list_formulas[[k]]
319 374
mod<- try(gam(formula, data=data_training)) #does not fit!! as expected
320 375

  
376
### TRY with different gamma
377

  
378
mod_g1<- try(gam(y_var ~ s(lat, lon) + s(elev_s),gamma=1.4, data=data_training)) #change to any model!!
379
gam.check(mod_g1)
380
#               k'    edf k-index p-value
381
#s(lat,lon) 29.000  9.470   0.967    0.32
382
#s(elev_s)   9.000  2.020   0.732    0.02
383

  
384
mod_g2<- try(gam(y_var ~ s(lat, lon) + s(elev_s),gamma=10, data=data_training)) #change to any model!!
385
gam.check(mod_g2) #increase in k-index!!
386

  
387
#              k'   edf k-index p-value
388
#s(lat,lon) 29.00 29.00    1.50    1.00
389
#s(elev_s)   9.00  9.00    1.11    0.74
390

  
391
mod<- try(gam(y_var ~ s(lat, lon) + s(elev_s) + s(LST),gamma=1.4, data=data_training)) #does not fit!
392
mod<- try(gam(y_var ~ s(lat, lon) + s(elev_s) + s(LST),gamma=10, data=data_training)) #does not fitl!!
393

  
394
### TRY with different k
395

  
321 396
mod_t1<- try(gam(y_var ~ s(lat, lon,k=14) + s(elev_s) + s(LST), data=data_training)) #change to any model!!
322 397
gam.check(mod_t1)
323 398
mod_t2<- try(gam(y_var ~ s(lat, lon,k=5) + s(elev_s) + s(LST), data=data_training)) #change to any model!!
......
338 413
mod_t1$aic
339 414
mod_t1$edf
340 415

  
416
########################################
341 417
#### PART II: Use the new functions to explore fitting with k-dimension in GAM  ####
342 418

  
343 419
j <- 7 # July
......
348 424
nb_models <- length((raster_obj$clim_method_mod_obj[[1]]$formulas))
349 425
list_formulas <- (raster_obj$clim_method_mod_obj[[1]]$formulas)
350 426
pred_mod <- paste("mod",c(1:nb_models,"_kr"),sep="")
427

  
428
#model_name<-paste("mod",k,sep="")
351 429
#we are assuming no monthly hold out...
352 430
#we are assuming only one specific daily prop?
353 431
nb_models <- length(pred_mod)
......
357 435
#list_fitted_models<-vector("list",length(list_formulas))
358 436
k <-2 #select model 3 with LST
359 437
names_mod <- paste("mod_",k,sep="")
438
model_name<-paste("mod",k,sep="")
439

  
360 440
formula <-list_formulas[[k]]
361 441

  
362 442
l_k <- c(30,10,10) #default values
......
388 468
#choice of the highest k for fit and k_index > 1
389 469
#then use model 7
390 470

  
391
########### Use loop to fit model for for  1 to  12 ?##############
392
#Now function to run mod depending on conditions
471
########################################
472
#### PART III: Use the general functions to explore fitting with k-dimension in GAM  ####
393 473

  
474
#This can be done accross different months ...
475
#The general function provides a quick call to all function and formatting of diagnostic table
394 476
#first make a function for one model (could be month)
477

  
395 478
j <- 7 # July
396 479
clim_method_mod_obj <- raster_obj$clim_method_mod_obj[[j]]
397 480
#this is made of "clim",data_month, data_month_v , sampling_month_dat, mod and formulas
......
422 505
data_training <- clim_method_mod_obj$data_month
423 506

  
424 507
#list_fitted_models<-vector("list",length(list_formulas))
425
k <-2 #select model 3 with LST
426
names_mod <- paste("mod_",k,sep="")
508
k <-2 #select model 2 with LST
509
#names_mod <- paste("mod_",k,sep="")
510
model_name<-paste("mod",k,sep="")
511

  
427 512
formula <-list_formulas[[k]]
428 513

  
429 514
l_k <- c(30,10,10) #default values
430 515

  
431
test_df <- fit_gam_model_with_diagnostics(l_k,data_training,formula,names_mod)
432

  
433
fit_gam_model_with_diagnostics <- function(l_k,data_training,formula,names_mod){
434

  
435
  
436
  #STEP 1: fit with a range for k values
437
  
438
  #Fit models using given k values, formula and training dataset   
439
  #This is done starting at l_k/2 e.g. c(30,10,10) becomes c(15,5,5) where k is assigned to each term in the order given
440
  l_k_obj <- test_k_gam(formula,l_k,data_training)
441
  
442
  #STEP 2: get  k-index diagnostic for ech model
443
  
444
  #Now get k_index for each model and store it in a table.
445
  #function gam.check is sued
446
  
447
  list_df_k <- create_gam_check_table(l_k_obj)
448
  
449
  #STEP 3: produce additional metrics for diagnostis of fit
450
  #this includes the calculation of RMSE, MAE, bias, gcv,aic for fitted model
451
  list_mod <- l_k_obj$list_mod
452
  list_mod_gam_stat <- lapply(list_mod,FUN=extract_fitting_mod_gam_stat)
453
  
454
  #STEP 4: combine tables of diagnostics, format and add additional information  
516
#test_df <- fit_gam_model_with_diagnostics(l_k,data_training,formula,model_name)
517
test_obj <- fit_gam_model_with_diagnostics(l_k,data_training,formula,model_name)
455 518

  
456
  #combine tables...
457
  l_df_diagnostics <- lapply(1:length(list_df_k),FUN=function(i,df_1,df_2){cbind(df_1[[i]],df_2[[2]])},df_1=list_df_k,df_2=list_mod_gam_stat)
458
  df_diagnostics <- do.call(rbind,l_df_diagnostics)
459
  df_diagnostics$date <- rep(unique(data_training$date),nrow(df_diagnostics))
460
  df_diagnostics$month <- rep(unique(data_training$month))
461
  df_diagnostics$pred_mod <-names_mod #defined earlier... (input argutment )
462
  
463
  #Return the diagnostic table
464
  return(df_diagnostics)
465
}
519
#Now do this over 12 months??
466 520

  
467 521
################## END OF SCRIPT ###############

Also available in: Unified diff