Project

General

Profile

« Previous | Next » 

Revision 80bb01bb

Added by Benoit Parmentier over 10 years ago

gam fitting diagnostic for low station count, adding function to collect k-index diagnostics

View differences:

climate/research/oregon/interpolation/nex_run_gam_fitting.R
141 141
  return(tb_list_tmp) #this is  a data.frame
142 142
}
143 143

  
144
##############################s
145
#### Parameters and constants  
146

  
144 147
#scp -rp raster_prediction_obj_gam_CAI_dailyTmax15.0_20.0.RData parmentier@atlas.nceas.ucsb.edu:/data/project/layers/commons/NEX_data/output_regions/15.0_20.0"
145 148
#scp -rp reg*.tif parmentier@atlas.nceas.ucsb.edu:/data/project/layers/commons/NEX_data/output_regions/15.0_20.0"
146 149

  
......
149 152

  
150 153
setwd(in_dir)
151 154

  
155
########################## START SCRIPT ##############################
156

  
152 157
raster_obj<- load_obj(raster_obj_infile)
153 158

  
154 159
#raster_obj <- load_obj(unlist(raster_obj_file)) #may not need unlist
......
184 189
vis.gam(mod1)
185 190
vis.gam(mod1,view=c("lat","lon"),theta= 35) # plot against by variable
186 191
#http://stats.stackexchange.com/questions/12223/how-to-tune-smoothing-in-mgcv-gam-model
192
mod1<- try(gam(y_var ~ s(lat, lon,k=22) + s(elev_s) + s(LST), data=data_training)) #change to any model!!
187 193

  
194
##New functions...
188 195
fit_models<-function(list_formulas,data_training){
189 196

  
190 197
  #This functions several models and returns model objects.
......
209 216
  return(list_fitted_models) 
210 217
}
211 218

  
212
mod1<- try(gam(y_var ~ s(lat, lon,k=22) + s(elev_s) + s(LST), data=data_training)) #change to any model!!
213

  
214
> sink("test.txt")
215
> (gam.check(mod1))
216
> getwd(0)
217
Error in getwd(0) : unused argument (0)
218
> getwd()
219
> sink()
220
> getwd(0)
221
Error in getwd(0) : unused argument (0)
222
> getwd()
223
[1] "/data/project/layers/commons/NEX_data/output_regions/15.0_20.0"
224
> system("more test.txt")
225

  
219
##Format formula using prescribed k values for gam
226 220
format_formula_k_fun <- function(formula,l_k){
227 221
  #l_k: list of k parameter for GAM method with the value of k and the name of the variable
228 222
  #split the formula by term 
......
241 235
  #mod <- try(gam(formula_k, data=data_training)) #change to any model!!
242 236
}
243 237

  
244
test_k_gam <- function(formula,l_k){
238
## Fit model using training  data, formula and k dimensions with increment
239
test_k_gam <- function(formula,l_k,data_training){
245 240

  
246 241
  l_k_tmp <- as.numeric(l_k)/2
247 242
  #l_k_tmp <- l_k
......
259 254
    mod <- try(gam(formula_k, data=data_training)) #change to any model!! 
260 255
    list_l_k[[j]] <- l_k_tmp
261 256
    list_mod[[j]] <- mod
262
    l_k_tmp <- l_k_tmp + rep(1,length(l_k_tmp))
257
    #if opt_increase{}
258
    l_k_tmp <- l_k_tmp + rep(1,length(l_k_tmp)) #can modify this option to go -1 instead of plus 1?
259
    #if opt_decrease{}
260
    l_k_tmp <- l_k_tmp + rep(1,length(l_k_tmp)) #can modify this option to go -1 instead of plus 1?
261
    
263 262
    j <- j+1
264 263
    limit_l_k <- (l_k_tmp > l_k) #now check that c(30,10,10 is not gone above!!)
265 264
    for (i in 1:length(limit_l_k)){
......
269 268
    }
270 269
  }
271 270
  #store mod and l_k in a object
272
  l_k_obj <- list(list_mod,l_k_tmp)
273
  names(l_k_obj) <- c("list_mod","l_k_tmp")
271
  l_k_obj <- list(list_mod,list_l_k)
272
  names(l_k_obj) <- c("list_mod","list_l_k")
274 273
  
275 274
  return(l_k_obj)
276 275
}
277 276

  
278
#d
279
#Now get k_index for each model and store it in a table.
280
#function gam.check with
281
#select the  right mode based on k-index, edf or other criteria?
282

  
283
l_k <- vector("list",3)
284
l_k <- list(25,10,10)
285
l_k <- c(30,10,10)
277
## generate table of edf and k_index based on fitted models  
278
create_gam_check_table <-function(l_k_obj){
279
  list_mod <- l_k_obj$list_mod
280
  list_l_k <- l_k_obj$list_l_k
286 281

  
282
  #remove try-error model!!
283
  list_mod<- list_mod[unlist(lapply(list_mod,FUN=function(x){!inherits(x,"try-error")}))]
284
  list_df_k <- vector("list",length(list_mod))
285
  
286
  for(i in 1:length(list_mod)){
287
    mod <- list_mod[[i]]
288
    l_k <- list_l_k[[i]]
289
    sink("test.txt")
290
    gam.check(mod)
291
    sink()
292
    f <- readLines("test.txt") #read all lines
293
    f_tmp <- strsplit(f," ")
294
    names_term <- attr(terms(formula),"term.labels")
295
    match(f_tmp,names_term[1])
296
    grepl(glob2rx(paste(names_term[1],"*",sep="")),f_tmp)
297
    selected_lines_s <- grep("s\\(",f_tmp)
298
    selected_lines_ti <- grep("ti\\(",f_tmp)
299
    selected_lines_te <- grep("te\\(",f_tmp)    
300
    selected_lines <- sort(c(selected_lines_s,selected_lines_te,selected_lines_ti))
301
    #selected_lines <- c(min(selected_lines)-1,selected_lines)
302
    
303
    rows <- f[selected_lines]
304
    list_df <- vector("list",length(rows))
305
    for (j in  1:length(rows)){
306
      tmp <- unlist(strsplit(rows[j]," ",fixed=T))
307
      index <- unlist(lapply(tmp,FUN=function(x){x!=""}))
308
      tmp<- tmp[index]
309
      list_df[[j]] <- data.frame(term=tmp[1],kp=tmp[2],edf=tmp[3],k_index=tmp[4],p_value=tmp[5])
310
    }
311
    df_k <-do.call(rbind,list_df)
312
    df_k$k  <- l_k 
313
    df_k$fit_no <- paste("t_",i,sep="")
314
    list_df_k[[i]] <- df_k
315
  }
316
  return(list_df_k)
317
}
287 318

  
288
#1. Fit with standard settings:
289
#(Start at k=10 or k=30 if 2 interactive term.)
290
#start with the highest k first...
291
#If  does not fit then
292 319

  
293
#mod <- try(gam(formula_k, data=data_training)) #change to any model!!
320
#l_k <- vector("list",3)
321
#l_k <- list(25,10,10)
322
l_k <- c(30,10,10)
323
l_k_obj <- test_k_gam(formula,l_k,data_training)
294 324

  
295
#         K=15 for interactive term and k=5 unique term:
296
#         - do gam.check:
297
#                    If k-index < 1
298
#                     Then increase k by 1 for interactive term and  fit again
299
#          If k-index >1 or k=10 for unique term and k=30  
300
#Breakā€¦
325
#d
326
#Now get k_index for each model and store it in a table.
327
#function gam.check with
328
#select the  right mode based on k-index, edf or other criteria?
301 329

  
330
#Now function to run mod depending on conditions
302 331

  
303
#Now function to run mod depending on conditions
332
################## END OF SCRIPT ###############

Also available in: Unified diff