Revision 80bb01bb
Added by Benoit Parmentier over 10 years ago
climate/research/oregon/interpolation/nex_run_gam_fitting.R | ||
---|---|---|
141 | 141 |
return(tb_list_tmp) #this is a data.frame |
142 | 142 |
} |
143 | 143 |
|
144 |
##############################s |
|
145 |
#### Parameters and constants |
|
146 |
|
|
144 | 147 |
#scp -rp raster_prediction_obj_gam_CAI_dailyTmax15.0_20.0.RData parmentier@atlas.nceas.ucsb.edu:/data/project/layers/commons/NEX_data/output_regions/15.0_20.0" |
145 | 148 |
#scp -rp reg*.tif parmentier@atlas.nceas.ucsb.edu:/data/project/layers/commons/NEX_data/output_regions/15.0_20.0" |
146 | 149 |
|
... | ... | |
149 | 152 |
|
150 | 153 |
setwd(in_dir) |
151 | 154 |
|
155 |
########################## START SCRIPT ############################## |
|
156 |
|
|
152 | 157 |
raster_obj<- load_obj(raster_obj_infile) |
153 | 158 |
|
154 | 159 |
#raster_obj <- load_obj(unlist(raster_obj_file)) #may not need unlist |
... | ... | |
184 | 189 |
vis.gam(mod1) |
185 | 190 |
vis.gam(mod1,view=c("lat","lon"),theta= 35) # plot against by variable |
186 | 191 |
#http://stats.stackexchange.com/questions/12223/how-to-tune-smoothing-in-mgcv-gam-model |
192 |
mod1<- try(gam(y_var ~ s(lat, lon,k=22) + s(elev_s) + s(LST), data=data_training)) #change to any model!! |
|
187 | 193 |
|
194 |
##New functions... |
|
188 | 195 |
fit_models<-function(list_formulas,data_training){ |
189 | 196 |
|
190 | 197 |
#This functions several models and returns model objects. |
... | ... | |
209 | 216 |
return(list_fitted_models) |
210 | 217 |
} |
211 | 218 |
|
212 |
mod1<- try(gam(y_var ~ s(lat, lon,k=22) + s(elev_s) + s(LST), data=data_training)) #change to any model!! |
|
213 |
|
|
214 |
> sink("test.txt") |
|
215 |
> (gam.check(mod1)) |
|
216 |
> getwd(0) |
|
217 |
Error in getwd(0) : unused argument (0) |
|
218 |
> getwd() |
|
219 |
> sink() |
|
220 |
> getwd(0) |
|
221 |
Error in getwd(0) : unused argument (0) |
|
222 |
> getwd() |
|
223 |
[1] "/data/project/layers/commons/NEX_data/output_regions/15.0_20.0" |
|
224 |
> system("more test.txt") |
|
225 |
|
|
219 |
##Format formula using prescribed k values for gam |
|
226 | 220 |
format_formula_k_fun <- function(formula,l_k){ |
227 | 221 |
#l_k: list of k parameter for GAM method with the value of k and the name of the variable |
228 | 222 |
#split the formula by term |
... | ... | |
241 | 235 |
#mod <- try(gam(formula_k, data=data_training)) #change to any model!! |
242 | 236 |
} |
243 | 237 |
|
244 |
test_k_gam <- function(formula,l_k){ |
|
238 |
## Fit model using training data, formula and k dimensions with increment |
|
239 |
test_k_gam <- function(formula,l_k,data_training){ |
|
245 | 240 |
|
246 | 241 |
l_k_tmp <- as.numeric(l_k)/2 |
247 | 242 |
#l_k_tmp <- l_k |
... | ... | |
259 | 254 |
mod <- try(gam(formula_k, data=data_training)) #change to any model!! |
260 | 255 |
list_l_k[[j]] <- l_k_tmp |
261 | 256 |
list_mod[[j]] <- mod |
262 |
l_k_tmp <- l_k_tmp + rep(1,length(l_k_tmp)) |
|
257 |
#if opt_increase{} |
|
258 |
l_k_tmp <- l_k_tmp + rep(1,length(l_k_tmp)) #can modify this option to go -1 instead of plus 1? |
|
259 |
#if opt_decrease{} |
|
260 |
l_k_tmp <- l_k_tmp + rep(1,length(l_k_tmp)) #can modify this option to go -1 instead of plus 1? |
|
261 |
|
|
263 | 262 |
j <- j+1 |
264 | 263 |
limit_l_k <- (l_k_tmp > l_k) #now check that c(30,10,10 is not gone above!!) |
265 | 264 |
for (i in 1:length(limit_l_k)){ |
... | ... | |
269 | 268 |
} |
270 | 269 |
} |
271 | 270 |
#store mod and l_k in a object |
272 |
l_k_obj <- list(list_mod,l_k_tmp)
|
|
273 |
names(l_k_obj) <- c("list_mod","l_k_tmp")
|
|
271 |
l_k_obj <- list(list_mod,list_l_k)
|
|
272 |
names(l_k_obj) <- c("list_mod","list_l_k")
|
|
274 | 273 |
|
275 | 274 |
return(l_k_obj) |
276 | 275 |
} |
277 | 276 |
|
278 |
#d |
|
279 |
#Now get k_index for each model and store it in a table. |
|
280 |
#function gam.check with |
|
281 |
#select the right mode based on k-index, edf or other criteria? |
|
282 |
|
|
283 |
l_k <- vector("list",3) |
|
284 |
l_k <- list(25,10,10) |
|
285 |
l_k <- c(30,10,10) |
|
277 |
## generate table of edf and k_index based on fitted models |
|
278 |
create_gam_check_table <-function(l_k_obj){ |
|
279 |
list_mod <- l_k_obj$list_mod |
|
280 |
list_l_k <- l_k_obj$list_l_k |
|
286 | 281 |
|
282 |
#remove try-error model!! |
|
283 |
list_mod<- list_mod[unlist(lapply(list_mod,FUN=function(x){!inherits(x,"try-error")}))] |
|
284 |
list_df_k <- vector("list",length(list_mod)) |
|
285 |
|
|
286 |
for(i in 1:length(list_mod)){ |
|
287 |
mod <- list_mod[[i]] |
|
288 |
l_k <- list_l_k[[i]] |
|
289 |
sink("test.txt") |
|
290 |
gam.check(mod) |
|
291 |
sink() |
|
292 |
f <- readLines("test.txt") #read all lines |
|
293 |
f_tmp <- strsplit(f," ") |
|
294 |
names_term <- attr(terms(formula),"term.labels") |
|
295 |
match(f_tmp,names_term[1]) |
|
296 |
grepl(glob2rx(paste(names_term[1],"*",sep="")),f_tmp) |
|
297 |
selected_lines_s <- grep("s\\(",f_tmp) |
|
298 |
selected_lines_ti <- grep("ti\\(",f_tmp) |
|
299 |
selected_lines_te <- grep("te\\(",f_tmp) |
|
300 |
selected_lines <- sort(c(selected_lines_s,selected_lines_te,selected_lines_ti)) |
|
301 |
#selected_lines <- c(min(selected_lines)-1,selected_lines) |
|
302 |
|
|
303 |
rows <- f[selected_lines] |
|
304 |
list_df <- vector("list",length(rows)) |
|
305 |
for (j in 1:length(rows)){ |
|
306 |
tmp <- unlist(strsplit(rows[j]," ",fixed=T)) |
|
307 |
index <- unlist(lapply(tmp,FUN=function(x){x!=""})) |
|
308 |
tmp<- tmp[index] |
|
309 |
list_df[[j]] <- data.frame(term=tmp[1],kp=tmp[2],edf=tmp[3],k_index=tmp[4],p_value=tmp[5]) |
|
310 |
} |
|
311 |
df_k <-do.call(rbind,list_df) |
|
312 |
df_k$k <- l_k |
|
313 |
df_k$fit_no <- paste("t_",i,sep="") |
|
314 |
list_df_k[[i]] <- df_k |
|
315 |
} |
|
316 |
return(list_df_k) |
|
317 |
} |
|
287 | 318 |
|
288 |
#1. Fit with standard settings: |
|
289 |
#(Start at k=10 or k=30 if 2 interactive term.) |
|
290 |
#start with the highest k first... |
|
291 |
#If does not fit then |
|
292 | 319 |
|
293 |
#mod <- try(gam(formula_k, data=data_training)) #change to any model!! |
|
320 |
#l_k <- vector("list",3) |
|
321 |
#l_k <- list(25,10,10) |
|
322 |
l_k <- c(30,10,10) |
|
323 |
l_k_obj <- test_k_gam(formula,l_k,data_training) |
|
294 | 324 |
|
295 |
# K=15 for interactive term and k=5 unique term: |
|
296 |
# - do gam.check: |
|
297 |
# If k-index < 1 |
|
298 |
# Then increase k by 1 for interactive term and fit again |
|
299 |
# If k-index >1 or k=10 for unique term and k=30 |
|
300 |
#Breakā¦ |
|
325 |
#d |
|
326 |
#Now get k_index for each model and store it in a table. |
|
327 |
#function gam.check with |
|
328 |
#select the right mode based on k-index, edf or other criteria? |
|
301 | 329 |
|
330 |
#Now function to run mod depending on conditions |
|
302 | 331 |
|
303 |
#Now function to run mod depending on conditions |
|
332 |
################## END OF SCRIPT ############### |
Also available in: Unified diff
gam fitting diagnostic for low station count, adding function to collect k-index diagnostics