Revision 9287ad3b
Added by Benoit Parmentier over 11 years ago
climate/research/oregon/interpolation/analyses_papers_methods_comparison_part1.R | ||
---|---|---|
1 |
######################################## Paper Methods_comparison_FSS ####################################### |
|
2 |
############################ Scripts for figures and analyses for the the IBS poster ##################################### |
|
3 |
#This script performs analyses and create figures for the FSS paper. |
|
4 |
#It uses inputs from interpolation objects created at earlier stages... # |
|
5 |
#AUTHOR: Benoit Parmentier # |
|
6 |
#DATE: 06/27/2013 # |
|
7 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#491-- # |
|
8 |
################################################################################################### |
|
9 |
|
|
10 |
###Loading R library and packages |
|
11 |
#library(gtools) # loading some useful tools |
|
12 |
library(mgcv) # GAM package by Wood 2006 (version 2012) |
|
13 |
library(sp) # Spatial pacakge with class definition by Bivand et al. 2008 |
|
14 |
library(spdep) # Spatial package with methods and spatial stat. by Bivand et al. 2012 |
|
15 |
library(rgdal) # GDAL wrapper for R, spatial utilities (Keitt et al. 2012) |
|
16 |
library(gstat) # Kriging and co-kriging by Pebesma et al. 2004 |
|
17 |
library(automap) # Automated Kriging based on gstat module by Hiemstra et al. 2008 |
|
18 |
library(spgwr) |
|
19 |
library(maptools) |
|
20 |
library(graphics) |
|
21 |
library(parallel) # Urbanek S. and Ripley B., package for multi cores & parralel processing |
|
22 |
library(raster) |
|
23 |
library(rasterVis) |
|
24 |
library(plotrix) # Draw circle on graph and additional plotting options |
|
25 |
library(reshape) # Data format and type transformation |
|
26 |
|
|
27 |
## Functions |
|
28 |
#loading R objects that might have similar names |
|
29 |
load_obj <- function(f) |
|
30 |
{ |
|
31 |
env <- new.env() |
|
32 |
nm <- load(f, env)[1] |
|
33 |
env[[nm]] |
|
34 |
} |
|
35 |
|
|
36 |
script_path<-"/home/parmentier/Data/IPLANT_project/env_layers_scripts/" |
|
37 |
source(file.path(script_path,"interpolation_method_day_function_multisampling_06082013.R")) #Include GAM_day |
|
38 |
|
|
39 |
## Parmeters |
|
40 |
|
|
41 |
in_dir<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_GAM_fus_all_lst_05312013" |
|
42 |
setwd(in_dir) |
|
43 |
y_var_name <- "dailyTmax" |
|
44 |
y_var_month <- "TMax" |
|
45 |
#y_var_month <- "LSTD_bias" |
|
46 |
|
|
47 |
infile_covariates<-list.files(pattern="covariates.*.tif") |
|
48 |
out_prefix<-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_GAM_fus_all_lst_05312013" |
|
49 |
method_interpolation <- "gam_fusion" |
|
50 |
raster_obj_file <- "raster_prediction_obj_gam_fusion_dailyTmax_365d_GAM_fus_all_lst_05312013.RData" |
|
51 |
#Load objects containing training, testing, models objects |
|
52 |
|
|
53 |
#The names of covariates can be changed...these names should be output/input from covar script!!! |
|
54 |
rnames<-c("x","y","lon","lat","N","E","N_w","E_w","elev_s","slope","aspect","CANHEIGHT","DISTOC") |
|
55 |
lc_names<-c("LC1","LC2","LC3","LC4","LC5","LC6","LC7","LC8","LC9","LC10","LC11","LC12") |
|
56 |
#lc_names<-c("LC1","LC2","LC3","LC4","LC5","LC6","LC7","LC8","LC9","LC10") #use older version for continuity check to be changed |
|
57 |
lst_names<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12", |
|
58 |
"nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08", |
|
59 |
"nobs_09","nobs_10","nobs_11","nobs_12") |
|
60 |
covar_names<-c(rnames,lc_names,lst_names) |
|
61 |
|
|
62 |
#formulas<- |
|
63 |
|
|
64 |
list_models<-c("y_var ~ s(elev_s)", |
|
65 |
"y_var ~ s(LST)", |
|
66 |
"y_var ~ s(elev_s,LST)", |
|
67 |
"y_var ~ s(lat) + s(lon)+ s(elev_s)", |
|
68 |
"y_var ~ s(lat,lon,elev_s)", |
|
69 |
"y_var ~ s(lat,lon) + s(elev_s) + s(N_w,E_w) + s(LST)", |
|
70 |
"y_var ~ s(lat,lon) + s(elev_s) + s(N_w,E_w) + s(LST) + s(LC2)", |
|
71 |
"y_var ~ s(lat,lon) + s(elev_s) + s(N_w,E_w) + s(LST) + s(LC6)", |
|
72 |
"y_var ~ s(lat,lon) + s(elev_s) + s(N_w,E_w) + s(LST) + s(DISTOC)") |
|
73 |
|
|
74 |
raster_prediction_obj <-load_obj(raster_obj_file) |
|
75 |
#gam_cai_mod <-load_obj("results_mod_obj__365d_GAM_CAI4_all_12272012.RData") |
|
76 |
|
|
77 |
names(raster_prediction_obj) #list of two objects |
|
78 |
|
|
79 |
clim_method_mod_obj<-raster_prediction_obj$clim_method_mod_obj |
|
80 |
raster_prediction_obj$summary_metrics_v |
|
81 |
|
|
82 |
j<-1 #selected month for climatology |
|
83 |
i<-1 |
|
84 |
data_s <- raster_prediction_obj$method_mod_obj[[i]]$data_s #training data |
|
85 |
data_v <- raster_prediction_obj$method_mod_obj[[i]]$data_v #testing data |
|
86 |
|
|
87 |
data_month1 <- clim_method_mod_obj[[1]]$data_month #monthly data |
|
88 |
data_month <- clim_method_mod_obj[[j]]$data_month #monthly data |
|
89 |
clim_mod_obj_month <- clim_method_mod_obj[[j]] |
|
90 |
names(clim_mod_obj_month) |
|
91 |
|
|
92 |
##### |
|
93 |
s_raster <- brick(infile_covariates) |
|
94 |
names(s_raster)<-covar_names |
|
95 |
data_s$y_var <- data_s[[y_var_name]] |
|
96 |
formula<-"y_var ~ s(lat,lon,elev_s)" |
|
97 |
|
|
98 |
|
|
99 |
### MONTH MODELS |
|
100 |
|
|
101 |
data_month$y_var<-data_month[[y_var_month]] |
|
102 |
mod_mgam1_s <- gam(y_var ~ s(lat,lon,elev_s),data=data_month) |
|
103 |
mod_mgam1_s |
|
104 |
|
|
105 |
raster_pred_s<- predict(object=s_raster,model=mod_mgam1_s,na.rm=FALSE) |
|
106 |
names(raster_pred_s)<-"raster_pred_s" |
|
107 |
plot(raster_pred_s) |
|
108 |
plot(data_month, add=TRUE) |
|
109 |
levelplot(raster_pred_s) |
|
110 |
|
|
111 |
mod_mgam1_te <- gam(y_var ~ te(lat,lon,elev_s),data=data_month) |
|
112 |
mod_mgam1_te |
|
113 |
|
|
114 |
raster_pred_te<- predict(object=s_raster,model=mod_mgam1_te,na.rm=FALSE) |
|
115 |
names(raster_pred_te)<- "raster_pred_te" |
|
116 |
|
|
117 |
plot(raster_pred_te) |
|
118 |
plot(data_month, add=TRUE) |
|
119 |
|
|
120 |
raster_comp <- stack(raster_pred_te,raster_pred_s) |
|
121 |
plot(raster_comp) |
|
122 |
levelplot(raster_comp) |
|
123 |
|
|
124 |
diff<- raster_pred_s -raster_pred_te |
|
125 |
freq(diff) |
|
126 |
plot(diff) |
|
127 |
|
|
128 |
vis.gam(mod_mgam1_s,view=c("elev_s","lon")) |
|
129 |
vis.gam(mod_mgam1_te,view=c("elev_s","lon")) |
|
130 |
|
|
131 |
vis.gam(mod_mgam1_te,view=c("elev_s","lon"),theta=210,phi=40) |
|
132 |
vis.gam(mod_mgam1_s,view=c("elev_s","lon"),theta=210,phi=40) |
|
133 |
|
|
134 |
vis.gam(mod_mgam1_s,view=c("elev_s","lon"),theta=210,phi=40,plot.type="contour") |
|
135 |
vis.gam(mod_mgam1_te,view=c("elev_s","lon"),theta=210,phi=40,plot.type="contour") |
|
136 |
|
|
137 |
### USING LST and elev_s |
|
138 |
|
|
139 |
data_month<-data_month[data_month$month==j,] #Subsetting dataset for the relevant month of the date being processed |
|
140 |
LST_name<-lst_avg[j] # name of LST month to be matched |
|
141 |
data_month$LST<-data_month[[LST_name]] |
|
142 |
pos<-match("LST",names(s_raster)) #Find the position of the layer with name "LST", if not present pos=NA |
|
143 |
s_raster<-dropLayer(s_raster,pos) # If it exists drop layer |
|
144 |
LST<-subset(s_raster,LST_name) |
|
145 |
names(LST)<-"LST" |
|
146 |
s_raster<-addLayer(s_raster,LST) #Adding current month |
|
147 |
|
|
148 |
mod_mgam1_s <- gam(y_var ~ s(LST,elev_s),data=data_month) |
|
149 |
mod_mgam1_s |
|
150 |
|
|
151 |
raster_pred_s <- predict(object=s_raster,model=mod_mgam1_s,na.rm=FALSE) |
|
152 |
names(raster_pred_s)<-"raster_pred_s" |
|
153 |
plot(raster_pred_s) |
|
154 |
plot(data_month, add=TRUE) |
|
155 |
levelplot(raster_pred_s) |
|
156 |
|
|
157 |
mod_mgam1_te <- gam(y_var ~ te(LST,elev_s),data=data_month) |
|
158 |
mod_mgam1_te |
|
159 |
|
|
160 |
raster_pred_te<- predict(object=s_raster,model=mod_mgam1_te,na.rm=FALSE) |
|
161 |
names(raster_pred_te)<- "raster_pred_te" |
|
162 |
|
|
163 |
plot(raster_pred_te) |
|
164 |
plot(data_month, add=TRUE) |
|
165 |
|
|
166 |
raster_comp <- stack(raster_pred_te,raster_pred_s) |
|
167 |
plot(raster_comp) |
|
168 |
levelplot(raster_comp) |
|
169 |
|
|
170 |
diff<- raster_pred_s -raster_pred_te |
|
171 |
freq(diff) |
|
172 |
plot(diff) |
|
173 |
|
|
174 |
vis.gam(mod_mgam1_s,view=c("LST","elev_s")) |
|
175 |
vis.gam(mod_mgam1_te,view=c("LST","elev_s")) |
|
176 |
|
|
177 |
vis.gam(mod_mgam1_te,view=c("LST","elev_s"),theta=210,phi=40) |
|
178 |
vis.gam(mod_mgam1_s,view=c("LST","elev_s"),theta=210,phi=40) |
|
179 |
|
|
180 |
vis.gam(mod_mgam1_s,view=c("LST","elev_s"),theta=210,phi=40,plot.type="contour") |
|
181 |
vis.gam(mod_mgam1_te,view=c("LST","elev_s"),theta=210,phi=40,plot.type="contour") |
|
182 |
|
|
183 |
### TEST USING TI() bases |
|
184 |
|
|
185 |
mod_mgam1_ti <-gam(y_var~ ti(LST, elev_s),data=data_month) |
|
186 |
mod_mgam1_ti |
|
187 |
|
|
188 |
raster_pred_ti<- predict(object=s_raster,model=mod_mgam1_ti,na.rm=FALSE) |
|
189 |
names(raster_pred_ti)<- "raster_pred_ti" |
|
190 |
|
|
191 |
plot(raster_pred_ti) |
|
192 |
|
|
193 |
vis.gam(mod_mgam1_ti,view=c("LST","elev_s")) |
|
194 |
|
|
195 |
vis.gam(mod_mgam1_ti,view=c("LST","elev_s"),theta=210,phi=40) |
|
196 |
|
|
197 |
### USE DIFFERENT BASE |
|
198 |
|
|
199 |
mod_mgam1_s <- gam(y_var ~ s(LST,elev_s,bs="cr"),data=data_month) #DOES NOT WORK SINCE CR ONLY HANDLES ONE VARIABLE |
|
200 |
mod_mgam1_s <- gam(y_var ~ s(LST,elev_s,bs="cc"),data=data_month) #DOES NOT WORK SINCE CR ONLY HANDLES ONE VARIABLE |
|
201 |
|
|
202 |
mod_mgam1_s <- gam(y_var ~ s(LST,elev_s,bs="ps"),data=data_month) #DOES NOT WORK SINCE CR ONLY HANDLES ONE VARIABLE |
|
203 |
mod_mgam1_s <- gam(y_var ~ s(LST,bs="ps"),data=data_month) #DOES NOT WORK SINCE CR ONLY HANDLES ONE VARIABLE |
|
204 |
|
|
205 |
mod_mgam1_s |
|
206 |
summary(mod_mgam1_s) |
|
207 |
mod_mgam1_s <- gam(y_var ~ s(LST,bs="tp"),data=data_month) |
|
208 |
summary(mod_mgam1_s) |
|
209 |
|
|
210 |
|
|
211 |
|
|
212 |
raster_pred_s <- predict(object=s_raster,model=mod_mgam1_s,na.rm=FALSE) |
|
213 |
names(raster_pred_s)<-"raster_pred_s" |
|
214 |
plot(raster_pred_s) |
|
215 |
plot(data_month, add=TRUE) |
|
216 |
levelplot(raster_pred_s) |
|
217 |
|
|
218 |
mod_mgam1_te <- gam(y_var ~ te(LST,elev_s),data=data_month) |
|
219 |
mod_mgam1_te |
|
220 |
|
|
221 |
raster_pred_te<- predict(object=s_raster,model=mod_mgam1_te,na.rm=FALSE) |
|
222 |
names(raster_pred_te)<- "raster_pred_te" |
|
223 |
|
|
224 |
plot(raster_pred_te) |
|
225 |
plot(data_month, add=TRUE) |
|
226 |
|
|
227 |
raster_comp <- stack(raster_pred_te,raster_pred_s) |
|
228 |
plot(raster_comp) |
|
229 |
levelplot(raster_comp) |
|
230 |
|
|
231 |
|
|
232 |
|
|
233 |
############################### |
|
234 |
## DAILY MODELS USING GAM |
|
235 |
|
|
236 |
k_dim <- 20 |
|
237 |
#y_var_min<-range(data_s$y_var)[1] |
|
238 |
#y_var_max<-range(data_s$y_var)[2] |
|
239 |
mod_dgam1 <- gam(y_var ~ s(lat,lon,elev_s),data=data_s) #not working |
|
240 |
|
|
241 |
#gamFit2 <- gam(y_var ~ s(lat,lon,elev_s,k=k_dim),knots=list( x=seq(from=y_var_min,to=y_var_max, len=k_dim),data=data_s)) |
|
242 |
k_dim <- 20 |
|
243 |
mod_dgam2 <- gam(y_var ~ s(lat,lon,elev_s,k=k_dim), |
|
244 |
knots=list(lat=seq(from=range(data_s$lat,na.rm=TRUE)[1],to=range(data_s$lat,na.rm=TRUE)[2], len=k_dim), |
|
245 |
lon=seq(from=range(data_s$lon,na.rm=TRUE)[1],to=range(data_s$lon,na.rm=TRUE)[2], len=k_dim), |
|
246 |
elev_s=seq(from=range(data_s$elev_s,na.rm=TRUE)[1],to=range(data_s$elev_s,na.rm=TRUE)[2], len=k_dim)), |
|
247 |
data=data_s) |
|
248 |
mod_dgam2 |
|
249 |
#raster_pred<- predict(object=r_stack,model=mod,na.rm=FALSE) |
|
250 |
raster_pred<- predict(object=s_raster,model=mod_gam2,na.rm=FALSE) |
|
251 |
|
|
252 |
mod_gam3 <-gam(y_var~ te(lat,lon,elev_s),data=data_s) |
|
253 |
|
|
254 |
mod_dgam4 <- gam(y_var ~ te(lat,lon,elev_s,k=k_dim), |
|
255 |
knots=list(lat=seq(from=range(data_s$lat,na.rm=TRUE)[1],to=range(data_s$lat,na.rm=TRUE)[2], len=k_dim), |
|
256 |
lon=seq(from=range(data_s$lon,na.rm=TRUE)[1],to=range(data_s$lon,na.rm=TRUE)[2], len=k_dim), |
|
257 |
elev_s=seq(from=range(data_s$elev_s,na.rm=TRUE)[1],to=range(data_s$elev_s,na.rm=TRUE)[2], len=k_dim)), |
|
258 |
data=data_s) |
|
259 |
|
|
260 |
mod_dgam1_ti <-gam(y_var~ ti(lat,lon,elev_s),data=data_s) |
|
261 |
mod_dgam1_ti |
|
262 |
|
|
263 |
raster_pred_ti<- predict(object=s_raster,model=mod_mgam1_ti,na.rm=FALSE) |
|
264 |
names(raster_pred_ti)<- "raster_pred_ti" |
|
265 |
|
|
266 |
plot(raster_pred_ti) |
|
267 |
|
|
268 |
test_gam <- gam(y_var ~ ti(lat)+ti(lon)+ti(elev_s)+ti(lat,lon)+ti(lat,elev_s)+ti(lon,elev_s)+ti(lat,lon,elev_s),data=data_s) |
|
269 |
test_gam <- gam(y_var ~ ti(lat)+ti(lon)+ti(elev_s)+ti(lat,lon)+ti(lat,elev_s)+ti(lon,elev_s),data=data_s) |
|
270 |
test_gam <- gam(y_var ~ ti(lat)+ti(lon)+ti(elev_s)+ti(lat,lon)+ti(lat,elev_s),data=data_s) |
|
271 |
test_gam <- gam(y_var ~ ti(lat)+ti(lon)+ti(elev_s)+ti(lat,lon)+ti(lat,lon,elev_s),data=data_s) |
|
272 |
|
|
273 |
test_gam <- gam(y_var ~ ti(lat)+ti(lon)+ti(elev_s)+ti(lat,lon,elev_s),data=data_s) |
|
274 |
test_gam |
|
275 |
raster_pred_ti<- predict(object=s_raster,model=test_gam,na.rm=FALSE) |
|
276 |
names(raster_pred_ti)<- "raster_pred_ti" |
|
277 |
plot(raster_pred_ti) |
|
278 |
|
|
279 |
|
|
280 |
k_dim <- 35 #takes more than 2hours and does not fit a model |
|
281 |
|
|
282 |
mod_gam5 <- gam(y_var ~ te(lat,lon,elev_s,k=k_dim), |
|
283 |
knots=list(lat=seq(from=range(data_s$lat,na.rm=TRUE)[1],to=range(data_s$lat,na.rm=TRUE)[2], len=k_dim), |
|
284 |
lon=seq(from=range(data_s$lon,na.rm=TRUE)[1],to=range(data_s$lon,na.rm=TRUE)[2], len=k_dim), |
|
285 |
elev_s=seq(from=range(data_s$elev_s,na.rm=TRUE)[1],to=range(data_s$elev_s,na.rm=TRUE)[2], len=k_dim)), |
|
286 |
data=data_s) |
|
287 |
|
|
288 |
|
|
289 |
|
|
290 |
#CONTRIBUTION OF VARIABLES... |
|
291 |
|
|
292 |
#myModels<-clim_mod_obj_month$mod[1:6] |
|
293 |
myModels<-method_mod_obj$mod |
|
294 |
|
|
295 |
summary_list <- lapply(myModels, summary) |
|
296 |
s.table_list <- lapply(summary_list, `[[`, 's.table') |
|
297 |
p.table_list <- lapply(summary_list, `[[`, 'p.table') |
|
298 |
#now put in one table |
|
299 |
name_col<-function(i,x){ |
|
300 |
x_mat<-x[[i]] |
|
301 |
x_df<-as.data.frame(x_mat) |
|
302 |
x_df$mod_name<-rep(names(x)[i],nrow(x_df)) |
|
303 |
x_df$term_name <-row.names(x_df) |
|
304 |
return(x_df) |
|
305 |
} |
|
306 |
|
|
307 |
s.table_list2<-lapply(1:6,name_col,s.table_list) |
|
308 |
p.table_list2<-lapply(1:6,name_col,p.table_list) |
|
309 |
s.table_term <-do.call(rbind,s.table_list2) |
|
310 |
#p.table_term <-do.call(rbind,p.table_list2) |
|
311 |
|
|
312 |
table_term <- rbind(p.table_term,s.table_term) |
|
313 |
test |
|
314 |
|
|
315 |
#Testing one variable at a time: |
|
316 |
|
|
317 |
#Models to run...this can be change for each run |
|
318 |
|
|
319 |
list_models<-c("y_var ~ s(elev_s)", |
|
320 |
"y_var ~ s(LST)", |
|
321 |
"y_var ~ s(lat)", |
|
322 |
"y_var ~ s(DISTOC)", |
|
323 |
"y_var ~ s(lon)", |
|
324 |
"y_var ~ s(LC2)", |
|
325 |
"y_var ~ s(LC6)", |
|
326 |
"y_var ~ s(lat,lon) + s(elev_s) + s(N_w,E_w) + s(LST) + s(LC6)", |
|
327 |
"y_var ~ s(lat,lon) + s(elev_s) + s(N_w,E_w) + s(LST) + s(DISTOC)") |
|
328 |
|
|
329 |
|
|
330 |
|
|
331 |
|
|
332 |
t<-clim_mod_obj_month$mod[!sapply(clim_mod_obj_month$mod,is.null)] #remove NULL elements in list |
|
333 |
#t<-method_mod_obj[[1]]$mod[!sapply(method_mod_obj$mod[[1]],is.null)] #remove NULL elements in list |
|
334 |
t<-method_mod_obj[[1]]$mod[!sapply(method_mod_obj[[1]]$mod,inherits,"try_errors")] #remove NULL elements in list |
|
335 |
inherits(clim_mod_obj_month$mod[[7]],"try-error") |
|
336 |
|
|
337 |
myModels <- clim_mod_obj_month$mod[] |
|
338 |
if(inherits(myModel,"gam")){ |
|
339 |
list_mod[[j]] <- myModel |
|
340 |
} |
|
341 |
|
|
342 |
########### GET MOD OBJECT |
|
343 |
remove_errors_list<-function(list_items){ |
|
344 |
|
|
345 |
list_tmp<-list_items |
|
346 |
for(i in 1:length(list_items)){ |
|
347 |
|
|
348 |
if(inherits(list_items[[i]],"try-error")){ |
|
349 |
list_tmp[[i]]<-0 |
|
350 |
}else{ |
|
351 |
list_tmp[[i]]<-1 |
|
352 |
} |
|
353 |
} |
|
354 |
cnames<-names(list_tmp[list_tmp>0]) |
|
355 |
x<-list_items[match(cnames,names(list_items))] |
|
356 |
return(x) |
|
357 |
} |
|
358 |
|
|
359 |
myModels<-remove_errors_list(t$mod) |
|
360 |
#could add AIC, GCV to the table as well as ME, RMSE...+dates... |
|
361 |
|
|
362 |
#mean contribution for terms in dffiernt models over full year... |
|
363 |
|
|
364 |
myParameters = lapply(1:length(myModels),function(x){ if(x==1) |
|
365 |
paste("myModels[[",x,"]]",sep = "") |
|
366 |
paste("force(myModels[[",x,"]])",sep = "")}) |
|
367 |
myParStr = toString(paste( myParameters )) |
|
368 |
eval(parse(text = paste("anova(",myParStr,")"))) |
|
369 |
eval(parse(text = paste("aov(",myParStr,")"))) |
|
370 |
|
|
371 |
#TO DEAL WITH NA |
|
372 |
#1) all.vars(formula) #get all the terms for all formulas... |
|
373 |
#2) do na.omit... |
Also available in: Unified diff
initial commit, analyses paper part 1 with revised results