Revision d8763ab5
Added by Benoit Parmentier over 11 years ago
climate/research/oregon/interpolation/contribution_of_covariates_paper_interpolation.R | ||
---|---|---|
1 | 1 |
#################################### INTERPOLATION OF TEMPERATURES ####################################### |
2 | 2 |
############################ Script for manuscript analyses,tables and figures ####################################### |
3 |
#This script reads information concerning the Oregon case study to adapt data for the revised
|
|
4 |
# interpolation code.
|
|
5 |
#Figures and data for the contribution of covariate paper are also produced.
|
|
6 |
#AUTHOR: Benoit Parmentier #
|
|
7 |
#DATE: 08/13/2013
|
|
8 |
#Version: 1
|
|
9 |
#PROJECT: Environmental Layers project #
|
|
3 |
#This script uses the worklfow code applied to the Oregon case study. Daily methods (GAM,GWR, Kriging) are tested with
|
|
4 |
#different covariates using two baselines. Accuracy methods are added in the the function script to evaluate results.
|
|
5 |
#Figures, tables and data for the contribution of covariate paper are also produced in the script.
|
|
6 |
#AUTHOR: Benoit Parmentier |
|
7 |
#DATE: 08/15/2013
|
|
8 |
#Version: 2
|
|
9 |
#PROJECT: Environmental Layers project |
|
10 | 10 |
################################################################################################# |
11 | 11 |
|
12 |
###Loading R library and packages |
|
12 |
### Loading R library and packages |
|
13 |
#library used in the workflow production: |
|
13 | 14 |
library(gtools) # loading some useful tools |
14 | 15 |
library(mgcv) # GAM package by Simon Wood |
15 | 16 |
library(sp) # Spatial pacakge with class definition by Bivand et al. |
... | ... | |
18 | 19 |
library(gstat) # Kriging and co-kriging by Pebesma et al. |
19 | 20 |
library(fields) # NCAR Spatial Interpolation methods such as kriging, splines |
20 | 21 |
library(raster) # Hijmans et al. package for raster processing |
21 |
library(gdata) # various tools with xls reading |
|
22 |
library(rasterVis) |
|
23 |
library(parallel) |
|
24 |
library(maptools) |
|
25 |
library(maps) |
|
26 |
library(reshape) |
|
27 |
library(plotrix) |
|
28 |
library(plyr) |
|
22 |
library(gdata) # various tools with xls reading, cbindX |
|
23 |
library(rasterVis) # Raster plotting functions |
|
24 |
library(parallel) # Parallelization of processes with multiple cores |
|
25 |
library(maptools) # Tools and functions for sp and other spatial objects e.g. spCbind |
|
26 |
library(maps) # Tools and data for spatial/geographic objects |
|
27 |
library(reshape) # Change shape of object, summarize results |
|
28 |
library(plotrix) # Additional plotting functions |
|
29 |
library(plyr) # Various tools including rbind.fill |
|
30 |
library(spgwr) # GWR method |
|
31 |
library(automap) # Kriging automatic fitting of variogram using gstat |
|
32 |
library(rgeos) # Geometric, topologic library of functions |
|
33 |
#RPostgreSQL # Interface R and Postgres, not used in this script |
|
34 |
|
|
35 |
#Additional libraries not used in workflow |
|
36 |
library(pgirmess) # Krusall Wallis test with mulitple options, Kruskalmc {pgirmess} |
|
29 | 37 |
|
30 | 38 |
#### FUNCTION USED IN SCRIPT |
31 | 39 |
|
32 |
load_obj <- function(f) |
|
33 |
{ |
|
34 |
env <- new.env() |
|
35 |
nm <- load(f, env)[1] |
|
36 |
env[[nm]] |
|
37 |
} |
|
38 |
|
|
39 |
extract_list_from_list_obj<-function(obj_list,list_name){ |
|
40 |
#Create a list of an object from a given list of object using a name prodived as input |
|
41 |
|
|
42 |
list_tmp<-vector("list",length(obj_list)) |
|
43 |
for (i in 1:length(obj_list)){ |
|
44 |
tmp<-obj_list[[i]][[list_name]] #double bracket to return data.frame |
|
45 |
list_tmp[[i]]<-tmp |
|
46 |
} |
|
47 |
return(list_tmp) #this is a data.frame |
|
48 |
} |
|
49 |
|
|
50 |
#This extract a data.frame object from raster prediction obj and combine them in one data.frame |
|
51 |
extract_from_list_obj<-function(obj_list,list_name){ |
|
52 |
#extract object from list of list. This useful for raster_prediction_obj |
|
53 |
library(ply) |
|
54 |
|
|
55 |
list_tmp<-vector("list",length(obj_list)) |
|
56 |
for (i in 1:length(obj_list)){ |
|
57 |
tmp<-obj_list[[i]][[list_name]] #double bracket to return data.frame |
|
58 |
list_tmp[[i]]<-tmp |
|
59 |
} |
|
60 |
tb_list_tmp<-do.call(rbind.fill,list_tmp) #long rownames |
|
61 |
#tb_list_tmp<-do.call(rbind,list_tmp) #long rownames |
|
62 |
|
|
63 |
return(tb_list_tmp) #this is a data.frame |
|
64 |
} |
|
65 |
|
|
66 |
calc_stat_from_raster_prediction_obj <-function(raster_prediction_obj,stat){ |
|
67 |
tb <-raster_prediction_obj$tb_diagnostic_v #Kriging methods |
|
68 |
|
|
69 |
t<-melt(tb, |
|
70 |
measure=c("mae","rmse","r","m50"), |
|
71 |
id=c("pred_mod"), |
|
72 |
na.rm=T) |
|
73 |
|
|
74 |
stat_tb<-cast(t,pred_mod~variable,stat) |
|
75 |
return(stat_tb) |
|
76 |
} |
|
77 |
|
|
78 |
## Produce data.frame with residuals for models and distance to closest fitting station |
|
79 |
calc_dist_ref_data_point <- function(i,list_param){ |
|
80 |
#This function creates a list of data.frame containing the distance to teh closest |
|
81 |
# reference point (e.g. fitting station) for a give data frame. |
|
82 |
#Inputs: |
|
83 |
#data_s: given data.frame from wich distance is computed |
|
84 |
#data_v: reference data.frame, destination, often the fitting points used in analyses |
|
85 |
#i: index variable to operate on list |
|
86 |
#names_var: |
|
87 |
#Outputs: |
|
88 |
#list_dstspat_er |
|
89 |
|
|
90 |
#Parsing input arguments |
|
91 |
data_s<-list_param$data_s[[i]] |
|
92 |
data_v<-list_param$data_v[[i]] |
|
93 |
|
|
94 |
names_var<-list_param$names_var |
|
95 |
|
|
96 |
###### |
|
97 |
|
|
98 |
names_var<-intersect(names_var,names(data_v)) #there may be missing columns |
|
99 |
#use columns that exists |
|
100 |
|
|
101 |
d_s_v<-matrix(0,nrow(data_v),nrow(data_s)) |
|
102 |
for(k in 1:nrow(data_s)){ |
|
103 |
pt<-data_s[k,] |
|
104 |
d_pt<-(spDistsN1(data_v,pt,longlat=FALSE))/1000 #Distance to station k in km |
|
105 |
d_s_v[,k]<-d_pt |
|
106 |
} |
|
107 |
|
|
108 |
#Create data.frame with position, ID, dst and residuals... |
|
109 |
|
|
110 |
pos<-vector("numeric",nrow(data_v)) |
|
111 |
y<-vector("numeric",nrow(data_v)) |
|
112 |
dst<-vector("numeric",nrow(data_v)) |
|
113 |
|
|
114 |
for (k in 1:nrow(data_v)){ |
|
115 |
pos[k]<-match(min(d_s_v[k,]),d_s_v[k,]) |
|
116 |
dst[k]<-min(d_s_v[k,]) |
|
117 |
} |
|
118 |
|
|
119 |
dstspat_er<-as.data.frame(cbind(v_id=as.vector(data_v$id),s_id=as.vector(data_s$id[pos]), |
|
120 |
pos=pos, lat=data_v$lat, lon=data_v$lon, x=data_v$x,y=data_v$y, |
|
121 |
dst=dst, |
|
122 |
as.data.frame(data_v[,names_var]))) |
|
123 |
|
|
124 |
return(dstspat_er) |
|
125 |
} |
|
126 |
|
|
127 |
### Main function to call to obtain distance to closest fitting stations for valiation dataset |
|
128 |
distance_to_closest_fitting_station<-function(raster_prediction_obj,names_mod,dist_classes=c(0)){ |
|
129 |
#This function computes the distance between training and testing points and returns and data frame |
|
130 |
#with distance,residuals, ID of training and testing points |
|
131 |
#Input: raster_prediction_obj, names of residuals models to return, distance classes |
|
132 |
#Output: data frame |
|
133 |
|
|
134 |
#Functions used in the script |
|
135 |
|
|
136 |
mae_fun<-function(x){mean(abs(x))} #Mean Absolute Error give a residuals vector |
|
137 |
sd_abs_fun<-function(x){sd(abs(x))} #sd Absolute Error give a residuals vector |
|
138 |
|
|
139 |
##BEGIN |
|
140 |
|
|
141 |
##### PART I: generate data.frame with residuals in term of distance to closest fitting station |
|
142 |
|
|
143 |
#return list of training and testing data frames |
|
144 |
list_data_s <- extract_list_from_list_obj(raster_prediction_obj$validation_mod_obj,"data_s") #training |
|
145 |
list_data_v <- extract_list_from_list_obj(raster_prediction_obj$validation_mod_obj,"data_v") #testing (validation) |
|
146 |
|
|
147 |
i<-1 |
|
148 |
names_var<-c(names_mod,"dates") |
|
149 |
list_param_dst<-list(i,list_data_s,list_data_v,names_mod) |
|
150 |
names(list_param_dst) <- c("index","data_s","data_v","names_var") |
|
151 |
|
|
152 |
#call function "calc_dist_ref_data_point" over 365 dates |
|
153 |
#note that this function depends on other functions !!! see script |
|
154 |
|
|
155 |
list_dstspat_er <-lapply(1:length(list_data_v),FUN=calc_dist_ref_data_point,list_param=list_param_dst) |
|
156 |
#now assemble in one data.frame |
|
157 |
dstspat_dat<-do.call(rbind.fill,list_dstspat_er) |
|
158 |
|
|
159 |
########### PART II: generate distance classes and summary statistics |
|
160 |
|
|
161 |
if (length(dist_classes)==1){ |
|
162 |
range_val<-range(dstspat_dat$dst) |
|
163 |
max_val<-round_any(range_val[2],10, f=ceiling) #round max value to nearest 10 (from plyr package) |
|
164 |
min_val<-0 |
|
165 |
limit_val<-seq(min_val,max_val, by=10) |
|
166 |
}else{ |
|
167 |
limit_val<-dist_classes |
|
168 |
} |
|
169 |
|
|
170 |
dstspat_dat$dst_cat1 <- cut(dstspat_dat$dst,include.lowest=TRUE,breaks=limit_val) |
|
171 |
|
|
172 |
names_var <- intersect(names_mod,names(dstspat_dat)) |
|
173 |
t<-melt(dstspat_dat, |
|
174 |
measure=names_var, |
|
175 |
id=c("dst_cat1"), |
|
176 |
na.rm=T) |
|
177 |
|
|
178 |
mae_tb<-cast(t,dst_cat1~variable,mae_fun) |
|
179 |
sd_abs_tb<-cast(t,dst_cat1~variable,sd_abs_fun) |
|
180 |
|
|
181 |
avg_tb<-cast(t,dst_cat1~variable,mean) |
|
182 |
sd_tb<-cast(t,dst_cat1~variable,sd) |
|
183 |
n_tb<-cast(t,dst_cat1~variable,length) |
|
184 |
#n_NA<-cast(t,dst_cat1~variable,is.na) |
|
185 |
|
|
186 |
#### prepare returning object |
|
187 |
dstspat_obj<-list(dstspat_dat,mae_tb,sd_abs_tb,avg_tb,sd_tb,n_tb) |
|
188 |
names(dstspat_obj) <-c("dstspat_dat","mae_tb","sd_abs_tb","avg_tb","sd_tb","n_tb") |
|
189 |
|
|
190 |
return(dstspat_obj) |
|
191 |
|
|
192 |
} |
|
193 |
|
|
194 |
# create plot of accury in term of distance to closest fitting station |
|
195 |
plot_dst_spat_fun<-function(stat_tb,names_var,cat_val){ |
|
196 |
|
|
197 |
range_y<-range(as.vector(unlist(stat_tb[,names_var])),na.rm=T) #flatten data.frame |
|
198 |
col_t<-rainbow(length(names_var)) |
|
199 |
pch_t<- 1:length(names_var) |
|
200 |
plot(stat_tb[,names_var[1]], ylim=range_y,pch=pch_t[1],col=col_t[1],type="b", |
|
201 |
yla="MAE (in degree C)",xlab="",xaxt="n") |
|
202 |
#points((stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[1],col=col_t[1]),type="p") |
|
203 |
for (k in 2:length(names_var)){ |
|
204 |
lines(stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[k],col=col_t[k],type="b", |
|
205 |
xlab="",axes=F) |
|
206 |
#points((stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[k],col=col_t[k]),type="p") |
|
207 |
} |
|
208 |
legend("topleft",legend=names_var, |
|
209 |
cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n") |
|
210 |
axis(1,at=1:length(stat_tb[,1]),labels=stat_tb[,1]) |
|
211 |
} |
|
212 |
|
|
213 |
plot_dst_MAE <-function(list_param){ |
|
214 |
# |
|
215 |
#list_dist_obj: list of dist object |
|
216 |
#col_t: list of color for each |
|
217 |
#pch_t: symbol for line |
|
218 |
#legend_text: text for line and symbol |
|
219 |
#mod_name: selected models |
|
220 |
# |
|
221 |
## BEGIN ## |
|
222 |
|
|
223 |
list_dist_obj<-list_param$list_dist_obj |
|
224 |
col_t<-list_param$col_t |
|
225 |
pch_t<- list_param$pch_t |
|
226 |
legend_text <- list_param$legend_text |
|
227 |
list_mod_name<-list_param$mod_name |
|
228 |
|
|
229 |
for (i in 1:length(list_dist_obj)){ |
|
230 |
|
|
231 |
l<-list_dist_obj[[i]] |
|
232 |
mae_tb<-l$mae_tb |
|
233 |
n_tb<-l$n_tb |
|
234 |
sd_abs_tb<-l$sd_abs_tb |
|
235 |
|
|
236 |
mod_name<-list_mod_name[i] |
|
237 |
xlab_text<-"distance to fitting station" |
|
238 |
|
|
239 |
n <- unlist(n_tb[1:13,c(mod_name)]) |
|
240 |
y <- unlist(mae_tb[1:13,c(mod_name)]) |
|
241 |
|
|
242 |
x<- 1:length(y) |
|
243 |
y_sd <- unlist(sd_abs_tb[1:12,c(mod_name)]) |
|
244 |
|
|
245 |
ciw <-y_sd |
|
246 |
#ciw2 <- qt(0.975, n) * y_sd2 / sqrt(n) |
|
247 |
|
|
248 |
#plotCI(y=y, x=x, uiw=ciw, col="red", main=paste(" MAE for ",mod_name,sep=""), barcol="blue", lwd=1, |
|
249 |
# ylab="RMSE (C)", xlab=xlab_text) |
|
250 |
|
|
251 |
ciw <- qt(0.975, n) * y_sd / sqrt(n) |
|
252 |
|
|
253 |
if(i==1){ |
|
254 |
plotCI(y=y, x=x, uiw=ciw, col=col_t[i], main=paste(" Comparison of MAE in ",mod_name,sep=""), barcol="blue", lwd=1, |
|
255 |
ylab="RMSE (C)", xlab=xlab_text) |
|
256 |
lines(y~x, col=col_t[i]) |
|
257 |
|
|
258 |
}else{ |
|
259 |
lines(y~x, col=col_t[i]) |
|
260 |
} |
|
261 |
|
|
262 |
} |
|
263 |
legend("topleft",legend=legend_text, |
|
264 |
cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n") |
|
265 |
#axis(1,at=1:length(stat_tb[,1]),labels=stat_tb[,1]) |
|
266 |
|
|
267 |
} |
|
268 |
|
|
269 |
calc_stat_prop_tb <-function(names_mod,raster_prediction_obj,testing=TRUE){ |
|
270 |
|
|
271 |
#add for testing?? |
|
272 |
if (testing==TRUE){ |
|
273 |
tb <-raster_prediction_obj$tb_diagnostic_v #use testing accuracy information |
|
274 |
}else{ |
|
275 |
tb <-raster_prediction_obj$tb_diagnostic_s #use training accuracy information |
|
276 |
} |
|
277 |
|
|
278 |
t<-melt(subset(tb,pred_mod==names_mod), |
|
279 |
measure=c("mae","rmse","r","m50"), |
|
280 |
id=c("pred_mod","prop"), |
|
281 |
na.rm=T) |
|
282 |
|
|
283 |
avg_tb<-cast(t,pred_mod+prop~variable,mean) |
|
284 |
sd_tb<-cast(t,pred_mod+prop~variable,sd) |
|
285 |
n_tb<-cast(t,pred_mod+prop~variable,length) |
|
286 |
#n_NA<-cast(t,dst_cat1~variable,is.na) |
|
287 |
|
|
288 |
#### prepare returning object |
|
289 |
prop_obj<-list(tb,avg_tb,sd_tb,n_tb) |
|
290 |
names(prop_obj) <-c("tb","avg_tb","sd_tb","n_tb") |
|
291 |
|
|
292 |
return(prop_obj) |
|
293 |
} |
|
294 |
|
|
295 |
#ploting |
|
296 |
plot_prop_metrics <-function(list_param){ |
|
297 |
# |
|
298 |
#list_dist_obj: list of dist object |
|
299 |
#col_t: list of color for each |
|
300 |
#pch_t: symbol for line |
|
301 |
#legend_text: text for line and symbol |
|
302 |
#mod_name: selected models |
|
303 |
# |
|
304 |
## BEGIN ## |
|
305 |
|
|
306 |
list_obj<-list_param$list_prop_obj |
|
307 |
col_t <-list_param$col_t |
|
308 |
pch_t <- list_param$pch_t |
|
309 |
legend_text <- list_param$legend_text |
|
310 |
list_mod_name<-list_param$mod_name |
|
311 |
metric_name<-list_param$metric_name |
|
312 |
|
|
313 |
for (i in 1:length(list_obj)){ |
|
314 |
|
|
315 |
l<-list_obj[[i]] |
|
316 |
mod_name<-list_mod_name[i] |
|
317 |
avg_tb<-subset(l$avg_tb,pred_mod==mod_name,select=metric_name) #selecte relevant accuarcy metric |
|
318 |
n_tb<-subset(l$n_tb,pred_mod==mod_name,select=metric_name) |
|
319 |
sd_tb<-subset(l$sd_tb,pred_mod==mod_name,select=metric_name) #l$sd_abs_tb[,metric_name] |
|
320 |
|
|
321 |
xlab_text<-"holdout proportion" |
|
322 |
|
|
323 |
no <- unlist(as.data.frame(n_tb)) |
|
324 |
y <- unlist(as.data.frame(avg_tb)) |
|
325 |
|
|
326 |
x<- l$avg_tb$prop |
|
327 |
y_sd <- unlist(as.data.frame(sd_tb)) #sd_tb |
|
328 |
|
|
329 |
ciw <-y_sd |
|
330 |
#ciw2 <- qt(0.975, n) * y_sd2 / sqrt(n) |
|
331 |
|
|
332 |
#plotCI(y=y, x=x, uiw=ciw, col="red", main=paste(" MAE for ",mod_name,sep=""), barcol="blue", lwd=1, |
|
333 |
# ylab="RMSE (C)", xlab=xlab_text) |
|
334 |
|
|
335 |
ciw <- qt(0.975, no) * y_sd / sqrt(no) |
|
336 |
|
|
337 |
if(i==1){ |
|
338 |
plotCI(y=y, x=x, uiw=ciw, col=col_t[i], main=paste(" Comparison of ",metric_name," in ",mod_name,sep=""), barcol="blue", lwd=1, |
|
339 |
ylab="RMSE (C)", xlab=xlab_text) |
|
340 |
lines(y~x, col=col_t[i]) |
|
341 |
|
|
342 |
}else{ |
|
343 |
lines(y~x, col=col_t[i]) |
|
344 |
} |
|
345 |
|
|
346 |
} |
|
347 |
legend("topleft",legend=legend_text, |
|
348 |
cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n") |
|
349 |
#axis(1,at=1:length(stat_tb[,1]),labels=stat_tb[,1]) |
|
350 |
|
|
351 |
} |
|
352 |
|
|
353 |
plot_dst_spat_fun<-function(stat_tb,names_var,cat_val){ |
|
354 |
|
|
355 |
range_y<-range(as.vector(unlist(stat_tb[,names_var])),na.rm=T) #flatten data.frame |
|
356 |
col_t<-rainbow(length(names_var)) |
|
357 |
pch_t<- 1:length(names_var) |
|
358 |
plot(stat_tb[,names_var[1]], ylim=range_y,pch=pch_t[1],col=col_t[1],type="b", |
|
359 |
yla="MAE (in degree C)",xlab="",xaxt="n") |
|
360 |
#points((stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[1],col=col_t[1]),type="p") |
|
361 |
for (k in 2:length(names_var)){ |
|
362 |
lines(stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[k],col=col_t[k],type="b", |
|
363 |
xlab="",axes=F) |
|
364 |
#points((stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[k],col=col_t[k]),type="p") |
|
365 |
} |
|
366 |
legend("topleft",legend=names_var, |
|
367 |
cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n") |
|
368 |
axis(1,at=1:length(stat_tb[,1]),labels=stat_tb[,1]) |
|
369 |
} |
|
370 |
|
|
371 |
plot_prop_metrics <-function(list_param){ |
|
372 |
# |
|
373 |
#list_dist_obj: list of dist object |
|
374 |
#col_t: list of color for each |
|
375 |
#pch_t: symbol for line |
|
376 |
#legend_text: text for line and symbol |
|
377 |
#mod_name: selected models |
|
378 |
# |
|
379 |
## BEGIN ## |
|
380 |
|
|
381 |
list_obj<-list_param$list_prop_obj |
|
382 |
col_t <-list_param$col_t |
|
383 |
pch_t <- list_param$pch_t |
|
384 |
legend_text <- list_param$legend_text |
|
385 |
list_mod_name<-list_param$mod_name |
|
386 |
metric_name<-list_param$metric_name |
|
387 |
|
|
388 |
for (i in 1:length(list_obj)){ |
|
389 |
|
|
390 |
l<-list_obj[[i]] |
|
391 |
mod_name<-list_mod_name[i] |
|
392 |
avg_tb<-subset(l$avg_tb,pred_mod==mod_name,select=metric_name) #selecte relevant accuarcy metric |
|
393 |
n_tb<-subset(l$n_tb,pred_mod==mod_name,select=metric_name) |
|
394 |
sd_tb<-subset(l$sd_tb,pred_mod==mod_name,select=metric_name) #l$sd_abs_tb[,metric_name] |
|
395 |
|
|
396 |
xlab_text<-"holdout proportion" |
|
397 |
|
|
398 |
no <- unlist(as.data.frame(n_tb)) |
|
399 |
y <- unlist(as.data.frame(avg_tb)) |
|
400 |
|
|
401 |
x<- l$avg_tb$prop |
|
402 |
y_sd <- unlist(as.data.frame(sd_tb)) #sd_tb |
|
403 |
|
|
404 |
ciw <-y_sd |
|
405 |
#ciw2 <- qt(0.975, n) * y_sd2 / sqrt(n) |
|
406 |
|
|
407 |
#plotCI(y=y, x=x, uiw=ciw, col="red", main=paste(" MAE for ",mod_name,sep=""), barcol="blue", lwd=1, |
|
408 |
# ylab="RMSE (C)", xlab=xlab_text) |
|
409 |
|
|
410 |
ciw <- qt(0.975, no) * y_sd / sqrt(no) |
|
411 |
|
|
412 |
if(i==1){ |
|
413 |
plotCI(y=y, x=x, uiw=ciw, col=col_t[i], main=paste(" Comparison of ",metric_name," in ",mod_name,sep=""), barcol="blue", lwd=1, |
|
414 |
ylab="RMSE (C)", xlab=xlab_text) |
|
415 |
lines(y~x, col=col_t[i]) |
|
416 |
|
|
417 |
}else{ |
|
418 |
lines(y~x, col=col_t[i]) |
|
419 |
} |
|
420 |
|
|
421 |
} |
|
422 |
legend("topleft",legend=legend_text, |
|
423 |
cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n") |
|
424 |
#axis(1,at=1:length(stat_tb[,1]),labels=stat_tb[,1]) |
|
425 |
|
|
426 |
} |
|
427 |
|
|
428 |
create_s_and_p_table_term_models <-function(i,list_myModels){ |
|
429 |
#Purpose: |
|
430 |
#Function to extract smooth term table,parameter table and AIC from a list of models. |
|
431 |
#Originally created to processed GAM models run over a full year. |
|
432 |
#Inputs: |
|
433 |
#1) list_myModels: list of fitted GAM models |
|
434 |
#2) i: index of list to run with lapply or mcapply |
|
435 |
#Outputs: list of |
|
436 |
#1)s.table.term |
|
437 |
#2)p.table.term |
|
438 |
#3)AIC list |
|
439 |
#Authors: Benoit Parmentier |
|
440 |
#date: 08/142013 |
|
441 |
|
|
442 |
### Functions used in the scritp: |
|
443 |
|
|
444 |
#Remove models that were not fitted from the list |
|
445 |
#All modesl that are "try-error" are removed |
|
446 |
remove_errors_list<-function(list_items){ |
|
447 |
#This function removes "error" items in a list |
|
448 |
list_tmp<-list_items |
|
449 |
for(i in 1:length(list_items)){ |
|
450 |
|
|
451 |
if(inherits(list_items[[i]],"try-error")){ |
|
452 |
list_tmp[[i]]<-0 |
|
453 |
}else{ |
|
454 |
list_tmp[[i]]<-1 |
|
455 |
} |
|
456 |
} |
|
457 |
cnames<-names(list_tmp[list_tmp>0]) |
|
458 |
x<-list_items[match(cnames,names(list_items))] |
|
459 |
return(x) |
|
460 |
} |
|
461 |
|
|
462 |
#turn term from list into data.frame |
|
463 |
name_col<-function(i,x){ |
|
464 |
x_mat<-x[[i]] |
|
465 |
x_df<-as.data.frame(x_mat) |
|
466 |
x_df$mod_name<-rep(names(x)[i],nrow(x_df)) |
|
467 |
x_df$term_name <-row.names(x_df) |
|
468 |
return(x_df) |
|
469 |
} |
|
470 |
|
|
471 |
##BEGIN ### |
|
472 |
|
|
473 |
myModels <- list_myModels[[i]] |
|
474 |
myModels<-remove_errors_list(myModels) |
|
475 |
#could add AIC, GCV to the table as well as ME, RMSE...+dates... |
|
476 |
|
|
477 |
summary_list <- lapply(myModels, summary) |
|
478 |
s.table_list <- lapply(summary_list, `[[`, 's.table') |
|
479 |
p.table_list <- lapply(summary_list, `[[`, 'p.table') |
|
480 |
AIC_list <- lapply(myModels, AIC) |
|
481 |
|
|
482 |
#now put in one table |
|
483 |
|
|
484 |
s.table_list2<-lapply(1:length(myModels),name_col,s.table_list) |
|
485 |
p.table_list2<-lapply(1:length(myModels),name_col,p.table_list) |
|
486 |
s.table_term <-do.call(rbind,s.table_list2) |
|
487 |
p.table_term <-do.call(rbind,p.table_list2) |
|
488 |
|
|
489 |
#Now get AIC |
|
490 |
AIC_list2<-lapply(1:length(myModels),name_col,AIC_list) |
|
491 |
AIC_models <- do.call(rbind,AIC_list2) |
|
492 |
names(AIC_models)[1]<-"AIC" |
|
493 |
|
|
494 |
#Set up return object |
|
495 |
|
|
496 |
s_p_table_term_obj<-list(s.table_term,p.table_term,AIC_models) |
|
497 |
names(s_p_table_term_obj) <-c("s.table_term","p.table_term","AIC_models") |
|
498 |
return(s_p_table_term_obj) |
|
499 |
|
|
500 |
} |
|
501 |
|
|
502 |
convert_spdf_to_df_from_list <-function(obj_list,list_name){ |
|
503 |
#extract object from list of list. This useful for raster_prediction_obj |
|
504 |
#output: data.frame formed by rbinding sp data.frame in the list |
|
505 |
library(plyr) |
|
506 |
|
|
507 |
list_tmp<-vector("list",length(obj_list)) |
|
508 |
for (i in 1:length(obj_list)){ |
|
509 |
#tmp<-obj_list[[i]][[list_name]] #double bracket to return data.frame |
|
510 |
list_tmp[[i]]<-as.data.frame(obj_list[[i]]) |
|
511 |
} |
|
512 |
tb_list_tmp<-do.call(rbind.fill,list_tmp) #long rownames |
|
513 |
#tb_list_tmp<-do.call(rbind,list_tmp) #long rownames |
|
514 |
|
|
515 |
return(tb_list_tmp) #this is a data.frame |
|
516 |
} |
|
40 |
function_analyses_paper <-"contribution_of_covariates_paper_interpolation_functions_08152013.R" |
|
517 | 41 |
|
518 | 42 |
############################## |
519 | 43 |
#### Parameters and constants |
520 | 44 |
|
45 |
script_path<-"/home/parmentier/Data/IPLANT_project/env_layers_scripts/" #path to script |
|
46 |
source(file.path(script_path,function_analyses_paper)) #source all functions used in this script. |
|
521 | 47 |
|
522 |
script_path<-"/home/parmentier/Data/IPLANT_project/env_layers_scripts/" |
|
523 |
#source(file.path(script_path,"interpolation_method_day_function_multisampling_06082013.R")) #Include GAM_day |
|
524 |
|
|
525 |
#in_dir<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_GAM_fus_all_lst_05312013" |
|
526 |
in_dir1 <-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_day_lst_comb3_07092013/" |
|
48 |
in_dir1 <-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_day_lst_comb3_08132013" |
|
527 | 49 |
in_dir2 <-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_day_lst_comb4_07152013/" |
528 | 50 |
|
529 | 51 |
#kriging results: |
... | ... | |
540 | 62 |
|
541 | 63 |
y_var_name <- "dailyTmax" |
542 | 64 |
|
543 |
out_prefix<-"analyses_08032013"
|
|
65 |
out_prefix<-"analyses_08152013"
|
|
544 | 66 |
|
545 | 67 |
method_interpolation <- "gam_daily" |
546 |
covar_obj_file1 <- "covar_obj__365d_gam_day_lst_comb3_07092013.RData"
|
|
547 |
met_obj_file_1 <- "met_stations_outfiles_obj_gam_daily__365d_gam_day_lst_comb3_07092013.RData"
|
|
68 |
covar_obj_file_1 <- "covar_obj__365d_gam_day_lst_comb3_08132013.RData"
|
|
69 |
met_obj_file_1 <- "met_stations_outfiles_obj_gam_daily__365d_gam_day_lst_comb3_08132013.RData"
|
|
548 | 70 |
|
549 | 71 |
#raster_prediciton object for baseline 1 () s(lat,lon) + s(elev)) and baseline 2 (slat,lon)) |
550 |
raster_obj_file_1 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_day_lst_comb3_07092013.RData"
|
|
72 |
raster_obj_file_1 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_day_lst_comb3_08132013.RData"
|
|
551 | 73 |
raster_obj_file_2 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_day_lst_comb4_07152013.RData" |
552 | 74 |
|
553 | 75 |
raster_obj_file_3 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_day_lst_comb3_07112013.RData" |
... | ... | |
560 | 82 |
|
561 | 83 |
#Load objects containing training, testing, models objects |
562 | 84 |
|
563 |
covar_obj <-load_obj(file.path(in_dir1,covar_obj_file1))
|
|
85 |
covar_obj <-load_obj(file.path(in_dir1,covar_obj_file_1)) #Reading covariates object for GAM daily method
|
|
564 | 86 |
infile_covariates <- covar_obj$infile_covariates |
565 | 87 |
infile_reg_outline <- covar_obj$infile_reg_outline |
566 | 88 |
covar_names<- covar_obj$covar_names |
567 | 89 |
##### |
568 |
s_raster <- brick(file.path(in_dir1,infile_covariates))
|
|
90 |
s_raster <- brick(infile_covariates)
|
|
569 | 91 |
names(s_raster)<-covar_names |
570 | 92 |
|
571 |
raster_prediction_obj_1 <-load_obj(file.path(in_dir1,raster_obj_file_1)) #comb3 |
|
572 |
raster_prediction_obj_2 <-load_obj(file.path(in_dir2,raster_obj_file_2)) #comb4 |
|
573 |
raster_prediction_obj_3 <-load_obj(file.path(in_dir3,raster_obj_file_3)) |
|
574 |
raster_prediction_obj_4 <-load_obj(file.path(in_dir4,raster_obj_file_4)) |
|
93 |
raster_prediction_obj_1 <-load_obj(file.path(in_dir1,raster_obj_file_1)) #comb3 (baseline 2)
|
|
94 |
raster_prediction_obj_2 <-load_obj(file.path(in_dir2,raster_obj_file_2)) #comb4 (baseline 1)
|
|
95 |
raster_prediction_obj_3 <-load_obj(file.path(in_dir3,raster_obj_file_3)) #comb3/mod1 baseline 2, kriging
|
|
96 |
raster_prediction_obj_4 <-load_obj(file.path(in_dir4,raster_obj_file_4)) #comb3/mod1 baseline 2, gwr
|
|
575 | 97 |
raster_prediction_obj_5 <-load_obj(file.path(in_dir5,raster_obj_file_5)) #gam daily multisampling 10 to 70% |
576 |
raster_prediction_obj_6 <-load_obj(file.path(in_dir6,raster_obj_file_6)) #kriging daily multisampling |
|
577 |
raster_prediction_obj_7 <-load_obj(file.path(in_dir7,raster_obj_file_7)) #kriging daily multisampling
|
|
98 |
raster_prediction_obj_6 <-load_obj(file.path(in_dir6,raster_obj_file_6)) #kriging daily multisampling 10 to 70%
|
|
99 |
raster_prediction_obj_7 <-load_obj(file.path(in_dir7,raster_obj_file_7)) #gwr daily multisampling 10 to 70%
|
|
578 | 100 |
|
579 |
names(raster_prediction_obj_1) #list of two objects
|
|
101 |
############### BEGIN SCRIPT #################
|
|
580 | 102 |
|
581 |
### ACCURACY TABLE WITH BASELINES |
|
103 |
############ |
|
104 |
##### 1) Generate: Table 3. Contribution of covariates using validation accuracy metrics |
|
105 |
## This is a table of accuracy compared to baseline by difference |
|
582 | 106 |
|
583 | 107 |
#Check input covariates and model formula: |
584 |
raster_prediction_obj_1$method_mod_obj[[1]]$formulas |
|
585 |
raster_prediction_obj_2$method_mod_obj[[1]]$formulas |
|
586 |
|
|
587 |
###baseline 2: s(lat,lon) + s(elev) |
|
108 |
raster_prediction_obj_1$method_mod_obj[[1]]$formulas #models run for baseline 2 |
|
109 |
raster_prediction_obj_2$method_mod_obj[[1]]$formulas #models run for baseline 1 |
|
588 | 110 |
|
589 | 111 |
summary_metrics_v1<-raster_prediction_obj_1$summary_metrics_v |
590 | 112 |
summary_metrics_v2<-raster_prediction_obj_2$summary_metrics_v |
113 |
tb1 <-raster_prediction_obj_1$tb_diagnostic_v #365 days accuracy table for baseline 2 |
|
114 |
tb2 <-raster_prediction_obj_2$tb_diagnostic_v #365 days accuracy table for baseline 1 |
|
591 | 115 |
|
592 |
table_data1 <-summary_metrics_v1$avg[,c("mae","rmse","me","r")] |
|
116 |
table_data1 <-summary_metrics_v1$avg[,c("mae","rmse","me","r")] #select relevant columns from data.frame
|
|
593 | 117 |
table_data2 <-summary_metrics_v2$avg[,c("mae","rmse","me","r")] |
594 | 118 |
|
595 |
model_col<-c("Baseline2","Northness","Eastness","LST","DISTOC","Forest","CANHEIGHT","LST*Forest") # removed ,"LST*CANHEIGHT") |
|
119 |
###Table 3a, baseline 1: s(lat,lon) |
|
120 |
|
|
121 |
model_col<-c("Baseline1","Elevation","Northing","Easting","LST","DISTOC","Forest","CANHEIGHT") #,"LST*Forest","LST*CANHEIGHT") # |
|
122 |
df3a<- as.data.frame(sapply(table_data2,FUN=function(x) x-x[1])) |
|
123 |
df3a<- round(df3a,digit=3) #roundto three digits teh differences |
|
124 |
df3a$Model <-model_col |
|
125 |
names(df3a)<- names_table_col |
|
126 |
print(df3a) #show resulting table |
|
127 |
|
|
128 |
###Table 3b, baseline 2: s(lat,lon) + s(elev) |
|
129 |
|
|
130 |
model_col<-c("Baseline2","Northness","Eastness","LST","DISTOC","Forest","CANHEIGHT","LST*Forest","LST*CANHEIGHT") |
|
596 | 131 |
names_table_col<-c("DiffMAE","DiffRMSE","DiffME","Diffr","Model") |
597 | 132 |
|
598 |
df1<- as.data.frame(sapply(table_data1,FUN=function(x) x-x[1]))
|
|
599 |
df1<- round(df1,digit=3) #roundto three digits teh differences
|
|
600 |
df1$Model <-model_col
|
|
601 |
names(df1)<- names_table_col
|
|
602 |
df1
|
|
133 |
df3b <- as.data.frame(sapply(table_data1,FUN=function(x) x-x[1])) #difference between baseline (line 1) and other models
|
|
134 |
df3b <- round(df3b,digit=3) #roundto three digits the differences
|
|
135 |
df3b$Model <- model_col
|
|
136 |
names(df3b)<- names_table_col
|
|
137 |
print(df3b) #Part b of table 3
|
|
603 | 138 |
|
604 |
###baseline 1: s(lat,lon)
|
|
139 |
#Testing siginificance between models
|
|
605 | 140 |
|
606 |
model_col<-c("Baseline1","Elevation","Northing","Easting","LST","DISTOC","Forest","CANHEIGHT") #,"LST*Forest") # removed ,"LST*CANHEIGHT") |
|
607 |
df2<- as.data.frame(sapply(table_data2,FUN=function(x) x-x[1])) |
|
608 |
df2<- round(df2,digit=3) #roundto three digits teh differences |
|
609 |
df2$Model <-model_col |
|
610 |
names(df2)<- names_table_col |
|
611 |
df2 |
|
141 |
mod_compk1 <-kruskal.test(tb1$rmse ~ as.factor(tb1$pred_mod)) #Kruskal Wallis test |
|
142 |
mod_compk2 <-kruskal.test(tb2$rmse ~ as.factor(tb2$pred_mod)) |
|
143 |
print(mod_compk1) #not significant |
|
144 |
print(mod_compk2) #not significant |
|
612 | 145 |
|
613 |
file_name<-paste("table3b_baseline2_paper","_",out_prefix,".txt",sep="") |
|
614 |
write.table(df1,file=file_name,sep=",") |
|
146 |
#Multiple Kruskal Wallis |
|
147 |
mod_compk1 <-kruskalmc(tb1$rmse ~ as.factor(tb1$pred_mod)) |
|
148 |
mod_compk2 <-kruskalmc(tb2$rmse ~ as.factor(tb2$pred_mod)) |
|
149 |
|
|
150 |
print(mod_compk1) |
|
151 |
print(mod_compk2) |
|
152 |
|
|
153 |
#Now write out table 3 |
|
615 | 154 |
|
616 | 155 |
file_name<-paste("table3a_baseline1_paper","_",out_prefix,".txt",sep="") |
617 |
write.table(df2,file=file_name,sep=",") |
|
156 |
write.table(df3a,file=file_name,sep=",") |
|
157 |
|
|
158 |
file_name<-paste("table3b_baseline2_paper","_",out_prefix,".txt",sep="") |
|
159 |
write.table(df3b,file=file_name,sep=",") |
|
160 |
|
|
161 |
############ |
|
162 |
##### 2) Generate: Table 4. Comparison between interpolation methods using validation accuracy metrics |
|
163 |
## This is a table of accuracy metric for the optimal model (baseline2) as identified in the previous step |
|
618 | 164 |
|
619 | 165 |
##Table 4: Interpolation methods comparison |
620 | 166 |
|
621 | 167 |
#get sd for kriging, gam and gwr |
622 |
tb1 <-raster_prediction_obj_1$tb_diagnostic_v #Kriging methods
|
|
623 |
tb2 <-raster_prediction_obj_2$tb_diagnostic_v #Kriging methods
|
|
168 |
#tb1 <-raster_prediction_obj_1$tb_diagnostic_v HGAM baseline 1, loaded
|
|
169 |
#tb2 <-raster_prediction_obj_2$tb_diagnostic_v #GAM baseline 2, loaded
|
|
624 | 170 |
tb3 <-raster_prediction_obj_3$tb_diagnostic_v #Kriging methods |
625 |
tb4 <-raster_prediction_obj_4$tb_diagnostic_v #Kriging methods
|
|
171 |
tb4 <-raster_prediction_obj_4$tb_diagnostic_v #GWR methods
|
|
626 | 172 |
|
627 |
names_mod<-c("mod1","mod2","mod3","mod4","mod5","mod6","mod7","mod8","mod9") |
|
173 |
names_mod<-c("mod1","mod2","mod3","mod4","mod5","mod6","mod7","mod8","mod9","mod10")
|
|
628 | 174 |
|
629 |
sd1 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_1,"sd") |
|
630 |
sd2 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_2,"sd") |
|
631 |
sd3 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_3,"sd") |
|
632 |
sd4 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_4,"sd") |
|
175 |
#Calculate standard deviation for each metric |
|
176 |
sd1 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_1,"sd") # see function script |
|
177 |
sd2 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_2,"sd") # standard deviation for baseline 2 |
|
178 |
sd3 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_3,"sd") # kriging |
|
179 |
sd4 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_4,"sd") #gwr |
|
633 | 180 |
|
634 |
table_sd<-rbind(sd1[1,],sd3[1,]) |
|
635 |
table_sd<-rbind(table_sd,sd4[1,]) |
|
181 |
#Combined sd in one table for mod1 (baseline 2) |
|
182 |
table_sd <- do.call(rbind,list(sd1[1,],sd3[1,],sd4[1,])) #table containing the sd for the three mdethods for baseline 2 |
|
183 |
table_sd <- round(table_sd[,-1],digit=3) #round to three digits the differences |
|
184 |
table_sd <- table_sd[,c("mae","rmse","me","r")] #column 5 contains m50, remove it |
|
636 | 185 |
|
637 | 186 |
summary_metrics_v3<-raster_prediction_obj_3$summary_metrics_v #Kriging methods |
638 | 187 |
summary_metrics_v4<-raster_prediction_obj_4$summary_metrics_v # GWR method |
... | ... | |
641 | 190 |
table_data4 <-summary_metrics_v4$avg[1,c("mae","rmse","me","r")] #first line mod1 (baseline) |
642 | 191 |
table_data1 <- table_data1[1,] |
643 | 192 |
|
644 |
table<-rbind(table_data1,table_data3) |
|
645 |
table<-rbind(table,table_data4) |
|
646 |
table<- round(table,digit=3) #roundto three digits teh differences |
|
193 |
table_ac <-do.call(rbind, list(table_data1,table_data3,table_data4)) |
|
194 |
table_ac <- round(table_ac,digit=3) #roundto three digits teh differences |
|
195 |
|
|
196 |
#combined tables with accuracy metrics and their standard deviations |
|
197 |
table4_paper <-table_combined_symbol(table_ac,table_sd,"±") |
|
198 |
#lapply(lapply(table_ac,FUN=paste,table_sd,FUN=paste,sep="±"),FUN=paste) |
|
647 | 199 |
|
648 | 200 |
model_col<-c("GAM","Kriging","GWR") |
649 | 201 |
names_table_col<-c("MAE","RMSE","ME","R","Model") |
650 | 202 |
|
651 |
table$Model <-model_col |
|
652 |
names(table)<- names_table_col |
|
653 |
table |
|
654 |
|
|
655 |
file_name<-paste("table4_avg_paper","_",out_prefix,".txt",sep="") |
|
656 |
write.table(table,file=file_name,sep=",") |
|
657 |
|
|
658 |
file_name<-paste("table4_sd_paper","_",out_prefix,".txt",sep="") |
|
659 |
write.table(table_sd,file=file_name,sep=",") |
|
203 |
table4_paper$Model <-model_col |
|
204 |
names(table4_paper)<- names_table_col |
|
660 | 205 |
|
661 |
#for(i in nrow(table)) |
|
662 |
#mean_val<-table[i,j] |
|
663 |
#sd_val<-table_sd[i,j] |
|
664 |
#element<-paste(mean_val,"+-",sd_val,sep="") |
|
665 |
#table__paper[i,j]<-element |
|
206 |
file_name<-paste("table4_compariaons_interpolation_methods_avg_paper","_",out_prefix,".txt",sep="") |
|
207 |
write.table(as.data.frame(table4_paper),file=file_name,sep=",") |
|
666 | 208 |
|
667 | 209 |
#################################### |
668 | 210 |
####### Now create figures ############# |
... | ... | |
674 | 216 |
#Figure 5. Overtraining tendency |
675 | 217 |
#Figure 6: Spatial pattern of prediction for one day |
676 | 218 |
|
677 |
### Figure 1 |
|
219 |
### Figure 1: Oregon study area
|
|
678 | 220 |
|
679 |
### Figure 2:
|
|
221 |
#...add code
|
|
680 | 222 |
|
681 |
#not generated in R |
|
223 |
### Figure 2: Method comparison workflow |
|
224 |
|
|
225 |
# Workflow not generated in R |
|
682 | 226 |
|
683 | 227 |
################################################ |
684 |
################### Figure 3 |
|
228 |
################### Figure 3. MAE/RMSE and distance to closest fitting station.
|
|
685 | 229 |
|
686 | 230 |
#Analysis accuracy in term of distance to closest station |
687 | 231 |
#Assign model's names |
688 | 232 |
|
689 |
names_mod <- paste("res_mod",1:9,sep="")
|
|
233 |
names_mod <- paste("res_mod",1:10,sep="")
|
|
690 | 234 |
names(raster_prediction_obj_1$validation_mod_obj[[1]]) |
691 | 235 |
limit_val<-seq(0,150, by=10) |
692 | 236 |
|
693 |
l1 <- distance_to_closest_fitting_station(raster_prediction_obj_1,names_mod,dist_classes=limit_val) |
|
694 |
l3 <- distance_to_closest_fitting_station(raster_prediction_obj_3,names_mod,dist_classes=limit_val) |
|
695 |
l4 <- distance_to_closest_fitting_station(raster_prediction_obj_4,names_mod,dist_classes=limit_val) |
|
237 |
#Call function to extract residuals in term of distance to closest fitting station and summary statistics |
|
238 |
l1 <- distance_to_closest_fitting_station(raster_prediction_obj_1,names_mod,dist_classes=limit_val) #GAM method |
|
239 |
l3 <- distance_to_closest_fitting_station(raster_prediction_obj_3,names_mod,dist_classes=limit_val) #Kriging method |
|
240 |
l4 <- distance_to_closest_fitting_station(raster_prediction_obj_4,names_mod,dist_classes=limit_val) #GWR method |
|
696 | 241 |
|
697 |
list_dist_obj<-list(l1,l3,l4) |
|
698 |
col_t<-c("red","blue","black") |
|
699 |
pch_t<- 1:length(col_t) |
|
700 |
legend_text <- c("GAM","Kriging","GWR") |
|
701 |
mod_name<-c("res_mod1","res_mod1","res_mod1")#selected models |
|
242 |
l1$mae_tb #contains |
|
702 | 243 |
|
703 |
#png_names<- |
|
704 |
#png(file.path(out_path,paste("clim_surface_",y_var_name,"_",sampling_dat$date[i],"_",sampling_dat$prop[i], |
|
705 |
# "_",sampling_dat$run_samp[i],out_prefix,".png", sep=""))) |
|
706 |
|
|
707 |
list_param_plot<-list(list_dist_obj,col_t,pch_t,legend_text,mod_name) |
|
708 |
names(list_param_plot)<-c("list_dist_obj","col_t","pch_t","legend_text","mod_name") |
|
244 |
#Prepare parameters/arguments for plotting |
|
245 |
list_dist_obj <-list(l1,l3,l4) |
|
246 |
col_t <- c("red","blue","black") |
|
247 |
pch_t <- 1:length(col_t) |
|
248 |
legend_text <- c("GAM","Kriging","GWR") |
|
249 |
mod_name <- c("res_mod1","res_mod1","res_mod1")#selected models |
|
250 |
x_tick_labels <- limit_val<-seq(5,125, by=10) |
|
251 |
metric_name <-"rmse_tb" |
|
252 |
title_plot <- "RMSE and distance to closest station for baseline 2" |
|
253 |
y_lab_text <- "RMSE (C)" |
|
254 |
#quick test |
|
255 |
list_param_plot<-list(list_dist_obj,col_t,pch_t,legend_text,mod_name,x_tick_labels,metric_name,title_plot,y_lab_text) |
|
256 |
names(list_param_plot)<-c("list_dist_obj","col_t","pch_t","legend_text","mod_name","x_tick_labels","metric_name","title_plot","y_lab_text") |
|
257 |
plot_dst_MAE(list_param_plot) |
|
709 | 258 |
|
710 |
#debug(plot_dst_MAE) |
|
259 |
metric_name <-"mae_tb" |
|
260 |
title_plot <- "MAE and distance to closest fitting station" |
|
261 |
y_lab_text <- "MAE (C)" |
|
262 |
|
|
263 |
#Now set up plotting device |
|
264 |
res_pix<-480 |
|
265 |
col_mfrow<-2 |
|
266 |
row_mfrow<-1 |
|
267 |
png_file_name<- paste("Figure_3_accuracy_and_distance_to_closest_fitting_station_",out_prefix,".png", sep="") |
|
268 |
png(filename=file.path(out_dir,png_file_name), |
|
269 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
270 |
par(mfrow=c(row_mfrow,col_mfrow)) |
|
271 |
|
|
272 |
#Figure 3a |
|
273 |
list_param_plot<-list(list_dist_obj,col_t,pch_t,legend_text,mod_name,x_tick_labels,metric_name,title_plot,y_lab_text) |
|
274 |
names(list_param_plot)<-c("list_dist_obj","col_t","pch_t","legend_text","mod_name","x_tick_labels","metric_name","title_plot","y_lab_text") |
|
711 | 275 |
plot_dst_MAE(list_param_plot) |
276 |
title(xlab="Distance to closest fitting station (km)") |
|
277 |
|
|
278 |
#Figure 3b: histogram |
|
279 |
barplot(l1$n_tb$res_mod1,names.arg=limit_val, |
|
280 |
ylab="Number of observations", |
|
281 |
xlab="Distance to closest fitting station (km)") |
|
282 |
title("Number of observation in term of distance bins") |
|
283 |
box() |
|
284 |
dev.off() |
|
712 | 285 |
|
713 | 286 |
#################################################### |
714 |
#########Figure 4. RMSE and MAE, mulitisampling and hold out for FSS and GAM.
|
|
287 |
#########Figure 4. RMSE and MAE, mulitisampling and hold out for single time scale methods.
|
|
715 | 288 |
|
716 | 289 |
#Using baseline 2: lat,lon and elev |
717 | 290 |
|
... | ... | |
722 | 295 |
#names_mod<-c("mod1","mod2","mod3","mod4","mod5","mod6","mod7","mod8","mod9") |
723 | 296 |
names_mod<-c("mod1") |
724 | 297 |
|
725 |
debug(calc_stat_prop_tb) |
|
298 |
#debug(calc_stat_prop_tb)
|
|
726 | 299 |
prop_obj_gam<-calc_stat_prop_tb(names_mod,raster_prediction_obj_5) |
727 | 300 |
prop_obj_kriging<-calc_stat_prop_tb(names_mod,raster_prediction_obj_6) |
728 | 301 |
prop_obj_gwr<-calc_stat_prop_tb(names_mod,raster_prediction_obj_7) |
729 | 302 |
|
730 | 303 |
list_prop_obj<-list(prop_obj_gam,prop_obj_kriging,prop_obj_gwr) |
304 |
|
|
305 |
## plot setting for figure 4 |
|
306 |
|
|
731 | 307 |
col_t<-c("red","blue","black") |
732 | 308 |
pch_t<- 1:length(col_t) |
733 | 309 |
legend_text <- c("GAM","Kriging","GWR") |
734 | 310 |
mod_name<-c("mod1","mod1","mod1")#selected models |
735 |
metric_name<-"rmse" |
|
736 |
#png_names<- |
|
737 |
#png(file.path(out_path,paste("clim_surface_",y_var_name,"_",sampling_dat$date[i],"_",sampling_dat$prop[i], |
|
738 |
# "_",sampling_dat$run_samp[i],out_prefix,".png", sep=""))) |
|
739 | 311 |
|
312 |
##### plot figure 4 for paper |
|
313 |
#### |
|
314 |
|
|
315 |
res_pix<-480 |
|
316 |
col_mfrow<-2 |
|
317 |
row_mfrow<-1 |
|
318 |
png_file_name<- paste("Figure_4_proportion_of_holdout_and_accuracy_",out_prefix,".png", sep="") |
|
319 |
png(filename=file.path(out_dir,png_file_name), |
|
320 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
321 |
par(mfrow=c(row_mfrow,col_mfrow)) |
|
322 |
metric_name<-"mae" |
|
323 |
list_param_plot<-list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name) |
|
324 |
names(list_param_plot)<-c("list_prop_obj","col_t","pch_t","legend_text","mod_name","metric_name") |
|
325 |
|
|
326 |
plot_prop_metrics(list_param_plot) |
|
327 |
title(main="MAE for hold out and methods", |
|
328 |
xlab="Hold out validation/testing proportion", |
|
329 |
ylab="MAE (C)") |
|
330 |
|
|
331 |
#now figure 4b |
|
332 |
metric_name<-"rmse" |
|
740 | 333 |
list_param_plot<-list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name) |
741 | 334 |
names(list_param_plot)<-c("list_prop_obj","col_t","pch_t","legend_text","mod_name","metric_name") |
742 |
#debug(plot_prop_metrics) |
|
743 | 335 |
plot_prop_metrics(list_param_plot) |
336 |
title(main="RMSE for hold out and methods", |
|
337 |
xlab="Hold out validation/testing proportion", |
|
338 |
ylab="RMSE (C)") |
|
339 |
|
|
340 |
dev.off() |
|
744 | 341 |
|
745 | 342 |
#################################################### |
746 | 343 |
#########Figure 5. Overtraining tendency |
747 | 344 |
|
748 | 345 |
#read in relevant data: |
749 |
|
|
750 |
tb5 <-raster_prediction_obj_5$tb_diagnostic_v #gam dailycontains the accuracy metrics for each run... |
|
751 |
tb6 <-raster_prediction_obj_6$tb_diagnostic_v #Kriging daily methods |
|
752 |
tb7 <-raster_prediction_obj_7$tb_diagnostic_v #gwr daily methods |
|
753 |
|
|
754 |
prop_obj_gam_s<-calc_stat_prop_tb(names_mod,raster_prediction_o bj_5,testing=FALSE) |
|
755 |
prop_obj_kriging_s<-calc_stat_prop_tb(names_mod,raster_prediction_obj_6,testing=FALSE) |
|
756 |
prop_obj_gwr_s<-calc_stat_prop_tb(names_mod,raster_prediction_obj_7,testing=FALSE) |
|
757 |
|
|
758 |
prop_obj_gam$avg_tb - prop_obj_gam_s$avg_tb |
|
759 |
plot(prop_obj_gam_s$avg_tb$rmse ~ prop_obj_gam_s$avg_tb$prop, type="b",) |
|
760 |
|
|
761 |
y_range<-range(prop_obj_gam$avg_tb$rmse,prop_obj_gam_s$avg_tb$rmse) |
|
762 |
plot(prop_obj_gam$avg_tb$rmse ~ prop_obj_gam$avg_tb$prop, type="b",ylim=y_range) |
|
763 |
lines(prop_obj_gam_s$avg_tb$rmse ~ prop_obj_gam_s$avg_tb$prop, type="b",ylim=y_range,col=c("red")) |
|
764 |
lines(prop_obj_gam$avg_tb$rmse ~ prop_obj_gam$avg_tb$prop, type="b",ylim=y_range,col=c("red"),lty=2) |
|
765 |
lines(prop_obj_gwr_s$avg_tb$rmse ~ prop_obj_gwr_s$avg_tb$prop, type="b",ylim=y_range,col=c("black")) |
|
766 |
lines(prop_obj_gwr$avg_tb$rmse ~ prop_obj_gam$avg_tb$prop, type="b",ylim=y_range,col=c("black"),lty=2) |
|
767 |
|
|
768 |
y_range<-range(prop_obj_kriging$avg_tb$rmse,prop_obj_kriging_s$avg_tb$rmse) |
|
769 |
plot(prop_obj_kriging$avg_tb$rmse ~ prop_obj_kriging$avg_tb$prop, type="b",ylim=y_range,col=c("blue"),lty=2) |
|
770 |
lines(prop_obj_kriging_s$avg_tb$rmse ~ prop_obj_kriging_s$avg_tb$prop, type="b",ylim=y_range,col=c("blue")) |
|
771 |
|
|
772 | 346 |
## Calculate average difference for RMSE for all three methods |
773 | 347 |
#read in relevant data: |
774 | 348 |
tb1_s<-extract_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"metrics_s") |
775 | 349 |
rownames(tb1_s)<-NULL #remove row names |
776 | 350 |
tb1_s$method_interp <- "gam_daily" #add type of interpolation...out_prefix too?? |
777 | 351 |
|
778 |
tb3_s<-extract_from_list_obj(raster_prediction_obj_4$validation_mod_obj,"metrics_s")
|
|
352 |
tb3_s<-extract_from_list_obj(raster_prediction_obj_3$validation_mod_obj,"metrics_s")
|
|
779 | 353 |
rownames(tb1_s)<-NULL #remove row names |
780 | 354 |
tb3_s$method_interp <- "kriging_daily" #add type of interpolation...out_prefix too?? |
781 | 355 |
|
782 |
tb4_s<-extract_from_list_obj(raster_prediction_obj_3$validation_mod_obj,"metrics_s")
|
|
356 |
tb4_s<-extract_from_list_obj(raster_prediction_obj_4$validation_mod_obj,"metrics_s")
|
|
783 | 357 |
rownames(tb4_s)<-NULL #remove row names |
784 | 358 |
tb4_s$method_interp <- "gwr_daily" #add type of interpolation...out_prefix too?? |
785 | 359 |
|
... | ... | |
791 | 365 |
tb3 <-raster_prediction_obj_3$tb_diagnostic_v #Kriging daily methods |
792 | 366 |
tb4 <-raster_prediction_obj_4$tb_diagnostic_v #gwr daily methods |
793 | 367 |
|
794 |
diff_df<-function(tb_s,tb_v,list_metric_names){ |
|
795 |
tb_diff<-vector("list", length(list_metric_names)) |
|
796 |
for (i in 1:length(list_metric_names)){ |
|
797 |
metric_name<-list_metric_names[i] |
|
798 |
tb_diff[[i]] <-tb_s[,c(metric_name)] - tb_v[,c(metric_name)] |
|
799 |
} |
|
800 |
names(tb_diff)<-list_metric_names |
|
801 |
tb_diff<-as.data.frame(do.call(cbind,tb_diff)) |
|
802 |
return(tb_diff) |
|
803 |
} |
|
368 |
#Calculate difference in MAE and RMSE for training and testing data: call diff_df function |
|
369 |
diff_tb1 <-diff_df(tb1_s[tb1_s$pred_mod=="mod1",],tb1[tb1$pred_mod=="mod1",],c("mae","rmse")) #gam select differences for mod1 |
|
370 |
diff_tb3 <-diff_df(tb3_s[tb3_s$pred_mod=="mod1",],tb3[tb3$pred_mod=="mod1",],c("mae","rmse")) #kriging |
|
371 |
diff_tb4 <-diff_df(tb4_s[tb4_s$pred_mod=="mod1",],tb4[tb4$pred_mod=="mod1",],c("mae","rmse")) #gwr |
|
372 |
|
|
373 |
diff_mae_data <-data.frame(gam=diff_tb1$mae,kriging=diff_tb3$mae,gwr=diff_tb4$mae) |
|
374 |
diff_rmse_data <-data.frame(gam=diff_tb1$rmse,kriging=diff_tb3$rmse,gwr=diff_tb4$rmse) |
|
375 |
|
|
376 |
#Test the plot |
|
377 |
boxplot(diff_mae_data) |
|
378 |
boxplot(diff_rmse_data) #plot differences in training and testing accuracies for three methods |
|
379 |
title(main="Training and testing RMSE for hold out and interpolation methods", |
|
380 |
xlab="Interpolation method", |
|
381 |
ylab="RMSE (C)") |
|
382 |
|
|
383 |
tb5 <-raster_prediction_obj_5$tb_diagnostic_v #gam dailycontains the accuracy metrics for each run... |
|
384 |
tb6 <-raster_prediction_obj_6$tb_diagnostic_v #Kriging daily methods |
|
385 |
tb7 <-raster_prediction_obj_7$tb_diagnostic_v #gwr daily methods |
|
804 | 386 |
|
805 |
#debug(diff_df) |
|
806 |
diff_tb1 <-diff_df(tb1_s[tb1_s$pred_mod=="mod1",],tb1[tb1$pred_mod=="mod1",],c("mae","rmse")) #select differences for mod1 |
|
807 |
diff_tb3 <-diff_df(tb3_s[tb3_s$pred_mod=="mod1",],tb3[tb3$pred_mod=="mod1",],c("mae","rmse")) |
|
808 |
diff_tb4 <-diff_df(tb4_s[tb4_s$pred_mod=="mod1",],tb4[tb4$pred_mod=="mod1",],c("mae","rmse")) |
|
387 |
prop_obj_gam_s<-calc_stat_prop_tb(names_mod,raster_prediction_obj_5,testing=FALSE) #training accuracy with hold out proportion |
|
388 |
prop_obj_kriging_s<-calc_stat_prop_tb(names_mod,raster_prediction_obj_6,testing=FALSE) |
|
389 |
prop_obj_gwr_s<-calc_stat_prop_tb(names_mod,raster_prediction_obj_7,testing=FALSE) |
|
390 |
|
|
391 |
plot(prop_obj_gam_s$avg_tb$rmse ~ prop_obj_gam_s$avg_tb$prop, type="b",) |
|
392 |
|
|
393 |
############### |
|
394 |
#### plot figure 5 |
|
395 |
#set up the output file to plot |
|
396 |
res_pix<-480 |
|
397 |
col_mfrow<-2 |
|
398 |
row_mfrow<-1 |
|
399 |
png_file_name<- paste("Figure_5_overtraining_tendency_and_holdout_proportion_",out_prefix,".png", sep="") |
|
400 |
png(filename=file.path(out_dir,png_file_name), |
|
401 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
402 |
par(mfrow=c(row_mfrow,col_mfrow)) |
|
403 |
|
|
404 |
col_t<-c("red","blue","black") |
|
405 |
pch_t<- 1:length(col_t) |
|
406 |
|
|
407 |
y_range<-range(prop_obj_kriging$avg_tb$rmse,prop_obj_kriging_s$avg_tb$rmse) |
|
408 |
#y_range<-range(prop_obj_gam$avg_tb$rmse,prop_obj_gam_s$avg_tb$rmse) |
|
409 |
plot(prop_obj_gam$avg_tb$rmse ~ prop_obj_gam$avg_tb$prop, ylab="",xlab="",type="b",col=c("red"),pch=pch_t[1],ylim=y_range,lty=2) |
|
410 |
lines(prop_obj_gam_s$avg_tb$rmse ~ prop_obj_gam_s$avg_tb$prop, ylab="",xlab="",type="b",pch=pch_t[1],ylim=y_range,col=c("red")) |
|
411 |
lines(prop_obj_gwr_s$avg_tb$rmse ~ prop_obj_gwr_s$avg_tb$prop, ylab="",xlab="",type="b",ylim=y_range,pch=pch_t[3],col=c("black")) |
|
412 |
lines(prop_obj_gwr$avg_tb$rmse ~ prop_obj_gam$avg_tb$prop, ylab="",xlab="",type="b",ylim=y_range,pch=pch_t[3],,col=c("black"),lty=2) |
|
413 |
lines(prop_obj_kriging$avg_tb$rmse ~ prop_obj_kriging$avg_tb$prop,ylab="",xlab="", type="b",ylim=y_range,pch=pch_t[2],,col=c("blue"),lty=2) |
|
414 |
lines(prop_obj_kriging_s$avg_tb$rmse ~ prop_obj_kriging_s$avg_tb$prop,ylab="",xlab="",type="b",ylim=y_range,pch=pch_t[2],col=c("blue")) |
|
415 |
|
|
416 |
legend("topleft",legend=legend_text, |
|
417 |
cex=0.9, pch=pch_t,col=col_t,lty=1,bty="n") |
|
418 |
title(main="Training and testing RMSE for hold out and methods", |
|
419 |
xlab="Hold out validation/testing proportion", |
|
420 |
ylab="RMSE (C)") |
|
421 |
|
|
422 |
boxplot(diff_mae_data) #plot differences in training and testing accuracies for three methods |
|
423 |
title(main="Difference between training and testing MAE", |
|
424 |
xlab="Interpolation method", |
|
425 |
ylab="MAE (C)") |
|
809 | 426 |
|
810 |
x<-data.frame(gam=diff_tb1$mae,gwr=diff_tb3$mae,kriging=diff_tb4$mae)
|
|
427 |
dev.off()
|
|
811 | 428 |
|
812 |
boxplot(x) #plot differences in training and testing accuracies for three methods |
|
429 |
############### STUDY TIME AND accuracy |
|
430 |
#########Figure 6: Monthly RMSE averages for the three interpolation methods: GAM, GWR and Kriging. |
|
813 | 431 |
|
814 | 432 |
mae_tmp<- data.frame(gam=tb1[tb1$pred_mod=="mod1",c("mae")], |
815 | 433 |
kriging=tb3[tb3$pred_mod=="mod1",c("mae")], |
816 | 434 |
gwr=tb4[tb4$pred_mod=="mod1",c("mae")]) |
817 | 435 |
|
818 |
plot(mae_tmp$gam,col=c("red"),type="b") |
|
819 |
lines(mae_tmp$kriging,col=c("blue"),type="b") |
|
820 |
lines(mae_tmp$gwr,col=c("black"),type="b") |
|
436 |
plot(mae_tmp$gam,col=c("red"),type="b",pch=1)
|
|
437 |
lines(mae_tmp$kriging,col=c("blue"),type="b",pch=2)
|
|
438 |
lines(mae_tmp$gwr,col=c("black"),type="b",pch=2)
|
|
821 | 439 |
legend("topleft",legend=legend_text, |
822 | 440 |
cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n") |
823 | 441 |
|
... | ... | |
826 | 444 |
x2<-tb1[tb1$pred_mod=="mod1",c("mae","date")] |
827 | 445 |
arrange(x2,desc(mae)) |
828 | 446 |
|
829 |
kriging=tb3[tb3$pred_mod=="mod1",c("mae")], |
|
830 |
gwr=tb4[tb4$pred_mod=="mod1",c("mae")]) |
|
447 |
#kriging=tb3[tb3$pred_mod=="mod1",c("mae")],
|
|
448 |
# gwr=tb4[tb4$pred_mod=="mod1",c("mae")])
|
|
831 | 449 |
|
832 | 450 |
##### MONTHLY AVERAGES |
833 | 451 |
|
... | ... | |
840 | 458 |
lines(1:12,tb3_month$mae,col=c("blue"),type="b") |
841 | 459 |
lines(1:12,tb4_month$mae,col=c("black"),type="b") |
842 | 460 |
|
843 |
date<-strptime(tb1$date, "%Y%m%d") # interpolation date being processed |
|
844 |
month<-strftime(date, "%m") # current month of the date being processed |
|
461 |
add_month_tag<-function(tb){ |
|
462 |
date<-strptime(tb$date, "%Y%m%d") # interpolation date being processed |
|
463 |
month<-strftime(date, "%m") # current month of the date being processed |
|
464 |
} |
|
465 |
tb1$month<-add_month_tag(tb1) |
|
466 |
tb3$month<-add_month_tag(tb3) |
|
467 |
tb4$month<-add_month_tag(tb4) |
|
468 |
|
|
469 |
metric_name<-"mae" |
|
470 |
month_data_list<-list(gam=tb1[tb1$pred_mod=="mod1",c(metric_name,"month")], |
|
471 |
kriging=tb3[tb3$pred_mod=="mod1",c(metric_name,"month")], |
|
472 |
gwr=tb4[tb4$pred_mod=="mod1",c(metric_name,"month")]) |
|
473 |
y_range<-range(unlist(month_data_list)) |
|
474 |
|
|
475 |
|
|
476 |
#boxplot(test[[metric_ac]]~test[[c("month")]],outline=FALSE,horizontal=FALSE,cex=0.5, |
|
477 |
# ylab=paste(metric_ac,"in degree C",sep=" "),,axisnames=FALSE,axes=FALSE) |
|
478 |
#boxplot(test[[metric_ac]]~test[[c("month")]],outline=FALSE,horizontal=FALSE,cex=0.5, |
|
479 |
# ylab=paste(metric_ac,"in degree C",sep=" ")) |
|
480 |
#axis(1, labels = FALSE) |
|
481 |
## Create some text labels |
|
482 |
#labels <- month.abb # abbreviated names for each month |
|
483 |
## Plot x axis labels at default tick marks |
|
484 |
#text(1:length(labels), par("usr")[3] - 0.25, srt = 45, adj = 1, |
|
485 |
# labels = labels, xpd = TRUE) |
|
486 |
#axis(2) |
|
487 |
#box() |
|
488 |
|
|
489 |
#Now plot figure 6 |
|
490 |
res_pix<-480 |
|
491 |
col_mfrow<-2 |
|
492 |
row_mfrow<-2 |
|
493 |
png_file_name<- paste("Figure_6_monthly_accuracy_",out_prefix,".png", sep="") |
|
494 |
png(filename=file.path(out_dir,png_file_name), |
|
495 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
496 |
par(mfrow=c(row_mfrow,col_mfrow)) |
|
845 | 497 |
|
846 |
tb1$month<-month |
|
847 |
x3<-tb1[tb1$pred_mod=="mod1",] |
|
498 |
y_range<-range(tb1_month$mae,tb3_month$mae,tb4_month$mae) |
|
499 |
xlab_tick <- unique(tb1$month) |
|
500 |
xlab_text <-"Month" |
|
501 |
|
|
502 |
plot(1:12,tb1_month$mae,col=c("red"),type="b",ylim=y_range,xlab=xlab_text,xaxt="n") |
|
503 |
lines(1:12,tb3_month$mae,col=c("blue"),type="b") |
|
504 |
lines(1:12,tb4_month$mae,col=c("black"),type="b") |
|
505 |
axis(1,at=1:12,labels=xlab_tick) |
|
506 |
title(main="Monthly average MAE") |
|
848 | 507 |
|
849 |
(plot(x3[month=="01",c("mae")])) |
|
508 |
ylab_text<-"MAE (C)" |
|
509 |
xlab_text<-"Month" |
|
510 |
y_range<-range(month_data_list$gam$mae,month_data_list$kriging$mae,month_data_list$gwr$mae) |
|
511 |
boxplot(mae~month,data=month_data_list$gam,ylim=y_range,main="GAM",ylab=ylab_text,outline=FALSE) |
|
512 |
boxplot(mae~month,data=month_data_list$kriging,ylim=y_range,main="Kriging",ylab=ylab_text,outline=FALSE) |
|
513 |
boxplot(mae~month,data=month_data_list$gwr,ylim=y_range,main="GWR",ylab=ylab_text,outline=FALSE) |
|
514 |
|
|
515 |
dev.off() |
|
516 |
|
|
517 |
plot(x3[month=="01",c("mae")])) |
|
850 | 518 |
median(x3[x3$month=="03",c("mae")],na.rm=T) |
851 | 519 |
mean(x3[x3$month=="03",c("mae")],na.rm=T) |
852 | 520 |
|
521 |
boxplot(x) |
|
853 | 522 |
|
854 |
####### FIGURE 6 ###### |
|
523 |
#Now generate table |
|
524 |
|
|
525 |
length(tb1_month$mae) |
|
526 |
names(tb1_month) |
|
527 |
|
|
528 |
####### FIGURE 7: Spatial pattern ###### |
|
855 | 529 |
|
856 | 530 |
y_var_name <-"dailyTmax" |
857 | 531 |
index<-244 #index corresponding to January 1 |
... | ... | |
880 | 554 |
min_val <- 0 |
881 | 555 |
layout_m<-c(1,3) #one row two columns |
882 | 556 |
|
883 |
png(paste("spatial_pattern_tmax_prediction_levelplot_",date_selected,out_prefix,".png", sep=""), |
|
557 |
png(paste("Figure7__spatial_pattern_tmax_prediction_levelplot_",date_selected,out_prefix,".png", sep=""),
|
|
884 | 558 |
height=480*layout_m[1],width=480*layout_m[2]) |
885 | 559 |
|
886 | 560 |
levelplot(pred_temp_s,main="Interpolated Surfaces Method Comparison", ylab=NULL,xlab=NULL, |
... | ... | |
1037 | 711 |
#for (i in 1:length(list_myModels)){ |
1038 | 712 |
# i<-1 |
1039 | 713 |
|
1040 |
|
|
1041 | 714 |
list_models_info <-lapply(1:length(list_myModels),FUN=create_s_and_p_table_term_models,list_myModels) |
1042 | 715 |
#raster_prediction_obj_1$method_mod_obj[[i]]$sampling_dat$date |
1043 | 716 |
dates<-(extract_from_list_obj(raster_prediction_obj_1$method_mod_obj,"sampling_dat"))$date #get vector of dates |
1044 | 717 |
names(list_models_info)<-dates |
1045 | 718 |
|
1046 |
#Add dates to the data.frame?? |
|
719 |
#Add dates to the data.frame?? -->later
|
|
1047 | 720 |
|
1048 | 721 |
s.table_term_tb <-extract_from_list_obj(list_models_info,"s.table_term") |
1049 |
s.table_term_tb_t <-extract_list_from_list_obj(list_models_info,"s.table_term") |
|
722 |
#s.table_term_tb_t <-extract_list_from_list_obj(list_models_info,"s.table_term") #add dates to summary later |
|
723 |
AIC_models_tb <-extract_from_list_obj(list_models_info,"AIC_models") |
|
1050 | 724 |
|
1051 |
max_val<-round_any(range_val[2],10, f=ceiling) #round max value to nearest 10 (from plyr package) |
|
1052 |
min_val<-0 |
|
1053 |
limit_val<-seq(min_val,max_val, by=10) |
|
1054 |
}else{ |
|
1055 |
limit_val<-dist_classes |
|
1056 |
} |
|
1057 | 725 |
threshold_val<-c(0.01,0.05,0.1) |
1058 | 726 |
s.table_term_tb$p_val_rec1 <- s.table_term_tb[["p-value"]] < threshold_val[1] |
1059 | 727 |
s.table_term_tb$p_val_rec2 <- s.table_term_tb[["p-value"]] < threshold_val[2] |
1060 | 728 |
s.table_term_tb$p_val_rec3 <- s.table_term_tb[["p-value"]] < threshold_val[3] |
1061 | 729 |
|
1062 |
#dstspat_dat$dst_cat1 <- cut(dstspat_dat$dst,include.lowest=TRUE,breaks=limit_val) |
|
1063 |
|
|
1064 | 730 |
#test<-do.call(rbind,s.table_term_tb_t) |
1065 |
#cut |
|
731 |
|
|
1066 | 732 |
s.table_term_tb |
1067 | 733 |
names_var <- c("p-value") |
1068 | 734 |
#id_var <- |
... | ... | |
1084 | 750 |
summary_s.table_term2 |
1085 | 751 |
|
1086 | 752 |
#Now combine tables and drop duplicate columns the combined table can be modified for the paper... |
1087 |
s.table_summary_tb <- cbind(summary_s.table_term,summary_s.table_term2[,-c("term_name","mod_name")]) |
|
753 |
s.table_summary_tb <- cbind(summary_s.table_term,summary_s.table_term2[,]) #-c("term_name","mod_name")]) |
|
754 |
|
|
755 |
AIC_models_tb |
|
756 |
names_var <- c("AIC") |
|
757 |
#id_var <- |
|
758 |
t3<-melt(AIC_models_tb, |
|
759 |
measure=names_var, |
|
760 |
id=c("mod_name","term_name"), |
|
761 |
na.rm=T) |
|
762 |
|
|
763 |
summary_AIC <- cast(t3,term_name+mod_name~variable,median) |
|
764 |
summary_AIC |
|
765 |
|
|
766 |
|
|
767 |
#Now write out table... |
|
768 |
|
|
769 |
table<-rbind(table_data1,table_data3) |
|
770 |
table<-rbind(table,table_data4) |
|
771 |
table<- round(table,digit=3) #roundto three digits teh differences |
|
1088 | 772 |
|
773 |
model_col<-c("GAM","Kriging","GWR") |
|
774 |
names_table_col<-c("MAE","RMSE","ME","R","Model") |
|
1089 | 775 |
|
776 |
table$Model <-model_col |
|
777 |
names(table)<- names_table_col |
|
778 |
table |
|
779 |
|
|
780 |
file_name<-paste("table4_avg_paper","_",out_prefix,".txt",sep="") |
|
781 |
write.table(table,file=file_name,sep=",") |
|
782 |
|
|
783 |
file_name<-paste("table4_sd_paper","_",out_prefix,".txt",sep="") |
|
784 |
write.table(table_sd,file=file_name,sep=",") |
|
1090 | 785 |
|
1091 |
################### END OF SCRIPT ###################
|
|
786 |
###################### END OF SCRIPT #######################
|
|
1092 | 787 |
|
1093 | 788 |
|
Also available in: Unified diff
contribution of covariates paper, script major clean up and modification -splitting functions and script