Project

General

Profile

« Previous | Next » 

Revision d8763ab5

Added by Benoit Parmentier over 11 years ago

contribution of covariates paper, script major clean up and modification -splitting functions and script

View differences:

climate/research/oregon/interpolation/contribution_of_covariates_paper_interpolation.R
1 1
####################################  INTERPOLATION OF TEMPERATURES  #######################################
2 2
############################  Script for manuscript analyses,tables and figures #######################################
3
#This script reads information concerning the Oregon case study to adapt data for the revised 
4
# interpolation code.
5
#Figures and data for the contribution of covariate paper are also produced.
6
#AUTHOR: Benoit Parmentier                                                                      #
7
#DATE: 08/13/2013            
8
#Version: 1
9
#PROJECT: Environmental Layers project                                       #
3
#This script uses the worklfow code applied to the Oregon case study. Daily methods (GAM,GWR, Kriging) are tested with
4
#different covariates using two baselines. Accuracy methods are added in the the function script to evaluate results.
5
#Figures, tables and data for the contribution of covariate paper are also produced in the script.
6
#AUTHOR: Benoit Parmentier                                                                      
7
#DATE: 08/15/2013            
8
#Version: 2
9
#PROJECT: Environmental Layers project                                     
10 10
#################################################################################################
11 11

  
12
###Loading R library and packages                                                      
12
### Loading R library and packages        
13
#library used in the workflow production:
13 14
library(gtools)                              # loading some useful tools 
14 15
library(mgcv)                                # GAM package by Simon Wood
15 16
library(sp)                                  # Spatial pacakge with class definition by Bivand et al.
......
18 19
library(gstat)                               # Kriging and co-kriging by Pebesma et al.
19 20
library(fields)                              # NCAR Spatial Interpolation methods such as kriging, splines
20 21
library(raster)                              # Hijmans et al. package for raster processing
21
library(gdata)                               # various tools with xls reading
22
library(rasterVis)
23
library(parallel)
24
library(maptools)
25
library(maps)
26
library(reshape)
27
library(plotrix)
28
library(plyr)
22
library(gdata)                               # various tools with xls reading, cbindX
23
library(rasterVis)                           # Raster plotting functions
24
library(parallel)                            # Parallelization of processes with multiple cores
25
library(maptools)                            # Tools and functions for sp and other spatial objects e.g. spCbind
26
library(maps)                                # Tools and data for spatial/geographic objects
27
library(reshape)                             # Change shape of object, summarize results 
28
library(plotrix)                             # Additional plotting functions
29
library(plyr)                                # Various tools including rbind.fill
30
library(spgwr)                               # GWR method
31
library(automap)                             # Kriging automatic fitting of variogram using gstat
32
library(rgeos)                               # Geometric, topologic library of functions
33
#RPostgreSQL                                 # Interface R and Postgres, not used in this script
34

  
35
#Additional libraries not used in workflow
36
library(pgirmess)                            # Krusall Wallis test with mulitple options, Kruskalmc {pgirmess}  
29 37

  
30 38
#### FUNCTION USED IN SCRIPT
31 39

  
32
load_obj <- function(f)
33
{
34
  env <- new.env()
35
  nm <- load(f, env)[1]
36
  env[[nm]]
37
}
38

  
39
extract_list_from_list_obj<-function(obj_list,list_name){
40
  #Create a list of an object from a given list of object using a name prodived as input
41
  
42
  list_tmp<-vector("list",length(obj_list))
43
  for (i in 1:length(obj_list)){
44
    tmp<-obj_list[[i]][[list_name]] #double bracket to return data.frame
45
    list_tmp[[i]]<-tmp
46
  }
47
  return(list_tmp) #this is  a data.frame
48
}
49

  
50
#This extract a data.frame object from raster prediction obj and combine them in one data.frame 
51
extract_from_list_obj<-function(obj_list,list_name){
52
  #extract object from list of list. This useful for raster_prediction_obj
53
  library(ply)
54
  
55
  list_tmp<-vector("list",length(obj_list))
56
  for (i in 1:length(obj_list)){
57
    tmp<-obj_list[[i]][[list_name]] #double bracket to return data.frame
58
    list_tmp[[i]]<-tmp
59
  }
60
  tb_list_tmp<-do.call(rbind.fill,list_tmp) #long rownames
61
  #tb_list_tmp<-do.call(rbind,list_tmp) #long rownames
62
  
63
  return(tb_list_tmp) #this is  a data.frame
64
}
65

  
66
calc_stat_from_raster_prediction_obj <-function(raster_prediction_obj,stat){
67
  tb <-raster_prediction_obj$tb_diagnostic_v  #Kriging methods
68
  
69
  t<-melt(tb,
70
          measure=c("mae","rmse","r","m50"), 
71
          id=c("pred_mod"),
72
          na.rm=T)
73
  
74
  stat_tb<-cast(t,pred_mod~variable,stat)
75
  return(stat_tb)
76
}
77

  
78
## Produce data.frame with residuals for models and distance to closest fitting station
79
calc_dist_ref_data_point <- function(i,list_param){
80
  #This function creates a list of data.frame containing the distance to teh closest
81
  # reference point (e.g. fitting station) for a give data frame. 
82
  #Inputs:
83
  #data_s: given data.frame from wich distance is computed
84
  #data_v: reference data.frame, destination, often the fitting points used in analyses
85
  #i: index variable to operate on list
86
  #names_var: 
87
  #Outputs:
88
  #list_dstspat_er
89
  
90
  #Parsing input arguments
91
  data_s<-list_param$data_s[[i]]
92
  data_v<-list_param$data_v[[i]]
93
  
94
  names_var<-list_param$names_var
95
  
96
  ######
97
  
98
  names_var<-intersect(names_var,names(data_v)) #there may be missing columns
99
  #use columns that exists
100
  
101
  d_s_v<-matrix(0,nrow(data_v),nrow(data_s))
102
  for(k in 1:nrow(data_s)){
103
    pt<-data_s[k,]
104
    d_pt<-(spDistsN1(data_v,pt,longlat=FALSE))/1000  #Distance to station k in km
105
    d_s_v[,k]<-d_pt
106
  }
107
  
108
  #Create data.frame with position, ID, dst and residuals...
109
  
110
  pos<-vector("numeric",nrow(data_v))
111
  y<-vector("numeric",nrow(data_v))
112
  dst<-vector("numeric",nrow(data_v))
113
  
114
  for (k in 1:nrow(data_v)){
115
    pos[k]<-match(min(d_s_v[k,]),d_s_v[k,])
116
    dst[k]<-min(d_s_v[k,]) 
117
  }
118
  
119
  dstspat_er<-as.data.frame(cbind(v_id=as.vector(data_v$id),s_id=as.vector(data_s$id[pos]),
120
                                  pos=pos, lat=data_v$lat, lon=data_v$lon, x=data_v$x,y=data_v$y,
121
                                  dst=dst,
122
                                  as.data.frame(data_v[,names_var])))
123
  
124
  return(dstspat_er)  
125
}  
126

  
127
### Main function to call to obtain distance to closest fitting stations for valiation dataset
128
distance_to_closest_fitting_station<-function(raster_prediction_obj,names_mod,dist_classes=c(0)){
129
  #This function computes the distance between training and testing points and returns and data frame
130
  #with distance,residuals, ID of training and testing points
131
  #Input: raster_prediction_obj, names of residuals models to return, distance classes
132
  #Output: data frame
133
  
134
  #Functions used in the script
135
  
136
  mae_fun<-function(x){mean(abs(x))} #Mean Absolute Error give a residuals vector
137
  sd_abs_fun<-function(x){sd(abs(x))} #sd Absolute Error give a residuals vector
138
  
139
  ##BEGIN
140
  
141
  ##### PART I: generate data.frame with residuals in term of distance to closest fitting station
142
  
143
  #return list of training and testing data frames
144
  list_data_s <- extract_list_from_list_obj(raster_prediction_obj$validation_mod_obj,"data_s") #training
145
  list_data_v <- extract_list_from_list_obj(raster_prediction_obj$validation_mod_obj,"data_v") #testing (validation)
146
  
147
  i<-1
148
  names_var<-c(names_mod,"dates")
149
  list_param_dst<-list(i,list_data_s,list_data_v,names_mod)
150
  names(list_param_dst) <- c("index","data_s","data_v","names_var")
151
  
152
  #call function "calc_dist_ref_data_point" over 365 dates
153
  #note that this function depends on other functions !!! see script
154
  
155
  list_dstspat_er <-lapply(1:length(list_data_v),FUN=calc_dist_ref_data_point,list_param=list_param_dst)
156
  #now assemble in one data.frame
157
  dstspat_dat<-do.call(rbind.fill,list_dstspat_er)
158

  
159
  ########### PART II: generate distance classes and summary statistics
160
  
161
  if (length(dist_classes)==1){
162
    range_val<-range(dstspat_dat$dst)
163
    max_val<-round_any(range_val[2],10, f=ceiling) #round max value to nearest 10 (from plyr package)
164
    min_val<-0
165
    limit_val<-seq(min_val,max_val, by=10)
166
  }else{
167
    limit_val<-dist_classes
168
  }
169
 
170
  dstspat_dat$dst_cat1 <- cut(dstspat_dat$dst,include.lowest=TRUE,breaks=limit_val)
171
  
172
  names_var <- intersect(names_mod,names(dstspat_dat))
173
  t<-melt(dstspat_dat,
174
          measure=names_var, 
175
          id=c("dst_cat1"),
176
          na.rm=T)
177
  
178
  mae_tb<-cast(t,dst_cat1~variable,mae_fun)
179
  sd_abs_tb<-cast(t,dst_cat1~variable,sd_abs_fun)
180
  
181
  avg_tb<-cast(t,dst_cat1~variable,mean)
182
  sd_tb<-cast(t,dst_cat1~variable,sd)
183
  n_tb<-cast(t,dst_cat1~variable,length)
184
  #n_NA<-cast(t,dst_cat1~variable,is.na)
185
  
186
  #### prepare returning object
187
  dstspat_obj<-list(dstspat_dat,mae_tb,sd_abs_tb,avg_tb,sd_tb,n_tb)
188
  names(dstspat_obj) <-c("dstspat_dat","mae_tb","sd_abs_tb","avg_tb","sd_tb","n_tb")
189
  
190
  return(dstspat_obj)
191
  
192
}
193

  
194
# create plot of accury in term of distance to closest fitting station
195
plot_dst_spat_fun<-function(stat_tb,names_var,cat_val){
196
  
197
  range_y<-range(as.vector(unlist(stat_tb[,names_var])),na.rm=T) #flatten data.frame
198
  col_t<-rainbow(length(names_var))
199
  pch_t<- 1:length(names_var)
200
  plot(stat_tb[,names_var[1]], ylim=range_y,pch=pch_t[1],col=col_t[1],type="b",
201
       yla="MAE (in degree C)",xlab="",xaxt="n")
202
  #points((stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[1],col=col_t[1]),type="p")
203
  for (k in 2:length(names_var)){
204
    lines(stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[k],col=col_t[k],type="b",
205
          xlab="",axes=F)
206
    #points((stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[k],col=col_t[k]),type="p")
207
  }
208
  legend("topleft",legend=names_var, 
209
         cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n")
210
  axis(1,at=1:length(stat_tb[,1]),labels=stat_tb[,1])
211
}
212

  
213
plot_dst_MAE <-function(list_param){
214
  #
215
  #list_dist_obj: list of dist object 
216
  #col_t: list of color for each 
217
  #pch_t: symbol for line
218
  #legend_text: text for line and symbol
219
  #mod_name: selected models
220
  #
221
  ## BEGIN ##
222
  
223
  list_dist_obj<-list_param$list_dist_obj
224
  col_t<-list_param$col_t 
225
  pch_t<- list_param$pch_t 
226
  legend_text <- list_param$legend_text
227
  list_mod_name<-list_param$mod_name
228
  
229
  for (i in 1:length(list_dist_obj)){
230
    
231
    l<-list_dist_obj[[i]]
232
    mae_tb<-l$mae_tb
233
    n_tb<-l$n_tb
234
    sd_abs_tb<-l$sd_abs_tb
235
    
236
    mod_name<-list_mod_name[i]
237
    xlab_text<-"distance to fitting station"
238
    
239
    n <- unlist(n_tb[1:13,c(mod_name)])
240
    y <- unlist(mae_tb[1:13,c(mod_name)])
241
    
242
    x<- 1:length(y)
243
    y_sd <- unlist(sd_abs_tb[1:12,c(mod_name)])
244
    
245
    ciw <-y_sd
246
    #ciw2   <- qt(0.975, n) * y_sd2 / sqrt(n)
247
    
248
    #plotCI(y=y, x=x, uiw=ciw, col="red", main=paste(" MAE for ",mod_name,sep=""), barcol="blue", lwd=1,
249
    #       ylab="RMSE (C)", xlab=xlab_text)
250
    
251
    ciw   <- qt(0.975, n) * y_sd / sqrt(n)
252
    
253
    if(i==1){
254
      plotCI(y=y, x=x, uiw=ciw, col=col_t[i], main=paste(" Comparison of MAE in ",mod_name,sep=""), barcol="blue", lwd=1,
255
             ylab="RMSE (C)", xlab=xlab_text)
256
      lines(y~x, col=col_t[i])
257
      
258
    }else{
259
      lines(y~x, col=col_t[i])
260
    }
261
    
262
  }
263
  legend("topleft",legend=legend_text, 
264
         cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n")
265
  #axis(1,at=1:length(stat_tb[,1]),labels=stat_tb[,1])
266
  
267
}
268

  
269
calc_stat_prop_tb <-function(names_mod,raster_prediction_obj,testing=TRUE){
270
  
271
  #add for testing??
272
  if (testing==TRUE){
273
    tb <-raster_prediction_obj$tb_diagnostic_v #use testing accuracy information
274
  }else{
275
    tb <-raster_prediction_obj$tb_diagnostic_s #use training accuracy information
276
  }
277
  
278
  t<-melt(subset(tb,pred_mod==names_mod),
279
          measure=c("mae","rmse","r","m50"), 
280
          id=c("pred_mod","prop"),
281
          na.rm=T)
282
  
283
  avg_tb<-cast(t,pred_mod+prop~variable,mean)
284
  sd_tb<-cast(t,pred_mod+prop~variable,sd)
285
  n_tb<-cast(t,pred_mod+prop~variable,length)
286
  #n_NA<-cast(t,dst_cat1~variable,is.na)
287
  
288
  #### prepare returning object
289
  prop_obj<-list(tb,avg_tb,sd_tb,n_tb)
290
  names(prop_obj) <-c("tb","avg_tb","sd_tb","n_tb")
291
  
292
  return(prop_obj)
293
}
294

  
295
#ploting 
296
plot_prop_metrics <-function(list_param){
297
  #
298
  #list_dist_obj: list of dist object 
299
  #col_t: list of color for each 
300
  #pch_t: symbol for line
301
  #legend_text: text for line and symbol
302
  #mod_name: selected models
303
  #
304
  ## BEGIN ##
305
  
306
  list_obj<-list_param$list_prop_obj
307
  col_t <-list_param$col_t 
308
  pch_t <- list_param$pch_t 
309
  legend_text <- list_param$legend_text
310
  list_mod_name<-list_param$mod_name
311
  metric_name<-list_param$metric_name
312
  
313
  for (i in 1:length(list_obj)){
314
    
315
    l<-list_obj[[i]]
316
    mod_name<-list_mod_name[i]
317
    avg_tb<-subset(l$avg_tb,pred_mod==mod_name,select=metric_name) #selecte relevant accuarcy metric
318
    n_tb<-subset(l$n_tb,pred_mod==mod_name,select=metric_name) 
319
    sd_tb<-subset(l$sd_tb,pred_mod==mod_name,select=metric_name) #l$sd_abs_tb[,metric_name]
320
    
321
    xlab_text<-"holdout proportion"
322
    
323
    no <- unlist(as.data.frame(n_tb))
324
    y <- unlist(as.data.frame(avg_tb))
325
    
326
    x<- l$avg_tb$prop
327
    y_sd <- unlist(as.data.frame(sd_tb)) #sd_tb
328
    
329
    ciw <-y_sd
330
    #ciw2   <- qt(0.975, n) * y_sd2 / sqrt(n)
331
    
332
    #plotCI(y=y, x=x, uiw=ciw, col="red", main=paste(" MAE for ",mod_name,sep=""), barcol="blue", lwd=1,
333
    #       ylab="RMSE (C)", xlab=xlab_text)
334
    
335
    ciw   <- qt(0.975, no) * y_sd / sqrt(no)
336
    
337
    if(i==1){
338
      plotCI(y=y, x=x, uiw=ciw, col=col_t[i], main=paste(" Comparison of ",metric_name," in ",mod_name,sep=""), barcol="blue", lwd=1,
339
             ylab="RMSE (C)", xlab=xlab_text)
340
      lines(y~x, col=col_t[i])
341
      
342
    }else{
343
      lines(y~x, col=col_t[i])
344
    }
345
    
346
  }
347
  legend("topleft",legend=legend_text, 
348
         cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n")
349
  #axis(1,at=1:length(stat_tb[,1]),labels=stat_tb[,1])
350
  
351
}
352

  
353
plot_dst_spat_fun<-function(stat_tb,names_var,cat_val){
354
  
355
  range_y<-range(as.vector(unlist(stat_tb[,names_var])),na.rm=T) #flatten data.frame
356
  col_t<-rainbow(length(names_var))
357
  pch_t<- 1:length(names_var)
358
  plot(stat_tb[,names_var[1]], ylim=range_y,pch=pch_t[1],col=col_t[1],type="b",
359
       yla="MAE (in degree C)",xlab="",xaxt="n")
360
  #points((stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[1],col=col_t[1]),type="p")
361
  for (k in 2:length(names_var)){
362
    lines(stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[k],col=col_t[k],type="b",
363
          xlab="",axes=F)
364
    #points((stat_tb[,names_var[k]], ylim=range_y,pch=pch_t[k],col=col_t[k]),type="p")
365
  }
366
  legend("topleft",legend=names_var, 
367
         cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n")
368
  axis(1,at=1:length(stat_tb[,1]),labels=stat_tb[,1])
369
}
370

  
371
plot_prop_metrics <-function(list_param){
372
  #
373
  #list_dist_obj: list of dist object 
374
  #col_t: list of color for each 
375
  #pch_t: symbol for line
376
  #legend_text: text for line and symbol
377
  #mod_name: selected models
378
  #
379
  ## BEGIN ##
380
  
381
  list_obj<-list_param$list_prop_obj
382
  col_t <-list_param$col_t 
383
  pch_t <- list_param$pch_t 
384
  legend_text <- list_param$legend_text
385
  list_mod_name<-list_param$mod_name
386
  metric_name<-list_param$metric_name
387
  
388
  for (i in 1:length(list_obj)){
389
    
390
    l<-list_obj[[i]]
391
    mod_name<-list_mod_name[i]
392
    avg_tb<-subset(l$avg_tb,pred_mod==mod_name,select=metric_name) #selecte relevant accuarcy metric
393
    n_tb<-subset(l$n_tb,pred_mod==mod_name,select=metric_name) 
394
    sd_tb<-subset(l$sd_tb,pred_mod==mod_name,select=metric_name) #l$sd_abs_tb[,metric_name]
395
    
396
    xlab_text<-"holdout proportion"
397
    
398
    no <- unlist(as.data.frame(n_tb))
399
    y <- unlist(as.data.frame(avg_tb))
400
    
401
    x<- l$avg_tb$prop
402
    y_sd <- unlist(as.data.frame(sd_tb)) #sd_tb
403
    
404
    ciw <-y_sd
405
    #ciw2   <- qt(0.975, n) * y_sd2 / sqrt(n)
406
    
407
    #plotCI(y=y, x=x, uiw=ciw, col="red", main=paste(" MAE for ",mod_name,sep=""), barcol="blue", lwd=1,
408
    #       ylab="RMSE (C)", xlab=xlab_text)
409
    
410
    ciw   <- qt(0.975, no) * y_sd / sqrt(no)
411
    
412
    if(i==1){
413
      plotCI(y=y, x=x, uiw=ciw, col=col_t[i], main=paste(" Comparison of ",metric_name," in ",mod_name,sep=""), barcol="blue", lwd=1,
414
             ylab="RMSE (C)", xlab=xlab_text)
415
      lines(y~x, col=col_t[i])
416
      
417
    }else{
418
      lines(y~x, col=col_t[i])
419
    }
420
    
421
  }
422
  legend("topleft",legend=legend_text, 
423
         cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n")
424
  #axis(1,at=1:length(stat_tb[,1]),labels=stat_tb[,1])
425
  
426
}
427

  
428
create_s_and_p_table_term_models <-function(i,list_myModels){
429
  #Purpose:
430
  #Function to extract smooth term  table,parameter table and AIC from a list of models.
431
  #Originally created to processed GAM models run over a full year.
432
  #Inputs: 
433
  #1) list_myModels: list of fitted GAM models
434
  #2) i: index of list to run with lapply or mcapply
435
  #Outputs: list of 
436
  #1)s.table.term
437
  #2)p.table.term
438
  #3)AIC list
439
  #Authors: Benoit Parmentier
440
  #date: 08/142013
441
  
442
  ### Functions used in the scritp:
443
  
444
  #Remove models that were not fitted from the list
445
  #All modesl that are "try-error" are removed
446
  remove_errors_list<-function(list_items){
447
    #This function removes "error" items in a list
448
    list_tmp<-list_items
449
    for(i in 1:length(list_items)){
450
      
451
      if(inherits(list_items[[i]],"try-error")){
452
        list_tmp[[i]]<-0
453
      }else{
454
        list_tmp[[i]]<-1
455
      }
456
    }
457
    cnames<-names(list_tmp[list_tmp>0])
458
    x<-list_items[match(cnames,names(list_items))]
459
    return(x)
460
  }
461
  
462
  #turn term from list into data.frame
463
  name_col<-function(i,x){
464
    x_mat<-x[[i]]
465
    x_df<-as.data.frame(x_mat)
466
    x_df$mod_name<-rep(names(x)[i],nrow(x_df))
467
    x_df$term_name <-row.names(x_df)
468
    return(x_df)
469
  }
470
  
471
  ##BEGIN ###
472
  
473
  myModels <- list_myModels[[i]]
474
  myModels<-remove_errors_list(myModels)
475
  #could add AIC, GCV to the table as well as ME, RMSE...+dates...
476
  
477
  summary_list <- lapply(myModels, summary)
478
  s.table_list <- lapply(summary_list, `[[`, 's.table')
479
  p.table_list <- lapply(summary_list, `[[`, 'p.table')
480
  AIC_list <- lapply(myModels, AIC)
481
  
482
  #now put in one table
483
  
484
  s.table_list2<-lapply(1:length(myModels),name_col,s.table_list)
485
  p.table_list2<-lapply(1:length(myModels),name_col,p.table_list)
486
  s.table_term <-do.call(rbind,s.table_list2)
487
  p.table_term <-do.call(rbind,p.table_list2)
488
  
489
  #Now get AIC
490
  AIC_list2<-lapply(1:length(myModels),name_col,AIC_list)
491
  AIC_models <- do.call(rbind,AIC_list2)
492
  names(AIC_models)[1]<-"AIC"
493
  
494
  #Set up return object
495
  
496
  s_p_table_term_obj<-list(s.table_term,p.table_term,AIC_models)
497
  names(s_p_table_term_obj) <-c("s.table_term","p.table_term","AIC_models")
498
  return(s_p_table_term_obj)
499
  
500
}
501

  
502
convert_spdf_to_df_from_list <-function(obj_list,list_name){
503
  #extract object from list of list. This useful for raster_prediction_obj
504
  #output: data.frame formed by rbinding sp data.frame in the list
505
  library(plyr)
506
  
507
  list_tmp<-vector("list",length(obj_list))
508
  for (i in 1:length(obj_list)){
509
    #tmp<-obj_list[[i]][[list_name]] #double bracket to return data.frame
510
    list_tmp[[i]]<-as.data.frame(obj_list[[i]])
511
  }
512
  tb_list_tmp<-do.call(rbind.fill,list_tmp) #long rownames
513
  #tb_list_tmp<-do.call(rbind,list_tmp) #long rownames
514
  
515
  return(tb_list_tmp) #this is  a data.frame
516
}
40
function_analyses_paper <-"contribution_of_covariates_paper_interpolation_functions_08152013.R"
517 41

  
518 42
##############################
519 43
#### Parameters and constants  
520 44

  
45
script_path<-"/home/parmentier/Data/IPLANT_project/env_layers_scripts/" #path to script
46
source(file.path(script_path,function_analyses_paper)) #source all functions used in this script.
521 47

  
522
script_path<-"/home/parmentier/Data/IPLANT_project/env_layers_scripts/"
523
#source(file.path(script_path,"interpolation_method_day_function_multisampling_06082013.R")) #Include GAM_day
524

  
525
#in_dir<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_GAM_fus_all_lst_05312013"
526
in_dir1 <-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_day_lst_comb3_07092013/"
48
in_dir1 <-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_day_lst_comb3_08132013"
527 49
in_dir2 <-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_day_lst_comb4_07152013/"
528 50

  
529 51
#kriging results:
......
540 62

  
541 63
y_var_name <- "dailyTmax"
542 64

  
543
out_prefix<-"analyses_08032013"
65
out_prefix<-"analyses_08152013"
544 66

  
545 67
method_interpolation <- "gam_daily"
546
covar_obj_file1 <- "covar_obj__365d_gam_day_lst_comb3_07092013.RData"
547
met_obj_file_1 <- "met_stations_outfiles_obj_gam_daily__365d_gam_day_lst_comb3_07092013.RData"
68
covar_obj_file_1 <- "covar_obj__365d_gam_day_lst_comb3_08132013.RData"
69
met_obj_file_1 <- "met_stations_outfiles_obj_gam_daily__365d_gam_day_lst_comb3_08132013.RData"
548 70

  
549 71
#raster_prediciton object for baseline 1 () s(lat,lon) + s(elev)) and baseline 2 (slat,lon))
550
raster_obj_file_1 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_day_lst_comb3_07092013.RData" 
72
raster_obj_file_1 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_day_lst_comb3_08132013.RData" 
551 73
raster_obj_file_2 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_day_lst_comb4_07152013.RData"
552 74

  
553 75
raster_obj_file_3 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_day_lst_comb3_07112013.RData"
......
560 82

  
561 83
#Load objects containing training, testing, models objects 
562 84

  
563
covar_obj <-load_obj(file.path(in_dir1,covar_obj_file1))
85
covar_obj <-load_obj(file.path(in_dir1,covar_obj_file_1)) #Reading covariates object for GAM daily method
564 86
infile_covariates <- covar_obj$infile_covariates
565 87
infile_reg_outline <- covar_obj$infile_reg_outline
566 88
covar_names<- covar_obj$covar_names
567 89
#####
568
s_raster <- brick(file.path(in_dir1,infile_covariates))
90
s_raster <- brick(infile_covariates)
569 91
names(s_raster)<-covar_names
570 92

  
571
raster_prediction_obj_1 <-load_obj(file.path(in_dir1,raster_obj_file_1)) #comb3
572
raster_prediction_obj_2 <-load_obj(file.path(in_dir2,raster_obj_file_2)) #comb4
573
raster_prediction_obj_3 <-load_obj(file.path(in_dir3,raster_obj_file_3))
574
raster_prediction_obj_4 <-load_obj(file.path(in_dir4,raster_obj_file_4)) 
93
raster_prediction_obj_1 <-load_obj(file.path(in_dir1,raster_obj_file_1)) #comb3 (baseline 2)
94
raster_prediction_obj_2 <-load_obj(file.path(in_dir2,raster_obj_file_2)) #comb4 (baseline 1)
95
raster_prediction_obj_3 <-load_obj(file.path(in_dir3,raster_obj_file_3)) #comb3/mod1 baseline 2, kriging
96
raster_prediction_obj_4 <-load_obj(file.path(in_dir4,raster_obj_file_4)) #comb3/mod1 baseline 2, gwr
575 97
raster_prediction_obj_5 <-load_obj(file.path(in_dir5,raster_obj_file_5)) #gam daily multisampling 10 to 70%
576
raster_prediction_obj_6 <-load_obj(file.path(in_dir6,raster_obj_file_6)) #kriging daily multisampling 
577
raster_prediction_obj_7 <-load_obj(file.path(in_dir7,raster_obj_file_7)) #kriging daily multisampling 
98
raster_prediction_obj_6 <-load_obj(file.path(in_dir6,raster_obj_file_6)) #kriging daily multisampling 10 to 70% 
99
raster_prediction_obj_7 <-load_obj(file.path(in_dir7,raster_obj_file_7)) #gwr daily multisampling 10 to 70%
578 100

  
579
names(raster_prediction_obj_1) #list of two objects
101
############### BEGIN SCRIPT #################
580 102

  
581
### ACCURACY TABLE WITH BASELINES
103
############
104
##### 1) Generate: Table 3. Contribution of covariates using validation accuracy metrics
105
## This is a table of accuracy compared to baseline by difference 
582 106

  
583 107
#Check input covariates and model formula:
584
raster_prediction_obj_1$method_mod_obj[[1]]$formulas
585
raster_prediction_obj_2$method_mod_obj[[1]]$formulas
586

  
587
###baseline 2: s(lat,lon) + s(elev)
108
raster_prediction_obj_1$method_mod_obj[[1]]$formulas #models run for baseline 2
109
raster_prediction_obj_2$method_mod_obj[[1]]$formulas #models run for baseline 1
588 110

  
589 111
summary_metrics_v1<-raster_prediction_obj_1$summary_metrics_v
590 112
summary_metrics_v2<-raster_prediction_obj_2$summary_metrics_v
113
tb1 <-raster_prediction_obj_1$tb_diagnostic_v #365 days accuracy table for baseline 2
114
tb2 <-raster_prediction_obj_2$tb_diagnostic_v #365 days accuracy table for baseline 1
591 115

  
592
table_data1 <-summary_metrics_v1$avg[,c("mae","rmse","me","r")]
116
table_data1 <-summary_metrics_v1$avg[,c("mae","rmse","me","r")] #select relevant columns from data.frame
593 117
table_data2 <-summary_metrics_v2$avg[,c("mae","rmse","me","r")]
594 118

  
595
model_col<-c("Baseline2","Northness","Eastness","LST","DISTOC","Forest","CANHEIGHT","LST*Forest") # removed ,"LST*CANHEIGHT")
119
###Table 3a, baseline 1: s(lat,lon) 
120

  
121
model_col<-c("Baseline1","Elevation","Northing","Easting","LST","DISTOC","Forest","CANHEIGHT") #,"LST*Forest","LST*CANHEIGHT") # 
122
df3a<- as.data.frame(sapply(table_data2,FUN=function(x) x-x[1]))
123
df3a<- round(df3a,digit=3) #roundto three digits teh differences
124
df3a$Model <-model_col
125
names(df3a)<- names_table_col
126
print(df3a) #show resulting table
127

  
128
###Table 3b, baseline 2: s(lat,lon) + s(elev)
129

  
130
model_col<-c("Baseline2","Northness","Eastness","LST","DISTOC","Forest","CANHEIGHT","LST*Forest","LST*CANHEIGHT")
596 131
names_table_col<-c("DiffMAE","DiffRMSE","DiffME","Diffr","Model")
597 132

  
598
df1<- as.data.frame(sapply(table_data1,FUN=function(x) x-x[1]))
599
df1<- round(df1,digit=3) #roundto three digits teh differences
600
df1$Model <-model_col
601
names(df1)<- names_table_col
602
df1
133
df3b <- as.data.frame(sapply(table_data1,FUN=function(x) x-x[1])) #difference between baseline (line 1) and other models
134
df3b <- round(df3b,digit=3) #roundto three digits the differences
135
df3b$Model <- model_col
136
names(df3b)<- names_table_col
137
print(df3b) #Part b of table 3
603 138

  
604
###baseline 1: s(lat,lon) 
139
#Testing siginificance between models
605 140

  
606
model_col<-c("Baseline1","Elevation","Northing","Easting","LST","DISTOC","Forest","CANHEIGHT") #,"LST*Forest") # removed ,"LST*CANHEIGHT")
607
df2<- as.data.frame(sapply(table_data2,FUN=function(x) x-x[1]))
608
df2<- round(df2,digit=3) #roundto three digits teh differences
609
df2$Model <-model_col
610
names(df2)<- names_table_col
611
df2
141
mod_compk1 <-kruskal.test(tb1$rmse ~ as.factor(tb1$pred_mod)) #Kruskal Wallis test
142
mod_compk2 <-kruskal.test(tb2$rmse ~ as.factor(tb2$pred_mod))
143
print(mod_compk1) #not significant
144
print(mod_compk2) #not significant
612 145

  
613
file_name<-paste("table3b_baseline2_paper","_",out_prefix,".txt",sep="")
614
write.table(df1,file=file_name,sep=",")
146
#Multiple Kruskal Wallis
147
mod_compk1 <-kruskalmc(tb1$rmse ~ as.factor(tb1$pred_mod))
148
mod_compk2 <-kruskalmc(tb2$rmse ~ as.factor(tb2$pred_mod))
149

  
150
print(mod_compk1)
151
print(mod_compk2)
152

  
153
#Now write out table 3
615 154

  
616 155
file_name<-paste("table3a_baseline1_paper","_",out_prefix,".txt",sep="")
617
write.table(df2,file=file_name,sep=",")
156
write.table(df3a,file=file_name,sep=",")
157

  
158
file_name<-paste("table3b_baseline2_paper","_",out_prefix,".txt",sep="")
159
write.table(df3b,file=file_name,sep=",")
160

  
161
############
162
##### 2) Generate: Table 4. Comparison between interpolation methods using validation accuracy metrics
163
## This is a table of accuracy metric for the optimal model (baseline2) as identified in the previous step 
618 164

  
619 165
##Table 4: Interpolation methods comparison
620 166

  
621 167
#get sd for kriging, gam and gwr
622
tb1 <-raster_prediction_obj_1$tb_diagnostic_v  #Kriging methods
623
tb2 <-raster_prediction_obj_2$tb_diagnostic_v  #Kriging methods
168
#tb1 <-raster_prediction_obj_1$tb_diagnostic_v  HGAM baseline 1, loaded
169
#tb2 <-raster_prediction_obj_2$tb_diagnostic_v  #GAM baseline 2, loaded
624 170
tb3 <-raster_prediction_obj_3$tb_diagnostic_v  #Kriging methods
625
tb4 <-raster_prediction_obj_4$tb_diagnostic_v  #Kriging methods
171
tb4 <-raster_prediction_obj_4$tb_diagnostic_v  #GWR methods
626 172

  
627
names_mod<-c("mod1","mod2","mod3","mod4","mod5","mod6","mod7","mod8","mod9")
173
names_mod<-c("mod1","mod2","mod3","mod4","mod5","mod6","mod7","mod8","mod9","mod10")
628 174

  
629
sd1 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_1,"sd")
630
sd2 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_2,"sd")
631
sd3 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_3,"sd")
632
sd4 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_4,"sd")
175
#Calculate standard deviation for each metric
176
sd1 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_1,"sd") # see function script
177
sd2 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_2,"sd") # standard deviation for baseline 2
178
sd3 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_3,"sd") # kriging
179
sd4 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_4,"sd") #gwr
633 180

  
634
table_sd<-rbind(sd1[1,],sd3[1,])
635
table_sd<-rbind(table_sd,sd4[1,])
181
#Combined sd in one table for mod1 (baseline 2)
182
table_sd <- do.call(rbind,list(sd1[1,],sd3[1,],sd4[1,])) #table containing the sd for the three mdethods for baseline 2
183
table_sd <- round(table_sd[,-1],digit=3) #round to three digits the differences
184
table_sd <- table_sd[,c("mae","rmse","me","r")] #column 5 contains m50, remove it
636 185

  
637 186
summary_metrics_v3<-raster_prediction_obj_3$summary_metrics_v  #Kriging methods
638 187
summary_metrics_v4<-raster_prediction_obj_4$summary_metrics_v  # GWR method
......
641 190
table_data4 <-summary_metrics_v4$avg[1,c("mae","rmse","me","r")] #first line mod1 (baseline)
642 191
table_data1 <- table_data1[1,]
643 192

  
644
table<-rbind(table_data1,table_data3)
645
table<-rbind(table,table_data4)
646
table<- round(table,digit=3) #roundto three digits teh differences
193
table_ac <-do.call(rbind, list(table_data1,table_data3,table_data4))
194
table_ac <- round(table_ac,digit=3) #roundto three digits teh differences
195

  
196
#combined tables with accuracy metrics and their standard deviations
197
table4_paper <-table_combined_symbol(table_ac,table_sd,"±")
198
#lapply(lapply(table_ac,FUN=paste,table_sd,FUN=paste,sep="±"),FUN=paste)
647 199

  
648 200
model_col<-c("GAM","Kriging","GWR")
649 201
names_table_col<-c("MAE","RMSE","ME","R","Model")
650 202

  
651
table$Model <-model_col
652
names(table)<- names_table_col
653
table
654

  
655
file_name<-paste("table4_avg_paper","_",out_prefix,".txt",sep="")
656
write.table(table,file=file_name,sep=",")
657

  
658
file_name<-paste("table4_sd_paper","_",out_prefix,".txt",sep="")
659
write.table(table_sd,file=file_name,sep=",")
203
table4_paper$Model <-model_col
204
names(table4_paper)<- names_table_col
660 205

  
661
#for(i in nrow(table))
662
#mean_val<-table[i,j]
663
#sd_val<-table_sd[i,j]
664
#element<-paste(mean_val,"+-",sd_val,sep="")
665
#table__paper[i,j]<-element
206
file_name<-paste("table4_compariaons_interpolation_methods_avg_paper","_",out_prefix,".txt",sep="")
207
write.table(as.data.frame(table4_paper),file=file_name,sep=",")
666 208

  
667 209
####################################
668 210
####### Now create figures #############
......
674 216
#Figure 5. Overtraining tendency
675 217
#Figure 6: Spatial pattern of prediction for one day
676 218

  
677
### Figure 1
219
### Figure 1: Oregon study area
678 220

  
679
### Figure 2: 
221
#...add code
680 222

  
681
#not generated in R
223
### Figure 2:  Method comparison workflow 
224

  
225
# Workflow not generated in R
682 226

  
683 227
################################################
684
################### Figure 3
228
################### Figure 3. MAE/RMSE and distance to closest fitting station.
685 229

  
686 230
#Analysis accuracy in term of distance to closest station
687 231
#Assign model's names
688 232

  
689
names_mod <- paste("res_mod",1:9,sep="")
233
names_mod <- paste("res_mod",1:10,sep="")
690 234
names(raster_prediction_obj_1$validation_mod_obj[[1]])
691 235
limit_val<-seq(0,150, by=10)
692 236

  
693
l1 <- distance_to_closest_fitting_station(raster_prediction_obj_1,names_mod,dist_classes=limit_val)
694
l3 <- distance_to_closest_fitting_station(raster_prediction_obj_3,names_mod,dist_classes=limit_val)
695
l4 <- distance_to_closest_fitting_station(raster_prediction_obj_4,names_mod,dist_classes=limit_val)
237
#Call function to extract residuals in term of distance to closest fitting station and summary statistics
238
l1 <- distance_to_closest_fitting_station(raster_prediction_obj_1,names_mod,dist_classes=limit_val) #GAM method
239
l3 <- distance_to_closest_fitting_station(raster_prediction_obj_3,names_mod,dist_classes=limit_val) #Kriging method
240
l4 <- distance_to_closest_fitting_station(raster_prediction_obj_4,names_mod,dist_classes=limit_val) #GWR method
696 241

  
697
list_dist_obj<-list(l1,l3,l4)
698
col_t<-c("red","blue","black")
699
pch_t<- 1:length(col_t)
700
legend_text <- c("GAM","Kriging","GWR")
701
mod_name<-c("res_mod1","res_mod1","res_mod1")#selected models
242
l1$mae_tb #contains
702 243

  
703
#png_names<- 
704
#png(file.path(out_path,paste("clim_surface_",y_var_name,"_",sampling_dat$date[i],"_",sampling_dat$prop[i],
705
#                             "_",sampling_dat$run_samp[i],out_prefix,".png", sep="")))
706

  
707
list_param_plot<-list(list_dist_obj,col_t,pch_t,legend_text,mod_name)
708
names(list_param_plot)<-c("list_dist_obj","col_t","pch_t","legend_text","mod_name")
244
#Prepare parameters/arguments for plotting
245
list_dist_obj <-list(l1,l3,l4)
246
col_t <- c("red","blue","black")
247
pch_t <- 1:length(col_t)
248
legend_text <- c("GAM","Kriging","GWR")
249
mod_name <- c("res_mod1","res_mod1","res_mod1")#selected models
250
x_tick_labels <- limit_val<-seq(5,125, by=10)
251
metric_name <-"rmse_tb"
252
title_plot <- "RMSE and distance to closest station for baseline 2"
253
y_lab_text <- "RMSE (C)"
254
#quick test
255
list_param_plot<-list(list_dist_obj,col_t,pch_t,legend_text,mod_name,x_tick_labels,metric_name,title_plot,y_lab_text)
256
names(list_param_plot)<-c("list_dist_obj","col_t","pch_t","legend_text","mod_name","x_tick_labels","metric_name","title_plot","y_lab_text")
257
plot_dst_MAE(list_param_plot)
709 258

  
710
#debug(plot_dst_MAE)
259
metric_name <-"mae_tb"
260
title_plot <- "MAE and distance to closest fitting station"
261
y_lab_text <- "MAE (C)"
262

  
263
#Now set up plotting device
264
res_pix<-480
265
col_mfrow<-2
266
row_mfrow<-1
267
png_file_name<- paste("Figure_3_accuracy_and_distance_to_closest_fitting_station_",out_prefix,".png", sep="")
268
png(filename=file.path(out_dir,png_file_name),
269
    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
270
par(mfrow=c(row_mfrow,col_mfrow))
271

  
272
#Figure 3a
273
list_param_plot<-list(list_dist_obj,col_t,pch_t,legend_text,mod_name,x_tick_labels,metric_name,title_plot,y_lab_text)
274
names(list_param_plot)<-c("list_dist_obj","col_t","pch_t","legend_text","mod_name","x_tick_labels","metric_name","title_plot","y_lab_text")
711 275
plot_dst_MAE(list_param_plot)
276
title(xlab="Distance to closest fitting station (km)")
277

  
278
#Figure 3b: histogram
279
barplot(l1$n_tb$res_mod1,names.arg=limit_val,
280
        ylab="Number of observations",
281
        xlab="Distance to closest fitting station (km)")
282
title("Number of observation in term of distance bins")
283
box()
284
dev.off()
712 285

  
713 286
####################################################
714
#########Figure 4. RMSE and MAE, mulitisampling and hold out for FSS and GAM.
287
#########Figure 4. RMSE and MAE, mulitisampling and hold out for single time scale methods.
715 288

  
716 289
#Using baseline 2: lat,lon and elev
717 290

  
......
722 295
#names_mod<-c("mod1","mod2","mod3","mod4","mod5","mod6","mod7","mod8","mod9")
723 296
names_mod<-c("mod1")
724 297

  
725
debug(calc_stat_prop_tb)
298
#debug(calc_stat_prop_tb)
726 299
prop_obj_gam<-calc_stat_prop_tb(names_mod,raster_prediction_obj_5)
727 300
prop_obj_kriging<-calc_stat_prop_tb(names_mod,raster_prediction_obj_6)
728 301
prop_obj_gwr<-calc_stat_prop_tb(names_mod,raster_prediction_obj_7)
729 302

  
730 303
list_prop_obj<-list(prop_obj_gam,prop_obj_kriging,prop_obj_gwr)
304

  
305
## plot setting for figure 4
306

  
731 307
col_t<-c("red","blue","black")
732 308
pch_t<- 1:length(col_t)
733 309
legend_text <- c("GAM","Kriging","GWR")
734 310
mod_name<-c("mod1","mod1","mod1")#selected models
735
metric_name<-"rmse"
736
#png_names<- 
737
#png(file.path(out_path,paste("clim_surface_",y_var_name,"_",sampling_dat$date[i],"_",sampling_dat$prop[i],
738
#                             "_",sampling_dat$run_samp[i],out_prefix,".png", sep="")))
739 311

  
312
##### plot figure 4 for paper
313
####
314

  
315
res_pix<-480
316
col_mfrow<-2
317
row_mfrow<-1
318
png_file_name<- paste("Figure_4_proportion_of_holdout_and_accuracy_",out_prefix,".png", sep="")
319
png(filename=file.path(out_dir,png_file_name),
320
    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
321
par(mfrow=c(row_mfrow,col_mfrow))
322
metric_name<-"mae"
323
list_param_plot<-list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name)
324
names(list_param_plot)<-c("list_prop_obj","col_t","pch_t","legend_text","mod_name","metric_name")
325

  
326
plot_prop_metrics(list_param_plot)
327
title(main="MAE for hold out and methods",
328
      xlab="Hold out validation/testing proportion",
329
      ylab="MAE (C)")
330

  
331
#now figure 4b
332
metric_name<-"rmse"
740 333
list_param_plot<-list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name)
741 334
names(list_param_plot)<-c("list_prop_obj","col_t","pch_t","legend_text","mod_name","metric_name")
742
#debug(plot_prop_metrics)
743 335
plot_prop_metrics(list_param_plot)
336
title(main="RMSE for hold out and methods",
337
      xlab="Hold out validation/testing proportion",
338
      ylab="RMSE (C)")
339

  
340
dev.off()
744 341

  
745 342
####################################################
746 343
#########Figure 5. Overtraining tendency
747 344

  
748 345
#read in relevant data:
749

  
750
tb5 <-raster_prediction_obj_5$tb_diagnostic_v  #gam dailycontains the accuracy metrics for each run...
751
tb6 <-raster_prediction_obj_6$tb_diagnostic_v  #Kriging daily methods
752
tb7 <-raster_prediction_obj_7$tb_diagnostic_v  #gwr daily methods
753

  
754
prop_obj_gam_s<-calc_stat_prop_tb(names_mod,raster_prediction_o  bj_5,testing=FALSE)
755
prop_obj_kriging_s<-calc_stat_prop_tb(names_mod,raster_prediction_obj_6,testing=FALSE)
756
prop_obj_gwr_s<-calc_stat_prop_tb(names_mod,raster_prediction_obj_7,testing=FALSE)
757

  
758
prop_obj_gam$avg_tb - prop_obj_gam_s$avg_tb
759
plot(prop_obj_gam_s$avg_tb$rmse ~ prop_obj_gam_s$avg_tb$prop, type="b",)
760

  
761
y_range<-range(prop_obj_gam$avg_tb$rmse,prop_obj_gam_s$avg_tb$rmse)
762
plot(prop_obj_gam$avg_tb$rmse ~ prop_obj_gam$avg_tb$prop, type="b",ylim=y_range)
763
lines(prop_obj_gam_s$avg_tb$rmse ~ prop_obj_gam_s$avg_tb$prop, type="b",ylim=y_range,col=c("red"))
764
lines(prop_obj_gam$avg_tb$rmse ~ prop_obj_gam$avg_tb$prop, type="b",ylim=y_range,col=c("red"),lty=2)
765
lines(prop_obj_gwr_s$avg_tb$rmse ~ prop_obj_gwr_s$avg_tb$prop, type="b",ylim=y_range,col=c("black"))
766
lines(prop_obj_gwr$avg_tb$rmse ~ prop_obj_gam$avg_tb$prop, type="b",ylim=y_range,col=c("black"),lty=2)
767

  
768
y_range<-range(prop_obj_kriging$avg_tb$rmse,prop_obj_kriging_s$avg_tb$rmse)
769
plot(prop_obj_kriging$avg_tb$rmse ~ prop_obj_kriging$avg_tb$prop, type="b",ylim=y_range,col=c("blue"),lty=2)
770
lines(prop_obj_kriging_s$avg_tb$rmse ~ prop_obj_kriging_s$avg_tb$prop, type="b",ylim=y_range,col=c("blue"))
771

  
772 346
## Calculate average difference for RMSE for all three methods
773 347
#read in relevant data:
774 348
tb1_s<-extract_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"metrics_s")
775 349
rownames(tb1_s)<-NULL #remove row names
776 350
tb1_s$method_interp <- "gam_daily" #add type of interpolation...out_prefix too??
777 351

  
778
tb3_s<-extract_from_list_obj(raster_prediction_obj_4$validation_mod_obj,"metrics_s")
352
tb3_s<-extract_from_list_obj(raster_prediction_obj_3$validation_mod_obj,"metrics_s")
779 353
rownames(tb1_s)<-NULL #remove row names
780 354
tb3_s$method_interp <- "kriging_daily" #add type of interpolation...out_prefix too??
781 355

  
782
tb4_s<-extract_from_list_obj(raster_prediction_obj_3$validation_mod_obj,"metrics_s")
356
tb4_s<-extract_from_list_obj(raster_prediction_obj_4$validation_mod_obj,"metrics_s")
783 357
rownames(tb4_s)<-NULL #remove row names
784 358
tb4_s$method_interp <- "gwr_daily" #add type of interpolation...out_prefix too??
785 359

  
......
791 365
tb3 <-raster_prediction_obj_3$tb_diagnostic_v  #Kriging daily methods
792 366
tb4 <-raster_prediction_obj_4$tb_diagnostic_v  #gwr daily methods
793 367

  
794
diff_df<-function(tb_s,tb_v,list_metric_names){
795
  tb_diff<-vector("list", length(list_metric_names))
796
  for (i in 1:length(list_metric_names)){
797
    metric_name<-list_metric_names[i]
798
    tb_diff[[i]] <-tb_s[,c(metric_name)] - tb_v[,c(metric_name)]
799
  }
800
  names(tb_diff)<-list_metric_names
801
  tb_diff<-as.data.frame(do.call(cbind,tb_diff))
802
  return(tb_diff)
803
}
368
#Calculate difference in MAE and RMSE for training and testing data: call diff_df function
369
diff_tb1 <-diff_df(tb1_s[tb1_s$pred_mod=="mod1",],tb1[tb1$pred_mod=="mod1",],c("mae","rmse")) #gam select differences for mod1
370
diff_tb3 <-diff_df(tb3_s[tb3_s$pred_mod=="mod1",],tb3[tb3$pred_mod=="mod1",],c("mae","rmse")) #kriging
371
diff_tb4 <-diff_df(tb4_s[tb4_s$pred_mod=="mod1",],tb4[tb4$pred_mod=="mod1",],c("mae","rmse")) #gwr
372

  
373
diff_mae_data <-data.frame(gam=diff_tb1$mae,kriging=diff_tb3$mae,gwr=diff_tb4$mae)
374
diff_rmse_data <-data.frame(gam=diff_tb1$rmse,kriging=diff_tb3$rmse,gwr=diff_tb4$rmse)
375

  
376
#Test the plot
377
boxplot(diff_mae_data)
378
boxplot(diff_rmse_data) #plot differences in training and testing accuracies for three methods
379
title(main="Training and testing RMSE for hold out and interpolation methods",
380
      xlab="Interpolation method",
381
      ylab="RMSE (C)")
382

  
383
tb5 <-raster_prediction_obj_5$tb_diagnostic_v  #gam dailycontains the accuracy metrics for each run...
384
tb6 <-raster_prediction_obj_6$tb_diagnostic_v  #Kriging daily methods
385
tb7 <-raster_prediction_obj_7$tb_diagnostic_v  #gwr daily methods
804 386

  
805
#debug(diff_df)
806
diff_tb1 <-diff_df(tb1_s[tb1_s$pred_mod=="mod1",],tb1[tb1$pred_mod=="mod1",],c("mae","rmse")) #select differences for mod1
807
diff_tb3 <-diff_df(tb3_s[tb3_s$pred_mod=="mod1",],tb3[tb3$pred_mod=="mod1",],c("mae","rmse"))
808
diff_tb4 <-diff_df(tb4_s[tb4_s$pred_mod=="mod1",],tb4[tb4$pred_mod=="mod1",],c("mae","rmse"))
387
prop_obj_gam_s<-calc_stat_prop_tb(names_mod,raster_prediction_obj_5,testing=FALSE) #training accuracy with hold out proportion
388
prop_obj_kriging_s<-calc_stat_prop_tb(names_mod,raster_prediction_obj_6,testing=FALSE)
389
prop_obj_gwr_s<-calc_stat_prop_tb(names_mod,raster_prediction_obj_7,testing=FALSE)
390

  
391
plot(prop_obj_gam_s$avg_tb$rmse ~ prop_obj_gam_s$avg_tb$prop, type="b",)
392

  
393
###############
394
#### plot figure 5
395
#set up the output file to plot
396
res_pix<-480
397
col_mfrow<-2
398
row_mfrow<-1
399
png_file_name<- paste("Figure_5_overtraining_tendency_and_holdout_proportion_",out_prefix,".png", sep="")
400
png(filename=file.path(out_dir,png_file_name),
401
    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
402
par(mfrow=c(row_mfrow,col_mfrow))
403

  
404
col_t<-c("red","blue","black")
405
pch_t<- 1:length(col_t)
406

  
407
y_range<-range(prop_obj_kriging$avg_tb$rmse,prop_obj_kriging_s$avg_tb$rmse)
408
#y_range<-range(prop_obj_gam$avg_tb$rmse,prop_obj_gam_s$avg_tb$rmse)
409
plot(prop_obj_gam$avg_tb$rmse ~ prop_obj_gam$avg_tb$prop, ylab="",xlab="",type="b",col=c("red"),pch=pch_t[1],ylim=y_range,lty=2)
410
lines(prop_obj_gam_s$avg_tb$rmse ~ prop_obj_gam_s$avg_tb$prop, ylab="",xlab="",type="b",pch=pch_t[1],ylim=y_range,col=c("red"))
411
lines(prop_obj_gwr_s$avg_tb$rmse ~ prop_obj_gwr_s$avg_tb$prop, ylab="",xlab="",type="b",ylim=y_range,pch=pch_t[3],col=c("black"))
412
lines(prop_obj_gwr$avg_tb$rmse ~ prop_obj_gam$avg_tb$prop, ylab="",xlab="",type="b",ylim=y_range,pch=pch_t[3],,col=c("black"),lty=2)
413
lines(prop_obj_kriging$avg_tb$rmse ~ prop_obj_kriging$avg_tb$prop,ylab="",xlab="", type="b",ylim=y_range,pch=pch_t[2],,col=c("blue"),lty=2)
414
lines(prop_obj_kriging_s$avg_tb$rmse ~ prop_obj_kriging_s$avg_tb$prop,ylab="",xlab="",type="b",ylim=y_range,pch=pch_t[2],col=c("blue"))
415

  
416
legend("topleft",legend=legend_text, 
417
       cex=0.9, pch=pch_t,col=col_t,lty=1,bty="n")
418
title(main="Training and testing RMSE for hold out and methods",
419
      xlab="Hold out validation/testing proportion",
420
      ylab="RMSE (C)")
421

  
422
boxplot(diff_mae_data) #plot differences in training and testing accuracies for three methods
423
title(main="Difference between training and testing MAE",
424
      xlab="Interpolation method",
425
      ylab="MAE (C)")
809 426

  
810
x<-data.frame(gam=diff_tb1$mae,gwr=diff_tb3$mae,kriging=diff_tb4$mae)
427
dev.off()
811 428

  
812
boxplot(x) #plot differences in training and testing accuracies for three methods
429
############### STUDY TIME AND accuracy
430
#########Figure 6: Monthly RMSE averages for the three interpolation methods: GAM, GWR and Kriging.
813 431

  
814 432
mae_tmp<- data.frame(gam=tb1[tb1$pred_mod=="mod1",c("mae")],
815 433
                     kriging=tb3[tb3$pred_mod=="mod1",c("mae")],
816 434
                     gwr=tb4[tb4$pred_mod=="mod1",c("mae")])
817 435

  
818
plot(mae_tmp$gam,col=c("red"),type="b")
819
lines(mae_tmp$kriging,col=c("blue"),type="b")
820
lines(mae_tmp$gwr,col=c("black"),type="b")
436
plot(mae_tmp$gam,col=c("red"),type="b",pch=1)
437
lines(mae_tmp$kriging,col=c("blue"),type="b",pch=2)
438
lines(mae_tmp$gwr,col=c("black"),type="b",pch=2)
821 439
legend("topleft",legend=legend_text, 
822 440
       cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n")
823 441

  
......
826 444
x2<-tb1[tb1$pred_mod=="mod1",c("mae","date")]
827 445
arrange(x2,desc(mae))
828 446

  
829
kriging=tb3[tb3$pred_mod=="mod1",c("mae")],
830
                     gwr=tb4[tb4$pred_mod=="mod1",c("mae")])
447
#kriging=tb3[tb3$pred_mod=="mod1",c("mae")],
448
#                     gwr=tb4[tb4$pred_mod=="mod1",c("mae")])
831 449

  
832 450
##### MONTHLY AVERAGES
833 451

  
......
840 458
lines(1:12,tb3_month$mae,col=c("blue"),type="b")
841 459
lines(1:12,tb4_month$mae,col=c("black"),type="b")
842 460

  
843
date<-strptime(tb1$date, "%Y%m%d")   # interpolation date being processed
844
month<-strftime(date, "%m")          # current month of the date being processed
461
add_month_tag<-function(tb){
462
  date<-strptime(tb$date, "%Y%m%d")   # interpolation date being processed
463
  month<-strftime(date, "%m")          # current month of the date being processed
464
}
465
tb1$month<-add_month_tag(tb1)
466
tb3$month<-add_month_tag(tb3)
467
tb4$month<-add_month_tag(tb4)
468

  
469
metric_name<-"mae"
470
month_data_list<-list(gam=tb1[tb1$pred_mod=="mod1",c(metric_name,"month")],
471
                      kriging=tb3[tb3$pred_mod=="mod1",c(metric_name,"month")],
472
                      gwr=tb4[tb4$pred_mod=="mod1",c(metric_name,"month")])
473
y_range<-range(unlist(month_data_list))
474

  
475

  
476
#boxplot(test[[metric_ac]]~test[[c("month")]],outline=FALSE,horizontal=FALSE,cex=0.5,
477
#        ylab=paste(metric_ac,"in degree C",sep=" "),,axisnames=FALSE,axes=FALSE)
478
#boxplot(test[[metric_ac]]~test[[c("month")]],outline=FALSE,horizontal=FALSE,cex=0.5,
479
#        ylab=paste(metric_ac,"in degree C",sep=" "))
480
#axis(1, labels = FALSE)
481
## Create some text labels
482
#labels <- month.abb # abbreviated names for each month
483
## Plot x axis labels at default tick marks
484
#text(1:length(labels), par("usr")[3] - 0.25, srt = 45, adj = 1,
485
#     labels = labels, xpd = TRUE)
486
#axis(2)
487
#box()
488

  
489
#Now plot figure 6
490
res_pix<-480
491
col_mfrow<-2
492
row_mfrow<-2
493
png_file_name<- paste("Figure_6_monthly_accuracy_",out_prefix,".png", sep="")
494
png(filename=file.path(out_dir,png_file_name),
495
    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
496
par(mfrow=c(row_mfrow,col_mfrow))
845 497

  
846
tb1$month<-month
847
x3<-tb1[tb1$pred_mod=="mod1",]
498
y_range<-range(tb1_month$mae,tb3_month$mae,tb4_month$mae)
499
xlab_tick <- unique(tb1$month)
500
xlab_text <-"Month"
501
  
502
plot(1:12,tb1_month$mae,col=c("red"),type="b",ylim=y_range,xlab=xlab_text,xaxt="n")
503
lines(1:12,tb3_month$mae,col=c("blue"),type="b")
504
lines(1:12,tb4_month$mae,col=c("black"),type="b")
505
axis(1,at=1:12,labels=xlab_tick)
506
title(main="Monthly average MAE")
848 507

  
849
(plot(x3[month=="01",c("mae")]))
508
ylab_text<-"MAE (C)"
509
xlab_text<-"Month"
510
y_range<-range(month_data_list$gam$mae,month_data_list$kriging$mae,month_data_list$gwr$mae)
511
boxplot(mae~month,data=month_data_list$gam,ylim=y_range,main="GAM",ylab=ylab_text,outline=FALSE)
512
boxplot(mae~month,data=month_data_list$kriging,ylim=y_range,main="Kriging",ylab=ylab_text,outline=FALSE)
513
boxplot(mae~month,data=month_data_list$gwr,ylim=y_range,main="GWR",ylab=ylab_text,outline=FALSE)
514

  
515
dev.off()
516

  
517
plot(x3[month=="01",c("mae")]))
850 518
median(x3[x3$month=="03",c("mae")],na.rm=T)
851 519
mean(x3[x3$month=="03",c("mae")],na.rm=T)
852 520

  
521
boxplot(x)
853 522

  
854
####### FIGURE 6 ######
523
#Now generate table
524

  
525
length(tb1_month$mae)
526
names(tb1_month)
527

  
528
####### FIGURE 7: Spatial pattern ######
855 529

  
856 530
y_var_name <-"dailyTmax"
857 531
index<-244 #index corresponding to January 1
......
880 554
min_val <- 0
881 555
layout_m<-c(1,3) #one row two columns
882 556

  
883
png(paste("spatial_pattern_tmax_prediction_levelplot_",date_selected,out_prefix,".png", sep=""),
557
png(paste("Figure7__spatial_pattern_tmax_prediction_levelplot_",date_selected,out_prefix,".png", sep=""),
884 558
    height=480*layout_m[1],width=480*layout_m[2])
885 559

  
886 560
levelplot(pred_temp_s,main="Interpolated Surfaces Method Comparison", ylab=NULL,xlab=NULL,
......
1037 711
#for (i in 1:length(list_myModels)){
1038 712
#  i<-1
1039 713

  
1040

  
1041 714
list_models_info <-lapply(1:length(list_myModels),FUN=create_s_and_p_table_term_models,list_myModels)
1042 715
#raster_prediction_obj_1$method_mod_obj[[i]]$sampling_dat$date
1043 716
dates<-(extract_from_list_obj(raster_prediction_obj_1$method_mod_obj,"sampling_dat"))$date #get vector of dates
1044 717
names(list_models_info)<-dates
1045 718

  
1046
#Add dates to the data.frame??
719
#Add dates to the data.frame?? -->later
1047 720

  
1048 721
s.table_term_tb <-extract_from_list_obj(list_models_info,"s.table_term")
1049
s.table_term_tb_t <-extract_list_from_list_obj(list_models_info,"s.table_term")
722
#s.table_term_tb_t <-extract_list_from_list_obj(list_models_info,"s.table_term") #add dates to summary later
723
AIC_models_tb <-extract_from_list_obj(list_models_info,"AIC_models")
1050 724

  
1051
max_val<-round_any(range_val[2],10, f=ceiling) #round max value to nearest 10 (from plyr package)
1052
min_val<-0
1053
limit_val<-seq(min_val,max_val, by=10)
1054
}else{
1055
  limit_val<-dist_classes
1056
}
1057 725
threshold_val<-c(0.01,0.05,0.1)
1058 726
s.table_term_tb$p_val_rec1 <- s.table_term_tb[["p-value"]] < threshold_val[1]
1059 727
s.table_term_tb$p_val_rec2 <- s.table_term_tb[["p-value"]] < threshold_val[2]
1060 728
s.table_term_tb$p_val_rec3 <- s.table_term_tb[["p-value"]] < threshold_val[3]
1061 729

  
1062
#dstspat_dat$dst_cat1 <- cut(dstspat_dat$dst,include.lowest=TRUE,breaks=limit_val)
1063

  
1064 730
#test<-do.call(rbind,s.table_term_tb_t)
1065
#cut
731

  
1066 732
s.table_term_tb
1067 733
names_var <- c("p-value")
1068 734
#id_var <- 
......
1084 750
summary_s.table_term2
1085 751

  
1086 752
#Now combine tables and drop duplicate columns the combined table can be modified for the paper...
1087
s.table_summary_tb <- cbind(summary_s.table_term,summary_s.table_term2[,-c("term_name","mod_name")]) 
753
s.table_summary_tb <- cbind(summary_s.table_term,summary_s.table_term2[,]) #-c("term_name","mod_name")]) 
754

  
755
AIC_models_tb
756
names_var <- c("AIC")
757
#id_var <- 
758
t3<-melt(AIC_models_tb,
759
        measure=names_var, 
760
        id=c("mod_name","term_name"),
761
        na.rm=T)
762

  
763
summary_AIC <- cast(t3,term_name+mod_name~variable,median)
764
summary_AIC 
765

  
766

  
767
#Now write out table...
768

  
769
table<-rbind(table_data1,table_data3)
770
table<-rbind(table,table_data4)
771
table<- round(table,digit=3) #roundto three digits teh differences
1088 772

  
773
model_col<-c("GAM","Kriging","GWR")
774
names_table_col<-c("MAE","RMSE","ME","R","Model")
1089 775

  
776
table$Model <-model_col
777
names(table)<- names_table_col
778
table
779

  
780
file_name<-paste("table4_avg_paper","_",out_prefix,".txt",sep="")
781
write.table(table,file=file_name,sep=",")
782

  
783
file_name<-paste("table4_sd_paper","_",out_prefix,".txt",sep="")
784
write.table(table_sd,file=file_name,sep=",")
1090 785

  
1091
################### END OF SCRIPT ###################
786
###################### END OF SCRIPT #######################
1092 787

  
1093 788

  

Also available in: Unified diff