Project

General

Profile

« Previous | Next » 

Revision d5970771

Added by Benoit Parmentier about 11 years ago

script for multi timescale paper, production of table 4

View differences:

climate/research/oregon/interpolation/multi_timescale_paper_interpolation.R
4 4
#different covariates using FUSION and CAI. Accuracy methods are added in the the function script to evaluate results.
5 5
#Figures, tables and data for the  paper are also produced in the script.
6 6
#AUTHOR: Benoit Parmentier 
7
#CREATED ON: 11/08/2013  
8
#MODIFIED ON: 10/08/2013            
7
#CREATED ON: 10/31/2013  
8
#MODIFIED ON: 11/08/2013            
9 9
#Version: 1
10 10
#PROJECT: Environmental Layers project                                     
11 11
#################################################################################################
......
49 49
#direct methods: gam, kriging, gwr
50 50
in_dir1 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gam_daily_lst_comb5_11012013"
51 51
in_dir2 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_kriging_daily_lst_comb5_11022013"
52
#in_dir3 <-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_kriging_day_lst_comb3_07112013"
52
in_dir3 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gwr_daily_lst_comb5p1_3_11062013"
53 53
#CAI: gam, kriging, gwr
54 54
in_dir4 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gam_cai_lst_comb5_11032013"
55 55
in_dir5 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_kriging_cai_lst_comb5_11032013"
56 56
in_dir6 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gwr_cai_lst_comb5_11042013"
57 57
#FSS: gam, kriging, gwr
58
#in_dir7 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gam_fss_lst_comb5_11062013"
58
in_dir7 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gam_fss_lst_comb5_11062013"
59 59
in_dir8 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_kriging_fss_lst_comb5_11052013"
60
#in_dir9 <- "/data/project/layers/commons/Oregon_interpolation/output_data_365d_kriging_daily_mults1_lst_comb3_10112013"
60
in_dir9 <- "/data/project/layers/commons/Oregon_interpolation/output_data_365d_gwr_fss_lst_comb5_11052013"
61 61
#
62 62

  
63 63
##raster_prediction object for comb5
64 64
#direct methods
65
raster_obj_file_1 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_daily_lst_comb5_11022013.RData" 
66
raster_obj_file_2 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_day_lst_comb4_08152013.RData"
67
#raster_obj_file_3 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_day_lst_comb3_07112013.RData"
65
raster_obj_file_1 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_daily_lst_comb5_11012013.RData" 
66
raster_obj_file_2 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_daily_lst_comb5_11022013.RData"
67
raster_obj_file_3 <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_daily_lst_comb5p1_3_11062013.RData"
68
raster_obj_file_3b <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_daily_lst_comb5p1_3_11062013.RData"
68 69
#CAI
69 70
raster_obj_file_4 <- "raster_prediction_obj_gam_CAI_dailyTmax_365d_gam_cai_lst_comb5_11032013.RData"
70 71
raster_obj_file_5 <- "raster_prediction_obj_kriging_CAI_dailyTmax_365d_kriging_cai_lst_comb5_11032013.RData"
71 72
raster_obj_file_6 <- "raster_prediction_obj_gwr_CAI_dailyTmax_365d_gwr_cai_lst_comb5_11042013.RData"
72 73
#FSS
73
#raster_obj_file_7 <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_daily_mults1_lst_comb3_10132013.RData"
74
raster_obj_file_8 <-  "raster_prediction_obj_kriging_fusion_dailyTmax_365d_kriging_fss_lst_comb5_11052013.RData"
75
#raster_obj_file_9 <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_daily_mults1_lst_comb3_10132013.RData"
74
raster_obj_file_7 <- "raster_prediction_obj_gam_fusion_dailyTmax_365d_gam_fss_lst_comb5_11062013.RData"
75
raster_obj_file_8 <- "raster_prediction_obj_kriging_fusion_dailyTmax_365d_kriging_fss_lst_comb5_11052013.RData"
76
raster_obj_file_9 <- "raster_prediction_obj_gwr_fusion_dailyTmax_365d_gwr_fss_lst_comb5_11052013.RData"
76 77

  
77 78
out_dir<-"/home/parmentier/Data/IPLANT_project/paper_multitime_scale__analyses_tables_fig_09032013"
78 79
setwd(out_dir)
......
81 82
met_stations_outfiles_obj_file<-"/data/project/layers/commons/data_workflow/output_data_365d_gam_fus_lst_test_run_07172013/met_stations_outfiles_obj_gam_fusion__365d_gam_fus_lst_test_run_07172013.RData"
82 83
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
83 84
y_var_name <- "dailyTmax"
84
out_prefix<-"analyses_10312013"
85
out_prefix<-"analyses_11082013"
85 86

  
86 87
#method_interpolation <- "gam_daily"
87
covar_obj_file_1 <- "covar_obj__365d_gam_day_lst_comb3_08132013.RData"
88
#met_obj_file_1 <- "met_stations_outfiles_obj_gam_daily__365d_gam_day_lst_comb3_08132013.RData"
89
#met_stations_outfiles_obj_gam_daily__365d_gam_day_lst_comb3_08132013.RData
90

  
88
#covar_obj_file_1<- list.files(path=in_dir1,pattern="covar_obj.*.RData")
89
covar_obj_file_1 <- "covar_obj__365d_gam_daily_lst_comb5_11012013.RData"
90
met_obj_file_1 <- "met_stations_outfiles_obj_gam_daily__365d_gam_daily_lst_comb5_11012013.RData"
91 91

  
92 92
#Load objects containing training, testing, models objects 
93 93

  
94
#met_stations_obj <- load_obj(met_stations_outfiles_obj_file)
95
#covar_obj <-load_obj(file.path(in_dir1,covar_obj_file_1)) #Reading covariates object for GAM daily method
96
#infile_covariates <- covar_obj$infile_covariates
97
#infile_reg_outline <- covar_obj$infile_reg_outline
94
met_stations_obj <- load_obj(file.path(in_dir1,met_obj_file_1)
95
covar_obj <-load_obj(file.path(in_dir1,covar_obj_file_1)) #Reading covariates object for GAM daily method
96
infile_covariates <- covar_obj$infile_covariates
97
infile_reg_outline <- covar_obj$infile_reg_outline
98 98
covar_names<- covar_obj$covar_names
99 99
#####
100 100
s_raster <- brick(infile_covariates)
101 101
names(s_raster)<-covar_names
102 102

  
103
raster_prediction_obj_1 <-load_obj(file.path(in_dir1,raster_obj_file_1)) #comb3 (baseline 2)
104
raster_prediction_obj_2 <-load_obj(file.path(in_dir2,raster_obj_file_2)) #comb4 (baseline 1)
105
raster_prediction_obj_3 <-load_obj(file.path(in_dir3,raster_obj_file_3)) #comb3/mod1 baseline 2, kriging
106
raster_prediction_obj_4 <-load_obj(file.path(in_dir4,raster_obj_file_4)) #comb3/mod1 baseline 2, gwr
107
raster_prediction_obj_5 <-load_obj(file.path(in_dir5,raster_obj_file_5)) #gam daily multisampling 10 to 70%
108
raster_prediction_obj_6 <-load_obj(file.path(in_dir6,raster_obj_file_6)) #kriging daily multisampling 10 to 70% 
109
raster_prediction_obj_7 <-load_obj(file.path(in_dir7,raster_obj_file_7)) #gwr daily multisampling 10 to 70%
110
raster_prediction_obj_8 <-load_obj(file.path(in_dir8,raster_obj_file_8)) #kriging fss 
111
raster_prediction_obj_9 <-load_obj(file.path(in_dir9,raster_obj_file_9)) #gwr daily multisampling 10 to 70%
103
raster_prediction_obj_1 <-load_obj(file.path(in_dir1,raster_obj_file_1)) #comb5 gam_daily
104
raster_prediction_obj_2 <-load_obj(file.path(in_dir2,raster_obj_file_2)) #comb5 kriging_daily
105
raster_prediction_obj_3 <-load_obj(file.path(in_dir3,raster_obj_file_3)) #comb5 gwr_daily mod1 to mod3
106
raster_prediction_obj_3b <-load_obj(file.path(in_dir3b,raster_obj_file_3b)) #comb5 gwr_daily mod4 to mod7
107
                             
108
raster_prediction_obj_4 <-load_obj(file.path(in_dir4,raster_obj_file_4)) #comb5 gam_CAI
109
raster_prediction_obj_5 <-load_obj(file.path(in_dir5,raster_obj_file_5)) #comb5 kriging_CAI
110
raster_prediction_obj_6 <-load_obj(file.path(in_dir6,raster_obj_file_6)) #comb5 gwr_CAI 
111
raster_prediction_obj_7 <-load_obj(file.path(in_dir7,raster_obj_file_7)) #comb5 gam_fss
112
raster_prediction_obj_8 <-load_obj(file.path(in_dir8,raster_obj_file_8)) #comb5 kriging_fss 
113
raster_prediction_obj_9 <-load_obj(file.path(in_dir9,raster_obj_file_9)) #comb5 gwr_fss
112 114

  
113 115
############### BEGIN SCRIPT #################
114 116

  
115 117
############
116
##### 1) Generate: Table 3. Contribution of covariates using validation accuracy metrics
117
## This is a table of accuracy compared to baseline by difference 
118

  
118
##### 1) Generate: Table 4. Contribution of covariates using validation accuracy metrics
119
## This is a table of accuracy  
120

  
121
list_raster_obj_files  <- list(file.path(in_dir1,raster_obj_file_1),file.path(in_dir2,raster_obj_file_2),
122
                               file.path(in_dir3,raster_obj_file_3),file.path(in_dir4,raster_obj_file_4),
123
                               file.path(in_dir5,raster_obj_file_5),file.path(in_dir6,raster_obj_file_6),
124
                               file.path(in_dir7,raster_obj_file_7),file.path(in_dir8,raster_obj_file_8),
125
                               file.path(in_dir9,raster_obj_file_9))
126
 
127
names(list_raster_obj_files)<- c("gam_daily","kriging_daily","gwr_daily",
128
                                 "gam_CAI","kriging_CAI","gwr_CAI",
129
                                 "gam_fss","kriging_fss","gwr_fss")
130
summary_metrics_v_list<-lapply(list_raster_obj_files,FUN=function(x){x<-load_obj(x);x[["summary_metrics_v"]]$avg$rmse})                           
131
names(summary_metrics_v_list)
132

  
133
#for gam models
134
table_gam<- summary_metrics_v_list[grep("gam",names(summary_metrics_v_list))]
135
table_gam <-do.call(cbind,table_gam)
136
table_gam<-table_gam[1:7,]                           
137
#for kriging models
138
table_kriging <- summary_metrics_v_list[grep("kriging",names(summary_metrics_v_list))]
139
table_kriging <- do.call(cbind,table_kriging)
140
table_kriging <- table_kriging[1:7,]                         
141

  
142
#for kriging models
143
table_gwr <- summary_metrics_v_list[grep("gwr",names(summary_metrics_v_list))]
144
table_gwr <- do.call(cbind,table_gwr)
145
table_gwr <- table_gwr[1:7,]                         
146

  
147
table4_paper <- as.data.frame(do.call(rbind,list(table_gam,table_kriging,table_gwr)))    
148
table4_paper <- round(table4_paper,digit=3) #roundto three digits teh differences
149
table4_paper$Methods <- c(rep("gam",7),
150
                          rep("kriging",7),
151
                          rep("gwr",7))    
152
                             
119 153
#Check input covariates and model formula:
120
raster_prediction_obj_1$method_mod_obj[[1]]$formulas #models run for baseline 2, GAM daily
121
raster_prediction_obj_2$method_mod_obj[[1]]$formulas #models run for baseline 1, GAM daily
122

  
123
summary_metrics_v1<-raster_prediction_obj_1$summary_metrics_v
124
summary_metrics_v2<-raster_prediction_obj_2$summary_metrics_v
125

  
126
summary_metrics_v1$avg[1,]
127
summary_metrics_v2$avg[,]
128

  
129
tb1 <-raster_prediction_obj_1$tb_diagnostic_v #365 days accuracy table for baseline 2
130
tb2 <-raster_prediction_obj_2$tb_diagnostic_v #365 days accuracy table for baseline 1
131

  
132
table_data1 <-summary_metrics_v1$avg[,c("mae","rmse","me","r")] #select relevant columns from data.frame
133
table_data2 <-summary_metrics_v2$avg[,c("mae","rmse","me","r")]
134

  
135
###Table 3a, baseline 1: s(lat,lon) 
136
#Change here !!! need  to reorder rows based on  mod first
137
model_col<-c("Baseline1","Elevation","Northing","Easting","LST","DISTOC","Forest","CANHEIGHT","LST*Forest","LST*CANHEIGHT") # 
138
names_table_col<-c("ΔMAE","ΔRMSE","ΔME","Δr","Model")
139
df3a<- as.data.frame(sapply(table_data2,FUN=function(x) x-x[1]))
140
df3a<- round(df3a,digit=3) #roundto three digits teh differences
141
df3a$Model <-model_col
142
names(df3a)<- names_table_col
143
print(df3a) #show resulting table
144

  
145
###Table 3b, baseline 2: s(lat,lon) + s(elev)
146

  
147
model_col<-c("Baseline2","Northness","Eastness","LST","DISTOC","Forest","CANHEIGHT","LST*Forest","LST*CANHEIGHT")
148

  
149
df3b <- as.data.frame(sapply(table_data1,FUN=function(x) x-x[1])) #difference between baseline (line 1) and other models
150
df3b <- round(df3b,digit=3) #roundto three digits the differences
151
df3b$Model <- model_col
152
names(df3b)<- names_table_col
153
print(df3b) #Part b of table 3
154

  
155
sd2_v <-calc_stat_from_raster_prediction_obj(raster_prediction_obj_2,"sd",training=FALSE)
154
list_formulas <-raster_prediction_obj_2$method_mod_obj[[1]]$formulas #formulas for models run comb5
155
#strsplit(list_formulas,"~")
156
                             
157
table4_paper$Forumulas<-rep(list_formulas,3)                             
158
table4_paper<-table4_paper[(c(5,4,1,2,3))]                             
156 159

  
157 160
#Testing siginificance between models
161
                             
162
#mod_compk1 <-kruskal.test(tb1$rmse ~ as.factor(tb1$pred_mod)) #Kruskal Wallis test
163
#mod_compk2 <-kruskal.test(tb2$rmse ~ as.factor(tb2$pred_mod))
164
#print(mod_compk1) #not significant
158 165

  
159
mod_compk1 <-kruskal.test(tb1$rmse ~ as.factor(tb1$pred_mod)) #Kruskal Wallis test
160
mod_compk2 <-kruskal.test(tb2$rmse ~ as.factor(tb2$pred_mod))
161
print(mod_compk1) #not significant
162
print(mod_compk2) #not significant
163

  
164
#Multiple Kruskal Wallis
165
mod_compk1 <-kruskalmc(tb1$rmse ~ as.factor(tb1$pred_mod))
166
mod_compk2 <-kruskalmc(tb2$rmse ~ as.factor(tb2$pred_mod))
167

  
168
print(mod_compk1)
169
print(mod_compk2)
170

  
171
#Now write out table 3
172

  
173
file_name<-paste("table3a_baseline1_paper","_",out_prefix,".txt",sep="")
174
write.table(df3a,file=file_name,sep=",")
175

  
176
file_name<-paste("table3b_baseline2_paper","_",out_prefix,".txt",sep="")
177
write.table(df3b,file=file_name,sep=",")
178

  
179
############
180
##### 2) Generate: Table 4. Comparison between interpolation methods using validation accuracy metrics
181
## This is a table of accuracy metric for the optimal model (baseline2) as identified in the previous step 
182

  
183
##Table 4: Interpolation methods comparison
184

  
185
#get sd for kriging, gam and gwr
186
#tb1 <-raster_prediction_obj_1$tb_diagnostic_v  HGAM baseline 1, loaded
187
#tb2 <-raster_prediction_obj_2$tb_diagnostic_v  #GAM baseline 2, loaded
188
tb3 <-raster_prediction_obj_3$tb_diagnostic_v  #Kriging methods
189
tb4 <-raster_prediction_obj_4$tb_diagnostic_v  #GWR methods
190

  
191
names_mod<-c("mod1","mod2","mod3","mod4","mod5","mod6","mod7","mod8","mod9","mod10")
192

  
193
#Calculate standard deviation for each metric
194
sd1 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_1,"sd") # see function script
195
sd2 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_2,"sd") # standard deviation for baseline 2
196
sd3 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_3,"sd") # kriging
197
sd4 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_4,"sd") #gwr
198

  
199
#Combined sd in one table for mod1 (baseline 2)
200
table_sd <- do.call(rbind,list(sd1[1,],sd3[1,],sd4[1,])) #table containing the sd for the three mdethods for baseline 2
201
table_sd <- round(table_sd[,-1],digit=3) #round to three digits the differences
202
table_sd <- table_sd[,c("mae","rmse","me","r")] #column 5 contains m50, remove it
203

  
204
summary_metrics_v3<-raster_prediction_obj_3$summary_metrics_v  #Kriging methods
205
summary_metrics_v4<-raster_prediction_obj_4$summary_metrics_v  # GWR method
206

  
207
table_data3 <-summary_metrics_v3$avg[1,c("mae","rmse","me","r")] #first line mod1 (baseline)
208
table_data4 <-summary_metrics_v4$avg[1,c("mae","rmse","me","r")] #first line mod1 (baseline)
209
table_data1 <- table_data1[1,]
210

  
211
table_ac <-do.call(rbind, list(table_data1,table_data3,table_data4))
212
table_ac <- round(table_ac,digit=3) #roundto three digits teh differences
213

  
214
#combined tables with accuracy metrics and their standard deviations
215
table4_paper <-table_combined_symbol(table_ac,table_sd,"±")
216
#lapply(lapply(table_ac,FUN=paste,table_sd,FUN=paste,sep="±"),FUN=paste)
166
###Table 4, writeout
167
names_table_col <-c("Models","Method","Single time scale","Multiple time scale CAI","Multiple time scale FSS") # 
217 168

  
218
model_col<-c("GAM","Kriging","GWR")
219
names_table_col<-c("MAE","RMSE","ME","R","Model")
220

  
221
table4_paper$Model <-model_col
222 169
names(table4_paper)<- names_table_col
170
print(table4_paper) #show resulting table
171

  
172
#Now write out table 4
223 173

  
224
file_name<-paste("table4_compariaons_interpolation_methods_avg_paper","_",out_prefix,".txt",sep="")
225
write.table(as.data.frame(table4_paper),file=file_name,sep=",")
174
file_name<-paste("table4_multi_timescale_paper","_",out_prefix,".txt",sep="")
175
write.table(table4_paper,file=file_name,sep=",")
226 176

  
227 177
####################################
228 178
####### Now create figures #############
229 179

  
230 180
#figure 1: study area
231 181
#figure 2: methodological worklfow
232
#figure 3:Figure 3. MAE/RMSE and distance to closest fitting station.
182
#figure 3: daily mean compared to monthly mean
233 183
#Figure 4. RMSE and MAE, mulitisampling and hold out for FSS and GAM.
234 184
#Figure 5. Overtraining tendency
235
#Figure 6: Spatial pattern of prediction for one day
185
#Figure 6: Spatial pattern of prediction for one day (maps)
186
#Figure 7: Spatial transects for one day (maps)
187
#Figure 8: Spatial lag profiles and stations data  
188
#Figure 9: Image differencing and land cover                               
236 189

  
237
### Figure 1: Oregon study area
190
################################################
191
######### Figure 1: Oregon study area
238 192
#3 parameters from output
239 193
#infile_monthly<-list_outfiles$monthly_covar_ghcn_data #outile4 from database_covar script
240 194
#infile_daily<-list_outfiles$daily_covar_ghcn_data  #outfile3 from database_covar script
......
284 238
box()
285 239
dev.off()
286 240

  
287
### Figure 2:  Method comparison workflow 
241
################################################
242
######### Figure 2:  Method comparison workflow 
288 243

  
289 244
# Workflow figure is not generated in R
290 245

  
291 246
################################################
292
################### Figure 3. MAE/RMSE and distance to closest fitting station.
293

  
294
#Analysis accuracy in term of distance to closest station
295
#Assign model's names
296

  
297
names_mod <- paste("res_mod",1:10,sep="")
298
names(raster_prediction_obj_1$validation_mod_obj[[1]])
299
limit_val<-seq(0,150, by=10)
300

  
301
#Call function to extract residuals in term of distance to closest fitting station and summary statistics
302
l1 <- distance_to_closest_fitting_station(raster_prediction_obj_1,names_mod,dist_classes=limit_val) #GAM method
303
l3 <- distance_to_closest_fitting_station(raster_prediction_obj_3,names_mod,dist_classes=limit_val) #Kriging method
304
l4 <- distance_to_closest_fitting_station(raster_prediction_obj_4,names_mod,dist_classes=limit_val) #GWR method
305

  
306
l1$mae_tb #contains
307

  
308
list_dist_obj <-list(l1,l3,l4)
309
head(list_dist_obj[[1]]$dstspat_dat)
310
#should add method_interp in data.frame
311

  
312
#Prepare parameters/arguments for plotting
313

  
314
col_t <- c("red","blue","black")
315
pch_t <- 1:length(col_t)
316
legend_text <- c("GAM","Kriging","GWR")
317
mod_name <- c("res_mod1","res_mod1","res_mod1")#selected models
318
x_tick_labels <- limit_val<-seq(5,125, by=10)
319
metric_name <-"rmse_tb"
320
title_plot <- "RMSE and distance to closest station for baseline 2"
321
y_lab_text <- "RMSE (C)"
322
#quick test
323
add_CI <- c(TRUE,TRUE,TRUE)
324

  
325
list_param_plot<-list(list_dist_obj,col_t,pch_t,legend_text,mod_name,x_tick_labels,metric_name,title_plot,y_lab_text,add_CI)
326
names(list_param_plot)<-c("list_dist_obj","col_t","pch_t","legend_text","mod_name","x_tick_labels","metric_name","title_plot","y_lab_text","add_CI")
327
plot_dst_MAE(list_param_plot)
328

  
329
metric_name <-"mae_tb"
330
title_plot <- "MAE and distance to closest fitting station"
331
y_lab_text <- "MAE (C)"
332
add_CI <- c(TRUE,TRUE,TRUE)
333
#Now set up plotting device
334
res_pix<-480
335
col_mfrow<-2
336
row_mfrow<-1
337
png_file_name<- paste("Figure_3_accuracy_and_distance_to_closest_fitting_station_",out_prefix,".png", sep="")
338
png(filename=file.path(out_dir,png_file_name),
339
    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
340
par(mfrow=c(row_mfrow,col_mfrow))
341

  
342
#Figure 3a
343
list_param_plot<-list(list_dist_obj,col_t,pch_t,legend_text,mod_name,x_tick_labels,metric_name,
344
                      title_plot,y_lab_text,add_CI)
345
names(list_param_plot)<-c("list_dist_obj","col_t","pch_t","legend_text","mod_name","x_tick_labels","metric_name",
346
                          "title_plot","y_lab_text","add_CI")
347
#undebug(plot_dst_MAE)
348
plot_dst_MAE(list_param_plot)
349
title(xlab="Distance to closest fitting station (km)")
350

  
351
#Figure 3b: histogram
352
barplot(l1$n_tb$res_mod1,names.arg=limit_val,
353
        ylab="Number of observations",
354
        xlab="Distance to closest fitting station (km)")
355
title("Number of observation in term of distance bins")
356
box()
357
dev.off()
358

  
359
####################################################
360
#########Figure 4. RMSE and MAE, mulitisampling and hold out for single time scale methods.
361

  
362
#Using baseline 2: lat,lon and elev
363

  
364
#Use run of 7 hold out proportions, 10 to 70% with 10 random samples and 12 dates...
365
#Use gam_day method
366
#Use comb3 i.e. using baseline s(lat,lon)+s(elev)
367

  
368
#names_mod<-c("mod1","mod2","mod3","mod4","mod5","mod6","mod7","mod8","mod9")
369
names_mod<-c("mod1")
370

  
371
#debug(calc_stat_prop_tb)
372
prop_obj_gam<-calc_stat_prop_tb(names_mod,raster_prediction_obj_5)
373
prop_obj_kriging<-calc_stat_prop_tb(names_mod,raster_prediction_obj_6)
374
prop_obj_gwr<-calc_stat_prop_tb(names_mod,raster_prediction_obj_7)
375

  
376
list_prop_obj<-list(prop_obj_gam,prop_obj_kriging,prop_obj_gwr)
377
#Testing siginificance between models
378

  
379
mod_compk1 <-kruskal.test(prop_obj_gwr$tb$rmse ~ as.factor(prop_obj_gwr$tb$prop)) #Kruskal Wallis test
380
mod_compk2 <-kruskal.test(tb2$rmse ~ as.factor(tb2$pred_mod))
381
print(mod_compk1) #not significant
382
print(mod_compk2) #not significant
383

  
384
#Multiple Kruskal Wallis
385
mod_compk1 <-kruskalmc(prop_obj_gwr$tb$rmse ~ as.factor(prop_obj_gwr$tb$prop))
386
mod_compk2 <-kruskalmc(tb2$rmse ~ as.factor(tb2$pred_mod))
387

  
388
print(mod_compk1)
389
print(mod_compk2)
390

  
391
prop_tb <- rbind(prop_obj_gam$tb,prop_obj_kriging$tb,prop_obj_gwr$tb)
392

  
393
mod_compk1 <-kruskal.test(prop_tb$rmse ~ as.factor(prop_tb$method_interp)+as.factor(prop_tb$prop)) #Kruskal Wallis test
394
mod_prop <-lm(prop_tb$rmse ~ as.factor(prop_tb$method_interp)+as.factor(prop_tb$prop)) #This is significant!!
395

  
396
## plot setting for figure 4
397

  
398
col_t<-c("red","blue","black")
399
pch_t<- 1:length(col_t)
400
legend_text <- c("GAM","Kriging","GWR")
401
mod_name<-c("mod1","mod1","mod1")#selected models
402
add_CI <- c(TRUE,TRUE,TRUE)
403
CI_bar <- c(TRUE,TRUE,TRUE)
404
#add_CI <- c(TRUE,FALSE,FALSE)
405

  
406
##### plot figure 4 for paper
407
####
408

  
409
res_pix<-480
410
col_mfrow<-2
411
row_mfrow<-1
412
png_file_name<- paste("Figure_4_proportion_of_holdout_and_accuracy_",out_prefix,".png", sep="")
413
png(filename=file.path(out_dir,png_file_name),
414
    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
415
par(mfrow=c(row_mfrow,col_mfrow))
416
#par(mfrow=c(1,1))
417
metric_name<-"mae"
418
list_param_plot<-list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name,add_CI,CI_bar)
419
names(list_param_plot)<-c("list_prop_obj","col_t","pch_t","legend_text","mod_name","metric_name","add_CI","CI_bar")
420

  
421
#debug(plot_prop_metrics)
422
plot_prop_metrics(list_param_plot)
423
title(main="MAE for hold out and methods",
424
      xlab="Hold out validation/testing proportion",
425
      ylab="MAE (C)")
426

  
427
#now figure 4b
428
metric_name<-"rmse"
429
list_param_plot<-list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name,add_CI,CI_bar)
430
names(list_param_plot)<-c("list_prop_obj","col_t","pch_t","legend_text","mod_name","metric_name","add_CI","CI_bar")
431
plot_prop_metrics(list_param_plot)
432
title(main="RMSE for hold out and methods",
433
      xlab="Hold out validation/testing proportion",
434
      ylab="RMSE (C)")
435

  
436
dev.off()
437

  
438
####################################################
439
#########Figure 5. Overtraining tendency
440

  
441
#read in relevant data:
442
## Calculate average difference for RMSE for all three methods
443
#read in relevant data:
444

  
445
tb1_s<-extract_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"metrics_s")
446
rownames(tb1_s)<-NULL #remove row names
447
tb1_s$method_interp <- "gam_daily" #add type of interpolation...out_prefix too??
448

  
449
tb3_s<-extract_from_list_obj(raster_prediction_obj_3$validation_mod_obj,"metrics_s")
450
rownames(tb1_s)<-NULL #remove row names
451
tb3_s$method_interp <- "kriging_daily" #add type of interpolation...out_prefix too??
452

  
453
tb4_s<-extract_from_list_obj(raster_prediction_obj_4$validation_mod_obj,"metrics_s")
454
rownames(tb4_s)<-NULL #remove row names
455
tb4_s$method_interp <- "gwr_daily" #add type of interpolation...out_prefix too??
456

  
457
tb5_s<-extract_from_list_obj(raster_prediction_obj_5$validation_mod_obj,"metrics_s")
458
rownames(tb5_s)<-NULL #remove row names
459
tb5_s$method_interp <- "gam_daily" #add type of interpolation...out_prefix too??
460

  
461
tb6_s<-extract_from_list_obj(raster_prediction_obj_6$validation_mod_obj,"metrics_s")
462
rownames(tb6_s)<-NULL #remove row names
463
tb6_s$method_interp <- "kriging_daily" #add type of interpolation...out_prefix too??
464

  
465
tb7_s<-extract_from_list_obj(raster_prediction_obj_7$validation_mod_obj,"metrics_s")
466
rownames(tb7_s)<-NULL #remove row names
467
tb7_s$method_interp <- "gwr_daily" #add type of interpolation...out_prefix too??
468

  
469
#tb1_s <-raster_prediction_obj_1$tb_diagnostic_s  #gam dailycontains the accuracy metrics for each run...
470
#tb3_s <-raster_prediction_obj_3$tb_diagnostic_s  #Kriging daily methods
471
#tb4_s <-raster_prediction_obj_4$tb_diagnostic_s  #gwr daily methods
472

  
473
tb1 <-raster_prediction_obj_1$tb_diagnostic_v  #gam dailycontains the accuracy metrics for each run...
474
tb3 <-raster_prediction_obj_3$tb_diagnostic_v  #Kriging daily methods
475
tb4 <-raster_prediction_obj_4$tb_diagnostic_v  #gwr daily methods
476

  
477
tb5 <-raster_prediction_obj_5$tb_diagnostic_v  #gam dailycontains the accuracy metrics for each run...
478
tb6 <-raster_prediction_obj_6$tb_diagnostic_v  #Kriging daily methods
479
tb7 <-raster_prediction_obj_7$tb_diagnostic_v  #gwr daily methods
480

  
481
#Calculate difference in MAE and RMSE for training and testing data: call diff_df function
482
diff_tb1 <-diff_df(tb1_s[tb1_s$pred_mod=="mod1",],tb1[tb1$pred_mod=="mod1",],c("mae","rmse")) #gam select differences for mod1
483
diff_tb3 <-diff_df(tb3_s[tb3_s$pred_mod=="mod1",],tb3[tb3$pred_mod=="mod1",],c("mae","rmse")) #kriging
484
diff_tb4 <-diff_df(tb4_s[tb4_s$pred_mod=="mod1",],tb4[tb4$pred_mod=="mod1",],c("mae","rmse")) #gwr
485

  
486
diff_tb5 <-diff_df(tb5_s[tb5_s$pred_mod=="mod1",],tb5[tb5$pred_mod=="mod1",],c("mae","rmse")) #gam select differences for mod1
487
diff_tb6 <-diff_df(tb6_s[tb6_s$pred_mod=="mod1",],tb6[tb6$pred_mod=="mod1",],c("mae","rmse")) #kriging
488
diff_tb7 <-diff_df(tb7_s[tb7_s$pred_mod=="mod1",],tb7[tb7$pred_mod=="mod1",],c("mae","rmse")) #gwr
489

  
490

  
491
diff_mae_data <-data.frame(gam=diff_tb1$mae,kriging=diff_tb3$mae,gwr=diff_tb4$mae)
492
diff_rmse_data <-data.frame(gam=diff_tb1$rmse,kriging=diff_tb3$rmse,gwr=diff_tb4$rmse)
493

  
494

  
495
diff_mae_data_mult  <-data.frame(gam=diff_tb5$mae,kriging=diff_tb6$mae,gwr=diff_tb7$mae)
496
diff_rmse_data_mult <-data.frame(gam=diff_tb5$rmse,kriging=diff_tb6$rmse,gwr=diff_tb7$rmse)
497

  
498
diff_mae_data_mult$prop <- tb5$prop
499

  
500
boxplot(diff_mae_data_mult$gam~diff_mae_data_mult$prop)
501
boxplot(diff_mae_data_mult$kriging~diff_mae_data_mult$prop)
502

  
503
plot(diff_mae_data_mult$gam~diff_mae_data_mult$prop,type="p")
504

  
505

  
506

  
507
#Test the plot
508
boxplot(diff_mae_data)
509
boxplot(diff_rmse_data) #plot differences in training and testing accuracies for three methods
510
title(main="Training and testing RMSE for hold out and interpolation methods",
511
      xlab="Interpolation method",
512
      ylab="RMSE (C)")
513

  
514
boxplot(diff_mae_data_mult)
515
boxplot(diff_rmse_data_mult) #plot differences in training and testing accuracies for three methods
516

  
517

  
518
boxplot(diff_mae_data_mult)
519

  
520
tb5 <-raster_prediction_obj_5$tb_diagnostic_v  #gam dailycontains the accuracy metrics for each run...
521
tb6 <-raster_prediction_obj_6$tb_diagnostic_v  #Kriging daily methods
522
tb7 <-raster_prediction_obj_7$tb_diagnostic_v  #gwr daily methods
523

  
524
prop_obj_gam_s<-calc_stat_prop_tb(names_mod,raster_prediction_obj_5,testing=FALSE) #training accuracy with hold out proportion
525
prop_obj_kriging_s<-calc_stat_prop_tb(names_mod,raster_prediction_obj_6,testing=FALSE)
526
prop_obj_gwr_s<-calc_stat_prop_tb(names_mod,raster_prediction_obj_7,testing=FALSE)
527

  
528
plot(prop_obj_gam_s$avg_tb$rmse ~ prop_obj_gam_s$avg_tb$prop, type="b",)
529

  
530
###############
531
#### plot figure 5
532
#set up the output file to plot
533
res_pix<-480
534
col_mfrow<-2
535
row_mfrow<-1
536
png_file_name<- paste("Figure_5_overtraining_tendency_and_holdout_proportion_",out_prefix,".png", sep="")
537
png(filename=file.path(out_dir,png_file_name),
538
    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
539
par(mfrow=c(row_mfrow,col_mfrow))
540

  
541
col_t<-c("red","blue","black")
542
pch_t<- 1:length(col_t)
543
##Make this a function???
544
y_range<-range(prop_obj_kriging$avg_tb$rmse,prop_obj_kriging_s$avg_tb$rmse)
545
#y_range<-range(prop_obj_gam$avg_tb$rmse,prop_obj_gam_s$avg_tb$rmse)
546
#plot(prop_obj_gam$avg_tb$rmse ~ prop_obj_gam$avg_tb$prop, ylab="",xlab="",type="b",col=c("red"),pch=pch_t[1],ylim=y_range,lty=2)
547
#lines(prop_obj_gam_s$avg_tb$rmse ~ prop_obj_gam_s$avg_tb$prop, ylab="",xlab="",type="b",pch=pch_t[1],ylim=y_range,col=c("red"))
548
#lines(prop_obj_gwr_s$avg_tb$rmse ~ prop_obj_gwr_s$avg_tb$prop, ylab="",xlab="",type="b",ylim=y_range,pch=pch_t[3],col=c("black"))
549
#lines(prop_obj_gwr$avg_tb$rmse ~ prop_obj_gam$avg_tb$prop, ylab="",xlab="",type="b",ylim=y_range,pch=pch_t[3],,col=c("black"),lty=2)
550
#lines(prop_obj_kriging$avg_tb$rmse ~ prop_obj_kriging$avg_tb$prop,ylab="",xlab="", type="b",ylim=y_range,pch=pch_t[2],,col=c("blue"),lty=2)
551
#lines(prop_obj_kriging_s$avg_tb$rmse ~ prop_obj_kriging_s$avg_tb$prop,ylab="",xlab="",type="b",ylim=y_range,pch=pch_t[2],col=c("blue"))
552

  
553
plot_ac_holdout_prop<- function(l_prop,l_col_t,l_pch_t,add_CI,y_range){
554
  
555
  for(i in 1:length(l_prop)){
556
    if(i==1){
557
      plot(l_prop[[i]]$avg_tb$rmse ~ l_prop[[i]]$avg_tb$prop,ylab="",xlab="",type="b",pch=l_pch_t[i],ylim=y_range,col=l_col_t[i],lty=l_lty_t[i])
558
    }else{
559
      lines(l_prop[[i]]$avg_tb$rmse ~ l_prop[[i]]$avg_tb$prop,ylab="",xlab="",type="b",pch=l_pch_t[i],ylim=y_range,col=l_col_t[i],lty=l_lty_t[i])
560
    }
561
    if(add_CI[i]==TRUE){
562
      ciw   <- qt(0.975, l_prop[[i]]$n_tb$rmse) * l_prop[[i]]$sd_tb$rmse / sqrt(l_prop[[i]]$n_tb$rmse)
563
      plotCI(y=l_prop[[i]]$avg_tb$rmse, x=l_prop[[i]]$avg_tb$prop, uiw=ciw, col=l_col_t[i], barcol="blue", lwd=1,pch=l_pch_t[i],
564
             add=TRUE) 
565
    }
566
  }
567
}
568

  
569
l_prop<- list(prop_obj_gam,prop_obj_gam_s,prop_obj_gwr_s,prop_obj_gwr,prop_obj_kriging,prop_obj_kriging_s)
570

  
571
l_col_t <- c("red","red","black","black","blue","blue")
572
l_pch_t <- c(1,1,3,3,2,2)
573
l_lty_t <- c(2,1,1,2,2,1)
574
add_CI <- c(FALSE,FALSE,FALSE,FALSE,FALSE,FALSE)
575
add_CI <- c(TRUE,TRUE,TRUE,TRUE,TRUE,TRUE)
576
y_range<-c(0.5,3)
577

  
578
plot_ac_holdout_prop(l_prop,l_col_t,l_pch_t,add_CI,y_range)
247
######### Figure 3: daily mean compared to monthly mean
579 248

  
580
legend_text <- c("GAM","Kriging","GWR")
581
#legend_text <- c("GAM","Kriging","GWR","training","testing")
582

  
583
#legend("topleft",legend=legend_text, 
584
#       cex=0.9, pch=c(pch_t,NA,NA),col=c(col_t,"white","white"),lty=c(NA,NA,NA,1,2),bty="n")
585
legend("topleft",legend=legend_text, 
586
       cex=0.9, pch=c(pch_t),col=c(col_t),lty=c(1,1,1),bty="n")
587
legend_text_data <-c("training","testing")
588
legend("top",legend=legend_text_data, 
589
       cex=0.9, lty=c(1,2),bty="n")
590

  
591
title(main="Training and testing RMSE for hold out and methods",
592
      xlab="Hold out validation/testing proportion",
593
      ylab="RMSE (C)")
594

  
595

  
596
boxplot(diff_mae_data_mult[-4]) #plot differences in training and testing accuracies for three methods
597

  
598
title(main="Difference between training and testing MAE",
599
      xlab="Interpolation method",
600
      ylab="MAE (C)")
601

  
602
dev.off()
603

  
604
############### STUDY TIME AND accuracy
605
  #########Figure 6: Monthly RMSE averages for the three interpolation methods: GAM, GWR and Kriging.
606

  
607
mae_tmp<- data.frame(gam=tb1[tb1$pred_mod=="mod1",c("mae")],
608
                     kriging=tb3[tb3$pred_mod=="mod1",c("mae")],
609
                     gwr=tb4[tb4$pred_mod=="mod1",c("mae")])
610

  
611
plot(mae_tmp$gam,col=c("red"),type="b",pch=1)
612
lines(mae_tmp$kriging,col=c("blue"),type="b",pch=2)
613
lines(mae_tmp$gwr,col=c("black"),type="b",pch=2)
614
legend("topleft",legend=legend_text, 
615
       cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n")
616

  
617
max(mae_tmp$gam)
618

  
619
x2<-tb1[tb1$pred_mod=="mod1",c("mae","date")]
620
arrange(x2,desc(mae))
621

  
622
#kriging=tb3[tb3$pred_mod=="mod1",c("mae")],
623
#                     gwr=tb4[tb4$pred_mod=="mod1",c("mae")])
624

  
625
##### MONTHLY AVERAGES
626

  
627
tb1_month<-raster_prediction_obj_1$summary_month_metrics_v[[1]] #note that this is for model1
628
tb3_month<-raster_prediction_obj_3$summary_month_metrics_v[[1]]
629
tb4_month<- raster_prediction_obj_4$summary_month_metrics_v[[1]]
630

  
631
y_range<-range(tb1_month$mae,tb3_month$mae,tb4_month$mae)
632
plot(1:12,tb1_month$mae,col=c("red"),type="b",ylim=y_range)
633
lines(1:12,tb3_month$mae,col=c("blue"),type="b")
634
lines(1:12,tb4_month$mae,col=c("black"),type="b")
635

  
636
add_month_tag<-function(tb){
637
  date<-strptime(tb$date, "%Y%m%d")   # interpolation date being processed
638
  month<-strftime(date, "%m")          # current month of the date being processed
639
}
640
tb1$month<-add_month_tag(tb1)
641
tb3$month<-add_month_tag(tb3)
642
tb4$month<-add_month_tag(tb4)
643

  
644
metric_name<-"mae"
645
month_data_list<-list(gam=tb1[tb1$pred_mod=="mod1",c(metric_name,"month")],
646
                      kriging=tb3[tb3$pred_mod=="mod1",c(metric_name,"month")],
647
                      gwr=tb4[tb4$pred_mod=="mod1",c(metric_name,"month")])
648
y_range<-range(unlist(month_data_list))
649

  
650

  
651
#boxplot(test[[metric_ac]]~test[[c("month")]],outline=FALSE,horizontal=FALSE,cex=0.5,
652
#        ylab=paste(metric_ac,"in degree C",sep=" "),,axisnames=FALSE,axes=FALSE)
653
#boxplot(test[[metric_ac]]~test[[c("month")]],outline=FALSE,horizontal=FALSE,cex=0.5,
654
#        ylab=paste(metric_ac,"in degree C",sep=" "))
655
#axis(1, labels = FALSE)
656
## Create some text labels
657
#labels <- month.abb # abbreviated names for each month
658
## Plot x axis labels at default tick marks
659
#text(1:length(labels), par("usr")[3] - 0.25, srt = 45, adj = 1,
660
#     labels = labels, xpd = TRUE)
661
#axis(2)
662
#box()
663

  
664
#Now plot figure 6
665
res_pix<-480
666
col_mfrow<-2
667
#row_mfrow<-2
668
row_mfrow<-1
669

  
670
png_file_name<- paste("Figure_6_monthly_accuracy_",out_prefix,".png", sep="")
671
png(filename=file.path(out_dir,png_file_name),
672
    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
673
par(mfrow=c(row_mfrow,col_mfrow))
674

  
675
col_t<-c("red","blue","black")
676
pch_t<- 1:length(col_t)
677
legend_text <- c("GAM","Kriging","GWR")
678

  
679
y_range<-range(tb1_month$mae,tb3_month$mae,tb4_month$mae)
680
xlab_tick <- unique(tb1$month)
681
xlab_text <-"Month"
682
ylab_text <- "MAE (C)"
683
plot(1:12,tb1_month$mae,col=c("red"),type="b",ylim=y_range,xlab=xlab_text,ylab=ylab_text,xaxt="n")
684
lines(1:12,tb3_month$mae,col=c("blue"),type="b")
685
lines(1:12,tb4_month$mae,col=c("black"),type="b")
686
axis(1,at=1:12,labels=xlab_tick)
687
title(main="Monthly average MAE")
688
legend("topleft",legend=legend_text, 
689
       cex=0.9, pch=c(pch_t),col=c(col_t),lty=c(1,1,1),bty="n")
690

  
691
#Second plot
692
ylab_text<-"MAE (C)"
693
xlab_text<-"Month"
694
#y_range<-range(month_data_list$gam$mae,month_data_list$kriging$mae,month_data_list$gwr$mae)
695
#y_range<-range(month_data_list$gam$mae)
696
boxplot(mae~month,data=month_data_list$gam,main="Monthly MAE boxplot", xlab=xlab_text,ylab=ylab_text,outline=FALSE)
697
#boxplot(mae~month,data=month_data_list$kriging,ylim=y_range,main="Kriging",ylab=ylab_text,outline=FALSE)
698
#boxplot(mae~month,data=month_data_list$gwr,ylim=y_range,main="GWR",ylab=ylab_text,outline=FALSE)
699

  
700
dev.off()
701

  
702
#Now generate table 5
703

  
704
test<-boxplot(mae~month,data=month_data_list$gam,main="GAM",ylab=ylab_text,outline=FALSE)
705

  
706
length(tb1_month$mae)
707
names(tb1_month)
708

  
709
#Calculate standard deviation for each metric
710
sd1 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_1,"sd") # see function script
711
sd2 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_2,"sd") # standard deviation for baseline 2
712
sd3 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_3,"sd") # kriging
713
sd4 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_4,"sd") #gwr
714

  
715
avg_v1 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_1,"mean") # see function script
716
avg_v2 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_2,"mean") # standard deviation for baseline 2
717
avg_v3 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_3,"mean") # kriging
718
avg_v4 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_4,"mean") #gwr
719

  
720
#Combined sd in one table for mod1 (baseline 2)
721
table_sd <- do.call(cbind,list(gam=sd1[sd1$pred_mod=="mod1",c("rmse")],
722
                               kriging=sd3[sd3$pred_mod=="mod1",c("rmse")],
723
                               gwr=sd4[sd4$pred_mod=="mod1",c("rmse")])) #table containing the sd for the three mdethods for baseline 2
724
table_sd <- as.data.frame(round(table_sd,digit=3)) #round to three digits the differences
725

  
726
#Combined mean in one table for mod1 (baseline 2)
727
table_avg <- do.call(cbind,list(gam=avg_v1[avg_v1$pred_mod=="mod1",c("rmse")],
728
                               kriging=avg_v3[sd3$pred_mod=="mod1",c("rmse")],
729
                               gwr=avg_v4[avg_v4$pred_mod=="mod1",c("rmse")])) #table containing the sd for the three mdethods for baseline 2
730
table_avg <- as.data.frame(round(table_avg,digit=3)) #round to three digits the differences
731

  
732
#combined tables with accuracy metrics and their standard deviations
733
table5_paper <-table_combined_symbol(table_avg,table_sd,"±")
734
table5_paper$month <- month.abb
735

  
736
file_name<-paste("table5_comparisons_monthly_averages_interpolation_methods_paper","_",out_prefix,".txt",sep="")
737
write.table(as.data.frame(table5_paper),file=file_name,sep=",")
738

  
739
####### FIGURE 7: Spatial pattern ######
740

  
741
y_var_name <-"dailyTmax"
742
index<-244 #index corresponding to January 1
743

  
744
lf1 <- raster_prediction_obj_1$method_mod_obj[[index]][[y_var_name]] #select relevant raster images for the given dates
745
lf3 <- raster_prediction_obj_3$method_mod_obj[[index]][[y_var_name]]
746
lf4 <- raster_prediction_obj_4$method_mod_obj[[index]][[y_var_name]]
747

  
748
date_selected <- "20109101"
749
methods_names <-c("gam","kriging","gwr")
750
names_layers<-methods_names
751
lf <-list(lf1$mod1,lf3$mod1,lf4$mod1)
752
#lf <-lf[[1]]
753

  
754
pred_temp_s <-stack(lf)
755
#predictions<-mask(predictions,mask_rast)
756
names(pred_temp_s)<-names_layers
757
s.range <- c(min(minValue(pred_temp_s)), max(maxValue(pred_temp_s)))
758
#s.range <- s.range+c(5,-5)
759
col.breaks <- pretty(s.range, n=200)
760
lab.breaks <- pretty(s.range, n=100)
761
temp.colors <- colorRampPalette(c('blue', 'white', 'red'))
762
max_val<-s.range[2]
763
min_val <-s.range[1]
764
#max_val<- -10
765
min_val <- 0
766
layout_m<-c(1,3) #one row two columns
767

  
768
png(paste("Figure7__spatial_pattern_tmax_prediction_levelplot_",date_selected,out_prefix,".png", sep=""),
769
    height=480*layout_m[1],width=480*layout_m[2])
770

  
771
levelplot(pred_temp_s,main="Interpolated Surfaces Method Comparison", ylab=NULL,xlab=NULL,
772
          par.settings = list(axis.text = list(font = 2, cex = 1.3),layout=layout_m,
773
                              par.main.text=list(font=2,cex=2),strip.background=list(col="white")),par.strip.text=list(font=2,cex=1.5),
774
          names.attr=names_layers,col.regions=temp.colors,at=seq(max_val,min_val,by=0.01))
775
#col.regions=temp.colors(25))
776
dev.off()
777

  
778
## FIGURE COMPARISON OF  MODELS COVARRIATES
779

  
780
lf2 <- raster_prediction_obj_2$method_mod_obj[[index]][[y_var_name]]
781
lf2 #contains the models for gam
782

  
783
pred_temp_s <-stack(lf2)
784
date_selected <- "20109101"
785
#names_layers <-c("mod1=s(lat,long)+s(elev)","mod4=s(lat,long)+s(LST)","diff=mod1-mod4")
786
names_layers <-c("mod1 = s(lat,long)","mod2 = s(lat,long)+s(elev)","mod3 = s(lat,long)+s(N_w)","mod4 = s(lat,long)+s(E_w)",
787
                 "mod5 = s(lat,long)+s(LST)","mod6 = s(lat,long)+s(DISTOC)","mod7 = s(lat,long)+s(LC1)",
788
                 "mod8 = s(lat,long)+s(LC1,LST)","mod9 = s(lat,long)+s(CANHGHT)","mod10 = s(lat,long)+s(LST,CANHGHT)")
789

  
790
#names_layers<-names(pred_temp_s)
791
#names(pred_temp_s)<-names_layers
792

  
793
s.range <- c(min(minValue(pred_temp_s)), max(maxValue(pred_temp_s)))
794
#s.range <- s.range+c(5,-5)
795
col.breaks <- pretty(s.range, n=200)
796
lab.breaks <- pretty(s.range, n=100)
797
temp.colors <- colorRampPalette(c('blue', 'white', 'red'))
798
max_val<-s.range[2]
799
min_val <-s.range[1]
800
#max_val<- -10
801
#min_val <- 0
802
layout_m<-c(4,3) #one row two columns
803

  
804
png(paste("Figure_7_spatial_pattern_tmax_prediction_models_baseline1_gam_levelplot_",date_selected,out_prefix,".png", sep=""),
805
    height=480*layout_m[1],width=480*layout_m[2])
806

  
807
levelplot(pred_temp_s,main="Interpolated Surfaces Model Comparison baseline 1", ylab=NULL,xlab=NULL,
808
          par.settings = list(axis.text = list(font = 2, cex = 1.3),layout=layout_m,
809
                              par.main.text=list(font=2,cex=2),strip.background=list(col="white")),par.strip.text=list(font=2,cex=1.5),
810
          names.attr=names_layers,col.regions=temp.colors,at=seq(max_val,min_val,by=0.01))
811
#col.regions=temp.colors(25))
812
dev.off()
813

  
814

  
815
################ #FIGURE 8
816
lf1 <- raster_prediction_obj_1$method_mod_obj[[index]][[y_var_name]]
817
lf1 #contains the models for gam
818

  
819
pred_temp_s <-stack(lf1$mod1,lf1$mod4)
820
date_selected <- "20109101"
821
#names_layers <-c("mod1=s(lat,long)+s(elev)","mod4=s(lat,long)+s(LST)","diff=mod1-mod4")
822
names_layers <-c("mod1 = s(lat,long)+s(elev)","mod4 = s(lat,long)+s(LST)")
823
names(pred_temp_s)<-names_layers
824
s.range <- c(min(minValue(pred_temp_s)), max(maxValue(pred_temp_s)))
825
#s.range <- s.range+c(5,-5)
826
col.breaks <- pretty(s.range, n=200)
827
lab.breaks <- pretty(s.range, n=100)
828
temp.colors <- colorRampPalette(c('blue', 'white', 'red'))
829
max_val<-s.range[2]
830
min_val <-s.range[1]
831
#max_val<- -10
832
min_val <- 0
833
layout_m<-c(1,2) #one row two columns
834

  
835
png(paste("Figure_8a_spatial_pattern_tmax_prediction_models_gam_levelplot_",date_selected,out_prefix,".png", sep=""),
836
    height=480*layout_m[1],width=480*layout_m[2])
837

  
838
levelplot(pred_temp_s,main="Interpolated Surfaces Model Comparison", ylab=NULL,xlab=NULL,
839
          par.settings = list(axis.text = list(font = 2, cex = 1.3),layout=layout_m,
840
                              par.main.text=list(font=2,cex=2),strip.background=list(col="white")),par.strip.text=list(font=2,cex=1.5),
841
          names.attr=names_layers,col.regions=temp.colors,at=seq(max_val,min_val,by=0.01))
842
#col.regions=temp.colors(25))
843

  
844
#col.regions=temp.colors(25))
845
dev.off()
846

  
847

  
848
diff<-raster(lf1$mod1)-raster(lf1$mod4)
849
names_layers <- c("difference=mod1-mod4")
850
names(diff) <- names_layers
851

  
852

  
853
png(paste("Figure_8b_spatial_pattern_tmax_prediction_models_gam_levelplot_",date_selected,out_prefix,".png", sep=""),
854
    height=480*layout_m[1],width=480*layout_m[2])
855

  
856
plot(diff,col=temp.colors(100),main=names_layers)
857
#levelplot(diff,main="Interpolated Surfaces Model Comparison", ylab=NULL,xlab=NULL,
858
#          par.settings = list(axis.text = list(font = 2, cex = 1.3),layout=c(1,1),
859
#                              par.main.text=list(font=2,cex=2),strip.background=list(col="white")),par.strip.text=list(font=2,cex=1.5),
860
#          names.attr=names_layers,col.regions=temp.colors)
861
dev.off()
862

  
863
######## NOW GET A ACCURACY BY STATIONS
864

  
865
list_data_s <-extract_list_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"data_s")
866
list_data_v <-extract_list_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"data_v")
867

  
868
#number of observations per day
869
year_nbv <- sapply(list_data_v,FUN=length)
870
year_nbs <- sapply(list_data_s,FUN=length)
871
nb_df <- data.frame(nv=year_nbv,ns=year_nbs)
872
nb_df$n_tot <- year_nbv + year_nbs
873
range(nb_df$n_tot)
874

  
875
data_v_test <- list_data_v[[1]]
876

  
877
#Convert sp data.frame and combined them in one unique df, see function define earlier
878
data_v_combined <-convert_spdf_to_df_from_list(list_data_v) #long rownames
879
names_var<-c("res_mod1","res_mod2","res_mod3","res_mod4","res_mod5","res_mod6","res_mod7","res_mod8")
880

  
881
limit_val<- c(-30,-2.57,0,2.57,30)
882
data_v_combined$res_mod1_rc1 <- cut(data_v_combined$res_mod1,include.lowest=TRUE,breaks=limit_val)
883
data_v_combined$res_mod1_rc1
884

  
885
t<-melt(data_v_combined,
886
        measure=names_var, 
887
        id=c("res_mod1_rc1","id"),
888
        na.rm=T)
889

  
890
n_tb<-cast(t,res_mod1_rc1+id~variable,length)
891
n_tb_tot <-cast(t,id~variable,length) #number of times the stations was used for validation
892

  
893
merge(n_tb$id
894
dim(n_tb)
895
#mae_tb <-cast(t,dst_cat1~variable,mae_fun)
896
#rmse_tb <-cast(t,dst_cat1~variable,rmse_fun)
897
#sd_abs_tb<-cast(t,dst_cat1~variable,sd_abs_fun)
898

  
899
#avg_tb<-cast(t,dst_cat1~variable,mean)
900
#sd_tb<-cast(t,dst_cat1~variable,sd)
901

  
902
t<-melt(data_v_combined,
903
        measure=names_var, 
904
        id=c("id"),
905
        na.rm=T)
906

  
907
hist(data_v_combined)
908
names(data_v_combined)
909

  
910
mae_fun<-function(x){mean(abs(x))} #Mean Absolute Error give a residuals vector
911
sd_abs_fun<-function(x){sd(abs(x))} #sd Absolute Error give a residuals vector
912

  
913
mae_tb<-cast(t,id~variable,mae_fun) #join to station location...
914

  
915
sd_abs_tb<-cast(t,dst_cat1~variable,sd_abs_fun)
916

  
917
#avg_tb<-cast(t,dst_cat1~variable,mean)
918
#sd_tb<-cast(t,dst_cat1~variable,sd)
919
#n_tb<-cast(t,dst_cat1~variable,length)
920

  
921
met_obj <-load_obj(file.path(in_dir1,met_obj_file_1))
922
stat_loc<-readOGR(dsn=in_dir1,layer=sub(".shp","",basename(met_obj$loc_stations)))
923

  
924
data_v_mae <-merge(mae_tb,stat_loc,by.x=c("id"),by.y=c("STAT_ID"))
925
hist(data_v_mae$res_mod1)
926
mean(data_v_mae$res_mod1)
927

  
928
coords<- data_v_mae[c('longitude','latitude')]              #Define coordinates in a data frame
929
CRS_interp<-proj4string(data_v_test)
930
coordinates(data_v_mae)<-coords                      #Assign coordinates to the data frame
931
proj4string(data_v_mae)<- proj4string(stat_loc)                #Assign coordinates reference system in PROJ4 format
932
data_v_mae<-spTransform(data_v_mae,CRS(CRS_interp))     #Project from WGS84 to new coord. system
933

  
934
p<-bubble(data_v_mae,"res_mod1",maxsize=4,col=c("red"),fill=FALSE)
935
#p<-bubble(data_v_mae,"res_mod1",maxsize=4,col=c("red"),fill=FALSE,key.entries=c(1,1.5,2,2.5,3,3.5,4,4.5))
936

  
937
p
938

  
939
infile_reg_outline <- "/data/project/layers/commons/data_workflow/inputs/region_outlines_ref_files/OR83M_state_outline.shp"  #input region outline defined by polygon: Oregon
940
reg_outline <- readOGR(dsn=dirname(infile_reg_outline),layer=sub(".shp","",basename(infile_reg_outline)))
941

  
942
p + layer(sp.polygons(reg_outline,lwd=0.9,col="darkgray"))
943

  
944
p4<-bubble(data_v_mae,"res_mod4",maxsize=4,col=c("red"),fill=FALSE)
945
p4 + layer(sp.polygons(reg_outline,lwd=0.9,col="darkgray"))
946

  
947
col_t <- colorRampPalette(c('blue', 'white', 'red'))
948

  
949
p_elev <-levelplot(subset(s_raster,"elev_s"),margin=FALSE)
950
p4 <-bubble(data_v_mae[data_v_mae$res_mod4>2.134,],"res_mod4",maxsize=4,col=c("blue"),fill=FALSE)
951
p_elev + p4 + layer(sp.polygons(reg_outline,lwd=0.9,col="green"))
952
title("mod4")
953

  
954
p_elev <-levelplot(subset(s_raster,"elev_s"))
955
p1 <-bubble(data_v_mae[data_v_mae$res_mod1>2.109,],"res_mod1",maxsize=4,col=c("blue"),fill=FALSE)
956
p_elev + p1 + layer(sp.polygons(reg_outline,lwd=0.9,col="green"))
957
#bubble(data_v_mae,"res_mod1")
958
#p<-spplot(data_v_mae,"res_mod1",maxsize=4,col=c("red"))
959
#p
960
#stations that are outliers in one but not the other...
961
id_setdiff<-setdiff(data_v_mae[data_v_mae$res_mod1>2.109,]$id,data_v_mae[data_v_mae$res_mod4>2.134,]$id)
962

  
963
data_id_setdiff <- data_v_mae[data_v_mae$id %in% id_setdiff,]
964

  
965
p_elev +layer(sp.polygons(reg_outline,lwd=0.9,col="green")) + layer(sp.points(data_id_setdiff,pch=4,cex=2,col="pink"))
966

  
967
###############################
968
########## Prepare table 6
969
# Now get p values and other things...
970
###baseline 2: s(lat,lon) + s(elev)
971
      
972
l_obj<-vector("list",length=2)
973
l_obj[[1]]<-raster_prediction_obj_1
974
l_obj[[2]]<-raster_prediction_obj_2
975
l_table <- vector("list",length=length(l_obj))   
976
for (k in 1:length(l_obj)){
977
  raster_prediction_obj<- l_obj[[k]]
978
  list_myModels <- extract_list_from_list_obj(raster_prediction_obj$method_mod_obj,"mod")
979
    
980
  list_models_info <-lapply(1:length(list_myModels),FUN=create_s_and_p_table_term_models,list_myModels)
981
  dates<-(extract_from_list_obj(raster_prediction_obj_1$method_mod_obj,"sampling_dat"))$date #get vector of dates
982
  names(list_models_info)<-dates #adding names to the list
983
    
984
  #Prepare and process p. value information regarding models: count number of times values were above a threshold.
985
  s.table_term_tb <-extract_from_list_obj(list_models_info,"s.table_term")
986
    
987
  threshold_val<-c(0.01,0.05,0.1)
988
  s.table_term_tb$p_val_rec1 <- s.table_term_tb[["p-value"]] < threshold_val[1]
989
  s.table_term_tb$p_val_rec2 <- s.table_term_tb[["p-value"]] < threshold_val[2]
990
  s.table_term_tb$p_val_rec3 <- s.table_term_tb[["p-value"]] < threshold_val[3]
991
  
992
  names_var <- c("p_val_rec1","p_val_rec2","p_val_rec3")
993
  t2<-melt(s.table_term_tb,
994
           measure=names_var, 
995
           id=c("mod_name","term_name"),
996
           na.rm=T)
997
  
998
  summary_s.table_term2 <- cast(t2,term_name+mod_name~variable,sum)
999
  
1000
  #Now add AIC
1001
  AIC_models_tb <-extract_from_list_obj(list_models_info,"AIC_models")
1002
  AIC_models_tb
1003
  names_var <- c("AIC")
1004
  #id_var <- 
1005
  t3<-melt(AIC_models_tb,
1006
           measure=names_var, 
1007
           id=c("mod_name","term_name"),
1008
           na.rm=T)
1009
  
1010
  summary_AIC <- cast(t3,term_name+mod_name~variable,median)
1011
  summary_AIC$AIC <- round(summary_AIC[,c("AIC")],digit=2) #roundto three digits teh differences
1012
  
1013
  #Now combine tables and drop duplicate columns the combined table can be modified for the paper...
1014
  avg_s <- calc_stat_from_raster_prediction_obj(raster_prediction_obj,"mean",training=TRUE)
1015
  avg_v <- calc_stat_from_raster_prediction_obj(raster_prediction_obj,"mean",training=FALSE)
1016
  avg <- cbind(avg_s[,c("pred_mod","mae")],avg_v[,c("mae")])
1017
  names(avg)<-c("pred_mod","mae_s","mae_v")
1018
  avg[,c("mae_s","mae_v")] <- round(avg[,c("mae_s","mae_v")],digit=2)
1019
  table <- merge(summary_AIC,avg,by.x="mod_name",by.y="pred_mod")
1020
  
1021
  tables_AIC_ac_p_val <-list(table,summary_s.table_term2)
1022
  names(tables_AIC_ac_p_val) <-c("table","s.table_p_val_term")
1023
  l_table[[k]] <- tables_AIC_ac_p_val
1024
}
1025

  
1026
## Now prepare table
1027
s.table_p_val_term <- l_table[[1]]$s.table_p_val_term[-c(10:26),]
1028
s.table_p_val_term <- s.table_p_val_term[order(s.table_p_val_term$mod_name),]
1029
#summary_s.table_term2 <- summary_s.table_term2[-c(8,10),]
1030
table <- l_table[[1]]$table
1031
      
1032
table6a <- merge(s.table_p_val_term,table,by="mod_name")  
1033
table6a <- table6a[,-match(c("term_name.y"),names(table6a))]
1034
#model_col<-c("Baseline2","Northness","Eastness","LST","DISTOC","Forest","CANHEIGHT","LST*Forest") # removed ,"LST*CANHEIGHT")
1035
#names_table_col<-c("DiffMAE","DiffRMSE","DiffME","Diffr","Model")
1036
       
1037
s.table_p_val_term <- l_table[[2]]$s.table_p_val_term[-c(11:19),]
1038
s.table_p_val_term <- s.table_p_val_term[order(s.table_p_val_term$mod_name),]
1039
#summary_s.table_term2 <- summary_s.table_term2[-c(8,10),]
1040
tableb <- l_table[[2]]$table
1041
      
1042
table6b <- merge(s.table_p_val_term,tableb,by="mod_name")  
1043
table6b <- table6b[,-match(c("term_name.y"),names(table6b))]
1044
#model_col<-c("Baseline2","Northness","Eastness","LST","DISTOC","Forest","CANHEIGHT","LST*Forest") # removed ,"LST*CANHEIGHT")
1045
#names_table_col<-c("DiffMAE","DiffRMSE","DiffME","Diffr","Model")
1046
      
1047
#table6b[order(table6b$mod_name),]
1048
      
1049
#Now write out table...
1050

  
1051
file_name<-paste("table6a_paper","_",out_prefix,".txt",sep="")
1052
write.table(table6a,file=file_name,sep=",")
1053

  
1054
file_name<-paste("table6b_paper","_",out_prefix,".txt",sep="")
1055
write.table(table6b,file=file_name,sep=",")
1056

  
1057
########################
1058
### Prepare table 7: correlation matrix between covariates      
1059

  
1060
names(s_raster)
249
################################################
250
######### Figure 4. RMSE multi-timescale mulitple hold out for FSS and CAI
1061 251

  
1062
list_formulas<-raster_prediction_obj_2$method_mod_obj[[1]]$formulas
1063
list_formulas <- lapply(list_formulas,FUN=as.formula)
1064
covar_names_model <- unique(unlist(lapply(list_formulas,FUN=all.vars)))[-1]  
1065
covar_names_model <- c(covar_names_model[-6],c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06",
1066
                                               "mm_07","mm_08","mm_09","mm_10","mm_11","mm_12"))
1067
covar_raster <- subset(s_raster,covar_names_model)
252
################################################
253
######### Figure 5. RMSE multi-timescale mulitple hold out Overtraining tendency
254
                             
255
################################################
256
######### Figure 6: Spatial pattern of prediction for one day (maps)
1068 257

  
1069
names(covar_raster) <- covar_names_model
1070
corr_layers_covar <-layerStats(covar_raster,"pearson",na.rm=TRUE)
1071
corr_mat <-round(corr_layers_covar[[1]], digit=2)
1072
      
1073
file_name<-paste("table7_paper","_",out_prefix,".txt",sep="")
1074
write.table(corr_mat,file=file_name,sep=",")
1075
      
1076
#met_obj <-load_obj(file.path(in_dir1,met_obj_file_1))
1077
#stat_loc<-readOGR(dsn=in_dir1,layer=sub(".shp","",basename(met_obj$loc_stations)))
1078
#dim(stat_loc)      
1079 258

  
1080 259
###################### END OF SCRIPT #######################
1081 260

  

Also available in: Unified diff