1
|
#################################### INTERPOLATION OF TEMPERATURES #######################################
|
2
|
############################ Script for manuscript analyses,tables and figures #######################################
|
3
|
#This script uses the worklfow code applied to the Oregon case study. Daily methods (GAM,GWR, Kriging) are tested with
|
4
|
#different covariates using two baselines. Accuracy methods are added in the the function script to evaluate results.
|
5
|
#Figures, tables and data for the contribution of covariate paper are also produced in the script.
|
6
|
#AUTHOR: Benoit Parmentier
|
7
|
#DATE: 08/15/2013
|
8
|
#Version: 2
|
9
|
#PROJECT: Environmental Layers project
|
10
|
#################################################################################################
|
11
|
|
12
|
### Loading R library and packages
|
13
|
#library used in the workflow production:
|
14
|
library(gtools) # loading some useful tools
|
15
|
library(mgcv) # GAM package by Simon Wood
|
16
|
library(sp) # Spatial pacakge with class definition by Bivand et al.
|
17
|
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al.
|
18
|
library(rgdal) # GDAL wrapper for R, spatial utilities
|
19
|
library(gstat) # Kriging and co-kriging by Pebesma et al.
|
20
|
library(fields) # NCAR Spatial Interpolation methods such as kriging, splines
|
21
|
library(raster) # Hijmans et al. package for raster processing
|
22
|
library(gdata) # various tools with xls reading, cbindX
|
23
|
library(rasterVis) # Raster plotting functions
|
24
|
library(parallel) # Parallelization of processes with multiple cores
|
25
|
library(maptools) # Tools and functions for sp and other spatial objects e.g. spCbind
|
26
|
library(maps) # Tools and data for spatial/geographic objects
|
27
|
library(reshape) # Change shape of object, summarize results
|
28
|
library(plotrix) # Additional plotting functions
|
29
|
library(plyr) # Various tools including rbind.fill
|
30
|
library(spgwr) # GWR method
|
31
|
library(automap) # Kriging automatic fitting of variogram using gstat
|
32
|
library(rgeos) # Geometric, topologic library of functions
|
33
|
#RPostgreSQL # Interface R and Postgres, not used in this script
|
34
|
|
35
|
#Additional libraries not used in workflow
|
36
|
library(pgirmess) # Krusall Wallis test with mulitple options, Kruskalmc {pgirmess}
|
37
|
|
38
|
#### FUNCTION USED IN SCRIPT
|
39
|
|
40
|
function_analyses_paper <-"contribution_of_covariates_paper_interpolation_functions_08152013.R"
|
41
|
|
42
|
##############################
|
43
|
#### Parameters and constants
|
44
|
|
45
|
script_path<-"/home/parmentier/Data/IPLANT_project/env_layers_scripts/" #path to script
|
46
|
source(file.path(script_path,function_analyses_paper)) #source all functions used in this script.
|
47
|
|
48
|
in_dir1 <-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_day_lst_comb3_08132013"
|
49
|
in_dir2 <-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_day_lst_comb4_08152013"
|
50
|
#kriging results:
|
51
|
in_dir3 <-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_kriging_day_lst_comb3_07112013"
|
52
|
#gwr results:
|
53
|
in_dir4 <-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gwr_day_lst_comb3_part1_07122013"
|
54
|
#multisampling results (gam)
|
55
|
in_dir5<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gam_daily_mults10_lst_comb3_08082013"
|
56
|
in_dir6<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_kriging_daily_mults10_lst_comb3_08062013"
|
57
|
in_dir7<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_gwr_daily_mults10_lst_comb3_08072013"
|
58
|
|
59
|
out_dir<-"/home/parmentier/Data/IPLANT_project/paper_analyses_tables_fig_08032013"
|
60
|
setwd(out_dir)
|
61
|
|
62
|
y_var_name <- "dailyTmax"
|
63
|
|
64
|
out_prefix<-"analyses_08152013"
|
65
|
|
66
|
method_interpolation <- "gam_daily"
|
67
|
covar_obj_file_1 <- "covar_obj__365d_gam_day_lst_comb3_08132013.RData"
|
68
|
met_obj_file_1 <- "met_stations_outfiles_obj_gam_daily__365d_gam_day_lst_comb3_08132013.RData"
|
69
|
#met_stations_outfiles_obj_gam_daily__365d_gam_day_lst_comb3_08132013.RData
|
70
|
|
71
|
#raster_prediciton object for baseline 1 () s(lat,lon) + s(elev)) and baseline 2 (slat,lon))
|
72
|
raster_obj_file_1 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_day_lst_comb3_08132013.RData"
|
73
|
raster_obj_file_2 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_day_lst_comb4_08152013.RData"
|
74
|
|
75
|
raster_obj_file_3 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_day_lst_comb3_07112013.RData"
|
76
|
raster_obj_file_4 <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_day_lst_comb3_part1_07122013.RData"
|
77
|
#multisampling using baseline lat,lon + elev
|
78
|
raster_obj_file_5 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_daily_mults10_lst_comb3_08082013.RData"
|
79
|
raster_obj_file_6 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_daily_mults10_lst_comb3_08062013.RData"
|
80
|
raster_obj_file_7 <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_daily_mults10_lst_comb3_08072013.RData"
|
81
|
#raster_prediction_obj_gam_daily_dailyTmax_365d_gam_daily_mults10_lst_comb3_08082013.RData
|
82
|
|
83
|
#Load objects containing training, testing, models objects
|
84
|
|
85
|
covar_obj <-load_obj(file.path(in_dir1,covar_obj_file_1)) #Reading covariates object for GAM daily method
|
86
|
infile_covariates <- covar_obj$infile_covariates
|
87
|
infile_reg_outline <- covar_obj$infile_reg_outline
|
88
|
covar_names<- covar_obj$covar_names
|
89
|
#####
|
90
|
s_raster <- brick(infile_covariates)
|
91
|
names(s_raster)<-covar_names
|
92
|
|
93
|
raster_prediction_obj_1 <-load_obj(file.path(in_dir1,raster_obj_file_1)) #comb3 (baseline 2)
|
94
|
raster_prediction_obj_2 <-load_obj(file.path(in_dir2,raster_obj_file_2)) #comb4 (baseline 1)
|
95
|
raster_prediction_obj_3 <-load_obj(file.path(in_dir3,raster_obj_file_3)) #comb3/mod1 baseline 2, kriging
|
96
|
raster_prediction_obj_4 <-load_obj(file.path(in_dir4,raster_obj_file_4)) #comb3/mod1 baseline 2, gwr
|
97
|
raster_prediction_obj_5 <-load_obj(file.path(in_dir5,raster_obj_file_5)) #gam daily multisampling 10 to 70%
|
98
|
raster_prediction_obj_6 <-load_obj(file.path(in_dir6,raster_obj_file_6)) #kriging daily multisampling 10 to 70%
|
99
|
raster_prediction_obj_7 <-load_obj(file.path(in_dir7,raster_obj_file_7)) #gwr daily multisampling 10 to 70%
|
100
|
|
101
|
############### BEGIN SCRIPT #################
|
102
|
|
103
|
############
|
104
|
##### 1) Generate: Table 3. Contribution of covariates using validation accuracy metrics
|
105
|
## This is a table of accuracy compared to baseline by difference
|
106
|
|
107
|
#Check input covariates and model formula:
|
108
|
raster_prediction_obj_1$method_mod_obj[[1]]$formulas #models run for baseline 2
|
109
|
raster_prediction_obj_2$method_mod_obj[[1]]$formulas #models run for baseline 1
|
110
|
|
111
|
summary_metrics_v1<-raster_prediction_obj_1$summary_metrics_v
|
112
|
summary_metrics_v2<-raster_prediction_obj_2$summary_metrics_v
|
113
|
tb1 <-raster_prediction_obj_1$tb_diagnostic_v #365 days accuracy table for baseline 2
|
114
|
tb2 <-raster_prediction_obj_2$tb_diagnostic_v #365 days accuracy table for baseline 1
|
115
|
|
116
|
table_data1 <-summary_metrics_v1$avg[,c("mae","rmse","me","r")] #select relevant columns from data.frame
|
117
|
table_data2 <-summary_metrics_v2$avg[,c("mae","rmse","me","r")]
|
118
|
|
119
|
###Table 3a, baseline 1: s(lat,lon)
|
120
|
#Chnage here !!! need to reorder rows based on mod first
|
121
|
model_col<-c("Baseline1","Elevation","Northing","Easting","LST","DISTOC","Forest","CANHEIGHT","LST*Forest","LST*CANHEIGHT") #
|
122
|
df3a<- as.data.frame(sapply(table_data2,FUN=function(x) x-x[1]))
|
123
|
df3a<- round(df3a,digit=3) #roundto three digits teh differences
|
124
|
df3a$Model <-model_col
|
125
|
names(df3a)<- names_table_col
|
126
|
print(df3a) #show resulting table
|
127
|
|
128
|
###Table 3b, baseline 2: s(lat,lon) + s(elev)
|
129
|
|
130
|
model_col<-c("Baseline2","Northness","Eastness","LST","DISTOC","Forest","CANHEIGHT","LST*Forest","LST*CANHEIGHT")
|
131
|
names_table_col<-c("ΔMAE","ΔRMSE","ΔME","Δr","Model")
|
132
|
|
133
|
df3b <- as.data.frame(sapply(table_data1,FUN=function(x) x-x[1])) #difference between baseline (line 1) and other models
|
134
|
df3b <- round(df3b,digit=3) #roundto three digits the differences
|
135
|
df3b$Model <- model_col
|
136
|
names(df3b)<- names_table_col
|
137
|
print(df3b) #Part b of table 3
|
138
|
|
139
|
sd2_v <-calc_stat_from_raster_prediction_obj(raster_prediction_obj_2,"sd",training=FALSE)
|
140
|
|
141
|
#Testing siginificance between models
|
142
|
|
143
|
mod_compk1 <-kruskal.test(tb1$rmse ~ as.factor(tb1$pred_mod)) #Kruskal Wallis test
|
144
|
mod_compk2 <-kruskal.test(tb2$rmse ~ as.factor(tb2$pred_mod))
|
145
|
print(mod_compk1) #not significant
|
146
|
print(mod_compk2) #not significant
|
147
|
|
148
|
#Multiple Kruskal Wallis
|
149
|
mod_compk1 <-kruskalmc(tb1$rmse ~ as.factor(tb1$pred_mod))
|
150
|
mod_compk2 <-kruskalmc(tb2$rmse ~ as.factor(tb2$pred_mod))
|
151
|
|
152
|
print(mod_compk1)
|
153
|
print(mod_compk2)
|
154
|
|
155
|
#Now write out table 3
|
156
|
|
157
|
file_name<-paste("table3a_baseline1_paper","_",out_prefix,".txt",sep="")
|
158
|
write.table(df3a,file=file_name,sep=",")
|
159
|
|
160
|
file_name<-paste("table3b_baseline2_paper","_",out_prefix,".txt",sep="")
|
161
|
write.table(df3b,file=file_name,sep=",")
|
162
|
|
163
|
############
|
164
|
##### 2) Generate: Table 4. Comparison between interpolation methods using validation accuracy metrics
|
165
|
## This is a table of accuracy metric for the optimal model (baseline2) as identified in the previous step
|
166
|
|
167
|
##Table 4: Interpolation methods comparison
|
168
|
|
169
|
#get sd for kriging, gam and gwr
|
170
|
#tb1 <-raster_prediction_obj_1$tb_diagnostic_v HGAM baseline 1, loaded
|
171
|
#tb2 <-raster_prediction_obj_2$tb_diagnostic_v #GAM baseline 2, loaded
|
172
|
tb3 <-raster_prediction_obj_3$tb_diagnostic_v #Kriging methods
|
173
|
tb4 <-raster_prediction_obj_4$tb_diagnostic_v #GWR methods
|
174
|
|
175
|
names_mod<-c("mod1","mod2","mod3","mod4","mod5","mod6","mod7","mod8","mod9","mod10")
|
176
|
|
177
|
#Calculate standard deviation for each metric
|
178
|
sd1 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_1,"sd") # see function script
|
179
|
sd2 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_2,"sd") # standard deviation for baseline 2
|
180
|
sd3 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_3,"sd") # kriging
|
181
|
sd4 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_4,"sd") #gwr
|
182
|
|
183
|
#Combined sd in one table for mod1 (baseline 2)
|
184
|
table_sd <- do.call(rbind,list(sd1[1,],sd3[1,],sd4[1,])) #table containing the sd for the three mdethods for baseline 2
|
185
|
table_sd <- round(table_sd[,-1],digit=3) #round to three digits the differences
|
186
|
table_sd <- table_sd[,c("mae","rmse","me","r")] #column 5 contains m50, remove it
|
187
|
|
188
|
summary_metrics_v3<-raster_prediction_obj_3$summary_metrics_v #Kriging methods
|
189
|
summary_metrics_v4<-raster_prediction_obj_4$summary_metrics_v # GWR method
|
190
|
|
191
|
table_data3 <-summary_metrics_v3$avg[1,c("mae","rmse","me","r")] #first line mod1 (baseline)
|
192
|
table_data4 <-summary_metrics_v4$avg[1,c("mae","rmse","me","r")] #first line mod1 (baseline)
|
193
|
table_data1 <- table_data1[1,]
|
194
|
|
195
|
table_ac <-do.call(rbind, list(table_data1,table_data3,table_data4))
|
196
|
table_ac <- round(table_ac,digit=3) #roundto three digits teh differences
|
197
|
|
198
|
#combined tables with accuracy metrics and their standard deviations
|
199
|
table4_paper <-table_combined_symbol(table_ac,table_sd,"±")
|
200
|
#lapply(lapply(table_ac,FUN=paste,table_sd,FUN=paste,sep="±"),FUN=paste)
|
201
|
|
202
|
model_col<-c("GAM","Kriging","GWR")
|
203
|
names_table_col<-c("MAE","RMSE","ME","R","Model")
|
204
|
|
205
|
table4_paper$Model <-model_col
|
206
|
names(table4_paper)<- names_table_col
|
207
|
|
208
|
file_name<-paste("table4_compariaons_interpolation_methods_avg_paper","_",out_prefix,".txt",sep="")
|
209
|
write.table(as.data.frame(table4_paper),file=file_name,sep=",")
|
210
|
|
211
|
####################################
|
212
|
####### Now create figures #############
|
213
|
|
214
|
#figure 1: study area
|
215
|
#figure 2: methodological worklfow
|
216
|
#figure 3:Figure 3. MAE/RMSE and distance to closest fitting station.
|
217
|
#Figure 4. RMSE and MAE, mulitisampling and hold out for FSS and GAM.
|
218
|
#Figure 5. Overtraining tendency
|
219
|
#Figure 6: Spatial pattern of prediction for one day
|
220
|
|
221
|
### Figure 1: Oregon study area
|
222
|
|
223
|
#...add code
|
224
|
|
225
|
### Figure 2: Method comparison workflow
|
226
|
|
227
|
# Workflow not generated in R
|
228
|
|
229
|
################################################
|
230
|
################### Figure 3. MAE/RMSE and distance to closest fitting station.
|
231
|
|
232
|
#Analysis accuracy in term of distance to closest station
|
233
|
#Assign model's names
|
234
|
|
235
|
names_mod <- paste("res_mod",1:10,sep="")
|
236
|
names(raster_prediction_obj_1$validation_mod_obj[[1]])
|
237
|
limit_val<-seq(0,150, by=10)
|
238
|
|
239
|
#Call function to extract residuals in term of distance to closest fitting station and summary statistics
|
240
|
l1 <- distance_to_closest_fitting_station(raster_prediction_obj_1,names_mod,dist_classes=limit_val) #GAM method
|
241
|
l3 <- distance_to_closest_fitting_station(raster_prediction_obj_3,names_mod,dist_classes=limit_val) #Kriging method
|
242
|
l4 <- distance_to_closest_fitting_station(raster_prediction_obj_4,names_mod,dist_classes=limit_val) #GWR method
|
243
|
|
244
|
l1$mae_tb #contains
|
245
|
|
246
|
#Prepare parameters/arguments for plotting
|
247
|
list_dist_obj <-list(l1,l3,l4)
|
248
|
col_t <- c("red","blue","black")
|
249
|
pch_t <- 1:length(col_t)
|
250
|
legend_text <- c("GAM","Kriging","GWR")
|
251
|
mod_name <- c("res_mod1","res_mod1","res_mod1")#selected models
|
252
|
x_tick_labels <- limit_val<-seq(5,125, by=10)
|
253
|
metric_name <-"rmse_tb"
|
254
|
title_plot <- "RMSE and distance to closest station for baseline 2"
|
255
|
y_lab_text <- "RMSE (C)"
|
256
|
#quick test
|
257
|
list_param_plot<-list(list_dist_obj,col_t,pch_t,legend_text,mod_name,x_tick_labels,metric_name,title_plot,y_lab_text)
|
258
|
names(list_param_plot)<-c("list_dist_obj","col_t","pch_t","legend_text","mod_name","x_tick_labels","metric_name","title_plot","y_lab_text")
|
259
|
plot_dst_MAE(list_param_plot)
|
260
|
|
261
|
metric_name <-"mae_tb"
|
262
|
title_plot <- "MAE and distance to closest fitting station"
|
263
|
y_lab_text <- "MAE (C)"
|
264
|
|
265
|
#Now set up plotting device
|
266
|
res_pix<-480
|
267
|
col_mfrow<-2
|
268
|
row_mfrow<-1
|
269
|
png_file_name<- paste("Figure_3_accuracy_and_distance_to_closest_fitting_station_",out_prefix,".png", sep="")
|
270
|
png(filename=file.path(out_dir,png_file_name),
|
271
|
width=col_mfrow*res_pix,height=row_mfrow*res_pix)
|
272
|
par(mfrow=c(row_mfrow,col_mfrow))
|
273
|
|
274
|
#Figure 3a
|
275
|
list_param_plot<-list(list_dist_obj,col_t,pch_t,legend_text,mod_name,x_tick_labels,metric_name,title_plot,y_lab_text)
|
276
|
names(list_param_plot)<-c("list_dist_obj","col_t","pch_t","legend_text","mod_name","x_tick_labels","metric_name","title_plot","y_lab_text")
|
277
|
plot_dst_MAE(list_param_plot)
|
278
|
title(xlab="Distance to closest fitting station (km)")
|
279
|
|
280
|
#Figure 3b: histogram
|
281
|
barplot(l1$n_tb$res_mod1,names.arg=limit_val,
|
282
|
ylab="Number of observations",
|
283
|
xlab="Distance to closest fitting station (km)")
|
284
|
title("Number of observation in term of distance bins")
|
285
|
box()
|
286
|
dev.off()
|
287
|
|
288
|
####################################################
|
289
|
#########Figure 4. RMSE and MAE, mulitisampling and hold out for single time scale methods.
|
290
|
|
291
|
#Using baseline 2: lat,lon and elev
|
292
|
|
293
|
#Use run of 7 hold out proportions, 10 to 70% with 10 random samples and 12 dates...
|
294
|
#Use gam_day method
|
295
|
#Use comb3 i.e. using baseline s(lat,lon)+s(elev)
|
296
|
|
297
|
#names_mod<-c("mod1","mod2","mod3","mod4","mod5","mod6","mod7","mod8","mod9")
|
298
|
names_mod<-c("mod1")
|
299
|
|
300
|
#debug(calc_stat_prop_tb)
|
301
|
prop_obj_gam<-calc_stat_prop_tb(names_mod,raster_prediction_obj_5)
|
302
|
prop_obj_kriging<-calc_stat_prop_tb(names_mod,raster_prediction_obj_6)
|
303
|
prop_obj_gwr<-calc_stat_prop_tb(names_mod,raster_prediction_obj_7)
|
304
|
|
305
|
list_prop_obj<-list(prop_obj_gam,prop_obj_kriging,prop_obj_gwr)
|
306
|
|
307
|
## plot setting for figure 4
|
308
|
|
309
|
col_t<-c("red","blue","black")
|
310
|
pch_t<- 1:length(col_t)
|
311
|
legend_text <- c("GAM","Kriging","GWR")
|
312
|
mod_name<-c("mod1","mod1","mod1")#selected models
|
313
|
|
314
|
##### plot figure 4 for paper
|
315
|
####
|
316
|
|
317
|
res_pix<-480
|
318
|
col_mfrow<-2
|
319
|
row_mfrow<-1
|
320
|
png_file_name<- paste("Figure_4_proportion_of_holdout_and_accuracy_",out_prefix,".png", sep="")
|
321
|
png(filename=file.path(out_dir,png_file_name),
|
322
|
width=col_mfrow*res_pix,height=row_mfrow*res_pix)
|
323
|
par(mfrow=c(row_mfrow,col_mfrow))
|
324
|
metric_name<-"mae"
|
325
|
list_param_plot<-list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name)
|
326
|
names(list_param_plot)<-c("list_prop_obj","col_t","pch_t","legend_text","mod_name","metric_name")
|
327
|
|
328
|
plot_prop_metrics(list_param_plot)
|
329
|
title(main="MAE for hold out and methods",
|
330
|
xlab="Hold out validation/testing proportion",
|
331
|
ylab="MAE (C)")
|
332
|
|
333
|
#now figure 4b
|
334
|
metric_name<-"rmse"
|
335
|
list_param_plot<-list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name)
|
336
|
names(list_param_plot)<-c("list_prop_obj","col_t","pch_t","legend_text","mod_name","metric_name")
|
337
|
plot_prop_metrics(list_param_plot)
|
338
|
title(main="RMSE for hold out and methods",
|
339
|
xlab="Hold out validation/testing proportion",
|
340
|
ylab="RMSE (C)")
|
341
|
|
342
|
dev.off()
|
343
|
|
344
|
####################################################
|
345
|
#########Figure 5. Overtraining tendency
|
346
|
|
347
|
#read in relevant data:
|
348
|
## Calculate average difference for RMSE for all three methods
|
349
|
#read in relevant data:
|
350
|
tb1_s<-extract_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"metrics_s")
|
351
|
rownames(tb1_s)<-NULL #remove row names
|
352
|
tb1_s$method_interp <- "gam_daily" #add type of interpolation...out_prefix too??
|
353
|
|
354
|
tb3_s<-extract_from_list_obj(raster_prediction_obj_3$validation_mod_obj,"metrics_s")
|
355
|
rownames(tb1_s)<-NULL #remove row names
|
356
|
tb3_s$method_interp <- "kriging_daily" #add type of interpolation...out_prefix too??
|
357
|
|
358
|
tb4_s<-extract_from_list_obj(raster_prediction_obj_4$validation_mod_obj,"metrics_s")
|
359
|
rownames(tb4_s)<-NULL #remove row names
|
360
|
tb4_s$method_interp <- "gwr_daily" #add type of interpolation...out_prefix too??
|
361
|
|
362
|
#tb1_s <-raster_prediction_obj_1$tb_diagnostic_s #gam dailycontains the accuracy metrics for each run...
|
363
|
#tb3_s <-raster_prediction_obj_3$tb_diagnostic_s #Kriging daily methods
|
364
|
#tb4_s <-raster_prediction_obj_4$tb_diagnostic_s #gwr daily methods
|
365
|
|
366
|
tb1 <-raster_prediction_obj_1$tb_diagnostic_v #gam dailycontains the accuracy metrics for each run...
|
367
|
tb3 <-raster_prediction_obj_3$tb_diagnostic_v #Kriging daily methods
|
368
|
tb4 <-raster_prediction_obj_4$tb_diagnostic_v #gwr daily methods
|
369
|
|
370
|
#Calculate difference in MAE and RMSE for training and testing data: call diff_df function
|
371
|
diff_tb1 <-diff_df(tb1_s[tb1_s$pred_mod=="mod1",],tb1[tb1$pred_mod=="mod1",],c("mae","rmse")) #gam select differences for mod1
|
372
|
diff_tb3 <-diff_df(tb3_s[tb3_s$pred_mod=="mod1",],tb3[tb3$pred_mod=="mod1",],c("mae","rmse")) #kriging
|
373
|
diff_tb4 <-diff_df(tb4_s[tb4_s$pred_mod=="mod1",],tb4[tb4$pred_mod=="mod1",],c("mae","rmse")) #gwr
|
374
|
|
375
|
diff_mae_data <-data.frame(gam=diff_tb1$mae,kriging=diff_tb3$mae,gwr=diff_tb4$mae)
|
376
|
diff_rmse_data <-data.frame(gam=diff_tb1$rmse,kriging=diff_tb3$rmse,gwr=diff_tb4$rmse)
|
377
|
|
378
|
#Test the plot
|
379
|
boxplot(diff_mae_data)
|
380
|
boxplot(diff_rmse_data) #plot differences in training and testing accuracies for three methods
|
381
|
title(main="Training and testing RMSE for hold out and interpolation methods",
|
382
|
xlab="Interpolation method",
|
383
|
ylab="RMSE (C)")
|
384
|
|
385
|
tb5 <-raster_prediction_obj_5$tb_diagnostic_v #gam dailycontains the accuracy metrics for each run...
|
386
|
tb6 <-raster_prediction_obj_6$tb_diagnostic_v #Kriging daily methods
|
387
|
tb7 <-raster_prediction_obj_7$tb_diagnostic_v #gwr daily methods
|
388
|
|
389
|
prop_obj_gam_s<-calc_stat_prop_tb(names_mod,raster_prediction_obj_5,testing=FALSE) #training accuracy with hold out proportion
|
390
|
prop_obj_kriging_s<-calc_stat_prop_tb(names_mod,raster_prediction_obj_6,testing=FALSE)
|
391
|
prop_obj_gwr_s<-calc_stat_prop_tb(names_mod,raster_prediction_obj_7,testing=FALSE)
|
392
|
|
393
|
plot(prop_obj_gam_s$avg_tb$rmse ~ prop_obj_gam_s$avg_tb$prop, type="b",)
|
394
|
|
395
|
###############
|
396
|
#### plot figure 5
|
397
|
#set up the output file to plot
|
398
|
res_pix<-480
|
399
|
col_mfrow<-2
|
400
|
row_mfrow<-1
|
401
|
png_file_name<- paste("Figure_5_overtraining_tendency_and_holdout_proportion_",out_prefix,".png", sep="")
|
402
|
png(filename=file.path(out_dir,png_file_name),
|
403
|
width=col_mfrow*res_pix,height=row_mfrow*res_pix)
|
404
|
par(mfrow=c(row_mfrow,col_mfrow))
|
405
|
|
406
|
col_t<-c("red","blue","black")
|
407
|
pch_t<- 1:length(col_t)
|
408
|
|
409
|
y_range<-range(prop_obj_kriging$avg_tb$rmse,prop_obj_kriging_s$avg_tb$rmse)
|
410
|
#y_range<-range(prop_obj_gam$avg_tb$rmse,prop_obj_gam_s$avg_tb$rmse)
|
411
|
plot(prop_obj_gam$avg_tb$rmse ~ prop_obj_gam$avg_tb$prop, ylab="",xlab="",type="b",col=c("red"),pch=pch_t[1],ylim=y_range,lty=2)
|
412
|
lines(prop_obj_gam_s$avg_tb$rmse ~ prop_obj_gam_s$avg_tb$prop, ylab="",xlab="",type="b",pch=pch_t[1],ylim=y_range,col=c("red"))
|
413
|
lines(prop_obj_gwr_s$avg_tb$rmse ~ prop_obj_gwr_s$avg_tb$prop, ylab="",xlab="",type="b",ylim=y_range,pch=pch_t[3],col=c("black"))
|
414
|
lines(prop_obj_gwr$avg_tb$rmse ~ prop_obj_gam$avg_tb$prop, ylab="",xlab="",type="b",ylim=y_range,pch=pch_t[3],,col=c("black"),lty=2)
|
415
|
lines(prop_obj_kriging$avg_tb$rmse ~ prop_obj_kriging$avg_tb$prop,ylab="",xlab="", type="b",ylim=y_range,pch=pch_t[2],,col=c("blue"),lty=2)
|
416
|
lines(prop_obj_kriging_s$avg_tb$rmse ~ prop_obj_kriging_s$avg_tb$prop,ylab="",xlab="",type="b",ylim=y_range,pch=pch_t[2],col=c("blue"))
|
417
|
|
418
|
legend("topleft",legend=legend_text,
|
419
|
cex=0.9, pch=pch_t,col=col_t,lty=1,bty="n")
|
420
|
title(main="Training and testing RMSE for hold out and methods",
|
421
|
xlab="Hold out validation/testing proportion",
|
422
|
ylab="RMSE (C)")
|
423
|
|
424
|
boxplot(diff_mae_data) #plot differences in training and testing accuracies for three methods
|
425
|
title(main="Difference between training and testing MAE",
|
426
|
xlab="Interpolation method",
|
427
|
ylab="MAE (C)")
|
428
|
|
429
|
dev.off()
|
430
|
|
431
|
############### STUDY TIME AND accuracy
|
432
|
#########Figure 6: Monthly RMSE averages for the three interpolation methods: GAM, GWR and Kriging.
|
433
|
|
434
|
mae_tmp<- data.frame(gam=tb1[tb1$pred_mod=="mod1",c("mae")],
|
435
|
kriging=tb3[tb3$pred_mod=="mod1",c("mae")],
|
436
|
gwr=tb4[tb4$pred_mod=="mod1",c("mae")])
|
437
|
|
438
|
plot(mae_tmp$gam,col=c("red"),type="b",pch=1)
|
439
|
lines(mae_tmp$kriging,col=c("blue"),type="b",pch=2)
|
440
|
lines(mae_tmp$gwr,col=c("black"),type="b",pch=2)
|
441
|
legend("topleft",legend=legend_text,
|
442
|
cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n")
|
443
|
|
444
|
max(mae_tmp$gam)
|
445
|
|
446
|
x2<-tb1[tb1$pred_mod=="mod1",c("mae","date")]
|
447
|
arrange(x2,desc(mae))
|
448
|
|
449
|
#kriging=tb3[tb3$pred_mod=="mod1",c("mae")],
|
450
|
# gwr=tb4[tb4$pred_mod=="mod1",c("mae")])
|
451
|
|
452
|
##### MONTHLY AVERAGES
|
453
|
|
454
|
tb1_month<-raster_prediction_obj_1$summary_month_metrics_v[[1]] #note that this is for model1
|
455
|
tb3_month<-raster_prediction_obj_3$summary_month_metrics_v[[1]]
|
456
|
tb4_month<- raster_prediction_obj_4$summary_month_metrics_v[[1]]
|
457
|
|
458
|
y_range<-range(tb1_month$mae,tb3_month$mae,tb4_month$mae)
|
459
|
plot(1:12,tb1_month$mae,col=c("red"),type="b",ylim=y_range)
|
460
|
lines(1:12,tb3_month$mae,col=c("blue"),type="b")
|
461
|
lines(1:12,tb4_month$mae,col=c("black"),type="b")
|
462
|
|
463
|
add_month_tag<-function(tb){
|
464
|
date<-strptime(tb$date, "%Y%m%d") # interpolation date being processed
|
465
|
month<-strftime(date, "%m") # current month of the date being processed
|
466
|
}
|
467
|
tb1$month<-add_month_tag(tb1)
|
468
|
tb3$month<-add_month_tag(tb3)
|
469
|
tb4$month<-add_month_tag(tb4)
|
470
|
|
471
|
metric_name<-"mae"
|
472
|
month_data_list<-list(gam=tb1[tb1$pred_mod=="mod1",c(metric_name,"month")],
|
473
|
kriging=tb3[tb3$pred_mod=="mod1",c(metric_name,"month")],
|
474
|
gwr=tb4[tb4$pred_mod=="mod1",c(metric_name,"month")])
|
475
|
y_range<-range(unlist(month_data_list))
|
476
|
|
477
|
|
478
|
#boxplot(test[[metric_ac]]~test[[c("month")]],outline=FALSE,horizontal=FALSE,cex=0.5,
|
479
|
# ylab=paste(metric_ac,"in degree C",sep=" "),,axisnames=FALSE,axes=FALSE)
|
480
|
#boxplot(test[[metric_ac]]~test[[c("month")]],outline=FALSE,horizontal=FALSE,cex=0.5,
|
481
|
# ylab=paste(metric_ac,"in degree C",sep=" "))
|
482
|
#axis(1, labels = FALSE)
|
483
|
## Create some text labels
|
484
|
#labels <- month.abb # abbreviated names for each month
|
485
|
## Plot x axis labels at default tick marks
|
486
|
#text(1:length(labels), par("usr")[3] - 0.25, srt = 45, adj = 1,
|
487
|
# labels = labels, xpd = TRUE)
|
488
|
#axis(2)
|
489
|
#box()
|
490
|
|
491
|
#Now plot figure 6
|
492
|
res_pix<-480
|
493
|
col_mfrow<-2
|
494
|
#row_mfrow<-2
|
495
|
row_mfrow<-1
|
496
|
|
497
|
png_file_name<- paste("Figure_6_monthly_accuracy_",out_prefix,".png", sep="")
|
498
|
png(filename=file.path(out_dir,png_file_name),
|
499
|
width=col_mfrow*res_pix,height=row_mfrow*res_pix)
|
500
|
par(mfrow=c(row_mfrow,col_mfrow))
|
501
|
|
502
|
y_range<-range(tb1_month$mae,tb3_month$mae,tb4_month$mae)
|
503
|
xlab_tick <- unique(tb1$month)
|
504
|
xlab_text <-"Month"
|
505
|
|
506
|
plot(1:12,tb1_month$mae,col=c("red"),type="b",ylim=y_range,xlab=xlab_text,xaxt="n")
|
507
|
lines(1:12,tb3_month$mae,col=c("blue"),type="b")
|
508
|
lines(1:12,tb4_month$mae,col=c("black"),type="b")
|
509
|
axis(1,at=1:12,labels=xlab_tick)
|
510
|
title(main="Monthly average MAE")
|
511
|
|
512
|
ylab_text<-"MAE (C)"
|
513
|
xlab_text<-"Month"
|
514
|
#y_range<-range(month_data_list$gam$mae,month_data_list$kriging$mae,month_data_list$gwr$mae)
|
515
|
#y_range<-range(month_data_list$gam$mae)
|
516
|
boxplot(mae~month,data=month_data_list$gam,main="GAM",ylab=ylab_text,outline=FALSE)
|
517
|
#boxplot(mae~month,data=month_data_list$kriging,ylim=y_range,main="Kriging",ylab=ylab_text,outline=FALSE)
|
518
|
#boxplot(mae~month,data=month_data_list$gwr,ylim=y_range,main="GWR",ylab=ylab_text,outline=FALSE)
|
519
|
|
520
|
dev.off()
|
521
|
|
522
|
#Now generate table 5
|
523
|
|
524
|
test<-boxplot(mae~month,data=month_data_list$gam,main="GAM",ylab=ylab_text,outline=FALSE)
|
525
|
|
526
|
length(tb1_month$mae)
|
527
|
names(tb1_month)
|
528
|
|
529
|
#Calculate standard deviation for each metric
|
530
|
sd1 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_1,"sd") # see function script
|
531
|
sd2 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_2,"sd") # standard deviation for baseline 2
|
532
|
sd3 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_3,"sd") # kriging
|
533
|
sd4 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_4,"sd") #gwr
|
534
|
|
535
|
avg_v1 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_1,"mean") # see function script
|
536
|
avg_v2 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_2,"mean") # standard deviation for baseline 2
|
537
|
avg_v3 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_3,"mean") # kriging
|
538
|
avg_v4 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_4,"mean") #gwr
|
539
|
|
540
|
#Combined sd in one table for mod1 (baseline 2)
|
541
|
table_sd <- do.call(cbind,list(gam=sd1[sd1$pred_mod=="mod1",c("rmse")],
|
542
|
kriging=sd3[sd3$pred_mod=="mod1",c("rmse")],
|
543
|
gwr=sd4[sd4$pred_mod=="mod1",c("rmse")])) #table containing the sd for the three mdethods for baseline 2
|
544
|
table_sd <- as.data.frame(round(table_sd,digit=3)) #round to three digits the differences
|
545
|
|
546
|
#Combined mean in one table for mod1 (baseline 2)
|
547
|
table_avg <- do.call(cbind,list(gam=avg_v1[avg_v1$pred_mod=="mod1",c("rmse")],
|
548
|
kriging=avg_v3[sd3$pred_mod=="mod1",c("rmse")],
|
549
|
gwr=avg_v4[avg_v4$pred_mod=="mod1",c("rmse")])) #table containing the sd for the three mdethods for baseline 2
|
550
|
table_avg <- as.data.frame(round(table_avg,digit=3)) #round to three digits the differences
|
551
|
|
552
|
#combined tables with accuracy metrics and their standard deviations
|
553
|
table5_paper <-table_combined_symbol(table_avg,table_sd,"±")
|
554
|
table5_paper$month <- month.abb
|
555
|
|
556
|
file_name<-paste("table5_comparisons_monthly_averages_interpolation_methods_paper","_",out_prefix,".txt",sep="")
|
557
|
write.table(as.data.frame(table5_paper),file=file_name,sep=",")
|
558
|
|
559
|
####### FIGURE 7: Spatial pattern ######
|
560
|
|
561
|
y_var_name <-"dailyTmax"
|
562
|
index<-244 #index corresponding to January 1
|
563
|
|
564
|
lf1 <- raster_prediction_obj_1$method_mod_obj[[index]][[y_var_name]] #select relevant raster images for the given dates
|
565
|
lf3 <- raster_prediction_obj_3$method_mod_obj[[index]][[y_var_name]]
|
566
|
lf4 <- raster_prediction_obj_4$method_mod_obj[[index]][[y_var_name]]
|
567
|
|
568
|
date_selected <- "20109101"
|
569
|
methods_names <-c("gam","kriging","gwr")
|
570
|
names_layers<-methods_names
|
571
|
lf <-list(lf1$mod1,lf3$mod1,lf4$mod1)
|
572
|
#lf <-lf[[1]]
|
573
|
|
574
|
pred_temp_s <-stack(lf)
|
575
|
#predictions<-mask(predictions,mask_rast)
|
576
|
names(pred_temp_s)<-names_layers
|
577
|
s.range <- c(min(minValue(pred_temp_s)), max(maxValue(pred_temp_s)))
|
578
|
#s.range <- s.range+c(5,-5)
|
579
|
col.breaks <- pretty(s.range, n=200)
|
580
|
lab.breaks <- pretty(s.range, n=100)
|
581
|
temp.colors <- colorRampPalette(c('blue', 'white', 'red'))
|
582
|
max_val<-s.range[2]
|
583
|
min_val <-s.range[1]
|
584
|
#max_val<- -10
|
585
|
min_val <- 0
|
586
|
layout_m<-c(1,3) #one row two columns
|
587
|
|
588
|
png(paste("Figure7__spatial_pattern_tmax_prediction_levelplot_",date_selected,out_prefix,".png", sep=""),
|
589
|
height=480*layout_m[1],width=480*layout_m[2])
|
590
|
|
591
|
levelplot(pred_temp_s,main="Interpolated Surfaces Method Comparison", ylab=NULL,xlab=NULL,
|
592
|
par.settings = list(axis.text = list(font = 2, cex = 1.3),layout=layout_m,
|
593
|
par.main.text=list(font=2,cex=2),strip.background=list(col="white")),par.strip.text=list(font=2,cex=1.5),
|
594
|
names.attr=names_layers,col.regions=temp.colors,at=seq(max_val,min_val,by=0.01))
|
595
|
#col.regions=temp.colors(25))
|
596
|
dev.off()
|
597
|
|
598
|
## FIGURE COMPARISON OF MODELS COVARRIATES
|
599
|
|
600
|
lf2 <- raster_prediction_obj_2$method_mod_obj[[index]][[y_var_name]]
|
601
|
lf2 #contains the models for gam
|
602
|
|
603
|
pred_temp_s <-stack(lf2)
|
604
|
date_selected <- "20109101"
|
605
|
#names_layers <-c("mod1=s(lat,long)+s(elev)","mod4=s(lat,long)+s(LST)","diff=mod1-mod4")
|
606
|
names_layers <-c("mod1 = s(lat,long)","mod2 = s(lat,long)+s(elev)","mod3 = s(lat,long)+s(N_w)","mod4 = s(lat,long)+s(E_w)",
|
607
|
"mod5 = s(lat,long)+s(LST)","mod6 = s(lat,long)+s(DISTOC)","mod7 = s(lat,long)+s(LC1)",
|
608
|
"mod8 = s(lat,long)+s(LC1,LST)","mod9 = s(lat,long)+s(CANHGHT)","mod10 = s(lat,long)+s(LST,CANHGHT)")
|
609
|
|
610
|
#names_layers<-names(pred_temp_s)
|
611
|
#names(pred_temp_s)<-names_layers
|
612
|
|
613
|
s.range <- c(min(minValue(pred_temp_s)), max(maxValue(pred_temp_s)))
|
614
|
#s.range <- s.range+c(5,-5)
|
615
|
col.breaks <- pretty(s.range, n=200)
|
616
|
lab.breaks <- pretty(s.range, n=100)
|
617
|
temp.colors <- colorRampPalette(c('blue', 'white', 'red'))
|
618
|
max_val<-s.range[2]
|
619
|
min_val <-s.range[1]
|
620
|
#max_val<- -10
|
621
|
#min_val <- 0
|
622
|
layout_m<-c(4,3) #one row two columns
|
623
|
|
624
|
png(paste("Figure_7_spatial_pattern_tmax_prediction_models_baseline1_gam_levelplot_",date_selected,out_prefix,".png", sep=""),
|
625
|
height=480*layout_m[1],width=480*layout_m[2])
|
626
|
|
627
|
levelplot(pred_temp_s,main="Interpolated Surfaces Model Comparison baseline 1", ylab=NULL,xlab=NULL,
|
628
|
par.settings = list(axis.text = list(font = 2, cex = 1.3),layout=layout_m,
|
629
|
par.main.text=list(font=2,cex=2),strip.background=list(col="white")),par.strip.text=list(font=2,cex=1.5),
|
630
|
names.attr=names_layers,col.regions=temp.colors,at=seq(max_val,min_val,by=0.01))
|
631
|
#col.regions=temp.colors(25))
|
632
|
dev.off()
|
633
|
|
634
|
|
635
|
################ #FIGURE 8
|
636
|
lf1 <- raster_prediction_obj_1$method_mod_obj[[index]][[y_var_name]]
|
637
|
lf1 #contains the models for gam
|
638
|
|
639
|
pred_temp_s <-stack(lf1$mod1,lf1$mod4)
|
640
|
date_selected <- "20109101"
|
641
|
#names_layers <-c("mod1=s(lat,long)+s(elev)","mod4=s(lat,long)+s(LST)","diff=mod1-mod4")
|
642
|
names_layers <-c("mod1 = s(lat,long)+s(elev)","mod4 = s(lat,long)+s(LST)")
|
643
|
names(pred_temp_s)<-names_layers
|
644
|
s.range <- c(min(minValue(pred_temp_s)), max(maxValue(pred_temp_s)))
|
645
|
#s.range <- s.range+c(5,-5)
|
646
|
col.breaks <- pretty(s.range, n=200)
|
647
|
lab.breaks <- pretty(s.range, n=100)
|
648
|
temp.colors <- colorRampPalette(c('blue', 'white', 'red'))
|
649
|
max_val<-s.range[2]
|
650
|
min_val <-s.range[1]
|
651
|
#max_val<- -10
|
652
|
min_val <- 0
|
653
|
layout_m<-c(1,2) #one row two columns
|
654
|
|
655
|
png(paste("Figure_8a_spatial_pattern_tmax_prediction_models_gam_levelplot_",date_selected,out_prefix,".png", sep=""),
|
656
|
height=480*layout_m[1],width=480*layout_m[2])
|
657
|
|
658
|
levelplot(pred_temp_s,main="Interpolated Surfaces Model Comparison", ylab=NULL,xlab=NULL,
|
659
|
par.settings = list(axis.text = list(font = 2, cex = 1.3),layout=layout_m,
|
660
|
par.main.text=list(font=2,cex=2),strip.background=list(col="white")),par.strip.text=list(font=2,cex=1.5),
|
661
|
names.attr=names_layers,col.regions=temp.colors,at=seq(max_val,min_val,by=0.01))
|
662
|
#col.regions=temp.colors(25))
|
663
|
|
664
|
#col.regions=temp.colors(25))
|
665
|
dev.off()
|
666
|
|
667
|
|
668
|
diff<-raster(lf1$mod1)-raster(lf1$mod4)
|
669
|
names_layers <- c("difference=mod1-mod4")
|
670
|
names(diff) <- names_layers
|
671
|
|
672
|
|
673
|
png(paste("Figure_8b_spatial_pattern_tmax_prediction_models_gam_levelplot_",date_selected,out_prefix,".png", sep=""),
|
674
|
height=480*layout_m[1],width=480*layout_m[2])
|
675
|
|
676
|
plot(diff,col=temp.colors(100),main=names_layers)
|
677
|
#levelplot(diff,main="Interpolated Surfaces Model Comparison", ylab=NULL,xlab=NULL,
|
678
|
# par.settings = list(axis.text = list(font = 2, cex = 1.3),layout=c(1,1),
|
679
|
# par.main.text=list(font=2,cex=2),strip.background=list(col="white")),par.strip.text=list(font=2,cex=1.5),
|
680
|
# names.attr=names_layers,col.regions=temp.colors)
|
681
|
dev.off()
|
682
|
|
683
|
######## NOW GET A ACCURACY BY STATIONS
|
684
|
|
685
|
list_data_v<-extract_list_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"data_v")
|
686
|
data_v_test <- list_data_v[[1]]
|
687
|
|
688
|
#Convert sp data.frame and combined them in one unique df, see function define earlier
|
689
|
data_v_combined <-convert_spdf_to_df_from_list(list_data_v) #long rownames
|
690
|
names_var<-c("res_mod1","res_mod2","res_mod3","res_mod4","res_mod5","res_mod6","res_mod7","res_mod8")
|
691
|
|
692
|
limit_val<- c(-30,-2.57,0,2.57,30)
|
693
|
data_v_combined$res_mod1_rc1 <- cut(data_v_combined$res_mod1,include.lowest=TRUE,breaks=limit_val)
|
694
|
data_v_combined$res_mod1_rc1
|
695
|
|
696
|
t<-melt(data_v_combined,
|
697
|
measure=names_var,
|
698
|
id=c("res_mod1_rc1","id"),
|
699
|
na.rm=T)
|
700
|
|
701
|
n_tb<-cast(t,res_mod1_rc1+id~variable,length)
|
702
|
n_tb_tot <-cast(t,id~variable,length) #number of times the stations was used for validation
|
703
|
|
704
|
merge(n_tb$id
|
705
|
dim(n_tb)
|
706
|
#mae_tb <-cast(t,dst_cat1~variable,mae_fun)
|
707
|
#rmse_tb <-cast(t,dst_cat1~variable,rmse_fun)
|
708
|
#sd_abs_tb<-cast(t,dst_cat1~variable,sd_abs_fun)
|
709
|
|
710
|
#avg_tb<-cast(t,dst_cat1~variable,mean)
|
711
|
#sd_tb<-cast(t,dst_cat1~variable,sd)
|
712
|
|
713
|
t<-melt(data_v_combined,
|
714
|
measure=names_var,
|
715
|
id=c("id"),
|
716
|
na.rm=T)
|
717
|
|
718
|
hist(data_v_combined)
|
719
|
names(data_v_combined)
|
720
|
|
721
|
mae_fun<-function(x){mean(abs(x))} #Mean Absolute Error give a residuals vector
|
722
|
sd_abs_fun<-function(x){sd(abs(x))} #sd Absolute Error give a residuals vector
|
723
|
|
724
|
mae_tb<-cast(t,id~variable,mae_fun) #join to station location...
|
725
|
|
726
|
sd_abs_tb<-cast(t,dst_cat1~variable,sd_abs_fun)
|
727
|
|
728
|
#avg_tb<-cast(t,dst_cat1~variable,mean)
|
729
|
#sd_tb<-cast(t,dst_cat1~variable,sd)
|
730
|
#n_tb<-cast(t,dst_cat1~variable,length)
|
731
|
|
732
|
met_obj <-load_obj(file.path(in_dir1,met_obj_file_1))
|
733
|
stat_loc<-readOGR(dsn=in_dir1,layer=sub(".shp","",basename(met_obj$loc_stations)))
|
734
|
|
735
|
data_v_mae <-merge(mae_tb,stat_loc,by.x=c("id"),by.y=c("STAT_ID"))
|
736
|
hist(data_v_mae$res_mod1)
|
737
|
mean(data_v_mae$res_mod1)
|
738
|
|
739
|
coords<- data_v_mae[c('longitude','latitude')] #Define coordinates in a data frame
|
740
|
CRS_interp<-proj4string(data_v_test)
|
741
|
coordinates(data_v_mae)<-coords #Assign coordinates to the data frame
|
742
|
proj4string(data_v_mae)<- proj4string(stat_loc) #Assign coordinates reference system in PROJ4 format
|
743
|
data_v_mae<-spTransform(data_v_mae,CRS(CRS_interp)) #Project from WGS84 to new coord. system
|
744
|
|
745
|
p<-bubble(data_v_mae,"res_mod1",maxsize=4,col=c("red"),fill=FALSE)
|
746
|
#p<-bubble(data_v_mae,"res_mod1",maxsize=4,col=c("red"),fill=FALSE,key.entries=c(1,1.5,2,2.5,3,3.5,4,4.5))
|
747
|
|
748
|
p
|
749
|
|
750
|
infile_reg_outline <- "/data/project/layers/commons/data_workflow/inputs/region_outlines_ref_files/OR83M_state_outline.shp" #input region outline defined by polygon: Oregon
|
751
|
reg_outline <- readOGR(dsn=dirname(infile_reg_outline),layer=sub(".shp","",basename(infile_reg_outline)))
|
752
|
|
753
|
p + layer(sp.polygons(reg_outline,lwd=0.9,col="darkgray"))
|
754
|
|
755
|
p4<-bubble(data_v_mae,"res_mod4",maxsize=4,col=c("red"),fill=FALSE)
|
756
|
p4 + layer(sp.polygons(reg_outline,lwd=0.9,col="darkgray"))
|
757
|
|
758
|
col_t <- colorRampPalette(c('blue', 'white', 'red'))
|
759
|
|
760
|
p_elev <-levelplot(subset(s_raster,"elev_s"),margin=FALSE)
|
761
|
p4 <-bubble(data_v_mae[data_v_mae$res_mod4>2.134,],"res_mod4",maxsize=4,col=c("blue"),fill=FALSE)
|
762
|
p_elev + p4 + layer(sp.polygons(reg_outline,lwd=0.9,col="green"))
|
763
|
title("mod4")
|
764
|
|
765
|
p_elev <-levelplot(subset(s_raster,"elev_s"))
|
766
|
p1 <-bubble(data_v_mae[data_v_mae$res_mod1>2.109,],"res_mod1",maxsize=4,col=c("blue"),fill=FALSE)
|
767
|
p_elev + p1 + layer(sp.polygons(reg_outline,lwd=0.9,col="green"))
|
768
|
#bubble(data_v_mae,"res_mod1")
|
769
|
#p<-spplot(data_v_mae,"res_mod1",maxsize=4,col=c("red"))
|
770
|
#p
|
771
|
#stations that are outliers in one but not the other...
|
772
|
id_setdiff<-setdiff(data_v_mae[data_v_mae$res_mod1>2.109,]$id,data_v_mae[data_v_mae$res_mod4>2.134,]$id)
|
773
|
|
774
|
data_id_setdiff <- data_v_mae[data_v_mae$id %in% id_setdiff,]
|
775
|
|
776
|
p_elev +layer(sp.polygons(reg_outline,lwd=0.9,col="green")) + layer(sp.points(data_id_setdiff,pch=4,cex=2,col="pink"))
|
777
|
|
778
|
###############################
|
779
|
########## Prepare table 6
|
780
|
# Now get p values and other things...
|
781
|
###baseline 2: s(lat,lon) + s(elev)
|
782
|
|
783
|
l_obj<-vector("list",length=2)
|
784
|
l_obj[[1]]<-raster_prediction_obj_1
|
785
|
l_obj[[2]]<-raster_prediction_obj_2
|
786
|
l_table <- vector("list",length=length(l_obj))
|
787
|
for (k in 1:length(l_obj)){
|
788
|
raster_prediction_obj<- l_obj[[k]]
|
789
|
list_myModels <- extract_list_from_list_obj(raster_prediction_obj$method_mod_obj,"mod")
|
790
|
|
791
|
list_models_info <-lapply(1:length(list_myModels),FUN=create_s_and_p_table_term_models,list_myModels)
|
792
|
dates<-(extract_from_list_obj(raster_prediction_obj_1$method_mod_obj,"sampling_dat"))$date #get vector of dates
|
793
|
names(list_models_info)<-dates #adding names to the list
|
794
|
|
795
|
#Prepare and process p. value information regarding models: count number of times values were above a threshold.
|
796
|
s.table_term_tb <-extract_from_list_obj(list_models_info,"s.table_term")
|
797
|
|
798
|
threshold_val<-c(0.01,0.05,0.1)
|
799
|
s.table_term_tb$p_val_rec1 <- s.table_term_tb[["p-value"]] < threshold_val[1]
|
800
|
s.table_term_tb$p_val_rec2 <- s.table_term_tb[["p-value"]] < threshold_val[2]
|
801
|
s.table_term_tb$p_val_rec3 <- s.table_term_tb[["p-value"]] < threshold_val[3]
|
802
|
|
803
|
names_var <- c("p_val_rec1","p_val_rec2","p_val_rec3")
|
804
|
t2<-melt(s.table_term_tb,
|
805
|
measure=names_var,
|
806
|
id=c("mod_name","term_name"),
|
807
|
na.rm=T)
|
808
|
|
809
|
summary_s.table_term2 <- cast(t2,term_name+mod_name~variable,sum)
|
810
|
|
811
|
#Now add AIC
|
812
|
AIC_models_tb <-extract_from_list_obj(list_models_info,"AIC_models")
|
813
|
AIC_models_tb
|
814
|
names_var <- c("AIC")
|
815
|
#id_var <-
|
816
|
t3<-melt(AIC_models_tb,
|
817
|
measure=names_var,
|
818
|
id=c("mod_name","term_name"),
|
819
|
na.rm=T)
|
820
|
|
821
|
summary_AIC <- cast(t3,term_name+mod_name~variable,median)
|
822
|
summary_AIC$AIC <- round(summary_AIC[,c("AIC")],digit=2) #roundto three digits teh differences
|
823
|
|
824
|
#Now combine tables and drop duplicate columns the combined table can be modified for the paper...
|
825
|
avg_s <- calc_stat_from_raster_prediction_obj(raster_prediction_obj,"mean",training=TRUE)
|
826
|
avg_v <- calc_stat_from_raster_prediction_obj(raster_prediction_obj,"mean",training=FALSE)
|
827
|
avg <- cbind(avg_s[,c("pred_mod","mae")],avg_v[,c("mae")])
|
828
|
names(avg)<-c("pred_mod","mae_s","mae_v")
|
829
|
avg[,c("mae_s","mae_v")] <- round(avg[,c("mae_s","mae_v")],digit=2)
|
830
|
table <- merge(summary_AIC,avg,by.x="mod_name",by.y="pred_mod")
|
831
|
|
832
|
tables_AIC_ac_p_val <-list(table,summary_s.table_term2)
|
833
|
names(tables_AIC_ac_p_val) <-c("table","s.table_p_val_term")
|
834
|
l_table[[k]] <- tables_AIC_ac_p_val
|
835
|
}
|
836
|
|
837
|
## Now prepare table
|
838
|
s.table_p_val_term <- l_table[[1]]$s.table_p_val_term[-c(10:26),]
|
839
|
s.table_p_val_term <- s.table_p_val_term[order(s.table_p_val_term$mod_name),]
|
840
|
#summary_s.table_term2 <- summary_s.table_term2[-c(8,10),]
|
841
|
table <- l_table[[1]]$table
|
842
|
|
843
|
table6a <- merge(s.table_p_val_term,table,by="mod_name")
|
844
|
table6a <- table6a[,-match(c("term_name.y"),names(table6a))]
|
845
|
#model_col<-c("Baseline2","Northness","Eastness","LST","DISTOC","Forest","CANHEIGHT","LST*Forest") # removed ,"LST*CANHEIGHT")
|
846
|
#names_table_col<-c("DiffMAE","DiffRMSE","DiffME","Diffr","Model")
|
847
|
|
848
|
s.table_p_val_term <- l_table[[2]]$s.table_p_val_term[-c(11:19),]
|
849
|
s.table_p_val_term <- s.table_p_val_term[order(s.table_p_val_term$mod_name),]
|
850
|
#summary_s.table_term2 <- summary_s.table_term2[-c(8,10),]
|
851
|
tableb <- l_table[[2]]$table
|
852
|
|
853
|
table6b <- merge(s.table_p_val_term,tableb,by="mod_name")
|
854
|
table6b <- table6b[,-match(c("term_name.y"),names(table6b))]
|
855
|
#model_col<-c("Baseline2","Northness","Eastness","LST","DISTOC","Forest","CANHEIGHT","LST*Forest") # removed ,"LST*CANHEIGHT")
|
856
|
#names_table_col<-c("DiffMAE","DiffRMSE","DiffME","Diffr","Model")
|
857
|
|
858
|
#table6b[order(table6b$mod_name),]
|
859
|
|
860
|
#Now write out table...
|
861
|
|
862
|
file_name<-paste("table6a_paper","_",out_prefix,".txt",sep="")
|
863
|
write.table(table6a,file=file_name,sep=",")
|
864
|
|
865
|
file_name<-paste("table6b_paper","_",out_prefix,".txt",sep="")
|
866
|
write.table(table6b,file=file_name,sep=",")
|
867
|
|
868
|
########################
|
869
|
### Prepare table 7: correlation matrix between covariates
|
870
|
|
871
|
names(s_raster)
|
872
|
|
873
|
list_formulas<-raster_prediction_obj_2$method_mod_obj[[1]]$formulas
|
874
|
list_formulas <- lapply(list_formulas,FUN=as.formula)
|
875
|
covar_names_model <- unique(unlist(lapply(list_formulas,FUN=all.vars)))[-1]
|
876
|
covar_names_model <- c(covar_names_model[-6],c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06",
|
877
|
"mm_07","mm_08","mm_09","mm_10","mm_11","mm_12"))
|
878
|
covar_raster <- subset(s_raster,covar_names_model)
|
879
|
|
880
|
names(covar_raster) <- covar_names_model
|
881
|
corr_layers_covar <-layerStats(covar_raster,"pearson",na.rm=TRUE)
|
882
|
corr_mat <-round(corr_layers_covar[[1]], digit=2)
|
883
|
|
884
|
file_name<-paste("table7_paper","_",out_prefix,".txt",sep="")
|
885
|
write.table(corr_mat,file=file_name,sep=",")
|
886
|
|
887
|
#met_obj <-load_obj(file.path(in_dir1,met_obj_file_1))
|
888
|
#stat_loc<-readOGR(dsn=in_dir1,layer=sub(".shp","",basename(met_obj$loc_stations)))
|
889
|
#dim(stat_loc)
|
890
|
|
891
|
###################### END OF SCRIPT #######################
|
892
|
|
893
|
|