Revision d5970771
Added by Benoit Parmentier about 11 years ago
climate/research/oregon/interpolation/multi_timescale_paper_interpolation.R | ||
---|---|---|
4 | 4 |
#different covariates using FUSION and CAI. Accuracy methods are added in the the function script to evaluate results. |
5 | 5 |
#Figures, tables and data for the paper are also produced in the script. |
6 | 6 |
#AUTHOR: Benoit Parmentier |
7 |
#CREATED ON: 11/08/2013
|
|
8 |
#MODIFIED ON: 10/08/2013
|
|
7 |
#CREATED ON: 10/31/2013
|
|
8 |
#MODIFIED ON: 11/08/2013
|
|
9 | 9 |
#Version: 1 |
10 | 10 |
#PROJECT: Environmental Layers project |
11 | 11 |
################################################################################################# |
... | ... | |
49 | 49 |
#direct methods: gam, kriging, gwr |
50 | 50 |
in_dir1 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gam_daily_lst_comb5_11012013" |
51 | 51 |
in_dir2 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_kriging_daily_lst_comb5_11022013" |
52 |
#in_dir3 <-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data_365d_kriging_day_lst_comb3_07112013"
|
|
52 |
in_dir3 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gwr_daily_lst_comb5p1_3_11062013"
|
|
53 | 53 |
#CAI: gam, kriging, gwr |
54 | 54 |
in_dir4 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gam_cai_lst_comb5_11032013" |
55 | 55 |
in_dir5 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_kriging_cai_lst_comb5_11032013" |
56 | 56 |
in_dir6 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gwr_cai_lst_comb5_11042013" |
57 | 57 |
#FSS: gam, kriging, gwr |
58 |
#in_dir7 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gam_fss_lst_comb5_11062013"
|
|
58 |
in_dir7 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_gam_fss_lst_comb5_11062013" |
|
59 | 59 |
in_dir8 <-"/data/project/layers/commons/Oregon_interpolation/output_data_365d_kriging_fss_lst_comb5_11052013" |
60 |
#in_dir9 <- "/data/project/layers/commons/Oregon_interpolation/output_data_365d_kriging_daily_mults1_lst_comb3_10112013"
|
|
60 |
in_dir9 <- "/data/project/layers/commons/Oregon_interpolation/output_data_365d_gwr_fss_lst_comb5_11052013"
|
|
61 | 61 |
# |
62 | 62 |
|
63 | 63 |
##raster_prediction object for comb5 |
64 | 64 |
#direct methods |
65 |
raster_obj_file_1 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_daily_lst_comb5_11022013.RData" |
|
66 |
raster_obj_file_2 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_day_lst_comb4_08152013.RData" |
|
67 |
#raster_obj_file_3 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_day_lst_comb3_07112013.RData" |
|
65 |
raster_obj_file_1 <- "raster_prediction_obj_gam_daily_dailyTmax_365d_gam_daily_lst_comb5_11012013.RData" |
|
66 |
raster_obj_file_2 <- "raster_prediction_obj_kriging_daily_dailyTmax_365d_kriging_daily_lst_comb5_11022013.RData" |
|
67 |
raster_obj_file_3 <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_daily_lst_comb5p1_3_11062013.RData" |
|
68 |
raster_obj_file_3b <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_daily_lst_comb5p1_3_11062013.RData" |
|
68 | 69 |
#CAI |
69 | 70 |
raster_obj_file_4 <- "raster_prediction_obj_gam_CAI_dailyTmax_365d_gam_cai_lst_comb5_11032013.RData" |
70 | 71 |
raster_obj_file_5 <- "raster_prediction_obj_kriging_CAI_dailyTmax_365d_kriging_cai_lst_comb5_11032013.RData" |
71 | 72 |
raster_obj_file_6 <- "raster_prediction_obj_gwr_CAI_dailyTmax_365d_gwr_cai_lst_comb5_11042013.RData" |
72 | 73 |
#FSS |
73 |
#raster_obj_file_7 <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_daily_mults1_lst_comb3_10132013.RData"
|
|
74 |
raster_obj_file_8 <- "raster_prediction_obj_kriging_fusion_dailyTmax_365d_kriging_fss_lst_comb5_11052013.RData"
|
|
75 |
#raster_obj_file_9 <- "raster_prediction_obj_gwr_daily_dailyTmax_365d_gwr_daily_mults1_lst_comb3_10132013.RData"
|
|
74 |
raster_obj_file_7 <- "raster_prediction_obj_gam_fusion_dailyTmax_365d_gam_fss_lst_comb5_11062013.RData"
|
|
75 |
raster_obj_file_8 <- "raster_prediction_obj_kriging_fusion_dailyTmax_365d_kriging_fss_lst_comb5_11052013.RData" |
|
76 |
raster_obj_file_9 <- "raster_prediction_obj_gwr_fusion_dailyTmax_365d_gwr_fss_lst_comb5_11052013.RData"
|
|
76 | 77 |
|
77 | 78 |
out_dir<-"/home/parmentier/Data/IPLANT_project/paper_multitime_scale__analyses_tables_fig_09032013" |
78 | 79 |
setwd(out_dir) |
... | ... | |
81 | 82 |
met_stations_outfiles_obj_file<-"/data/project/layers/commons/data_workflow/output_data_365d_gam_fus_lst_test_run_07172013/met_stations_outfiles_obj_gam_fusion__365d_gam_fus_lst_test_run_07172013.RData" |
82 | 83 |
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84 |
83 | 84 |
y_var_name <- "dailyTmax" |
84 |
out_prefix<-"analyses_10312013"
|
|
85 |
out_prefix<-"analyses_11082013"
|
|
85 | 86 |
|
86 | 87 |
#method_interpolation <- "gam_daily" |
87 |
covar_obj_file_1 <- "covar_obj__365d_gam_day_lst_comb3_08132013.RData" |
|
88 |
#met_obj_file_1 <- "met_stations_outfiles_obj_gam_daily__365d_gam_day_lst_comb3_08132013.RData" |
|
89 |
#met_stations_outfiles_obj_gam_daily__365d_gam_day_lst_comb3_08132013.RData |
|
90 |
|
|
88 |
#covar_obj_file_1<- list.files(path=in_dir1,pattern="covar_obj.*.RData") |
|
89 |
covar_obj_file_1 <- "covar_obj__365d_gam_daily_lst_comb5_11012013.RData" |
|
90 |
met_obj_file_1 <- "met_stations_outfiles_obj_gam_daily__365d_gam_daily_lst_comb5_11012013.RData" |
|
91 | 91 |
|
92 | 92 |
#Load objects containing training, testing, models objects |
93 | 93 |
|
94 |
#met_stations_obj <- load_obj(met_stations_outfiles_obj_file)
|
|
95 |
#covar_obj <-load_obj(file.path(in_dir1,covar_obj_file_1)) #Reading covariates object for GAM daily method
|
|
96 |
#infile_covariates <- covar_obj$infile_covariates
|
|
97 |
#infile_reg_outline <- covar_obj$infile_reg_outline
|
|
94 |
met_stations_obj <- load_obj(file.path(in_dir1,met_obj_file_1)
|
|
95 |
covar_obj <-load_obj(file.path(in_dir1,covar_obj_file_1)) #Reading covariates object for GAM daily method |
|
96 |
infile_covariates <- covar_obj$infile_covariates |
|
97 |
infile_reg_outline <- covar_obj$infile_reg_outline |
|
98 | 98 |
covar_names<- covar_obj$covar_names |
99 | 99 |
##### |
100 | 100 |
s_raster <- brick(infile_covariates) |
101 | 101 |
names(s_raster)<-covar_names |
102 | 102 |
|
103 |
raster_prediction_obj_1 <-load_obj(file.path(in_dir1,raster_obj_file_1)) #comb3 (baseline 2) |
|
104 |
raster_prediction_obj_2 <-load_obj(file.path(in_dir2,raster_obj_file_2)) #comb4 (baseline 1) |
|
105 |
raster_prediction_obj_3 <-load_obj(file.path(in_dir3,raster_obj_file_3)) #comb3/mod1 baseline 2, kriging |
|
106 |
raster_prediction_obj_4 <-load_obj(file.path(in_dir4,raster_obj_file_4)) #comb3/mod1 baseline 2, gwr |
|
107 |
raster_prediction_obj_5 <-load_obj(file.path(in_dir5,raster_obj_file_5)) #gam daily multisampling 10 to 70% |
|
108 |
raster_prediction_obj_6 <-load_obj(file.path(in_dir6,raster_obj_file_6)) #kriging daily multisampling 10 to 70% |
|
109 |
raster_prediction_obj_7 <-load_obj(file.path(in_dir7,raster_obj_file_7)) #gwr daily multisampling 10 to 70% |
|
110 |
raster_prediction_obj_8 <-load_obj(file.path(in_dir8,raster_obj_file_8)) #kriging fss |
|
111 |
raster_prediction_obj_9 <-load_obj(file.path(in_dir9,raster_obj_file_9)) #gwr daily multisampling 10 to 70% |
|
103 |
raster_prediction_obj_1 <-load_obj(file.path(in_dir1,raster_obj_file_1)) #comb5 gam_daily |
|
104 |
raster_prediction_obj_2 <-load_obj(file.path(in_dir2,raster_obj_file_2)) #comb5 kriging_daily |
|
105 |
raster_prediction_obj_3 <-load_obj(file.path(in_dir3,raster_obj_file_3)) #comb5 gwr_daily mod1 to mod3 |
|
106 |
raster_prediction_obj_3b <-load_obj(file.path(in_dir3b,raster_obj_file_3b)) #comb5 gwr_daily mod4 to mod7 |
|
107 |
|
|
108 |
raster_prediction_obj_4 <-load_obj(file.path(in_dir4,raster_obj_file_4)) #comb5 gam_CAI |
|
109 |
raster_prediction_obj_5 <-load_obj(file.path(in_dir5,raster_obj_file_5)) #comb5 kriging_CAI |
|
110 |
raster_prediction_obj_6 <-load_obj(file.path(in_dir6,raster_obj_file_6)) #comb5 gwr_CAI |
|
111 |
raster_prediction_obj_7 <-load_obj(file.path(in_dir7,raster_obj_file_7)) #comb5 gam_fss |
|
112 |
raster_prediction_obj_8 <-load_obj(file.path(in_dir8,raster_obj_file_8)) #comb5 kriging_fss |
|
113 |
raster_prediction_obj_9 <-load_obj(file.path(in_dir9,raster_obj_file_9)) #comb5 gwr_fss |
|
112 | 114 |
|
113 | 115 |
############### BEGIN SCRIPT ################# |
114 | 116 |
|
115 | 117 |
############ |
116 |
##### 1) Generate: Table 3. Contribution of covariates using validation accuracy metrics |
|
117 |
## This is a table of accuracy compared to baseline by difference |
|
118 |
|
|
118 |
##### 1) Generate: Table 4. Contribution of covariates using validation accuracy metrics |
|
119 |
## This is a table of accuracy |
|
120 |
|
|
121 |
list_raster_obj_files <- list(file.path(in_dir1,raster_obj_file_1),file.path(in_dir2,raster_obj_file_2), |
|
122 |
file.path(in_dir3,raster_obj_file_3),file.path(in_dir4,raster_obj_file_4), |
|
123 |
file.path(in_dir5,raster_obj_file_5),file.path(in_dir6,raster_obj_file_6), |
|
124 |
file.path(in_dir7,raster_obj_file_7),file.path(in_dir8,raster_obj_file_8), |
|
125 |
file.path(in_dir9,raster_obj_file_9)) |
|
126 |
|
|
127 |
names(list_raster_obj_files)<- c("gam_daily","kriging_daily","gwr_daily", |
|
128 |
"gam_CAI","kriging_CAI","gwr_CAI", |
|
129 |
"gam_fss","kriging_fss","gwr_fss") |
|
130 |
summary_metrics_v_list<-lapply(list_raster_obj_files,FUN=function(x){x<-load_obj(x);x[["summary_metrics_v"]]$avg$rmse}) |
|
131 |
names(summary_metrics_v_list) |
|
132 |
|
|
133 |
#for gam models |
|
134 |
table_gam<- summary_metrics_v_list[grep("gam",names(summary_metrics_v_list))] |
|
135 |
table_gam <-do.call(cbind,table_gam) |
|
136 |
table_gam<-table_gam[1:7,] |
|
137 |
#for kriging models |
|
138 |
table_kriging <- summary_metrics_v_list[grep("kriging",names(summary_metrics_v_list))] |
|
139 |
table_kriging <- do.call(cbind,table_kriging) |
|
140 |
table_kriging <- table_kriging[1:7,] |
|
141 |
|
|
142 |
#for kriging models |
|
143 |
table_gwr <- summary_metrics_v_list[grep("gwr",names(summary_metrics_v_list))] |
|
144 |
table_gwr <- do.call(cbind,table_gwr) |
|
145 |
table_gwr <- table_gwr[1:7,] |
|
146 |
|
|
147 |
table4_paper <- as.data.frame(do.call(rbind,list(table_gam,table_kriging,table_gwr))) |
|
148 |
table4_paper <- round(table4_paper,digit=3) #roundto three digits teh differences |
|
149 |
table4_paper$Methods <- c(rep("gam",7), |
|
150 |
rep("kriging",7), |
|
151 |
rep("gwr",7)) |
|
152 |
|
|
119 | 153 |
#Check input covariates and model formula: |
120 |
raster_prediction_obj_1$method_mod_obj[[1]]$formulas #models run for baseline 2, GAM daily |
|
121 |
raster_prediction_obj_2$method_mod_obj[[1]]$formulas #models run for baseline 1, GAM daily |
|
122 |
|
|
123 |
summary_metrics_v1<-raster_prediction_obj_1$summary_metrics_v |
|
124 |
summary_metrics_v2<-raster_prediction_obj_2$summary_metrics_v |
|
125 |
|
|
126 |
summary_metrics_v1$avg[1,] |
|
127 |
summary_metrics_v2$avg[,] |
|
128 |
|
|
129 |
tb1 <-raster_prediction_obj_1$tb_diagnostic_v #365 days accuracy table for baseline 2 |
|
130 |
tb2 <-raster_prediction_obj_2$tb_diagnostic_v #365 days accuracy table for baseline 1 |
|
131 |
|
|
132 |
table_data1 <-summary_metrics_v1$avg[,c("mae","rmse","me","r")] #select relevant columns from data.frame |
|
133 |
table_data2 <-summary_metrics_v2$avg[,c("mae","rmse","me","r")] |
|
134 |
|
|
135 |
###Table 3a, baseline 1: s(lat,lon) |
|
136 |
#Change here !!! need to reorder rows based on mod first |
|
137 |
model_col<-c("Baseline1","Elevation","Northing","Easting","LST","DISTOC","Forest","CANHEIGHT","LST*Forest","LST*CANHEIGHT") # |
|
138 |
names_table_col<-c("ΔMAE","ΔRMSE","ΔME","Δr","Model") |
|
139 |
df3a<- as.data.frame(sapply(table_data2,FUN=function(x) x-x[1])) |
|
140 |
df3a<- round(df3a,digit=3) #roundto three digits teh differences |
|
141 |
df3a$Model <-model_col |
|
142 |
names(df3a)<- names_table_col |
|
143 |
print(df3a) #show resulting table |
|
144 |
|
|
145 |
###Table 3b, baseline 2: s(lat,lon) + s(elev) |
|
146 |
|
|
147 |
model_col<-c("Baseline2","Northness","Eastness","LST","DISTOC","Forest","CANHEIGHT","LST*Forest","LST*CANHEIGHT") |
|
148 |
|
|
149 |
df3b <- as.data.frame(sapply(table_data1,FUN=function(x) x-x[1])) #difference between baseline (line 1) and other models |
|
150 |
df3b <- round(df3b,digit=3) #roundto three digits the differences |
|
151 |
df3b$Model <- model_col |
|
152 |
names(df3b)<- names_table_col |
|
153 |
print(df3b) #Part b of table 3 |
|
154 |
|
|
155 |
sd2_v <-calc_stat_from_raster_prediction_obj(raster_prediction_obj_2,"sd",training=FALSE) |
|
154 |
list_formulas <-raster_prediction_obj_2$method_mod_obj[[1]]$formulas #formulas for models run comb5 |
|
155 |
#strsplit(list_formulas,"~") |
|
156 |
|
|
157 |
table4_paper$Forumulas<-rep(list_formulas,3) |
|
158 |
table4_paper<-table4_paper[(c(5,4,1,2,3))] |
|
156 | 159 |
|
157 | 160 |
#Testing siginificance between models |
161 |
|
|
162 |
#mod_compk1 <-kruskal.test(tb1$rmse ~ as.factor(tb1$pred_mod)) #Kruskal Wallis test |
|
163 |
#mod_compk2 <-kruskal.test(tb2$rmse ~ as.factor(tb2$pred_mod)) |
|
164 |
#print(mod_compk1) #not significant |
|
158 | 165 |
|
159 |
mod_compk1 <-kruskal.test(tb1$rmse ~ as.factor(tb1$pred_mod)) #Kruskal Wallis test |
|
160 |
mod_compk2 <-kruskal.test(tb2$rmse ~ as.factor(tb2$pred_mod)) |
|
161 |
print(mod_compk1) #not significant |
|
162 |
print(mod_compk2) #not significant |
|
163 |
|
|
164 |
#Multiple Kruskal Wallis |
|
165 |
mod_compk1 <-kruskalmc(tb1$rmse ~ as.factor(tb1$pred_mod)) |
|
166 |
mod_compk2 <-kruskalmc(tb2$rmse ~ as.factor(tb2$pred_mod)) |
|
167 |
|
|
168 |
print(mod_compk1) |
|
169 |
print(mod_compk2) |
|
170 |
|
|
171 |
#Now write out table 3 |
|
172 |
|
|
173 |
file_name<-paste("table3a_baseline1_paper","_",out_prefix,".txt",sep="") |
|
174 |
write.table(df3a,file=file_name,sep=",") |
|
175 |
|
|
176 |
file_name<-paste("table3b_baseline2_paper","_",out_prefix,".txt",sep="") |
|
177 |
write.table(df3b,file=file_name,sep=",") |
|
178 |
|
|
179 |
############ |
|
180 |
##### 2) Generate: Table 4. Comparison between interpolation methods using validation accuracy metrics |
|
181 |
## This is a table of accuracy metric for the optimal model (baseline2) as identified in the previous step |
|
182 |
|
|
183 |
##Table 4: Interpolation methods comparison |
|
184 |
|
|
185 |
#get sd for kriging, gam and gwr |
|
186 |
#tb1 <-raster_prediction_obj_1$tb_diagnostic_v HGAM baseline 1, loaded |
|
187 |
#tb2 <-raster_prediction_obj_2$tb_diagnostic_v #GAM baseline 2, loaded |
|
188 |
tb3 <-raster_prediction_obj_3$tb_diagnostic_v #Kriging methods |
|
189 |
tb4 <-raster_prediction_obj_4$tb_diagnostic_v #GWR methods |
|
190 |
|
|
191 |
names_mod<-c("mod1","mod2","mod3","mod4","mod5","mod6","mod7","mod8","mod9","mod10") |
|
192 |
|
|
193 |
#Calculate standard deviation for each metric |
|
194 |
sd1 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_1,"sd") # see function script |
|
195 |
sd2 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_2,"sd") # standard deviation for baseline 2 |
|
196 |
sd3 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_3,"sd") # kriging |
|
197 |
sd4 <- calc_stat_from_raster_prediction_obj(raster_prediction_obj_4,"sd") #gwr |
|
198 |
|
|
199 |
#Combined sd in one table for mod1 (baseline 2) |
|
200 |
table_sd <- do.call(rbind,list(sd1[1,],sd3[1,],sd4[1,])) #table containing the sd for the three mdethods for baseline 2 |
|
201 |
table_sd <- round(table_sd[,-1],digit=3) #round to three digits the differences |
|
202 |
table_sd <- table_sd[,c("mae","rmse","me","r")] #column 5 contains m50, remove it |
|
203 |
|
|
204 |
summary_metrics_v3<-raster_prediction_obj_3$summary_metrics_v #Kriging methods |
|
205 |
summary_metrics_v4<-raster_prediction_obj_4$summary_metrics_v # GWR method |
|
206 |
|
|
207 |
table_data3 <-summary_metrics_v3$avg[1,c("mae","rmse","me","r")] #first line mod1 (baseline) |
|
208 |
table_data4 <-summary_metrics_v4$avg[1,c("mae","rmse","me","r")] #first line mod1 (baseline) |
|
209 |
table_data1 <- table_data1[1,] |
|
210 |
|
|
211 |
table_ac <-do.call(rbind, list(table_data1,table_data3,table_data4)) |
|
212 |
table_ac <- round(table_ac,digit=3) #roundto three digits teh differences |
|
213 |
|
|
214 |
#combined tables with accuracy metrics and their standard deviations |
|
215 |
table4_paper <-table_combined_symbol(table_ac,table_sd,"±") |
|
216 |
#lapply(lapply(table_ac,FUN=paste,table_sd,FUN=paste,sep="±"),FUN=paste) |
|
166 |
###Table 4, writeout |
|
167 |
names_table_col <-c("Models","Method","Single time scale","Multiple time scale CAI","Multiple time scale FSS") # |
|
217 | 168 |
|
218 |
model_col<-c("GAM","Kriging","GWR") |
|
219 |
names_table_col<-c("MAE","RMSE","ME","R","Model") |
|
220 |
|
|
221 |
table4_paper$Model <-model_col |
|
222 | 169 |
names(table4_paper)<- names_table_col |
170 |
print(table4_paper) #show resulting table |
|
171 |
|
|
172 |
#Now write out table 4 |
|
223 | 173 |
|
224 |
file_name<-paste("table4_compariaons_interpolation_methods_avg_paper","_",out_prefix,".txt",sep="")
|
|
225 |
write.table(as.data.frame(table4_paper),file=file_name,sep=",")
|
|
174 |
file_name<-paste("table4_multi_timescale_paper","_",out_prefix,".txt",sep="")
|
|
175 |
write.table(table4_paper,file=file_name,sep=",")
|
|
226 | 176 |
|
227 | 177 |
#################################### |
228 | 178 |
####### Now create figures ############# |
229 | 179 |
|
230 | 180 |
#figure 1: study area |
231 | 181 |
#figure 2: methodological worklfow |
232 |
#figure 3:Figure 3. MAE/RMSE and distance to closest fitting station.
|
|
182 |
#figure 3: daily mean compared to monthly mean
|
|
233 | 183 |
#Figure 4. RMSE and MAE, mulitisampling and hold out for FSS and GAM. |
234 | 184 |
#Figure 5. Overtraining tendency |
235 |
#Figure 6: Spatial pattern of prediction for one day |
|
185 |
#Figure 6: Spatial pattern of prediction for one day (maps) |
|
186 |
#Figure 7: Spatial transects for one day (maps) |
|
187 |
#Figure 8: Spatial lag profiles and stations data |
|
188 |
#Figure 9: Image differencing and land cover |
|
236 | 189 |
|
237 |
### Figure 1: Oregon study area |
|
190 |
################################################ |
|
191 |
######### Figure 1: Oregon study area |
|
238 | 192 |
#3 parameters from output |
239 | 193 |
#infile_monthly<-list_outfiles$monthly_covar_ghcn_data #outile4 from database_covar script |
240 | 194 |
#infile_daily<-list_outfiles$daily_covar_ghcn_data #outfile3 from database_covar script |
... | ... | |
284 | 238 |
box() |
285 | 239 |
dev.off() |
286 | 240 |
|
287 |
### Figure 2: Method comparison workflow |
|
241 |
################################################ |
|
242 |
######### Figure 2: Method comparison workflow |
|
288 | 243 |
|
289 | 244 |
# Workflow figure is not generated in R |
290 | 245 |
|
291 | 246 |
################################################ |
292 |
################### Figure 3. MAE/RMSE and distance to closest fitting station. |
|
293 |
|
|
294 |
#Analysis accuracy in term of distance to closest station |
|
295 |
#Assign model's names |
|
296 |
|
|
297 |
names_mod <- paste("res_mod",1:10,sep="") |
|
298 |
names(raster_prediction_obj_1$validation_mod_obj[[1]]) |
|
299 |
limit_val<-seq(0,150, by=10) |
|
300 |
|
|
301 |
#Call function to extract residuals in term of distance to closest fitting station and summary statistics |
|
302 |
l1 <- distance_to_closest_fitting_station(raster_prediction_obj_1,names_mod,dist_classes=limit_val) #GAM method |
|
303 |
l3 <- distance_to_closest_fitting_station(raster_prediction_obj_3,names_mod,dist_classes=limit_val) #Kriging method |
|
304 |
l4 <- distance_to_closest_fitting_station(raster_prediction_obj_4,names_mod,dist_classes=limit_val) #GWR method |
|
305 |
|
|
306 |
l1$mae_tb #contains |
|
307 |
|
|
308 |
list_dist_obj <-list(l1,l3,l4) |
|
309 |
head(list_dist_obj[[1]]$dstspat_dat) |
|
310 |
#should add method_interp in data.frame |
|
311 |
|
|
312 |
#Prepare parameters/arguments for plotting |
|
313 |
|
|
314 |
col_t <- c("red","blue","black") |
|
315 |
pch_t <- 1:length(col_t) |
|
316 |
legend_text <- c("GAM","Kriging","GWR") |
|
317 |
mod_name <- c("res_mod1","res_mod1","res_mod1")#selected models |
|
318 |
x_tick_labels <- limit_val<-seq(5,125, by=10) |
|
319 |
metric_name <-"rmse_tb" |
|
320 |
title_plot <- "RMSE and distance to closest station for baseline 2" |
|
321 |
y_lab_text <- "RMSE (C)" |
|
322 |
#quick test |
|
323 |
add_CI <- c(TRUE,TRUE,TRUE) |
|
324 |
|
|
325 |
list_param_plot<-list(list_dist_obj,col_t,pch_t,legend_text,mod_name,x_tick_labels,metric_name,title_plot,y_lab_text,add_CI) |
|
326 |
names(list_param_plot)<-c("list_dist_obj","col_t","pch_t","legend_text","mod_name","x_tick_labels","metric_name","title_plot","y_lab_text","add_CI") |
|
327 |
plot_dst_MAE(list_param_plot) |
|
328 |
|
|
329 |
metric_name <-"mae_tb" |
|
330 |
title_plot <- "MAE and distance to closest fitting station" |
|
331 |
y_lab_text <- "MAE (C)" |
|
332 |
add_CI <- c(TRUE,TRUE,TRUE) |
|
333 |
#Now set up plotting device |
|
334 |
res_pix<-480 |
|
335 |
col_mfrow<-2 |
|
336 |
row_mfrow<-1 |
|
337 |
png_file_name<- paste("Figure_3_accuracy_and_distance_to_closest_fitting_station_",out_prefix,".png", sep="") |
|
338 |
png(filename=file.path(out_dir,png_file_name), |
|
339 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
340 |
par(mfrow=c(row_mfrow,col_mfrow)) |
|
341 |
|
|
342 |
#Figure 3a |
|
343 |
list_param_plot<-list(list_dist_obj,col_t,pch_t,legend_text,mod_name,x_tick_labels,metric_name, |
|
344 |
title_plot,y_lab_text,add_CI) |
|
345 |
names(list_param_plot)<-c("list_dist_obj","col_t","pch_t","legend_text","mod_name","x_tick_labels","metric_name", |
|
346 |
"title_plot","y_lab_text","add_CI") |
|
347 |
#undebug(plot_dst_MAE) |
|
348 |
plot_dst_MAE(list_param_plot) |
|
349 |
title(xlab="Distance to closest fitting station (km)") |
|
350 |
|
|
351 |
#Figure 3b: histogram |
|
352 |
barplot(l1$n_tb$res_mod1,names.arg=limit_val, |
|
353 |
ylab="Number of observations", |
|
354 |
xlab="Distance to closest fitting station (km)") |
|
355 |
title("Number of observation in term of distance bins") |
|
356 |
box() |
|
357 |
dev.off() |
|
358 |
|
|
359 |
#################################################### |
|
360 |
#########Figure 4. RMSE and MAE, mulitisampling and hold out for single time scale methods. |
|
361 |
|
|
362 |
#Using baseline 2: lat,lon and elev |
|
363 |
|
|
364 |
#Use run of 7 hold out proportions, 10 to 70% with 10 random samples and 12 dates... |
|
365 |
#Use gam_day method |
|
366 |
#Use comb3 i.e. using baseline s(lat,lon)+s(elev) |
|
367 |
|
|
368 |
#names_mod<-c("mod1","mod2","mod3","mod4","mod5","mod6","mod7","mod8","mod9") |
|
369 |
names_mod<-c("mod1") |
|
370 |
|
|
371 |
#debug(calc_stat_prop_tb) |
|
372 |
prop_obj_gam<-calc_stat_prop_tb(names_mod,raster_prediction_obj_5) |
|
373 |
prop_obj_kriging<-calc_stat_prop_tb(names_mod,raster_prediction_obj_6) |
|
374 |
prop_obj_gwr<-calc_stat_prop_tb(names_mod,raster_prediction_obj_7) |
|
375 |
|
|
376 |
list_prop_obj<-list(prop_obj_gam,prop_obj_kriging,prop_obj_gwr) |
|
377 |
#Testing siginificance between models |
|
378 |
|
|
379 |
mod_compk1 <-kruskal.test(prop_obj_gwr$tb$rmse ~ as.factor(prop_obj_gwr$tb$prop)) #Kruskal Wallis test |
|
380 |
mod_compk2 <-kruskal.test(tb2$rmse ~ as.factor(tb2$pred_mod)) |
|
381 |
print(mod_compk1) #not significant |
|
382 |
print(mod_compk2) #not significant |
|
383 |
|
|
384 |
#Multiple Kruskal Wallis |
|
385 |
mod_compk1 <-kruskalmc(prop_obj_gwr$tb$rmse ~ as.factor(prop_obj_gwr$tb$prop)) |
|
386 |
mod_compk2 <-kruskalmc(tb2$rmse ~ as.factor(tb2$pred_mod)) |
|
387 |
|
|
388 |
print(mod_compk1) |
|
389 |
print(mod_compk2) |
|
390 |
|
|
391 |
prop_tb <- rbind(prop_obj_gam$tb,prop_obj_kriging$tb,prop_obj_gwr$tb) |
|
392 |
|
|
393 |
mod_compk1 <-kruskal.test(prop_tb$rmse ~ as.factor(prop_tb$method_interp)+as.factor(prop_tb$prop)) #Kruskal Wallis test |
|
394 |
mod_prop <-lm(prop_tb$rmse ~ as.factor(prop_tb$method_interp)+as.factor(prop_tb$prop)) #This is significant!! |
|
395 |
|
|
396 |
## plot setting for figure 4 |
|
397 |
|
|
398 |
col_t<-c("red","blue","black") |
|
399 |
pch_t<- 1:length(col_t) |
|
400 |
legend_text <- c("GAM","Kriging","GWR") |
|
401 |
mod_name<-c("mod1","mod1","mod1")#selected models |
|
402 |
add_CI <- c(TRUE,TRUE,TRUE) |
|
403 |
CI_bar <- c(TRUE,TRUE,TRUE) |
|
404 |
#add_CI <- c(TRUE,FALSE,FALSE) |
|
405 |
|
|
406 |
##### plot figure 4 for paper |
|
407 |
#### |
|
408 |
|
|
409 |
res_pix<-480 |
|
410 |
col_mfrow<-2 |
|
411 |
row_mfrow<-1 |
|
412 |
png_file_name<- paste("Figure_4_proportion_of_holdout_and_accuracy_",out_prefix,".png", sep="") |
|
413 |
png(filename=file.path(out_dir,png_file_name), |
|
414 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
415 |
par(mfrow=c(row_mfrow,col_mfrow)) |
|
416 |
#par(mfrow=c(1,1)) |
|
417 |
metric_name<-"mae" |
|
418 |
list_param_plot<-list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name,add_CI,CI_bar) |
|
419 |
names(list_param_plot)<-c("list_prop_obj","col_t","pch_t","legend_text","mod_name","metric_name","add_CI","CI_bar") |
|
420 |
|
|
421 |
#debug(plot_prop_metrics) |
|
422 |
plot_prop_metrics(list_param_plot) |
|
423 |
title(main="MAE for hold out and methods", |
|
424 |
xlab="Hold out validation/testing proportion", |
|
425 |
ylab="MAE (C)") |
|
426 |
|
|
427 |
#now figure 4b |
|
428 |
metric_name<-"rmse" |
|
429 |
list_param_plot<-list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name,add_CI,CI_bar) |
|
430 |
names(list_param_plot)<-c("list_prop_obj","col_t","pch_t","legend_text","mod_name","metric_name","add_CI","CI_bar") |
|
431 |
plot_prop_metrics(list_param_plot) |
|
432 |
title(main="RMSE for hold out and methods", |
|
433 |
xlab="Hold out validation/testing proportion", |
|
434 |
ylab="RMSE (C)") |
|
435 |
|
|
436 |
dev.off() |
|
437 |
|
|
438 |
#################################################### |
|
439 |
#########Figure 5. Overtraining tendency |
|
440 |
|
|
441 |
#read in relevant data: |
|
442 |
## Calculate average difference for RMSE for all three methods |
|
443 |
#read in relevant data: |
|
444 |
|
|
445 |
tb1_s<-extract_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"metrics_s") |
|
446 |
rownames(tb1_s)<-NULL #remove row names |
|
447 |
tb1_s$method_interp <- "gam_daily" #add type of interpolation...out_prefix too?? |
|
448 |
|
|
449 |
tb3_s<-extract_from_list_obj(raster_prediction_obj_3$validation_mod_obj,"metrics_s") |
|
450 |
rownames(tb1_s)<-NULL #remove row names |
|
451 |
tb3_s$method_interp <- "kriging_daily" #add type of interpolation...out_prefix too?? |
|
452 |
|
|
453 |
tb4_s<-extract_from_list_obj(raster_prediction_obj_4$validation_mod_obj,"metrics_s") |
|
454 |
rownames(tb4_s)<-NULL #remove row names |
|
455 |
tb4_s$method_interp <- "gwr_daily" #add type of interpolation...out_prefix too?? |
|
456 |
|
|
457 |
tb5_s<-extract_from_list_obj(raster_prediction_obj_5$validation_mod_obj,"metrics_s") |
|
458 |
rownames(tb5_s)<-NULL #remove row names |
|
459 |
tb5_s$method_interp <- "gam_daily" #add type of interpolation...out_prefix too?? |
|
460 |
|
|
461 |
tb6_s<-extract_from_list_obj(raster_prediction_obj_6$validation_mod_obj,"metrics_s") |
|
462 |
rownames(tb6_s)<-NULL #remove row names |
|
463 |
tb6_s$method_interp <- "kriging_daily" #add type of interpolation...out_prefix too?? |
|
464 |
|
|
465 |
tb7_s<-extract_from_list_obj(raster_prediction_obj_7$validation_mod_obj,"metrics_s") |
|
466 |
rownames(tb7_s)<-NULL #remove row names |
|
467 |
tb7_s$method_interp <- "gwr_daily" #add type of interpolation...out_prefix too?? |
|
468 |
|
|
469 |
#tb1_s <-raster_prediction_obj_1$tb_diagnostic_s #gam dailycontains the accuracy metrics for each run... |
|
470 |
#tb3_s <-raster_prediction_obj_3$tb_diagnostic_s #Kriging daily methods |
|
471 |
#tb4_s <-raster_prediction_obj_4$tb_diagnostic_s #gwr daily methods |
|
472 |
|
|
473 |
tb1 <-raster_prediction_obj_1$tb_diagnostic_v #gam dailycontains the accuracy metrics for each run... |
|
474 |
tb3 <-raster_prediction_obj_3$tb_diagnostic_v #Kriging daily methods |
|
475 |
tb4 <-raster_prediction_obj_4$tb_diagnostic_v #gwr daily methods |
|
476 |
|
|
477 |
tb5 <-raster_prediction_obj_5$tb_diagnostic_v #gam dailycontains the accuracy metrics for each run... |
|
478 |
tb6 <-raster_prediction_obj_6$tb_diagnostic_v #Kriging daily methods |
|
479 |
tb7 <-raster_prediction_obj_7$tb_diagnostic_v #gwr daily methods |
|
480 |
|
|
481 |
#Calculate difference in MAE and RMSE for training and testing data: call diff_df function |
|
482 |
diff_tb1 <-diff_df(tb1_s[tb1_s$pred_mod=="mod1",],tb1[tb1$pred_mod=="mod1",],c("mae","rmse")) #gam select differences for mod1 |
|
483 |
diff_tb3 <-diff_df(tb3_s[tb3_s$pred_mod=="mod1",],tb3[tb3$pred_mod=="mod1",],c("mae","rmse")) #kriging |
|
484 |
diff_tb4 <-diff_df(tb4_s[tb4_s$pred_mod=="mod1",],tb4[tb4$pred_mod=="mod1",],c("mae","rmse")) #gwr |
|
485 |
|
|
486 |
diff_tb5 <-diff_df(tb5_s[tb5_s$pred_mod=="mod1",],tb5[tb5$pred_mod=="mod1",],c("mae","rmse")) #gam select differences for mod1 |
|
487 |
diff_tb6 <-diff_df(tb6_s[tb6_s$pred_mod=="mod1",],tb6[tb6$pred_mod=="mod1",],c("mae","rmse")) #kriging |
|
488 |
diff_tb7 <-diff_df(tb7_s[tb7_s$pred_mod=="mod1",],tb7[tb7$pred_mod=="mod1",],c("mae","rmse")) #gwr |
|
489 |
|
|
490 |
|
|
491 |
diff_mae_data <-data.frame(gam=diff_tb1$mae,kriging=diff_tb3$mae,gwr=diff_tb4$mae) |
|
492 |
diff_rmse_data <-data.frame(gam=diff_tb1$rmse,kriging=diff_tb3$rmse,gwr=diff_tb4$rmse) |
|
493 |
|
|
494 |
|
|
495 |
diff_mae_data_mult <-data.frame(gam=diff_tb5$mae,kriging=diff_tb6$mae,gwr=diff_tb7$mae) |
|
496 |
diff_rmse_data_mult <-data.frame(gam=diff_tb5$rmse,kriging=diff_tb6$rmse,gwr=diff_tb7$rmse) |
|
497 |
|
|
498 |
diff_mae_data_mult$prop <- tb5$prop |
|
499 |
|
|
500 |
boxplot(diff_mae_data_mult$gam~diff_mae_data_mult$prop) |
|
501 |
boxplot(diff_mae_data_mult$kriging~diff_mae_data_mult$prop) |
|
502 |
|
|
503 |
plot(diff_mae_data_mult$gam~diff_mae_data_mult$prop,type="p") |
|
504 |
|
|
505 |
|
|
506 |
|
|
507 |
#Test the plot |
|
508 |
boxplot(diff_mae_data) |
|
509 |
boxplot(diff_rmse_data) #plot differences in training and testing accuracies for three methods |
|
510 |
title(main="Training and testing RMSE for hold out and interpolation methods", |
|
511 |
xlab="Interpolation method", |
|
512 |
ylab="RMSE (C)") |
|
513 |
|
|
514 |
boxplot(diff_mae_data_mult) |
|
515 |
boxplot(diff_rmse_data_mult) #plot differences in training and testing accuracies for three methods |
|
516 |
|
|
517 |
|
|
518 |
boxplot(diff_mae_data_mult) |
|
519 |
|
|
520 |
tb5 <-raster_prediction_obj_5$tb_diagnostic_v #gam dailycontains the accuracy metrics for each run... |
|
521 |
tb6 <-raster_prediction_obj_6$tb_diagnostic_v #Kriging daily methods |
|
522 |
tb7 <-raster_prediction_obj_7$tb_diagnostic_v #gwr daily methods |
|
523 |
|
|
524 |
prop_obj_gam_s<-calc_stat_prop_tb(names_mod,raster_prediction_obj_5,testing=FALSE) #training accuracy with hold out proportion |
|
525 |
prop_obj_kriging_s<-calc_stat_prop_tb(names_mod,raster_prediction_obj_6,testing=FALSE) |
|
526 |
prop_obj_gwr_s<-calc_stat_prop_tb(names_mod,raster_prediction_obj_7,testing=FALSE) |
|
527 |
|
|
528 |
plot(prop_obj_gam_s$avg_tb$rmse ~ prop_obj_gam_s$avg_tb$prop, type="b",) |
|
529 |
|
|
530 |
############### |
|
531 |
#### plot figure 5 |
|
532 |
#set up the output file to plot |
|
533 |
res_pix<-480 |
|
534 |
col_mfrow<-2 |
|
535 |
row_mfrow<-1 |
|
536 |
png_file_name<- paste("Figure_5_overtraining_tendency_and_holdout_proportion_",out_prefix,".png", sep="") |
|
537 |
png(filename=file.path(out_dir,png_file_name), |
|
538 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
539 |
par(mfrow=c(row_mfrow,col_mfrow)) |
|
540 |
|
|
541 |
col_t<-c("red","blue","black") |
|
542 |
pch_t<- 1:length(col_t) |
|
543 |
##Make this a function??? |
|
544 |
y_range<-range(prop_obj_kriging$avg_tb$rmse,prop_obj_kriging_s$avg_tb$rmse) |
|
545 |
#y_range<-range(prop_obj_gam$avg_tb$rmse,prop_obj_gam_s$avg_tb$rmse) |
|
546 |
#plot(prop_obj_gam$avg_tb$rmse ~ prop_obj_gam$avg_tb$prop, ylab="",xlab="",type="b",col=c("red"),pch=pch_t[1],ylim=y_range,lty=2) |
|
547 |
#lines(prop_obj_gam_s$avg_tb$rmse ~ prop_obj_gam_s$avg_tb$prop, ylab="",xlab="",type="b",pch=pch_t[1],ylim=y_range,col=c("red")) |
|
548 |
#lines(prop_obj_gwr_s$avg_tb$rmse ~ prop_obj_gwr_s$avg_tb$prop, ylab="",xlab="",type="b",ylim=y_range,pch=pch_t[3],col=c("black")) |
|
549 |
#lines(prop_obj_gwr$avg_tb$rmse ~ prop_obj_gam$avg_tb$prop, ylab="",xlab="",type="b",ylim=y_range,pch=pch_t[3],,col=c("black"),lty=2) |
|
550 |
#lines(prop_obj_kriging$avg_tb$rmse ~ prop_obj_kriging$avg_tb$prop,ylab="",xlab="", type="b",ylim=y_range,pch=pch_t[2],,col=c("blue"),lty=2) |
|
551 |
#lines(prop_obj_kriging_s$avg_tb$rmse ~ prop_obj_kriging_s$avg_tb$prop,ylab="",xlab="",type="b",ylim=y_range,pch=pch_t[2],col=c("blue")) |
|
552 |
|
|
553 |
plot_ac_holdout_prop<- function(l_prop,l_col_t,l_pch_t,add_CI,y_range){ |
|
554 |
|
|
555 |
for(i in 1:length(l_prop)){ |
|
556 |
if(i==1){ |
|
557 |
plot(l_prop[[i]]$avg_tb$rmse ~ l_prop[[i]]$avg_tb$prop,ylab="",xlab="",type="b",pch=l_pch_t[i],ylim=y_range,col=l_col_t[i],lty=l_lty_t[i]) |
|
558 |
}else{ |
|
559 |
lines(l_prop[[i]]$avg_tb$rmse ~ l_prop[[i]]$avg_tb$prop,ylab="",xlab="",type="b",pch=l_pch_t[i],ylim=y_range,col=l_col_t[i],lty=l_lty_t[i]) |
|
560 |
} |
|
561 |
if(add_CI[i]==TRUE){ |
|
562 |
ciw <- qt(0.975, l_prop[[i]]$n_tb$rmse) * l_prop[[i]]$sd_tb$rmse / sqrt(l_prop[[i]]$n_tb$rmse) |
|
563 |
plotCI(y=l_prop[[i]]$avg_tb$rmse, x=l_prop[[i]]$avg_tb$prop, uiw=ciw, col=l_col_t[i], barcol="blue", lwd=1,pch=l_pch_t[i], |
|
564 |
add=TRUE) |
|
565 |
} |
|
566 |
} |
|
567 |
} |
|
568 |
|
|
569 |
l_prop<- list(prop_obj_gam,prop_obj_gam_s,prop_obj_gwr_s,prop_obj_gwr,prop_obj_kriging,prop_obj_kriging_s) |
|
570 |
|
|
571 |
l_col_t <- c("red","red","black","black","blue","blue") |
|
572 |
l_pch_t <- c(1,1,3,3,2,2) |
|
573 |
l_lty_t <- c(2,1,1,2,2,1) |
|
574 |
add_CI <- c(FALSE,FALSE,FALSE,FALSE,FALSE,FALSE) |
|
575 |
add_CI <- c(TRUE,TRUE,TRUE,TRUE,TRUE,TRUE) |
|
576 |
y_range<-c(0.5,3) |
|
577 |
|
|
578 |
plot_ac_holdout_prop(l_prop,l_col_t,l_pch_t,add_CI,y_range) |
|
247 |
######### Figure 3: daily mean compared to monthly mean |
|
579 | 248 |
|
580 |
legend_text <- c("GAM","Kriging","GWR") |
|
581 |
#legend_text <- c("GAM","Kriging","GWR","training","testing") |
|
582 |
|
|
583 |
#legend("topleft",legend=legend_text, |
|
584 |
# cex=0.9, pch=c(pch_t,NA,NA),col=c(col_t,"white","white"),lty=c(NA,NA,NA,1,2),bty="n") |
|
585 |
legend("topleft",legend=legend_text, |
|
586 |
cex=0.9, pch=c(pch_t),col=c(col_t),lty=c(1,1,1),bty="n") |
|
587 |
legend_text_data <-c("training","testing") |
|
588 |
legend("top",legend=legend_text_data, |
|
589 |
cex=0.9, lty=c(1,2),bty="n") |
|
590 |
|
|
591 |
title(main="Training and testing RMSE for hold out and methods", |
|
592 |
xlab="Hold out validation/testing proportion", |
|
593 |
ylab="RMSE (C)") |
|
594 |
|
|
595 |
|
|
596 |
boxplot(diff_mae_data_mult[-4]) #plot differences in training and testing accuracies for three methods |
|
597 |
|
|
598 |
title(main="Difference between training and testing MAE", |
|
599 |
xlab="Interpolation method", |
|
600 |
ylab="MAE (C)") |
|
601 |
|
|
602 |
dev.off() |
|
603 |
|
|
604 |
############### STUDY TIME AND accuracy |
|
605 |
#########Figure 6: Monthly RMSE averages for the three interpolation methods: GAM, GWR and Kriging. |
|
606 |
|
|
607 |
mae_tmp<- data.frame(gam=tb1[tb1$pred_mod=="mod1",c("mae")], |
|
608 |
kriging=tb3[tb3$pred_mod=="mod1",c("mae")], |
|
609 |
gwr=tb4[tb4$pred_mod=="mod1",c("mae")]) |
|
610 |
|
|
611 |
plot(mae_tmp$gam,col=c("red"),type="b",pch=1) |
|
612 |
lines(mae_tmp$kriging,col=c("blue"),type="b",pch=2) |
|
613 |
lines(mae_tmp$gwr,col=c("black"),type="b",pch=2) |
|
614 |
legend("topleft",legend=legend_text, |
|
615 |
cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n") |
|
616 |
|
|
617 |
max(mae_tmp$gam) |
|
618 |
|
|
619 |
x2<-tb1[tb1$pred_mod=="mod1",c("mae","date")] |
|
620 |
arrange(x2,desc(mae)) |
|
621 |
|
|
622 |
#kriging=tb3[tb3$pred_mod=="mod1",c("mae")], |
|
623 |
# gwr=tb4[tb4$pred_mod=="mod1",c("mae")]) |
|
624 |
|
|
625 |
##### MONTHLY AVERAGES |
|
626 |
|
|
627 |
tb1_month<-raster_prediction_obj_1$summary_month_metrics_v[[1]] #note that this is for model1 |
|
628 |
tb3_month<-raster_prediction_obj_3$summary_month_metrics_v[[1]] |
|
629 |
tb4_month<- raster_prediction_obj_4$summary_month_metrics_v[[1]] |
|
630 |
|
|
631 |
y_range<-range(tb1_month$mae,tb3_month$mae,tb4_month$mae) |
|
632 |
plot(1:12,tb1_month$mae,col=c("red"),type="b",ylim=y_range) |
|
633 |
lines(1:12,tb3_month$mae,col=c("blue"),type="b") |
|
634 |
lines(1:12,tb4_month$mae,col=c("black"),type="b") |
|
635 |
|
|
636 |
add_month_tag<-function(tb){ |
|
637 |
date<-strptime(tb$date, "%Y%m%d") # interpolation date being processed |
|
638 |
month<-strftime(date, "%m") # current month of the date being processed |
|
639 |
} |
|
640 |
tb1$month<-add_month_tag(tb1) |
|
641 |
tb3$month<-add_month_tag(tb3) |
|
642 |
tb4$month<-add_month_tag(tb4) |
|
643 |
|
|
644 |
metric_name<-"mae" |
|
645 |
month_data_list<-list(gam=tb1[tb1$pred_mod=="mod1",c(metric_name,"month")], |
|
646 |
kriging=tb3[tb3$pred_mod=="mod1",c(metric_name,"month")], |
|
647 |
gwr=tb4[tb4$pred_mod=="mod1",c(metric_name,"month")]) |
|
648 |
y_range<-range(unlist(month_data_list)) |
|
649 |
|
|
650 |
|
|
651 |
#boxplot(test[[metric_ac]]~test[[c("month")]],outline=FALSE,horizontal=FALSE,cex=0.5, |
|
652 |
# ylab=paste(metric_ac,"in degree C",sep=" "),,axisnames=FALSE,axes=FALSE) |
|
653 |
#boxplot(test[[metric_ac]]~test[[c("month")]],outline=FALSE,horizontal=FALSE,cex=0.5, |
|
654 |
# ylab=paste(metric_ac,"in degree C",sep=" ")) |
|
655 |
#axis(1, labels = FALSE) |
|
656 |
## Create some text labels |
|
657 |
#labels <- month.abb # abbreviated names for each month |
|
658 |
## Plot x axis labels at default tick marks |
|
659 |
#text(1:length(labels), par("usr")[3] - 0.25, srt = 45, adj = 1, |
|
660 |
# labels = labels, xpd = TRUE) |
|
661 |
#axis(2) |
|
662 |
#box() |
|
663 |
|
|
664 |
#Now plot figure 6 |
|
665 |
res_pix<-480 |
|
666 |
col_mfrow<-2 |
|
667 |
#row_mfrow<-2 |
|
668 |
row_mfrow<-1 |
|
669 |
|
|
670 |
png_file_name<- paste("Figure_6_monthly_accuracy_",out_prefix,".png", sep="") |
|
671 |
png(filename=file.path(out_dir,png_file_name), |
|
672 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
673 |
par(mfrow=c(row_mfrow,col_mfrow)) |
|
674 |
|
|
675 |
col_t<-c("red","blue","black") |
|
676 |
pch_t<- 1:length(col_t) |
|
677 |
legend_text <- c("GAM","Kriging","GWR") |
|
678 |
|
|
679 |
y_range<-range(tb1_month$mae,tb3_month$mae,tb4_month$mae) |
|
680 |
xlab_tick <- unique(tb1$month) |
|
681 |
xlab_text <-"Month" |
|
682 |
ylab_text <- "MAE (C)" |
|
683 |
plot(1:12,tb1_month$mae,col=c("red"),type="b",ylim=y_range,xlab=xlab_text,ylab=ylab_text,xaxt="n") |
|
684 |
lines(1:12,tb3_month$mae,col=c("blue"),type="b") |
|
685 |
lines(1:12,tb4_month$mae,col=c("black"),type="b") |
|
686 |
axis(1,at=1:12,labels=xlab_tick) |
|
687 |
title(main="Monthly average MAE") |
|
688 |
legend("topleft",legend=legend_text, |
|
689 |
cex=0.9, pch=c(pch_t),col=c(col_t),lty=c(1,1,1),bty="n") |
|
690 |
|
|
691 |
#Second plot |
|
692 |
ylab_text<-"MAE (C)" |
|
693 |
xlab_text<-"Month" |
|
694 |
#y_range<-range(month_data_list$gam$mae,month_data_list$kriging$mae,month_data_list$gwr$mae) |
|
695 |
#y_range<-range(month_data_list$gam$mae) |
|
696 |
boxplot(mae~month,data=month_data_list$gam,main="Monthly MAE boxplot", xlab=xlab_text,ylab=ylab_text,outline=FALSE) |
|
697 |
#boxplot(mae~month,data=month_data_list$kriging,ylim=y_range,main="Kriging",ylab=ylab_text,outline=FALSE) |
|
698 |
#boxplot(mae~month,data=month_data_list$gwr,ylim=y_range,main="GWR",ylab=ylab_text,outline=FALSE) |
|
699 |
|
|
700 |
dev.off() |
|
701 |
|
|
702 |
#Now generate table 5 |
|
703 |
|
|
704 |
test<-boxplot(mae~month,data=month_data_list$gam,main="GAM",ylab=ylab_text,outline=FALSE) |
|
705 |
|
|
706 |
length(tb1_month$mae) |
|
707 |
names(tb1_month) |
|
708 |
|
|
709 |
#Calculate standard deviation for each metric |
|
710 |
sd1 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_1,"sd") # see function script |
|
711 |
sd2 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_2,"sd") # standard deviation for baseline 2 |
|
712 |
sd3 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_3,"sd") # kriging |
|
713 |
sd4 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_4,"sd") #gwr |
|
714 |
|
|
715 |
avg_v1 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_1,"mean") # see function script |
|
716 |
avg_v2 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_2,"mean") # standard deviation for baseline 2 |
|
717 |
avg_v3 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_3,"mean") # kriging |
|
718 |
avg_v4 <- calc_stat_month_from_raster_prediction_obj(raster_prediction_obj_4,"mean") #gwr |
|
719 |
|
|
720 |
#Combined sd in one table for mod1 (baseline 2) |
|
721 |
table_sd <- do.call(cbind,list(gam=sd1[sd1$pred_mod=="mod1",c("rmse")], |
|
722 |
kriging=sd3[sd3$pred_mod=="mod1",c("rmse")], |
|
723 |
gwr=sd4[sd4$pred_mod=="mod1",c("rmse")])) #table containing the sd for the three mdethods for baseline 2 |
|
724 |
table_sd <- as.data.frame(round(table_sd,digit=3)) #round to three digits the differences |
|
725 |
|
|
726 |
#Combined mean in one table for mod1 (baseline 2) |
|
727 |
table_avg <- do.call(cbind,list(gam=avg_v1[avg_v1$pred_mod=="mod1",c("rmse")], |
|
728 |
kriging=avg_v3[sd3$pred_mod=="mod1",c("rmse")], |
|
729 |
gwr=avg_v4[avg_v4$pred_mod=="mod1",c("rmse")])) #table containing the sd for the three mdethods for baseline 2 |
|
730 |
table_avg <- as.data.frame(round(table_avg,digit=3)) #round to three digits the differences |
|
731 |
|
|
732 |
#combined tables with accuracy metrics and their standard deviations |
|
733 |
table5_paper <-table_combined_symbol(table_avg,table_sd,"±") |
|
734 |
table5_paper$month <- month.abb |
|
735 |
|
|
736 |
file_name<-paste("table5_comparisons_monthly_averages_interpolation_methods_paper","_",out_prefix,".txt",sep="") |
|
737 |
write.table(as.data.frame(table5_paper),file=file_name,sep=",") |
|
738 |
|
|
739 |
####### FIGURE 7: Spatial pattern ###### |
|
740 |
|
|
741 |
y_var_name <-"dailyTmax" |
|
742 |
index<-244 #index corresponding to January 1 |
|
743 |
|
|
744 |
lf1 <- raster_prediction_obj_1$method_mod_obj[[index]][[y_var_name]] #select relevant raster images for the given dates |
|
745 |
lf3 <- raster_prediction_obj_3$method_mod_obj[[index]][[y_var_name]] |
|
746 |
lf4 <- raster_prediction_obj_4$method_mod_obj[[index]][[y_var_name]] |
|
747 |
|
|
748 |
date_selected <- "20109101" |
|
749 |
methods_names <-c("gam","kriging","gwr") |
|
750 |
names_layers<-methods_names |
|
751 |
lf <-list(lf1$mod1,lf3$mod1,lf4$mod1) |
|
752 |
#lf <-lf[[1]] |
|
753 |
|
|
754 |
pred_temp_s <-stack(lf) |
|
755 |
#predictions<-mask(predictions,mask_rast) |
|
756 |
names(pred_temp_s)<-names_layers |
|
757 |
s.range <- c(min(minValue(pred_temp_s)), max(maxValue(pred_temp_s))) |
|
758 |
#s.range <- s.range+c(5,-5) |
|
759 |
col.breaks <- pretty(s.range, n=200) |
|
760 |
lab.breaks <- pretty(s.range, n=100) |
|
761 |
temp.colors <- colorRampPalette(c('blue', 'white', 'red')) |
|
762 |
max_val<-s.range[2] |
|
763 |
min_val <-s.range[1] |
|
764 |
#max_val<- -10 |
|
765 |
min_val <- 0 |
|
766 |
layout_m<-c(1,3) #one row two columns |
|
767 |
|
|
768 |
png(paste("Figure7__spatial_pattern_tmax_prediction_levelplot_",date_selected,out_prefix,".png", sep=""), |
|
769 |
height=480*layout_m[1],width=480*layout_m[2]) |
|
770 |
|
|
771 |
levelplot(pred_temp_s,main="Interpolated Surfaces Method Comparison", ylab=NULL,xlab=NULL, |
|
772 |
par.settings = list(axis.text = list(font = 2, cex = 1.3),layout=layout_m, |
|
773 |
par.main.text=list(font=2,cex=2),strip.background=list(col="white")),par.strip.text=list(font=2,cex=1.5), |
|
774 |
names.attr=names_layers,col.regions=temp.colors,at=seq(max_val,min_val,by=0.01)) |
|
775 |
#col.regions=temp.colors(25)) |
|
776 |
dev.off() |
|
777 |
|
|
778 |
## FIGURE COMPARISON OF MODELS COVARRIATES |
|
779 |
|
|
780 |
lf2 <- raster_prediction_obj_2$method_mod_obj[[index]][[y_var_name]] |
|
781 |
lf2 #contains the models for gam |
|
782 |
|
|
783 |
pred_temp_s <-stack(lf2) |
|
784 |
date_selected <- "20109101" |
|
785 |
#names_layers <-c("mod1=s(lat,long)+s(elev)","mod4=s(lat,long)+s(LST)","diff=mod1-mod4") |
|
786 |
names_layers <-c("mod1 = s(lat,long)","mod2 = s(lat,long)+s(elev)","mod3 = s(lat,long)+s(N_w)","mod4 = s(lat,long)+s(E_w)", |
|
787 |
"mod5 = s(lat,long)+s(LST)","mod6 = s(lat,long)+s(DISTOC)","mod7 = s(lat,long)+s(LC1)", |
|
788 |
"mod8 = s(lat,long)+s(LC1,LST)","mod9 = s(lat,long)+s(CANHGHT)","mod10 = s(lat,long)+s(LST,CANHGHT)") |
|
789 |
|
|
790 |
#names_layers<-names(pred_temp_s) |
|
791 |
#names(pred_temp_s)<-names_layers |
|
792 |
|
|
793 |
s.range <- c(min(minValue(pred_temp_s)), max(maxValue(pred_temp_s))) |
|
794 |
#s.range <- s.range+c(5,-5) |
|
795 |
col.breaks <- pretty(s.range, n=200) |
|
796 |
lab.breaks <- pretty(s.range, n=100) |
|
797 |
temp.colors <- colorRampPalette(c('blue', 'white', 'red')) |
|
798 |
max_val<-s.range[2] |
|
799 |
min_val <-s.range[1] |
|
800 |
#max_val<- -10 |
|
801 |
#min_val <- 0 |
|
802 |
layout_m<-c(4,3) #one row two columns |
|
803 |
|
|
804 |
png(paste("Figure_7_spatial_pattern_tmax_prediction_models_baseline1_gam_levelplot_",date_selected,out_prefix,".png", sep=""), |
|
805 |
height=480*layout_m[1],width=480*layout_m[2]) |
|
806 |
|
|
807 |
levelplot(pred_temp_s,main="Interpolated Surfaces Model Comparison baseline 1", ylab=NULL,xlab=NULL, |
|
808 |
par.settings = list(axis.text = list(font = 2, cex = 1.3),layout=layout_m, |
|
809 |
par.main.text=list(font=2,cex=2),strip.background=list(col="white")),par.strip.text=list(font=2,cex=1.5), |
|
810 |
names.attr=names_layers,col.regions=temp.colors,at=seq(max_val,min_val,by=0.01)) |
|
811 |
#col.regions=temp.colors(25)) |
|
812 |
dev.off() |
|
813 |
|
|
814 |
|
|
815 |
################ #FIGURE 8 |
|
816 |
lf1 <- raster_prediction_obj_1$method_mod_obj[[index]][[y_var_name]] |
|
817 |
lf1 #contains the models for gam |
|
818 |
|
|
819 |
pred_temp_s <-stack(lf1$mod1,lf1$mod4) |
|
820 |
date_selected <- "20109101" |
|
821 |
#names_layers <-c("mod1=s(lat,long)+s(elev)","mod4=s(lat,long)+s(LST)","diff=mod1-mod4") |
|
822 |
names_layers <-c("mod1 = s(lat,long)+s(elev)","mod4 = s(lat,long)+s(LST)") |
|
823 |
names(pred_temp_s)<-names_layers |
|
824 |
s.range <- c(min(minValue(pred_temp_s)), max(maxValue(pred_temp_s))) |
|
825 |
#s.range <- s.range+c(5,-5) |
|
826 |
col.breaks <- pretty(s.range, n=200) |
|
827 |
lab.breaks <- pretty(s.range, n=100) |
|
828 |
temp.colors <- colorRampPalette(c('blue', 'white', 'red')) |
|
829 |
max_val<-s.range[2] |
|
830 |
min_val <-s.range[1] |
|
831 |
#max_val<- -10 |
|
832 |
min_val <- 0 |
|
833 |
layout_m<-c(1,2) #one row two columns |
|
834 |
|
|
835 |
png(paste("Figure_8a_spatial_pattern_tmax_prediction_models_gam_levelplot_",date_selected,out_prefix,".png", sep=""), |
|
836 |
height=480*layout_m[1],width=480*layout_m[2]) |
|
837 |
|
|
838 |
levelplot(pred_temp_s,main="Interpolated Surfaces Model Comparison", ylab=NULL,xlab=NULL, |
|
839 |
par.settings = list(axis.text = list(font = 2, cex = 1.3),layout=layout_m, |
|
840 |
par.main.text=list(font=2,cex=2),strip.background=list(col="white")),par.strip.text=list(font=2,cex=1.5), |
|
841 |
names.attr=names_layers,col.regions=temp.colors,at=seq(max_val,min_val,by=0.01)) |
|
842 |
#col.regions=temp.colors(25)) |
|
843 |
|
|
844 |
#col.regions=temp.colors(25)) |
|
845 |
dev.off() |
|
846 |
|
|
847 |
|
|
848 |
diff<-raster(lf1$mod1)-raster(lf1$mod4) |
|
849 |
names_layers <- c("difference=mod1-mod4") |
|
850 |
names(diff) <- names_layers |
|
851 |
|
|
852 |
|
|
853 |
png(paste("Figure_8b_spatial_pattern_tmax_prediction_models_gam_levelplot_",date_selected,out_prefix,".png", sep=""), |
|
854 |
height=480*layout_m[1],width=480*layout_m[2]) |
|
855 |
|
|
856 |
plot(diff,col=temp.colors(100),main=names_layers) |
|
857 |
#levelplot(diff,main="Interpolated Surfaces Model Comparison", ylab=NULL,xlab=NULL, |
|
858 |
# par.settings = list(axis.text = list(font = 2, cex = 1.3),layout=c(1,1), |
|
859 |
# par.main.text=list(font=2,cex=2),strip.background=list(col="white")),par.strip.text=list(font=2,cex=1.5), |
|
860 |
# names.attr=names_layers,col.regions=temp.colors) |
|
861 |
dev.off() |
|
862 |
|
|
863 |
######## NOW GET A ACCURACY BY STATIONS |
|
864 |
|
|
865 |
list_data_s <-extract_list_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"data_s") |
|
866 |
list_data_v <-extract_list_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"data_v") |
|
867 |
|
|
868 |
#number of observations per day |
|
869 |
year_nbv <- sapply(list_data_v,FUN=length) |
|
870 |
year_nbs <- sapply(list_data_s,FUN=length) |
|
871 |
nb_df <- data.frame(nv=year_nbv,ns=year_nbs) |
|
872 |
nb_df$n_tot <- year_nbv + year_nbs |
|
873 |
range(nb_df$n_tot) |
|
874 |
|
|
875 |
data_v_test <- list_data_v[[1]] |
|
876 |
|
|
877 |
#Convert sp data.frame and combined them in one unique df, see function define earlier |
|
878 |
data_v_combined <-convert_spdf_to_df_from_list(list_data_v) #long rownames |
|
879 |
names_var<-c("res_mod1","res_mod2","res_mod3","res_mod4","res_mod5","res_mod6","res_mod7","res_mod8") |
|
880 |
|
|
881 |
limit_val<- c(-30,-2.57,0,2.57,30) |
|
882 |
data_v_combined$res_mod1_rc1 <- cut(data_v_combined$res_mod1,include.lowest=TRUE,breaks=limit_val) |
|
883 |
data_v_combined$res_mod1_rc1 |
|
884 |
|
|
885 |
t<-melt(data_v_combined, |
|
886 |
measure=names_var, |
|
887 |
id=c("res_mod1_rc1","id"), |
|
888 |
na.rm=T) |
|
889 |
|
|
890 |
n_tb<-cast(t,res_mod1_rc1+id~variable,length) |
|
891 |
n_tb_tot <-cast(t,id~variable,length) #number of times the stations was used for validation |
|
892 |
|
|
893 |
merge(n_tb$id |
|
894 |
dim(n_tb) |
|
895 |
#mae_tb <-cast(t,dst_cat1~variable,mae_fun) |
|
896 |
#rmse_tb <-cast(t,dst_cat1~variable,rmse_fun) |
|
897 |
#sd_abs_tb<-cast(t,dst_cat1~variable,sd_abs_fun) |
|
898 |
|
|
899 |
#avg_tb<-cast(t,dst_cat1~variable,mean) |
|
900 |
#sd_tb<-cast(t,dst_cat1~variable,sd) |
|
901 |
|
|
902 |
t<-melt(data_v_combined, |
|
903 |
measure=names_var, |
|
904 |
id=c("id"), |
|
905 |
na.rm=T) |
|
906 |
|
|
907 |
hist(data_v_combined) |
|
908 |
names(data_v_combined) |
|
909 |
|
|
910 |
mae_fun<-function(x){mean(abs(x))} #Mean Absolute Error give a residuals vector |
|
911 |
sd_abs_fun<-function(x){sd(abs(x))} #sd Absolute Error give a residuals vector |
|
912 |
|
|
913 |
mae_tb<-cast(t,id~variable,mae_fun) #join to station location... |
|
914 |
|
|
915 |
sd_abs_tb<-cast(t,dst_cat1~variable,sd_abs_fun) |
|
916 |
|
|
917 |
#avg_tb<-cast(t,dst_cat1~variable,mean) |
|
918 |
#sd_tb<-cast(t,dst_cat1~variable,sd) |
|
919 |
#n_tb<-cast(t,dst_cat1~variable,length) |
|
920 |
|
|
921 |
met_obj <-load_obj(file.path(in_dir1,met_obj_file_1)) |
|
922 |
stat_loc<-readOGR(dsn=in_dir1,layer=sub(".shp","",basename(met_obj$loc_stations))) |
|
923 |
|
|
924 |
data_v_mae <-merge(mae_tb,stat_loc,by.x=c("id"),by.y=c("STAT_ID")) |
|
925 |
hist(data_v_mae$res_mod1) |
|
926 |
mean(data_v_mae$res_mod1) |
|
927 |
|
|
928 |
coords<- data_v_mae[c('longitude','latitude')] #Define coordinates in a data frame |
|
929 |
CRS_interp<-proj4string(data_v_test) |
|
930 |
coordinates(data_v_mae)<-coords #Assign coordinates to the data frame |
|
931 |
proj4string(data_v_mae)<- proj4string(stat_loc) #Assign coordinates reference system in PROJ4 format |
|
932 |
data_v_mae<-spTransform(data_v_mae,CRS(CRS_interp)) #Project from WGS84 to new coord. system |
|
933 |
|
|
934 |
p<-bubble(data_v_mae,"res_mod1",maxsize=4,col=c("red"),fill=FALSE) |
|
935 |
#p<-bubble(data_v_mae,"res_mod1",maxsize=4,col=c("red"),fill=FALSE,key.entries=c(1,1.5,2,2.5,3,3.5,4,4.5)) |
|
936 |
|
|
937 |
p |
|
938 |
|
|
939 |
infile_reg_outline <- "/data/project/layers/commons/data_workflow/inputs/region_outlines_ref_files/OR83M_state_outline.shp" #input region outline defined by polygon: Oregon |
|
940 |
reg_outline <- readOGR(dsn=dirname(infile_reg_outline),layer=sub(".shp","",basename(infile_reg_outline))) |
|
941 |
|
|
942 |
p + layer(sp.polygons(reg_outline,lwd=0.9,col="darkgray")) |
|
943 |
|
|
944 |
p4<-bubble(data_v_mae,"res_mod4",maxsize=4,col=c("red"),fill=FALSE) |
|
945 |
p4 + layer(sp.polygons(reg_outline,lwd=0.9,col="darkgray")) |
|
946 |
|
|
947 |
col_t <- colorRampPalette(c('blue', 'white', 'red')) |
|
948 |
|
|
949 |
p_elev <-levelplot(subset(s_raster,"elev_s"),margin=FALSE) |
|
950 |
p4 <-bubble(data_v_mae[data_v_mae$res_mod4>2.134,],"res_mod4",maxsize=4,col=c("blue"),fill=FALSE) |
|
951 |
p_elev + p4 + layer(sp.polygons(reg_outline,lwd=0.9,col="green")) |
|
952 |
title("mod4") |
|
953 |
|
|
954 |
p_elev <-levelplot(subset(s_raster,"elev_s")) |
|
955 |
p1 <-bubble(data_v_mae[data_v_mae$res_mod1>2.109,],"res_mod1",maxsize=4,col=c("blue"),fill=FALSE) |
|
956 |
p_elev + p1 + layer(sp.polygons(reg_outline,lwd=0.9,col="green")) |
|
957 |
#bubble(data_v_mae,"res_mod1") |
|
958 |
#p<-spplot(data_v_mae,"res_mod1",maxsize=4,col=c("red")) |
|
959 |
#p |
|
960 |
#stations that are outliers in one but not the other... |
|
961 |
id_setdiff<-setdiff(data_v_mae[data_v_mae$res_mod1>2.109,]$id,data_v_mae[data_v_mae$res_mod4>2.134,]$id) |
|
962 |
|
|
963 |
data_id_setdiff <- data_v_mae[data_v_mae$id %in% id_setdiff,] |
|
964 |
|
|
965 |
p_elev +layer(sp.polygons(reg_outline,lwd=0.9,col="green")) + layer(sp.points(data_id_setdiff,pch=4,cex=2,col="pink")) |
|
966 |
|
|
967 |
############################### |
|
968 |
########## Prepare table 6 |
|
969 |
# Now get p values and other things... |
|
970 |
###baseline 2: s(lat,lon) + s(elev) |
|
971 |
|
|
972 |
l_obj<-vector("list",length=2) |
|
973 |
l_obj[[1]]<-raster_prediction_obj_1 |
|
974 |
l_obj[[2]]<-raster_prediction_obj_2 |
|
975 |
l_table <- vector("list",length=length(l_obj)) |
|
976 |
for (k in 1:length(l_obj)){ |
|
977 |
raster_prediction_obj<- l_obj[[k]] |
|
978 |
list_myModels <- extract_list_from_list_obj(raster_prediction_obj$method_mod_obj,"mod") |
|
979 |
|
|
980 |
list_models_info <-lapply(1:length(list_myModels),FUN=create_s_and_p_table_term_models,list_myModels) |
|
981 |
dates<-(extract_from_list_obj(raster_prediction_obj_1$method_mod_obj,"sampling_dat"))$date #get vector of dates |
|
982 |
names(list_models_info)<-dates #adding names to the list |
|
983 |
|
|
984 |
#Prepare and process p. value information regarding models: count number of times values were above a threshold. |
|
985 |
s.table_term_tb <-extract_from_list_obj(list_models_info,"s.table_term") |
|
986 |
|
|
987 |
threshold_val<-c(0.01,0.05,0.1) |
|
988 |
s.table_term_tb$p_val_rec1 <- s.table_term_tb[["p-value"]] < threshold_val[1] |
|
989 |
s.table_term_tb$p_val_rec2 <- s.table_term_tb[["p-value"]] < threshold_val[2] |
|
990 |
s.table_term_tb$p_val_rec3 <- s.table_term_tb[["p-value"]] < threshold_val[3] |
|
991 |
|
|
992 |
names_var <- c("p_val_rec1","p_val_rec2","p_val_rec3") |
|
993 |
t2<-melt(s.table_term_tb, |
|
994 |
measure=names_var, |
|
995 |
id=c("mod_name","term_name"), |
|
996 |
na.rm=T) |
|
997 |
|
|
998 |
summary_s.table_term2 <- cast(t2,term_name+mod_name~variable,sum) |
|
999 |
|
|
1000 |
#Now add AIC |
|
1001 |
AIC_models_tb <-extract_from_list_obj(list_models_info,"AIC_models") |
|
1002 |
AIC_models_tb |
|
1003 |
names_var <- c("AIC") |
|
1004 |
#id_var <- |
|
1005 |
t3<-melt(AIC_models_tb, |
|
1006 |
measure=names_var, |
|
1007 |
id=c("mod_name","term_name"), |
|
1008 |
na.rm=T) |
|
1009 |
|
|
1010 |
summary_AIC <- cast(t3,term_name+mod_name~variable,median) |
|
1011 |
summary_AIC$AIC <- round(summary_AIC[,c("AIC")],digit=2) #roundto three digits teh differences |
|
1012 |
|
|
1013 |
#Now combine tables and drop duplicate columns the combined table can be modified for the paper... |
|
1014 |
avg_s <- calc_stat_from_raster_prediction_obj(raster_prediction_obj,"mean",training=TRUE) |
|
1015 |
avg_v <- calc_stat_from_raster_prediction_obj(raster_prediction_obj,"mean",training=FALSE) |
|
1016 |
avg <- cbind(avg_s[,c("pred_mod","mae")],avg_v[,c("mae")]) |
|
1017 |
names(avg)<-c("pred_mod","mae_s","mae_v") |
|
1018 |
avg[,c("mae_s","mae_v")] <- round(avg[,c("mae_s","mae_v")],digit=2) |
|
1019 |
table <- merge(summary_AIC,avg,by.x="mod_name",by.y="pred_mod") |
|
1020 |
|
|
1021 |
tables_AIC_ac_p_val <-list(table,summary_s.table_term2) |
|
1022 |
names(tables_AIC_ac_p_val) <-c("table","s.table_p_val_term") |
|
1023 |
l_table[[k]] <- tables_AIC_ac_p_val |
|
1024 |
} |
|
1025 |
|
|
1026 |
## Now prepare table |
|
1027 |
s.table_p_val_term <- l_table[[1]]$s.table_p_val_term[-c(10:26),] |
|
1028 |
s.table_p_val_term <- s.table_p_val_term[order(s.table_p_val_term$mod_name),] |
|
1029 |
#summary_s.table_term2 <- summary_s.table_term2[-c(8,10),] |
|
1030 |
table <- l_table[[1]]$table |
|
1031 |
|
|
1032 |
table6a <- merge(s.table_p_val_term,table,by="mod_name") |
|
1033 |
table6a <- table6a[,-match(c("term_name.y"),names(table6a))] |
|
1034 |
#model_col<-c("Baseline2","Northness","Eastness","LST","DISTOC","Forest","CANHEIGHT","LST*Forest") # removed ,"LST*CANHEIGHT") |
|
1035 |
#names_table_col<-c("DiffMAE","DiffRMSE","DiffME","Diffr","Model") |
|
1036 |
|
|
1037 |
s.table_p_val_term <- l_table[[2]]$s.table_p_val_term[-c(11:19),] |
|
1038 |
s.table_p_val_term <- s.table_p_val_term[order(s.table_p_val_term$mod_name),] |
|
1039 |
#summary_s.table_term2 <- summary_s.table_term2[-c(8,10),] |
|
1040 |
tableb <- l_table[[2]]$table |
|
1041 |
|
|
1042 |
table6b <- merge(s.table_p_val_term,tableb,by="mod_name") |
|
1043 |
table6b <- table6b[,-match(c("term_name.y"),names(table6b))] |
|
1044 |
#model_col<-c("Baseline2","Northness","Eastness","LST","DISTOC","Forest","CANHEIGHT","LST*Forest") # removed ,"LST*CANHEIGHT") |
|
1045 |
#names_table_col<-c("DiffMAE","DiffRMSE","DiffME","Diffr","Model") |
|
1046 |
|
|
1047 |
#table6b[order(table6b$mod_name),] |
|
1048 |
|
|
1049 |
#Now write out table... |
|
1050 |
|
|
1051 |
file_name<-paste("table6a_paper","_",out_prefix,".txt",sep="") |
|
1052 |
write.table(table6a,file=file_name,sep=",") |
|
1053 |
|
|
1054 |
file_name<-paste("table6b_paper","_",out_prefix,".txt",sep="") |
|
1055 |
write.table(table6b,file=file_name,sep=",") |
|
1056 |
|
|
1057 |
######################## |
|
1058 |
### Prepare table 7: correlation matrix between covariates |
|
1059 |
|
|
1060 |
names(s_raster) |
|
249 |
################################################ |
|
250 |
######### Figure 4. RMSE multi-timescale mulitple hold out for FSS and CAI |
|
1061 | 251 |
|
1062 |
list_formulas<-raster_prediction_obj_2$method_mod_obj[[1]]$formulas |
|
1063 |
list_formulas <- lapply(list_formulas,FUN=as.formula) |
|
1064 |
covar_names_model <- unique(unlist(lapply(list_formulas,FUN=all.vars)))[-1] |
|
1065 |
covar_names_model <- c(covar_names_model[-6],c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06", |
|
1066 |
"mm_07","mm_08","mm_09","mm_10","mm_11","mm_12")) |
|
1067 |
covar_raster <- subset(s_raster,covar_names_model) |
|
252 |
################################################ |
|
253 |
######### Figure 5. RMSE multi-timescale mulitple hold out Overtraining tendency |
|
254 |
|
|
255 |
################################################ |
|
256 |
######### Figure 6: Spatial pattern of prediction for one day (maps) |
|
1068 | 257 |
|
1069 |
names(covar_raster) <- covar_names_model |
|
1070 |
corr_layers_covar <-layerStats(covar_raster,"pearson",na.rm=TRUE) |
|
1071 |
corr_mat <-round(corr_layers_covar[[1]], digit=2) |
|
1072 |
|
|
1073 |
file_name<-paste("table7_paper","_",out_prefix,".txt",sep="") |
|
1074 |
write.table(corr_mat,file=file_name,sep=",") |
|
1075 |
|
|
1076 |
#met_obj <-load_obj(file.path(in_dir1,met_obj_file_1)) |
|
1077 |
#stat_loc<-readOGR(dsn=in_dir1,layer=sub(".shp","",basename(met_obj$loc_stations))) |
|
1078 |
#dim(stat_loc) |
|
1079 | 258 |
|
1080 | 259 |
###################### END OF SCRIPT ####################### |
1081 | 260 |
|
Also available in: Unified diff
script for multi timescale paper, production of table 4