1 |
48e49857
|
Benoit Parmentier
|
##VALIDATION-ACCURACY ASSESSMENT
|
2 |
|
|
|
3 |
|
|
#The interpolation is done first at the monthly add delta.
|
4 |
|
|
#AUTHOR: Benoit Parmentier
|
5 |
f23e6e04
|
Benoit Parmentier
|
#DATE: 05/06/2013
|
6 |
48e49857
|
Benoit Parmentier
|
|
7 |
|
|
#Change this to allow explicitly arguments...
|
8 |
|
|
#Arguments:
|
9 |
|
|
#1)list of climatology files for all models...(365*nb of models)
|
10 |
|
|
#2)data_s:training
|
11 |
|
|
#3)data_v:testing
|
12 |
643c7d19
|
Benoit Parmentier
|
#4)list of dates??: index
|
13 |
48e49857
|
Benoit Parmentier
|
#5)stack of covariates: not needed at this this stage
|
14 |
|
|
#6)dst: data at the monthly time scale
|
15 |
|
|
|
16 |
|
|
#Function used in the script
|
17 |
|
|
|
18 |
643c7d19
|
Benoit Parmentier
|
calculate_accuracy_metrics<-function(i,list_param){
|
19 |
1d0a0b81
|
Benoit Parmentier
|
library(plyr)
|
20 |
48e49857
|
Benoit Parmentier
|
### Caculate accuracy metrics
|
21 |
|
|
calc_val_metrics<-function(x,y){
|
22 |
|
|
#This functions calculates accurayc metrics on given two vectors.
|
23 |
|
|
#Arguments: list of fitted models, raster stack of covariates
|
24 |
|
|
#Output: spatial grid data frame of the subset of tiles
|
25 |
|
|
#s_sgdf<-as(r_stack,"SpatialGridDataFrame") #Conversion to spatial grid data frame
|
26 |
|
|
|
27 |
|
|
residuals<-x-y
|
28 |
|
|
mae<-mean(abs(residuals),na.rm=T)
|
29 |
|
|
rmse<-sqrt(mean((residuals)^2,na.rm=T))
|
30 |
|
|
me<-mean(residuals,na.rm=T)
|
31 |
|
|
r<-cor(x,y,use="complete")
|
32 |
|
|
m50<-median(residuals,na.rm=T)
|
33 |
|
|
metrics_dat<-as.data.frame(cbind(mae,rmse,me,r,m50))
|
34 |
|
|
names(metrics_dat)<-c("mae","rmse","me","r","m50")
|
35 |
|
|
metrics_obj<-list(metrics_dat,as.data.frame(residuals))
|
36 |
|
|
names(metrics_obj)<-c("metrics_dat","residuals")
|
37 |
|
|
return(metrics_obj)
|
38 |
|
|
}
|
39 |
|
|
|
40 |
|
|
calc_val_metrics_rast <-function(df,y_ref,pred_names){
|
41 |
6cb81608
|
Benoit Parmentier
|
#Input parameters:
|
42 |
|
|
#1) df: data frame containing the observed and predicted variables (data_s or data_v)
|
43 |
|
|
#2) y_ref: observed variable correspond to y_var_name??
|
44 |
|
|
#3) pred_names: models run containig predicted values
|
45 |
|
|
|
46 |
|
|
# library
|
47 |
|
|
library(maptools)
|
48 |
|
|
|
49 |
|
|
## START SCRIPT
|
50 |
48e49857
|
Benoit Parmentier
|
|
51 |
|
|
list_metrics<-vector("list",length(pred_names))
|
52 |
|
|
list_residuals<-vector("list",length(pred_names))
|
53 |
|
|
names(list_metrics)<-pred_names
|
54 |
|
|
names(list_residuals)<-pred_names
|
55 |
|
|
for (j in 1:length(pred_names)){
|
56 |
|
|
pred_var<-pred_names[j]
|
57 |
|
|
metrics<-calc_val_metrics(df[[pred_var]],df[[y_ref]])
|
58 |
|
|
list_metrics[[j]]<-metrics[[1]]
|
59 |
|
|
list_residuals[[j]]<-metrics[[2]]
|
60 |
|
|
}
|
61 |
|
|
metrics_df<-do.call(rbind,list_metrics)
|
62 |
|
|
metrics_df$pred_mod <- pred_names #adding name column
|
63 |
|
|
residuals_df<-do.call(cbind,list_residuals) #creating data frame for residuals
|
64 |
|
|
names(residuals_df)<-paste("res",pred_names,sep="_")
|
65 |
|
|
|
66 |
|
|
accuracy_obj<-list(metrics_df,residuals_df) #output object
|
67 |
|
|
names(accuracy_obj)<-c("metrics","residuals")
|
68 |
|
|
return(accuracy_obj)
|
69 |
|
|
}
|
70 |
|
|
|
71 |
6cb81608
|
Benoit Parmentier
|
############### BEGIN SCRIPT ###########
|
72 |
48e49857
|
Benoit Parmentier
|
|
73 |
643c7d19
|
Benoit Parmentier
|
#PARSING INPUT PARAMETERS
|
74 |
f23e6e04
|
Benoit Parmentier
|
out_path <- list_param$out_path
|
75 |
bb747d71
|
Benoit Parmentier
|
day_list <- list_param$rast_day_year_list[[i]] #this is the list of raster files, may be daily or monthly predictions
|
76 |
|
|
names_mod <- names(day_list) #names of the predicted variables
|
77 |
08721acf
|
Benoit Parmentier
|
|
78 |
bb747d71
|
Benoit Parmentier
|
y_ref <- list_param$y_ref #This is the reference variable from which resituals and accuracy metrics are created
|
79 |
08721acf
|
Benoit Parmentier
|
multi_time_scale <- list_param$multi_time_scale
|
80 |
|
|
|
81 |
bb747d71
|
Benoit Parmentier
|
data_v <- list_param$list_data_v[[i]]
|
82 |
|
|
data_s <- list_param$list_data_s[[i]]
|
83 |
|
|
sampling_dat_day <- list_param$list_sampling_dat[[i]]
|
84 |
643c7d19
|
Benoit Parmentier
|
|
85 |
|
|
## Now create the stack
|
86 |
48e49857
|
Benoit Parmentier
|
|
87 |
|
|
rast_day_mod <- stack(day_list)
|
88 |
|
|
names(rast_day_mod) <- names(day_list)
|
89 |
08721acf
|
Benoit Parmentier
|
#Change to handle cases in which data_v is NULL!!!
|
90 |
|
|
|
91 |
bb747d71
|
Benoit Parmentier
|
ns <- nrow(data_s) # some loss of data might have happened because of the averaging...
|
92 |
|
|
nv <- nrow(data_v)
|
93 |
48e49857
|
Benoit Parmentier
|
|
94 |
|
|
#add sampling dat info...
|
95 |
|
|
N=length(names_mod)
|
96 |
|
|
|
97 |
08721acf
|
Benoit Parmentier
|
#Handle case of 0% hold out, monhtly or daily
|
98 |
|
|
if (nv > 0){
|
99 |
|
|
run_info<-cbind(sampling_dat_day,n=nv)
|
100 |
|
|
run_info[rep(seq_len(nrow(run_info)), each=N),] #repeating same row n times
|
101 |
|
|
|
102 |
|
|
extract_data_v<-extract(rast_day_mod,data_v,df=TRUE)
|
103 |
|
|
data_v <-spCbind(data_v,extract_data_v) #should match IDs before joining for good practice
|
104 |
bb747d71
|
Benoit Parmentier
|
metrics_v_obj<-calc_val_metrics_rast(data_v,y_ref,names_mod)
|
105 |
08721acf
|
Benoit Parmentier
|
metrics_v_df<-cbind(metrics_v_obj$metrics,run_info)
|
106 |
bb747d71
|
Benoit Parmentier
|
metrics_v_df["var_interp"]<-rep(y_ref,times=nrow(metrics_v_df))
|
107 |
08721acf
|
Benoit Parmentier
|
#Name of the variable interpolated, useful for cross-comparison between methods at later stages
|
108 |
|
|
data_v<-spCbind(data_v,metrics_v_obj$residuals)
|
109 |
|
|
}
|
110 |
|
|
|
111 |
|
|
extract_data_s<-extract(rast_day_mod,data_s,df=TRUE)
|
112 |
bb747d71
|
Benoit Parmentier
|
|
113 |
08721acf
|
Benoit Parmentier
|
data_s <-spCbind(data_s,extract_data_s)
|
114 |
|
|
|
115 |
bb747d71
|
Benoit Parmentier
|
metrics_s_obj <- calc_val_metrics_rast(data_s,y_ref,names_mod)
|
116 |
|
|
|
117 |
08721acf
|
Benoit Parmentier
|
run_info <- cbind(sampling_dat_day,n=ns)
|
118 |
48e49857
|
Benoit Parmentier
|
run_info[rep(seq_len(nrow(run_info)), each=N),]
|
119 |
08721acf
|
Benoit Parmentier
|
metrics_s_df <- cbind(metrics_s_obj$metrics,run_info)
|
120 |
bb747d71
|
Benoit Parmentier
|
metrics_s_df["var_interp"] <- rep(y_ref,times=nrow(metrics_s_df))
|
121 |
f23e6e04
|
Benoit Parmentier
|
#Name of the variable interpolated, useful for cross-comparison between methods at later stages
|
122 |
48e49857
|
Benoit Parmentier
|
|
123 |
08721acf
|
Benoit Parmentier
|
data_s <- spCbind(data_s,metrics_s_obj$residuals)
|
124 |
|
|
|
125 |
|
|
#prepare output object
|
126 |
|
|
|
127 |
|
|
if (nv > 0){
|
128 |
|
|
validation_obj<-list(metrics_s_df,metrics_v_df,data_s,data_v)
|
129 |
|
|
names(validation_obj)<-c("metrics_s","metrics_v","data_s","data_v")
|
130 |
|
|
}else{
|
131 |
|
|
validation_obj<-list(metrics_s_df,data_s)
|
132 |
|
|
names(validation_obj)<-c("metrics_s","data_s")
|
133 |
|
|
}
|
134 |
48e49857
|
Benoit Parmentier
|
|
135 |
|
|
return(validation_obj)
|
136 |
|
|
|
137 |
|
|
}
|
138 |
|
|
|
139 |
dc5bfc17
|
Benoit Parmentier
|
#### Function to create a data.frame from validation obj
|
140 |
|
|
extract_from_list_obj<-function(obj_list,list_name){
|
141 |
|
|
list_tmp<-vector("list",length(obj_list))
|
142 |
|
|
for (i in 1:length(obj_list)){
|
143 |
|
|
tmp<-obj_list[[i]][[list_name]] #double bracket to return data.frame
|
144 |
|
|
list_tmp[[i]]<-tmp
|
145 |
|
|
}
|
146 |
|
|
tb_list_tmp<-do.call(rbind,list_tmp) #long rownames
|
147 |
|
|
return(tb_list_tmp) #this is a data.frame
|
148 |
|
|
}
|
149 |
|
|
|
150 |
08721acf
|
Benoit Parmentier
|
#### Function to create a list from a object made up of a list with names e.g. method_mod_obj or clim_method_mod_obj
|
151 |
|
|
extract_list_from_list_obj<-function(obj_list,list_name){
|
152 |
|
|
#Create a list of an object from a given list of object using a name prodived as input
|
153 |
|
|
|
154 |
|
|
list_tmp<-vector("list",length(obj_list))
|
155 |
|
|
for (i in 1:length(obj_list)){
|
156 |
|
|
tmp<-obj_list[[i]][[list_name]] #double bracket to return data.frame
|
157 |
|
|
list_tmp[[i]]<-tmp
|
158 |
|
|
}
|
159 |
|
|
return(list_tmp) #this is a data.frame
|
160 |
|
|
}
|
161 |
|
|
|
162 |
dc5bfc17
|
Benoit Parmentier
|
#### Function to plot boxplot from data.frame table of accuracy metrics
|
163 |
|
|
|
164 |
f23e6e04
|
Benoit Parmentier
|
boxplot_from_tb <-function(tb_diagnostic,metric_names,out_prefix,out_path){
|
165 |
dc5bfc17
|
Benoit Parmentier
|
#now boxplots and mean per models
|
166 |
6cb81608
|
Benoit Parmentier
|
library(gdata) #Nesssary to use cbindX
|
167 |
|
|
|
168 |
|
|
### Start script
|
169 |
f23e6e04
|
Benoit Parmentier
|
y_var_name<-unique(tb_diagnostic$var_interp) #extract the name of interpolated variable: dailyTmax, dailyTmin
|
170 |
6cb81608
|
Benoit Parmentier
|
|
171 |
1d0a0b81
|
Benoit Parmentier
|
mod_names<-sort(unique(tb_diagnostic$pred_mod)) #models that have accuracy metrics
|
172 |
dc5bfc17
|
Benoit Parmentier
|
t<-melt(tb_diagnostic,
|
173 |
|
|
#measure=mod_var,
|
174 |
|
|
id=c("date","pred_mod","prop"),
|
175 |
|
|
na.rm=F)
|
176 |
f23e6e04
|
Benoit Parmentier
|
t$value<-as.numeric(t$value) #problem with char!!!
|
177 |
dc5bfc17
|
Benoit Parmentier
|
avg_tb<-cast(t,pred_mod~variable,mean)
|
178 |
f23e6e04
|
Benoit Parmentier
|
avg_tb$var_interp<-rep(y_var_name,times=nrow(avg_tb))
|
179 |
dc5bfc17
|
Benoit Parmentier
|
median_tb<-cast(t,pred_mod~variable,median)
|
180 |
f23e6e04
|
Benoit Parmentier
|
|
181 |
|
|
#avg_tb<-cast(t,pred_mod~variable,mean)
|
182 |
dc5bfc17
|
Benoit Parmentier
|
tb<-tb_diagnostic
|
183 |
1d0a0b81
|
Benoit Parmentier
|
|
184 |
|
|
#mod_names<-sort(unique(tb$pred_mod)) #kept for clarity
|
185 |
|
|
tb_mod_list<-lapply(mod_names, function(k) subset(tb, pred_mod==k)) #this creates a list of 5 based on models names
|
186 |
dc5bfc17
|
Benoit Parmentier
|
names(tb_mod_list)<-mod_names
|
187 |
1d0a0b81
|
Benoit Parmentier
|
#mod_metrics<-do.call(cbind,tb_mod_list)
|
188 |
6cb81608
|
Benoit Parmentier
|
#debug here
|
189 |
c9196413
|
Benoit Parmentier
|
if(length(tb_mod_list)>1){
|
190 |
|
|
mod_metrics<-do.call(cbindX,tb_mod_list) #column bind the list??
|
191 |
|
|
}else{
|
192 |
|
|
mod_metrics<-tb_mod_list[[1]]
|
193 |
|
|
}
|
194 |
|
|
|
195 |
1d0a0b81
|
Benoit Parmentier
|
test_names<-lapply(1:length(mod_names),function(k) paste(names(tb_mod_list[[1]]),mod_names[k],sep="_"))
|
196 |
c9196413
|
Benoit Parmentier
|
#test names are used when plotting the boxplot for the different models
|
197 |
1d0a0b81
|
Benoit Parmentier
|
names(mod_metrics)<-unlist(test_names)
|
198 |
|
|
rows_total<-lapply(tb_mod_list,nrow)
|
199 |
dc5bfc17
|
Benoit Parmentier
|
for (j in 1:length(metric_names)){
|
200 |
|
|
metric_ac<-metric_names[j]
|
201 |
1d0a0b81
|
Benoit Parmentier
|
mod_pat<-glob2rx(paste(metric_ac,"_*",sep=""))
|
202 |
dc5bfc17
|
Benoit Parmentier
|
mod_var<-grep(mod_pat,names(mod_metrics),value=TRUE) # using grep with "value" extracts the matching names
|
203 |
|
|
#browser()
|
204 |
|
|
test<-mod_metrics[mod_var]
|
205 |
f23e6e04
|
Benoit Parmentier
|
png(file.path(out_path,paste("boxplot_metric_",metric_ac, out_prefix,".png", sep="")))
|
206 |
|
|
#boxplot(test,outline=FALSE,horizontal=FALSE,cex=0.5,
|
207 |
|
|
# ylab=paste(metric_ac,"in degree C",sep=" "))
|
208 |
|
|
|
209 |
dc5bfc17
|
Benoit Parmentier
|
boxplot(test,outline=FALSE,horizontal=FALSE,cex=0.5,
|
210 |
f23e6e04
|
Benoit Parmentier
|
ylab=paste(metric_ac,"in degree C",sep=" "),axisnames=FALSE,axes=FALSE)
|
211 |
|
|
axis(1, labels = FALSE)
|
212 |
|
|
## Create some text labels
|
213 |
|
|
labels <- labels<- names(test)
|
214 |
|
|
## Plot x axis labels at default tick marks
|
215 |
|
|
text(1:ncol(test), par("usr")[3] - 0.25, srt = 45, adj = 1,
|
216 |
|
|
labels = labels, xpd = TRUE)
|
217 |
|
|
axis(2)
|
218 |
|
|
box()
|
219 |
1d0a0b81
|
Benoit Parmentier
|
#legend("bottomleft",legend=paste(names(rows_total),":",rows_total,sep=""),cex=0.7,bty="n")
|
220 |
617d83ff
|
Benoit Parmentier
|
#title(as.character(t(paste(t(names(rows_total)),":",rows_total,sep=""))),cex=0.8)
|
221 |
|
|
title(paste(metric_ac,"for",y_var_name,sep=" "),cex=0.8)
|
222 |
dc5bfc17
|
Benoit Parmentier
|
dev.off()
|
223 |
|
|
}
|
224 |
617d83ff
|
Benoit Parmentier
|
|
225 |
1d0a0b81
|
Benoit Parmentier
|
avg_tb$n<-rows_total #total number of predictions on which the mean is based
|
226 |
|
|
median_tb$n<-rows_total
|
227 |
dc5bfc17
|
Benoit Parmentier
|
summary_obj<-list(avg_tb,median_tb)
|
228 |
f23e6e04
|
Benoit Parmentier
|
names(summary_obj)<-c("avg","median")
|
229 |
dc5bfc17
|
Benoit Parmentier
|
return(summary_obj)
|
230 |
|
|
}
|
231 |
f23e6e04
|
Benoit Parmentier
|
#boxplot_month_from_tb(tb_diagnostic,metric_names,out_prefix,out_path)
|
232 |
dc5bfc17
|
Benoit Parmentier
|
## Function to display metrics by months/seasons
|
233 |
f23e6e04
|
Benoit Parmentier
|
boxplot_month_from_tb <-function(tb_diagnostic,metric_names,out_prefix,out_path){
|
234 |
617d83ff
|
Benoit Parmentier
|
|
235 |
|
|
#Generate boxplot per month for models and accuracy metrics
|
236 |
|
|
#Input parameters:
|
237 |
|
|
#1) df: data frame containing accurayc metrics (RMSE etc.) per day)
|
238 |
|
|
#2) metric_names: metrics used for validation
|
239 |
|
|
#3) out_prefix
|
240 |
|
|
#
|
241 |
|
|
|
242 |
|
|
#################
|
243 |
|
|
## BEGIN
|
244 |
f23e6e04
|
Benoit Parmentier
|
y_var_name<-unique(tb_diagnostic$var_interp) #extract the name of interpolated variable: dailyTmax, dailyTmin
|
245 |
bcef031f
|
Benoit Parmentier
|
date_f<-strptime(tb_diagnostic$date, "%Y%m%d") # interpolation date being processed
|
246 |
|
|
tb_diagnostic$month<-strftime(date_f, "%m") # current month of the date being processed
|
247 |
|
|
mod_names<-sort(unique(tb_diagnostic$pred_mod)) #models that have accuracy metrics
|
248 |
|
|
tb_mod_list<-lapply(mod_names, function(k) subset(tb_diagnostic, pred_mod==k)) #this creates a list of 5 based on models names
|
249 |
|
|
names(tb_mod_list)<-mod_names
|
250 |
|
|
t<-melt(tb_diagnostic,
|
251 |
|
|
#measure=mod_var,
|
252 |
|
|
id=c("date","pred_mod","prop","month"),
|
253 |
|
|
na.rm=F)
|
254 |
f23e6e04
|
Benoit Parmentier
|
t$value<-as.numeric(t$value) #problem with char!!!
|
255 |
617d83ff
|
Benoit Parmentier
|
tb_mod_m_avg <-cast(t,pred_mod+month~variable,mean) #monthly mean for every model
|
256 |
f23e6e04
|
Benoit Parmentier
|
tb_mod_m_avg$var_interp<-rep(y_var_name,times=nrow(tb_mod_m_avg))
|
257 |
|
|
|
258 |
617d83ff
|
Benoit Parmentier
|
tb_mod_m_sd <-cast(t,pred_mod+month~variable,sd) #monthly sd for every model
|
259 |
|
|
|
260 |
f23e6e04
|
Benoit Parmentier
|
tb_mod_m_list <-lapply(mod_names, function(k) subset(tb_mod_m_avg, pred_mod==k)) #this creates a list of 5 based on models names
|
261 |
|
|
|
262 |
617d83ff
|
Benoit Parmentier
|
for (k in 1:length(mod_names)){
|
263 |
|
|
mod_metrics <-tb_mod_list[[k]]
|
264 |
|
|
current_mod_name<- mod_names[k]
|
265 |
|
|
for (j in 1:length(metric_names)){
|
266 |
bcef031f
|
Benoit Parmentier
|
metric_ac<-metric_names[j]
|
267 |
617d83ff
|
Benoit Parmentier
|
col_selected<-c(metric_ac,"month")
|
268 |
|
|
test<-mod_metrics[col_selected]
|
269 |
f23e6e04
|
Benoit Parmentier
|
png(file.path(out_path,paste("boxplot_metric_",metric_ac,"_",current_mod_name,"_by_month_",out_prefix,".png", sep="")))
|
270 |
617d83ff
|
Benoit Parmentier
|
boxplot(test[[metric_ac]]~test[[c("month")]],outline=FALSE,horizontal=FALSE,cex=0.5,
|
271 |
f23e6e04
|
Benoit Parmentier
|
ylab=paste(metric_ac,"in degree C",sep=" "),,axisnames=FALSE,axes=FALSE)
|
272 |
|
|
#boxplot(test[[metric_ac]]~test[[c("month")]],outline=FALSE,horizontal=FALSE,cex=0.5,
|
273 |
|
|
# ylab=paste(metric_ac,"in degree C",sep=" "))
|
274 |
|
|
axis(1, labels = FALSE)
|
275 |
|
|
## Create some text labels
|
276 |
|
|
labels <- month.abb # abbreviated names for each month
|
277 |
|
|
## Plot x axis labels at default tick marks
|
278 |
|
|
text(1:length(labels), par("usr")[3] - 0.25, srt = 45, adj = 1,
|
279 |
|
|
labels = labels, xpd = TRUE)
|
280 |
|
|
axis(2)
|
281 |
|
|
box()
|
282 |
bcef031f
|
Benoit Parmentier
|
#legend("bottomleft",legend=paste(names(rows_total),":",rows_total,sep=""),cex=0.7,bty="n")
|
283 |
617d83ff
|
Benoit Parmentier
|
title(paste(metric_ac,"for",current_mod_name,"by month",sep=" "))
|
284 |
bcef031f
|
Benoit Parmentier
|
dev.off()
|
285 |
617d83ff
|
Benoit Parmentier
|
}
|
286 |
|
|
|
287 |
bcef031f
|
Benoit Parmentier
|
}
|
288 |
617d83ff
|
Benoit Parmentier
|
summary_month_obj <-c(tb_mod_m_list,tb_mod_m_avg,tb_mod_m_sd)
|
289 |
|
|
names(summary_month_obj)<-c("tb_list","metric_month_avg","metric_month_sd")
|
290 |
|
|
return(summary_month_obj)
|
291 |
dc5bfc17
|
Benoit Parmentier
|
}
|
292 |
|
|
|
293 |
1d0a0b81
|
Benoit Parmentier
|
|
294 |
48e49857
|
Benoit Parmentier
|
####################################
|
295 |
|
|
############ END OF SCRIPT #########
|