1
|
##VALIDATION-ACCURACY ASSESSMENT
|
2
|
|
3
|
#The interpolation is done first at the monthly add delta.
|
4
|
#AUTHOR: Benoit Parmentier
|
5
|
#DATE: 03/12/2013
|
6
|
|
7
|
#Change this to allow explicitly arguments...
|
8
|
#Arguments:
|
9
|
#1)list of climatology files for all models...(365*nb of models)
|
10
|
#2)data_s:training
|
11
|
#3)data_v:testing
|
12
|
#4)list of dates??: index
|
13
|
#5)stack of covariates: not needed at this this stage
|
14
|
#6)dst: data at the monthly time scale
|
15
|
|
16
|
#Function used in the script
|
17
|
|
18
|
calculate_accuracy_metrics<-function(i,list_param){
|
19
|
library(plyr)
|
20
|
### Caculate accuracy metrics
|
21
|
calc_val_metrics<-function(x,y){
|
22
|
#This functions calculates accurayc metrics on given two vectors.
|
23
|
#Arguments: list of fitted models, raster stack of covariates
|
24
|
#Output: spatial grid data frame of the subset of tiles
|
25
|
#s_sgdf<-as(r_stack,"SpatialGridDataFrame") #Conversion to spatial grid data frame
|
26
|
|
27
|
residuals<-x-y
|
28
|
mae<-mean(abs(residuals),na.rm=T)
|
29
|
rmse<-sqrt(mean((residuals)^2,na.rm=T))
|
30
|
me<-mean(residuals,na.rm=T)
|
31
|
r<-cor(x,y,use="complete")
|
32
|
m50<-median(residuals,na.rm=T)
|
33
|
metrics_dat<-as.data.frame(cbind(mae,rmse,me,r,m50))
|
34
|
names(metrics_dat)<-c("mae","rmse","me","r","m50")
|
35
|
metrics_obj<-list(metrics_dat,as.data.frame(residuals))
|
36
|
names(metrics_obj)<-c("metrics_dat","residuals")
|
37
|
return(metrics_obj)
|
38
|
}
|
39
|
|
40
|
calc_val_metrics_rast <-function(df,y_ref,pred_names){
|
41
|
#
|
42
|
|
43
|
list_metrics<-vector("list",length(pred_names))
|
44
|
list_residuals<-vector("list",length(pred_names))
|
45
|
names(list_metrics)<-pred_names
|
46
|
names(list_residuals)<-pred_names
|
47
|
for (j in 1:length(pred_names)){
|
48
|
pred_var<-pred_names[j]
|
49
|
metrics<-calc_val_metrics(df[[pred_var]],df[[y_ref]])
|
50
|
list_metrics[[j]]<-metrics[[1]]
|
51
|
list_residuals[[j]]<-metrics[[2]]
|
52
|
}
|
53
|
metrics_df<-do.call(rbind,list_metrics)
|
54
|
metrics_df$pred_mod <- pred_names #adding name column
|
55
|
residuals_df<-do.call(cbind,list_residuals) #creating data frame for residuals
|
56
|
names(residuals_df)<-paste("res",pred_names,sep="_")
|
57
|
|
58
|
accuracy_obj<-list(metrics_df,residuals_df) #output object
|
59
|
names(accuracy_obj)<-c("metrics","residuals")
|
60
|
return(accuracy_obj)
|
61
|
}
|
62
|
|
63
|
## BEGIN ##
|
64
|
|
65
|
#PARSING INPUT PARAMETERS
|
66
|
day_list<- list_param$rast_day_year_list[[i]]
|
67
|
#day_list <-rast_day_yearlist[[i]] #list of prediction for the current date...
|
68
|
names_mod<-names(day_list)
|
69
|
method_mod_obj<-list_param$method_mod_obj
|
70
|
#Change to results_mod_obj[[i]]$data_s to make it less specific
|
71
|
data_v <- method_mod_obj[[i]]$data_v
|
72
|
data_s <- method_mod_obj[[i]]$data_s
|
73
|
|
74
|
## Now create the stack
|
75
|
|
76
|
rast_day_mod <- stack(day_list)
|
77
|
names(rast_day_mod) <- names(day_list)
|
78
|
extract_data_v<-extract(rast_day_mod,data_v,df=TRUE)
|
79
|
extract_data_s<-extract(rast_day_mod,data_s,df=TRUE)
|
80
|
|
81
|
data_v <-spCbind(data_v,extract_data_v) #should match IDs before joining for good practice
|
82
|
data_s <-spCbind(data_s,extract_data_s)
|
83
|
|
84
|
ns<-nrow(data_s) # some loss of data might have happened because of the averaging...
|
85
|
nv<-nrow(data_v)
|
86
|
|
87
|
sampling_dat_day<-(method_mod_obj[[i]])$sampling_dat
|
88
|
|
89
|
metrics_v_obj<-calc_val_metrics_rast(data_v,y_var_name,names_mod)
|
90
|
metrics_s_obj<-calc_val_metrics_rast(data_s,y_var_name,names_mod)
|
91
|
|
92
|
#add sampling dat info...
|
93
|
N=length(names_mod)
|
94
|
run_info<-cbind(sampling_dat_day,n=nv)
|
95
|
run_info[rep(seq_len(nrow(run_info)), each=N),] #repeating same row n times
|
96
|
metrics_v_df<-cbind(metrics_v_obj$metrics,run_info)
|
97
|
|
98
|
run_info<-cbind(sampling_dat_day,n=ns)
|
99
|
run_info[rep(seq_len(nrow(run_info)), each=N),]
|
100
|
metrics_s_df<-cbind(metrics_s_obj$metrics,run_info)
|
101
|
|
102
|
data_v<-spCbind(data_v,metrics_v_obj$residuals)
|
103
|
data_s<-spCbind(data_s,metrics_s_obj$residuals)
|
104
|
|
105
|
validation_obj<-list(metrics_s_df,metrics_v_df,data_s,data_v)
|
106
|
names(validation_obj)<-c("metrics_s","metrics_v","data_s","data_v")
|
107
|
|
108
|
return(validation_obj)
|
109
|
|
110
|
}
|
111
|
|
112
|
#### Function to create a data.frame from validation obj
|
113
|
extract_from_list_obj<-function(obj_list,list_name){
|
114
|
list_tmp<-vector("list",length(obj_list))
|
115
|
for (i in 1:length(obj_list)){
|
116
|
tmp<-obj_list[[i]][[list_name]] #double bracket to return data.frame
|
117
|
list_tmp[[i]]<-tmp
|
118
|
}
|
119
|
tb_list_tmp<-do.call(rbind,list_tmp) #long rownames
|
120
|
return(tb_list_tmp) #this is a data.frame
|
121
|
}
|
122
|
|
123
|
#### Function to plot boxplot from data.frame table of accuracy metrics
|
124
|
|
125
|
boxplot_from_tb <-function(tb_diagnostic,metric_names,out_prefix){
|
126
|
#now boxplots and mean per models
|
127
|
mod_names<-sort(unique(tb_diagnostic$pred_mod)) #models that have accuracy metrics
|
128
|
t<-melt(tb_diagnostic,
|
129
|
#measure=mod_var,
|
130
|
id=c("date","pred_mod","prop"),
|
131
|
na.rm=F)
|
132
|
avg_tb<-cast(t,pred_mod~variable,mean)
|
133
|
|
134
|
median_tb<-cast(t,pred_mod~variable,median)
|
135
|
tb<-tb_diagnostic
|
136
|
|
137
|
#mod_names<-sort(unique(tb$pred_mod)) #kept for clarity
|
138
|
tb_mod_list<-lapply(mod_names, function(k) subset(tb, pred_mod==k)) #this creates a list of 5 based on models names
|
139
|
names(tb_mod_list)<-mod_names
|
140
|
#mod_metrics<-do.call(cbind,tb_mod_list)
|
141
|
mod_metrics<-do.call(cbindX,tb_mod_list)
|
142
|
test_names<-lapply(1:length(mod_names),function(k) paste(names(tb_mod_list[[1]]),mod_names[k],sep="_"))
|
143
|
names(mod_metrics)<-unlist(test_names)
|
144
|
rows_total<-lapply(tb_mod_list,nrow)
|
145
|
for (j in 1:length(metric_names)){
|
146
|
metric_ac<-metric_names[j]
|
147
|
mod_pat<-glob2rx(paste(metric_ac,"_*",sep=""))
|
148
|
mod_var<-grep(mod_pat,names(mod_metrics),value=TRUE) # using grep with "value" extracts the matching names
|
149
|
#browser()
|
150
|
test<-mod_metrics[mod_var]
|
151
|
png(paste("boxplot_metric_",metric_ac, out_prefix,".png", sep=""))
|
152
|
boxplot(test,outline=FALSE,horizontal=FALSE,cex=0.5,
|
153
|
ylab=paste(metric_ac,"in degree C",sep=" "))
|
154
|
#legend("bottomleft",legend=paste(names(rows_total),":",rows_total,sep=""),cex=0.7,bty="n")
|
155
|
title(as.character(t(paste(t(names(rows_total)),":",rows_total,sep=""))),cex=0.8)
|
156
|
dev.off()
|
157
|
}
|
158
|
avg_tb$n<-rows_total #total number of predictions on which the mean is based
|
159
|
median_tb$n<-rows_total
|
160
|
summary_obj<-list(avg_tb,median_tb)
|
161
|
return(summary_obj)
|
162
|
}
|
163
|
|
164
|
## Function to display metrics by months/seasons
|
165
|
boxplot_month_from_tb <-function(tb_diagnostic,metric_names,out_prefix){
|
166
|
#Add code here...
|
167
|
}
|
168
|
|
169
|
|
170
|
|
171
|
|
172
|
####################################
|
173
|
############ END OF SCRIPT #########
|