1 |
48e49857
|
Benoit Parmentier
|
##VALIDATION-ACCURACY ASSESSMENT
|
2 |
|
|
|
3 |
|
|
#The interpolation is done first at the monthly add delta.
|
4 |
|
|
#AUTHOR: Benoit Parmentier
|
5 |
|
|
#DATE: 02/13/2013
|
6 |
|
|
|
7 |
|
|
#Change this to allow explicitly arguments...
|
8 |
|
|
#Arguments:
|
9 |
|
|
#1)list of climatology files for all models...(365*nb of models)
|
10 |
|
|
#2)data_s:training
|
11 |
|
|
#3)data_v:testing
|
12 |
|
|
#4)list of dates??
|
13 |
|
|
#5)stack of covariates: not needed at this this stage
|
14 |
|
|
#6)dst: data at the monthly time scale
|
15 |
|
|
|
16 |
|
|
#Function used in the script
|
17 |
|
|
|
18 |
|
|
calculate_accuracy_metrics<-function(i){
|
19 |
|
|
|
20 |
|
|
### Caculate accuracy metrics
|
21 |
|
|
calc_val_metrics<-function(x,y){
|
22 |
|
|
#This functions calculates accurayc metrics on given two vectors.
|
23 |
|
|
#Arguments: list of fitted models, raster stack of covariates
|
24 |
|
|
#Output: spatial grid data frame of the subset of tiles
|
25 |
|
|
#s_sgdf<-as(r_stack,"SpatialGridDataFrame") #Conversion to spatial grid data frame
|
26 |
|
|
|
27 |
|
|
residuals<-x-y
|
28 |
|
|
mae<-mean(abs(residuals),na.rm=T)
|
29 |
|
|
rmse<-sqrt(mean((residuals)^2,na.rm=T))
|
30 |
|
|
me<-mean(residuals,na.rm=T)
|
31 |
|
|
r<-cor(x,y,use="complete")
|
32 |
|
|
m50<-median(residuals,na.rm=T)
|
33 |
|
|
metrics_dat<-as.data.frame(cbind(mae,rmse,me,r,m50))
|
34 |
|
|
names(metrics_dat)<-c("mae","rmse","me","r","m50")
|
35 |
|
|
metrics_obj<-list(metrics_dat,as.data.frame(residuals))
|
36 |
|
|
names(metrics_obj)<-c("metrics_dat","residuals")
|
37 |
|
|
return(metrics_obj)
|
38 |
|
|
}
|
39 |
|
|
|
40 |
|
|
calc_val_metrics_rast <-function(df,y_ref,pred_names){
|
41 |
|
|
#
|
42 |
|
|
|
43 |
|
|
list_metrics<-vector("list",length(pred_names))
|
44 |
|
|
list_residuals<-vector("list",length(pred_names))
|
45 |
|
|
names(list_metrics)<-pred_names
|
46 |
|
|
names(list_residuals)<-pred_names
|
47 |
|
|
for (j in 1:length(pred_names)){
|
48 |
|
|
pred_var<-pred_names[j]
|
49 |
|
|
metrics<-calc_val_metrics(df[[pred_var]],df[[y_ref]])
|
50 |
|
|
list_metrics[[j]]<-metrics[[1]]
|
51 |
|
|
list_residuals[[j]]<-metrics[[2]]
|
52 |
|
|
}
|
53 |
|
|
metrics_df<-do.call(rbind,list_metrics)
|
54 |
|
|
metrics_df$pred_mod <- pred_names #adding name column
|
55 |
|
|
residuals_df<-do.call(cbind,list_residuals) #creating data frame for residuals
|
56 |
|
|
names(residuals_df)<-paste("res",pred_names,sep="_")
|
57 |
|
|
|
58 |
|
|
accuracy_obj<-list(metrics_df,residuals_df) #output object
|
59 |
|
|
names(accuracy_obj)<-c("metrics","residuals")
|
60 |
|
|
return(accuracy_obj)
|
61 |
|
|
}
|
62 |
|
|
|
63 |
|
|
## BEGIN ##
|
64 |
|
|
|
65 |
|
|
day_list <-rast_day_yearlist[[i]] #list of prediction for the current date...
|
66 |
|
|
names_mod<-names(day_list)
|
67 |
|
|
#this needs to be changed...this must be assigned earlier
|
68 |
|
|
# names(day_list)<-c("mod1","mod2","mod3","mod4","mod_kr")
|
69 |
|
|
# obj_names<-c(y_var_name,"clim","delta","data_s","sampling_dat","data_v",
|
70 |
|
|
# ,model_name)
|
71 |
|
|
# names(gam_fus_mod[[i]])
|
72 |
|
|
#
|
73 |
|
|
data_v <- gam_fus_mod[[i]]$data_v
|
74 |
|
|
data_s <- gam_fus_mod[[i]]$data_s
|
75 |
|
|
|
76 |
|
|
rast_day_mod <- stack(day_list)
|
77 |
|
|
names(rast_day_mod) <- names(day_list)
|
78 |
|
|
extract_data_v<-extract(rast_day_mod,data_v,df=TRUE)
|
79 |
|
|
extract_data_s<-extract(rast_day_mod,data_s,df=TRUE)
|
80 |
|
|
|
81 |
|
|
data_v <-spCbind(data_v,extract_data_v) #should match IDs before joining for good practice
|
82 |
|
|
data_s <-spCbind(data_s,extract_data_s)
|
83 |
|
|
|
84 |
|
|
ns<-nrow(data_s) # some loss of data might have happened because of the averaging...
|
85 |
|
|
nv<-nrow(data_v)
|
86 |
|
|
|
87 |
|
|
sampling_dat_day<-(gam_fus_mod[[i]])$sampling_dat
|
88 |
|
|
|
89 |
|
|
metrics_v_obj<-calc_val_metrics_rast(data_v,y_var_name,names_mod)
|
90 |
|
|
metrics_s_obj<-calc_val_metrics_rast(data_s,y_var_name,names_mod)
|
91 |
|
|
|
92 |
|
|
#add sampling dat info...
|
93 |
|
|
N=length(names_mod)
|
94 |
|
|
run_info<-cbind(sampling_dat_day,n=nv)
|
95 |
|
|
run_info[rep(seq_len(nrow(run_info)), each=N),] #repeating same row n times
|
96 |
|
|
metrics_v_df<-cbind(metrics_v_obj$metrics,run_info)
|
97 |
|
|
|
98 |
|
|
run_info<-cbind(sampling_dat_day,n=ns)
|
99 |
|
|
run_info[rep(seq_len(nrow(run_info)), each=N),]
|
100 |
|
|
metrics_s_df<-cbind(metrics_s_obj$metrics,run_info)
|
101 |
|
|
|
102 |
|
|
data_v<-spCbind(data_v,metrics_v_obj$residuals)
|
103 |
|
|
data_s<-spCbind(data_s,metrics_s_obj$residuals)
|
104 |
|
|
|
105 |
|
|
validation_obj<-list(metrics_s_df,metrics_v_df,data_s,data_v)
|
106 |
|
|
names(validation_obj)<-c("metrics_s","metrics_v","data_s","data_v")
|
107 |
|
|
|
108 |
|
|
return(validation_obj)
|
109 |
|
|
|
110 |
|
|
}
|
111 |
|
|
|
112 |
|
|
####################################
|
113 |
|
|
############ END OF SCRIPT #########
|