Revision 48e49857
Added by Benoit Parmentier almost 12 years ago
climate/research/oregon/interpolation/GAM_fusion_function_multisampling_validation_metrics.R | ||
---|---|---|
1 |
##VALIDATION-ACCURACY ASSESSMENT |
|
2 |
|
|
3 |
#The interpolation is done first at the monthly add delta. |
|
4 |
#AUTHOR: Benoit Parmentier |
|
5 |
#DATE: 02/13/2013 |
|
6 |
|
|
7 |
#Change this to allow explicitly arguments... |
|
8 |
#Arguments: |
|
9 |
#1)list of climatology files for all models...(365*nb of models) |
|
10 |
#2)data_s:training |
|
11 |
#3)data_v:testing |
|
12 |
#4)list of dates?? |
|
13 |
#5)stack of covariates: not needed at this this stage |
|
14 |
#6)dst: data at the monthly time scale |
|
15 |
|
|
16 |
#Function used in the script |
|
17 |
|
|
18 |
calculate_accuracy_metrics<-function(i){ |
|
19 |
|
|
20 |
### Caculate accuracy metrics |
|
21 |
calc_val_metrics<-function(x,y){ |
|
22 |
#This functions calculates accurayc metrics on given two vectors. |
|
23 |
#Arguments: list of fitted models, raster stack of covariates |
|
24 |
#Output: spatial grid data frame of the subset of tiles |
|
25 |
#s_sgdf<-as(r_stack,"SpatialGridDataFrame") #Conversion to spatial grid data frame |
|
26 |
|
|
27 |
residuals<-x-y |
|
28 |
mae<-mean(abs(residuals),na.rm=T) |
|
29 |
rmse<-sqrt(mean((residuals)^2,na.rm=T)) |
|
30 |
me<-mean(residuals,na.rm=T) |
|
31 |
r<-cor(x,y,use="complete") |
|
32 |
m50<-median(residuals,na.rm=T) |
|
33 |
metrics_dat<-as.data.frame(cbind(mae,rmse,me,r,m50)) |
|
34 |
names(metrics_dat)<-c("mae","rmse","me","r","m50") |
|
35 |
metrics_obj<-list(metrics_dat,as.data.frame(residuals)) |
|
36 |
names(metrics_obj)<-c("metrics_dat","residuals") |
|
37 |
return(metrics_obj) |
|
38 |
} |
|
39 |
|
|
40 |
calc_val_metrics_rast <-function(df,y_ref,pred_names){ |
|
41 |
# |
|
42 |
|
|
43 |
list_metrics<-vector("list",length(pred_names)) |
|
44 |
list_residuals<-vector("list",length(pred_names)) |
|
45 |
names(list_metrics)<-pred_names |
|
46 |
names(list_residuals)<-pred_names |
|
47 |
for (j in 1:length(pred_names)){ |
|
48 |
pred_var<-pred_names[j] |
|
49 |
metrics<-calc_val_metrics(df[[pred_var]],df[[y_ref]]) |
|
50 |
list_metrics[[j]]<-metrics[[1]] |
|
51 |
list_residuals[[j]]<-metrics[[2]] |
|
52 |
} |
|
53 |
metrics_df<-do.call(rbind,list_metrics) |
|
54 |
metrics_df$pred_mod <- pred_names #adding name column |
|
55 |
residuals_df<-do.call(cbind,list_residuals) #creating data frame for residuals |
|
56 |
names(residuals_df)<-paste("res",pred_names,sep="_") |
|
57 |
|
|
58 |
accuracy_obj<-list(metrics_df,residuals_df) #output object |
|
59 |
names(accuracy_obj)<-c("metrics","residuals") |
|
60 |
return(accuracy_obj) |
|
61 |
} |
|
62 |
|
|
63 |
## BEGIN ## |
|
64 |
|
|
65 |
day_list <-rast_day_yearlist[[i]] #list of prediction for the current date... |
|
66 |
names_mod<-names(day_list) |
|
67 |
#this needs to be changed...this must be assigned earlier |
|
68 |
# names(day_list)<-c("mod1","mod2","mod3","mod4","mod_kr") |
|
69 |
# obj_names<-c(y_var_name,"clim","delta","data_s","sampling_dat","data_v", |
|
70 |
# ,model_name) |
|
71 |
# names(gam_fus_mod[[i]]) |
|
72 |
# |
|
73 |
data_v <- gam_fus_mod[[i]]$data_v |
|
74 |
data_s <- gam_fus_mod[[i]]$data_s |
|
75 |
|
|
76 |
rast_day_mod <- stack(day_list) |
|
77 |
names(rast_day_mod) <- names(day_list) |
|
78 |
extract_data_v<-extract(rast_day_mod,data_v,df=TRUE) |
|
79 |
extract_data_s<-extract(rast_day_mod,data_s,df=TRUE) |
|
80 |
|
|
81 |
data_v <-spCbind(data_v,extract_data_v) #should match IDs before joining for good practice |
|
82 |
data_s <-spCbind(data_s,extract_data_s) |
|
83 |
|
|
84 |
ns<-nrow(data_s) # some loss of data might have happened because of the averaging... |
|
85 |
nv<-nrow(data_v) |
|
86 |
|
|
87 |
sampling_dat_day<-(gam_fus_mod[[i]])$sampling_dat |
|
88 |
|
|
89 |
metrics_v_obj<-calc_val_metrics_rast(data_v,y_var_name,names_mod) |
|
90 |
metrics_s_obj<-calc_val_metrics_rast(data_s,y_var_name,names_mod) |
|
91 |
|
|
92 |
#add sampling dat info... |
|
93 |
N=length(names_mod) |
|
94 |
run_info<-cbind(sampling_dat_day,n=nv) |
|
95 |
run_info[rep(seq_len(nrow(run_info)), each=N),] #repeating same row n times |
|
96 |
metrics_v_df<-cbind(metrics_v_obj$metrics,run_info) |
|
97 |
|
|
98 |
run_info<-cbind(sampling_dat_day,n=ns) |
|
99 |
run_info[rep(seq_len(nrow(run_info)), each=N),] |
|
100 |
metrics_s_df<-cbind(metrics_s_obj$metrics,run_info) |
|
101 |
|
|
102 |
data_v<-spCbind(data_v,metrics_v_obj$residuals) |
|
103 |
data_s<-spCbind(data_s,metrics_s_obj$residuals) |
|
104 |
|
|
105 |
validation_obj<-list(metrics_s_df,metrics_v_df,data_s,data_v) |
|
106 |
names(validation_obj)<-c("metrics_s","metrics_v","data_s","data_v") |
|
107 |
|
|
108 |
return(validation_obj) |
|
109 |
|
|
110 |
} |
|
111 |
|
|
112 |
#################################### |
|
113 |
############ END OF SCRIPT ######### |
Also available in: Unified diff
GAM fusion function, calculation of validation metrics to follow predictions