Project

General

Profile

Download (5.9 KB) Statistics
| Branch: | Revision:
1
##VALIDATION-ACCURACY ASSESSMENT
2

    
3
#The interpolation is done first at the monthly add delta.
4
#AUTHOR: Benoit Parmentier                                                                        
5
#DATE: 03/12/2013                                                                                 
6

    
7
#Change this to allow explicitly arguments...
8
#Arguments: 
9
#1)list of climatology files for all models...(365*nb of models)
10
#2)data_s:training
11
#3)data_v:testing
12
#4)list of dates??: index
13
#5)stack of covariates: not needed at this this stage
14
#6)dst: data at the monthly time scale
15

    
16
#Function used in the script
17

    
18
calculate_accuracy_metrics<-function(i,list_param){
19
  
20
  ### Caculate accuracy metrics
21
  calc_val_metrics<-function(x,y){
22
    #This functions calculates accurayc metrics on given two vectors.
23
    #Arguments: list of fitted models, raster stack of covariates
24
    #Output: spatial grid data frame of the subset of tiles
25
    #s_sgdf<-as(r_stack,"SpatialGridDataFrame") #Conversion to spatial grid data frame
26
    
27
    residuals<-x-y
28
    mae<-mean(abs(residuals),na.rm=T)
29
    rmse<-sqrt(mean((residuals)^2,na.rm=T))
30
    me<-mean(residuals,na.rm=T)
31
    r<-cor(x,y,use="complete")
32
    m50<-median(residuals,na.rm=T)
33
    metrics_dat<-as.data.frame(cbind(mae,rmse,me,r,m50))
34
    names(metrics_dat)<-c("mae","rmse","me","r","m50")
35
    metrics_obj<-list(metrics_dat,as.data.frame(residuals))
36
    names(metrics_obj)<-c("metrics_dat","residuals")
37
    return(metrics_obj)
38
  }
39
  
40
  calc_val_metrics_rast <-function(df,y_ref,pred_names){
41
    #
42
    
43
    list_metrics<-vector("list",length(pred_names))
44
    list_residuals<-vector("list",length(pred_names))
45
    names(list_metrics)<-pred_names
46
    names(list_residuals)<-pred_names
47
    for (j in 1:length(pred_names)){
48
      pred_var<-pred_names[j]
49
      metrics<-calc_val_metrics(df[[pred_var]],df[[y_ref]])
50
      list_metrics[[j]]<-metrics[[1]]
51
      list_residuals[[j]]<-metrics[[2]]
52
    }
53
    metrics_df<-do.call(rbind,list_metrics)
54
    metrics_df$pred_mod <- pred_names #adding name column
55
    residuals_df<-do.call(cbind,list_residuals) #creating data frame for residuals
56
    names(residuals_df)<-paste("res",pred_names,sep="_")
57
    
58
    accuracy_obj<-list(metrics_df,residuals_df) #output object
59
    names(accuracy_obj)<-c("metrics","residuals") 
60
    return(accuracy_obj)
61
  }  
62
  
63
  ## BEGIN ##
64
  
65
  #PARSING INPUT PARAMETERS
66
  day_list<- list_param$rast_day_year_list[[i]]
67
  #day_list <-rast_day_yearlist[[i]] #list of prediction for the current date...
68
  names_mod<-names(day_list)
69
  method_mod_obj<-list_param$method_mod_obj
70
  #Change to results_mod_obj[[i]]$data_s to make it less specific
71
  data_v <- method_mod_obj[[i]]$data_v
72
  data_s <- method_mod_obj[[i]]$data_s
73
  
74
  ## Now create the stack
75
  
76
  rast_day_mod <- stack(day_list)
77
  names(rast_day_mod) <- names(day_list)
78
  extract_data_v<-extract(rast_day_mod,data_v,df=TRUE)
79
  extract_data_s<-extract(rast_day_mod,data_s,df=TRUE)
80
  
81
  data_v <-spCbind(data_v,extract_data_v) #should match IDs before joining for good practice
82
  data_s <-spCbind(data_s,extract_data_s)
83
  
84
  ns<-nrow(data_s) # some loss of data might have happened because of the averaging...
85
  nv<-nrow(data_v)
86
  
87
  sampling_dat_day<-(method_mod_obj[[i]])$sampling_dat
88
   
89
  metrics_v_obj<-calc_val_metrics_rast(data_v,y_var_name,names_mod)
90
  metrics_s_obj<-calc_val_metrics_rast(data_s,y_var_name,names_mod)
91
  
92
  #add sampling dat info...
93
  N=length(names_mod)
94
  run_info<-cbind(sampling_dat_day,n=nv)
95
  run_info[rep(seq_len(nrow(run_info)), each=N),] #repeating same row n times
96
  metrics_v_df<-cbind(metrics_v_obj$metrics,run_info)
97
  
98
  run_info<-cbind(sampling_dat_day,n=ns)
99
  run_info[rep(seq_len(nrow(run_info)), each=N),]
100
  metrics_s_df<-cbind(metrics_s_obj$metrics,run_info)
101
  
102
  data_v<-spCbind(data_v,metrics_v_obj$residuals)
103
  data_s<-spCbind(data_s,metrics_s_obj$residuals)
104
    
105
  validation_obj<-list(metrics_s_df,metrics_v_df,data_s,data_v)
106
  names(validation_obj)<-c("metrics_s","metrics_v","data_s","data_v")
107
  
108
  return(validation_obj)
109

    
110
}
111

    
112
#### Function to create a data.frame from validation obj
113
extract_from_list_obj<-function(obj_list,list_name){
114
  list_tmp<-vector("list",length(obj_list))
115
  for (i in 1:length(obj_list)){
116
    tmp<-obj_list[[i]][[list_name]] #double bracket to return data.frame
117
    list_tmp[[i]]<-tmp
118
  }
119
  tb_list_tmp<-do.call(rbind,list_tmp) #long rownames
120
  return(tb_list_tmp) #this is  a data.frame
121
}
122

    
123
#### Function to plot boxplot from data.frame table of accuracy metrics
124

    
125
boxplot_from_tb <-function(tb_diagnostic,metric_names,out_prefix){
126
  #now boxplots and mean per models
127
  mod_names<-unique(tb_diagnostic$pred_mod) #models that have accuracy metrics
128
  t<-melt(tb_diagnostic,
129
          #measure=mod_var, 
130
          id=c("date","pred_mod","prop"),
131
          na.rm=F)
132
  avg_tb<-cast(t,pred_mod~variable,mean)
133
  
134
  median_tb<-cast(t,pred_mod~variable,median)
135
  tb<-tb_diagnostic
136
  tb_mod_list<-vector("list",length(mod_names))
137
  for(i in 1:length(mod_names)){            # Reorganizing information in terms of metrics 
138
    mod_name_tb<-paste("tb_",mod_names[i],sep="")
139
    tb_mod<-subset(tb, pred_mod==mod_names[i])
140
    assign(mod_name_tb,tb_mod)
141
    tb_mod_list[[i]]<-tb_mod
142
  }
143
  names(tb_mod_list)<-mod_names
144
  mod_metrics<-do.call(cbind,tb_mod_list)
145
  for (j in 1:length(metric_names)){
146
    metric_ac<-metric_names[j]
147
    mod_pat<-glob2rx(paste("*.",metric_ac,sep=""))   
148
    mod_var<-grep(mod_pat,names(mod_metrics),value=TRUE) # using grep with "value" extracts the matching names     
149
    #browser()
150
    test<-mod_metrics[mod_var]
151
    png(paste("boxplot_metric_",metric_ac, out_prefix,".png", sep=""))
152
    boxplot(test,outline=FALSE,horizontal=FALSE,cex=0.5,
153
            ylab=paste(metric_ac,"in degree C",sep=" "))
154
    dev.off()
155
  }
156
  summary_obj<-list(avg_tb,median_tb)
157
  return(summary_obj)  
158
}
159

    
160
## Function to display metrics by months/seasons
161
boxplot_month_from_tb <-function(tb_diagnostic,metric_names,out_prefix){
162
  #Add code here...
163
}
164

    
165
####################################
166
############ END OF SCRIPT #########
(12-12/40)