Project

General

Profile

Download (11.6 KB) Statistics
| Branch: | Revision:
1 48e49857 Benoit Parmentier
##VALIDATION-ACCURACY ASSESSMENT
2
3
#The interpolation is done first at the monthly add delta.
4
#AUTHOR: Benoit Parmentier                                                                        
5 f23e6e04 Benoit Parmentier
#DATE: 05/06/2013                                                                                 
6 48e49857 Benoit Parmentier
7
#Change this to allow explicitly arguments...
8
#Arguments: 
9
#1)list of climatology files for all models...(365*nb of models)
10
#2)data_s:training
11
#3)data_v:testing
12 643c7d19 Benoit Parmentier
#4)list of dates??: index
13 48e49857 Benoit Parmentier
#5)stack of covariates: not needed at this this stage
14
#6)dst: data at the monthly time scale
15
16
#Function used in the script
17
18 643c7d19 Benoit Parmentier
calculate_accuracy_metrics<-function(i,list_param){
19 1d0a0b81 Benoit Parmentier
  library(plyr)
20 48e49857 Benoit Parmentier
  ### Caculate accuracy metrics
21
  calc_val_metrics<-function(x,y){
22
    #This functions calculates accurayc metrics on given two vectors.
23
    #Arguments: list of fitted models, raster stack of covariates
24
    #Output: spatial grid data frame of the subset of tiles
25
    #s_sgdf<-as(r_stack,"SpatialGridDataFrame") #Conversion to spatial grid data frame
26
    
27
    residuals<-x-y
28
    mae<-mean(abs(residuals),na.rm=T)
29
    rmse<-sqrt(mean((residuals)^2,na.rm=T))
30
    me<-mean(residuals,na.rm=T)
31
    r<-cor(x,y,use="complete")
32
    m50<-median(residuals,na.rm=T)
33
    metrics_dat<-as.data.frame(cbind(mae,rmse,me,r,m50))
34
    names(metrics_dat)<-c("mae","rmse","me","r","m50")
35
    metrics_obj<-list(metrics_dat,as.data.frame(residuals))
36
    names(metrics_obj)<-c("metrics_dat","residuals")
37
    return(metrics_obj)
38
  }
39
  
40
  calc_val_metrics_rast <-function(df,y_ref,pred_names){
41 6cb81608 Benoit Parmentier
    #Input parameters:
42
    #1) df: data frame containing the observed and predicted variables (data_s or data_v)
43
    #2) y_ref: observed variable correspond to y_var_name??
44
    #3) pred_names: models run containig predicted values
45
    
46
    # library
47
    library(maptools)
48
    
49
    ## START SCRIPT
50 48e49857 Benoit Parmentier
    
51
    list_metrics<-vector("list",length(pred_names))
52
    list_residuals<-vector("list",length(pred_names))
53
    names(list_metrics)<-pred_names
54
    names(list_residuals)<-pred_names
55
    for (j in 1:length(pred_names)){
56
      pred_var<-pred_names[j]
57
      metrics<-calc_val_metrics(df[[pred_var]],df[[y_ref]])
58
      list_metrics[[j]]<-metrics[[1]]
59
      list_residuals[[j]]<-metrics[[2]]
60
    }
61
    metrics_df<-do.call(rbind,list_metrics)
62
    metrics_df$pred_mod <- pred_names #adding name column
63
    residuals_df<-do.call(cbind,list_residuals) #creating data frame for residuals
64
    names(residuals_df)<-paste("res",pred_names,sep="_")
65
    
66
    accuracy_obj<-list(metrics_df,residuals_df) #output object
67
    names(accuracy_obj)<-c("metrics","residuals") 
68
    return(accuracy_obj)
69
  }  
70
  
71 6cb81608 Benoit Parmentier
  ############### BEGIN SCRIPT ###########
72 48e49857 Benoit Parmentier
  
73 643c7d19 Benoit Parmentier
  #PARSING INPUT PARAMETERS
74 f23e6e04 Benoit Parmentier
  out_path <- list_param$out_path
75 bb747d71 Benoit Parmentier
  day_list <- list_param$rast_day_year_list[[i]] #this is the list of raster files, may be daily or monthly predictions
76
  names_mod <- names(day_list) #names of the predicted variables
77 08721acf Benoit Parmentier
78 bb747d71 Benoit Parmentier
  y_ref <- list_param$y_ref  #This is the reference variable from which resituals and accuracy metrics are created
79 08721acf Benoit Parmentier
  multi_time_scale <- list_param$multi_time_scale
80
  
81 bb747d71 Benoit Parmentier
  data_v <- list_param$list_data_v[[i]]
82
  data_s <- list_param$list_data_s[[i]]
83
  sampling_dat_day <- list_param$list_sampling_dat[[i]]
84 643c7d19 Benoit Parmentier
  
85
  ## Now create the stack
86 48e49857 Benoit Parmentier
  
87
  rast_day_mod <- stack(day_list)
88
  names(rast_day_mod) <- names(day_list)
89 08721acf Benoit Parmentier
  #Change to handle cases in which data_v is NULL!!!
90
    
91 bb747d71 Benoit Parmentier
  ns <- nrow(data_s) # some loss of data might have happened because of the averaging...
92
  nv <- nrow(data_v)
93 48e49857 Benoit Parmentier
  
94
  #add sampling dat info...
95
  N=length(names_mod)
96
  
97 08721acf Benoit Parmentier
  #Handle case of 0% hold out, monhtly or daily
98
  if (nv > 0){
99
    run_info<-cbind(sampling_dat_day,n=nv)
100
    run_info[rep(seq_len(nrow(run_info)), each=N),] #repeating same row n times
101
    
102
    extract_data_v<-extract(rast_day_mod,data_v,df=TRUE)
103
    data_v <-spCbind(data_v,extract_data_v) #should match IDs before joining for good practice    
104 bb747d71 Benoit Parmentier
    metrics_v_obj<-calc_val_metrics_rast(data_v,y_ref,names_mod)
105 08721acf Benoit Parmentier
    metrics_v_df<-cbind(metrics_v_obj$metrics,run_info)
106 bb747d71 Benoit Parmentier
    metrics_v_df["var_interp"]<-rep(y_ref,times=nrow(metrics_v_df)) 
107 08721acf Benoit Parmentier
    #Name of the variable interpolated, useful for cross-comparison between methods at later stages
108
    data_v<-spCbind(data_v,metrics_v_obj$residuals)
109
  }
110
  
111
  extract_data_s<-extract(rast_day_mod,data_s,df=TRUE)  
112 bb747d71 Benoit Parmentier
  
113 08721acf Benoit Parmentier
  data_s <-spCbind(data_s,extract_data_s)
114
115 bb747d71 Benoit Parmentier
  metrics_s_obj <- calc_val_metrics_rast(data_s,y_ref,names_mod)  
116
  
117 08721acf Benoit Parmentier
  run_info <- cbind(sampling_dat_day,n=ns)
118 48e49857 Benoit Parmentier
  run_info[rep(seq_len(nrow(run_info)), each=N),]
119 08721acf Benoit Parmentier
  metrics_s_df <- cbind(metrics_s_obj$metrics,run_info)
120 bb747d71 Benoit Parmentier
  metrics_s_df["var_interp"] <- rep(y_ref,times=nrow(metrics_s_df)) 
121 f23e6e04 Benoit Parmentier
  #Name of the variable interpolated, useful for cross-comparison between methods at later stages
122 48e49857 Benoit Parmentier
  
123 08721acf Benoit Parmentier
  data_s <- spCbind(data_s,metrics_s_obj$residuals)
124
  
125
  #prepare output object
126
  
127
  if (nv > 0){
128
    validation_obj<-list(metrics_s_df,metrics_v_df,data_s,data_v)
129
    names(validation_obj)<-c("metrics_s","metrics_v","data_s","data_v")
130
  }else{
131
    validation_obj<-list(metrics_s_df,data_s)
132
    names(validation_obj)<-c("metrics_s","data_s")
133
  }
134 48e49857 Benoit Parmentier
  
135
  return(validation_obj)
136
137
}
138
139 dc5bfc17 Benoit Parmentier
#### Function to create a data.frame from validation obj
140
extract_from_list_obj<-function(obj_list,list_name){
141
  list_tmp<-vector("list",length(obj_list))
142
  for (i in 1:length(obj_list)){
143
    tmp<-obj_list[[i]][[list_name]] #double bracket to return data.frame
144
    list_tmp[[i]]<-tmp
145
  }
146
  tb_list_tmp<-do.call(rbind,list_tmp) #long rownames
147
  return(tb_list_tmp) #this is  a data.frame
148
}
149
150 08721acf Benoit Parmentier
#### Function to create a list from a object made up of a list with names e.g. method_mod_obj or clim_method_mod_obj
151
extract_list_from_list_obj<-function(obj_list,list_name){
152
  #Create a list of an object from a given list of object using a name prodived as input
153
  
154
  list_tmp<-vector("list",length(obj_list))
155
  for (i in 1:length(obj_list)){
156
    tmp<-obj_list[[i]][[list_name]] #double bracket to return data.frame
157
    list_tmp[[i]]<-tmp
158
  }
159
  return(list_tmp) #this is  a data.frame
160
}
161
162 dc5bfc17 Benoit Parmentier
#### Function to plot boxplot from data.frame table of accuracy metrics
163
164 f23e6e04 Benoit Parmentier
boxplot_from_tb <-function(tb_diagnostic,metric_names,out_prefix,out_path){
165 dc5bfc17 Benoit Parmentier
  #now boxplots and mean per models
166 6cb81608 Benoit Parmentier
  library(gdata) #Nesssary to use cbindX
167
  
168
  ### Start script
169 f23e6e04 Benoit Parmentier
  y_var_name<-unique(tb_diagnostic$var_interp) #extract the name of interpolated variable: dailyTmax, dailyTmin
170 6cb81608 Benoit Parmentier
  
171 1d0a0b81 Benoit Parmentier
  mod_names<-sort(unique(tb_diagnostic$pred_mod)) #models that have accuracy metrics
172 dc5bfc17 Benoit Parmentier
  t<-melt(tb_diagnostic,
173
          #measure=mod_var, 
174
          id=c("date","pred_mod","prop"),
175
          na.rm=F)
176 f23e6e04 Benoit Parmentier
  t$value<-as.numeric(t$value) #problem with char!!!
177 dc5bfc17 Benoit Parmentier
  avg_tb<-cast(t,pred_mod~variable,mean)
178 f23e6e04 Benoit Parmentier
  avg_tb$var_interp<-rep(y_var_name,times=nrow(avg_tb))
179 dc5bfc17 Benoit Parmentier
  median_tb<-cast(t,pred_mod~variable,median)
180 f23e6e04 Benoit Parmentier
  
181
  #avg_tb<-cast(t,pred_mod~variable,mean)
182 dc5bfc17 Benoit Parmentier
  tb<-tb_diagnostic
183 1d0a0b81 Benoit Parmentier
 
184
  #mod_names<-sort(unique(tb$pred_mod)) #kept for clarity
185
  tb_mod_list<-lapply(mod_names, function(k) subset(tb, pred_mod==k)) #this creates a list of 5 based on models names
186 dc5bfc17 Benoit Parmentier
  names(tb_mod_list)<-mod_names
187 1d0a0b81 Benoit Parmentier
  #mod_metrics<-do.call(cbind,tb_mod_list)
188 6cb81608 Benoit Parmentier
  #debug here
189 c9196413 Benoit Parmentier
  if(length(tb_mod_list)>1){
190
    mod_metrics<-do.call(cbindX,tb_mod_list) #column bind the list??
191
  }else{
192
    mod_metrics<-tb_mod_list[[1]]
193
  }
194
  
195 1d0a0b81 Benoit Parmentier
  test_names<-lapply(1:length(mod_names),function(k) paste(names(tb_mod_list[[1]]),mod_names[k],sep="_"))
196 c9196413 Benoit Parmentier
  #test names are used when plotting the boxplot for the different models
197 1d0a0b81 Benoit Parmentier
  names(mod_metrics)<-unlist(test_names)
198
  rows_total<-lapply(tb_mod_list,nrow)
199 dc5bfc17 Benoit Parmentier
  for (j in 1:length(metric_names)){
200
    metric_ac<-metric_names[j]
201 1d0a0b81 Benoit Parmentier
    mod_pat<-glob2rx(paste(metric_ac,"_*",sep=""))   
202 dc5bfc17 Benoit Parmentier
    mod_var<-grep(mod_pat,names(mod_metrics),value=TRUE) # using grep with "value" extracts the matching names     
203
    #browser()
204
    test<-mod_metrics[mod_var]
205 f23e6e04 Benoit Parmentier
    png(file.path(out_path,paste("boxplot_metric_",metric_ac, out_prefix,".png", sep="")))
206
    #boxplot(test,outline=FALSE,horizontal=FALSE,cex=0.5,
207
    #        ylab=paste(metric_ac,"in degree C",sep=" "))
208
    
209 dc5bfc17 Benoit Parmentier
    boxplot(test,outline=FALSE,horizontal=FALSE,cex=0.5,
210 f23e6e04 Benoit Parmentier
              ylab=paste(metric_ac,"in degree C",sep=" "),axisnames=FALSE,axes=FALSE)
211
    axis(1, labels = FALSE)
212
    ## Create some text labels
213
    labels <- labels<- names(test)
214
    ## Plot x axis labels at default tick marks
215
    text(1:ncol(test), par("usr")[3] - 0.25, srt = 45, adj = 1,
216
         labels = labels, xpd = TRUE)
217
    axis(2)
218
    box()
219 1d0a0b81 Benoit Parmentier
    #legend("bottomleft",legend=paste(names(rows_total),":",rows_total,sep=""),cex=0.7,bty="n")
220 617d83ff Benoit Parmentier
    #title(as.character(t(paste(t(names(rows_total)),":",rows_total,sep=""))),cex=0.8)
221
    title(paste(metric_ac,"for",y_var_name,sep=" "),cex=0.8)
222 dc5bfc17 Benoit Parmentier
    dev.off()
223
  }
224 617d83ff Benoit Parmentier
  
225 1d0a0b81 Benoit Parmentier
  avg_tb$n<-rows_total #total number of predictions on which the mean is based
226
  median_tb$n<-rows_total
227 dc5bfc17 Benoit Parmentier
  summary_obj<-list(avg_tb,median_tb)
228 f23e6e04 Benoit Parmentier
  names(summary_obj)<-c("avg","median")
229 dc5bfc17 Benoit Parmentier
  return(summary_obj)  
230
}
231 f23e6e04 Benoit Parmentier
#boxplot_month_from_tb(tb_diagnostic,metric_names,out_prefix,out_path)
232 dc5bfc17 Benoit Parmentier
## Function to display metrics by months/seasons
233 f23e6e04 Benoit Parmentier
boxplot_month_from_tb <-function(tb_diagnostic,metric_names,out_prefix,out_path){
234 617d83ff Benoit Parmentier
  
235
  #Generate boxplot per month for models and accuracy metrics
236
  #Input parameters:
237
  #1) df: data frame containing accurayc metrics (RMSE etc.) per day)
238
  #2) metric_names: metrics used for validation
239
  #3) out_prefix
240
  #
241
  
242
  #################
243
  ## BEGIN
244 f23e6e04 Benoit Parmentier
  y_var_name<-unique(tb_diagnostic$var_interp) #extract the name of interpolated variable: dailyTmax, dailyTmin  
245 bcef031f Benoit Parmentier
  date_f<-strptime(tb_diagnostic$date, "%Y%m%d")   # interpolation date being processed
246
  tb_diagnostic$month<-strftime(date_f, "%m")          # current month of the date being processed
247
  mod_names<-sort(unique(tb_diagnostic$pred_mod)) #models that have accuracy metrics
248
  tb_mod_list<-lapply(mod_names, function(k) subset(tb_diagnostic, pred_mod==k)) #this creates a list of 5 based on models names
249
  names(tb_mod_list)<-mod_names
250
  t<-melt(tb_diagnostic,
251
          #measure=mod_var, 
252
          id=c("date","pred_mod","prop","month"),
253
          na.rm=F)
254 f23e6e04 Benoit Parmentier
  t$value<-as.numeric(t$value) #problem with char!!!
255 617d83ff Benoit Parmentier
  tb_mod_m_avg <-cast(t,pred_mod+month~variable,mean) #monthly mean for every model
256 f23e6e04 Benoit Parmentier
  tb_mod_m_avg$var_interp<-rep(y_var_name,times=nrow(tb_mod_m_avg))
257
  
258 617d83ff Benoit Parmentier
  tb_mod_m_sd <-cast(t,pred_mod+month~variable,sd)   #monthly sd for every model
259
  
260 f23e6e04 Benoit Parmentier
  tb_mod_m_list <-lapply(mod_names, function(k) subset(tb_mod_m_avg, pred_mod==k)) #this creates a list of 5 based on models names
261
  
262 617d83ff Benoit Parmentier
  for (k in 1:length(mod_names)){
263
    mod_metrics <-tb_mod_list[[k]]
264
    current_mod_name<- mod_names[k]
265
    for (j in 1:length(metric_names)){    
266 bcef031f Benoit Parmentier
      metric_ac<-metric_names[j]
267 617d83ff Benoit Parmentier
      col_selected<-c(metric_ac,"month")
268
      test<-mod_metrics[col_selected]
269 f23e6e04 Benoit Parmentier
      png(file.path(out_path,paste("boxplot_metric_",metric_ac,"_",current_mod_name,"_by_month_",out_prefix,".png", sep="")))
270 617d83ff Benoit Parmentier
      boxplot(test[[metric_ac]]~test[[c("month")]],outline=FALSE,horizontal=FALSE,cex=0.5,
271 f23e6e04 Benoit Parmentier
              ylab=paste(metric_ac,"in degree C",sep=" "),,axisnames=FALSE,axes=FALSE)
272
      #boxplot(test[[metric_ac]]~test[[c("month")]],outline=FALSE,horizontal=FALSE,cex=0.5,
273
      #        ylab=paste(metric_ac,"in degree C",sep=" "))
274
      axis(1, labels = FALSE)
275
      ## Create some text labels
276
      labels <- month.abb # abbreviated names for each month
277
      ## Plot x axis labels at default tick marks
278
      text(1:length(labels), par("usr")[3] - 0.25, srt = 45, adj = 1,
279
           labels = labels, xpd = TRUE)
280
      axis(2)
281
      box()
282 bcef031f Benoit Parmentier
      #legend("bottomleft",legend=paste(names(rows_total),":",rows_total,sep=""),cex=0.7,bty="n")
283 617d83ff Benoit Parmentier
      title(paste(metric_ac,"for",current_mod_name,"by month",sep=" "))
284 bcef031f Benoit Parmentier
      dev.off()
285 617d83ff Benoit Parmentier
    }  
286
    
287 bcef031f Benoit Parmentier
  }
288 617d83ff Benoit Parmentier
  summary_month_obj <-c(tb_mod_m_list,tb_mod_m_avg,tb_mod_m_sd)
289
  names(summary_month_obj)<-c("tb_list","metric_month_avg","metric_month_sd")
290
  return(summary_month_obj)  
291 dc5bfc17 Benoit Parmentier
}
292
293 1d0a0b81 Benoit Parmentier
294 48e49857 Benoit Parmentier
####################################
295
############ END OF SCRIPT #########