Project

General

Profile

« Previous | Next » 

Revision a6de78b2

Added by Benoit Parmentier about 11 years ago

running kriging fusion with 40 to 70% monthly hold out and changes in paper 1 script figures and analyses

View differences:

climate/research/oregon/interpolation/contribution_of_covariates_paper_interpolation.R
4 4
#different covariates using two baselines. Accuracy methods are added in the the function script to evaluate results.
5 5
#Figures, tables and data for the contribution of covariate paper are also produced in the script.
6 6
#AUTHOR: Benoit Parmentier                                                                      
7
#DATE: 08/15/2013            
8
#Version: 2
7
#DATE: 09/09/2013            
8
#Version: 3
9 9
#PROJECT: Environmental Layers project                                     
10 10
#################################################################################################
11 11

  
......
37 37

  
38 38
#### FUNCTION USED IN SCRIPT
39 39

  
40
function_analyses_paper <-"contribution_of_covariates_paper_interpolation_functions_08152013.R"
40
function_analyses_paper <-"contribution_of_covariates_paper_interpolation_functions_09092013.R"
41 41

  
42 42
##############################
43 43
#### Parameters and constants  
......
59 59
out_dir<-"/home/parmentier/Data/IPLANT_project/paper_analyses_tables_fig_08032013"
60 60
setwd(out_dir)
61 61

  
62
infile_reg_outline <- "/data/project/layers/commons/data_workflow/inputs/region_outlines_ref_files/OR83M_state_outline.shp"  #input region outline defined by polygon: Oregon
63
met_stations_outfiles_obj_file<-"/data/project/layers/commons/data_workflow/output_data_365d_gam_fus_lst_test_run_07172013/met_stations_outfiles_obj_gam_fusion__365d_gam_fus_lst_test_run_07172013.RData"
64
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
62 65
y_var_name <- "dailyTmax"
66
out_prefix<-"analyses_09092013"
63 67

  
64
out_prefix<-"analyses_08152013"
65

  
66
method_interpolation <- "gam_daily"
68
#method_interpolation <- "gam_daily"
67 69
covar_obj_file_1 <- "covar_obj__365d_gam_day_lst_comb3_08132013.RData"
68 70
met_obj_file_1 <- "met_stations_outfiles_obj_gam_daily__365d_gam_day_lst_comb3_08132013.RData"
69 71
#met_stations_outfiles_obj_gam_daily__365d_gam_day_lst_comb3_08132013.RData
......
82 84

  
83 85
#Load objects containing training, testing, models objects 
84 86

  
87
met_stations_obj <- load_obj(met_stations_outfiles_obj_file)
85 88
covar_obj <-load_obj(file.path(in_dir1,covar_obj_file_1)) #Reading covariates object for GAM daily method
86 89
infile_covariates <- covar_obj$infile_covariates
87 90
infile_reg_outline <- covar_obj$infile_reg_outline
......
117 120
table_data2 <-summary_metrics_v2$avg[,c("mae","rmse","me","r")]
118 121

  
119 122
###Table 3a, baseline 1: s(lat,lon) 
120
#Chnage here !!! need  to reorder rows based on  mod first
123
#Change here !!! need  to reorder rows based on  mod first
121 124
model_col<-c("Baseline1","Elevation","Northing","Easting","LST","DISTOC","Forest","CANHEIGHT","LST*Forest","LST*CANHEIGHT") # 
122 125
df3a<- as.data.frame(sapply(table_data2,FUN=function(x) x-x[1]))
123 126
df3a<- round(df3a,digit=3) #roundto three digits teh differences
......
219 222
#Figure 6: Spatial pattern of prediction for one day
220 223

  
221 224
### Figure 1: Oregon study area
225
#3 parameters from output
226
infile_monthly<-list_outfiles$monthly_covar_ghcn_data #outile4 from database_covar script
227
infile_daily<-list_outfiles$daily_covar_ghcn_data  #outfile3 from database_covar script
228
infile_locs<- list_outfiles$loc_stations_ghcn #outfile2? from database covar script
229

  
230
ghcn_dat <- readOGR(dsn=dirname(met_stations_obj$monthly_covar_ghcn_data),
231
        sub(".shp","",basename(met_stations_obj$monthly_covar_ghcn_data)))
232
ghcn_dat_WGS84 <-spTransform(ghcn_dat,CRS_locs_WGS84)         # Project from WGS84 to new coord. system
233

  
234
interp_area <- readOGR(dsn=dirname(infile_reg_outline),sub(".shp","",basename(infile_reg_outline)))
235
interp_area_WGS84 <-spTransform(interp_area,CRS_locs_WGS84)         # Project from WGS84 to new coord. system
236

  
237
usa_map <- getData('GADM', country='USA', level=1) #Get US map
238
usa_map_2 <- usa_map[usa_map$NAME_1!="Alaska",] #remove Alaska
239
usa_map_2 <- usa_map_2[usa_map_2$NAME_1!="Hawaii",] #remove Hawai 
240
usa_map_OR <- usa_map_2[usa_map_2$NAME_1=="Oregon",] #get OR
241

  
242
elev <- subset(s_raster,"elev_s")
243
elev_WGS84 <- projectRaster(from=elev,crs=CRS_locs_WGS84,method="ngb")
244

  
245
#set up the output file to plot
246
res_pix<-960
247
col_mfrow<-1
248
row_mfrow<-1
249
png(filename=paste("Figure1_contribution_covariates_study_area_",out_prefix,".png",sep=""),
250
    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
251
par(mfrow=c(1,1))
252

  
253

  
254
#plot(elev_WGS84)
255
plot(interp_area_WGS84)
256
plot(ghcn_dat_WGS84,add=T)
257
title("SStudy area with ")
258
#this work on non sp plot too: Scale bar postion
259
#scale_position<-c(450000, 600000)
260
#arrow_position<-c(900000, 600000)
261

  
262
#legend("topright",legend=c(0:7),title="Number of change",
263
#       pt.cex=1.4,cex=2.1,fill=rev(terrain.colors(8)),bty="n")
264
#label_scalebar<-c("0","125","250")
265
#scalebar(d=250000, xy=scale_position, type = 'bar', 
266
#         divs=3,label=label_scalebar,below="kilometers",
267
#         cex=1.8)
268

  
269
## Northern arrow
270
#SpatialPolygonsRescale(layout.north.arrow(), offset = arrow_position, 
271
#                       scale = 150000, fill=c("transparent","black"),plot.grid=FALSE)
272
##note that scale in SpatialPolygonRescale sets the size of the north arrow!!
273

  
274
### PLACE INSET MAP OF THE USA
275
#opar <- par(fig=c(0.7, 0.95, 0.5, 0.75), new=TRUE)
276
#opar <- par(fig=c(0, 0, 1, 1), new=TRUE)
277

  
278
par(mar = c(0,0,0,0)) # remove margin
279
#opar <- par(fig=c(0.9,0.95,0.8, 0.85), new=TRUE)
280
opar <- par(fig=c(0.85,0.95,0.8, 0.9), new=TRUE)
281

  
282
#p1<-spplot(ghcn_dat,"station")
283
#p2<-spplot(usa_map_2,"NAMES_1")
284
#print(p1,position=c(0,0,1,1),more=T)
285
#print(p2,position=c(0,0,0.3,0.3),more=T)
286

  
287
plot(usa_map_2,border="black") #border and lwd are options of graphics package polygon object
288
plot(usa_map_OR,col="grey",add=T)
289
box()
290
dev.off()
222 291

  
223
#...add code
224 292

  
225 293
### Figure 2:  Method comparison workflow 
226 294

  
227
# Workflow not generated in R
295
# Workflow figure is not generated in R
228 296

  
229 297
################################################
230 298
################### Figure 3. MAE/RMSE and distance to closest fitting station.
......
254 322
title_plot <- "RMSE and distance to closest station for baseline 2"
255 323
y_lab_text <- "RMSE (C)"
256 324
#quick test
257
list_param_plot<-list(list_dist_obj,col_t,pch_t,legend_text,mod_name,x_tick_labels,metric_name,title_plot,y_lab_text)
258
names(list_param_plot)<-c("list_dist_obj","col_t","pch_t","legend_text","mod_name","x_tick_labels","metric_name","title_plot","y_lab_text")
325
add_CI <- c(TRUE,TRUE,TRUE)
326

  
327
list_param_plot<-list(list_dist_obj,col_t,pch_t,legend_text,mod_name,x_tick_labels,metric_name,title_plot,y_lab_text,add_CI)
328
names(list_param_plot)<-c("list_dist_obj","col_t","pch_t","legend_text","mod_name","x_tick_labels","metric_name","title_plot","y_lab_text","add_CI")
259 329
plot_dst_MAE(list_param_plot)
260 330

  
261 331
metric_name <-"mae_tb"
262 332
title_plot <- "MAE and distance to closest fitting station"
263 333
y_lab_text <- "MAE (C)"
264

  
334
add_CI <- c(TRUE,TRUE,TRUE)
265 335
#Now set up plotting device
266 336
res_pix<-480
267 337
col_mfrow<-2
......
272 342
par(mfrow=c(row_mfrow,col_mfrow))
273 343

  
274 344
#Figure 3a
275
list_param_plot<-list(list_dist_obj,col_t,pch_t,legend_text,mod_name,x_tick_labels,metric_name,title_plot,y_lab_text)
276
names(list_param_plot)<-c("list_dist_obj","col_t","pch_t","legend_text","mod_name","x_tick_labels","metric_name","title_plot","y_lab_text")
345
list_param_plot<-list(list_dist_obj,col_t,pch_t,legend_text,mod_name,x_tick_labels,metric_name,
346
                      title_plot,y_lab_text,add_CI)
347
names(list_param_plot)<-c("list_dist_obj","col_t","pch_t","legend_text","mod_name","x_tick_labels","metric_name",
348
                          "title_plot","y_lab_text","add_CI")
349
debug(plot_dst_MAE)
277 350
plot_dst_MAE(list_param_plot)
278 351
title(xlab="Distance to closest fitting station (km)")
279 352

  
......
310 383
pch_t<- 1:length(col_t)
311 384
legend_text <- c("GAM","Kriging","GWR")
312 385
mod_name<-c("mod1","mod1","mod1")#selected models
386
add_CI <- c(TRUE,TRUE,TRUE)
387
#add_CI <- c(TRUE,FALSE,FALSE)
313 388

  
314 389
##### plot figure 4 for paper
315 390
####
......
321 396
png(filename=file.path(out_dir,png_file_name),
322 397
    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
323 398
par(mfrow=c(row_mfrow,col_mfrow))
399

  
324 400
metric_name<-"mae"
325
list_param_plot<-list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name)
326
names(list_param_plot)<-c("list_prop_obj","col_t","pch_t","legend_text","mod_name","metric_name")
401
list_param_plot<-list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name,add_CI)
402
names(list_param_plot)<-c("list_prop_obj","col_t","pch_t","legend_text","mod_name","metric_name","add_CI")
327 403

  
404
#debug(plot_prop_metrics)
328 405
plot_prop_metrics(list_param_plot)
329 406
title(main="MAE for hold out and methods",
330 407
      xlab="Hold out validation/testing proportion",
......
332 409

  
333 410
#now figure 4b
334 411
metric_name<-"rmse"
335
list_param_plot<-list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name)
336
names(list_param_plot)<-c("list_prop_obj","col_t","pch_t","legend_text","mod_name","metric_name")
412
list_param_plot<-list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name,add_CI)
413
names(list_param_plot)<-c("list_prop_obj","col_t","pch_t","legend_text","mod_name","metric_name","add_CI")
337 414
plot_prop_metrics(list_param_plot)
338 415
title(main="RMSE for hold out and methods",
339 416
      xlab="Hold out validation/testing proportion",
......
347 424
#read in relevant data:
348 425
## Calculate average difference for RMSE for all three methods
349 426
#read in relevant data:
427

  
350 428
tb1_s<-extract_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"metrics_s")
351 429
rownames(tb1_s)<-NULL #remove row names
352 430
tb1_s$method_interp <- "gam_daily" #add type of interpolation...out_prefix too??
......
406 484
col_t<-c("red","blue","black")
407 485
pch_t<- 1:length(col_t)
408 486

  
487
##Make this a function???
409 488
y_range<-range(prop_obj_kriging$avg_tb$rmse,prop_obj_kriging_s$avg_tb$rmse)
410 489
#y_range<-range(prop_obj_gam$avg_tb$rmse,prop_obj_gam_s$avg_tb$rmse)
411 490
plot(prop_obj_gam$avg_tb$rmse ~ prop_obj_gam$avg_tb$prop, ylab="",xlab="",type="b",col=c("red"),pch=pch_t[1],ylim=y_range,lty=2)
......
415 494
lines(prop_obj_kriging$avg_tb$rmse ~ prop_obj_kriging$avg_tb$prop,ylab="",xlab="", type="b",ylim=y_range,pch=pch_t[2],,col=c("blue"),lty=2)
416 495
lines(prop_obj_kriging_s$avg_tb$rmse ~ prop_obj_kriging_s$avg_tb$prop,ylab="",xlab="",type="b",ylim=y_range,pch=pch_t[2],col=c("blue"))
417 496

  
497
plot_ac_holdout_prop<- function(l_prop,l_col_t,l_pch_t,add_CI,y_range){
498
  
499
  for(i in 1:length(l_prop)){
500
    if(i==1){
501
      plot(l_prop[[i]]$avg_tb$rmse ~ l_prop[[i]]$avg_tb$prop,ylab="",xlab="",type="b",pch=l_pch_t[i],ylim=y_range,col=l_col_t[i],lty=l_lty_t[i])
502
    }else{
503
      lines(l_prop[[i]]$avg_tb$rmse ~ l_prop[[i]]$avg_tb$prop,ylab="",xlab="",type="b",pch=l_pch_t[i],ylim=y_range,col=l_col_t[i],lty=l_lty_t[i])
504
    }
505
    if(add_CI[i]==TRUE){
506
      ciw   <- qt(0.975, l_prop[[i]]$n_tb$rmse) * l_prop[[i]]$sd_tb$rmse / sqrt(l_prop[[i]]$n_tb$rmse)
507
      plotCI(y=l_prop[[i]]$avg_tb$rmse, x=l_prop[[i]]$avg_tb$prop, uiw=ciw, col=l_col_t[i], barcol="blue", lwd=1,pch=l_pch_t[i],
508
             add=TRUE) 
509
    }
510
  }
511
}
512

  
513
l_prop<- list(prop_obj_gam,prop_obj_gam_s,prop_obj_gwr_s,prop_obj_gwr,prop_obj_kriging,prop_obj_kriging_s)
514

  
515
l_col_t <- c("red","red","black","black","blue","blue")
516
l_pch_t <- c(1,1,3,3,2,2)
517
l_lty_t <- c(2,1,1,2,2,1)
518
add_CI <- c(TRUE,TRUE,FALSE,FALSE,FALSE,FALSE)
519
y_range<-c(0.5,3)
520

  
521
plot_ac_holdout_prop(l_prop,l_col_t,l_pch_t,add_CI,y_range)
522

  
523
legend_text <- c("GAM","Kriging","GWR","training","testing")
524

  
418 525
legend("topleft",legend=legend_text, 
419
       cex=0.9, pch=pch_t,col=col_t,lty=1,bty="n")
526
       cex=0.9, pch=c(pch_t,NA,NA),col=c(col_t,NA,NA),lty=c(NA,NA,NA,1,2),bty="n")
420 527
title(main="Training and testing RMSE for hold out and methods",
421 528
      xlab="Hold out validation/testing proportion",
422 529
      ylab="RMSE (C)")
423 530

  
531

  
424 532
boxplot(diff_mae_data) #plot differences in training and testing accuracies for three methods
425 533
title(main="Difference between training and testing MAE",
426 534
      xlab="Interpolation method",
......
682 790

  
683 791
######## NOW GET A ACCURACY BY STATIONS
684 792

  
685
list_data_v<-extract_list_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"data_v")
793
list_data_s <-extract_list_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"data_s")
794
list_data_v <-extract_list_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"data_v")
795

  
796
#number of observations per day
797
year_nbv <- sapply(list_data_v,FUN=length)
798
year_nbs <- sapply(list_data_s,FUN=length)
799
nb_df <- data.frame(nv=year_nbv,ns=year_nbs)
800
nb_df$n_tot <- year_nbv + year_nbs
801
range(nb_df$n_tot)
802

  
686 803
data_v_test <- list_data_v[[1]]
687 804

  
688 805
#Convert sp data.frame and combined them in one unique df, see function define earlier
climate/research/oregon/interpolation/contribution_of_covariates_paper_interpolation_functions.R
265 265
  metric_name <-list_param$metric_name
266 266
  title_plot <- list_param$title_plot
267 267
  y_lab_text <- list_param$y_lab_text
268
    
268
  add_CI <- list_param$add_CI  
269
  
269 270
  for (i in 1:length(list_dist_obj)){
270 271
    
271 272
    l<-list_dist_obj[[i]]
......
286 287
    #ciw2   <- qt(0.975, n) * y_sd2 / sqrt(n)
287 288
    ciw   <- qt(0.975, n) * y_sd / sqrt(n)
288 289
    
289
    if(i==1){
290
      plotCI(y=y, x=x, uiw=ciw, col=col_t[i], barcol="blue", lwd=1,
291
             ylab=y_lab_text, xlab="")
292
      lines(y~x, pch=pch_t[i],col=col_t[i],type="b")
290
    #if(i==1){
291
    #  plotCI(y=y, x=x, uiw=ciw, col=col_t[i], barcol="blue", lwd=1,
292
    #         ylab=y_lab_text, xlab="")
293
    #  lines(y~x, pch=pch_t[i],col=col_t[i],type="b")
294
    #  
295
    #}else{
296
    #  lines(y~x, pch=pch_t[i],col=col_t[i],type="b")
297
    #}
298
    
299
    if(add_CI[i]==TRUE){
293 300
      
301
      if (i==1){
302
        plotCI(y=y, x=x, uiw=ciw, col=col_t[i], barcol="blue", lwd=1,pch=pch_t[i],
303
               ylab=y_lab_text, xlab="") 
304
        lines(y~x, pch=pch_t[i],col=col_t[i],type="b")
305
      }else{
306
        lines(y~x, pch=pch_t[i],col=col_t[i],type="b")
307
        plotCI(y=y, x=x, uiw=ciw, col=col_t[i], barcol="blue", lwd=1,pch=pch_t[i],
308
               ylab=y_lab_text, xlab="",add=TRUE) 
309
      }
310
 
294 311
    }else{
295
      lines(y~x, pch=pch_t[i],col=col_t[i],type="b")
312
      if (i==1){
313
        plot(y~x, pch=pch_t[i],col=col_t[i],type="b")
314
      }else{
315
        lines(y~x, pch=pch_t[i],col=col_t[i],type="b")
316
      }
296 317
    }
297 318
    
319
    
298 320
  }
299 321
  legend("topleft",legend=legend_text, 
300 322
         cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n")
......
347 369
  legend_text <- list_param$legend_text
348 370
  list_mod_name<-list_param$mod_name
349 371
  metric_name<-list_param$metric_name
372
  add_CI <- list_param$add_CI
350 373
  
351 374
  for (i in 1:length(list_obj)){
352 375
    
......
370 393
    #plotCI(y=y, x=x, uiw=ciw, col="red", main=paste(" MAE for ",mod_name,sep=""), barcol="blue", lwd=1,
371 394
    #       ylab="RMSE (C)", xlab=xlab_text)
372 395
    
373
    ciw   <- qt(0.975, no) * y_sd / sqrt(no)
396
    #ciw   <- qt(0.975, no) * y_sd / sqrt(no)
374 397
    
375
    if(i==1){
376
      plotCI(y=y, x=x, uiw=ciw, col=col_t[i], main=paste(" Comparison of ",metric_name," in ",mod_name,sep=""), barcol="blue", lwd=1,
377
             ylab="RMSE (C)", xlab=xlab_text)
378
      lines(y~x, col=col_t[i])
398
    #if(i==1){
399
    #  plotCI(y=y, x=x, uiw=ciw, col=col_t[i], main=paste(" Comparison of ",metric_name," in ",mod_name,sep=""), barcol="blue", lwd=1,
400
    #         ylab="RMSE (C)", xlab=xlab_text)
401
    #  lines(y~x, col=col_t[i])
402
      
403
    #}else{
404
    #  lines(y~x, col=col_t[i])
405
    #}
406
    
407
    if(add_CI[i]==TRUE){
408
      
409
      if (i==1){
410
        plotCI(y=y, x=x, uiw=ciw, col=col_t[i], barcol="blue", lwd=1,pch=pch_t[i],main=paste(" Comparison of ",metric_name," in ",mod_name,sep=""),
411
               ylab="RMSE (C)", xlab=xlab_text) 
412
        lines(y~x, pch=pch_t[i],col=col_t[i],type="b")
413
      }else{
414
        lines(y~x, pch=pch_t[i],col=col_t[i],type="b")
415
        plotCI(y=y, x=x, uiw=ciw, col=col_t[i], barcol="blue", lwd=1,pch=pch_t[i],
416
               ylab="RMSE (C)", xlab=xlab_text,add=TRUE) 
417
      }
379 418
      
380 419
    }else{
381
      lines(y~x, col=col_t[i])
420
      if (i==1){
421
        plot(y~x, pch=pch_t[i],col=col_t[i],type="b")
422
      }else{
423
        lines(y~x, pch=pch_t[i],col=col_t[i],type="b")
424
      }
382 425
    }
383 426
    
427
    
384 428
  }
385 429
  legend("topleft",legend=legend_text, 
386 430
         cex=1.2, pch=pch_t,col=col_t,lty=1,bty="n")
climate/research/oregon/interpolation/master_script_temp.R
10 10
#STAGE 5: Output analyses: assessment of results for specific dates...
11 11
#
12 12
#AUTHOR: Benoit Parmentier                                                                       
13
#DATE: 09/13/2013                                                                                 
13
#DATE: 09/14/2013                                                                                 
14 14

  
15 15
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363, TASK$568--   
16 16

  
......
80 80
#met_stations_outfiles_obj_file<-"met_stations_outfiles_obj_gam_CAI__365d_gam_CAI_lst_comb3_08252013.RData"
81 81

  
82 82
var<-"TMAX" # variable being interpolated
83
out_prefix<-"_365d_kriging_fus_lst_comb3_09132013"                #User defined output prefix
84
out_suffix<-"_OR_09132013"                                       #Regional suffix
83
out_prefix<-"_365d_kriging_fus_lst_comb3_09142013"                #User defined output prefix
84
out_suffix<-"_OR_09142013"                                       #Regional suffix
85 85
out_suffix_modis <-"_05302013"                       #pattern to find tiles produced previously     
86 86

  
87 87
#interpolation_method<-c("gam_fusion","gam_CAI","gam_daily") #other otpions to be added later
......
254 254
nb_sample_month <-1           #number of time random sampling must be repeated for every hold out proportion
255 255
step_month <-0.1         
256 256
constant_month <-0             #if value 1 then use the same samples as date one for the all set of dates
257
prop_minmax_month <-c(0.2,0.3)  #if prop_min=prop_max and step=0 then predictions are done for the number of dates...
257
prop_minmax_month <-c(0.4,0.7)  #if prop_min=prop_max and step=0 then predictions are done for the number of dates...
258 258

  
259 259
#dates_selected<-c("20100101","20100102","20100103","20100901") # Note that the dates set must have a specific format: yyymmdd
260 260
#dates_selected<-c("20100101","20100102","20100301","20100302","20100501","20100502","20100701","20100702","20100901","20100902","20101101","20101102")

Also available in: Unified diff