Project

General

Profile

« Previous | Next » 

Revision 280bb8d2

Added by Benoit Parmentier about 11 years ago

modifications assessments of methods and covariates figures and analyses

View differences:

climate/research/oregon/interpolation/contribution_of_covariates_paper_interpolation.R
34 34

  
35 35
#Additional libraries not used in workflow
36 36
library(pgirmess)                            # Krusall Wallis test with mulitple options, Kruskalmc {pgirmess}  
37

  
37
library(ncf)
38 38
#### FUNCTION USED IN SCRIPT
39 39

  
40 40
function_analyses_paper <-"contribution_of_covariates_paper_interpolation_functions_10152013.R"
......
234 234
#infile_monthly<-list_outfiles$monthly_covar_ghcn_data #outile4 from database_covar script
235 235
#infile_daily<-list_outfiles$daily_covar_ghcn_data  #outfile3 from database_covar script
236 236
#infile_locs<- list_outfiles$loc_stations_ghcn #outfile2? from database covar script
237
#
238
#
239
#ghcn_day_dat <- readOGR(dsn=dirname(met_stations_obj$daily_covar_ghcn_data),
240
#                    sub(".shp","",basename(met_stations_obj$daily_covar_ghcn_data)))
241
#ghcn_dayq_dat <- readOGR(dsn=dirname(met_stations_obj$daily_query_ghcn_data),
242
#                        sub(".shp","",basename(met_stations_obj$daily_query_ghcn_data)))
243

  
237 244

  
238 245
ghcn_dat <- readOGR(dsn=dirname(met_stations_obj$monthly_covar_ghcn_data),
239 246
        sub(".shp","",basename(met_stations_obj$monthly_covar_ghcn_data)))
......
313 320
x_tick_labels <- limit_val<-seq(5,125, by=10)
314 321
metric_name <-"rmse_tb"
315 322
title_plot <- "RMSE and distance to closest station for baseline 2"
316
y_lab_text <- "RMSE (C)"
323
#y_lab_text <- "RMSE (C)"
324
#y_lab_text <- expression(paste("RMSE", ("^o","C")"), sep="")) 
325
y_lab_text <- "RMSE (°C)"
317 326
#quick test
318 327
add_CI <- c(TRUE,TRUE,TRUE)
319 328

  
......
323 332

  
324 333
metric_name <-"mae_tb"
325 334
title_plot <- "MAE and distance to closest fitting station"
326
y_lab_text <- "MAE (C)"
335
y_lab_text <- "MAE (°C)"
327 336
add_CI <- c(TRUE,TRUE,TRUE)
328 337
#Now set up plotting device
329 338
res_pix<-480
......
417 426
plot_prop_metrics(list_param_plot)
418 427
title(main="MAE for hold out and methods",
419 428
      xlab="Hold out validation/testing proportion",
420
      ylab="MAE (C)")
429
      ylab="MAE (°C)")
421 430

  
422 431
#now figure 4b
423 432
metric_name<-"rmse"
......
426 435
plot_prop_metrics(list_param_plot)
427 436
title(main="RMSE for hold out and methods",
428 437
      xlab="Hold out validation/testing proportion",
429
      ylab="RMSE (C)")
438
      ylab="RMSE (°C)")
430 439

  
431 440
dev.off()
432 441

  
......
504 513
boxplot(diff_rmse_data) #plot differences in training and testing accuracies for three methods
505 514
title(main="Training and testing RMSE for hold out and interpolation methods",
506 515
      xlab="Interpolation method",
507
      ylab="RMSE (C)")
516
      ylab="RMSE (°C)")
508 517

  
509 518
boxplot(diff_mae_data_mult)
510 519
boxplot(diff_rmse_data_mult) #plot differences in training and testing accuracies for three methods
......
585 594

  
586 595
title(main="Training and testing RMSE for hold out and methods",
587 596
      xlab="Hold out validation/testing proportion",
588
      ylab="RMSE (C)")
597
      ylab="RMSE (°C)")
589 598

  
590 599

  
591 600
boxplot(diff_mae_data_mult[-4]) #plot differences in training and testing accuracies for three methods
592 601

  
593 602
title(main="Difference between training and testing MAE",
594 603
      xlab="Interpolation method",
595
      ylab="MAE (C)")
604
      ylab="MAE (°C)")
596 605

  
597 606
dev.off()
598 607

  
599 608
############### STUDY TIME AND accuracy
600
  #########Figure 6: Monthly RMSE averages for the three interpolation methods: GAM, GWR and Kriging.
609
#########Figure 6: Monthly RMSE averages for the three interpolation methods: GAM, GWR and Kriging.
601 610

  
602 611
mae_tmp<- data.frame(gam=tb1[tb1$pred_mod=="mod1",c("mae")],
603 612
                     kriging=tb3[tb3$pred_mod=="mod1",c("mae")],
......
674 683
y_range<-range(tb1_month$mae,tb3_month$mae,tb4_month$mae)
675 684
xlab_tick <- unique(tb1$month)
676 685
xlab_text <-"Month"
677
ylab_text <- "MAE (C)"
686
ylab_text <- "MAE (°C)"
678 687
plot(1:12,tb1_month$mae,col=c("red"),type="b",ylim=y_range,xlab=xlab_text,ylab=ylab_text,xaxt="n")
679 688
lines(1:12,tb3_month$mae,col=c("blue"),type="b")
680 689
lines(1:12,tb4_month$mae,col=c("black"),type="b")
......
684 693
       cex=0.9, pch=c(pch_t),col=c(col_t),lty=c(1,1,1),bty="n")
685 694

  
686 695
#Second plot
687
ylab_text<-"MAE (C)"
696
ylab_text<-"MAE (°C)"
688 697
xlab_text<-"Month"
689 698
#y_range<-range(month_data_list$gam$mae,month_data_list$kriging$mae,month_data_list$gwr$mae)
690 699
#y_range<-range(month_data_list$gam$mae)
......
734 743
####### FIGURE 7: Spatial pattern ######
735 744

  
736 745
y_var_name <-"dailyTmax"
737
index<-244 #index corresponding to January 1
746
index<-244 #index corresponding to Sept 1
738 747

  
739 748
lf1 <- raster_prediction_obj_1$method_mod_obj[[index]][[y_var_name]] #select relevant raster images for the given dates
740 749
lf3 <- raster_prediction_obj_3$method_mod_obj[[index]][[y_var_name]]
......
760 769
min_val <- 0
761 770
layout_m<-c(1,3) #one row two columns
762 771

  
763
png(paste("Figure7__spatial_pattern_tmax_prediction_levelplot_",date_selected,out_prefix,".png", sep=""),
764
    height=480*layout_m[1],width=480*layout_m[2])
772
#png(paste("Figure7__spatial_pattern_tmax_prediction_levelplot_",date_selected,out_prefix,".png", sep=""),
773
#    height=480*layout_m[1],width=480*layout_m[2])
765 774

  
766
levelplot(pred_temp_s,main="Interpolated Surfaces Method Comparison", ylab=NULL,xlab=NULL,
775
p<-levelplot(pred_temp_s,main="Interpolated Surfaces Method Comparison", ylab=NULL,xlab=NULL,
767 776
          par.settings = list(axis.text = list(font = 2, cex = 1.3),layout=layout_m,
768 777
                              par.main.text=list(font=2,cex=2),strip.background=list(col="white")),par.strip.text=list(font=2,cex=1.5),
769 778
          names.attr=names_layers,col.regions=temp.colors,at=seq(max_val,min_val,by=0.01))
779
print(p)
770 780
#col.regions=temp.colors(25))
771
dev.off()
781
#dev.off()
772 782

  
773
## FIGURE COMPARISON OF  MODELS COVARRIATES
783
## FIGURE COMPARISON OF  MODELS COVARRIATES: Figure 7...
774 784

  
775 785
lf2 <- raster_prediction_obj_2$method_mod_obj[[index]][[y_var_name]]
776 786
lf2 #contains the models for gam
......
779 789
date_selected <- "20109101"
780 790
#names_layers <-c("mod1=s(lat,long)+s(elev)","mod4=s(lat,long)+s(LST)","diff=mod1-mod4")
781 791
names_layers <-c("mod1 = s(lat,long)","mod2 = s(lat,long)+s(elev)","mod3 = s(lat,long)+s(N_w)","mod4 = s(lat,long)+s(E_w)",
782
                 "mod5 = s(lat,long)+s(LST)","mod6 = s(lat,long)+s(DISTOC)","mod7 = s(lat,long)+s(LC1)",
783
                 "mod8 = s(lat,long)+s(LC1,LST)","mod9 = s(lat,long)+s(CANHGHT)","mod10 = s(lat,long)+s(LST,CANHGHT)")
792
                 "mod5 = s(lat,long)+s(LST)","mod6 = s(lat,long)+s(DISTOC)","mod7 = s(lat,long)+s(FOREST)",
793
                 "mod8 = s(lat,long)+s(CANHGHT)","mod9 = s(lat,long)+s(LST,FOREST)","mod10 = s(lat,long)+s(LST,CANHGHT)")
784 794

  
785 795
#names_layers<-names(pred_temp_s)
786 796
#names(pred_temp_s)<-names_layers
......
799 809
png(paste("Figure_7_spatial_pattern_tmax_prediction_models_baseline1_gam_levelplot_",date_selected,out_prefix,".png", sep=""),
800 810
    height=480*layout_m[1],width=480*layout_m[2])
801 811

  
802
levelplot(pred_temp_s,main="Interpolated Surfaces Model Comparison baseline 1", ylab=NULL,xlab=NULL,
812
p<- levelplot(pred_temp_s,main="Interpolated Surfaces Model Comparison baseline 1", ylab=NULL,xlab=NULL,
803 813
          par.settings = list(axis.text = list(font = 2, cex = 1.3),layout=layout_m,
804 814
                              par.main.text=list(font=2,cex=2),strip.background=list(col="white")),par.strip.text=list(font=2,cex=1.5),
805 815
          names.attr=names_layers,col.regions=temp.colors,at=seq(max_val,min_val,by=0.01))
806 816
#col.regions=temp.colors(25))
817
print(p)
807 818
dev.off()
808 819

  
820
########################################################
821
#### Examining spatial correlograms for each model...use spedep but can be problematic when many datapoints
822
#The follwing method using Moran's I raster command works for now but does not provide CI/std dev...
823

  
824
r_stack <-pred_temp_s
825

  
826
r<- subset(r_stack,"mod1")
827
Moran(r) #with lag 1 and default rooks lag correlation
828

  
829
#generate filters for 10 lags: quick solution
830

  
831
list_filters<-lapply(1:10,FUN=autocor_filter_fun,f_type="queen") #generate 10 filters
832
#moran_list <- lapply(list_filters,FUN=Moran,x=r)
833

  
834
list_param_moran <- list(list_filters=list_filters,r_stack=r_stack)
835
#moran_r <-moran_multiple_fun(1,list_param=list_param_moran)
836
nlayers(r_stack) 
837
moran_I_df <-mclapply(1:nlayers(r_stack), list_param=list_param_moran, FUN=moran_multiple_fun,mc.preschedule=FALSE,mc.cores = 10) #This is the end bracket from mclapply(...) statement
838

  
839
moran_df <- do.call(cbind,moran_I_df) #bind Moran's I value 10*nlayers data.frame
840
moran_df$lag <-1:nrow(moran_df)
841

  
842
#prepare to automate the plotting of   all columns
843
mydata<-moran_df
844
dd <- do.call(make.groups, mydata[,-ncol(mydata)]) 
845
dd$lag <- mydata$lag 
846

  
847
layout_m<-c(4,3) #one row two columns
848

  
849
png(paste("Figure_7b_spatial_correlogram_tmax_prediction_models_baseline1_gam_levelplot_",date_selected,out_prefix,".png", sep=""),
850
    height=480*layout_m[1],width=480*layout_m[2])
851

  
852
p<-xyplot(data ~ lag | which, dd,type="b",
853
          par.settings = list(axis.text = list(font = 2, cex = 1.3),layout=layout_m,
854
                              par.main.text=list(font=2,cex=2),strip.background=list(col="white")),par.strip.text=list(font=2,cex=1.5),
855
          strip=strip.custom(factor.levels=names_layers),
856
          xlab=list(label="Spatial lag neighbor", cex=2,font=2),
857
          ylab=list(label="Moran's I", cex=2, font=2))
858
print(p)
859

  
860
dev.off()
861

  
862
### median for first neighbour distances in data
863
ghcn_day_dat <- readOGR(dsn=dirname(met_stations_obj$daily_covar_ghcn_data),
864
                        sub(".shp","",basename(met_stations_obj$daily_covar_ghcn_data)))
865
loc_spdf <- readOGR(dsn=dirname(met_stations_obj$loc_stations),
866
                        sub(".shp","",basename(met_stations_obj$loc_stations)))
867
loc_2010_spdf<-subset(loc_spdf,loc_spdf$STAT_ID%in%ghcn_day_dat$station) #stations in 2010
868
loc_2010_spdf <- spTransform(loc_2010_spdf, CRS=CRS(proj4string(ghcn_day_dat)))
869
loc_knn1 <- knearneigh(coordinates(loc_2010_spdf), k=1) #lag1
870
class(loc_knn1) #knn object
871
loc_nb1 <- knn2nb(loc_knn1) #nb object
872
dsts_nb1 <-unlist(nbdists(loc_nb1,coordinates(loc_2010_spdf))) #vector with first nearest neighbour distance for each station
873

  
874
median(dsts_nb1) #median first nearest neighbour distance: 20023.03 m
875
mean(dsts_nb1) #mean first nearest neighbour distance : 22480.91 meters
876
sd(dsts_nb1) #
877
hist(dsts_nb1)
878

  
879
################ #FIGURE 8: difference image
809 880

  
810
################ #FIGURE 8
811 881
lf1 <- raster_prediction_obj_1$method_mod_obj[[index]][[y_var_name]]
812 882
lf1 #contains the models for gam
813 883

  
......
830 900
png(paste("Figure_8a_spatial_pattern_tmax_prediction_models_gam_levelplot_",date_selected,out_prefix,".png", sep=""),
831 901
    height=480*layout_m[1],width=480*layout_m[2])
832 902

  
833
levelplot(pred_temp_s,main="Interpolated Surfaces Model Comparison", ylab=NULL,xlab=NULL,
903
p<-levelplot(pred_temp_s,main="Interpolated Surfaces Model Comparison", ylab=NULL,xlab=NULL,
834 904
          par.settings = list(axis.text = list(font = 2, cex = 1.3),layout=layout_m,
835 905
                              par.main.text=list(font=2,cex=2),strip.background=list(col="white")),par.strip.text=list(font=2,cex=1.5),
836 906
          names.attr=names_layers,col.regions=temp.colors,at=seq(max_val,min_val,by=0.01))
837 907
#col.regions=temp.colors(25))
838

  
908
print(p)
839 909
#col.regions=temp.colors(25))
840 910
dev.off()
841 911

  
......
853 923
#          par.settings = list(axis.text = list(font = 2, cex = 1.3),layout=c(1,1),
854 924
#                              par.main.text=list(font=2,cex=2),strip.background=list(col="white")),par.strip.text=list(font=2,cex=1.5),
855 925
#          names.attr=names_layers,col.regions=temp.colors)
856
dev.off()
926
dev.off
857 927

  
858 928
######## NOW GET A ACCURACY BY STATIONS
859 929

  
climate/research/oregon/interpolation/contribution_of_covariates_paper_interpolation_functions.R
579 579
  return(stat_tb)
580 580
}
581 581

  
582
### generate filter for Moran's I function in raster package
583
autocor_filter_fun <-function(no_lag=1,f_type="queen"){
584
  if(f_type=="queen"){
585
    no_rows <- 2*no_lag +1
586
    border_row <-rep(1,no_rows)
587
    other_row <- c(1,rep(0,no_rows-2),1)
588
    other_rows <- rep(other_row,no_rows-2)
589
    mat_data<- c(border_row,other_rows,border_row)
590
    autocor_filter<-matrix(mat_data,nrow=no_rows)
591
  }
592
  #if(f_type=="rook){} #add later
593
  return(autocor_filter)
594
}
595

  
596
#Now run Moran's I for raster image given a list of  filters for different lags and raster stack
597
moran_multiple_fun<-function(i,list_param){
598
  #Parameters:
599
  #list_filters: list of filters with different lags in the image
600
  #r_stack: stack of raster image, only the selected layer is used...
601
  list_filters <-list_param$list_filters
602
  r <- subset(list_param$r_stack,i)
603
  moran_list <- lapply(list_filters,FUN=Moran,x=r)
604
  moran_v <-as.data.frame(unlist(moran_list))
605
  names(moran_v)<-names(r)
606
  return(moran_v)
607
}
608

  
582 609

  
583 610
################### END OF SCRIPT ###################
584 611

  

Also available in: Unified diff