Project

General

Profile

« Previous | Next » 

Revision 5e7d95a7

Added by Benoit Parmentier almost 12 years ago

Methods comp part2: task-#491-major modifications, transect through images and stations, temporal profiles etc.

View differences:

climate/research/oregon/interpolation/methods_comparison_assessment_part2.R
1
#####################################  METHOD COMPARISON ##########################################
1
#####################################  METHODS COMPARISON ##########################################
2 2
#################################### Spatial Analysis ########################################
3
#This script utilizes the R ojbects created during the interpolation phase.                       #
4
#At this stage the script produces figures of various accuracy metrics and compare methods:       #
3
#This script is not aimed at producing new interpolation surfaces. It utilizes the R ojbects created 
4
# during the interpolation phase.                       #
5
# At this stage the script produces figures of various accuracy metrics and compare methods:       #
5 6
#- multisampling plots                                                                            #
6 7
#- spatial accuracy in terms of distance to closest station                                       #
7
#- spatial density of station network and accuracy metric                                         # 
8
#- spatial density of station network and accuracy metric 
9
#- visualization of maps of prediction and difference for comparison 
8 10
#AUTHOR: Benoit Parmentier                                                                        #
9
#DATE: 09/25/2012                                                                                 #
10
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#??--                                    #
11
#DATE: 10/30/2012                                                                                 #
12
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#491 --                                    #
11 13
###################################################################################################
12 14

  
13 15
###Loading R library and packages                                                      
......
186 188
savePlot(paste("Comparison_multisampling_fus_CAI_RMSE_averages",out_prefix,".png", sep=""), type="png")
187 189
dev.off()
188 190

  
189
### PART II EXAMINIG PREDICTIONS AND RESIDUALS TEMPORAL PROFILES ####
191
############################################################################
192
#### PART II EXAMINIG PREDICTIONS AND RESIDUALS TEMPORAL PROFILES ##########
190 193

  
191 194
l_f<-list.files(pattern="*tmax_predicted.*fusion5.rst$") #Search for files in relation to fusion
192 195
l_f2<-list.files(pattern="CAI_tmax_predicted.*_GAM_CAI2.rst$")
......
234 237
ghcn_mc<-cast(ghcn_m,station~date~variable,mean) #This creates an array of dimension 186,366,1
235 238

  
236 239
ghcn_value<-as.data.frame(ghcn_mc[,,1])
237
ghcn_value<-cbind(ghcn_locs,ghcn_value[,1:365])
240
ghcn_value<-cbind(ghcn_locs,ghcn_value[,1:365]) #This data frame contains values for 365 days
241
                                                #for 186 stations of year 2010...
238 242
write.table(ghcn_value,na="",file="extract3_dailyTmax_y2010.txt",sep=",")
239 243

  
240 244
id<-c("USW00094261","USW00004141","USC00356252","USC00357208")
241
id<-c("USW00024284","USC00354126","USC00358536","USC00354835",
245
#id<-c("USW00024284","USC00354126","USC00358536","USC00354835",
242 246
      "USC00356252","USC00359316","USC00358246","USC00350694",
243 247
      "USC00350699","USW00024230","USC00353542")
244 248

  
......
264 268
savePlot(paste("temporal_profile_station_locations_map",out_prefix,".png", sep=""), type="png")
265 269
dev.off()
266 270

  
267
stat_list<-vector("list",3 )
271
stat_list<-vector("list",3 )  #list containing the selected stations...
268 272
stat_list[[1]]<-ghcn_fus_pred
269 273
stat_list[[2]]<-ghcn_cai_pred
270 274
stat_list[[3]]<-ghcn_value
......
275 279
  m1<-match(id[i],ghcn_fus_pred$station)
276 280
  m2<-match(id[i],ghcn_cai_pred$station)
277 281
  m3<-match(id[i],ghcn_value$station)
278
  y1<-as.numeric(ghcn_fus_pred[m1,6:ncol(ghcn_fus_pred)])
279
  y2<-as.numeric(ghcn_cai_pred[m2,6:ncol(ghcn_cai_pred)])
280
  y3<-as.numeric(ghcn_value[m3,6:ncol(ghcn_value)])
281
  res2<-y2-y3
282
  res1<-y1-y3
282
  y1<-as.numeric(ghcn_fus_pred[m1,6:ncol(ghcn_fus_pred)]) #vector containing fusion time series of predictecd tmax
283
  y2<-as.numeric(ghcn_cai_pred[m2,6:ncol(ghcn_cai_pred)]) #vector containing CAI time series of predictecd
284
  y3<-as.numeric(ghcn_value[m3,6:ncol(ghcn_value)])  #vector containing observed time series of predictecd
285
  res2<-y2-y3 #CAI time series residuals
286
  res1<-y1-y3 #fusion time series residuals
283 287
  x<-1:365
284 288
  X11(6,15)
285
  plot(x,y1,type="l",col="black")
289
  plot(x,y1,type="l",col="red",ylab="tmax (C)",xlab="Day of year")
286 290
  lines(x,y2,col="blue")
287
  lines(x,y3,col="red")
291
  lines(x,y3,col="green")
288 292
  title(paste("temporal profile for station ", id[i],sep=""))
289 293
  # add a legend
290
  legend("topright",legend=c("fus","CAI","OBS"), cex=1.2, col=c("black","blue","red"),
294
  legend("topright",legend=c("fus","CAI","OBS"), cex=1.2, col=c("red","blue","green"),
291 295
         lty=1, title="tmax")
292 296
  savePlot(paste("Temporal_profile_",id[i],out_prefix,".png", sep=""), type="png")
297
  
298
  ### RESIDUALS PLOT
293 299
  zero<-rep(0,365)
294
  plot(x,res2,type="l",col="black")
295
  lines(x,res1,col="blue")
296
  lines(x,zero,col="red")
297
  legend("topright",legend=c("fus","CAI"), cex=1.2, col=c("black","blue"),
298
         lty=1, title="tmax")
300
  plot(x,res2,type="l",col="blue", ylab="tmax (C)",xlab="Day of year") #res2 contains residuals from cai
301
  lines(x,res1,col="red")        #res1 contains fus
302
  lines(x,zero,col="green")      
303
  legend("topright",legend=c("fus","CAI"), cex=1.2, col=c("red","blue"),
304
         lty=1)
305
  title(paste("temporal profile for station ", id[i],sep=""))
306
  
299 307
  savePlot(paste("Temporal_profile_res",id[i],out_prefix,".png", sep=""), type="png")
300 308
  
301 309
  ac_temp[i,1]<-mean(abs(res1),na.rm=T)
......
306 314
ac_temp$station<-id
307 315
names(ac_temp)<-c("fus","CAI","station") #ac_temp contains the MAE per station
308 316

  
317
### RESIDUALS FOR EVERY STATION...############
318

  
309 319
id<-ghcn_value$station #if runinng on all the station...
310 320
ac_temp2<-matrix(NA,length(id),2)
311 321

  
......
322 332
  ac_temp2[i,2]<-mean(abs(res2),na.rm=T)
323 333
}  
324 334

  
335

  
325 336
ac_temp2<-as.data.frame(ac_temp2)
326 337
ac_temp2$station<-id
327 338
names(ac_temp2)<-c("fus","CAI","station")
......
329 340
ac_temp2<-ac_temp2[order(ac_temp2$fus,ac_temp2$CAI), ]
330 341
ghcn_MAE<-merge(ghcn_locs,ac_temp2,by.x=station,by.y=station)
331 342

  
343
########### TRANSECT-- DAY OF YEAR PLOT...#########
344

  
345
id<-c("USW00024284","USC00354126","USC00358536","USC00354835",
346
"USC00356252","USC00359316","USC00358246","USC00350694",
347
"USC00350699","USW00024230","USC00353542")
348
id_order<-1:11
349
m<-match(id,ghcn_locs$station)
350
dat_id<-ghcn_locs[m,]  #creating new subset
351
#dat_id<-subset(ghcn_locs[gj])
352

  
353
filename<-sub(".shp","",infile6)             #Removing the extension from file.
354
reg_outline<-readOGR(".", filename)                 #reading shapefile 
355
X11()
356
s.range <- c(min(minValue(mm_01)), max(maxValue(mm_01)))
357
col.breaks <- pretty(s.range, n=50)
358
lab.breaks <- pretty(s.range, n=5)
359
temp.colors <- colorRampPalette(c('blue', 'white', 'red'))
360
plot(mm_01, breaks=col.breaks, col=temp.colors(length(col.breaks)-1),
361
     axis=list(at=lab.breaks, labels=lab.breaks))
362
plot(reg_outline, add=TRUE)
363
plot(dat_id,cex=1.5,add=TRUE)
364
title("Selected stations for comparison",line=3)
365
title("(Background: mean January LST)", cex=0.5, line=2)
366
coords<-coordinates(dat_id)
367
text(x=coords[,1],y=coords[,2],labels=as.character(id_order),cex=1.5, adj=c(0,1),offset=2) #c(0,1) for lower right corner!
368
savePlot(paste("temporal_profile_station_locations_map",out_prefix,".png", sep=""), type="png")
369
dev.off()
370

  
332 371
m1<-match(id,ghcn_fus_pred$station)
333 372
m2<-match(id,ghcn_cai_pred$station)
334 373
m3<-match(id,ghcn_value$station)
335 374
ghcn_dsub<-subset(ghcn,ghcn$station==id)
336 375
all.equal(m1,m2,m3) #OK order is the same
337
date_selection<-c("01-01-2010","01-08-2010")
376
date_selection<-c("01-01-2010","01-09-2010")
338 377
#date_str<-gsub(date_selection,"-","")
339
date_str<-c("20100101","20100801")
378
date_str<-c("20100101","20100901")
340 379
covar_dsub<-subset(ghcn_dsub,ghcn_dsub$date==date_str[i],select=c("station","ELEV_SRTM","LC1"))
341 380

  
342 381
date_pred<-as.Date(date_selection)
......
345 384

  
346 385
for (i in 1:length(date_pred)){
347 386
  doy<-doy_pred[i]+5 #column label
387
  doy<-243+5
348 388
  stat_subset<-cbind(id,ghcn_fus_pred[m1,doy],ghcn_cai_pred[m1,doy],ghcn_value[m1,doy])
349 389
  colnames(stat_subset)<-c("station","fus","cai","value")
350 390
  stat_subset<-as.data.frame(stat_subset)
......
352 392
    stat_subset[,j]<-as.numeric(as.character(stat_subset[,j]))  
353 393
  }
354 394
  X11()
355
  plot(1:11,stat_subset$value,type="b",col="red")
395
  plot(1:11,stat_subset$value,type="b",col="green",ylab="tmax",xlab="station transtect number")
356 396
  # xlabels())
357
  lines(1:11,stat_subset$fus,type="b",col="black")
358
  lines(1:11,stat_subset$cai,type="b",col="green")
397
  lines(1:11,stat_subset$fus,type="b",col="red")
398
  lines(1:11,stat_subset$cai,type="b",col="blue")
359 399
  
360
  legend("bottomright",legend=c("obs","fus","cai"), cex=1.2, col=c("red","black","green"),
400
  legend("bottomright",legend=c("obs","fus","cai"), cex=1.2, col=c("green","red","blue"),
361 401
         lty=1, title="tmax")
362 402
  title(paste("Daily tmax prediction ",date_selection[i],sep=" "))
363 403
  savePlot(paste("transect_profile_tmax_",date_str[i],out_prefix,".png", sep=""), type="png")
364 404
  dev.off()
365 405
}
366 406

  
367
##### USING TEMPORAL IMAGES...
407
##############################################
408
########## USING TEMPORAL IMAGES...############
368 409

  
369 410
date_list<-vector("list", length(l_f))
370 411
for (k in 1:length(l_f)){
......
579 620
savePlot(paste("Barplot_freq_er_spat_",out_prefix,".png", sep=""), type="png")
580 621
dev.off()
581 622

  
582

  
623
############################################################
583 624
##############             PART III         #############
584 625
### Average MAE per station and coarse grid box (0.5 deg)
585 626

  
627
#For all stations
628

  
629
ghcn$station
630

  
631
# For validation and training stations...
632

  
586 633
test$abs_res_fus<-abs(test$res_fus)
587 634
test2$abs_res_CAI<-abs(test2$res_CAI)
588 635

  
......
597 644
oc<-oc+1
598 645
station_v_er$oc<-oc
599 646

  
600
unique(ghcn$id)
647
unique(ghcn$station)
601 648

  
602 649
coords<- station_v_er[,c('x_OR83M','y_OR83M')]
603 650
coordinates(station_v_er)<-coords
......
618 665
  list_cai_data[[k]]<-rbind(data_s2,data_v2)
619 666
}
620 667

  
621
### GRID BOX AVERAGING ####################
622
#Create the averaged grid box...##############
668
############ GRID BOX AVERAGING ####################
669
####### Create the averaged grid box...##############
623 670

  
624 671
rast_agg<-aggregate(raster_pred,fact=50,fun=mean,na.rm=TRUE) #Changing the raster resolution by aggregation factor
672

  
673
ghcn_sub<-as.data.frame(subset(ghcn, select=c("station","x_OR83M","y_OR83M")))
674
ghcn_sub_melt<-melt(ghcn_sub,
675
                    measure=c("x_OR83M","y_OR83M"), 
676
                    id=c("station"),
677
                    na.rm=F)
678
ghcn_stations<-as.data.frame(cast(ghcn_sub_melt,station~variable,mean))
679
coords<- ghcn_stations[,c('x_OR83M','y_OR83M')]
680
coordinates(ghcn_stations)<-coords
681
proj4string(ghcn_stations)<-proj_str  #Need to assign coordinates...
682
oc_all<-vector("numeric",nrow(ghcn_stations))
683
oc_all<-oc_all+1
684

  
685
ghcn_stations$oc_all<-oc_all
686
rast_oc_all<-rasterize(ghcn_stations,rast_agg,"oc_all",na.rm=TRUE,fun=sum)
687
ac_agg50$oc_all<-values(rast_oc_all)
688

  
689
plot(rast_oc_all, main="Number of stations in coarsened 50km grid")
690
plot(reg_outline, add=TRUE)
691
fdens_all50<-as.data.frame(freq(rast_oc_all))
692
tot50<-sum(fdens_all50$count[1:(nrow(fdens_all50)-1)])
693
percent<-(fdens_all50$count/tot50)*100
694
percent[length(percent)]<-NA
695
fdens_all50$percent<-percent
625 696
#list_agg_MAE<-vector("list",nel)
626 697
#list_agg_RMSE<-vector("list",nel)
627 698
#list_density_training<-vector("list",nel)
......
658 729
  ac_agg50$MAE_cai<-as.numeric(values(rast_MAE_cai))
659 730
  names(ac_agg50)<-c("oc","MAE_fus","MAE_cai")
660 731
  
661
  ghcn_sub<-as.data.frame(subset(ghcn, select=c("station","x_OR83M","y_OR83M")))
662
  ghcn_sub_melt<-melt(ghcn_sub,
663
                      measure=c("x_OR83M","y_OR83M"), 
664
                      id=c("station"),
665
                      na.rm=F)
666
  ghcn_stations<-as.data.frame(cast(ghcn_sub_melt,station~variable,mean))
667
  coords<- ghcn_stations[,c('x_OR83M','y_OR83M')]
668
  coordinates(ghcn_stations)<-coords
669
  proj4string(ghcn_stations)<-proj_str  #Need to assign coordinates...
670
  oc_all<-vector("numeric",nrow(ghcn_stations))
671
  oc_all<-oc_all+1
672
  
673
  ghcn_stations$oc_all<-oc_all
674
  rast_oc_all<-rasterize(ghcn_stations,rast_agg,"oc_all",na.rm=TRUE,fun=sum)
675
  ac_agg50$oc_all<-values(rast_oc_all)
732
  #ghcn_sub<-as.data.frame(subset(ghcn, select=c("station","x_OR83M","y_OR83M")))
733
  #ghcn_sub_melt<-melt(ghcn_sub,
734
  #                    measure=c("x_OR83M","y_OR83M"), 
735
  #                    id=c("station"),
736
  #                    na.rm=F)
737
  #ghcn_stations<-as.data.frame(cast(ghcn_sub_melt,station~variable,mean))
738
  #coords<- ghcn_stations[,c('x_OR83M','y_OR83M')]
739
  #coordinates(ghcn_stations)<-coords
740
  #proj4string(ghcn_stations)<-proj_str  #Need to assign coordinates...
741
  #oc_all<-vector("numeric",nrow(ghcn_stations))
742
  #oc_all<-oc_all+1
743
  
744
  #ghcn_stations$oc_all<-oc_all
745
  #rast_oc_all<-rasterize(ghcn_stations,rast_agg,"oc_all",na.rm=TRUE,fun=sum)
746
  #ac_agg50$oc_all<-values(rast_oc_all)
676 747
  
677 748
  td1<-aggregate(MAE_fus~oc,data=ac_agg50,mean) 
678 749
  td2<-aggregate(MAE_cai~oc,data=ac_agg50,mean)
......
697 768
  
698 769
  plot(rast_oc, main="Number of val stations in coarsened 50km grid")
699 770
  plot(reg_outline, add=TRUE)
700
  plot(rast_oc_all, main="Number of stations in coarsened 50km grid")
771
  plot(rast_oc_t, main="Number of training stations in coarsened 50km grid")
701 772
  plot(reg_outline, add=TRUE)
702 773
  
703 774
  #MAKE IT AN OBJECT for future function return...
......
762 833
infile2<-"list_10_dates_04212012.txt"                             #List of 10 dates for the regression
763 834
dates2<-read.table(paste(path,"/",infile2,sep=""), sep="")         #Column 1 contains the names of raster files
764 835
date_list2<-as.list(as.character(dates2[,1]))
836
names_statistics<-c("mean","sd","min","max")
837
stat_val_m<-matrix(NA,nrow=length(date_list2),ncol=length(names_statistics))
838
colnames(stat_val_m)<-names_statistics
839
rownames(stat_val_m)<-date_list2
840
stat_val_m<-as.data.frame(stat_val_m)
765 841

  
766 842
for (k in 1:length(date_list2)){
767 843
  
......
769 845
  #date_proc<-date_list[[k]]
770 846
  index<-match(as.character(date_proc2),unlist(date_list)) #find the correct date... in the 365 stack
771 847
  #raster_pred<-raster(rp_raster,index)
772
  raster_pred1<-raster(l_f[[index]])
848
  raster_pred1<-raster(l_f[[index]])  # Fusion image
773 849
  projection(raster_pred1)<-proj_str
774 850
  raster_pred1<-mask(raster_pred1,mask_land_NA)
775 851
  
776
  raster_pred2<-raster(l_f2[[index]])
852
  raster_pred2<-raster(l_f2[[index]]) # CAI image
777 853
  projection(raster_pred2)<-proj_str
778 854
  raster_pred2<-mask(raster_pred2,mask_land_NA)
779 855
  
......
795 871
       axis=list(at=lab.breaks, labels=lab.breaks))
796 872
  #plot(reg_outline, add=TRUE)
797 873
  savePlot(paste("comparison_raster1_CAI_fusion_tmax_prediction_",date_list2[[k]],out_prefix,".png", sep=""), type="png")
798

  
799
  hist(predictions, breaks=col.breaks,freq=FALSE,maxpixels=ncells(predictions),xlabel="tmax (C)")
874
  
875
  
876
  stat_val_m$mean[i]<-cellStats(raster_pred1,na.rm=TRUE,stat=mean)    #Calculating the standard deviation for the 
877
  t1<-cellStats(raster_pred1,na.rm=TRUE,stat=mean)    #Calculating the standard deviation for the 
878
  t2<-cellStats(raster_pred2,na.rm=TRUE,stat=mean)    #Calculating the standard deviation for the 
879
  t1<-cellStats(raster_pred1,na.rm=TRUE,stat=sd)    #Calculating the standard deviation for the 
880
  t2<-cellStats(raster_pred2,na.rm=TRUE,stat=sd)    #Calculating the standard deviation for the 
881
  t1<-cellStats(raster_pred1,na.rm=TRUE,stat=min)    #Calculating the standard deviation for the 
882
  t2<-cellStats(raster_pred2,na.rm=TRUE,stat=min)    #Calculating the standard deviation for the 
883
  t1<-cellStats(raster_pred1,na.rm=TRUE,stat=max)    #Calculating the standard deviation for the 
884
  t2<-cellStats(raster_pred2,na.rm=TRUE,stat=max)    #Calculating the standard deviation for the 
885
  
886
  hist(predictions,freq=FALSE,maxpixels=ncells(predictions),xlabel="tmax (C)")
800 887
  savePlot(paste("comparison_histo_CAI_fusion_tmax_prediction_",date_list2[[k]],out_prefix,".png", sep=""), type="png")
801 888
  #plot(predictions)
802 889
  dev.off()
803 890
  
891
  X11(6,12)
804 892
  diff<-raster_pred2-raster_pred1
805 893
  s.range <- c(min(minValue(diff)), max(maxValue(diff)))
806 894
  col.breaks <- pretty(s.range, n=50)
......
808 896
  temp.colors <- colorRampPalette(c('blue', 'white', 'red'))
809 897
  plot(diff, breaks=col.breaks, col=temp.colors(length(col.breaks)-1),
810 898
       axis=list(at=lab.breaks, labels=lab.breaks))
811
  
812
}  
899
  title(paste("Difference between CAI and fusion for ",date_list2[[k]],sep=""))
900
  savePlot(paste("comparison_diff_CAI_fusion_tmax_prediction_",date_list2[[k]],out_prefix,".png", sep=""), type="png")
901
  dev.off()
813 902

  
903
}  
814 904

  
815 905
write.table(data_dist,file=paste("data_dist_",out_prefix,".txt",sep=""),sep=",")
816 906
write.table(test,file=paste("ac_spat_dist",out_prefix,".txt",sep=""),sep=",")
......
818 908
write.table(td,file=paste("MAE_density_station_",out_prefix,".txt",sep=""),sep=",")
819 909
write.table(td_all,file=paste("MAE_density_station_all_",out_prefix,".txt",sep=""),sep=",")
820 910

  
821

  
822

  
823
#### END OF THE SCRIPT
911
symbols(c(2e5, 4e5), c(2e5, 3e5), circles=rep(2e4, 2), inches=FALSE, add=TRUE)
912
text(c(2e5, 4e5), c(2e5, 3e5), labels=1:2,cex=0.5)
913
points(coordinates(pts), type="c")
914
text(coordinates(pts), labels=9:11, cex=0.8)
915
points(coordinates(pts), type="b", pch=as.character(1:length(pts))
916
       points(coordinates(pts), type="b", pch=as.character(9:11)
917
              
918
########### END OF THE SCRIPT #############

Also available in: Unified diff