#different covariates using two baselines. Accuracy methods are added in the function script to evaluate results.
#Figures, tables and data for the contribution of covariates paper are also produced in the script.
#AUTHOR: Benoit Parmentier
-#DATE: 08/15/2013
-#Version: 2
+#DATE: 09/09/2013
+#Version: 3
#PROJECT: Environmental Layers project
#################################################################################################
|
...

#### FUNCTIONS USED IN SCRIPT

-function_analyses_paper <-"contribution_of_covariates_paper_interpolation_functions_08152013.R"
+function_analyses_paper <-"contribution_of_covariates_paper_interpolation_functions_09092013.R"
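#The functions file above is presumably source()'d in the elided lines that follow; a minimal sketch,
#with an assumed script_path (not part of the original script):
#script_path <- "/home/parmentier/Data/IPLANT_project/scripts"
#source(file.path(script_path,function_analyses_paper))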

##############################
#### Parameters and constants
...
out_dir<-"/home/parmentier/Data/IPLANT_project/paper_analyses_tables_fig_08032013"
setwd(out_dir)

+infile_reg_outline <- "/data/project/layers/commons/data_workflow/inputs/region_outlines_ref_files/OR83M_state_outline.shp" #input region outline defined by polygon: Oregon
+met_stations_outfiles_obj_file<-"/data/project/layers/commons/data_workflow/output_data_365d_gam_fus_lst_test_run_07172013/met_stations_outfiles_obj_gam_fusion__365d_gam_fus_lst_test_run_07172013.RData"
+CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
y_var_name <- "dailyTmax"
+out_prefix<-"analyses_09092013"

-out_prefix<-"analyses_08152013"
-method_interpolation <- "gam_daily"
+#method_interpolation <- "gam_daily"
covar_obj_file_1 <- "covar_obj__365d_gam_day_lst_comb3_08132013.RData"
met_obj_file_1 <- "met_stations_outfiles_obj_gam_daily__365d_gam_day_lst_comb3_08132013.RData"
#met_stations_outfiles_obj_gam_daily__365d_gam_day_lst_comb3_08132013.RData
...

#Load objects containing training, testing and model objects

+met_stations_obj <- load_obj(met_stations_outfiles_obj_file)
covar_obj <- load_obj(file.path(in_dir1,covar_obj_file_1)) #Read covariates object for the GAM daily method
infile_covariates <- covar_obj$infile_covariates
infile_reg_outline <- covar_obj$infile_reg_outline
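#load_obj() is defined in the sourced functions file; for reference, a minimal sketch of such a helper
#(an assumption, not the original implementation):
#load_obj <- function(f){
#  env <- new.env()
#  obj_name <- load(f, envir=env)[1] #load the .RData file and take the first object it contains
#  env[[obj_name]]
#}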
...
table_data2 <-summary_metrics_v2$avg[,c("mae","rmse","me","r")]

###Table 3a, baseline 1: s(lat,lon)
-#Chnage here !!! need to reorder rows based on mod first
+#Change here !!! need to reorder rows based on mod first (a sketch follows below)
model_col<-c("Baseline1","Elevation","Northing","Easting","LST","DISTOC","Forest","CANHEIGHT","LST*Forest","LST*CANHEIGHT")
df3a<- as.data.frame(sapply(table_data2,FUN=function(x) x-x[1])) #difference of each model from the baseline (first row)
df3a<- round(df3a,digits=3) #round the differences to three digits
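#One possible way to address the reordering note above, applied before computing the differences
#(the model id column name "pred_mod" in summary_metrics_v2$avg is an assumption):
#avg_tb <- summary_metrics_v2$avg
#table_data2 <- avg_tb[order(avg_tb$pred_mod), c("mae","rmse","me","r")]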
...
#Figure 6: Spatial pattern of prediction for one day

### Figure 1: Oregon study area
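#Packages assumed to be loaded in the elided header of the script: sp and rgdal for readOGR() and
#spTransform(), and raster for getData(), subset() of s_raster and projectRaster().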
+#3 parameters from output
+infile_monthly<-list_outfiles$monthly_covar_ghcn_data #outfile4 from database_covar script
+infile_daily<-list_outfiles$daily_covar_ghcn_data #outfile3 from database_covar script
+infile_locs<- list_outfiles$loc_stations_ghcn #outfile2? from database_covar script
+
+ghcn_dat <- readOGR(dsn=dirname(met_stations_obj$monthly_covar_ghcn_data),
+                    sub(".shp","",basename(met_stations_obj$monthly_covar_ghcn_data)))
+ghcn_dat_WGS84 <- spTransform(ghcn_dat,CRS_locs_WGS84) #project stations to WGS84
+
+interp_area <- readOGR(dsn=dirname(infile_reg_outline),sub(".shp","",basename(infile_reg_outline)))
+interp_area_WGS84 <- spTransform(interp_area,CRS_locs_WGS84) #project region outline to WGS84
+
+usa_map <- getData('GADM', country='USA', level=1) #Get US map
+usa_map_2 <- usa_map[usa_map$NAME_1!="Alaska",] #remove Alaska
+usa_map_2 <- usa_map_2[usa_map_2$NAME_1!="Hawaii",] #remove Hawaii
+usa_map_OR <- usa_map_2[usa_map_2$NAME_1=="Oregon",] #get OR
+
+elev <- subset(s_raster,"elev_s")
+elev_WGS84 <- projectRaster(from=elev,crs=CRS_locs_WGS84,method="ngb")
+
+#set up the output file to plot
+res_pix<-960
+col_mfrow<-1
+row_mfrow<-1
+png(filename=paste("Figure1_contribution_covariates_study_area_",out_prefix,".png",sep=""),
+    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
+par(mfrow=c(1,1))
+
+
+#plot(elev_WGS84)
+plot(interp_area_WGS84)
+plot(ghcn_dat_WGS84,add=T)
+title("Study area with meteorological stations")
+#this works on non-sp plots too: scale bar position
+#scale_position<-c(450000, 600000)
+#arrow_position<-c(900000, 600000)
+
+#legend("topright",legend=c(0:7),title="Number of change",
+#       pt.cex=1.4,cex=2.1,fill=rev(terrain.colors(8)),bty="n")
+#label_scalebar<-c("0","125","250")
+#scalebar(d=250000, xy=scale_position, type = 'bar',
+#         divs=3,label=label_scalebar,below="kilometers",
+#         cex=1.8)
+
+## North arrow
+#SpatialPolygonsRescale(layout.north.arrow(), offset = arrow_position,
+#                       scale = 150000, fill=c("transparent","black"),plot.grid=FALSE)
+##note that scale in SpatialPolygonsRescale sets the size of the north arrow!!
+
+### PLACE INSET MAP OF THE USA
+#opar <- par(fig=c(0.7, 0.95, 0.5, 0.75), new=TRUE)
+#opar <- par(fig=c(0, 0, 1, 1), new=TRUE)
+
+par(mar = c(0,0,0,0)) # remove margins
+#opar <- par(fig=c(0.9,0.95,0.8, 0.85), new=TRUE)
+opar <- par(fig=c(0.85,0.95,0.8, 0.9), new=TRUE)
+
+#p1<-spplot(ghcn_dat,"station")
+#p2<-spplot(usa_map_2,"NAME_1")
+#print(p1,position=c(0,0,1,1),more=T)
+#print(p2,position=c(0,0,0.3,0.3),more=T)
+
+plot(usa_map_2,border="black") #border and lwd are options of the graphics package polygon plotting
+plot(usa_map_OR,col="grey",add=T)
+box()
+dev.off()

-#...add code

### Figure 2: Method comparison workflow

-# Workflow not generated in R
+# Workflow figure is not generated in R

################################################
################### Figure 3. MAE/RMSE and distance to closest fitting station
...
title_plot <- "RMSE and distance to closest station for baseline 2"
y_lab_text <- "RMSE (C)"
#quick test
-list_param_plot<-list(list_dist_obj,col_t,pch_t,legend_text,mod_name,x_tick_labels,metric_name,title_plot,y_lab_text)
-names(list_param_plot)<-c("list_dist_obj","col_t","pch_t","legend_text","mod_name","x_tick_labels","metric_name","title_plot","y_lab_text")
+add_CI <- c(TRUE,TRUE,TRUE) #whether to draw confidence intervals for each of the three methods
+
+list_param_plot<-list(list_dist_obj,col_t,pch_t,legend_text,mod_name,x_tick_labels,metric_name,title_plot,y_lab_text,add_CI)
+names(list_param_plot)<-c("list_dist_obj","col_t","pch_t","legend_text","mod_name","x_tick_labels","metric_name","title_plot","y_lab_text","add_CI")
plot_dst_MAE(list_param_plot)

metric_name <-"mae_tb"
title_plot <- "MAE and distance to closest fitting station"
y_lab_text <- "MAE (C)"
+add_CI <- c(TRUE,TRUE,TRUE)
#Now set up plotting device
res_pix<-480
col_mfrow<-2
...
par(mfrow=c(row_mfrow,col_mfrow))

#Figure 3a
-list_param_plot<-list(list_dist_obj,col_t,pch_t,legend_text,mod_name,x_tick_labels,metric_name,title_plot,y_lab_text)
-names(list_param_plot)<-c("list_dist_obj","col_t","pch_t","legend_text","mod_name","x_tick_labels","metric_name","title_plot","y_lab_text")
+list_param_plot<-list(list_dist_obj,col_t,pch_t,legend_text,mod_name,x_tick_labels,metric_name,
+                      title_plot,y_lab_text,add_CI)
+names(list_param_plot)<-c("list_dist_obj","col_t","pch_t","legend_text","mod_name","x_tick_labels","metric_name",
+                          "title_plot","y_lab_text","add_CI")
+#debug(plot_dst_MAE) #leftover debugging call, commented out so the plot call below runs non-interactively
plot_dst_MAE(list_param_plot)
title(xlab="Distance to closest fitting station (km)")
|
...
pch_t<- 1:length(col_t)
legend_text <- c("GAM","Kriging","GWR")
mod_name<-c("mod1","mod1","mod1") #selected models
+add_CI <- c(TRUE,TRUE,TRUE)
+#add_CI <- c(TRUE,FALSE,FALSE)

##### plot figure 4 for paper
####
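#The list()/names() pairs used with plot_prop_metrics() below (and with plot_dst_MAE() above) repeat the
#same parameter names twice; a small helper could remove that duplication. A sketch, not part of the
#original script:
#make_param_list <- function(...){
#  l <- list(...)
#  names(l) <- sapply(as.list(substitute(list(...)))[-1], deparse) #use the argument expressions as names
#  l
#}
#list_param_plot <- make_param_list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name,add_CI)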
...
png(filename=file.path(out_dir,png_file_name),
    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
par(mfrow=c(row_mfrow,col_mfrow))
+
metric_name<-"mae"
-list_param_plot<-list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name)
-names(list_param_plot)<-c("list_prop_obj","col_t","pch_t","legend_text","mod_name","metric_name")
+list_param_plot<-list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name,add_CI)
+names(list_param_plot)<-c("list_prop_obj","col_t","pch_t","legend_text","mod_name","metric_name","add_CI")

+#debug(plot_prop_metrics)
plot_prop_metrics(list_param_plot)
title(main="MAE for hold out and methods",
      xlab="Hold out validation/testing proportion",
...

#now figure 4b
metric_name<-"rmse"
-list_param_plot<-list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name)
-names(list_param_plot)<-c("list_prop_obj","col_t","pch_t","legend_text","mod_name","metric_name")
+list_param_plot<-list(list_prop_obj,col_t,pch_t,legend_text,mod_name,metric_name,add_CI)
+names(list_param_plot)<-c("list_prop_obj","col_t","pch_t","legend_text","mod_name","metric_name","add_CI")
plot_prop_metrics(list_param_plot)
title(main="RMSE for hold out and methods",
      xlab="Hold out validation/testing proportion",
...
## Calculate average difference in RMSE for all three methods
#read in relevant data:
+
tb1_s<-extract_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"metrics_s")
rownames(tb1_s)<-NULL #remove row names
tb1_s$method_interp <- "gam_daily" #add type of interpolation...out_prefix too??
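#Sketch of the idea behind the "average difference" comment above, assuming a matching testing table
#(the "metrics_v" element name and the "rmse" column are assumptions):
#tb1_v <- extract_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"metrics_v") #testing metrics
#mean(tb1_v$rmse - tb1_s$rmse, na.rm=TRUE) #average testing-minus-training RMSE for the GAM daily runs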
...
col_t<-c("red","blue","black")
pch_t<- 1:length(col_t)

+##Make this a function???
y_range<-range(prop_obj_kriging$avg_tb$rmse,prop_obj_kriging_s$avg_tb$rmse)
#y_range<-range(prop_obj_gam$avg_tb$rmse,prop_obj_gam_s$avg_tb$rmse)
plot(prop_obj_gam$avg_tb$rmse ~ prop_obj_gam$avg_tb$prop, ylab="",xlab="",type="b",col="red",pch=pch_t[1],ylim=y_range,lty=2)
...
lines(prop_obj_kriging$avg_tb$rmse ~ prop_obj_kriging$avg_tb$prop,ylab="",xlab="",type="b",ylim=y_range,pch=pch_t[2],col="blue",lty=2)
lines(prop_obj_kriging_s$avg_tb$rmse ~ prop_obj_kriging_s$avg_tb$prop,ylab="",xlab="",type="b",ylim=y_range,pch=pch_t[2],col="blue")

+plot_ac_holdout_prop <- function(l_prop,l_col_t,l_pch_t,l_lty_t,add_CI,y_range){
+  #Plot average RMSE against hold-out proportion for a list of prop objects, adding 95% confidence
+  #intervals where requested; plotCI() from the gplots package is assumed to be available.
+  for(i in 1:length(l_prop)){
+    if(i==1){
+      plot(l_prop[[i]]$avg_tb$rmse ~ l_prop[[i]]$avg_tb$prop,ylab="",xlab="",type="b",pch=l_pch_t[i],ylim=y_range,col=l_col_t[i],lty=l_lty_t[i])
+    }else{
+      lines(l_prop[[i]]$avg_tb$rmse ~ l_prop[[i]]$avg_tb$prop,ylab="",xlab="",type="b",pch=l_pch_t[i],ylim=y_range,col=l_col_t[i],lty=l_lty_t[i])
+    }
+    if(add_CI[i]==TRUE){
+      ciw <- qt(0.975, l_prop[[i]]$n_tb$rmse) * l_prop[[i]]$sd_tb$rmse / sqrt(l_prop[[i]]$n_tb$rmse) #half-width of the 95% CI
+      plotCI(y=l_prop[[i]]$avg_tb$rmse, x=l_prop[[i]]$avg_tb$prop, uiw=ciw, col=l_col_t[i], barcol="blue", lwd=1, pch=l_pch_t[i],
+             add=TRUE)
+    }
+  }
+}
+
+l_prop <- list(prop_obj_gam,prop_obj_gam_s,prop_obj_gwr_s,prop_obj_gwr,prop_obj_kriging,prop_obj_kriging_s)
+
+l_col_t <- c("red","red","black","black","blue","blue")
+l_pch_t <- c(1,1,3,3,2,2)
+l_lty_t <- c(2,1,1,2,2,1) #dashed for testing, solid for training
+add_CI <- c(TRUE,TRUE,FALSE,FALSE,FALSE,FALSE)
+y_range <- c(0.5,3)
+
+plot_ac_holdout_prop(l_prop,l_col_t,l_pch_t,l_lty_t,add_CI,y_range)
+
+legend_text <- c("GAM","Kriging","GWR","training","testing")
+
legend("topleft",legend=legend_text,
-       cex=0.9, pch=pch_t,col=col_t,lty=1,bty="n")
+       cex=0.9, pch=c(pch_t,NA,NA),col=c(col_t,NA,NA),lty=c(NA,NA,NA,1,2),bty="n")
title(main="Training and testing RMSE for hold out and methods",
      xlab="Hold out validation/testing proportion",
      ylab="RMSE (C)")

+
boxplot(diff_mae_data) #plot differences in training and testing accuracies for the three methods
title(main="Difference between training and testing MAE",
      xlab="Interpolation method",
...

######## NOW GET ACCURACY BY STATION

-list_data_v<-extract_list_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"data_v")
+list_data_s <-extract_list_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"data_s")
+list_data_v <-extract_list_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"data_v")
+
+#number of observations per day
+year_nbv <- sapply(list_data_v,FUN=length)
+year_nbs <- sapply(list_data_s,FUN=length)
+nb_df <- data.frame(nv=year_nbv,ns=year_nbs)
+nb_df$n_tot <- year_nbv + year_nbs
+range(nb_df$n_tot)
+
data_v_test <- list_data_v[[1]]

#Convert the sp data.frames and combine them into one unique df, see the function defined earlier
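#The actual combining function is defined in the sourced functions file (not shown here); a minimal
#sketch of the idea, keeping the day index as a column (the column name "date_index" is an assumption):
#combine_sp_df_list <- function(l){
#  do.call(rbind, lapply(seq_along(l), function(i){
#    df <- as.data.frame(l[[i]]) #drops the sp slots, keeps attributes plus coordinates
#    df$date_index <- i
#    df
#  }))
#}
#data_v_all <- combine_sp_df_list(list_data_v)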
#Commit message: running kriging fusion with 40 to 70% monthly hold out and changes in paper 1 script figures and analyses