Revision 0c563d63
Added by Benoit Parmentier almost 11 years ago
climate/research/oregon/interpolation/multi_timescale_paper_interpolation.R | ||
---|---|---|
7 | 7 |
#Analyses, figures, tables and data for the paper are also produced in the script. |
8 | 8 |
#AUTHOR: Benoit Parmentier |
9 | 9 |
#CREATED ON: 10/31/2013 |
10 |
#MODIFIED ON: 04/06/2014
|
|
10 |
#MODIFIED ON: 04/07/2014
|
|
11 | 11 |
#Version: 4 |
12 | 12 |
#PROJECT: Environmental Layers project |
13 | 13 |
################################################################################################# |
... | ... | |
42 | 42 |
#### FUNCTION USED IN SCRIPT |
43 | 43 |
|
44 | 44 |
function_analyses_paper1 <-"contribution_of_covariates_paper_interpolation_functions_10222013.R" #first interp paper |
45 |
function_analyses_paper2 <-"multi_timescales_paper_interpolation_functions_03182014.R"
|
|
45 |
function_analyses_paper2 <-"multi_timescales_paper_interpolation_functions_04072014.R"
|
|
46 | 46 |
|
47 | 47 |
############################## |
48 | 48 |
#### Parameters and constants |
... | ... | |
153 | 153 |
|
154 | 154 |
###################### PART I: Generate tables for paper ########## |
155 | 155 |
#Table 1: Covariates used (not produced in R) |
156 |
#Table 2: Covariates models and functional form (not produced in R)
|
|
157 |
#Table 3: Combinations of models tested (not produced in R)
|
|
158 |
#Table 4. Average RMSE for all models acroos year 2010 wiht 30% daily hold out
|
|
156 |
#Table 2: Combinations of models tested (not produced in R)
|
|
157 |
#Table 3. Average RMSE for all models across year 2010 wiht 30% daily hold out
|
|
158 |
#Table 4. RMSE and monthly holdout using linear model with cat var
|
|
159 | 159 |
#Table 5: Average monhtly RMSE for CAI model 7, climatology surfaces, CAI and single time scale methods |
160 |
#Table 6: Monthly correlations TMax-elev and TMax-LST |
|
160 | 161 |
|
161 |
##### Table 4: Contribution of covariates using validation accuracy metrics
|
|
162 |
##### Table 3: Contribution of covariates using validation accuracy metrics
|
|
162 | 163 |
## This is a table of accuracy |
163 | 164 |
|
164 | 165 |
summary_metrics_v_list<-lapply(list_raster_obj_files,FUN=function(x){x<-load_obj(x);x[["summary_metrics_v"]]$avg$rmse}) |
165 | 166 |
names(summary_metrics_v_list) |
166 | 167 |
|
168 |
|
|
167 | 169 |
#for gam models |
168 | 170 |
table_gam<- summary_metrics_v_list[grep("gam",names(summary_metrics_v_list))] |
169 | 171 |
table_gam <-do.call(cbind,table_gam) |
... | ... | |
180 | 182 |
table_gwr <- do.call(cbind,table_gwr) |
181 | 183 |
table_gwr <- table_gwr[1:7,] |
182 | 184 |
|
183 |
table4_paper <- as.data.frame(do.call(rbind,list(table_gam,table_kriging,table_gwr)))
|
|
184 |
table4_paper <- round(table4_paper,digit=3) #roundto three digits teh differences
|
|
185 |
table4_paper$Methods <- c(rep("gam",7),
|
|
185 |
table3_paper <- as.data.frame(do.call(rbind,list(table_gam,table_kriging,table_gwr)))
|
|
186 |
table3_paper <- round(table3_paper,digit=3) #roundto three digits teh differences
|
|
187 |
table3_paper$Methods <- c(rep("gam",7),
|
|
186 | 188 |
rep("kriging",7), |
187 | 189 |
rep("gwr",7)) |
188 |
|
|
190 |
#Drop fss |
|
191 |
#grep("gam_fss",names(table3_paper),invert=T,value=F) |
|
192 |
table3_paper <- subset(table3_paper, |
|
193 |
select=grep("gam_fss",names(table3_paper),invert=T,value=T)) |
|
194 |
|
|
195 |
list_tb <-lapply(list_raster_oj_files,FUN=function(x){x<-load_obj(x);x[["tb_diagnostic_v"]]}) |
|
196 |
|
|
197 |
stat<-"sd" |
|
198 |
training <- "FALSE" |
|
199 |
|
|
200 |
list_param_calc_stat <- list(list_tb,stat,training) |
|
201 |
names(list_param_calc_stat) <- c("list_tb","stat","training") |
|
202 |
|
|
203 |
#Calculate standard deviation for each metric |
|
204 |
#undebug(calc_stat_from_tb_list) |
|
205 |
#sd1 <- calc_stat_from_tb_list(1,list_param=list_param_calc_stat) # see function script |
|
206 |
|
|
207 |
list_sd<- lapply(1:length(list_tb),FUN=calc_stat_from_tb_list,list_param=list_param_calc_stat) |
|
208 |
names(list_sd) <- names(list_tb) #now select all the daily in one column and CAI in antoehr |
|
209 |
#table_sd <- do.call(rbind,list_sd) |
|
210 |
tab_sd <- do.call(rbind,list_sd) |
|
211 |
tab_sd <- subset(tab_sd,pred_mod!="mod_kr") |
|
212 |
|
|
213 |
table_sd <- as.data.frame(tab_sd[grep("daily",rownames(tab_sd),value=F),c("rmse")]) |
|
214 |
names(table_sd) <- c("rmse_s") |
|
215 |
rmse_m <- (tab_sd[grep("CAI",rownames(tab_sd),value=F),c("rmse")]) |
|
216 |
table_sd$rmse_m <- rmse_m |
|
217 |
|
|
218 |
#Combined sd in one table for mod1 (baseline 2) |
|
219 |
#table_sd <- do.call(rbind,list(sd1[1,],sd3[1,],sd4[1,])) #table containing the sd for the three mdethods for baseline 2 |
|
220 |
table_sd <- round(table_sd,digit=3) #round to three digits the differences |
|
221 |
|
|
222 |
#combined tables with accuracy metrics and their standard deviations |
|
223 |
table_ac <- table3_paper[,1:2] #only use the rmse values |
|
224 |
table_ac_combined <-table_combined_symbol(table_ac,table_sd,"±") |
|
225 |
#lapply(lapply(table_ac,FUN=paste,table_sd,FUN=paste,sep="±"),FUN=paste) |
|
226 |
|
|
227 |
table_ac_combined$Methods <- table3_paper$Methods |
|
228 |
table3_paper <- table_ac_combined |
|
229 |
|
|
230 |
|
|
189 | 231 |
#Check input covariates and model formula: |
190 | 232 |
#list_formulas <-raster_prediction_obj_2$method_mod_obj[[1]]$formulas #formulas for models run comb5 |
191 | 233 |
list_formulas <-unlist(lapply(list_raster_obj_files[[1]],FUN=function(x){x<-load_obj(x);x$method_mod_obj[[1]]$formulas})) |
192 | 234 |
#strsplit(list_formulas,"~") |
193 | 235 |
|
194 |
table4_paper$Formulas<-rep(list_formulas,3)
|
|
195 |
table4_paper<-table4_paper[(c(5,4,1,2,3))] #reordering columns
|
|
236 |
table3_paper$Formulas<-rep(list_formulas,3)
|
|
237 |
table3_paper<-table3_paper[(c(4,3,1,2))] #reordering columns
|
|
196 | 238 |
|
197 | 239 |
#Testing siginificance between models |
198 | 240 |
#mod_compk1 <-kruskal.test(tb1$rmse ~ as.factor(tb1$pred_mod)) #Kruskal Wallis test |
199 | 241 |
#mod_compk2 <-kruskal.test(tb2$rmse ~ as.factor(tb2$pred_mod)) |
200 | 242 |
#print(mod_compk1) #not significant |
201 | 243 |
|
202 |
###Table 4, writeout
|
|
203 |
names_table_col <-c("Models","Method","Single time scale","Multiple time scale CAI","Multiple time scale FSS") #
|
|
244 |
###Table 3, writeout
|
|
245 |
names_table_col <-c("Models","Method","Single time scale","Multiple time scale CAI") # |
|
204 | 246 |
|
205 |
names(table4_paper)<- names_table_col
|
|
206 |
print(table4_paper) #show resulting table
|
|
247 |
names(table3_paper)<- names_table_col
|
|
248 |
print(table3_paper) #show resulting table
|
|
207 | 249 |
|
208 |
#Now write out table 4
|
|
250 |
#Now write out table 3
|
|
209 | 251 |
|
210 |
file_name<-paste("table4_multi_timescale_paper","_",out_prefix,".txt",sep="")
|
|
211 |
write.table(table4_paper,file=file_name,sep=",")
|
|
252 |
file_name<-paste("table3_multi_timescale_paper","_",out_prefix,".txt",sep="")
|
|
253 |
write.table(table3_paper,file=file_name,sep=",")
|
|
212 | 254 |
|
213 |
################ Table 6 ################
|
|
255 |
################ Table 4 ################
|
|
214 | 256 |
#### Test for monthly hold out |
215 | 257 |
|
216 | 258 |
#names(tb_v_list) <- paste("tb_v_",names(tb_v_list),sep="") |
... | ... | |
228 | 270 |
list_table_method_holdout_obj <-lapply(1:length(list_interp_method),FUN=extract_table_term_factor,list_param=list_param_extract_term) |
229 | 271 |
list_table_method_holdout<- lapply(list_table_method_holdout_obj, |
230 | 272 |
FUN=function(x){x$table_method}) |
231 |
table6 <- do.call(rbind,list_table_method_holdout)
|
|
232 |
table6_paper <- table6[,c(9,1,2,3,4,5,6,7,8)]
|
|
273 |
table4 <- do.call(rbind,list_table_method_holdout)
|
|
274 |
table4_paper <- table4[,c(9,1,2,3,4,5,6,7,8)]
|
|
233 | 275 |
|
234 |
file_name<-paste("table6_multi_timescale_paper","_",out_prefix,".txt",sep="")
|
|
235 |
write.table(table6_paper,file=file_name,sep=",")
|
|
276 |
file_name<-paste("table4_multi_timescale_paper","_",out_prefix,".txt",sep="")
|
|
277 |
write.table(table4_paper,file=file_name,sep=",")
|
|
236 | 278 |
|
237 | 279 |
################ Table 5 ################ |
238 | 280 |
#### Monthly RMSE for GAM CAI Climatology, GAM CAI daily and Single time scale |
... | ... | |
249 | 291 |
list_stat_tb_gam_daily <-calc_stat_by_month_tb(names_mod,tb_gam_daily,month_holdout=F) |
250 | 292 |
|
251 | 293 |
list_stat_tb_gam_CAI$avg |
294 |
list_stat_tb_gam_CAI$sd |
|
252 | 295 |
|
253 | 296 |
clim_rmse <- subset(tb_m_gam_CAI,prop==30 & pred_mod== "mod7",select="rmse") |
254 | 297 |
CAI_rmse <- subset(list_stat_tb_gam_CAI$avg,prop_month==30 & pred_mod== "mod7",select="rmse") |
255 | 298 |
daily_rmse <- subset(list_stat_tb_gam_daily$avg, pred_mod== "mod7",select="rmse") |
299 |
#now get sd: no sd for clim |
|
300 |
#clim_rmse_sd <- subset(tb_m_gam_CAI,prop==30 & pred_mod== "mod7",select="rmse") |
|
301 |
#CAI_rmse_sd <- subset(list_stat_tb_gam_CAI$avg,prop_month==30 & pred_mod== "mod7",select="rmse") |
|
302 |
#daily_rmse_sd <- subset(list_stat_tb_gam_daily$avg, pred_mod== "mod7",select="rmse") |
|
256 | 303 |
|
257 | 304 |
table5 <- data.frame(clim=clim_rmse,CAI_month=CAI_rmse,daily_month=daily_rmse) |
258 | 305 |
table5 <- round(table5,digit=3) #roundto three digits teh differences |
... | ... | |
273 | 320 |
file_name<-paste("table5_multi_timescale_paper","_",out_prefix,".txt",sep="") |
274 | 321 |
write.table(table5,file=file_name,sep=",") |
275 | 322 |
|
323 |
################ Table 6 ################ |
|
324 |
|
|
325 |
### NOW EXMAINE LST-TMAx and elev cor |
|
326 |
|
|
327 |
#Use data from accuracy with no monthly hold out |
|
328 |
raster_obj <- load_obj(list_raster_obj_files$gam_CAI) |
|
329 |
data_month <- (raster_obj$method_mod_obj[[1]]$data_month_s) |
|
330 |
dates<-unique(raster_obj$tb_diagnostic_v$date) |
|
331 |
#Extract monthly data frame used in fitting |
|
332 |
list_data_month_s <-extract_list_from_list_obj(raster_obj$validation_mod_month_obj,"data_s") |
|
333 |
year_nbs <- sapply(list_data_month_s,FUN=length) #number of observations per month |
|
334 |
#Convert sp data.frame and combined them in one unique df, see function define earlier |
|
335 |
data_month_all <-convert_spdf_to_df_from_list(list_data_month_s) #long rownames |
|
336 |
|
|
337 |
names_tmp<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08", |
|
338 |
"mm_09","mm_10","mm_11","mm_12")#,"LST","elev_s") |
|
339 |
|
|
340 |
list_cor_x <- vector("list",length=12) |
|
341 |
for(i in 1:12){ |
|
342 |
data_month<-list_data_month_s[[i]] |
|
343 |
data_x <- data_month[,c("TMax","elev_s",names_tmp[i])] #,"LC1","LC6","LC9")] |
|
344 |
data_x <- as.data.frame(data_x)[,1:3] |
|
345 |
cor_x<- cor(data_x,use="complete.obs") |
|
346 |
list_cor_x[[i]] <- cor_x |
|
347 |
} |
|
348 |
|
|
349 |
cor_tab<-lapply(list_cor_x,function(x){x[1,2:3]}) |
|
350 |
#cor_tab<-lapply(list_cor_x,function(x){x[1,2:4]}) |
|
351 |
|
|
352 |
cor_tab<-do.call(rbind,cor_tab) |
|
353 |
table6 <- as.data.frame(cor_tab) |
|
354 |
table6 <- round(table6,digit=3) #roundto three digits teh differences |
|
355 |
|
|
356 |
table6$month <- month.abb |
|
357 |
table6 <- table6[,c(3,1,2)] |
|
358 |
|
|
359 |
###Table 6, writeout |
|
360 |
names_table_col <-c("Month","Elevation-TMax", |
|
361 |
"LST-TMax") |
|
362 |
|
|
363 |
names(table6)<- names_table_col |
|
364 |
print(table6) #show resulting table |
|
365 |
|
|
366 |
#Now write out table 6 |
|
367 |
|
|
368 |
file_name<-paste("table6_correlation_multi_timescale_paper","_",out_prefix,".txt",sep="") |
|
369 |
write.table(table6,file=file_name,sep=",") |
|
370 |
|
|
276 | 371 |
####################################################### |
277 | 372 |
####### PART 2: generate figures for paper ############# |
278 | 373 |
|
... | ... | |
402 | 497 |
|
403 | 498 |
p <- xyplot(rmse~prop_month|method_interp, # |set up pannels using method_interp |
404 | 499 |
group=pred_mod,data=avg_tb_ac, #group by model (covariates) |
405 |
main="RME by monthly hold-out proportions and interpolation methods", |
|
500 |
main="RMSE by monthly hold-out proportions and interpolation methods",
|
|
406 | 501 |
type="b",as.table=TRUE, |
407 | 502 |
index.cond=list(c(1,3,2)), #this provides the order of the panels) |
408 | 503 |
pch=1:length(avg_tb_ac$pred_mod), |
... | ... | |
416 | 511 |
|
417 | 512 |
dev.off() |
418 | 513 |
|
419 |
##Do analyses using density of stations per holdout |
|
420 |
##DO lm modeling with factor variable being hold out proportion |
|
421 |
|
|
422 |
tb_mv <- do.call(rbind.fill,tb_mv_list) #[c("gam_CAI","kriging_CAI","gwr_CAI")]) |
|
423 |
tb_mv <- subset(tb_mv,tb_mv$method_interp %in%c("gam_CAI","kriging_CAI","gwr_CAI") ) |
|
424 |
|
|
425 |
#names(tb_mv) |
|
426 |
#reg_area_r <- rasterize(interp_area,elev,field=1,fun=mean) |
|
427 |
#tot_area <-freq(reg_area_r)[1,2]*prod(res(reg_area_r)/1000) |
|
428 |
|
|
429 |
#table(tb_mv$pred_mod) |
|
430 |
#unique(tb_mv$n) #note that |
|
431 |
#station_density_prop <- unique(tb_mv$n)/tot_area |
|
432 |
#station_density_prop[1:7]*1000 |
|
433 |
|
|
434 |
|
|
435 |
extract_table_term_factor <- function(i,list_param){ |
|
436 |
#This function generate a linear model for proportion of hold out effect on accuracy |
|
437 |
#Add option to choose MAE later |
|
438 |
|
|
439 |
#First parse arguments |
|
440 |
interpolation_method <- list_param$list_interp_method[[i]] |
|
441 |
tb_mv <- list_param$tb_mv |
|
442 |
|
|
443 |
#Begin script: |
|
444 |
|
|
445 |
list_pred_mod_name <- unique(tb_mv$pred_mod) |
|
446 |
list_prop_cat <- unique(tb_mv$prop_month) |
|
447 |
list_pred_mod_name <- grep(c("mod_kr"),list_pred_mod_name,value=TRUE,invert=TRUE) |
|
448 |
list_mod_table <- vector("list",length=length(list_pred_mod_name)) |
|
449 |
tb_dat<- subset(tb_mv,tb_mv$method_interp==interpolation_method) |
|
450 |
|
|
451 |
for(j in 1:length(list_mod_table)){ |
|
452 |
mod <-lm(rmse~ as.factor(prop_month), |
|
453 |
data=subset(tb_dat,tb_dat$pred_mod==list_pred_mod_name[[j]])) |
|
454 |
term_table <- summary(mod)$coefficients |
|
455 |
list_mod_table[[j]] <- term_table |
|
456 |
} |
|
457 |
names(list_mod_table) <- list_pred_mod_name |
|
458 |
|
|
459 |
tx <- lapply(list_mod_table,function(x){x[,c(1,4)]}) |
|
460 |
tx <-lapply(tx, round,digit=3) |
|
461 |
|
|
462 |
#tx <- format(tx,digits=3) |
|
463 |
|
|
464 |
#column_tab <- paste(format(tx[[1]][,1],digits=3)," ", |
|
465 |
# "(",format(tx[[1]][,2],digits=3),")",sep="") |
|
466 |
#column_tab <-lapply(tx,function(x){paste(format(x[,1],digits=3)," ", |
|
467 |
# "(",format(x[,2],digits=3),")",sep="")}) |
|
468 |
column_tab <-lapply(tx,function(x){as.data.frame(paste(format(x[,1],digits=3)," ", |
|
469 |
"(",format(x[,2],digits=3),")",sep=""))}) |
|
470 |
|
|
471 |
table_method <- as.data.frame(do.call(cbindX,column_tab)) |
|
472 |
names(table_method) <- list_pred_mod_name |
|
473 |
table_method$method_interp <- rep(interpolation_method,nrow(table_method)) |
|
474 |
table_method$prop <- list_prop_cat |
|
475 |
|
|
476 |
mod_table_obj<- list(list_mod_table,table_method) |
|
477 |
names(mod_table_obj) <- c("list_mod_table","table_method") |
|
478 |
return(mod_table_obj) |
|
479 |
} |
|
480 |
|
|
481 |
|
|
482 |
table4_paper <- as.data.frame(do.call(rbind,list(table_gam,table_kriging,table_gwr))) |
|
483 |
table4_paper <- round(table4_paper,digit=3) #roundto three digits teh differences |
|
484 |
table4_paper$Methods <- c(rep("gam",7), |
|
485 |
rep("kriging",7), |
|
486 |
rep("gwr",7)) |
|
487 |
|
|
488 |
|
|
489 |
} |
|
490 |
|
|
491 |
#(res(reg_area_r)) |
|
492 |
|
|
493 |
#### DATA MONTH: correlation between Tmax and LST comparison... |
|
494 |
|
|
495 |
|
|
514 |
|
|
496 | 515 |
#Correlation tmax elev and LST... |
497 | 516 |
|
498 | 517 |
################################################ |
... | ... | |
967 | 986 |
col=c("red","green","black","black","black"), |
968 | 987 |
lty=1:4,pch=1:4,bty="n") |
969 | 988 |
title("Monthly average tmax for stations in Oregon ",cex=2.4) |
970 |
|
|
971 |
|
|
989 |
|
|
972 | 990 |
######################################## |
973 | 991 |
####### Figure 8c: LST, TMax and RMSE for GAM_CAI (monthly averages) |
974 | 992 |
|
... | ... | |
1008 | 1026 |
x_cor_tmp2 <-layerStats(r_tmp2,stat="pearson",na.rm=T) |
1009 | 1027 |
|
1010 | 1028 |
##Now plot Figure 8d |
1011 |
plot(1:12,x_cor_tmp1[[1]][1:12,13],ylim=c(-0.8,0.5),
|
|
1029 |
plot(1:12,x_cor_tmp1[[1]][1:12,13],ylim=c(-0.9,0.9),
|
|
1012 | 1030 |
type="b",lty="solid",pch=1, |
1013 | 1031 |
ylab="Pearson corelation",xlab="month", |
1014 | 1032 |
cex=2,cex.lab=1.5)#,cex.axis=1.8) |
... | ... | |
1020 | 1038 |
title("Correlation between LST-elevation and LST-Forest",cex=2.4) |
1021 | 1039 |
#closing file for figure 8 |
1022 | 1040 |
dev.off() |
1023 |
|
|
1024 |
|
|
1025 |
### NOW EXMAINE LST-TMAx and elev cor |
|
1026 |
|
|
1027 |
plot(TMax~mm_01,data=subset(data_month_all,data_month_all$month==1)) |
|
1028 |
plot(TMax~mm_02,data=subset(data_month_all,data_month_all$month==1)) |
|
1029 |
plot(TMax~mm_06,data=subset(data_month_all,data_month_all$month==6)) |
|
1030 |
plot(TMax~mm_07,data=subset(data_month_all,data_month_all$month==7)) |
|
1031 |
|
|
1032 |
cor(data_month) |
|
1033 |
|
|
1034 |
names_tmp<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08", |
|
1035 |
"mm_09","mm_10","mm_11","mm_12")#,"LST","elev_s") |
|
1036 |
|
|
1037 |
list_cor_x <- vector("list",length=12) |
|
1038 |
for(i in 1:12){ |
|
1039 |
data_month<-list_data_month_s[[i]] |
|
1040 |
data_x <- data_month[,c("TMax","elev_s",names_tmp[i],"LC1","LC6","LC9")] |
|
1041 |
data_x <- as.data.frame(data_x)[,1:3] |
|
1042 |
cor_x<- cor(data_x,use="complete.obs") |
|
1043 |
list_cor_x[[i]] <- cor_x |
|
1044 |
} |
|
1045 |
|
|
1046 |
cor_tab<-lapply(list_cor_x,function(x){x[1,2:5]}) |
|
1047 |
cor_tab<-lapply(list_cor_x,function(x){x[1,2:4]}) |
|
1048 |
|
|
1049 |
cor_tab<-do.call(rbind,cor_tab) |
|
1050 |
#cor() |
|
1051 |
data |
|
1052 |
#plot(TMax~mm_01,data_month_all) |
|
1053 |
|
|
1054 | 1041 |
|
1055 |
# for(i in 1:12){ |
|
1056 |
# data_month<-list_data_month_s[[i]] |
|
1057 |
# data_x <- data_month[,c("TMax","elev_s",names_tmp[i],"LC1","LC6","LC9")] |
|
1058 |
# data_x <- as.data.frame(data_x)[,1:3] |
|
1059 |
# cor_x<- cor(data_x,use="complete.obs") |
|
1060 |
# list_cor_x[[i]] <- cor_x |
|
1061 |
#} |
|
1062 |
|
|
1063 | 1042 |
################################################ |
1064 | 1043 |
############ GENERATE ANNEX FIGURES |
1065 | 1044 |
|
Also available in: Unified diff
multi-timescale paper, revisions draft3 additional modifications