Revision 8f68282a
Added by Benoit Parmentier over 10 years ago
climate/research/oregon/interpolation/global_run_scalingup_assessment_part2.R | ||
---|---|---|
5 | 5 |
#Analyses, figures, tables and data are also produced in the script. |
6 | 6 |
#AUTHOR: Benoit Parmentier |
7 | 7 |
#CREATED ON: 03/23/2014 |
8 |
#MODIFIED ON: 07/02/2014
|
|
8 |
#MODIFIED ON: 08/14/2014
|
|
9 | 9 |
#Version: 3 |
10 | 10 |
#PROJECT: Environmental Layers project |
11 | 11 |
#COMMENTS: analyses for run 3 global using 2 specific tiles |
... | ... | |
40 | 40 |
|
41 | 41 |
#### FUNCTION USED IN SCRIPT |
42 | 42 |
|
43 |
function_analyses_paper1 <-"contribution_of_covariates_paper_interpolation_functions_10222013.R" #first interp paper
|
|
44 |
function_analyses_paper2 <-"multi_timescales_paper_interpolation_functions_05052014.R"
|
|
43 |
function_analyses_paper1 <-"contribution_of_covariates_paper_interpolation_functions_07182014.R" #first interp paper
|
|
44 |
function_analyses_paper2 <-"multi_timescales_paper_interpolation_functions_08132014.R"
|
|
45 | 45 |
|
46 | 46 |
load_obj <- function(f) |
47 | 47 |
{ |
... | ... | |
69 | 69 |
#on ATLAS |
70 | 70 |
#in_dir1 <- "/data/project/layers/commons/NEX_data/test_run1_03232014/output" #On Atlas |
71 | 71 |
#parent output dir : contains subset of the data produced on NEX |
72 |
in_dir1 <- "/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output"
|
|
72 |
in_dir1 <- "/data/project/layers/commons/NEX_data/output_run4_global_analyses_08142014/output20Deg"
|
|
73 | 73 |
# parent output dir for the curent script analyes |
74 | 74 |
#out_dir <-"/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/" #On NCEAS Atlas |
75 |
out_dir <-"/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/"
|
|
75 |
out_dir <-"/data/project/layers/commons/NEX_data/output_run4_global_analyses_08142014/"
|
|
76 | 76 |
# input dir containing shapefiles defining tiles |
77 | 77 |
in_dir_shp <- "/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output/subset/shapefiles" |
78 | 78 |
|
... | ... | |
83 | 83 |
#out_dir <- "/nobackup/bparmen1/" #on NEX |
84 | 84 |
#in_dir_shp <- "/nobackupp4/aguzman4/climateLayers/output4/subset/shapefiles/" |
85 | 85 |
|
86 |
df_tile_processed <- read.table(file=file.path(out_dir,paste("df_tile_processed_",out_prefix,".txt",sep="")),sep=",") |
|
87 |
#in_dir_list <- file.path(in_dir1,read.table(file.path(in_dir1,"processed.txt"))$V1) |
|
88 |
y_var_name <- "dailyTmax" |
|
86 |
y_var_nay_var_name <- "dailyTmax" |
|
89 | 87 |
interpolation_method <- c("gam_CAI") |
90 |
out_prefix<-"run3_global_analyses_06192014" |
|
88 |
out_prefix<-"run4_global_analyses_08142014" |
|
89 |
|
|
91 | 90 |
|
92 | 91 |
#out_dir <-paste(out_dir,"_",out_prefix,sep="") |
93 | 92 |
create_out_dir_param <- FALSE |
... | ... | |
111 | 110 |
#lf_tables <- list.files(out_dir,) |
112 | 111 |
summary_metrics_v <- read.table(file=file.path(out_dir,paste("summary_metrics_v2_NA_",out_prefix,".txt",sep="")),sep=",") |
113 | 112 |
tb <- read.table(file=file.path(out_dir,paste("tb_diagnostic_v_NA","_",out_prefix,".txt",sep="")),sep=",") |
114 |
df_tile_processed <- read.table(file=file.path(out_dir,paste("df_tile_processed_",out_prefix,".txt",sep="")),sep=","),paste("pred_data_month_info_",out_prefix,".txt",sep="")) <- read.table(file=file.path(out_dir,paste("pred_data_month_info_",out_prefix,".txt",sep="")),sep=",") |
|
115 | 113 |
pred_data_month_info <- read.table(file=file.path(out_dir,paste("pred_data_month_info_",out_prefix,".txt",sep="")),sep=",") |
116 | 114 |
pred_data_day_info <- read.table(file=file.path(out_dir,paste("pred_data_day_info_",out_prefix,".txt",sep="")),sep=",") |
115 |
df_tile_processed <- read.table(file=file.path(out_dir,paste("df_tile_processed_",out_prefix,".txt",sep="")),sep=",") |
|
116 |
#in_dir_list <- file.path(in_dir1,read.table(file.path(in_dir1,"processed.txt"))$V1) |
|
117 | 117 |
|
118 | 118 |
########################## START SCRIPT ############################## |
119 | 119 |
|
120 | 120 |
############### |
121 | 121 |
### Figure 1: plot location of the study area with tiles processed |
122 |
list_shp_world <- list.files(in_dir_shp,".shp") |
|
123 |
l_shp <- unlist(lapply(1:length(list_shp_world),FUN=function(i){paste(strsplit(list_shp_world[i],"_")[[1]][2:3],collapse="_")})) |
|
124 |
list_shp_reg_files <- as.character(df_tile_processed$tile_coord) |
|
125 |
#matching_index <- match(l_shp,list_shp_reg_files) |
|
126 |
matching_index <- match(list_shp_reg_files,l_shp) |
|
127 |
df_tile_processed$shp_files <-list_shp_world[matching_index] |
|
128 |
list_shp_reg_files <- df_tile_processed$shp_files |
|
129 | 122 |
|
130 | 123 |
#df_tiled_processed <- na.omit(df_tile_processed) #remove other list of folders irrelevant |
131 | 124 |
#list_shp_reg_files <- df_tiled_processed$shp_files |
132 |
list_shp_reg_files<- file.path(in_dir_shp,list_shp_reg_files) |
|
125 |
list_shp_reg_files<- as.character(df_tile_processed$shp_files) |
|
126 |
list_shp_reg_files <- file.path("/data/project/layers/commons/NEX_data/output_run4_global_analyses_08142014/output20Deg", |
|
127 |
as.character(df_tile_processed$tile_coord),"shapefiles",basename(list_shp_reg_files)) |
|
133 | 128 |
|
134 | 129 |
### First get background map to display where study area is located |
135 | 130 |
#can make this more general later on.. |
136 |
if region_name=="reg1"{ |
|
137 |
usa_map <- getData('GADM', country=c('USA'), level=1) #Get US map |
|
138 |
can_map <- getData('GADM', country=c('CAN'), level=1) #Get Canada map |
|
139 |
mex_map <- getData('GADM', country=c('MEX'), level=1) #Get Mexico map |
|
140 |
|
|
131 |
if region_name=="USA"{ |
|
132 |
usa_map <- getData('GADM', country='USA', level=1) #Get US map |
|
141 | 133 |
#usa_map <- getData('GADM', country=region_name,level=1) #Get US map, this is not working right now |
142 | 134 |
usa_map <- usa_map[usa_map$NAME_1!="Alaska",] #remove Alaska |
143 |
usa_map <- usa_map[usa_map$NAME_1!="Hawaii",] #remove Hawai |
|
144 |
#reg_layer <- combine all three? |
|
135 |
reg_layer <- usa_map[usa_map$NAME_1!="Hawaii",] #remove Hawai |
|
145 | 136 |
} |
146 | 137 |
|
147 | 138 |
if region_name=="world"{ |
... | ... | |
150 | 141 |
path_to_shp <- dirname(countries_shp) |
151 | 142 |
layer_name <- sub(".shp","",basename(countries_shp)) |
152 | 143 |
reg_layer <- readOGR(path_to_shp, layer_name) |
153 |
proj4string(reg_layer) <- CRS_locs_WGS84 |
|
144 |
#proj4string(reg_layer) <- CRS_locs_WGS84
|
|
154 | 145 |
#reg_shp<-readOGR(dirname(list_shp_reg_files[[i]]),sub(".shp","",basename(list_shp_reg_files[[i]]))) |
155 |
|
|
156 | 146 |
} |
157 | 147 |
|
158 | 148 |
centroids_pts <- vector("list",length(list_shp_reg_files)) |
... | ... | |
188 | 178 |
label_id <- df_tile_processed$tile_id[i] |
189 | 179 |
text(coordinates(pt)[1],coordinates(pt)[2],labels=i,cex=1,col=c("red")) |
190 | 180 |
} |
191 |
title(paste("Tiles location 10x10 degrees for ", region_name,sep=""))
|
|
181 |
title(paste("Tiles location 20x20 degrees for ", region_name,sep=""))
|
|
192 | 182 |
|
193 | 183 |
dev.off() |
194 | 184 |
|
... | ... | |
200 | 190 |
### Figure 2: boxplot of average accuracy by model and by tiles |
201 | 191 |
|
202 | 192 |
tb$tile_id <- factor(tb$tile_id, levels=unique(tb$tile_id)) |
203 |
model_name <- unique(tb$pred_mod)
|
|
193 |
model_name <- as.character(unique(tb$pred_mod))
|
|
204 | 194 |
|
205 | 195 |
## Figure 2a |
206 | 196 |
|
... | ... | |
262 | 252 |
|
263 | 253 |
dev.off() |
264 | 254 |
|
255 |
|
|
256 |
###################### |
|
257 |
### Figure 5: plot accuracy ranked |
|
258 |
|
|
259 |
#Turn summary table to a point shp |
|
260 |
|
|
261 |
list_df_ac_mod <- vector("list",length=length(model_name)) |
|
262 |
for (i in 1:length(model_name)){ |
|
263 |
|
|
264 |
ac_mod <- summary_metrics_v[summary_metrics_v$pred_mod==model_name[i],] |
|
265 |
### Ranking by tile... |
|
266 |
df_ac_mod <- arrange(as.data.frame(ac_mod),desc(rmse))[,c("pred_mod","rmse","mae","tile_id")] |
|
267 |
list_df_ac_mod[[i]] <- arrange(as.data.frame(ac_mod),desc(rmse))[,c("rmse","mae","tile_id")] |
|
268 |
|
|
269 |
res_pix <- 480 |
|
270 |
col_mfrow <- 1 |
|
271 |
row_mfrow <- 1 |
|
272 |
|
|
273 |
png(filename=paste("Figure5_ac_metrics_ranked_",model_name[i],"_",out_prefix,".png",sep=""), |
|
274 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
275 |
x<- as.character(df_ac_mod$tile_id) |
|
276 |
barplot(df_ac_mod$rmse, names.arg=x) |
|
277 |
#plot(ac_mod1,cex=sqrt(ac_mod1$rmse),pch=1,add=T) |
|
278 |
#plot(ac_mod1,cex=(ac_mod1$rmse1)*2,pch=1,add=T) |
|
279 |
title(paste("RMSE ranked by tile for ",model_name[i],sep="")) |
|
280 |
|
|
281 |
dev.off() |
|
282 |
|
|
283 |
} |
|
284 |
|
|
285 |
|
|
286 |
|
|
287 |
|
|
265 | 288 |
################ |
266 | 289 |
### Figure 4: plot predicted tiff for specific date per model |
267 | 290 |
|
... | ... | |
379 | 402 |
#plot(ac_mod1,cex=sqrt(ac_mod1$rmse),pch=1,add=T) |
380 | 403 |
plot(ac_mod,cex=(ac_mod$rmse^2)/10,pch=1,add=T) |
381 | 404 |
#plot(ac_mod1,cex=(ac_mod1$rmse1)*2,pch=1,add=T) |
382 |
title(paste("Average RMSE per tile and by ",model_name[i])) |
|
405 |
title(paste("Averrage RMSE per tile and by ",model_name[i]))
|
|
383 | 406 |
|
384 | 407 |
dev.off() |
385 | 408 |
|
... | ... | |
399 | 422 |
###################### |
400 | 423 |
### Figure 9: Plot the number of stations in a processing tile |
401 | 424 |
|
402 |
## Get ID from tile number... |
|
403 |
ID_str <- unlist(lapply(1:nrow(df_tile_processed),function(i){unlist(strsplit(as.character(df_tile_processed$tile_id[i]),"_"))[2]})) |
|
404 |
|
|
405 |
assign_FID_spatial_polygons_df <-function(list_spdf,ID_str=NULL){ |
|
406 |
list_spdf_tmp <- vector("list",length(list_spdf)) |
|
407 |
if(is.null(ID_str)){ |
|
408 |
nf <- 0 #number of features |
|
409 |
#for(i in 1:length(spdf)){ |
|
410 |
# shp1 <- list_spdf[[i]] |
|
411 |
# f <- nrow(shp1) |
|
412 |
# nf <- nf + f |
|
413 |
#} |
|
414 |
#This assumes that the list has one feature per item list |
|
415 |
nf <- length(list_spdf) |
|
416 |
ID_str <- as.character(1:nf) |
|
417 |
} |
|
418 |
for(i in 1:length(list_spdf)){ |
|
419 |
#test=spRbind(shps_tiles[[1]],shps_tiles[[2]]) |
|
420 |
shp1 <- list_spdf[[i]] |
|
421 |
shp1$FID <- ID_str |
|
422 |
shp1<- spChFIDs(shp1, as.character(shp1$FID)) #assign ID |
|
423 |
list_spdf_tmp[[i]] <-shp1 |
|
424 |
} |
|
425 |
return(list_spdf_tmp) |
|
425 |
dd <- merge(df_tile_processed,pred_data_month_info,"tile_id") |
|
426 |
coordinates(dd) <- c(dd$x,dd$y) |
|
427 |
|
|
428 |
## Make this a function later... |
|
429 |
list_shp_tmp <- vector("list",length(shps_tiles)) |
|
430 |
for(i in 1:length(shps_tiles)){ |
|
431 |
#test=spRbind(shps_tiles[[1]],shps_tiles[[2]]) |
|
432 |
shp1 <- shps_tiles[[i]] |
|
433 |
|
|
434 |
ID_str <- unlist(strsplit(as.character(df_tile_processed$tile_id[i]),"_"))[2] |
|
435 |
shp1$FID <- ID_str |
|
436 |
shp1<- spChFIDs(shp1, as.character(shp1$FID)) #assign ID |
|
437 |
list_shp_tmp[[i]] <-shp1 |
|
426 | 438 |
} |
427 | 439 |
|
428 |
combine_spatial_polygons_df_fun <- function(list_spdf_tmp,ID_str=NULL){ |
|
429 |
if(is.null(ID_str)){ |
|
430 |
#call function |
|
431 |
list_spdf_tmp <- assign_FID_spatial_polygons_df |
|
432 |
} |
|
433 |
combined_spdf <- list_spdf_tmp[[1]] |
|
434 |
for(i in 2:length(list_spdf_tmp)){ |
|
435 |
combined_spdf <- rbind(combined_spdf,list_spdf_tmp[[i]]) |
|
436 |
#sapply(slot(shps_tiles[[2]], "polygons"), function(x) slot(x, "ID")) |
|
437 |
#rownames(as(alaska.tract, "data.frame")) |
|
438 |
} |
|
439 |
return(combined_spdf) |
|
440 |
combined_shp <- list_shp_tmp[[1]] |
|
441 |
for(i in 2:length(list_shp_tmp)){ |
|
442 |
combined_shp <- rbind(combined_shp,list_shp_tmp[[i]]) |
|
443 |
#sapply(slot(shps_tiles[[2]], "polygons"), function(x) slot(x, "ID")) |
|
444 |
#rownames(as(alaska.tract, "data.frame")) |
|
440 | 445 |
} |
441 | 446 |
|
442 | 447 |
combined_shp$tile_id <- df_tile_processed$tile_id |
443 | 448 |
|
449 |
test <- combined_shp |
|
450 |
test2 <- merge(test,pred_data_month_info, by="tile_id") |
|
451 |
|
|
444 | 452 |
r <- raster(lf_pred_list[i]) |
445 | 453 |
|
446 | 454 |
plot(combined_shp) |
455 |
# polygons |
|
456 |
plot(combined_shp, col = fillColour, border = outlineColour) |
|
447 | 457 |
|
448 | 458 |
p0 <- spplot(combined_shp, "Stations",col.regions=matlab.like(100)) |
449 |
p1 <- spplot(usa_map,"ISO",colorkey=FALSE) #Use ISO instead of NAME_1 to see no color? |
|
450 |
p2 <- spplot(can_map,"ISO",colorkey=FALSE) #Use ISO instead of NAME_1 to see no color? |
|
451 |
p3 <- spplot(mex_map,"ISO",colorkey=FALSE) #Use ISO instead of NAME_1 to see no color? |
|
452 |
|
|
453 |
p0 +p1+p2+p3 |
|
459 |
p1 <- spplot(reg_layer,"ISO",colorkey=FALSE) #Use ISO instead of NAME_1 to see no color? |
|
460 |
p0 +p1 |
|
454 | 461 |
|
455 | 462 |
### Now plot number of training for monthly data |
456 | 463 |
|
... | ... | |
466 | 473 |
spp <- SpatialPolygonsDataFrame(pol,data=shp_dat) |
467 | 474 |
|
468 | 475 |
p0 <- spplot(spp, "n_mod",col.regions=matlab.like(100)) |
469 |
#p1 <- spplot(reg_layer,"ISO",colorkey=FALSE) #Use ISO instead of NAME_1 to see no color? |
|
470 |
p0 + p1 + p2 + p3 |
|
471 |
|
|
472 |
###################### |
|
473 |
### Figure 10: Plot the number of stations in a processing tile |
|
474 |
|
|
475 |
boxplot(n_mod~tile_id,pred_data_month_info) |
|
476 |
boxplot(n_mod~date,pred_data_month_info) |
|
476 |
p1 <- spplot(reg_layer,"ISO",colorkey=FALSE) #Use ISO instead of NAME_1 to see no color? |
|
477 |
p0 +p1 |
|
477 | 478 |
|
478 | 479 |
##################### END OF SCRIPT ###################### |
Also available in: Unified diff
run4 assessment part2: comparison of 6 tiles using different gam k values