Project

General

Profile

« Previous | Next » 

Revision 8f68282a

Added by Benoit Parmentier over 10 years ago

run4 assessment part2: comparison of 6 tiles using different gam k values

View differences:

climate/research/oregon/interpolation/global_run_scalingup_assessment_part2.R
5 5
#Analyses, figures, tables and data are also produced in the script.
6 6
#AUTHOR: Benoit Parmentier 
7 7
#CREATED ON: 03/23/2014  
8
#MODIFIED ON: 07/02/2014            
8
#MODIFIED ON: 08/14/2014            
9 9
#Version: 3
10 10
#PROJECT: Environmental Layers project     
11 11
#COMMENTS: analyses for run 3 global using 2 specific tiles
......
40 40

  
41 41
#### FUNCTION USED IN SCRIPT
42 42

  
43
function_analyses_paper1 <-"contribution_of_covariates_paper_interpolation_functions_10222013.R" #first interp paper
44
function_analyses_paper2 <-"multi_timescales_paper_interpolation_functions_05052014.R"
43
function_analyses_paper1 <-"contribution_of_covariates_paper_interpolation_functions_07182014.R" #first interp paper
44
function_analyses_paper2 <-"multi_timescales_paper_interpolation_functions_08132014.R"
45 45

  
46 46
load_obj <- function(f)
47 47
{
......
69 69
#on ATLAS
70 70
#in_dir1 <- "/data/project/layers/commons/NEX_data/test_run1_03232014/output" #On Atlas
71 71
#parent output dir : contains subset of the data produced on NEX
72
in_dir1 <- "/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output"
72
in_dir1 <- "/data/project/layers/commons/NEX_data/output_run4_global_analyses_08142014/output20Deg"
73 73
# parent output dir for the curent script analyes
74 74
#out_dir <-"/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/" #On NCEAS Atlas
75
out_dir <-"/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/"
75
out_dir <-"/data/project/layers/commons/NEX_data/output_run4_global_analyses_08142014/"
76 76
# input dir containing shapefiles defining tiles
77 77
in_dir_shp <- "/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output/subset/shapefiles"
78 78

  
......
83 83
#out_dir <- "/nobackup/bparmen1/" #on NEX
84 84
#in_dir_shp <- "/nobackupp4/aguzman4/climateLayers/output4/subset/shapefiles/"
85 85

  
86
df_tile_processed <- read.table(file=file.path(out_dir,paste("df_tile_processed_",out_prefix,".txt",sep="")),sep=",")
87
#in_dir_list <- file.path(in_dir1,read.table(file.path(in_dir1,"processed.txt"))$V1)
88
y_var_name <- "dailyTmax"
86
y_var_nay_var_name <- "dailyTmax"
89 87
interpolation_method <- c("gam_CAI")
90
out_prefix<-"run3_global_analyses_06192014"
88
out_prefix<-"run4_global_analyses_08142014"
89

  
91 90

  
92 91
#out_dir <-paste(out_dir,"_",out_prefix,sep="")
93 92
create_out_dir_param <- FALSE
......
111 110
#lf_tables <- list.files(out_dir,)
112 111
summary_metrics_v <- read.table(file=file.path(out_dir,paste("summary_metrics_v2_NA_",out_prefix,".txt",sep="")),sep=",")
113 112
tb <- read.table(file=file.path(out_dir,paste("tb_diagnostic_v_NA","_",out_prefix,".txt",sep="")),sep=",")
114
df_tile_processed <- read.table(file=file.path(out_dir,paste("df_tile_processed_",out_prefix,".txt",sep="")),sep=","),paste("pred_data_month_info_",out_prefix,".txt",sep="")) <- read.table(file=file.path(out_dir,paste("pred_data_month_info_",out_prefix,".txt",sep="")),sep=",")
115 113
pred_data_month_info <- read.table(file=file.path(out_dir,paste("pred_data_month_info_",out_prefix,".txt",sep="")),sep=",")
116 114
pred_data_day_info <- read.table(file=file.path(out_dir,paste("pred_data_day_info_",out_prefix,".txt",sep="")),sep=",")
115
df_tile_processed <- read.table(file=file.path(out_dir,paste("df_tile_processed_",out_prefix,".txt",sep="")),sep=",")
116
#in_dir_list <- file.path(in_dir1,read.table(file.path(in_dir1,"processed.txt"))$V1)
117 117

  
118 118
########################## START SCRIPT ##############################
119 119

  
120 120
###############
121 121
### Figure 1: plot location of the study area with tiles processed
122
list_shp_world <- list.files(in_dir_shp,".shp")
123
l_shp <- unlist(lapply(1:length(list_shp_world),FUN=function(i){paste(strsplit(list_shp_world[i],"_")[[1]][2:3],collapse="_")}))
124
list_shp_reg_files <- as.character(df_tile_processed$tile_coord)
125
#matching_index <- match(l_shp,list_shp_reg_files)
126
matching_index <- match(list_shp_reg_files,l_shp)
127
df_tile_processed$shp_files <-list_shp_world[matching_index]
128
list_shp_reg_files <- df_tile_processed$shp_files
129 122

  
130 123
#df_tiled_processed <- na.omit(df_tile_processed) #remove other list of folders irrelevant
131 124
#list_shp_reg_files <- df_tiled_processed$shp_files
132
list_shp_reg_files<- file.path(in_dir_shp,list_shp_reg_files)
125
list_shp_reg_files<- as.character(df_tile_processed$shp_files)
126
list_shp_reg_files <- file.path("/data/project/layers/commons/NEX_data/output_run4_global_analyses_08142014/output20Deg",
127
          as.character(df_tile_processed$tile_coord),"shapefiles",basename(list_shp_reg_files))
133 128

  
134 129
### First get background map to display where study area is located
135 130
#can make this more general later on..       
136
if region_name=="reg1"{
137
  usa_map <- getData('GADM', country=c('USA'), level=1) #Get US map
138
  can_map <- getData('GADM', country=c('CAN'), level=1) #Get Canada map
139
  mex_map <- getData('GADM', country=c('MEX'), level=1) #Get Mexico map
140
  
131
if region_name=="USA"{
132
  usa_map <- getData('GADM', country='USA', level=1) #Get US map
141 133
  #usa_map <- getData('GADM', country=region_name,level=1) #Get US map, this is not working right now
142 134
  usa_map <- usa_map[usa_map$NAME_1!="Alaska",] #remove Alaska
143
  usa_map <- usa_map[usa_map$NAME_1!="Hawaii",] #remove Hawai 
144
  #reg_layer <- combine all three?
135
  reg_layer <- usa_map[usa_map$NAME_1!="Hawaii",] #remove Hawai 
145 136
}
146 137

  
147 138
if region_name=="world"{
......
150 141
  path_to_shp <- dirname(countries_shp)
151 142
  layer_name <- sub(".shp","",basename(countries_shp))
152 143
  reg_layer <- readOGR(path_to_shp, layer_name)
153
  proj4string(reg_layer) <- CRS_locs_WGS84
144
  #proj4string(reg_layer) <- CRS_locs_WGS84
154 145
  #reg_shp<-readOGR(dirname(list_shp_reg_files[[i]]),sub(".shp","",basename(list_shp_reg_files[[i]])))
155

  
156 146
}
157 147

  
158 148
centroids_pts <- vector("list",length(list_shp_reg_files))
......
188 178
  label_id <- df_tile_processed$tile_id[i]
189 179
  text(coordinates(pt)[1],coordinates(pt)[2],labels=i,cex=1,col=c("red"))
190 180
}
191
title(paste("Tiles location 10x10 degrees for ", region_name,sep=""))
181
title(paste("Tiles location 20x20 degrees for ", region_name,sep=""))
192 182

  
193 183
dev.off()
194 184
      
......
200 190
### Figure 2: boxplot of average accuracy by model and by tiles
201 191

  
202 192
tb$tile_id <- factor(tb$tile_id, levels=unique(tb$tile_id))
203
model_name <- unique(tb$pred_mod)
193
model_name <- as.character(unique(tb$pred_mod))
204 194

  
205 195
## Figure 2a
206 196

  
......
262 252

  
263 253
dev.off()
264 254

  
255

  
256
######################
257
### Figure 5: plot accuracy ranked 
258

  
259
#Turn summary table to a point shp
260

  
261
list_df_ac_mod <- vector("list",length=length(model_name))
262
for (i in 1:length(model_name)){
263
  
264
  ac_mod <- summary_metrics_v[summary_metrics_v$pred_mod==model_name[i],]
265
  ### Ranking by tile...
266
  df_ac_mod <- arrange(as.data.frame(ac_mod),desc(rmse))[,c("pred_mod","rmse","mae","tile_id")]
267
  list_df_ac_mod[[i]] <- arrange(as.data.frame(ac_mod),desc(rmse))[,c("rmse","mae","tile_id")]
268
  
269
  res_pix <- 480
270
  col_mfrow <- 1
271
  row_mfrow <- 1
272

  
273
  png(filename=paste("Figure5_ac_metrics_ranked_",model_name[i],"_",out_prefix,".png",sep=""),
274
    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
275
  x<- as.character(df_ac_mod$tile_id)
276
  barplot(df_ac_mod$rmse, names.arg=x)
277
  #plot(ac_mod1,cex=sqrt(ac_mod1$rmse),pch=1,add=T)
278
  #plot(ac_mod1,cex=(ac_mod1$rmse1)*2,pch=1,add=T)
279
  title(paste("RMSE ranked by tile for ",model_name[i],sep=""))
280

  
281
  dev.off()
282
  
283
}
284

  
285

  
286

  
287

  
265 288
################ 
266 289
### Figure 4: plot predicted tiff for specific date per model
267 290

  
......
379 402
  #plot(ac_mod1,cex=sqrt(ac_mod1$rmse),pch=1,add=T)
380 403
  plot(ac_mod,cex=(ac_mod$rmse^2)/10,pch=1,add=T)
381 404
  #plot(ac_mod1,cex=(ac_mod1$rmse1)*2,pch=1,add=T)
382
  title(paste("Average RMSE per tile and by ",model_name[i]))
405
  title(paste("Averrage RMSE per tile and by ",model_name[i]))
383 406

  
384 407
  dev.off()
385 408
  
......
399 422
######################
400 423
### Figure 9: Plot the number of stations in a processing tile
401 424

  
402
## Get ID from tile number...
403
ID_str <- unlist(lapply(1:nrow(df_tile_processed),function(i){unlist(strsplit(as.character(df_tile_processed$tile_id[i]),"_"))[2]}))
404

  
405
assign_FID_spatial_polygons_df <-function(list_spdf,ID_str=NULL){
406
  list_spdf_tmp <- vector("list",length(list_spdf))
407
  if(is.null(ID_str)){
408
    nf <- 0 #number of features
409
    #for(i in 1:length(spdf)){
410
    #    shp1 <- list_spdf[[i]]
411
    #    f <- nrow(shp1)
412
    #    nf <- nf + f
413
    #}
414
    #This assumes that the list has one feature per item list
415
    nf <- length(list_spdf)
416
    ID_str <- as.character(1:nf)
417
  }
418
  for(i in 1:length(list_spdf)){
419
    #test=spRbind(shps_tiles[[1]],shps_tiles[[2]])
420
    shp1 <- list_spdf[[i]]
421
    shp1$FID <- ID_str
422
    shp1<- spChFIDs(shp1, as.character(shp1$FID)) #assign ID
423
    list_spdf_tmp[[i]]  <-shp1
424
  }
425
  return(list_spdf_tmp)
425
dd <- merge(df_tile_processed,pred_data_month_info,"tile_id")
426
coordinates(dd) <- c(dd$x,dd$y)
427

  
428
## Make this a function later...
429
list_shp_tmp <- vector("list",length(shps_tiles))
430
for(i in 1:length(shps_tiles)){
431
  #test=spRbind(shps_tiles[[1]],shps_tiles[[2]])
432
  shp1 <- shps_tiles[[i]]
433

  
434
  ID_str <- unlist(strsplit(as.character(df_tile_processed$tile_id[i]),"_"))[2]
435
  shp1$FID <- ID_str
436
  shp1<- spChFIDs(shp1, as.character(shp1$FID)) #assign ID
437
  list_shp_tmp[[i]]  <-shp1
426 438
}
427 439

  
428
combine_spatial_polygons_df_fun <- function(list_spdf_tmp,ID_str=NULL){
429
  if(is.null(ID_str)){
430
    #call function
431
    list_spdf_tmp <- assign_FID_spatial_polygons_df
432
  }
433
  combined_spdf <- list_spdf_tmp[[1]]
434
  for(i in 2:length(list_spdf_tmp)){
435
    combined_spdf <- rbind(combined_spdf,list_spdf_tmp[[i]])
436
    #sapply(slot(shps_tiles[[2]], "polygons"), function(x) slot(x, "ID"))
437
    #rownames(as(alaska.tract, "data.frame"))
438
  }
439
  return(combined_spdf)
440
combined_shp <- list_shp_tmp[[1]]
441
for(i in 2:length(list_shp_tmp)){
442
  combined_shp <- rbind(combined_shp,list_shp_tmp[[i]])
443
  #sapply(slot(shps_tiles[[2]], "polygons"), function(x) slot(x, "ID"))
444
  #rownames(as(alaska.tract, "data.frame"))
440 445
}
441 446

  
442 447
combined_shp$tile_id <- df_tile_processed$tile_id
443 448
 
449
test <- combined_shp
450
test2 <- merge(test,pred_data_month_info, by="tile_id")
451

  
444 452
r <- raster(lf_pred_list[i])
445 453

  
446 454
plot(combined_shp)
455
# polygons
456
plot(combined_shp, col = fillColour, border = outlineColour)
447 457

  
448 458
p0 <- spplot(combined_shp, "Stations",col.regions=matlab.like(100))
449
p1 <- spplot(usa_map,"ISO",colorkey=FALSE) #Use ISO instead of NAME_1 to see no color?
450
p2 <- spplot(can_map,"ISO",colorkey=FALSE) #Use ISO instead of NAME_1 to see no color?
451
p3 <- spplot(mex_map,"ISO",colorkey=FALSE) #Use ISO instead of NAME_1 to see no color?
452

  
453
p0 +p1+p2+p3
459
p1 <- spplot(reg_layer,"ISO",colorkey=FALSE) #Use ISO instead of NAME_1 to see no color?
460
p0 +p1
454 461

  
455 462
### Now plot number of training for monthly data
456 463

  
......
466 473
spp <- SpatialPolygonsDataFrame(pol,data=shp_dat)
467 474

  
468 475
p0 <- spplot(spp, "n_mod",col.regions=matlab.like(100))
469
#p1 <- spplot(reg_layer,"ISO",colorkey=FALSE) #Use ISO instead of NAME_1 to see no color?
470
p0 + p1 + p2 + p3
471

  
472
######################
473
### Figure 10: Plot the number of stations in a processing tile
474

  
475
boxplot(n_mod~tile_id,pred_data_month_info)
476
boxplot(n_mod~date,pred_data_month_info)
476
p1 <- spplot(reg_layer,"ISO",colorkey=FALSE) #Use ISO instead of NAME_1 to see no color?
477
p0 +p1
477 478

  
478 479
##################### END OF SCRIPT ######################

Also available in: Unified diff