Project

General

Profile

« Previous | Next » 

Revision 98f67591

Added by Benoit Parmentier over 10 years ago

global scalingup assessment part3 initial commit to diaganose and evaluate specific tiles issues (eg LST)

View differences:

climate/research/oregon/interpolation/global_run_scalingup_assessment_part3.R
1
##############################  INTERPOLATION OF TEMPERATURES  #######################################
2
#######################  Script for assessment of scaling up on NEX : part3 ##############################
3
#This script uses the worklfow code applied to the globe. Results currently reside on NEX/PLEIADES NASA.
4
#In part 3, models and results are assessed on a tile basis using modifications of method assessment methods
5
#Analyses, figures, tables and data are also produced in the script.
6
#AUTHOR: Benoit Parmentier 
7
#CREATED ON: 05/21/2014  
8
#MODIFIED ON: 05/21/2014            
9
#Version: 1
10
#PROJECT: Environmental Layers project                                     
11
#################################################################################################
12

  
13
### Loading R library and packages        
14
#library used in the workflow production:
15
library(gtools)                              # loading some useful tools 
16
library(mgcv)                                # GAM package by Simon Wood
17
library(sp)                                  # Spatial pacakge with class definition by Bivand et al.
18
library(spdep)                               # Spatial pacakge with methods and spatial stat. by Bivand et al.
19
library(rgdal)                               # GDAL wrapper for R, spatial utilities
20
library(gstat)                               # Kriging and co-kriging by Pebesma et al.
21
library(fields)                              # NCAR Spatial Interpolation methods such as kriging, splines
22
library(raster)                              # Hijmans et al. package for raster processing
23
library(gdata)                               # various tools with xls reading, cbindX
24
library(rasterVis)                           # Raster plotting functions
25
library(parallel)                            # Parallelization of processes with multiple cores
26
library(maptools)                            # Tools and functions for sp and other spatial objects e.g. spCbind
27
library(maps)                                # Tools and data for spatial/geographic objects
28
library(reshape)                             # Change shape of object, summarize results 
29
library(plotrix)                             # Additional plotting functions
30
library(plyr)                                # Various tools including rbind.fill
31
library(spgwr)                               # GWR method
32
library(automap)                             # Kriging automatic fitting of variogram using gstat
33
library(rgeos)                               # Geometric, topologic library of functions
34
#RPostgreSQL                                 # Interface R and Postgres, not used in this script
35
library(gridExtra)
36
#Additional libraries not used in workflow
37
library(pgirmess)                            # Krusall Wallis test with mulitple options, Kruskalmc {pgirmess}  
38
library(colorRamps)
39

  
40
#### FUNCTION USED IN SCRIPT
41

  
42
function_analyses_paper1 <-"contribution_of_covariates_paper_interpolation_functions_10222013.R" #first interp paper
43
function_analyses_paper2 <-"multi_timescales_paper_interpolation_functions_05052014.R"
44

  
45
load_obj <- function(f)
46
{
47
  env <- new.env()
48
  nm <- load(f, env)[1]
49
  env[[nm]]
50
}
51

  
52
create_dir_fun <- function(out_dir,out_suffix){
53
  if(!is.null(out_suffix)){
54
    out_name <- paste("output_",out_suffix,sep="")
55
    out_dir <- file.path(out_dir,out_name)
56
  }
57
  #create if does not exists
58
  if(!file.exists(out_dir)){
59
    dir.create(out_dir)
60
  }
61
  return(out_dir)
62
}
63

  
64

  
65
##############################
66
#### Parameters and constants  
67

  
68
#on ATLAS
69
#in_dir1 <- "/data/project/layers/commons/NEX_data/test_run1_03232014/output" #On Atlas
70
#parent output dir : contains subset of the data produced on NEX
71
in_dir1 <- "/data/project/layers/commons/NEX_data/output_run2_05122014/output/"
72
# parent output dir for the curent script analyes
73
out_dir <-"/data/project/layers/commons/NEX_data/" #On NCEAS Atlas
74
# input dir containing shapefiles defining tiles
75
in_dir_shp <- "/data/project/layers/commons/NEX_data/output_run2_05122014/output/subset/shapefiles"
76

  
77
#On NEX
78
#contains all data from the run by Alberto
79
#in_dir1 <- "/nobackupp4/aguzman4/climateLayers/output4" #On NEX
80
#parent output dir for the current script analyes
81
#out_dir <- "/nobackup/bparmen1/" #on NEX
82
#in_dir_shp <- "/nobackupp4/aguzman4/climateLayers/output4/subset/shapefiles/"
83

  
84
df_tile_processed <- read.table(file=file.path(out_dir,paste("df_tile_processed_",out_prefix,".txt",sep="")),sep=",")
85
in_dir_list <- file.path(in_dir1,read.table(file.path(in_dir1,"processed.txt"))$V1)
86
y_var_name <- "dailyTmax"
87
interpolation_method <- c("gam_CAI")
88
out_prefix<-"run2_global_analyses_05122014"
89

  
90
#out_dir <-paste(out_dir,"_",out_prefix,sep="")
91
create_out_dir_param <- TRUE
92

  
93
if(create_out_dir_param==TRUE){
94
  out_dir <- create_dir_fun(out_dir,out_prefix)
95
  setwd(out_dir)
96
}else{
97
  setwd(out_dir) #use previoulsy defined directory
98
}
99

  
100
setwd(out_dir)
101
                                   
102
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
103

  
104
region_name <- "USA"
105

  
106
###Table 1: Average accuracy metrics
107
###Table 2: daily accuracy metrics for all tiles
108

  
109
summary_metrics_v <- read.table(file=file.path(out_dir,paste("summary_metrics_v2_NA_",out_prefix,".txt",sep="")),sep=",")
110
tb <- read.table(file=file.path(out_dir,paste("tb_diagnostic_v_NA","_",out_prefix,".txt",sep="")),sep=",")
111
#df_tile_processed <- read.table(file=file.path(out_dir,paste("df_tile_processed_",out_prefix,".txt",sep="")),sep=",")
112

  
113
########################## START SCRIPT ##############################
114

  
115
###############
116
### Figure 1: plot location of the study area with tiles processed
117

  
118
list_shp_reg_files <- file.path(in_dir_shp,df_tile_processed$shp_files)
119
                                                                    
120
### First get background map to display where study area is located
121
#can make this more general later on..                                                                    
122
#usa_map <- getData('GADM', country='USA', level=1) #Get US map
123
usa_map <- getData('GADM', country=region_name,level=1) #Get US map, this is not working right now
124
usa_map_2 <- usa_map[usa_map$NAME_1!="Alaska",] #remove Alaska
125
usa_map_2 <- usa_map_2[usa_map_2$NAME_1!="Hawaii",] #remove Hawai 
126

  
127
centroids_pts <- vector("list",length(list_shp_reg_files))
128
shps_tiles <- vector("list",length(list_shp_reg_files))
129
#collect info: read in all shapfiles
130
for(i in 1:length(list_shp_reg_files)){
131
  shp1<-readOGR(dirname(list_shp_reg_files[[i]]),sub(".shp","",basename(list_shp_reg_files[[i]])))
132
  pt <- gCentroid(shp1)
133
  centroids_pts[[i]] <-pt
134
  shps_tiles[[i]] <- shp1
135
}
136

  
137
#plot info: with labels
138
res_pix <- 480
139
col_mfrow <- 1
140
row_mfrow <- 1
141

  
142
png(filename=paste("Figure1_tile_processed_region_",out_prefix,".png",sep=""),
143
    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
144

  
145
plot(usa_map_2)
146
#Add polygon tiles...
147
for(i in 1:length(list_shp_reg_files)){
148
  shp1 <- shps_tiles[[i]]
149
  pt <- centroids_pts[[i]]
150
  plot(shp1,add=T,border="blue")
151
  #plot(pt,add=T,cex=2,pch=5)
152
  label_id <- df_tile_processed$tile_id[i]
153
  text(coordinates(pt)[1],coordinates(pt)[2],labels=i,cex=1,col=c("red"))
154
}
155
title(paste("Tiles location 10x10 degrees for ", region_name,sep=""))
156

  
157
dev.off()
158
      
159
#unique(summaty_metrics$tile_id)
160
#text(lat-shp,)
161
#union(list_shp_reg_files[[1]],list_shp_reg_files[[2]])
162

  
163
###############
164
### Figure 2: boxplot of average accuracy by model and by tiles
165

  
166
tb$tile_id <- factor(tb$tile_id, levels=unique(tb$tile_id))
167
model_name <- unique(tb$pred_mod)
168

  
169
## Figure 2a
170

  
171
for(i in  1:length(model_name)){
172
  
173
  res_pix <- 480
174
  col_mfrow <- 1
175
  row_mfrow <- 1
176

  
177
  png(filename=paste("Figure2a_boxplot_with_oultiers_by_tiles_",model_name[i],"_",out_prefix,".png",sep=""),
178
    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
179
  
180
  boxplot(rmse~tile_id,data=subset(tb,tb$pred_mod==model_name[i]))
181
  title(paste("RMSE per ",model_name[i]))
182
  
183
  dev.off()
184
}
185

  
186
## Figure 2b
187
#wtih ylim and removing trailing...
188
for(i in  1:length(model_name)){
189

  
190
  res_pix <- 480
191
  col_mfrow <- 1
192
  row_mfrow <- 1
193
  png(filename=paste("Figure2b_boxplot_scaling_by_tiles","_",model_name[i],"_",out_prefix,".png",sep=""),
194
    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
195

  
196
  model_name <- unique(tb$pred_mod)
197
  boxplot(rmse~tile_id,data=subset(tb,tb$pred_mod==model_name[i])
198
          ,ylim=c(0,4),outline=FALSE)
199
  title(paste("RMSE per ",model_name[i]))
200
  dev.off()
201
}
202
#bwplot(rmse~tile_id, data=subset(tb,tb$pred_mod=="mod1"))
203

  
204
###############
205
### Figure 3: boxplot of average RMSE by model acrosss all tiles
206

  
207
## Figure 3a
208
res_pix <- 480
209
col_mfrow <- 1
210
row_mfrow <- 1
211

  
212
png(filename=paste("Figure3a_boxplot_overall_region_with_oultiers_",model_name[i],"_",out_prefix,".png",sep=""),
213
    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
214

  
215
boxplot(rmse~pred_mod,data=tb)#,names=tb$pred_mod)
216
title("RMSE per model over all tiles")
217

  
218
dev.off()
219

  
220
## Figure 3b
221
png(filename=paste("Figure3b_boxplot_overall_region_scaling_",model_name[i],"_",out_prefix,".png",sep=""),
222
    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
223

  
224
boxplot(rmse~pred_mod,data=tb,ylim=c(0,5),outline=FALSE)#,names=tb$pred_mod)
225
title("RMSE per model over all tiles")
226

  
227
dev.off()
228

  
229
################ 
230
### Figure 4: plot predicted tiff for specific date per model
231

  
232
#y_var_name <-"dailyTmax"
233
#index <-244 #index corresponding to Sept 1
234
index  <- 1 #index corresponding to Jan 1
235
date_selected <- "20100101"
236
name_method_var <- paste(interpolation_method,"_",y_var_name,"_",sep="")
237

  
238
pattern_str <- paste("mosaiced","_",name_method_var,".*.",date_selected,".*.tif",sep="")
239
lf_list <- list.files(pattern=pattern_str)
240

  
241
for(i in 1:length(lf_list)){
242
  
243
  r_pred <- raster(lf_list[i])
244
  
245
  res_pix <- 480
246
  col_mfrow <- 1
247
  row_mfrow <- 1
248
  
249
  png(filename=paste("Figure4_models_predicted_surfaces_",model_name[i],"_",name_method_var,out_prefix,".png",sep=""),
250
    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
251
  
252
  plot(r_pred)
253
  title(paste("Mosaiced",model_name[i],name_method_var,date_selected,sep=" "))
254
  dev.off()
255
  
256
}
257

  
258
#### Now combined plot...
259

  
260
#pred_s <- stack(lf_list) #problem different extent!!
261
#methods_names <-c("gam","kriging","gwr")
262
#methods_names <- interpolation_method
263

  
264
#names_layers<-methods_names[1]
265
#names_layers <-c("mod1 = lat*long + elev","mod2 = lat*long + elev + LST",
266
#                 "mod3 = lat*long + elev + LST*FOREST")#, "mod_kr")
267
#nb_fig<- c("4a","4b")
268
#list_plots_spt <- vector("list",length=length(lf))
269
#png(filename=paste("Figure4_models_predicted_surfaces_",date_selected,"_",out_prefix,".png",sep=""),
270
#    height=480*layout_m[1],width=480*layout_m[2])
271
#  max_val <- 40
272
#  min_val <- -40
273
#  layout_m <- c(1,3) #one row two columns
274
#  no_brks <- length(seq(min_val,max_val,by=0.1))-1
275
#  #temp.colors <- colorRampPalette(c('blue', 'white', 'red'))
276
#  #temp.colors <- colorRampPalette(c('blue', 'lightgoldenrodyellow', 'red'))
277
#  #temp.colors <- matlab.like(no_brks)
278
#  temp.colors <- colorRampPalette(c('blue', 'khaki', 'red'))
279
#  
280
#  p <- levelplot(pred_s,main=methods_names[i], 
281
#                 ylab=NULL,xlab=NULL,
282
#          par.settings = list(axis.text = list(font = 2, cex = 2),layout=layout_m,
283
#                              par.main.text=list(font=2,cex=2.5),strip.background=list(col="white")),par.strip.text=list(font=2,cex=2),
284
#          names.attr=names_layers,
285
#                 col.regions=temp.colors(no_brks),at=seq(min_val,max_val,by=0.1))
286
##col.regions=temp.colors(25))
287
#print(p)
288
#dev.off()
289

  
290
######################
291
### Figure 5: plot map of average RMSE per tile at centroids
292

  
293
#Turn summary table to a point shp
294
coordinates(summary_metrics_v) <- cbind(long,lat)  
295
list_df_ac_mod <- vector("list",length=length(lf_list))
296
for (i in 1:length(lf_list)){
297
  
298
  ac_mod <- summary_metrics_v[summary_metrics_v$pred_mod==model_name[i],]
299
  r_pred <- raster(lf_list[i])
300
  
301
  res_pix <- 480
302
  col_mfrow <- 1
303
  row_mfrow <- 1
304

  
305
  png(filename=paste("Figure5_ac_metrics_map_centroids_tile_",model_name[i],"_",out_prefix,".png",sep=""),
306
    width=col_mfrow*res_pix,height=row_mfrow*res_pix)
307

  
308
  plot(r_pred)  
309

  
310
  #plot(ac_mod1,cex=sqrt(ac_mod1$rmse),pch=1,add=T)
311
  plot(ac_mod,cex=(ac_mod$rmse^2)/10,pch=1,add=T)
312
  #plot(ac_mod1,cex=(ac_mod1$rmse1)*2,pch=1,add=T)
313
  title(paste("Averrage RMSE per tile and by ",model_name[i]))
314

  
315
  dev.off()
316
  
317
  ### Ranking by tile...
318
  #df_ac_mod <- 
319
  list_df_ac_mod[[i]] <- arrange(as.data.frame(ac_mod),desc(rmse))[,c("rmse","mae","tile_id")]
320
}
321
  
322
#quick kriging...
323
#autokrige(rmse~1,r2,)
324

  
325
######################
326
### Figure 6: Histograms...
327

  
328

  
329
##################### END OF SCRIPT ######################

Also available in: Unified diff