Revision 98f67591
Added by Benoit Parmentier over 10 years ago
climate/research/oregon/interpolation/global_run_scalingup_assessment_part3.R | ||
---|---|---|
1 |
############################## INTERPOLATION OF TEMPERATURES ####################################### |
|
2 |
####################### Script for assessment of scaling up on NEX : part3 ############################## |
|
3 |
#This script uses the worklfow code applied to the globe. Results currently reside on NEX/PLEIADES NASA. |
|
4 |
#In part 3, models and results are assessed on a tile basis using modifications of method assessment methods |
|
5 |
#Analyses, figures, tables and data are also produced in the script. |
|
6 |
#AUTHOR: Benoit Parmentier |
|
7 |
#CREATED ON: 05/21/2014 |
|
8 |
#MODIFIED ON: 05/21/2014 |
|
9 |
#Version: 1 |
|
10 |
#PROJECT: Environmental Layers project |
|
11 |
################################################################################################# |
|
12 |
|
|
13 |
### Loading R library and packages |
|
14 |
#library used in the workflow production: |
|
15 |
library(gtools) # loading some useful tools |
|
16 |
library(mgcv) # GAM package by Simon Wood |
|
17 |
library(sp) # Spatial pacakge with class definition by Bivand et al. |
|
18 |
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al. |
|
19 |
library(rgdal) # GDAL wrapper for R, spatial utilities |
|
20 |
library(gstat) # Kriging and co-kriging by Pebesma et al. |
|
21 |
library(fields) # NCAR Spatial Interpolation methods such as kriging, splines |
|
22 |
library(raster) # Hijmans et al. package for raster processing |
|
23 |
library(gdata) # various tools with xls reading, cbindX |
|
24 |
library(rasterVis) # Raster plotting functions |
|
25 |
library(parallel) # Parallelization of processes with multiple cores |
|
26 |
library(maptools) # Tools and functions for sp and other spatial objects e.g. spCbind |
|
27 |
library(maps) # Tools and data for spatial/geographic objects |
|
28 |
library(reshape) # Change shape of object, summarize results |
|
29 |
library(plotrix) # Additional plotting functions |
|
30 |
library(plyr) # Various tools including rbind.fill |
|
31 |
library(spgwr) # GWR method |
|
32 |
library(automap) # Kriging automatic fitting of variogram using gstat |
|
33 |
library(rgeos) # Geometric, topologic library of functions |
|
34 |
#RPostgreSQL # Interface R and Postgres, not used in this script |
|
35 |
library(gridExtra) |
|
36 |
#Additional libraries not used in workflow |
|
37 |
library(pgirmess) # Krusall Wallis test with mulitple options, Kruskalmc {pgirmess} |
|
38 |
library(colorRamps) |
|
39 |
|
|
40 |
#### FUNCTION USED IN SCRIPT |
|
41 |
|
|
42 |
function_analyses_paper1 <-"contribution_of_covariates_paper_interpolation_functions_10222013.R" #first interp paper |
|
43 |
function_analyses_paper2 <-"multi_timescales_paper_interpolation_functions_05052014.R" |
|
44 |
|
|
45 |
load_obj <- function(f) |
|
46 |
{ |
|
47 |
env <- new.env() |
|
48 |
nm <- load(f, env)[1] |
|
49 |
env[[nm]] |
|
50 |
} |
|
51 |
|
|
52 |
create_dir_fun <- function(out_dir,out_suffix){ |
|
53 |
if(!is.null(out_suffix)){ |
|
54 |
out_name <- paste("output_",out_suffix,sep="") |
|
55 |
out_dir <- file.path(out_dir,out_name) |
|
56 |
} |
|
57 |
#create if does not exists |
|
58 |
if(!file.exists(out_dir)){ |
|
59 |
dir.create(out_dir) |
|
60 |
} |
|
61 |
return(out_dir) |
|
62 |
} |
|
63 |
|
|
64 |
|
|
65 |
############################## |
|
66 |
#### Parameters and constants |
|
67 |
|
|
68 |
#on ATLAS |
|
69 |
#in_dir1 <- "/data/project/layers/commons/NEX_data/test_run1_03232014/output" #On Atlas |
|
70 |
#parent output dir : contains subset of the data produced on NEX |
|
71 |
in_dir1 <- "/data/project/layers/commons/NEX_data/output_run2_05122014/output/" |
|
72 |
# parent output dir for the curent script analyes |
|
73 |
out_dir <-"/data/project/layers/commons/NEX_data/" #On NCEAS Atlas |
|
74 |
# input dir containing shapefiles defining tiles |
|
75 |
in_dir_shp <- "/data/project/layers/commons/NEX_data/output_run2_05122014/output/subset/shapefiles" |
|
76 |
|
|
77 |
#On NEX |
|
78 |
#contains all data from the run by Alberto |
|
79 |
#in_dir1 <- "/nobackupp4/aguzman4/climateLayers/output4" #On NEX |
|
80 |
#parent output dir for the current script analyes |
|
81 |
#out_dir <- "/nobackup/bparmen1/" #on NEX |
|
82 |
#in_dir_shp <- "/nobackupp4/aguzman4/climateLayers/output4/subset/shapefiles/" |
|
83 |
|
|
84 |
df_tile_processed <- read.table(file=file.path(out_dir,paste("df_tile_processed_",out_prefix,".txt",sep="")),sep=",") |
|
85 |
in_dir_list <- file.path(in_dir1,read.table(file.path(in_dir1,"processed.txt"))$V1) |
|
86 |
y_var_name <- "dailyTmax" |
|
87 |
interpolation_method <- c("gam_CAI") |
|
88 |
out_prefix<-"run2_global_analyses_05122014" |
|
89 |
|
|
90 |
#out_dir <-paste(out_dir,"_",out_prefix,sep="") |
|
91 |
create_out_dir_param <- TRUE |
|
92 |
|
|
93 |
if(create_out_dir_param==TRUE){ |
|
94 |
out_dir <- create_dir_fun(out_dir,out_prefix) |
|
95 |
setwd(out_dir) |
|
96 |
}else{ |
|
97 |
setwd(out_dir) #use previoulsy defined directory |
|
98 |
} |
|
99 |
|
|
100 |
setwd(out_dir) |
|
101 |
|
|
102 |
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84 |
|
103 |
|
|
104 |
region_name <- "USA" |
|
105 |
|
|
106 |
###Table 1: Average accuracy metrics |
|
107 |
###Table 2: daily accuracy metrics for all tiles |
|
108 |
|
|
109 |
summary_metrics_v <- read.table(file=file.path(out_dir,paste("summary_metrics_v2_NA_",out_prefix,".txt",sep="")),sep=",") |
|
110 |
tb <- read.table(file=file.path(out_dir,paste("tb_diagnostic_v_NA","_",out_prefix,".txt",sep="")),sep=",") |
|
111 |
#df_tile_processed <- read.table(file=file.path(out_dir,paste("df_tile_processed_",out_prefix,".txt",sep="")),sep=",") |
|
112 |
|
|
113 |
########################## START SCRIPT ############################## |
|
114 |
|
|
115 |
############### |
|
116 |
### Figure 1: plot location of the study area with tiles processed |
|
117 |
|
|
118 |
list_shp_reg_files <- file.path(in_dir_shp,df_tile_processed$shp_files) |
|
119 |
|
|
120 |
### First get background map to display where study area is located |
|
121 |
#can make this more general later on.. |
|
122 |
#usa_map <- getData('GADM', country='USA', level=1) #Get US map |
|
123 |
usa_map <- getData('GADM', country=region_name,level=1) #Get US map, this is not working right now |
|
124 |
usa_map_2 <- usa_map[usa_map$NAME_1!="Alaska",] #remove Alaska |
|
125 |
usa_map_2 <- usa_map_2[usa_map_2$NAME_1!="Hawaii",] #remove Hawai |
|
126 |
|
|
127 |
centroids_pts <- vector("list",length(list_shp_reg_files)) |
|
128 |
shps_tiles <- vector("list",length(list_shp_reg_files)) |
|
129 |
#collect info: read in all shapfiles |
|
130 |
for(i in 1:length(list_shp_reg_files)){ |
|
131 |
shp1<-readOGR(dirname(list_shp_reg_files[[i]]),sub(".shp","",basename(list_shp_reg_files[[i]]))) |
|
132 |
pt <- gCentroid(shp1) |
|
133 |
centroids_pts[[i]] <-pt |
|
134 |
shps_tiles[[i]] <- shp1 |
|
135 |
} |
|
136 |
|
|
137 |
#plot info: with labels |
|
138 |
res_pix <- 480 |
|
139 |
col_mfrow <- 1 |
|
140 |
row_mfrow <- 1 |
|
141 |
|
|
142 |
png(filename=paste("Figure1_tile_processed_region_",out_prefix,".png",sep=""), |
|
143 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
144 |
|
|
145 |
plot(usa_map_2) |
|
146 |
#Add polygon tiles... |
|
147 |
for(i in 1:length(list_shp_reg_files)){ |
|
148 |
shp1 <- shps_tiles[[i]] |
|
149 |
pt <- centroids_pts[[i]] |
|
150 |
plot(shp1,add=T,border="blue") |
|
151 |
#plot(pt,add=T,cex=2,pch=5) |
|
152 |
label_id <- df_tile_processed$tile_id[i] |
|
153 |
text(coordinates(pt)[1],coordinates(pt)[2],labels=i,cex=1,col=c("red")) |
|
154 |
} |
|
155 |
title(paste("Tiles location 10x10 degrees for ", region_name,sep="")) |
|
156 |
|
|
157 |
dev.off() |
|
158 |
|
|
159 |
#unique(summaty_metrics$tile_id) |
|
160 |
#text(lat-shp,) |
|
161 |
#union(list_shp_reg_files[[1]],list_shp_reg_files[[2]]) |
|
162 |
|
|
163 |
############### |
|
164 |
### Figure 2: boxplot of average accuracy by model and by tiles |
|
165 |
|
|
166 |
tb$tile_id <- factor(tb$tile_id, levels=unique(tb$tile_id)) |
|
167 |
model_name <- unique(tb$pred_mod) |
|
168 |
|
|
169 |
## Figure 2a |
|
170 |
|
|
171 |
for(i in 1:length(model_name)){ |
|
172 |
|
|
173 |
res_pix <- 480 |
|
174 |
col_mfrow <- 1 |
|
175 |
row_mfrow <- 1 |
|
176 |
|
|
177 |
png(filename=paste("Figure2a_boxplot_with_oultiers_by_tiles_",model_name[i],"_",out_prefix,".png",sep=""), |
|
178 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
179 |
|
|
180 |
boxplot(rmse~tile_id,data=subset(tb,tb$pred_mod==model_name[i])) |
|
181 |
title(paste("RMSE per ",model_name[i])) |
|
182 |
|
|
183 |
dev.off() |
|
184 |
} |
|
185 |
|
|
186 |
## Figure 2b |
|
187 |
#wtih ylim and removing trailing... |
|
188 |
for(i in 1:length(model_name)){ |
|
189 |
|
|
190 |
res_pix <- 480 |
|
191 |
col_mfrow <- 1 |
|
192 |
row_mfrow <- 1 |
|
193 |
png(filename=paste("Figure2b_boxplot_scaling_by_tiles","_",model_name[i],"_",out_prefix,".png",sep=""), |
|
194 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
195 |
|
|
196 |
model_name <- unique(tb$pred_mod) |
|
197 |
boxplot(rmse~tile_id,data=subset(tb,tb$pred_mod==model_name[i]) |
|
198 |
,ylim=c(0,4),outline=FALSE) |
|
199 |
title(paste("RMSE per ",model_name[i])) |
|
200 |
dev.off() |
|
201 |
} |
|
202 |
#bwplot(rmse~tile_id, data=subset(tb,tb$pred_mod=="mod1")) |
|
203 |
|
|
204 |
############### |
|
205 |
### Figure 3: boxplot of average RMSE by model acrosss all tiles |
|
206 |
|
|
207 |
## Figure 3a |
|
208 |
res_pix <- 480 |
|
209 |
col_mfrow <- 1 |
|
210 |
row_mfrow <- 1 |
|
211 |
|
|
212 |
png(filename=paste("Figure3a_boxplot_overall_region_with_oultiers_",model_name[i],"_",out_prefix,".png",sep=""), |
|
213 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
214 |
|
|
215 |
boxplot(rmse~pred_mod,data=tb)#,names=tb$pred_mod) |
|
216 |
title("RMSE per model over all tiles") |
|
217 |
|
|
218 |
dev.off() |
|
219 |
|
|
220 |
## Figure 3b |
|
221 |
png(filename=paste("Figure3b_boxplot_overall_region_scaling_",model_name[i],"_",out_prefix,".png",sep=""), |
|
222 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
223 |
|
|
224 |
boxplot(rmse~pred_mod,data=tb,ylim=c(0,5),outline=FALSE)#,names=tb$pred_mod) |
|
225 |
title("RMSE per model over all tiles") |
|
226 |
|
|
227 |
dev.off() |
|
228 |
|
|
229 |
################ |
|
230 |
### Figure 4: plot predicted tiff for specific date per model |
|
231 |
|
|
232 |
#y_var_name <-"dailyTmax" |
|
233 |
#index <-244 #index corresponding to Sept 1 |
|
234 |
index <- 1 #index corresponding to Jan 1 |
|
235 |
date_selected <- "20100101" |
|
236 |
name_method_var <- paste(interpolation_method,"_",y_var_name,"_",sep="") |
|
237 |
|
|
238 |
pattern_str <- paste("mosaiced","_",name_method_var,".*.",date_selected,".*.tif",sep="") |
|
239 |
lf_list <- list.files(pattern=pattern_str) |
|
240 |
|
|
241 |
for(i in 1:length(lf_list)){ |
|
242 |
|
|
243 |
r_pred <- raster(lf_list[i]) |
|
244 |
|
|
245 |
res_pix <- 480 |
|
246 |
col_mfrow <- 1 |
|
247 |
row_mfrow <- 1 |
|
248 |
|
|
249 |
png(filename=paste("Figure4_models_predicted_surfaces_",model_name[i],"_",name_method_var,out_prefix,".png",sep=""), |
|
250 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
251 |
|
|
252 |
plot(r_pred) |
|
253 |
title(paste("Mosaiced",model_name[i],name_method_var,date_selected,sep=" ")) |
|
254 |
dev.off() |
|
255 |
|
|
256 |
} |
|
257 |
|
|
258 |
#### Now combined plot... |
|
259 |
|
|
260 |
#pred_s <- stack(lf_list) #problem different extent!! |
|
261 |
#methods_names <-c("gam","kriging","gwr") |
|
262 |
#methods_names <- interpolation_method |
|
263 |
|
|
264 |
#names_layers<-methods_names[1] |
|
265 |
#names_layers <-c("mod1 = lat*long + elev","mod2 = lat*long + elev + LST", |
|
266 |
# "mod3 = lat*long + elev + LST*FOREST")#, "mod_kr") |
|
267 |
#nb_fig<- c("4a","4b") |
|
268 |
#list_plots_spt <- vector("list",length=length(lf)) |
|
269 |
#png(filename=paste("Figure4_models_predicted_surfaces_",date_selected,"_",out_prefix,".png",sep=""), |
|
270 |
# height=480*layout_m[1],width=480*layout_m[2]) |
|
271 |
# max_val <- 40 |
|
272 |
# min_val <- -40 |
|
273 |
# layout_m <- c(1,3) #one row two columns |
|
274 |
# no_brks <- length(seq(min_val,max_val,by=0.1))-1 |
|
275 |
# #temp.colors <- colorRampPalette(c('blue', 'white', 'red')) |
|
276 |
# #temp.colors <- colorRampPalette(c('blue', 'lightgoldenrodyellow', 'red')) |
|
277 |
# #temp.colors <- matlab.like(no_brks) |
|
278 |
# temp.colors <- colorRampPalette(c('blue', 'khaki', 'red')) |
|
279 |
# |
|
280 |
# p <- levelplot(pred_s,main=methods_names[i], |
|
281 |
# ylab=NULL,xlab=NULL, |
|
282 |
# par.settings = list(axis.text = list(font = 2, cex = 2),layout=layout_m, |
|
283 |
# par.main.text=list(font=2,cex=2.5),strip.background=list(col="white")),par.strip.text=list(font=2,cex=2), |
|
284 |
# names.attr=names_layers, |
|
285 |
# col.regions=temp.colors(no_brks),at=seq(min_val,max_val,by=0.1)) |
|
286 |
##col.regions=temp.colors(25)) |
|
287 |
#print(p) |
|
288 |
#dev.off() |
|
289 |
|
|
290 |
###################### |
|
291 |
### Figure 5: plot map of average RMSE per tile at centroids |
|
292 |
|
|
293 |
#Turn summary table to a point shp |
|
294 |
coordinates(summary_metrics_v) <- cbind(long,lat) |
|
295 |
list_df_ac_mod <- vector("list",length=length(lf_list)) |
|
296 |
for (i in 1:length(lf_list)){ |
|
297 |
|
|
298 |
ac_mod <- summary_metrics_v[summary_metrics_v$pred_mod==model_name[i],] |
|
299 |
r_pred <- raster(lf_list[i]) |
|
300 |
|
|
301 |
res_pix <- 480 |
|
302 |
col_mfrow <- 1 |
|
303 |
row_mfrow <- 1 |
|
304 |
|
|
305 |
png(filename=paste("Figure5_ac_metrics_map_centroids_tile_",model_name[i],"_",out_prefix,".png",sep=""), |
|
306 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
307 |
|
|
308 |
plot(r_pred) |
|
309 |
|
|
310 |
#plot(ac_mod1,cex=sqrt(ac_mod1$rmse),pch=1,add=T) |
|
311 |
plot(ac_mod,cex=(ac_mod$rmse^2)/10,pch=1,add=T) |
|
312 |
#plot(ac_mod1,cex=(ac_mod1$rmse1)*2,pch=1,add=T) |
|
313 |
title(paste("Averrage RMSE per tile and by ",model_name[i])) |
|
314 |
|
|
315 |
dev.off() |
|
316 |
|
|
317 |
### Ranking by tile... |
|
318 |
#df_ac_mod <- |
|
319 |
list_df_ac_mod[[i]] <- arrange(as.data.frame(ac_mod),desc(rmse))[,c("rmse","mae","tile_id")] |
|
320 |
} |
|
321 |
|
|
322 |
#quick kriging... |
|
323 |
#autokrige(rmse~1,r2,) |
|
324 |
|
|
325 |
###################### |
|
326 |
### Figure 6: Histograms... |
|
327 |
|
|
328 |
|
|
329 |
##################### END OF SCRIPT ###################### |
Also available in: Unified diff
global scalingup assessment part3 initial commit to diaganose and evaluate specific tiles issues (eg LST)