Revision 63238731
Added by Benoit Parmentier almost 10 years ago

global assessment part1: clean up of script
climate/research/oregon/interpolation/global_run_scalingup_assessment_part1.R

old | new | content
--- | --- | ---
5 | 5 | #Part 1: create summary tables and input files for figures in part 2 and part 3.
6 | 6 | #AUTHOR: Benoit Parmentier
7 | 7 | #CREATED ON: 03/23/2014
8 |  | #MODIFIED ON: 03/11/2015
 | 8 | #MODIFIED ON: 03/23/2015
9 | 9 | #Version: 4
10 | 10 | #PROJECT: Environmental Layers project
11 | 11 | #TO DO:
... | ... |
15 | 15 | #
16 | 16 | #First source these files:
17 | 17 | #Resolved call issues from R.
18 |  | #source /nobackupp6/aguzman4/climateLayers/sharedModules/etc/environ.sh
19 |  | #MODULEPATH=$MODULEPATH:/nex/modules/files
20 |  | #module load pythonkits/gdal_1.10.0_python_2.7.3_nex
 | 18 | source /nobackupp6/aguzman4/climateLayers/sharedModules/etc/environ.sh
 | 19 | MODULEPATH=$MODULEPATH:/nex/modules/files
 | 20 | module load pythonkits/gdal_1.10.0_python_2.7.3_nex
21 | 21 |
22 | 22 | # These are the names and number for the current subset regions used for global runs:
23 | 23 | #reg1 - North America (NAM)
... | ... |
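Side note (my addition, hedged): the three lines above are shell commands to run before starting R on NEX, not R code. A quick check from the R session that the GDAL module is actually visible could look like:

    # assumed sanity check, not part of the original script
    Sys.which("gdalinfo")      # a non-empty path is expected once environ.sh has been sourced
    Sys.getenv("MODULEPATH")   # may show /nex/modules/files if the variable was exported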
75 | 75 | region_names <- c("reg1","reg2","reg3","reg4","reg5","reg6") #selected region names, #PARAM2
76 | 76 | y_var_name <- "dailyTmax" #PARAM3
77 | 77 | interpolation_method <- c("gam_CAI") #PARAM4
78 |  | out_prefix<-"run10_1500x4500_global_analyses_03112015" #PARAM5
 | 78 | out_prefix<-"run10_1500x4500_global_analyses_03232015" #PARAM5
79 | 79 |
80 | 80 | #out_dir<-"/data/project/layers/commons/NEX_data/" #On NCEAS Atlas
81 | 81 | #out_dir <- "/nobackup/bparmen1/" #on NEX
... | ... |
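For orientation, a hedged sketch of what the elided setup typically does with these parameters (an assumption on my part, not the original code): the run-specific output folder is derived from out_prefix under one of the out_dir roots above, along the lines of:

    # hypothetical: build and create the run output directory once out_dir has been set
    out_dir <- file.path(out_dir, paste("output_", out_prefix, sep=""))
    if (!file.exists(out_dir)) dir.create(out_dir, recursive = TRUE)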
195 | 195 | #names(list_tb_diagnostic_v) <- list_names_tile_id
196 | 196 |
197 | 197 | ################
198 |  | #### Table 1: Average accuracy metrics
 | 198 | #### Table 1: Average accuracy metrics per tile and predictions
199 | 199 |
200 | 200 | #can use a maximum of 6 cores on the NEX Bridge
201 | 201 | #For 43 tiles but only xx RData objects it takes xxx min
... | ... |
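The per-tile extractions below all go through load_obj(), which is defined in the function scripts sourced at the top of the file. For reference only, a minimal equivalent (my sketch, not the original definition) would be:

    # hedged sketch of load_obj(): load one .RData file and return the single object it contains
    load_obj <- function(f){
      env <- new.env()
      nm <- load(f, envir = env)[1]   # name of the first (and only) object saved in the file
      env[[nm]]
    }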
230 | 230 | file=file.path(out_dir,paste("summary_metrics_v2_NA_",out_prefix,".txt",sep="")),sep=",")
231 | 231 |
232 | 232 | #################
233 |  | ###Table 2: daily accuracy metrics for all tiles
 | 233 | ###Table 2: daily validation/testing accuracy metrics for all tiles
234 | 234 | #this takes about 25min
235 | 235 | #tb_diagnostic_v_list <- lapply(list_raster_obj_files,FUN=function(x){x<-load_obj(x);x[["tb_diagnostic_v"]]})
236 | 236 | tb_diagnostic_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_diagnostic_v"]])},mc.preschedule=FALSE,mc.cores = num_cores)
... | ... |
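Because each load is wrapped in try(), tiles whose RData file is missing or unreadable come back as try-error objects instead of stopping the whole mclapply. A hedged follow-up check (not in the original section) to spot those tiles:

    # hypothetical check: which tiles failed to yield a tb_diagnostic_v table
    failed <- sapply(tb_diagnostic_v_list, function(x) inherits(x, "try-error"))
    which(failed)   # indices of tiles that could not be loaded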
251 | 251 | file=file.path(out_dir,paste("tb_diagnostic_v_NA","_",out_prefix,".txt",sep="")),sep=",")
252 | 252 |
253 | 253 | #################
254 |  | ###Table 3: monthly station information with predictions for all tiles
 | 254 | ###Table 3: monthly fit/training accuracy information for all tiles
255 | 255 |
256 | 256 | ## Monthly fitting information
257 | 257 | tb_month_diagnostic_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_month_diagnostic_s"]])},mc.preschedule=FALSE,mc.cores = num_cores)
... | ... |
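Once the monthly fit rows are stacked into tb_month_diagnostic_s_NA (written out in the next hunk), a per-month summary is often the first thing to inspect. A hedged example, assuming the usual rmse, month and pred_mod columns of these diagnostic tables:

    # hypothetical summary: mean training RMSE by month and model across all tiles
    aggregate(rmse ~ month + pred_mod, data = tb_month_diagnostic_s_NA, FUN = mean)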
274 | 274 | write.table((tb_month_diagnostic_s_NA),
275 | 275 | file=file.path(out_dir,paste("tb_month_diagnostic_s_NA","_",out_prefix,".txt",sep="")),sep=",")
276 | 276 |
 | 277 | #################
 | 278 | ###Table 4: daily fit/training accuracy information with predictions for all tiles
 | 279 |
 | 280 | ## daily fit info:
 | 281 |
 | 282 | tb_diagnostic_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_diagnostic_s"]])},mc.preschedule=FALSE,mc.cores = num_cores)
 | 283 |
 | 284 | names(tb_diagnostic_s_list) <- list_names_tile_id
 | 285 | tb_diagnostic_s_tmp <- remove_from_list_fun(tb_diagnostic_s_list)$list
 | 286 | #df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid
 | 287 |
 | 288 | tb_diagnostic_s_NA <- do.call(rbind.fill,tb_diagnostic_s_tmp) #create a df for NA tiles with all accuracy metrics
 | 289 | tile_id_tmp <- lapply(1:length(tb_diagnostic_s_tmp),
 | 290 |     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_diagnostic_s_tmp,y=names(tb_diagnostic_s_tmp))
 | 291 |
 | 292 | tb_diagnostic_s_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile
 | 293 |
 | 294 | tb_diagnostic_s_NA <- merge(tb_diagnostic_s_NA,df_tile_processed[,1:2],by="tile_id")
 | 295 |
 | 296 | write.table((tb_diagnostic_s_NA),
 | 297 |     file=file.path(out_dir,paste("tb_diagnostic_s_NA","_",out_prefix,".txt",sep="")),sep=",")
 | 298 |
 | 299 | ##### Table 5: Add later on: daily info
 | 300 | ### with also data_s and data_v saved!!!
 | 301 |
 | 302 | #Insert here...compute input and predicted ranges to spot potential errors?
 | 303 |
 | 304 | ##### SPDF of Monthly Station info
277 | 305 | #load data_month for specific tiles
278 | 306 | # data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
279 | 307 | # names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
... | ... |
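remove_from_list_fun() above also comes from the sourced assessment functions; it drops tiles whose extraction failed and keeps a flag of which ones were valid. A minimal sketch of the assumed behaviour (not the original definition):

    # hedged sketch: split a list into its usable elements plus a validity flag
    remove_from_list_fun <- function(list_items){
      valid <- !sapply(list_items, function(x) inherits(x, "try-error") || is.null(x))
      list(list = list_items[valid], valid = valid)
    }

The rep()/unlist() step that follows builds one tile_id per row of each tile's table, so that after rbind.fill() (from the plyr package, which pads columns missing in some tiles with NA) has stacked the tables, the combined data frame can be tagged with tile_id and merged back to df_tile_processed.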
293 | 321 | # write.table((data_month_NAM),
294 | 322 | # file=file.path(out_dir,paste("data_month_s_NAM","_",out_prefix,".txt",sep="")),sep=",")
295 | 323 |
296 |  | ## daily fit info:
 | 324 | ##### SPDF of Daily Station info
297 | 325 |
298 |  | tb_diagnostic_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_diagnostic_s"]])},mc.preschedule=FALSE,mc.cores = num_cores)
299 | 326 |
300 |  | names(tb_diagnostic_s_list) <- list_names_tile_id
301 |  | tb_diagnostic_s_tmp <- remove_from_list_fun(tb_diagnostic_s_list)$list
302 |  | #df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid
 | 327 | ######################################################
 | 328 | ####### PART 3: EXAMINE STATIONS AND MODEL FITTING ###
303 | 329 |
304 |  | tb_diagnostic_s_NA <- do.call(rbind.fill,tb_diagnostic_s_tmp) #create a df for NA tiles with all accuracy metrics
305 |  | tile_id_tmp <- lapply(1:length(tb_diagnostic_s_tmp),
306 |  |     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_diagnostic_s_tmp,y=names(tb_diagnostic_s_tmp))
 | 330 | ### Stations and model fitting ###
 | 331 | #summarize location and number of training and testing used by tiles
307 | 332 |
308 |  | tb_diagnostic_s_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile
 | 333 | names(robj1$clim_method_mod_obj[[1]]$data_month) # monthly data for January
 | 334 | #names(robj1$validation_mod_month_obj[[1]]$data_s) # daily for January with predictions
 | 335 | #note that there is no holdout in the current run at the monthly time scale:
309 | 336 |
310 |  | tb_diagnostic_s_NA <- merge(tb_diagnostic_s_NA,df_tile_processed[,1:2],by="tile_id")
 | 337 | robj1$clim_method_mod_obj[[1]]$data_month_v #zero rows for testing stations at monthly timescale
 | 338 | #load data_month for specific tiles
 | 339 | data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
311 | 340 |
312 |  | write.table((tb_diagnostic_s_NA),
313 |  |     file=file.path(out_dir,paste("tb_diagnostic_s_NA","_",out_prefix,".txt",sep="")),sep=",")
 | 341 | names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
314 | 342 |
315 |  | ##### Table 4: Add later on: daily info
316 |  | ### with also data_s and data_v saved!!!
 | 343 | use_day=TRUE
 | 344 | use_month=TRUE
 | 345 |
 | 346 | #list_raster_obj_files <- c("/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-100.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-100.0.RData",
 | 347 | #     "/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-105.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-105.0.RData")
317 | 348 |
318 |  | #Insert here...compute input and predicted ranges to spot potential errors?
 | 349 | list_names_tile_id <- df_tile_processed$tile_id
 | 350 | list_raster_obj_files[list_names_tile_id]
 | 351 | #list_names_tile_id <- c("tile_1","tile_2")
 | 352 | list_param_training_testing_info <- list(list_raster_obj_files[list_names_tile_id],use_month,use_day,list_names_tile_id)
 | 353 | names(list_param_training_testing_info) <- c("list_raster_obj_files","use_month","use_day","list_names_tile_id")
 | 354 |
 | 355 | list_param <- list_param_training_testing_info
 | 356 | #debug(extract_daily_training_testing_info)
 | 357 | #pred_data_info <- extract_daily_training_testing_info(1,list_param=list_param_training_testing_info)
 | 358 | pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = num_cores)
 | 359 | #pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id][1:6]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = 6)
 | 360 | #pred_data_info <- lapply(1:length(list_raster_obj_files),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
 | 361 | #pred_data_info <- lapply(1:length(list_raster_obj_files[1]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
 | 362 |
 | 363 | pred_data_info_tmp <- remove_from_list_fun(pred_data_info)$list #remove data not predicted
 | 364 | ##Add tile names?? it is already there
 | 365 | #names(pred_data_info)<-list_names_tile_id
 | 366 | pred_data_month_info <- do.call(rbind,lapply(pred_data_info_tmp,function(x){x$pred_data_month_info}))
 | 367 | pred_data_day_info <- do.call(rbind,lapply(pred_data_info_tmp,function(x){x$pred_data_day_info}))
 | 368 |
 | 369 | #output information in csv !!
 | 370 | write.table(pred_data_month_info,
 | 371 |     file=file.path(out_dir,paste("pred_data_month_info_",out_prefix,".txt",sep="")),sep=",")
 | 372 | write.table(pred_data_day_info,
 | 373 |     file=file.path(out_dir,paste("pred_data_day_info_",out_prefix,".txt",sep="")),sep=",")
319 | 374 |
320 | 375 | ######################################################
321 | 376 | ####### PART 4: Get shapefile tiling with centroids ###
... | ... |
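A hedged usage note for the two tables written above (the column names here are assumptions, inferred from the naming pattern of the other diagnostic tables, not taken from the source): a quick per-tile view of how many stations went into fitting versus validation could be obtained with:

    # hypothetical summary: average number of training (data_s) and testing (data_v) stations per tile
    aggregate(cbind(n_data_s, n_data_v) ~ tile_id, data = pred_data_day_info, FUN = mean)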
451 | 506 |
452 | 507 | }
453 | 508 |
454 |  | ######################################################
455 |  | ####### PART 3: EXAMINE STATIONS AND MODEL FITTING ###
456 |  |
457 |  | ### Stations and model fitting ###
458 |  | #summarize location and number of training and testing used by tiles
459 |  |
460 |  | names(robj1$clim_method_mod_obj[[1]]$data_month) # monthly data for January
461 |  | #names(robj1$validation_mod_month_obj[[1]]$data_s) # daily for January with predictions
462 |  | #note that there is no holdout in the current run at the monthly time scale:
463 |  |
464 |  | robj1$clim_method_mod_obj[[1]]$data_month_v #zero rows for testing stations at monthly timescale
465 |  | #load data_month for specific tiles
466 |  | data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
467 |  |
468 |  | names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
469 |  |
470 |  | use_day=TRUE
471 |  | use_month=TRUE
472 |  |
473 |  | #list_raster_obj_files <- c("/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-100.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-100.0.RData",
474 |  | #     "/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-105.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-105.0.RData")
475 |  |
476 |  | list_names_tile_id <- df_tile_processed$tile_id
477 |  | list_raster_obj_files[list_names_tile_id]
478 |  | #list_names_tile_id <- c("tile_1","tile_2")
479 |  | list_param_training_testing_info <- list(list_raster_obj_files[list_names_tile_id],use_month,use_day,list_names_tile_id)
480 |  | names(list_param_training_testing_info) <- c("list_raster_obj_files","use_month","use_day","list_names_tile_id")
481 |  |
482 |  | list_param <- list_param_training_testing_info
483 |  | #debug(extract_daily_training_testing_info)
484 |  | #pred_data_info <- extract_daily_training_testing_info(1,list_param=list_param_training_testing_info)
485 |  | pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = num_cores)
486 |  | #pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id][1:6]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = 6)
487 |  | #pred_data_info <- lapply(1:length(list_raster_obj_files),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
488 |  | #pred_data_info <- lapply(1:length(list_raster_obj_files[1]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
489 |  |
490 |  | pred_data_info_tmp <- remove_from_list_fun(pred_data_info)$list #remove data not predicted
491 |  | ##Add tile names?? it is already there
492 |  | #names(pred_data_info)<-list_names_tile_id
493 |  | pred_data_month_info <- do.call(rbind,lapply(pred_data_info_tmp,function(x){x$pred_data_month_info}))
494 |  | pred_data_day_info <- do.call(rbind,lapply(pred_data_info_tmp,function(x){x$pred_data_day_info}))
495 |  |
496 |  | #output information in csv !!
497 |  | write.table(pred_data_month_info,
498 |  |     file=file.path(out_dir,paste("pred_data_month_info_",out_prefix,".txt",sep="")),sep=",")
499 |  | write.table(pred_data_day_info,
500 |  |     file=file.path(out_dir,paste("pred_data_day_info_",out_prefix,".txt",sep="")),sep=",")
501 | 509 |
502 | 510 | ########### LAST PART: COPY SOME DATA BACK TO ATLAS #####
503 | 511 | #this part cannot be automated...