Revision 63238731

Added by Benoit Parmentier almost 10 years ago

global assessment part1: clean up of script

View differences:

climate/research/oregon/interpolation/global_run_scalingup_assessment_part1.R
@@ -5,7 +5,7 @@
 #Part 1 create summary tables and inputs files for figure in part 2 and part 3.
 #AUTHOR: Benoit Parmentier
 #CREATED ON: 03/23/2014
-#MODIFIED ON: 03/11/2015
+#MODIFIED ON: 03/23/2015
 #Version: 4
 #PROJECT: Environmental Layers project
 #TO DO:
@@ -15,9 +15,9 @@
 #
 #First source these files:
 #Resolved call issues from R.
-#source /nobackupp6/aguzman4/climateLayers/sharedModules/etc/environ.sh
-#MODULEPATH=$MODULEPATH:/nex/modules/files
-#module load pythonkits/gdal_1.10.0_python_2.7.3_nex
+source /nobackupp6/aguzman4/climateLayers/sharedModules/etc/environ.sh
+MODULEPATH=$MODULEPATH:/nex/modules/files
+module load pythonkits/gdal_1.10.0_python_2.7.3_nex
 
 # These are the names and number for the current subset regions used for global runs:
 #reg1 - North America (NAM)
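Note: the three lines that lose their leading # above are shell commands meant to be run on NEX before starting R, not R code. As a hedged aside, one way to confirm from inside R that the environment script and module were actually loaded in the calling shell might be the following sketch (the tool names checked are assumptions, not taken from the script):

    # Sanity check from R that the NEX environment was set up in the calling shell
    Sys.getenv("MODULEPATH")      # should contain /nex/modules/files after environ.sh is sourced
    Sys.which("gdal_translate")   # empty string if the GDAL binaries are not on the PATH
    Sys.which("python")           # python provided by the pythonkits module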
@@ -75,7 +75,7 @@
 region_names <- c("reg1","reg2","reg3","reg4","reg5","reg6") #selected region names, #PARAM2
 y_var_name <- "dailyTmax" #PARAM3
 interpolation_method <- c("gam_CAI") #PARAM4
-out_prefix<-"run10_1500x4500_global_analyses_03112015" #PARAM5
+out_prefix<-"run10_1500x4500_global_analyses_03232015" #PARAM5
 
 #out_dir<-"/data/project/layers/commons/NEX_data/" #On NCEAS Atlas
 #out_dir <- "/nobackup/bparmen1/" #on NEX
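Note: the only change in this hunk is the run date embedded in out_prefix (03112015 to 03232015); every table written later inherits this prefix. As an illustration only, the run-specific output directory is typically derived from it along these lines (the base path is taken from the commented-out NEX setting above; the exact construction is an assumption, not part of the diff):

    # Hypothetical sketch: derive and create the run-specific output directory
    out_dir <- "/nobackup/bparmen1/"             # base path from the commented-out NEX setting
    out_dir <- file.path(out_dir, out_prefix)    # e.g. .../run10_1500x4500_global_analyses_03232015
    if (!file.exists(out_dir)) dir.create(out_dir, recursive = TRUE)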
@@ -195,7 +195,7 @@
 #names(list_tb_diagnostic_v) <- list_names_tile_id
 
 ################
-#### Table 1: Average accuracy metrics
+#### Table 1: Average accuracy metrics per tile and predictions
 
 #can use a maximum of 6 cores on the NEX Bridge
 #For 43 tiles but only xx RData boject it takes xxx min
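Note: Tables 1-4 below all call load_obj() on each tile's raster_prediction_obj .RData file before pulling out one component. The helper is defined elsewhere in the project; a minimal definition consistent with how it is used here (an assumption, not copied from the repository) would be:

    # Assumed definition of load_obj(): load an .RData file into its own
    # environment and return the single object stored in it
    load_obj <- function(f) {
      env <- new.env()
      nm <- load(f, envir = env)   # load() returns the name(s) of the loaded object(s)
      get(nm[1], envir = env)
    }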
@@ -230,7 +230,7 @@
             file=file.path(out_dir,paste("summary_metrics_v2_NA_",out_prefix,".txt",sep="")),sep=",")
 
 #################
-###Table 2: daily accuracy metrics for all tiles
+###Table 2: daily validation/testing accuracy metrics for all tiles
 #this takes about 25min
 #tb_diagnostic_v_list <- lapply(list_raster_obj_files,FUN=function(x){x<-load_obj(x);x[["tb_diagnostic_v"]]})
 tb_diagnostic_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_diagnostic_v"]])},mc.preschedule=FALSE,mc.cores = num_cores)
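Note: both the load and the extraction are wrapped in try() so that a missing or corrupt tile .RData file yields a 'try-error' element instead of aborting the whole mclapply run; remove_from_list_fun() then drops those elements. That function is project-specific, but a sketch of an equivalent filter (an assumption about its behaviour) is:

    # Assumed equivalent of remove_from_list_fun(): keep only tiles whose
    # extraction succeeded, and record which ones were valid
    valid <- !sapply(tb_diagnostic_v_list,
                     function(x) inherits(x, "try-error") || is.null(x))
    tb_diagnostic_v_tmp <- tb_diagnostic_v_list[valid]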
@@ -251,7 +251,7 @@
             file=file.path(out_dir,paste("tb_diagnostic_v_NA","_",out_prefix,".txt",sep="")),sep=",")
 
 #################
-###Table 3: monthly station information with predictions for all tiles
+###Table 3: monthly fit/training accuracy information for all tiles
 
 ## Monthly fitting information
 tb_month_diagnostic_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_month_diagnostic_s"]])},mc.preschedule=FALSE,mc.cores = num_cores)
@@ -274,6 +274,34 @@
 write.table((tb_month_diagnostic_s_NA),
             file=file.path(out_dir,paste("tb_month_diagnostic_s_NA","_",out_prefix,".txt",sep="")),sep=",")
 
+#################
+###Table 4: daily fit/training accuracy information with predictions for all tiles
+
+## daily fit info:
+
+tb_diagnostic_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_diagnostic_s"]])},mc.preschedule=FALSE,mc.cores = num_cores)
+
+names(tb_diagnostic_s_list) <- list_names_tile_id
+tb_diagnostic_s_tmp <- remove_from_list_fun(tb_diagnostic_s_list)$list
+#df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid
+
+tb_diagnostic_s_NA <- do.call(rbind.fill,tb_diagnostic_s_tmp) #create a df for NA tiles with all accuracy metrics
+tile_id_tmp <- lapply(1:length(tb_diagnostic_s_tmp),
+                     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_diagnostic_s_tmp,y=names(tb_diagnostic_s_tmp))
+
+tb_diagnostic_s_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile
+
+tb_diagnostic_s_NA <- merge(tb_diagnostic_s_NA,df_tile_processed[,1:2],by="tile_id")
+
+write.table((tb_diagnostic_s_NA),
+            file=file.path(out_dir,paste("tb_diagnostic_s_NA","_",out_prefix,".txt",sep="")),sep=",")
+
+##### Table 5: Add later on: daily info
+### with also data_s and data_v saved!!!
+
+#Insert here...compute input and predicted ranges to spot potential errors?
+
+##### SPDF of Monhtly Station info
 #load data_month for specific tiles
 # data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
 # names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
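Note: the new Table 4 block added above repeats the aggregation pattern used for Tables 1-3: per-tile data frames (which may have different model columns) are stacked with plyr::rbind.fill and each row is tagged with its tile_id before the tile metadata is merged in. The same steps restated compactly as a sketch, assuming the script's tb_diagnostic_s_tmp and df_tile_processed objects are in the workspace:

    library(plyr)  # provides rbind.fill

    # Stack per-tile accuracy tables (columns may differ across tiles),
    # then label every row with the tile it came from
    tb_s <- do.call(rbind.fill, tb_diagnostic_s_tmp)
    tb_s$tile_id <- rep(names(tb_diagnostic_s_tmp),
                        times = sapply(tb_diagnostic_s_tmp, nrow))
    tb_s <- merge(tb_s, df_tile_processed[, 1:2], by = "tile_id")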
@@ -293,29 +321,56 @@
 # write.table((data_month_NAM),
 #             file=file.path(out_dir,paste("data_month_s_NAM","_",out_prefix,".txt",sep="")),sep=",")
 
-## daily fit info:
+##### SPDF of Daily Station info
 
-tb_diagnostic_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_diagnostic_s"]])},mc.preschedule=FALSE,mc.cores = num_cores)
 
-names(tb_diagnostic_s_list) <- list_names_tile_id
-tb_diagnostic_s_tmp <- remove_from_list_fun(tb_diagnostic_s_list)$list
-#df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid
+######################################################
+####### PART 3: EXAMINE STATIONS AND MODEL FITTING ###
 
-tb_diagnostic_s_NA <- do.call(rbind.fill,tb_diagnostic_s_tmp) #create a df for NA tiles with all accuracy metrics
-tile_id_tmp <- lapply(1:length(tb_diagnostic_s_tmp),
-                     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_diagnostic_s_tmp,y=names(tb_diagnostic_s_tmp))
+### Stations and model fitting ###
+#summarize location and number of training and testing used by tiles
 
-tb_diagnostic_s_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile
+names(robj1$clim_method_mod_obj[[1]]$data_month) # monthly data for January
+#names(robj1$validation_mod_month_obj[[1]]$data_s) # daily for January with predictions
+#note that there is no holdout in the current run at the monthly time scale:
 
-tb_diagnostic_s_NA <- merge(tb_diagnostic_s_NA,df_tile_processed[,1:2],by="tile_id")
+robj1$clim_method_mod_obj[[1]]$data_month_v #zero rows for testing stations at monthly timescale
+#load data_month for specific tiles
+data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
 
-write.table((tb_diagnostic_s_NA),
-            file=file.path(out_dir,paste("tb_diagnostic_s_NA","_",out_prefix,".txt",sep="")),sep=",")
+names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
 
-##### Table 4: Add later on: daily info
-### with also data_s and data_v saved!!!
+use_day=TRUE
+use_month=TRUE
+ 
+#list_raster_obj_files <- c("/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-100.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-100.0.RData",
+#                    "/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-105.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-105.0.RData")
 
-#Insert here...compute input and predicted ranges to spot potential errors?
+list_names_tile_id <- df_tile_processed$tile_id
+list_raster_obj_files[list_names_tile_id]
+#list_names_tile_id <- c("tile_1","tile_2")
+list_param_training_testing_info <- list(list_raster_obj_files[list_names_tile_id],use_month,use_day,list_names_tile_id)
+names(list_param_training_testing_info) <- c("list_raster_obj_files","use_month","use_day","list_names_tile_id")
+ 
+list_param <- list_param_training_testing_info
+#debug(extract_daily_training_testing_info)
+#pred_data_info <- extract_daily_training_testing_info(1,list_param=list_param_training_testing_info)
+pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = num_cores)
+#pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id][1:6]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = 6)
+#pred_data_info <- lapply(1:length(list_raster_obj_files),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
+#pred_data_info <- lapply(1:length(list_raster_obj_files[1]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
+
+pred_data_info_tmp <- remove_from_list_fun(pred_data_info)$list #remove data not predicted
+##Add tile nanmes?? it is alreaready there
+#names(pred_data_info)<-list_names_tile_id
+pred_data_month_info <- do.call(rbind,lapply(pred_data_info_tmp,function(x){x$pred_data_month_info}))
+pred_data_day_info <- do.call(rbind,lapply(pred_data_info_tmp,function(x){x$pred_data_day_info}))
+
+#putput inforamtion in csv !!
+write.table(pred_data_month_info,
+            file=file.path(out_dir,paste("pred_data_month_info_",out_prefix,".txt",sep="")),sep=",")
+write.table(pred_data_day_info,
+            file=file.path(out_dir,paste("pred_data_day_info_",out_prefix,".txt",sep="")),sep=",")
 
 ######################################################
 ####### PART 4: Get shapefile tiling with centroids ###
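Note: with this change PART 3 (stations and model fitting) moves up here from the end of the file (its old copy is deleted in the hunk below) and finishes by writing pred_data_month_info and pred_data_day_info with write.table(..., sep=","). A hedged sketch of reading one of them back for a quick size check, assuming the default header and row names produced by write.table():

    # Sketch: re-read the month-level training/testing summary and inspect its size
    fn <- file.path(out_dir, paste("pred_data_month_info_", out_prefix, ".txt", sep = ""))
    pred_month_check <- read.table(fn, sep = ",", header = TRUE, stringsAsFactors = FALSE)
    dim(pred_month_check)   # rows should correspond to tile/month combinations that were predicted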
@@ -451,53 +506,6 @@
 
 }
 
-######################################################
-####### PART 3: EXAMINE STATIONS AND MODEL FITTING ###
-
-### Stations and model fitting ###
-#summarize location and number of training and testing used by tiles
-
-names(robj1$clim_method_mod_obj[[1]]$data_month) # monthly data for January
-#names(robj1$validation_mod_month_obj[[1]]$data_s) # daily for January with predictions
-#note that there is no holdout in the current run at the monthly time scale:
-
-robj1$clim_method_mod_obj[[1]]$data_month_v #zero rows for testing stations at monthly timescale
-#load data_month for specific tiles
-data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
-
-names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
-
-use_day=TRUE
-use_month=TRUE
- 
-#list_raster_obj_files <- c("/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-100.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-100.0.RData",
-#                    "/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-105.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-105.0.RData")
-
-list_names_tile_id <- df_tile_processed$tile_id
-list_raster_obj_files[list_names_tile_id]
-#list_names_tile_id <- c("tile_1","tile_2")
-list_param_training_testing_info <- list(list_raster_obj_files[list_names_tile_id],use_month,use_day,list_names_tile_id)
-names(list_param_training_testing_info) <- c("list_raster_obj_files","use_month","use_day","list_names_tile_id")
- 
-list_param <- list_param_training_testing_info
-#debug(extract_daily_training_testing_info)
-#pred_data_info <- extract_daily_training_testing_info(1,list_param=list_param_training_testing_info)
-pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = num_cores)
-#pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id][1:6]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = 6)
-#pred_data_info <- lapply(1:length(list_raster_obj_files),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
-#pred_data_info <- lapply(1:length(list_raster_obj_files[1]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
-
-pred_data_info_tmp <- remove_from_list_fun(pred_data_info)$list #remove data not predicted
-##Add tile nanmes?? it is alreaready there
-#names(pred_data_info)<-list_names_tile_id
-pred_data_month_info <- do.call(rbind,lapply(pred_data_info_tmp,function(x){x$pred_data_month_info}))
-pred_data_day_info <- do.call(rbind,lapply(pred_data_info_tmp,function(x){x$pred_data_day_info}))
-
-#putput inforamtion in csv !!
-write.table(pred_data_month_info,
-            file=file.path(out_dir,paste("pred_data_month_info_",out_prefix,".txt",sep="")),sep=",")
-write.table(pred_data_day_info,
-            file=file.path(out_dir,paste("pred_data_day_info_",out_prefix,".txt",sep="")),sep=",")
 
 ########### LAST PART: COPY SOME DATA BACK TO ATLAS #####
 #this part cannot be automated...
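Note: the copy back to Atlas is manual, but the candidate files are simply the tables written with out_prefix in their names. A small sketch (the file pattern is an assumption) to list them before copying by hand:

    # List this run's summary tables so they can be copied back to Atlas manually
    files_to_copy <- list.files(path = out_dir,
                                pattern = paste(out_prefix, "\\.txt$", sep = ""),
                                full.names = TRUE)
    files_to_copy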
