Project

General

Profile

« Previous | Next » 

Revision cf05c940

Added by Benoit Parmentier over 10 years ago

global run scaling up assessment, clean up and split into part 1, part 2 and part 3 scripts for evaluation

View differences:

climate/research/oregon/interpolation/global_run_scalingup_assessment_part1.R
5 5
#Analyses, figures, tables and data are also produced in the script.
6 6
#AUTHOR: Benoit Parmentier 
7 7
#CREATED ON: 03/23/2014  
8
#MODIFIED ON: 05/12/2014            
8
#MODIFIED ON: 05/14/2014            
9 9
#Version: 3
10 10
#PROJECT: Environmental Layers project                                     
11 11
#################################################################################################
......
276 276
  return(pred_data_info_obj)
277 277
}
278 278

  
279
remove_from_list_fun <- function(l_x,condition_class ="try-error"){
280
  index <- vector("list",length(l_x))
281
  for (i in 1:length(l_x)){
282
    if (inherits(l_x[[i]],condition_class)){
283
      index[[i]] <- FALSE #remove from list
284
    }else{
285
      index[[i]] <- TRUE
286
    }
287
  }
288
  l_x<-l_x[unlist(index)] #remove from list all elements using subset
289
  
290
  obj <- list(l_x,index)
291
  names(obj) <- c("list","valid")
292
  return(obj)
293
}
294

  
295
##Function to list predicted tif
296
list_tif_fun <- function(i,in_dir_list,pattern_str){
297
  #list.files(path=x,pattern=".*predicted_mod1_0_1.*20100101.*.tif",full.names=T)})
298
  pat_str<- pattern_str[i]
299
  list_tif_files_dates <-lapply(in_dir_list,
300
         FUN=function(x,pat_str){list.files(path=x,pattern=pat_str,full.names=T)},pat_str=pat_str)
301
  return(list_tif_files_dates)
302
} 
303

  
279 304
##############################
280 305
#### Parameters and constants  
281 306

  
......
317 342

  
318 343
##raster_prediction object : contains testing and training stations with RMSE and model object
319 344

  
320
#pattern_raster_obj <- gam
321
#list.files(path=in_dir_list[1],pattern="gam_CAI_mod.*.RData")
322
#list_raster_obj_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="gam_CAI_mod.*.RData",full.names=T)})
323 345
list_raster_obj_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="^raster_prediction_obj.*.RData",full.names=T)})
324 346
basename(dirname(list_raster_obj_files[[1]]))
325 347
list_names_tile_coord <- lapply(list_raster_obj_files,FUN=function(x){basename(dirname(x))})
326 348
list_names_tile_id <- paste("tile",1:length(list_raster_obj_files),sep="_")
327
#names(list_raster_obj_files)<- 
328
                                                                                                             
329
names(list_raster_obj_files)<- list_names_tile_coord
349
names(list_raster_obj_files)<- list_names_tile_id
350

  
351
########################## START SCRIPT ##############################
330 352

  
331
###################### PART I: Generate tables to collect information over all tiles in North America ##########
332
#Table 1: Average accuracy metrics
333
#Table 2: daily accuracy metrics for all tiles
353
################################################################
354
######## PART 1: Generate tables to collect information 
355
######## over all tiles in North America 
356

  
357
##Function to collect all the tables from tiles into a table
358
###Table 1: Average accuracy metrics
359
###Table 2: daily accuracy metrics for all tiles
360

  
361
#First create table of tiles under analysis and their coord
362
df_tile_processed <- data.frame(tile_coord=basename(in_dir_list))
363
df_tile_processed$tile_id <- unlist(list_names_tile_id)
334 364

  
335 365
##Quick exploration of raster object
336 366
robj1 <- load_obj(list_raster_obj_files[[1]])
......
341 371
names(robj1$clim_method_mod_obj[[1]]$data_month) #for January
342 372
names(robj1$validation_mod_month_obj[[1]]$data_s) #for January with predictions
343 373

  
344
#data_month_list <- lapply(list_raster_obj_files,FUN=function(x){x<-load_obj(x);x[["summary_metrics_v"]]$avg})                           
374
################
375
#### Table 1: Average accuracy metrics
345 376

  
346
#robj1$tb_diagnostic_v[1:10,] #first 10 rows of accuarcy metrics per day and model (for specific tile)
347
#robj1$summary_metrics_v #first 10 rows of accuarcy metrics per day and model (for specific tile)
348

  
349
#summary_metrics_v_list <- lapply(list_raster_obj_files,FUN=function(x){x<-load_obj(x);x[["summary_metrics_v"]]$avg$rmse})                           
350
#summary_metrics_v_list <- lapply(list_raster_obj_files,FUN=function(x){try(x<- load_obj(x);x[["summary_metrics_v"]]$avg)})     
351 377
#can use a maximum of 6 cores on the NEX Bridge
352
summary_metrics_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try( x<- load_obj(x)); try(x[["summary_metrics_v"]]$avg)},mc.preschedule=FALSE,mc.cores = 5)                           
353
list_names_tile_id <- paste("tile",1:length(list_raster_obj_files),sep="_")
378
summary_metrics_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try( x<- load_obj(x)); try(x[["summary_metrics_v"]]$avg)},mc.preschedule=FALSE,mc.cores = 6)                           
354 379
names(summary_metrics_v_list) <- list_names_tile_id
355
#names(summary_metrics_v_list) <- list_names_tile_coord
356

  
357
remove_from_list_fun <- function(l_x,condition_class ="try-error"){
358
  index <- vector("list",length(l_x))
359
  for (i in 1:length(l_x)){
360
    if (inherits(l_x[[i]],condition_class)){
361
      index[[i]] <- FALSE #remove from list
362
    }else{
363
      index[[i]] <- TRUE
364
    }
365
  }
366
  l_x<-l_x[unlist(index)] #remove from list all elements using subset
367
  return(l_x)
368
}
369 380

  
370
summary_metrics_v_list <- remove_from_list_fun(summary_metrics_v_list)
371

  
372
#summary_metrics_v_NA <- do.call(rbind,summary_metrics_v_list) #create a df for NA tiles with all accuracy metrics
381
summary_metrics_v_tmp <- remove_from_list_fun(summary_metrics_v_list)$list
382
df_tile_processed$metrics_v <- remove_from_list_fun(summary_metrics_v_list)$valid
373 383
#Now remove "try-error" from list of accuracy)
374 384

  
375
summary_metrics_v_NA <- do.call(rbind.fill,summary_metrics_v_list) #create a df for NA tiles with all accuracy metrics
385
summary_metrics_v_NA <- do.call(rbind.fill,summary_metrics_v_tmp) #create a df for NA tiles with all accuracy metrics
376 386
#tile_coord <- lapply(1:length(summary_metrics_v_list),FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=summary_metrics_v_list)
377

  
378
tile_id <- lapply(1:length(summary_metrics_v_list),
379
                     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=summary_metrics_v_list,y=list_names_tile_id)
380

  
381
summary_metrics_v_NA$tile_id <-unlist(tile_id)
382
#summary_metrics_v_NA$tile_coord <-unlist(tile_coord)
383

  
387
#add the tile id identifier
388
tile_id_tmp <- lapply(1:length(summary_metrics_v_tmp),
389
                     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=summary_metrics_v_tmp,y=names(summary_metrics_v_tmp))
390
#adding tile id summary data.frame
391
summary_metrics_v_NA$tile_id <-unlist(tile_id_tmp)
384 392
summary_metrics_v_NA$n <- as.integer(summary_metrics_v_NA$n)
385 393
write.table(as.data.frame(summary_metrics_v_NA),
386 394
            file=file.path(out_dir,paste("summary_metrics_v2_NA_",out_prefix,".txt",sep="")),sep=",")
387
#Function to collect all the tables from tiles into a table
388 395

  
396
#################
397
###Table 2: daily accuracy metrics for all tiles
398
#this takes about 25min
389 399
#tb_diagnostic_v_list <- lapply(list_raster_obj_files,FUN=function(x){x<-load_obj(x);x[["tb_diagnostic_v"]]})                           
390 400
tb_diagnostic_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x[["tb_diagnostic_v"]])},mc.preschedule=FALSE,mc.cores = 6)                           
391 401

  
392
names(tb_diagnostic_v_list)
402
names(tb_diagnostic_v_list) <- list_names_tile_id
403
tb_diagnostic_v_tmp <- remove_from_list_fun(tb_diagnostic_v_list)$list
404
#df_tile_processed$tb_diag <- remove_from_list_fun(tb_diagnostic_v_list)$valid
393 405

  
394
tb_diagnostic_v_NA <- do.call(rbind.fill,tb_diagnostic_v_list) #create a df for NA tiles with all accuracy metrics
406
tb_diagnostic_v_NA <- do.call(rbind.fill,tb_diagnostic_v_tmp) #create a df for NA tiles with all accuracy metrics
407
tile_id_tmp <- lapply(1:length(tb_diagnostic_v_tmp),
408
                     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_diagnostic_v_tmp,y=names(tb_diagnostic_v_tmp))
395 409

  
396
tile_coord <- lapply(1:length(tb_diagnostic_v_list),
397
                     FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=tb_diagnostic_v_list)
398
tile_id <- lapply(1:length(tb_diagnostic_v_list),
399
                     FUN=function(i,x,y){rep(y[i],nrow(x[[i]]))},x=tb_diagnostic_v_list,y=list_names_tile_id)
410
tb_diagnostic_v_NA$tile_id <- unlist(tile_id_tmp) #adding identifier for tile
400 411

  
401
tb_diagnostic_v_NA$tile_id <- unlist(tile_id) #adding identifier for tile
402
tb_diagnostic_v_NA$tile_coord <- unlist(tile_coord) #adding identifier for tile
412
tb_diagnostic_v_NA <- merge(tb_diagnostic_v_NA,df_tile_processed[,1:2],by="tile_id")
403 413

  
404 414
write.table((tb_diagnostic_v_NA),
405
            file=file.path(out_dir,paste("tb_diagnostic_v2_NA","_",out_prefix,".txt",sep="")),sep=",")
406

  
407
#load data_month for specific tiles
408
data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
409

  
410
names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
411

  
412
#problem with tile 12...the raster ojbect has missing sub object
413
#data_month_list <- lapply(1:length(list_raster_obj_files),x=list_raster_obj_files,
414
#                          FUN=function(i,x){x<-load_obj(x[[i]]);
415
#                                            extract_from_list_obj(x$validation_mod_month_obj,"data_s")})                           
416

  
417
data_month_list <- lapply(1:length(list_raster_obj_files),x=list_raster_obj_files,
418
                          FUN=function(i,x){x<-load_obj(x[[i]]);
419
                                            extract_from_list_obj(x$clim_method_mod_obj,"data_month")})                           
415
            file=file.path(out_dir,paste("tb_diagnostic_v_NA","_",out_prefix,".txt",sep="")),sep=",")
420 416

  
421
names(data_month_list) <- paste("tile","_",1:length(data_month_list),sep="")
417
#################
418
###Table 3: monthly station information with predictions for all tiles
422 419

  
423

  
424
#names(data_month_list) <- basename(in_dir_list) #use folder id instead
425

  
426
tile_id <- lapply(1:length(data_month_list),
427
                  FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=data_month_list)
428
data_month_NAM <- do.call(rbind.fill,data_month_list) #combined data_month for "NAM" North America
429
data_month_NAM$tile_id <- unlist(tile_id)
430

  
431
#plot(mm_01 ~ elev_s,data=data_month_NAM) #Jan across all tiles
432
#plot(mm_06 ~ elev_s,data=data_month_NAM) #June across all tiles
433
#plot(TMax ~ mm_01,data=data_month_NAM) #monthly tmax averages and LST across all tiles
434

  
435
#copy back to atlas
436
system("scp -p ./*.txt ./*.tif parmentier@atlas.nceas.ucsb.edu:/data/project/layers/commons/NEX_data/output_run2_global_analyses_05122014")
420
#load data_month for specific tiles
421
# data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
422
# names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
423
# 
424
# data_month_s_list <- mclapply(list_raster_obj_files,FUN=function(x){try(x<-load_obj(x));try(x$validation_mod_month_obj[["data_s"]])},mc.preschedule=FALSE,mc.cores = 6)                           
425
# 
426
# names(data_month_s_list) <- list_names_tile_id
427
# 
428
# data_month_tmp <- remove_from_list_fun(data_month_s_list)$list
429
# #df_tile_processed$metrics_v <- remove_from_list_fun(data_month_s_list)$valid
430
# 
431
# tile_id <- lapply(1:length(data_month_tmp),
432
#                   FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=data_month_tmp)
433
# data_month_NAM <- do.call(rbind.fill,data_month_list) #combined data_month for "NAM" North America
434
# data_month_NAM$tile_id <- unlist(tile_id)
435
# 
436
# write.table((data_month_NAM),
437
#             file=file.path(out_dir,paste("data_month_s_NAM","_",out_prefix,".txt",sep="")),sep=",")
438

  
439
##### Table 4: Add later on: daily info
440
### with also data_s and data_v saved!!!
441

  
442
#Copy back to atlas
443
system("scp -p ./*.txt parmentier@atlas.nceas.ucsb.edu:/data/project/layers/commons/NEX_data/output_run2_global_analyses_05122014")
444
#system("scp -p ./*.txt ./*.tif parmentier@atlas.nceas.ucsb.edu:/data/project/layers/commons/NEX_data/output_run2_global_analyses_05122014")
437 445

  
438 446
######################################################
439 447
####### PART 2 CREATE MOSAIC OF PREDICTIONS PER DAY ###
440 448

  
441
#list.files(path=in_dir_list[[1]],pattern=".*predicted.*20100101.*.tif")
442

  
443
#some files are
444
#"/nobackupp4/aguzman4/climateLayers/output/30.0_-115.0/gam_fusion_dailyTmax_predicted_mod_kr_0_1_20100901_30_130.0_-115.0.tif"
445
#"/nobackupp4/aguzman4/climateLayers/output/30.0_-115.0/gam_fusion_dailyTmax_predicted_mod_kr_20100901_30_130.0_-115.0.tif"
446
#list_tif_files_dates <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern=".*month.*.tif",full.names=T)})
447
#list_tif_files_dates <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern=".*predicted.*20100101.*.tif",full.names=T)})
448

  
449
#".*predicted_mod1_0_1.*20100101.*.tif"
450

  
451
#tb <- tb_diagnostic_v_NA
452
#get specific dates from tb
453
#dates_l <- unique(tb$date)
454 449
dates_l <- unique(robj1$tb_diagnostic_s$date) #list of dates to query tif
455 450

  
456
##Function to list predicted tif
457
list_tif_fun <- function(i,in_dir_list,pattern_str){
458
  #list.files(path=x,pattern=".*predicted_mod1_0_1.*20100101.*.tif",full.names=T)})
459
  pat_str<- pattern_str[i]
460
  list_tif_files_dates <-lapply(in_dir_list,
461
         FUN=function(x,pat_str){list.files(path=x,pattern=pat_str,full.names=T)},pat_str=pat_str)
462
  return(list_tif_files_dates)
463
} 
464

  
465 451
#First mosaic mod1
466 452

  
467 453
## make this a function? report on number of tiles used for mosaic...
......
516 502

  
517 503
unlist(lapply(l_f_bytiles,length))
518 504

  
505
system("scp -p ./*.tif parmentier@atlas.nceas.ucsb.edu:/data/project/layers/commons/NEX_data/output_run2_global_analyses_05122014")
506

  
507
######################################################
508
####### PART 3: EXAMINE STATIONS AND MODEL FITTING ###
509

  
510
### Stations and model fitting ###
511
#summarize location and number of training and testing used by tiles
512

  
513
names(robj1$clim_method_mod_obj[[1]]$data_month) # monthly data for January
514
#names(robj1$validation_mod_month_obj[[1]]$data_s) # daily for January with predictions
515
#note that there is no holdout in the current run at the monthly time scale:
516

  
517
robj1$clim_method_mod_obj[[1]]$data_month_v #zero rows for testing stations at monthly timescale
518
#load data_month for specific tiles
519
data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
520

  
521
names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
522

  
523
#problem with tile 12...the raster ojbect has missing sub object
524
#data_month_list <- lapply(1:length(list_raster_obj_files),x=list_raster_obj_files,
525
#                          FUN=function(i,x){x<-load_obj(x[[i]]);
526
#                                            extract_from_list_obj(x$validation_mod_month_obj,"data_s")})                           
527

  
528
### make this part a function:
529

  
530
#create a table for every month, day and tiles...
531
# data_month_list <- lapply(1:length(list_raster_obj_files),x=list_raster_obj_files,
532
#                           FUN=function(i,x){x<-load_obj(x[[i]]);
533
#                                             extract_from_list_obj(x$clim_method_mod_obj,"data_month")})                           
534
# 
535
# names(data_month_list) <- paste("tile","_",1:length(data_month_list),sep="")
536
# 
537
# #names(data_month_list) <- basename(in_dir_list) #use folder id instead
538
# 
539
# list_names_tile_id <- paste("tile",1:length(list_raster_obj_files),sep="_")
540
# 
541
# #tile_id <- lapply(1:length(data_month_list),
542
# #                  FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=data_month_list)
543
# 
544
# data_month_NAM <- do.call(rbind.fill,data_month_list) #combined data_month for "NAM" North America
545
# data_month_NAM$tile_id <- unlist(tile_id)
546
# 
547
# names(robj1$validation_mod_day_obj[[1]]$data_s) # daily for January with predictions
548
# dim(robj1$validation_mod_month_obj[[1]]$data_s) # daily for January with predictions
549
# 
550
# use_day=TRUE
551
# use_month=TRUE
552
# 
553
# list_param_training_testing_info <- list(list_raster_obj_files,use_month,use_day,list_names_tile_id)
554
# names(list_param_training_testing_info) <- c("list_raster_obj_files","use_month","use_day","list_names_tile_id")
555
# 
556
# list_param <- list_param_training_testing_info
557
# 
558
# pred_data_info <- lapply(1:length(list_raster_obj_files),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
559
# 
560
# pred_data_month_info <- do.call(rbind,lapply(pred_data_info,function(x){x$pred_data_month_info}))
561
# pred_data_day_info <- do.call(rbind,lapply(pred_data_info,function(x){x$pred_data_day_info}))
562

  
563
######################################################
564
####### PART 4: GENERATE DIAGNOSTIC plots for the area analyzed ###
565

  
519 566
### Create a combined shape file for all region?
520 567

  
521 568
#get centroid
......
526 573
### Create a quick mosaic (mean method...)
527 574
#mean predicitons
528 575
#mean of kriging error?
529
                    
576
#plot(mm_01 ~ elev_s,data=data_month_NAM) #Jan across all tiles
577
#plot(mm_06 ~ elev_s,data=data_month_NAM) #June across all tiles
578
#plot(TMax ~ mm_01,data=data_month_NAM) #monthly tmax averages and LST across all tiles
579
              
530 580
tb <- read.table(file.path(out_dir,"tb_diagnostic_v2_NA_run1_NA_analyses_03232013.txt"),sep=",")
531 581
summary_metrics_v <- read.table(file.path(out_dir,"summary_metrics_v2_NA_run1_NA_analyses_03232013.txt"),sep=",")
532 582

  
......
639 689
pred_s <- stack(l_m_tif[2:5])
640 690
levelplot(pred_s)
641 691

  
642
######################################################
643
####### PART 3: EXAMINE STATIONS AND MODEL FITTING ###
644

  
645
### Stations and model fitting ###
646
#summarize location and number of training and testing used by tiles
647

  
648
names(robj1$clim_method_mod_obj[[1]]$data_month) # monthly data for January
649
#names(robj1$validation_mod_month_obj[[1]]$data_s) # daily for January with predictions
650
#note that there is no holdout in the current run at the monthly time scale:
651

  
652
robj1$clim_method_mod_obj[[1]]$data_month_v #zero rows for testing stations at monthly timescale
653
#load data_month for specific tiles
654
data_month <- extract_from_list_obj(robj1$clim_method_mod_obj,"data_month")
655

  
656
names(data_month) #this contains LST means (mm_1, mm_2 etc.) as well as TMax and other info
657

  
658
#problem with tile 12...the raster ojbect has missing sub object
659
#data_month_list <- lapply(1:length(list_raster_obj_files),x=list_raster_obj_files,
660
#                          FUN=function(i,x){x<-load_obj(x[[i]]);
661
#                                            extract_from_list_obj(x$validation_mod_month_obj,"data_s")})                           
662

  
663
### make this part a function:
664

  
665
#create a table for every month, day and tiles...
666
data_month_list <- lapply(1:length(list_raster_obj_files),x=list_raster_obj_files,
667
                          FUN=function(i,x){x<-load_obj(x[[i]]);
668
                                            extract_from_list_obj(x$clim_method_mod_obj,"data_month")})                           
669

  
670
names(data_month_list) <- paste("tile","_",1:length(data_month_list),sep="")
671

  
672
#names(data_month_list) <- basename(in_dir_list) #use folder id instead
673

  
674
list_names_tile_id <- paste("tile",1:length(list_raster_obj_files),sep="_")
675

  
676
#tile_id <- lapply(1:length(data_month_list),
677
#                  FUN=function(i,x){rep(names(x)[i],nrow(x[[i]]))},x=data_month_list)
678

  
679
data_month_NAM <- do.call(rbind.fill,data_month_list) #combined data_month for "NAM" North America
680
data_month_NAM$tile_id <- unlist(tile_id)
681

  
682
names(robj1$validation_mod_day_obj[[1]]$data_s) # daily for January with predictions
683
dim(robj1$validation_mod_month_obj[[1]]$data_s) # daily for January with predictions
684

  
685
use_day=TRUE
686
use_month=TRUE
687

  
688
list_param_training_testing_info <- list(list_raster_obj_files,use_month,use_day,list_names_tile_id)
689
names(list_param_training_testing_info) <- c("list_raster_obj_files","use_month","use_day","list_names_tile_id")
690

  
691
list_param <- list_param_training_testing_info
692

  
693
pred_data_info <- lapply(1:length(list_raster_obj_files),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info)
694

  
695
pred_data_month_info <- do.call(rbind,lapply(pred_data_info,function(x){x$pred_data_month_info}))
696
pred_data_day_info <- do.call(rbind,lapply(pred_data_info,function(x){x$pred_data_day_info}))
697

  
698

  
699 692
##################### END OF SCRIPT ######################

Also available in: Unified diff