Revision c2f8f2d6
Added by Benoit Parmentier almost 10 years ago
climate/research/oregon/interpolation/global_run_scalingup_assessment_part1.R | ||
---|---|---|
5 | 5 |
#Part 1 create summary tables and inputs for figure in part 2 and part 3. |
6 | 6 |
#AUTHOR: Benoit Parmentier |
7 | 7 |
#CREATED ON: 03/23/2014 |
8 |
#MODIFIED ON: 12/15/2014
|
|
8 |
#MODIFIED ON: 12/23/2014
|
|
9 | 9 |
#Version: 3 |
10 | 10 |
#PROJECT: Environmental Layers project |
11 | 11 |
#TO DO: |
... | ... | |
20 | 20 |
#source /nobackupp4/aguzman4/climateLayers/sharedModules/etc/environ.sh |
21 | 21 |
#MODULEPATH=$MODULEPATH:/nex/modules/files |
22 | 22 |
#module load /nex/modules/files/pythonkits/gdal_1.10.0_python_2.7.3_nex |
23 |
# These are the names and number for the current subset regions used for global runs: |
|
24 |
#reg1 - North America (NAM) |
|
25 |
#reg2 - Western Europe (WE) |
|
26 |
#reg3 - Eastern Europe to East Asia (EE_EA) |
|
27 |
#reg4 - South America (SAM) |
|
28 |
#reg5 - Africa (AF) |
|
29 |
#reg6 - South East Asia and Australia (SEA_AUS) |
|
23 | 30 |
|
24 | 31 |
################################################################################################# |
25 | 32 |
|
... | ... | |
459 | 466 |
############################## |
460 | 467 |
#### Parameters and constants |
461 | 468 |
|
462 |
#in_dir1 <- "/nobackupp4/aguzman4/climateLayers/output1000x3000_km/" |
|
463 |
#in_dir1 <- "/nobackupp6/aguzman4/climateLayers/output1500x4500_km/" |
|
464 |
in_dir1 <- "/nobackupp6/aguzman4/climateLayers/output1000x3000_km/reg2" |
|
465 |
#/nobackupp4/aguzman4/climateLayers/output10Deg/reg1/finished.txt |
|
466 |
in_dir_list <- list.dirs(path=in_dir1,recursive=FALSE) #get the list regions processed for this run |
|
467 |
#in_dir_list <- in_dir_list[c(3,4)] #get the list regions processed for this run |
|
469 |
#reg1 (North Am), reg2(Europe),reg3(Asia), reg4 (South Am), reg5 (Africa), reg6 (Australia-Asia) |
|
470 |
#master directory containing the definition of tile size and tiles predicted |
|
471 |
in_dir1 <- "/nobackupp4/aguzman4/climateLayers/output1000x3000_km/" |
|
472 |
|
|
473 |
region_names <- c("reg2","reg6") #selected region names |
|
468 | 474 |
|
469 |
#if(basename(in_dir_list)[[1]]=="reg?") #add later |
|
470 |
#in_dir_list_all <- lapply(in_dir_list,function(x){list.dirs(path=x,recursive=F)}) |
|
471 |
#in_dir_list_all <- in_dir_list |
|
472 |
#in_dir_list <- list.dirs(path=in_dir_reg,recursive=FALSE) #get the list of tiles/directories with outputs |
|
473 |
#in_dir_list <- unlist(in_dir_list_all[c(2)]) #only region 3 has informatation at this stage |
|
474 |
#in_dir_list <- unlist(in_dir_list_all) #[c(2)]) #only region 3 has informatation at this stage |
|
475 |
in_dir_list <- list.dirs(path=in_dir1,recursive=FALSE) #get the list regions processed for this run |
|
476 |
#basename(in_dir_list) |
|
477 |
in_dir_list<- lapply(region_names,FUN=function(x,y){y[grep(x,basename(y),invert=FALSE)]}, |
|
478 |
y=in_dir_list) |
|
475 | 479 |
|
480 |
in_dir_list_all <- lapply(in_dir_list,function(x){list.dirs(path=x,recursive=F)}) |
|
481 |
in_dir_list <- unlist(in_dir_list_all) |
|
476 | 482 |
#in_dir_list <- in_dir_list[grep("bak",basename(basename(in_dir_list)),invert=TRUE)] #the first one is the in_dir1 |
477 | 483 |
in_dir_subset <- in_dir_list[grep("subset",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles... |
478 | 484 |
in_dir_shp <- file.path(in_dir_subset,"shapefiles") |
... | ... | |
481 | 487 |
in_dir_reg <- in_dir_list[grep(".*._.*.",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles... |
482 | 488 |
#in_dir_reg <- in_dir_list[grep("july_tiffs",basename(in_dir_reg),invert=TRUE)] #select directory with shapefiles... |
483 | 489 |
in_dir_list <- in_dir_reg |
490 |
|
|
484 | 491 |
#Models used. |
485 | 492 |
#list_models<-c("y_var ~ s(lat,lon,k=4) + s(elev_s,k=3) + s(LST,k=3)", |
486 | 493 |
# "y_var ~ s(lat,lon,k=5) + s(elev_s,k=3) + s(LST,k=3)", |
487 | 494 |
# "y_var ~ s(lat,lon,k=8) + s(elev_s,k=4) + s(LST,k=4)", |
488 |
|
|
489 |
|
|
490 |
#in_dir_list <- file.path(in_dir1,read.table(file.path(in_dir1,"processed.txt"))$V1) |
|
491 |
#in_dir_list <- as.list(in_dir_list[-1]) |
|
495 |
|
|
492 | 496 |
in_dir_list <- in_dir_list[grep("bak",basename(basename(in_dir_list)),invert=TRUE)] #the first one is the in_dir1 |
493 |
#in_dir_shp <- in_dir_list[grep("shapefiles",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles... |
|
494 |
in_dir_shp <- in_dir_shp[grep("subset_bak",basename(dirname(in_dir_shp)),invert=TRUE)] #the first one is the in_dir1 |
|
495 |
|
|
496 |
#in_dir_shp <- "/nobackupp4/aguzman4/climateLayers/output10Deg/reg1/subset/shapefiles/" |
|
497 |
#in_dir_shp <- "/nobackupp4/aguzman4/climateLayers/output20Deg/reg2/subset/shapefiles" |
|
497 |
#list of shapefiles used to define tiles |
|
498 | 498 |
in_dir_shp_list <- list.files(in_dir_shp,".shp",full.names=T) |
499 | 499 |
|
500 |
#in_dir_list <- in_dir_list[grep("shapefiles",basename(in_dir_list),invert=TRUE)] |
|
501 |
#the first one is the in_dir1 |
|
502 |
# the last directory contains shapefiles |
|
503 | 500 |
y_var_name <- "dailyTmax" |
504 | 501 |
interpolation_method <- c("gam_CAI") |
505 |
out_prefix<-"run10_global_analyses_12152014"
|
|
502 |
out_prefix<-"run10_global_analyses_12232014"
|
|
506 | 503 |
|
507 | 504 |
#out_dir<-"/data/project/layers/commons/NEX_data/" #On NCEAS Atlas |
508 | 505 |
out_dir <- "/nobackup/bparmen1/" #on NEX |
... | ... | |
522 | 519 |
|
523 | 520 |
CRS_locs_WGS84 <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84 |
524 | 521 |
|
525 |
day_to_mosaic <- c("20100101","20100901") |
|
522 |
#day_to_mosaic <- c("20100101","20100901") |
|
523 |
day_to_mosaic <- c("20100101","20100102","20100103","20100104","20100105", |
|
524 |
"20100301","20100302","20100303","20100304","20100305", |
|
525 |
"20100501","20100502","20100503","20100504","20100505", |
|
526 |
"20100701","20100702","20100703","20100704","20100705", |
|
527 |
"20100901","20100902","20100903","20100904","20100905", |
|
528 |
"20101101","20101102","20101103","20101104","20101105") |
|
529 |
#day_to_mosaic <- NULL #if day to mosaic is null then mosaic all dates? |
|
530 |
|
|
526 | 531 |
file_format <- ".tif" #format for mosaiced files |
527 | 532 |
NA_flag_val <- -9999 #No data value |
528 | 533 |
|
529 |
#day_to_mosaic <- NULL #if day to mosaic is null then mosaic all dates |
|
530 |
|
|
531 | 534 |
##raster_prediction object : contains testing and training stations with RMSE and model object |
532 | 535 |
|
533 |
#l_shp <- lapply(1:length(in_dir_shp_list),FUN=function(i){paste(strsplit(in_dir_shp_list[i],"_")[[1]][2:3],collapse="_")}) |
|
534 |
#match(l_shp,in_dir_list) |
|
535 |
#in_dir_list[match(in_dir_list,l_shp] |
|
536 |
|
|
537 | 536 |
list_raster_obj_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="^raster_prediction_obj.*.RData",full.names=T)}) |
538 | 537 |
basename(dirname(list_raster_obj_files[[1]])) |
539 | 538 |
list_names_tile_coord <- lapply(list_raster_obj_files,FUN=function(x){basename(dirname(x))}) |
... | ... | |
586 | 585 |
#### Table 1: Average accuracy metrics |
587 | 586 |
|
588 | 587 |
#can use a maximum of 6 cores on the NEX Bridge |
589 |
#For 177 tiles but only xx RData boject it takes xxx min
|
|
588 |
#For 43 tiles but only xx RData boject it takes xxx min
|
|
590 | 589 |
#summary_metrics_v_list <- mclapply(list_raster_obj_files[5:6],FUN=function(x){try( x<- load_obj(x)); try(x[["summary_metrics_v"]]$avg)},mc.preschedule=FALSE,mc.cores = 2) |
591 | 590 |
|
592 | 591 |
summary_metrics_v_list <- mclapply(list_raster_obj_files,FUN=function(x){try( x<- load_obj(x)); try(x[["summary_metrics_v"]]$avg)},mc.preschedule=FALSE,mc.cores = 6) |
Also available in: Unified diff
run 10 assessment of reg2 and reg6 with generation of mosaics for 1000x3000