Revision 563fe056
Added by Benoit Parmentier over 10 years ago
climate/research/oregon/interpolation/global_run_scalingup_assessment_part1.R | ||
---|---|---|
5 | 5 |
#Part 1 create summary tables and inputs for figure in part 2 and part 3. |
6 | 6 |
#AUTHOR: Benoit Parmentier |
7 | 7 |
#CREATED ON: 03/23/2014 |
8 |
#MODIFIED ON: 06/19/2014
|
|
8 |
#MODIFIED ON: 06/27/2014
|
|
9 | 9 |
#Version: 3 |
10 | 10 |
#PROJECT: Environmental Layers project |
11 | 11 |
#TO DO: |
... | ... | |
61 | 61 |
out_name <- paste("output_",out_suffix,sep="") |
62 | 62 |
out_dir <- file.path(out_dir,out_name) |
63 | 63 |
} |
64 |
#create if it does not exist |
|
64 |
#create if it does not exist: create the output dir as defined
|
|
65 | 65 |
if(!file.exists(out_dir)){ |
66 | 66 |
dir.create(out_dir) |
67 | 67 |
} |
... | ... | |
158 | 158 |
|
159 | 159 |
### Function: |
160 | 160 |
pred_data_info_fun <- function(k,list_data,pred_mod,sampling_dat_info){ |
161 |
#Summarizing input info |
|
161 |
#Summarizing input info from sampling and df used in training/testing
|
|
162 | 162 |
|
163 | 163 |
data <- list_data[[k]] |
164 | 164 |
sampling_dat <- sampling_dat_info[[k]] |
165 |
if(data_day!="try-error"){
|
|
165 |
if(data!="try-error"){ |
|
166 | 166 |
n <- nrow(data) |
167 | 167 |
n_mod <- vector("numeric",length(pred_mod)) |
168 | 168 |
for(j in 1:length(pred_mod)){ |
... | ... | |
188 | 188 |
return(df_n) |
189 | 189 |
} |
190 | 190 |
|
191 |
extract_daily_training_testing_info<- function(i,list_param){ |
|
191 |
extract_daily_training_testing_info <- function(i,list_param){
|
|
192 | 192 |
#This function extracts training and testing information from the raster object produced for each tile |
193 | 193 |
#This is looping through tiles... |
194 |
|
|
195 | 194 |
##Functions used |
196 | 195 |
#Defined outside this script: |
197 | 196 |
#pred_data_info_fun |
... | ... | |
220 | 219 |
list_data_day_v <- try(extract_list_from_list_obj(raster_obj$validation_mod_obj,"data_v")) |
221 | 220 |
list_data_day_s <- try(extract_list_from_list_obj(raster_obj$validation_mod_obj,"data_s")) |
222 | 221 |
sampling_dat_day <- extract_list_from_list_obj(raster_obj$method_mod_obj,"daily_dev_sampling_dat") |
222 |
#debug(pred_data_info_fun) |
|
223 |
#list_pred_data_day_s_info <- pred_data_info_fun(1,list_data=list_data_day_s,pred_mod=pred_mod,sampling_dat_info=sampling_dat_day) |
|
223 | 224 |
list_pred_data_day_s_info <- lapply(1:length(sampling_dat_day),FUN=pred_data_info_fun, |
224 | 225 |
list_data=list_data_day_s,pred_mod=pred_mod,sampling_dat_info=sampling_dat_day) |
225 | 226 |
list_pred_data_day_v_info <- lapply(1:length(sampling_dat_day),FUN=pred_data_info_fun, |
... | ... | |
242 | 243 |
sampling_dat_month <- extract_list_from_list_obj(raster_obj$clim_method_mod_obj,"sampling_month_dat") |
243 | 244 |
list_pred_data_month_s_info <- lapply(1:length(sampling_dat_month),FUN=pred_data_info_fun, |
244 | 245 |
list_data=list_data_month_s,pred_mod=pred_mod,sampling_dat_info=sampling_dat_month) |
245 |
list_pred_data_month_v_info <- lapply(1:length(sampling_dat_month),FUN=pred_data_info_fun, |
|
246 |
list_data=list_data_month_v,pred_mod=pred_mod,sampling_dat_info=sampling_dat_month) |
|
247 |
|
|
246 |
list_pred_data_month_v_info <- mclapply(1:length(sampling_dat_month),FUN=pred_data_info_fun,
|
|
247 |
list_data=list_data_month_v,pred_mod=pred_mod,sampling_dat_info=sampling_dat_month,mc.preschedule=FALSE,mc.cores = 1)
|
|
248 |
#Note for list_pred_data_month_v_info it will be try-error when there is no holdout. |
|
248 | 249 |
#combine training and testing later? also combined with accuracy |
249 | 250 |
pred_data_month_s_info <- do.call(rbind,list_pred_data_month_s_info) |
250 |
pred_data_month_v_info <- do.call(rbind,list_pred_data_month_v_info) |
|
251 |
|
|
252 |
pred_data_month_v_info$training <- rep(0,nrow(pred_data_month_v_info)) |
|
253 |
pred_data_month_s_info$training <- rep(1,nrow(pred_data_month_v_info)) |
|
254 |
pred_data_month_info <- rbind(pred_data_month_v_info,pred_data_month_s_info) |
|
251 |
#Adding a column for training: if data item used as training then it is 1 |
|
252 |
pred_data_month_s_info$training <- try(rep(1,nrow(pred_data_month_s_info))) |
|
253 |
|
|
254 |
#Remove try-error?? |
|
255 |
list_pred_data_month_v_info_tmp <- remove_from_list_fun(list_pred_data_month_v_info) |
|
256 |
list_pred_data_month_v_info <- list_pred_data_month_v_info_tmp$list |
|
257 |
if (length(list_pred_data_month_v_info)>0){ |
|
258 |
pred_data_month_v_info <- do.call(rbind,list_pred_data_month_v_info) |
|
259 |
pred_data_month_v_info$training <- try(rep(0,nrow(pred_data_month_v_info))) |
|
260 |
pred_data_month_info <- rbind(pred_data_month_v_info,pred_data_month_s_info) |
|
261 |
}else{ |
|
262 |
pred_data_month_info <- pred_data_month_s_info |
|
263 |
} |
|
255 | 264 |
|
256 | 265 |
pred_data_month_info$method_interp <- rep(method_interp,nrow(pred_data_month_info)) |
257 | 266 |
pred_data_month_info$var_interp <- rep(var_interp,nrow(pred_data_month_info)) |
... | ... | |
263 | 272 |
} |
264 | 273 |
if(use_day==FALSE){ |
265 | 274 |
pred_data_day_info <- NULL |
266 |
} |
|
267 |
|
|
275 |
} |
|
268 | 276 |
#prepare object to return |
269 | 277 |
pred_data_info_obj <- list(pred_data_month_info,pred_data_day_info) |
270 | 278 |
names(pred_data_info_obj) <- c("pred_data_month_info","pred_data_day_info") |
271 | 279 |
#could add data.frame data_s and data_v later... |
272 |
|
|
280 |
### |
|
273 | 281 |
return(pred_data_info_obj) |
274 | 282 |
} |
275 | 283 |
|
... | ... | |
341 | 349 |
|
342 | 350 |
setwd(out_dir) |
343 | 351 |
|
344 |
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
|
|
352 |
CRS_locs_WGS84 <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
|
|
345 | 353 |
|
346 | 354 |
day_to_mosaic <- c("20100101","20100901") |
347 | 355 |
file_format <- ".tif" #format for mosaiced files |
... | ... | |
472 | 480 |
|
473 | 481 |
#Insert here...compute input and predicted ranges to spot potential errors? |
474 | 482 |
|
483 |
###################################################### |
|
484 |
####### PART 4: Get shapefile tiling with centroids ### |
|
485 |
|
|
486 |
#in_dir_shp <- "/nobackupp4/aguzman4/climateLayers/output4/subset/shapefiles/" |
|
487 |
|
|
488 |
#get shape files for the region being assessed: |
|
489 |
list_shp_global_tiles_files <- list.files(path=in_dir_shp,pattern="*.shp") |
|
490 |
l_shp<-lapply(1:length(in_dir_shp_list),FUN=function(i){paste(strsplit(in_dir_shp_list[i],"_")[[1]][2:3],collapse="_")}) |
|
491 |
list_shp_global_tiles_files <- l_shp |
|
492 |
pattern_str <- basename(in_dir_list) |
|
493 |
list_shp_global_tiles_files |
|
494 |
list_shp_reg_files <- lapply(pattern_str,function(x){list_shp_global_tiles_files[grep(x,invert=FALSE,list_shp_global_tiles_files)]}) #select directory with shapefiles... |
|
495 |
df_tile_processed$shp_files <- unlist(list_shp_reg_files) |
|
496 |
|
|
497 |
tx<-strsplit(as.character(df_tile_processed$tile_coord),"_") |
|
498 |
lat<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][1]},x=tx)) |
|
499 |
long<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][2]},x=tx)) |
|
500 |
df_tile_processed$lat <- lat |
|
501 |
df_tile_processed$lon <- long |
|
502 |
|
|
503 |
list_shp_world <- list.files(in_dir_shp,".shp") |
|
504 |
l_shp <- unlist(lapply(1:length(list_shp_world),FUN=function(i){paste(strsplit(list_shp_world[i],"_")[[1]][2:3],collapse="_")})) |
|
505 |
list_shp_reg_files <- as.character(df_tile_processed$tile_coord) |
|
506 |
#matching_index <- match(l_shp,list_shp_reg_files) |
|
507 |
matching_index <- match(list_shp_reg_files,l_shp) |
|
508 |
df_tile_processed$shp_files <-list_shp_world[matching_index] |
|
509 |
|
|
510 |
df_tile_processed <- na.omit(df_tile_processed) #remove other list of folders irrelevant |
|
511 |
list_shp_reg_files <- df_tile_processed$shp_files |
|
512 |
list_shp_reg_files<- file.path(in_dir_shp,list_shp_reg_files) |
|
513 |
|
|
514 |
#put that list in the df_processed and also the centroids!! |
|
515 |
write.table(df_tile_processed, |
|
516 |
file=file.path(out_dir,paste("df_tile_processed_",out_prefix,".txt",sep="")),sep=",") |
|
517 |
|
|
475 | 518 |
###################################################### |
476 | 519 |
####### PART 2 CREATE MOSAIC OF PREDICTIONS PER DAY, Delta surfaces and clim ### |
477 | 520 |
|
... | ... | |
694 | 737 |
# names(robj1$validation_mod_day_obj[[1]]$data_s) # daily for January with predictions |
695 | 738 |
# dim(robj1$validation_mod_month_obj[[1]]$data_s) # daily for January with predictions |
696 | 739 |
# |
697 |
# use_day=TRUE |
|
698 |
# use_month=TRUE |
|
699 |
# |
|
700 |
|
|
701 |
# list_param_training_testing_info <- list(list_raster_obj_files,use_month,use_day,list_names_tile_id) |
|
702 |
# names(list_param_training_testing_info) <- c("list_raster_obj_files","use_month","use_day","list_names_tile_id") |
|
703 |
# |
|
704 |
# list_param <- list_param_training_testing_info |
|
705 |
# |
|
706 |
# pred_data_info <- lapply(1:length(list_raster_obj_files),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info) |
|
707 |
# |
|
708 |
# pred_data_month_info <- do.call(rbind,lapply(pred_data_info,function(x){x$pred_data_month_info})) |
|
709 |
# pred_data_day_info <- do.call(rbind,lapply(pred_data_info,function(x){x$pred_data_day_info})) |
|
710 |
|
|
711 |
###################################################### |
|
712 |
####### PART 4: Get shapefile tiling with centroids ### |
|
713 |
|
|
714 |
#in_dir_shp <- "/nobackupp4/aguzman4/climateLayers/output4/subset/shapefiles/" |
|
715 |
|
|
716 |
#get shape files for the region being assessed: |
|
717 |
#list_shp_global_tiles_files <- list.files(path=in_dir_shp,pattern="*.shp") |
|
718 |
#l_shp<-lapply(1:length(in_dir_shp_list),FUN=function(i){paste(strsplit(in_dir_shp_list[i],"_")[[1]][2:3],collapse="_")}) |
|
719 |
list_shp_global_tiles_files <- l_shp |
|
720 |
pattern_str <- basename(in_dir_list) |
|
721 |
#list_shp_global_tiles_files |
|
722 |
list_shp_reg_files <- lapply(pattern_str,function(x){list_shp_global_tiles_files[grep(x,invert=FALSE,list_shp_global_tiles_files)]}) #select directory with shapefiles... |
|
723 |
df_tile_processed$shp_files <- unlist(list_shp_reg_files) |
|
724 |
|
|
725 |
tx<-strsplit(as.character(df_tile_processed$tile_coord),"_") |
|
726 |
lat<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][1]},x=tx)) |
|
727 |
long<- as.numeric(lapply(1:length(tx),function(i,x){x[[i]][2]},x=tx)) |
|
728 |
df_tile_processed$lat <- lat |
|
729 |
df_tile_processed$lon <- long |
|
730 | 740 |
|
731 |
#put that list in the df_processed and also the centroids!! |
|
732 |
write.table(df_tile_processed, |
|
733 |
file=file.path(out_dir,paste("df_tile_processed_",out_prefix,".txt",sep="")),sep=",") |
|
741 |
use_day=TRUE |
|
742 |
use_month=TRUE |
|
743 |
|
|
744 |
#list_raster_obj_files <- c("/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-100.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-100.0.RData", |
|
745 |
# "/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1//30.0_-105.0/raster_prediction_obj_gam_CAI_dailyTmax30.0_-105.0.RData") |
|
746 |
|
|
747 |
list_names_tile_id <- df_tile_processed$tile_id |
|
748 |
list_raster_obj_files[list_names_tile_id] |
|
749 |
#list_names_tile_id <- c("tile_1","tile_2") |
|
750 |
list_param_training_testing_info <- list(list_raster_obj_files[list_names_tile_id],use_month,use_day,list_names_tile_id) |
|
751 |
names(list_param_training_testing_info) <- c("list_raster_obj_files","use_month","use_day","list_names_tile_id") |
|
752 |
|
|
753 |
list_param <- list_param_training_testing_info |
|
754 |
#debug(extract_daily_training_testing_info) |
|
755 |
#pred_data_info <- extract_daily_training_testing_info(1,list_param=list_param_training_testing_info) |
|
756 |
pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = 6) |
|
757 |
#pred_data_info <- mclapply(1:length(list_raster_obj_files[list_names_tile_id][1:6]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info,mc.preschedule=FALSE,mc.cores = 6) |
|
758 |
#pred_data_info <- lapply(1:length(list_raster_obj_files),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info) |
|
759 |
#pred_data_info <- lapply(1:length(list_raster_obj_files[1]),FUN=extract_daily_training_testing_info,list_param=list_param_training_testing_info) |
|
760 |
|
|
761 |
pred_data_info_tmp <- remove_from_list_fun(pred_data_info)$list #remove data not predicted |
|
762 |
##Add tile names?? it is already there |
|
763 |
#names(pred_data_info)<-list_names_tile_id |
|
764 |
pred_data_month_info <- do.call(rbind,lapply(pred_data_info_tmp,function(x){x$pred_data_month_info})) |
|
765 |
pred_data_day_info <- do.call(rbind,lapply(pred_data_info_tmp,function(x){x$pred_data_day_info})) |
|
766 |
|
|
767 |
#output information in csv !! |
|
768 |
write.table(pred_data_month_info, |
|
769 |
file=file.path(out_dir,paste("pred_data_month_info_",out_prefix,".txt",sep="")),sep=",") |
|
770 |
write.table(pred_data_day_info, |
|
771 |
file=file.path(out_dir,paste("pred_data_day_info_",out_prefix,".txt",sep="")),sep=",") |
|
734 | 772 |
|
735 | 773 |
########### LAST PART: COPY SOME DATA BACK TO ATLAS ##### |
736 | 774 |
|
775 |
### This assumes the tree structure has been replicated on Atlas: |
|
776 |
#for i in 1:length(df_tiled_processed$tile_coord) |
|
777 |
output_atlas_dir <- "/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/output10Deg/reg1" |
|
778 |
#for (i in 1:length(df_tiled_processed$tile_coord)){ |
|
779 |
# create_dir_fun(file.path(output_atlas_dir,as.character(df_tiled_processed$tile_coord[i])),out_suffix=NULL) |
|
780 |
#} |
|
781 |
|
|
737 | 782 |
#### FIRST COPY DATA FOR SPECIFIC TILES ##### |
738 | 783 |
#Copy specific tiles info back...This assumes that the tree structure |
739 | 784 |
#has been created on ATLAS: |
... | ... | |
741 | 786 |
|
742 | 787 |
list_tile_scp <- c(1,2) |
743 | 788 |
|
744 |
for (i in 1:length(list_tile_scp)){
|
|
745 |
tile_nb <- list_tile_scp[i]
|
|
789 |
for (j in 1:length(list_tile_scp)){
|
|
790 |
tile_nb <- list_tile_scp[j]
|
|
746 | 791 |
#nb_mod <- 3+1 #set up earlier |
747 | 792 |
date_selected <- c("20100101","20100901") #should be set up earlier |
748 | 793 |
date_index <- c(1,244) #list_day?? |
... | ... | |
750 | 795 |
|
751 | 796 |
in_dir_tile <- basename(df_tile_processed$path_NEX[tile_nb]) |
752 | 797 |
#/data/project/layers/commons/NEX_data/output_run2_05122014/output |
753 |
Atlas_dir <- file.path(file.path("/data/project/layers/commons/NEX_data/",basename(out_dir),"output"),in_dir_tile) |
|
798 |
#output_atlas_dir |
|
799 |
#Atlas_dir <- file.path(file.path("/data/project/layers/commons/NEX_data/",basename(out_dir),"output"),in_dir_tile) |
|
800 |
Atlas_dir <- file.path(output_atlas_dir,in_dir_tile) |
|
754 | 801 |
Atlas_hostname <- "parmentier@atlas.nceas.ucsb.edu" |
755 | 802 |
#filenames_NEX <- list_raster_obj_files[tile_nb] #copy raster prediction object |
756 | 803 |
#cmd_str <- paste("scp -p",filenames_NEX,paste(Atlas_hostname,Atlas_dir,sep=":"), sep=" ") |
Also available in: Unified diff
assessment NEX run 3 North America with prediction info for training and testing stations