Revision 4543f113
Added by Benoit Parmentier over 8 years ago
climate/research/oregon/interpolation/global_product_assessment_part1.R | ||
---|---|---|
4 | 4 |
#Combining tables and figures for individual runs for years and tiles. |
5 | 5 |
#AUTHOR: Benoit Parmentier |
6 | 6 |
#CREATED ON: 05/15/2016 |
7 |
#MODIFIED ON: 09/05/2016
|
|
7 |
#MODIFIED ON: 09/06/2016
|
|
8 | 8 |
#Version: 1 |
9 | 9 |
#PROJECT: Environmental Layers project |
10 | 10 |
#COMMENTS: Initial commit, script based on part NASA biodiversity conferenc |
... | ... | |
85 | 85 |
source(file.path(script_path,function_assessment_part3)) #source all functions used in this script |
86 | 86 |
|
87 | 87 |
#Product assessment |
88 |
function_product_assessment_part1_functions <- "global_product_assessment_part1_functions_09052016.R"
|
|
88 |
function_product_assessment_part1_functions <- "global_product_assessment_part1_functions_09062016.R"
|
|
89 | 89 |
source(file.path(script_path,function_product_assessment_part1_functions)) #source all functions used in this script |
90 | 90 |
|
91 | 91 |
############################### |
... | ... | |
163 | 163 |
df_centroids_fname <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg1/mosaic/output_reg1_1984/df_centroids_19840101_reg1_1984.txt" |
164 | 164 |
#/nobackupp6/aguzman4/climateLayers/out/reg1/assessment//output_reg1_1984/df_assessment_files_reg1_1984_reg1_1984.txt |
165 | 165 |
|
166 |
raster_name_lf <- c("/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920101_reg4_1992_m_gam_CAI_dailyTmax_19920101_reg4_1992.tif", |
|
167 |
"/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920102_reg4_1992_m_gam_CAI_dailyTmax_19920102_reg4_1992.tif", |
|
168 |
"/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920103_reg4_1992_m_gam_CAI_dailyTmax_19920103_reg4_1992.tif", |
|
169 |
"/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920701_reg4_1992_m_gam_CAI_dailyTmax_19920701_reg4_1992.tif", |
|
170 |
"/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920702_reg4_1992_m_gam_CAI_dailyTmax_19920702_reg4_1992.tif", |
|
171 |
"/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920703_reg4_1992_m_gam_CAI_dailyTmax_19920703_reg4_1992.tif") |
|
166 |
#raster_name_lf <- c("/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920101_reg4_1992_m_gam_CAI_dailyTmax_19920101_reg4_1992.tif",
|
|
167 |
# "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920102_reg4_1992_m_gam_CAI_dailyTmax_19920102_reg4_1992.tif",
|
|
168 |
# "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920103_reg4_1992_m_gam_CAI_dailyTmax_19920103_reg4_1992.tif",
|
|
169 |
# "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920701_reg4_1992_m_gam_CAI_dailyTmax_19920701_reg4_1992.tif",
|
|
170 |
# "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920702_reg4_1992_m_gam_CAI_dailyTmax_19920702_reg4_1992.tif",
|
|
171 |
# "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920703_reg4_1992_m_gam_CAI_dailyTmax_19920703_reg4_1992.tif")
|
|
172 | 172 |
|
173 | 173 |
#l_dates <- c("19990101","19990102","19990103","19990701","19990702","19990703") |
174 | 174 |
l_dates <- c("19990101","19990102","19990103","19990104","19990105") #dates to plot and analze |
... | ... | |
326 | 326 |
## do training |
327 | 327 |
|
328 | 328 |
##plot summary of staistics |
329 |
list_plot_obj_data_v[[1]]$df_basic_stat |
|
330 |
names(list_plot_obj_data_v[[1]]) |
|
331 |
list_data_stations_var_pred_data_v <- lapply(list_plot_obj_data_v,FUN=function(x){x$data_stations_var_pred}) |
|
332 |
list_data_stations_var_pred_data_s <- lapply(list_plot_obj_data_s,FUN=function(x){x$data_stations_var_pred}) |
|
333 |
|
|
334 |
data_stations_var_pred_data_v <- do.call(rbind,list_data_stations_var_pred_data_v) |
|
335 |
data_stations_var_pred_data_s <- do.call(rbind,list_data_stations_var_pred_data_s) |
|
336 |
dim(data_stations_var_pred_data_v) |
|
337 |
dim(data_stations_var_pred_data_s) |
|
338 |
|
|
339 |
write.table(data_stations_var_pred_data_v,"data_stations_var_pred_data_v") |
|
340 |
write.table(data_stations_var_pred_data_s,"data_stations_var_pred_data_s") |
|
341 |
|
|
329 | 342 |
|
330 | 343 |
############### PART2: Select stations by ID to build a time series ############# |
331 | 344 |
#### Extract corresponding stationsfor given dates and plot stations used |
332 | 345 |
## Use station from specific year and date? |
333 | 346 |
|
334 |
##################### |
|
347 |
###################### make this part a function!!!
|
|
335 | 348 |
#select one station based on id or coordinates and find that in the list of data.frame?? |
336 | 349 |
|
337 |
id_selected <- "82111099999" |
|
338 |
dim(df_points) |
|
350 |
#Make a function to find the closest stations to a givine coordinates? |
|
351 |
#42.262003, -71.965866 #this is near Spencer MA |
|
352 |
|
|
353 |
#id_selected <- "82111099999" |
|
354 |
#dim(df_points) |
|
355 |
list_id_data_v <- unique(data_stations_var_pred_data_v$id) |
|
356 |
list_id_data_s <- unique(data_stations_var_pred_data_s$id) |
|
339 | 357 |
|
340 | 358 |
### loop through all files and get the time series |
341 |
extract_from_df <- function(x,col_selected,val_selected){ |
|
342 |
df_tmp <- read.table(x,stringsAsFactors=F,sep=",") |
|
343 |
#data_subset <- subset(data_stations,col_selected==val_selected) |
|
344 |
data_subset <- subset(df_tmp,df_tmp$id%in%val_selected) |
|
345 |
return(data_subset) |
|
346 |
} |
|
347 | 359 |
|
348 | 360 |
lf_data_s_subset <- mclapply(list_data_s_fname, |
349 | 361 |
FUN=extract_from_df, |
350 | 362 |
col_selected="id", |
351 |
val_selected=id_selected,
|
|
363 |
val_selected=list_id_data_s,
|
|
352 | 364 |
mc.preschedule=FALSE, |
353 |
mc.cores = num_cores) |
|
365 |
mc.cores = num_cores) |
|
366 |
#took less than 8 minutes for 1839 stations |
|
367 |
|
|
354 | 368 |
lf_data_v_subset <- mclapply(list_data_v_fname, |
355 | 369 |
FUN=extract_from_df, |
356 | 370 |
col_selected="id", |
357 |
val_selected=id_selected,
|
|
371 |
val_selected=list_id_data_v,
|
|
358 | 372 |
mc.preschedule=FALSE, |
359 | 373 |
mc.cores = num_cores) |
360 | 374 |
|
... | ... | |
364 | 378 |
data_s_subset$training <- 1 |
365 | 379 |
data_v_subset$training <- 0 |
366 | 380 |
|
367 |
data_stations <- rbind(data_s_subset,data_v_subset) |
|
381 |
## Need a testing variable to count later the use of a station |
|
382 |
data_s_subset$testing <- 0 |
|
383 |
data_v_subset$testing <- 1 |
|
384 |
# a station can be used multipel times as trainin gor testing within a day because of the overlap of tiles. |
|
385 |
|
|
386 |
#data_stations <- rbind(data_s_subset,data_v_subset) |
|
368 | 387 |
dim(data_s_subset) |
388 |
#[1] 21991826 9 |
|
369 | 389 |
dim(data_v_subset) |
390 |
#[1] 9319967 85 |
|
370 | 391 |
|
392 |
##36 minutes to get here |
|
371 | 393 |
#rbind.fill(mtcars[c("mpg", "wt")], mtcars[c("wt", "cyl")]) |
372 | 394 |
data_stations <- rbind.fill(data_v_subset, data_s_subset) |
395 |
#dim(data_stations)#one station only but repetition of records because of tiles and dates!!! |
|
396 |
#[1] 31311793 90 |
|
397 |
dim(data_stations)#one station only but repetition of records because of tiles and dates!!! |
|
398 |
#[1] 30202891 91 |
|
373 | 399 |
|
374 |
coordinates(data_stations) <- cbind(data_stations$x,data_stations$y) |
|
375 |
proj4string(data_stations) <- CRS_locs_WGS84 |
|
376 |
|
|
377 |
dim(data_stations) #one station only but repitition of records because of tiles and dates!!! |
|
378 |
#> dim(data_stations) |
|
379 |
#[1] 100458 90 |
|
400 |
#coordinates(data_stations) <- cbind(data_stations$x,data_stations$y) |
|
401 |
#proj4string(data_stations) <- CRS_locs_WGS84 |
|
380 | 402 |
|
381 | 403 |
#data_stations_var_pred <- aggregate(id ~ date, data = data_stations, min) |
382 | 404 |
#data_stations_var_pred <- aggregate(id ~ x + y + date + dailyTmax + mod1 + res_mod1 , data = data_stations, min) |
... | ... | |
384 | 406 |
##Add tile id here...and check if data stations was training or testing. |
385 | 407 |
|
386 | 408 |
|
387 |
data_stations_var_pred <- aggregate(id ~ x + y + date + dailyTmax,data = data_stations, min ) #+ mod1 + res_mod1 , data = data_stations, min)
|
|
409 |
data_stations_var_pred <- aggregate(id ~ x + y + date + dailyTmax + mod1 + res_mod1 ,data = data_stations, mean ) #+ mod1 + res_mod1 , data = data_stations, min)
|
|
388 | 410 |
dim(data_stations_var_pred) |
389 | 411 |
#> dim(data_stations_var_pred) |
390 | 412 |
#[1] 11171 5 |
413 |
write.table(data_stations_var_pred,"data_stations_var_pred_tmp.txt") |
|
414 |
data_stations_var_pred_training_testing <- aggregate(id ~ training + testing ,data = data_stations, sum ) #+ mod1 + res_mod1 , data = data_stations, min) |
|
415 |
write.table(data_stations_var_pred_training_testing,"data_stations_var_pred_training_testing.txt") |
|
391 | 416 |
|
392 | 417 |
data_stations_var_pred$date_str <- data_stations_var_pred$date |
393 | 418 |
data_stations_var_pred$date <- as.Date(strptime(data_stations_var_pred$date_str,"%Y%m%d")) |
... | ... | |
396 | 421 |
#data_stations_var_pred2 <- aggregate(id ~ training,data = data_stations, sum ) #+ mod1 + res_mod1 , data = data_stations, min) |
397 | 422 |
#data_stations_var_pred2 <- aggregate(date ~ training,data = data_stations, sum ) #+ mod1 + res_mod1 , data = data_stations, min) |
398 | 423 |
|
424 |
data_stations_var_pred <- cbind(data_stations_var_pred,data_stations_var_pred_training_testing) |
|
425 |
write.table(data_stations_var_pred,"data_stations_var_pred.txt") |
|
426 |
#started at 16.51, 09/07 |
|
427 |
|
|
399 | 428 |
############### PART3: Make raster stack and display maps ############# |
400 | 429 |
#### Extract corresponding raster for given dates and plot stations used |
401 | 430 |
|
... | ... | |
404 | 433 |
if(is.null(r_mosaic_fname)){ |
405 | 434 |
pattern_str <-"*.tif" |
406 | 435 |
lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=F,full.names=T) |
407 |
r_mosaic <- stack(lf_mosaic_list) |
|
436 |
r_mosaic <- stack(lf_mosaic_list,quick=T)
|
|
408 | 437 |
save(r_mosaic,file="r_mosaic.RData") |
438 |
|
|
409 | 439 |
}else{ |
410 | 440 |
r_mosaic <- load_obj(r_mosaic_fname) #load raster stack of images |
411 | 441 |
} |
... | ... | |
465 | 495 |
#/nobackupp6/aguzman4/climateLayers/out/reg4/mosaics2/mosaic/output_reg4_*/r_m_use_edge_weights_weighted_mean_mask_gam_CAI_dailyTmax_*_reg4_*.tif |
466 | 496 |
pattern_str <- "r_m_use_edge_weights_weighted_mean_mask_gam_CAI_dailyTmax_.*._reg4_.*.tif" |
467 | 497 |
searchStr = paste(in_dir_mosaic,"/output_reg4_2014",year_processed,"/gam_CAI_dailyTmax_predicted_",pred_mod_name,"*",day_to_mosaic[i],"*.tif",sep="") |
468 |
|
|
498 |
pattern_str <- ".*.tif" |
|
499 |
in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg1/mosaics/mosaic" |
|
469 | 500 |
#lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern="*.tif",recursive=T) |
470 | 501 |
lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=T) |
502 |
|
|
503 |
writeRaster() |
|
504 |
|
|
471 | 505 |
lf_mosaic_list <- lapply(1:length(day_to_mosaic), |
472 | 506 |
FUN=function(i){ |
473 | 507 |
searchStr = paste(in_dir_tiles_tmp,"/*/",year_processed,"/gam_CAI_dailyTmax_predicted_",pred_mod_name,"*",day_to_mosaic[i],"*.tif",sep="") |
... | ... | |
792 | 826 |
#### PLOT ACCURACY METRICS: First test #### |
793 | 827 |
##this will be cleaned up later: |
794 | 828 |
|
795 |
dir_ac_mosaics <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg4/mosaic/output_reg4_1999" |
|
829 |
#dir_ac_mosaics <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg4/mosaic/output_reg4_1999"
|
|
796 | 830 |
lf_tmp <-list.files(path=dir_ac_mosaics,pattern="r_m_use_edge_weights_weighted_mean_mask_gam_CAI_.*.ac.*._reg4_1999.tif") |
797 | 831 |
|
798 | 832 |
#lf_tmp1 <- lf_tmp[21:24] |
Also available in: Unified diff
gathering stations data for building time series, clean up of script for product assessment part1