Revision 0892e770
Added by Benoit Parmentier over 8 years ago
climate/research/oregon/interpolation/global_product_assessment_part1.R | ||
---|---|---|
4 | 4 |
#Combining tables and figures for individual runs for years and tiles. |
5 | 5 |
#AUTHOR: Benoit Parmentier |
6 | 6 |
#CREATED ON: 05/15/2016 |
7 |
#MODIFIED ON: 09/06/2016
|
|
7 |
#MODIFIED ON: 09/11/2016
|
|
8 | 8 |
#Version: 1 |
9 | 9 |
#PROJECT: Environmental Layers project |
10 | 10 |
#COMMENTS: Initial commit, script based on part NASA biodiversity conferenc |
... | ... | |
85 | 85 |
source(file.path(script_path,function_assessment_part3)) #source all functions used in this script |
86 | 86 |
|
87 | 87 |
#Product assessment |
88 |
function_product_assessment_part1_functions <- "global_product_assessment_part1_functions_09062016.R"
|
|
88 |
function_product_assessment_part1_functions <- "global_product_assessment_part1_functions_09112016.R"
|
|
89 | 89 |
source(file.path(script_path,function_product_assessment_part1_functions)) #source all functions used in this script |
90 | 90 |
|
91 | 91 |
############################### |
... | ... | |
381 | 381 |
c( -78.396011,35.696143)) |
382 | 382 |
|
383 | 383 |
|
384 |
query_for_station_lat_long <- function(point_val,df_points_spdf,step_x=0.25,step_y=0.25){ |
|
385 |
#make this function better using gbuffer later!!!, works for now |
|
386 |
#Improve: circular query + random within it |
|
387 |
y_val <- point_val[2] |
|
388 |
x_val <- point_val[1] |
|
389 |
|
|
390 |
y_val_tmp <- y_val + step_y |
|
391 |
if(x_val>0){ |
|
392 |
x_val_tmp <- x_val - step_x |
|
393 |
}else{ |
|
394 |
x_val_tmp <- x_val + step_x |
|
395 |
} |
|
396 |
|
|
397 |
|
|
398 |
test_df <- subset(df_points_spdf,(y_val < df_points_spdf$y & df_points_spdf$y < y_val_tmp)) |
|
399 |
test_df2 <- subset(test_df,(x_val < test_df$x & test_df$x < x_val_tmp)) |
|
400 |
#plot(test_df2) |
|
401 |
if(nrow(test_df2)>0){ |
|
402 |
df_selected <- test_df2[1,] |
|
403 |
}else{ |
|
404 |
df_selected <- NULL |
|
405 |
} |
|
406 |
|
|
407 |
return(df_selected) |
|
408 |
} |
|
409 |
|
|
410 | 384 |
test_day_query2 <- lapply(list_lat_long,FUN=query_for_station_lat_long,df_points_spdf=df_point_day,step_x=1,step_y=1) |
411 | 385 |
#test_day_query <-query_for_station_lat_long(c(-72,42),df_points_spdf=df_point_day,step_x=1,step_y=0.25) |
412 | 386 |
df_stations_selected <- do.call(rbind,test_day_query2) |
... | ... | |
485 | 459 |
data_stations_var_pred <- cast(md, id + date ~ variable, fun.aggregate = mean, |
486 | 460 |
na.rm = TRUE) |
487 | 461 |
|
462 |
#write.table(data_stations_var_pred, |
|
463 |
# file=file.path(out_dir,paste0("data_stations_var_pred_tmp_",out_suffix,".txt", |
|
464 |
# sep=","))) |
|
488 | 465 |
write.table(data_stations_var_pred, |
489 |
file=file.path(out_dir,paste0("data_stations_var_pred_tmp_",out_suffix,".txt", |
|
490 |
sep=","))) |
|
491 |
write.table(data_stations_var_pred_training_testing,"data_stations_var_pred_training_testing.txt") |
|
466 |
file=file.path(out_dir,paste0("data_stations_var_pred_tmp_",out_suffix,".txt")), |
|
467 |
sep=",") |
|
468 |
|
|
469 |
#data_stations_var_pred <- read.table( |
|
470 |
# file=file.path(out_dir,paste0("data_stations_var_pred_tmp_",out_suffix,".txt", |
|
471 |
# sep=","))) |
|
492 | 472 |
|
493 | 473 |
md <- melt(data_stations, id=(c("id", "date")),measure.vars=c("training","testing")) |
494 | 474 |
data_stations_training_testing <- cast(md, id + date ~ variable, fun.aggregate = sum, |
495 | 475 |
na.rm = TRUE) |
496 | 476 |
|
477 |
#write.table(data_stations_training_testing, |
|
478 |
# file=file.path(out_dir,paste0("data_stations_training_testing_",out_suffix,".txt", |
|
479 |
# sep=","))) |
|
497 | 480 |
write.table(data_stations_training_testing, |
498 |
file=file.path(out_dir,paste0("data_stations_training_testing_",out_suffix,".txt", |
|
499 |
sep=","))) |
|
500 |
|
|
481 |
file=file.path(out_dir,paste0("data_stations_training_testing_",out_suffix,".txt")), |
|
482 |
sep=",") |
|
501 | 483 |
#data_stations_var_pred <- aggregate(id2 ~ x + y + date + dailyTmax + mod1 + res_mod1 ,data = data_stations, mean ) #+ mod1 + res_mod1 , data = data_stations, min) |
502 | 484 |
#data_stations$id2 <- as.numeric(data_stations$id) |
503 | 485 |
#data_stations$date <- as.character(data_stations$date) |
... | ... | |
516 | 498 |
#data_stations_var_pred2 <- aggregate(id ~ training,data = data_stations, sum ) #+ mod1 + res_mod1 , data = data_stations, min) |
517 | 499 |
#data_stations_var_pred2 <- aggregate(date ~ training,data = data_stations, sum ) #+ mod1 + res_mod1 , data = data_stations, min) |
518 | 500 |
|
519 |
data_stations_var_pred <- cbind(data_stations_var_pred,data_stations_var_pred_training_testing) |
|
520 |
|
|
521 |
write.table(data_stations_var_pred,"data_stations_var_pred.txt") |
|
522 |
#started at 16.51, 09/07 |
|
523 |
|
|
524 |
############### PART3: Make raster stack and display maps ############# |
|
525 |
#### Extract corresponding raster for given dates and plot stations used |
|
526 |
|
|
527 |
|
|
528 |
#start_date <- day_to_mosaic_range[1] |
|
529 |
#end_date <- day_to_mosaic_range[2] |
|
530 |
#start_date <- day_start #PARAM 12 arg 12 |
|
531 |
#end_date <- day_end #PARAM 13 arg 13 |
|
532 |
|
|
533 |
#date_to_plot <- seq(as.Date(strptime(start_date,"%Y%m%d")), as.Date(strptime(end_date,"%Y%m%d")), 'day') |
|
534 |
#l_dates <- format(date_to_plot,"%Y%m%d") #format back to the relevant date format for files |
|
535 |
#mask_pred <- FALSE |
|
536 |
#matching <- FALSE #to be added after mask_pred option |
|
537 |
#list_param_pre_process <- list(raster_name_lf,python_bin,infile_mask,scaling,mask_pred,NA_flag_val,out_suffix,out_dir) |
|
538 |
#names(list_param_pre_process) <- c("lf","python_bin","infile_mask","scaling","mask_pred","NA_flag_val","out_suffix","out_dir") |
|
539 |
|
|
540 |
#debug(pre_process_raster_mosaic_fun) |
|
501 |
#data_stations_var_pred <- merge(data_stations_var_pred, data_stations_training_testing , by="id") #this is slow maybe do cbind? |
|
502 |
#data_stations_var_pred_test <- data_stations_var_pred |
|
541 | 503 |
|
542 |
#lf_mosaic_scaled <- mclapply(1:length(raster_name_lf),FUN=pre_process_raster_mosaic_fun,list_param=list_param_pre_process,mc.preschedule=FALSE,mc.cores = num_cores) |
|
543 |
#lf_mosaic_scaled <- mclapply(1:length(raster_name_lf),FUN=pre_process_raster_mosaic_fun,list_param=list_param_pre_process,mc.preschedule=FALSE,mc.cores = num_cores) |
|
504 |
data_stations_var_pred <- cbind(data_stations_var_pred,data_stations_training_testing) |
|
544 | 505 |
|
545 |
#test <- pre_process_raster_mosaic_fun(2,list_param_pre_process) |
|
546 |
#lf_mosaic_scaled <- unlist(lf_mosaic_scaled) |
|
547 |
|
|
548 |
r_mosaic_scaled <- stack(lf_mosaic_scaled) |
|
549 |
NAvalue(r_mosaic_scaled)<- -3399999901438340239948148078125514752.000 |
|
550 |
plot(r_mosaic_scaled,y=6,zlim=c(-50,50)) |
|
551 |
plot(r_mosaic_scaled,zlim=c(-50,50),col=matlab.like(255)) |
|
552 |
|
|
553 |
#layout_m<-c(1,3) #one row two columns |
|
554 |
#levelplot(r_mosaic_scaled,zlim=c(-50,50),col.regions=matlab.like(255)) |
|
555 |
#levelplot(r_mosaic_scaled,zlim=c(-50,50),col.regions=matlab.like(255)) |
|
556 |
|
|
557 |
#png(paste("Figure7a__spatial_pattern_tmax_prediction_levelplot_",date_selected,out_prefix,".png", sep=""), |
|
558 |
# height=480*layout_m[1],width=480*layout_m[2]) |
|
559 |
#plot(r_pred,col=temp.colors(255),zlim=c(-3500,4500)) |
|
560 |
#plot(r_pred,col=matlab.like(255),zlim=c(-40,50)) |
|
561 |
#paste(raster_name[1:7],collapse="_") |
|
562 |
#add filename option later |
|
563 |
|
|
564 |
#NA_flag_val_mosaic <- -3399999901438340239948148078125514752.000 |
|
506 |
write.table(data_stations_var_pred, |
|
507 |
file=file.path(out_dir,paste0("data_stations_var_pred_",out_suffix,".txt")), |
|
508 |
sep=",") |
|
565 | 509 |
|
566 |
list_param_plot_raster_mosaic <- list(l_dates,r_mosaic_scaled,NA_flag_val_mosaic,out_dir,out_suffix, |
|
567 |
region_name,variable_name) |
|
568 |
names(list_param_plot_raster_mosaic) <- c("l_dates","r_mosaic_scaled","NA_flag_val_mosaic","out_dir","out_suffix", |
|
569 |
"region_name","variable_name") |
|
510 |
#started at 16.51, 09/07 |
|
570 | 511 |
|
571 |
lf_mosaic_plot_fig <- mclapply(1:length(lf_mosaic_scaled),FUN=plot_raster_mosaic,list_param=list_param_plot_raster_mosaic,mc.preschedule=FALSE,mc.cores = num_cores) |
|
572 | 512 |
|
573 | 513 |
############### PART4: Checking for mosaic produced for given region ############## |
574 | 514 |
## From list of mosaic files predicted extract dates |
... | ... | |
577 | 517 |
|
578 | 518 |
##Now grab year year 1992 or matching year...maybe put this in a data.frame?? |
579 | 519 |
|
580 |
pattern_str <- "r_m_use_edge_weights_weighted_mean_mask_gam_CAI_dailyTmax_.*._reg4_.*.tif" |
|
581 |
searchStr = paste(in_dir_mosaic,"/output_reg4_2014",year_processed,"/gam_CAI_dailyTmax_predicted_",pred_mod_name,"*",day_to_mosaic[i],"*.tif",sep="") |
|
582 |
pattern_str <- ".*.tif" |
|
583 |
in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg1/mosaics/mosaic" |
|
584 |
#lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern="*.tif",recursive=T) |
|
585 |
|
|
586 |
if(is.null(r_mosaic_fname)){ |
|
587 |
pattern_str <-"*.tif" |
|
588 |
lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=F,full.names=T) |
|
589 |
r_mosaic <- stack(lf_mosaic_list,quick=T) |
|
590 |
save(r_mosaic,file="r_mosaic.RData") |
|
591 |
|
|
592 |
}else{ |
|
593 |
r_mosaic <- load_obj(r_mosaic_fname) #load raster stack of images |
|
594 |
} |
|
595 |
|
|
596 |
|
|
597 |
#r_mosaic_ts <- stack(lf_mosaic_list) |
|
598 |
#df_centroids <- extract(df_centroids,r_mosaic_ts) |
|
599 |
|
|
600 |
df_points$files <- lf_mosaic_list |
|
520 |
#pattern_str <- "r_m_use_edge_weights_weighted_mean_mask_gam_CAI_dailyTmax_.*._reg4_.*.tif" |
|
521 |
#searchStr = paste(in_dir_mosaic,"/output_reg4_2014",year_processed,"/gam_CAI_dailyTmax_predicted_",pred_mod_name,"*",day_to_mosaic[i],"*.tif",sep="") |
|
601 | 522 |
|
602 | 523 |
#debug(extract_date) |
603 | 524 |
#test <- extract_date(6431,lf_mosaic_list,12) #extract item number 12 from the name of files to get the data |
604 |
list_dates_produced <- unlist(mclapply(1:length(lf_mosaic_list),FUN=extract_date,x=lf_mosaic_list,item_no=14,mc.preschedule=FALSE,mc.cores = num_cores))
|
|
605 |
#list_dates_produced <- mclapply(6400:6431,FUN=extract_date,x=lf_mosaic_list,item_no=12,mc.preschedule=FALSE,mc.cores = num_cores)
|
|
525 |
list_dates_produced <- unlist(mclapply(1:length(lf_mosaic_list),FUN=extract_date,x=lf_mosaic_list,item_no=13,mc.preschedule=FALSE,mc.cores = num_cores))
|
|
526 |
#list_dates_produced <- mclapply(1:2,FUN=extract_date,x=lf_mosaic_list,item_no=13,mc.preschedule=FALSE,mc.cores = 2)
|
|
606 | 527 |
|
607 | 528 |
list_dates_produced_date_val <- as.Date(strptime(list_dates_produced,"%Y%m%d")) |
608 | 529 |
month_str <- format(list_dates_produced_date_val, "%b") ## Month, char, abbreviated |
609 | 530 |
year_str <- format(list_dates_produced_date_val, "%Y") ## Year with century |
610 | 531 |
day_str <- as.numeric(format(list_dates_produced_date_val, "%d")) ## numeric month |
611 | 532 |
|
612 |
df_produced <- data.frame(lf_mosaic_list,list_dates_produced_date_val,month_str,year_str,day_str)
|
|
533 |
df_produced <- data.frame(basename(lf_mosaic_list),list_dates_produced_date_val,month_str,year_str,day_str,dirname(lf_mosaic_list))
|
|
613 | 534 |
|
614 |
date_start <- "19840101" |
|
615 |
date_end <- "19991231" |
|
616 |
date1 <- as.Date(strptime(date_start,"%Y%m%d")) |
|
617 |
date2 <- as.Date(strptime(date_end,"%Y%m%d")) |
|
618 |
dates_range <- seq.Date(date1, date2, by="1 day") #sequence of dates |
|
535 |
finding_missing_dates <- function(date_start,date_end,list_dates){ |
|
536 |
|
|
537 |
date_start <- "19840101" |
|
538 |
date_end <- "19991231" |
|
539 |
date1 <- as.Date(strptime(date_start,"%Y%m%d")) |
|
540 |
date2 <- as.Date(strptime(date_end,"%Y%m%d")) |
|
541 |
dates_range <- seq.Date(date1, date2, by="1 day") #sequence of dates |
|
542 |
|
|
543 |
missing_dates <- setdiff(as.character(dates_range),as.character(list_dates_produced_date_val)) |
|
544 |
|
|
545 |
return(missing_dates) |
|
546 |
} |
|
619 | 547 |
|
620 |
missing_dates <- setdiff(as.character(dates_range),as.character(list_dates_produced_date_val)) |
|
621 | 548 |
|
622 |
month_str <- format(list_dates_produced_date_val, "%b") ## Month, char, abbreviated |
|
623 |
year_str <- format(list_dates_produced_date_val, "%Y") ## Year with century |
|
624 |
day_str <- as.numeric(format(list_dates_produced_date_val, "%d")) ## numeric month |
|
625 | 549 |
|
550 |
|
|
551 |
#### |
|
626 | 552 |
df_points$date <- list_dates_produced_date_val |
627 | 553 |
df_points$month <- month_str |
628 | 554 |
df_points$year <- year_str |
629 | 555 |
df_points$day <- day_str |
630 | 556 |
|
557 |
in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg1/mosaics/mosaic" |
|
558 |
in_dir_mosaic_rmse <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg1/mosaicsRMSE/mosaic" |
|
559 |
#pattern_str <- ".*.tif" |
|
560 |
|
|
561 |
extract_from_time_series_raster_stack <- function(df_points,date_start,date_end,lf_raster,item_no=13,num_cores=4,pattern_str=NULL,in_dir=NULL,out_dir=".",out_suffix=""){ |
|
562 |
# |
|
563 |
#This function extract value given from a raster stack time series given a spatial data.frame and a list of files |
|
564 |
# |
|
565 |
#INPUTS |
|
566 |
#1) df_points |
|
567 |
#2) date_start,num_cores=4,pattern_str=NULL,in_dir=NULL,out_dir=".",out_suffix= |
|
568 |
#3) date_end |
|
569 |
#3) lf_raster |
|
570 |
#4) item_no=13 |
|
571 |
#5) num_cores=4, |
|
572 |
#6) pattern_str=NULL |
|
573 |
#7) in_dir=NULL, |
|
574 |
#8) out_dir="." |
|
575 |
#9) out_suffix="" |
|
576 |
#OUTPUTS |
|
577 |
# |
|
578 |
# |
|
579 |
|
|
580 |
#### Start script #### |
|
581 |
|
|
582 |
if(is.null(lf_raster)){ |
|
583 |
#pattern_str <- ".*.tif" |
|
584 |
pattern_str <-"*.tif" |
|
585 |
lf_raster <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=F,full.names=T) |
|
586 |
r_stack <- stack(lf_raster,quick=T) #this is very fast now with the quick option! |
|
587 |
#save(r_mosaic,file="r_mosaic.RData") |
|
588 |
|
|
589 |
}else{ |
|
590 |
r_stack <- stack(lf_raster,quick=T) #this is very fast now with the quick option! |
|
591 |
} |
|
631 | 592 |
|
593 |
#df_points$files <- lf_mosaic_list |
|
594 |
#Use the global output?? |
|
632 | 595 |
|
633 |
#Use the global output?? |
|
596 |
##23.09 (on 05/22) |
|
597 |
#df_points_day_extracted <- extract(r_mosaic,data_stations,df=T) |
|
598 |
#df_points_day_extracted_fname <- paste0("df_points_day_extracted_fname",".txt") |
|
599 |
#write.table(df_points_day_extracted,file=df_points_day_extracted_fname) #5.1 Giga |
|
600 |
#4.51 (on 05/23) |
|
601 |
#df_points_day <- data_stations_var_pred_data_s |
|
634 | 602 |
|
635 |
##23.09 (on 05/22) |
|
636 |
#df_points_day_extracted <- extract(r_mosaic,data_stations,df=T) |
|
637 |
#df_points_day_extracted_fname <- paste0("df_points_day_extracted_fname",".txt") |
|
638 |
#write.table(df_points_day_extracted,file=df_points_day_extracted_fname) #5.1 Giga |
|
639 |
#4.51 (on 05/23) |
|
640 |
df_points_day <- data_stations_var_pred_data_s |
|
641 |
if(is.null(df_points_extracted_fname)){ |
|
642 | 603 |
#15.17 (on 09/08) |
643 | 604 |
##10.41 (on 05/22) |
644 |
df_points_day_extracted <- extract(r_mosaic,df_points_day,df=T) |
|
605 |
#took about 7h for 5262 layers, maybe can be sped up later |
|
606 |
df_points_extracted <- extract(r_stack,df_points,df=T,sp=T) #attach back to the original data... |
|
607 |
|
|
645 | 608 |
#17.19 (on 05/23) |
646 | 609 |
#22.27 (on 09/08) |
647 |
df_points_day_extracted_fname <- paste0("df_points_day_extracted_fname2",".txt")
|
|
610 |
#df_points_extracted_fname <- paste0("df_points_day_extracted_fname2",".txt")
|
|
648 | 611 |
#17.27 (on 05/23) |
649 |
write.table(df_points_day_extracted,file=df_points_day_extracted_fname,sep=",",row.names = F) |
|
612 |
|
|
613 |
df_points_extracted_fname <- file.path(out_dir,paste0("df_points_extracted_",out_suffix,".txt")) |
|
614 |
write.table(df_points_extracted,file= df_points_extracted_fname,sep=",",row.names = F) |
|
650 | 615 |
#17.19 (on 05/23) |
616 |
|
|
617 |
#### Now check for missing dates |
|
618 |
|
|
619 |
#debug(extract_date) |
|
620 |
#test <- extract_date(6431,lf_mosaic_list,12) #extract item number 12 from the name of files to get the data |
|
621 |
#list_dates_produced <- unlist(mclapply(1:length(lf_raster),FUN=extract_date,x=lf_raster,item_no=13,mc.preschedule=FALSE,mc.cores = num_cores)) |
|
622 |
#list_dates_produced <- mclapply(1:2,FUN=extract_date,x=lf_mosaic_list,item_no=13,mc.preschedule=FALSE,mc.cores = 2) |
|
623 |
list_dates_produced <- unlist(mclapply(1:length(lf_raster),FUN=extract_date,x=lf_raster,item_no=item_no, |
|
624 |
mc.preschedule=FALSE,mc.cores = num_cores)) |
|
625 |
|
|
626 |
list_dates_produced_date_val <- as.Date(strptime(list_dates_produced,"%Y%m%d")) |
|
627 |
month_str <- format(list_dates_produced_date_val, "%b") ## Month, char, abbreviated |
|
628 |
year_str <- format(list_dates_produced_date_val, "%Y") ## Year with century |
|
629 |
day_str <- as.numeric(format(list_dates_produced_date_val, "%d")) ## numeric month |
|
630 |
|
|
631 |
df_raster <- data.frame(basename(lf_mosaic_list),list_dates_produced_date_val,month_str,year_str,day_str,dirname(lf_mosaic_list)) |
|
632 |
|
|
633 |
df_raster_fname <- file.path(out_dir,paste0("df_raster_",out_suffix,".txt")) |
|
634 |
write.table(df_raster,file= df_raster_fname,sep=",",row.names = F) |
|
635 |
|
|
636 |
df_points_extracted_fname |
|
637 |
df_raster_fname |
|
638 |
|
|
639 |
extract_obj <- list(df_points_extracted_fname,df_raster_fname) |
|
640 |
names(extract_obj) <- c("df_points_extracted_fname","df_raster_fname") |
|
641 |
|
|
642 |
return(extract_obj) |
|
643 |
} |
|
644 |
|
|
645 |
|
|
646 |
|
|
647 |
|
|
651 | 648 |
|
652 | 649 |
}else{ |
653 | 650 |
df_points_day_extracted <- read.table(df_points_extracted_fname,sep=",") |
654 | 651 |
} |
655 | 652 |
|
656 |
head(df_points_day) #contains ID, dates and coordinates |
|
657 |
df_points_day$id |
|
658 |
|
|
659 |
max_idst <- 0.009*5 #5km in degree |
|
660 |
min_dist <- 0 #minimum distance to start with |
|
653 |
df_points_day_extracted |
|
654 |
names(df_points_day_extracted)[1:10] |
|
655 |
(df_points_day_extracted$ID)[1:10] |
|
661 | 656 |
|
662 |
###this needs be modified |
|
663 |
## target number |
|
664 |
target_max_nb <- 1 #this is not actually used yet in the current implementation |
|
665 |
target_min_nb <- 5 #this is the target number of stations we would like |
|
666 |
max_dist <- 1000 # the maximum distance used for pruning ie removes stations that are closer than 1000m |
|
667 |
min_dist <- 0 #minimum distance to start with |
|
668 |
step_dist <- 1000 #iteration step to remove the stations |
|
669 |
target_range_nb <- c(target_min_nb,target_max_nb) #target range of number of stations |
|
670 |
#First increase distance till 5km |
|
671 |
#then use random sampling...to get the extact target? |
|
672 |
#First test with maximum distance of 100m |
|
673 |
test1 <- sub_sampling_by_dist(target_range_nb,dist=min_dist,max_dist=max_dist,step=step_dist,data_in=df_points_day) |
|
657 |
df_points_day_extracted_tmp <- df_points_day_extracted |
|
658 |
df_points_extracted <- cbind(df_points_day,df_points_day_extracted_tmp) |
|
659 |
#df_points_extracted$id <- df_points_day$id |
|
674 | 660 |
|
661 |
#### Now combined with the station data extracted from the assessment stage |
|
662 |
combine |
|
663 |
data_stations_var_pred |
|
675 | 664 |
|
665 |
##write function to combine data!!! |
|
676 | 666 |
|
677 | 667 |
pix_ts <- as.data.frame(t(df_points_day_extracted)) |
678 | 668 |
pix_ts <- pix_ts[-1,] |
... | ... | |
688 | 678 |
#pix_ts <- t(as.data.frame(subset(data_pixel,select=r_ts_name))) #can subset to range later |
689 | 679 |
#pix_ts <- subset(as.data.frame(pix_ts),select=r_ts_name) |
690 | 680 |
|
681 |
combine_measurements_and_predictions_df <- function(i,dates_val,df_ts_pix,data_var,list_selected_ID,r_ts_name,var_name,dates_str,plot_fig=T){ |
|
682 |
|
|
683 |
# Input arguments: |
|
684 |
# i : selected station |
|
685 |
# df_ts_pix_data : data extracted from raster layer |
|
686 |
# data_var : data with station measurements (tmin,tmax or precip) |
|
687 |
# list_selected_ID : list of selected station |
|
688 |
# plot_fig : if T, figures are plotted |
|
689 |
# Output |
|
690 |
# |
|
691 |
|
|
692 |
##### START FUNCTION ############ |
|
693 |
|
|
694 |
#get the relevant station |
|
695 |
id_name <- list_selected_ID[i] # e.g. WS037.00 |
|
696 |
#id_selected <- df_ts_pix[[var_ID]]==id_name |
|
697 |
id_selected <- df_ts_pix[["ID_stat"]]==id_name |
|
698 |
|
|
699 |
### Not get the data from the time series |
|
700 |
data_pixel <- df_ts_pix[id_selected,] |
|
701 |
data_pixel <- as.data.frame(data_pixel) |
|
702 |
|
|
703 |
pix_ts <- t(as.data.frame(subset(data_pixel,select=r_ts_name))) #can subset to range later |
|
704 |
#pix_ts <- subset(as.data.frame(pix_ts),select=r_ts_name) |
|
705 |
pix_ts <- (as.data.frame(pix_ts)) |
|
706 |
## Process the coliform data |
|
707 |
|
|
708 |
#there are several measurements per day for some stations !!! |
|
709 |
#id_name <- data_pixel[[var_ID]] |
|
710 |
|
|
711 |
#df_tmp <-data_var[data_var$LOCATION_ID==id_name,] |
|
712 |
df_tmp <- subset(data_var,data_var$ID_stat==id_name) |
|
713 |
#if(da) |
|
714 |
#aggregate(df_tmp |
|
715 |
if(nrow(df_tmp)>1){ |
|
716 |
|
|
717 |
formula_str <- paste(var_name," ~ ","TRIP_START_DATE_f",sep="") |
|
718 |
#var_pix <- aggregate(COL_SCORE ~ TRIP_START_DATE_f, data = df_tmp, mean) #aggregate by date |
|
719 |
var_pix <- try(aggregate(as.formula(formula_str), data = df_tmp, FUN=mean)) #aggregate by date |
|
720 |
#length(unique(test$TRIP_START_DATE_f)) |
|
721 |
#var_pix_ts <- t(as.data.frame(subset(data_pixel,select=var_name))) |
|
722 |
#pix <- t(data_pixel[1,24:388])#can subset to range later |
|
723 |
}else{ |
|
724 |
var_pix <- as.data.frame(df_tmp) #select only dates and var_name!!! |
|
725 |
} |
|
726 |
#var_pix <- subset(as.data.frame(data_id_selected,c(var_name,"TRIP_START_DATE_f")])) #,select=var_name) |
|
727 |
|
|
728 |
#Create time series object from extract pixel time series |
|
729 |
d_z <- zoo(pix_ts,dates_val) #make a time series ... |
|
730 |
names(d_z)<- "rainfall" |
|
731 |
#Create date object for data from stations |
|
732 |
|
|
733 |
d_var <- zoo(var_pix,var_pix$TRIP_START_DATE_f) |
|
734 |
#plot(d_var,pch=10) |
|
735 |
|
|
736 |
d_z2 <- merge(d_z,d_var) |
|
737 |
##Now subset? |
|
738 |
d_z2 <- window(d_z2,start=dates_val[1],end=dates_val[length(dates_val)]) |
|
739 |
|
|
740 |
d_z2$TRIP_START_DATE_f <- NULL |
|
741 |
|
|
742 |
df2 <- as.data.frame(d_z2) |
|
743 |
df2$date <- rownames(df2) |
|
744 |
rownames(df2) <- NULL |
|
745 |
df2[[var_name]] <- as.numeric(as.character(df2[[var_name]])) |
|
746 |
|
|
747 |
#df2$COL_SCORE <- as.numeric(as.character(df2$COL_SCORE)) |
|
748 |
df2$rainfall <- as.numeric(as.character(df2$rainfall)) |
|
749 |
df2$ID_stat <- id_name |
|
750 |
|
|
751 |
#plot(df2$rainfall) |
|
752 |
#list_pix[[i]] <- pix_ts |
|
753 |
|
|
754 |
if(plot_fig==T){ |
|
755 |
|
|
756 |
res_pix <- 480 |
|
757 |
col_mfrow <- 2 |
|
758 |
row_mfrow <- 1 |
|
759 |
|
|
760 |
### |
|
761 |
#Figure 3b |
|
762 |
png(filename=paste("Figure3b_","pixel_profile_var_combined_",id_name,"_",out_suffix,".png",sep=""), |
|
763 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
764 |
|
|
765 |
#plot(d_z,lty=2,ylab="rainfall",xlab="Time",main="") |
|
766 |
#points(d_z2$COL_SCORE,col="red",pch=10,cex=2) |
|
767 |
plot(d_z,lty=2,ylab="rainfall",xlab="Time",main="") |
|
768 |
abline(h=threshold_val,col="green") |
|
769 |
|
|
770 |
par(new=TRUE) # key: ask for new plot without erasing old |
|
771 |
#plot(x,y,type="l",col=t_col[k],xlab="",ylab="",lty="dotted",axes=F) #plotting fusion profile |
|
772 |
plot(df2[[var_name]],pch=10,cex=2.5,col="red", axes=F,ylab="",xlab="") |
|
773 |
#points(d_z2$COL_SCORE,col="red",pch=10,cex=2) |
|
774 |
legend("topleft",legend=c("stations"), |
|
775 |
cex=1.2,col="red",pch =10,bty="n") |
|
776 |
|
|
777 |
axis(4,cex=1.2) |
|
778 |
mtext(4, text = "coliform scores", line = 3) |
|
779 |
|
|
780 |
title(paste("Station time series",id_name,sep=" ")) |
|
781 |
|
|
782 |
dev.off() |
|
783 |
|
|
784 |
#Figure 3c |
|
785 |
png(filename=paste("Figure3c_","pixel_profile_var_combined_log_scale_",id_name,"_",out_suffix,".png",sep=""), |
|
786 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
787 |
|
|
788 |
#plot(d_z,lty=2,ylab="rainfall",xlab="Time",main="") |
|
789 |
#points(d_z2$COL_SCORE,col="red",pch=10,cex=2) |
|
790 |
plot(d_z,lty=2,ylab="rainfall",xlab="Time",main="") |
|
791 |
abline(h=threshold_val,col="green") |
|
792 |
par(new=TRUE) # key: ask for new plot without erasing old |
|
793 |
#plot(x,y,type="l",col=t_col[k],xlab="",ylab="",lty="dotted",axes=F) #plotting fusion profile |
|
794 |
#plot(log(df2$COL_SCORE),pch=10,cex=2.5,col="red", axes=F,ylab="",xlab="") |
|
795 |
plot(log(df2[[var_name]]),pch=10,cex=2.5,col="red", axes=F,ylab="",xlab="") |
|
796 |
|
|
797 |
#points(d_z2$COL_SCORE,col="red",pch=10,cex=2) |
|
798 |
legend("topleft",legend=c("stations"), |
|
799 |
cex=1.2,col="red",pch =10,bty="n") |
|
800 |
|
|
801 |
axis(4,cex=1.2) |
|
802 |
mtext(4, text = "coliform scores", line = 3) |
|
803 |
|
|
804 |
title(paste("Station time series",id_name,sep=" ")) |
|
805 |
|
|
806 |
dev.off() |
|
807 |
|
|
808 |
####Histogram of values |
|
809 |
|
|
810 |
res_pix <- 480 |
|
811 |
col_mfrow <- 2 |
|
812 |
row_mfrow <- 1 |
|
813 |
|
|
814 |
png(filename=paste("Figure4_","histogram_measurements_",year_processed,"_",id_name,"_",out_suffix,".png",sep=""), |
|
815 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
816 |
|
|
817 |
hist_val <- hist(df2[[var_name]],main="",xlab="COLIFORM SCORES") |
|
818 |
#hist_val <- hist(df2$COL_SCORE,main="",xlab="COLIFORM SCORES") |
|
819 |
title(paste("Histrogram of coliform scores for station",id_name,"in",year_processed,sep=" ")) |
|
820 |
#abline(v=threshold_val,col="green" ) |
|
821 |
legend("topright",legend=c("treshold val"), |
|
822 |
cex=1.2, col="green",lty =1,bty="n") |
|
823 |
|
|
824 |
y_loc <- max(hist_val$counts)/2 |
|
825 |
|
|
826 |
#text(threshold_val,y_loc,paste(as.character(threshold_val)),pos=1,offset=0.1) |
|
827 |
|
|
828 |
dev.off() |
|
829 |
|
|
830 |
#res_pix <- 480 |
|
831 |
#col_mfrow <- 2 |
|
832 |
#row_mfrow <- 1 |
|
833 |
|
|
834 |
#png(filename=paste("Figure4_","histogram_coliform_measurements_",year_processed,"_",id_name,"_",out_suffix,".png",sep=""), |
|
835 |
# width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
836 |
|
|
837 |
plot(df2$rainfall) |
|
838 |
#plot(df2$rainfall,df2$COL_SCORE) |
|
839 |
#plot(log(df2$rainfall),log(df2$COL_SCORE)) |
|
840 |
plot(df2$rainfall,df2[[var_name]]) |
|
841 |
plot(df2$rainfall,log(df2[[var_name]])) |
|
842 |
|
|
843 |
|
|
844 |
|
|
845 |
} |
|
846 |
|
|
847 |
## Now correlation. |
|
848 |
#sum(is.na(df2$rainfall)) |
|
849 |
#[1] 0 |
|
850 |
nb_zero <- sum((df2$rainfall==0)) #203 |
|
851 |
#nb_NA <- sum(is.na(df2$COL_SCORE)) |
|
852 |
nb_NA <- sum(is.na(df2[[var_name]])) #for ID 394 DMR it is 361 missing values for 2012!! |
|
853 |
## Cumulated precip and lag? |
|
854 |
#Keep number of 0 for every year for rainfall |
|
855 |
#summarize by month |
|
856 |
#Kepp number of NA for scores... |
|
857 |
#Summarize by season... |
|
858 |
## Threshold? |
|
859 |
station_summary_obj <- list(nb_zero,nb_NA,df2) |
|
860 |
names(station_summary_obj) <- c("nb_zero_precip","nb_NA_var","df_combined") |
|
861 |
return(station_summary_obj) |
|
862 |
} |
|
863 |
|
|
691 | 864 |
list_dates_produced <- unlist(mclapply(1:length(var_names), |
692 | 865 |
FUN=extract_date, |
693 | 866 |
x=var_names, |
... | ... | |
846 | 1019 |
|
847 | 1020 |
dev.off() |
848 | 1021 |
|
1022 |
############### PART5: Make raster stack and display maps ############# |
|
1023 |
#### Extract corresponding raster for given dates and plot stations used |
|
1024 |
|
|
1025 |
|
|
1026 |
#start_date <- day_to_mosaic_range[1] |
|
1027 |
#end_date <- day_to_mosaic_range[2] |
|
1028 |
#start_date <- day_start #PARAM 12 arg 12 |
|
1029 |
#end_date <- day_end #PARAM 13 arg 13 |
|
1030 |
|
|
1031 |
#date_to_plot <- seq(as.Date(strptime(start_date,"%Y%m%d")), as.Date(strptime(end_date,"%Y%m%d")), 'day') |
|
1032 |
#l_dates <- format(date_to_plot,"%Y%m%d") #format back to the relevant date format for files |
|
1033 |
#mask_pred <- FALSE |
|
1034 |
#matching <- FALSE #to be added after mask_pred option |
|
1035 |
#list_param_pre_process <- list(raster_name_lf,python_bin,infile_mask,scaling,mask_pred,NA_flag_val,out_suffix,out_dir) |
|
1036 |
#names(list_param_pre_process) <- c("lf","python_bin","infile_mask","scaling","mask_pred","NA_flag_val","out_suffix","out_dir") |
|
1037 |
|
|
1038 |
#debug(pre_process_raster_mosaic_fun) |
|
1039 |
|
|
1040 |
#lf_mosaic_scaled <- mclapply(1:length(raster_name_lf),FUN=pre_process_raster_mosaic_fun,list_param=list_param_pre_process,mc.preschedule=FALSE,mc.cores = num_cores) |
|
1041 |
#lf_mosaic_scaled <- mclapply(1:length(raster_name_lf),FUN=pre_process_raster_mosaic_fun,list_param=list_param_pre_process,mc.preschedule=FALSE,mc.cores = num_cores) |
|
1042 |
|
|
1043 |
#test <- pre_process_raster_mosaic_fun(2,list_param_pre_process) |
|
1044 |
#lf_mosaic_scaled <- unlist(lf_mosaic_scaled) |
|
1045 |
|
|
1046 |
##################################### PART 5 ###### |
|
1047 |
##### Plotting specific days for the mosaics |
|
1048 |
|
|
1049 |
r_mosaic_scaled <- stack(lf_mosaic_scaled) |
|
1050 |
NAvalue(r_mosaic_scaled)<- -3399999901438340239948148078125514752.000 |
|
1051 |
plot(r_mosaic_scaled,y=6,zlim=c(-50,50)) |
|
1052 |
plot(r_mosaic_scaled,zlim=c(-50,50),col=matlab.like(255)) |
|
1053 |
|
|
1054 |
#layout_m<-c(1,3) #one row two columns |
|
1055 |
#levelplot(r_mosaic_scaled,zlim=c(-50,50),col.regions=matlab.like(255)) |
|
1056 |
#levelplot(r_mosaic_scaled,zlim=c(-50,50),col.regions=matlab.like(255)) |
|
1057 |
|
|
1058 |
#png(paste("Figure7a__spatial_pattern_tmax_prediction_levelplot_",date_selected,out_prefix,".png", sep=""), |
|
1059 |
# height=480*layout_m[1],width=480*layout_m[2]) |
|
1060 |
#plot(r_pred,col=temp.colors(255),zlim=c(-3500,4500)) |
|
1061 |
#plot(r_pred,col=matlab.like(255),zlim=c(-40,50)) |
|
1062 |
#paste(raster_name[1:7],collapse="_") |
|
1063 |
#add filename option later |
|
1064 |
|
|
1065 |
#NA_flag_val_mosaic <- -3399999901438340239948148078125514752.000 |
|
1066 |
|
|
1067 |
list_param_plot_raster_mosaic <- list(l_dates,r_mosaic_scaled,NA_flag_val_mosaic,out_dir,out_suffix, |
|
1068 |
region_name,variable_name) |
|
1069 |
names(list_param_plot_raster_mosaic) <- c("l_dates","r_mosaic_scaled","NA_flag_val_mosaic","out_dir","out_suffix", |
|
1070 |
"region_name","variable_name") |
|
1071 |
|
|
1072 |
lf_mosaic_plot_fig <- mclapply(1:length(lf_mosaic_scaled),FUN=plot_raster_mosaic,list_param=list_param_plot_raster_mosaic,mc.preschedule=FALSE,mc.cores = num_cores) |
|
1073 |
|
|
849 | 1074 |
#################### |
850 | 1075 |
###### Now add figures with additional met stations? |
851 | 1076 |
|
Also available in: Unified diff
major changes adding function to extract from raster, combine extracted data with assessmment and cleaning up