Revision ff3e42fe
Added by Benoit Parmentier over 8 years ago
climate/research/oregon/interpolation/global_product_assessment_part1.R | ||
---|---|---|
4 | 4 |
#Combining tables and figures for individual runs for years and tiles. |
5 | 5 |
#AUTHOR: Benoit Parmentier |
6 | 6 |
#CREATED ON: 05/15/2016 |
7 |
#MODIFIED ON: 09/18/2016
|
|
7 |
#MODIFIED ON: 09/19/2016
|
|
8 | 8 |
#Version: 1 |
9 | 9 |
#PROJECT: Environmental Layers project |
10 | 10 |
#COMMENTS: Initial commit, script based on part NASA biodiversity conferenc |
... | ... | |
19 | 19 |
# |
20 | 20 |
#setfacl -Rmd user:aguzman4:rwx /nobackupp8/bparmen1/output_run10_1500x4500_global_analyses_pred_1992_10052015 |
21 | 21 |
|
22 |
#COMMIT: combine extracted and stations information from assessment
|
|
22 |
#COMMIT: testing function combine extraction and assessment data, also moved to function script
|
|
23 | 23 |
|
24 | 24 |
################################################################################################# |
25 | 25 |
|
... | ... | |
85 | 85 |
source(file.path(script_path,function_assessment_part3)) #source all functions used in this script |
86 | 86 |
|
87 | 87 |
#Product assessment |
88 |
function_product_assessment_part1_functions <- "global_product_assessment_part1_functions_09112016.R"
|
|
88 |
function_product_assessment_part1_functions <- "global_product_assessment_part1_functions_09192016.R"
|
|
89 | 89 |
source(file.path(script_path,function_product_assessment_part1_functions)) #source all functions used in this script |
90 | 90 |
|
91 | 91 |
############################### |
... | ... | |
589 | 589 |
#dates_val <- |
590 | 590 |
df_raster |
591 | 591 |
df_time_series |
592 |
plot_fig <- false |
|
592 |
plot_fig <- FALSE |
|
593 |
i<-1 |
|
593 | 594 |
|
594 |
combine_measurements_and_predictions_df <- function(i,df_raster,df_time_series,df_ts_pix,data_var,list_selected_ID,r_ts_name,var_name,plot_fig=T){ |
|
595 |
|
|
596 |
# Input arguments: |
|
597 |
# i : selected station |
|
598 |
# df_ts_pix_data : data extracted from raster layer |
|
599 |
# data_var : data with station measurements (tmin,tmax or precip) |
|
600 |
# list_selected_ID : list of selected station |
|
601 |
# plot_fig : if T, figures are plotted |
|
602 |
# Output |
|
603 |
# |
|
604 |
|
|
605 |
##### START FUNCTION ############ |
|
606 |
|
|
607 |
#get the relevant station |
|
608 |
id_name <- list_selected_ID[i] # e.g. WS037.00,1238099999 |
|
609 |
#id_selected <- df_ts_pix[[var_ID]]==id_name |
|
610 |
id_selected <- df_ts_pix[["id"]]== id_name |
|
611 |
|
|
612 |
### Not get the data from the time series |
|
613 |
data_pixel <- df_ts_pix[id_selected,] #this should be a unique row!!! |
|
614 |
#data_pixel <- data_pixel[1,] |
|
615 |
data_pixel <- as.data.frame(data_pixel) |
|
616 |
|
|
617 |
##Transpose data to have rows as date and one unique column |
|
618 |
pix_ts <- t(as.data.frame(subset(data_pixel,select=r_ts_name))) #can subset to range later |
|
619 |
#pix_ts <- subset(as.data.frame(pix_ts),select=r_ts_name) |
|
620 |
pix_ts <- (as.data.frame(pix_ts)) |
|
621 |
names(pix_ts) <- paste(var_pred,"_mosaic",sep="") |
|
622 |
#add scaling option |
|
623 |
#!is.null(scaling) |
|
624 |
## Process the measurements data (with tmax/tmin/precip) |
|
625 |
|
|
626 |
#there are several measurements per day for some stations !!! |
|
627 |
#id_name <- data_pixel[[var_ID]] |
|
628 |
|
|
629 |
#df_tmp <-data_var[data_var$LOCATION_ID==id_name,] |
|
630 |
df_tmp <- subset(data_var,data_var$id==id_name) |
|
631 |
#if(da) |
|
632 |
#aggregate(df_tmp |
|
633 |
#if(nrow(df_tmp)>1){ |
|
634 |
# |
|
635 |
# formula_str <- paste(var_name," ~ ","TRIP_START_DATE_f",sep="") |
|
636 |
# #var_pix <- aggregate(COL_SCORE ~ TRIP_START_DATE_f, data = df_tmp, mean) #aggregate by date |
|
637 |
# var_pix <- try(aggregate(as.formula(formula_str), data = df_tmp, FUN=mean)) #aggregate by date |
|
638 |
# #length(unique(test$TRIP_START_DATE_f)) |
|
639 |
# #var_pix_ts <- t(as.data.frame(subset(data_pixel,select=var_name))) |
|
640 |
# #pix <- t(data_pixel[1,24:388])#can subset to range later |
|
641 |
#}else{ |
|
642 |
# var_pix <- as.data.frame(df_tmp) #select only dates and var_name!!! |
|
643 |
#} |
|
644 |
#var_pix <- subset(as.data.frame(data_id_selected,c(var_name,"TRIP_START_DATE_f")])) #,select=var_name) |
|
645 |
var_pix <- as.data.frame(df_tmp) #select only dates and var_name!!! |
|
646 |
var_pix$date_str <- as.character(var_pix$date) |
|
647 |
#match from 20011231 to 2001-12-31 to date format |
|
648 |
var_pix$date <- as.character(as.Date(var_pix$date_str,"%Y%m%d")) #format back to the relevant date format for files |
|
649 |
|
|
650 |
#dates_val <- df_time_series$date |
|
651 |
dates_val <- df_raster$date |
|
652 |
pix_ts$date <- dates_val |
|
653 |
#pix_ts <- merge(df_raster,pix_ts,by="date") |
|
654 |
|
|
655 |
pix_ts$lf <- df_raster$lf |
|
656 |
#pix_ts$ |
|
657 |
pix_ts <- merge(df_time_series,pix_ts,by="date",all=T) |
|
658 |
|
|
659 |
#check for duplicates in extracted values (this can happen if there is a test layer or repetition |
|
660 |
if(nrow(pix_ts)!=length(unique(pix_ts$date))){ |
|
661 |
var_pred_tmp <- paste0(var_pred,"_mosaic") |
|
662 |
md <- melt(pix_ts, id=(c("date")),measure.vars=c(var_pred_tmp, "missing")) #c("x","y","dailyTmax","mod1","res_mod1")) |
|
663 |
#formula_str <- "id + date ~ x + y + dailyTmax + mod1 + res_mod1" |
|
664 |
pix_ts <- cast(md, date ~ variable, fun.aggregate = mean, |
|
665 |
na.rm = TRUE) |
|
666 |
|
|
667 |
} |
|
668 |
|
|
669 |
#if(nrow(var_pix)!=length(unique(var_pix$date))){ |
|
670 |
# |
|
671 |
# md <- melt(var_pix, id=(c("date")),measure.vars=c(var_pred, "missing")) #c("x","y","dailyTmax","mod1","res_mod1")) |
|
672 |
# #formula_str <- "id + date ~ x + y + dailyTmax + mod1 + res_mod1" |
|
673 |
# test <- cast(md, date ~ variable, fun.aggregate = mean, |
|
674 |
# na.rm = TRUE) |
|
675 |
# |
|
676 |
# |
|
677 |
#} |
|
678 |
df_pix_ts <- merge(pix_ts,var_pix,by="date",all=T) |
|
679 |
#Create time series object from extract pixel time series |
|
680 |
|
|
681 |
nb_zero <- sum( df_pix_ts[[var_pred_tmp]]==0) #relevant for precip |
|
682 |
#nb_NA <- sum(is.na(df2$COL_SCORE)) |
|
683 |
nb_NA <- sum(is.na( df_pix_ts[[var_pred_tmp]])) #for ID 394 DMR it is 361 missing values for 2012!! |
|
684 |
## Cumulated precip and lag? |
|
685 |
#Keep number of 0 for every year for rainfall |
|
686 |
#summarize by month |
|
687 |
#Kepp number of NA for scores... |
|
688 |
#Summarize by season... |
|
689 |
## Threshold? |
|
690 |
station_summary_obj <- list(nb_zero,nb_NA, df_pix_ts) |
|
691 |
names(station_summary_obj) <- c("nb_zero_precip","nb_NA_var"," df_pix_ts") |
|
692 |
return(station_summary_obj) |
|
693 |
} |
|
595 |
#Product assessment |
|
596 |
#function_product_assessment_part1_functions <- "global_product_assessment_part1_functions_09192016.R" |
|
597 |
#source(file.path(script_path,function_product_assessment_part1_functions)) #source all functions used in this script |
|
598 |
#undebug(combine_measurements_and_predictions_df) |
|
599 |
|
|
600 |
#this can be run with mclapply, very fast right now: |
|
601 |
station_summary_obj <- combine_measurements_and_predictions_df(i=i, |
|
602 |
df_raster=df_raster, |
|
603 |
df_time_series=df_time_series, |
|
604 |
df_ts_pix=df_ts_pix, |
|
605 |
data_var=data_var, |
|
606 |
list_selected_ID=list_selected_ID, |
|
607 |
r_ts_name=r_ts_name, |
|
608 |
var_name=var_name, |
|
609 |
var_pred = var_pred, |
|
610 |
plot_fig=T) |
|
611 |
df_pix_ts <- station_summary_obj$df_pix_ts |
|
612 |
#station_summary_obj <- list(nb_zero,nb_NA, df_pix_ts) |
|
613 |
|
|
614 |
id_name <- list_selected_ID[i] |
|
615 |
df_pix_ts_filename <- file.path(out_dir,paste0("df_pix_ts_",id_name,out_suffix,y_var_name,".txt")) |
|
616 |
write.table(df_pix_ts,df_pix_ts_filename,sep=",") |
|
694 | 617 |
|
695 |
list_dates_produced <- unlist(mclapply(1:length(var_names), |
|
696 |
FUN=extract_date, |
|
697 |
x=var_names, |
|
698 |
item_no=12, |
|
699 |
mc.preschedule=FALSE, |
|
700 |
mc.cores = num_cores)) |
|
701 | 618 |
|
702 |
list_dates_produced_date_val <- as.Date(strptime(list_dates_produced,"%Y%m%d")) |
|
619 |
##################### plot time series: make this a function!!! #### |
|
620 |
###start of plotting |
|
621 |
### makes this a function: |
|
622 |
|
|
703 | 623 |
pix_ts$date <- list_dates_produced_date_val |
704 | 624 |
pix_ts[,1]*scaling #scale? |
705 | 625 |
|
... | ... | |
711 | 631 |
|
712 | 632 |
#head(pix_ts) |
713 | 633 |
|
714 |
|
|
715 |
##################### plot time series: make this a function!!! #### |
|
716 |
###start of plotting |
|
717 |
### makes this a function: |
|
718 | 634 |
id_selected <- "82111099999" |
719 | 635 |
#station_id <- 8 |
720 | 636 |
station_id <- id_selected |
Also available in: Unified diff
testing function combine extraction and assessment data, also moved to function script