Revision 35380459
Added by Benoit Parmentier about 8 years ago
climate/research/oregon/interpolation/global_product_assessment_part1_functions.R | ||
---|---|---|
544 | 544 |
return(df_selected) |
545 | 545 |
} |
546 | 546 |
|
547 |
|
|
548 |
|
|
549 |
finding_missing_dates <- function(date_start,date_end,list_dates){ |
|
550 |
#this assumes daily time steps!! |
|
551 |
#can be improved later on |
|
552 |
|
|
553 |
#date_start <- "19840101" |
|
554 |
#date_end <- "19991231" |
|
555 |
date1 <- as.Date(strptime(date_start,"%Y%m%d")) |
|
556 |
date2 <- as.Date(strptime(date_end,"%Y%m%d")) |
|
557 |
dates_range <- seq.Date(date1, date2, by="1 day") #sequence of dates |
|
558 |
|
|
559 |
missing_dates <- setdiff(as.character(dates_range),as.character(list_dates)) |
|
560 |
#df_dates_missing <- data.frame(date=missing_dates) |
|
561 |
#which(df_dates$date%in%missing_dates) |
|
562 |
#df_dates_missing$missing <- 1 |
|
563 |
|
|
564 |
df_dates <- data.frame(date=as.character(dates_range),missing = 0) |
|
565 |
|
|
566 |
df_dates$missing[df_dates$date %in% missing_dates] <- 1 |
|
567 |
#a$flag[a$id %in% temp] <- 1 |
|
568 |
|
|
569 |
missing_dates_obj <- list(missing_dates,df_dates) |
|
570 |
names(missing_dates_obj) <- c("missing_dates","df_dates") |
|
571 |
return(missing_dates) |
|
572 |
} |
|
573 |
|
|
574 |
|
|
575 |
|
|
576 |
extract_from_time_series_raster_stack <- function(df_points,date_start,date_end,lf_raster,item_no=13,num_cores=4,pattern_str=NULL,in_dir=NULL,out_dir=".",out_suffix=""){ |
|
577 |
# |
|
578 |
#This function extract value given from a raster stack time series given a spatial data.frame and a list of files |
|
579 |
# |
|
580 |
#INPUTS |
|
581 |
#1) df_points |
|
582 |
#2) date_start,num_cores=4,pattern_str=NULL,in_dir=NULL,out_dir=".",out_suffix= |
|
583 |
#3) date_end |
|
584 |
#3) lf_raster |
|
585 |
#4) item_no=13 |
|
586 |
#5) num_cores=4, |
|
587 |
#6) pattern_str=NULL |
|
588 |
#7) in_dir=NULL, |
|
589 |
#8) out_dir="." |
|
590 |
#9) out_suffix="" |
|
591 |
#OUTPUTS |
|
592 |
# |
|
593 |
# |
|
594 |
|
|
595 |
#### Start script #### |
|
596 |
|
|
597 |
if(is.null(lf_raster)){ |
|
598 |
#pattern_str <- ".*.tif" |
|
599 |
pattern_str <-"*.tif" |
|
600 |
lf_raster <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=F,full.names=T) |
|
601 |
r_stack <- stack(lf_raster,quick=T) #this is very fast now with the quick option! |
|
602 |
#save(r_mosaic,file="r_mosaic.RData") |
|
603 |
|
|
604 |
}else{ |
|
605 |
r_stack <- stack(lf_raster,quick=T) #this is very fast now with the quick option! |
|
606 |
} |
|
607 |
|
|
608 |
#df_points$files <- lf_mosaic_list |
|
609 |
#Use the global output?? |
|
610 |
|
|
611 |
##23.09 (on 05/22) |
|
612 |
#df_points_day_extracted <- extract(r_mosaic,data_stations,df=T) |
|
613 |
#df_points_day_extracted_fname <- paste0("df_points_day_extracted_fname",".txt") |
|
614 |
#write.table(df_points_day_extracted,file=df_points_day_extracted_fname) #5.1 Giga |
|
615 |
#4.51 (on 05/23) |
|
616 |
#df_points_day <- data_stations_var_pred_data_s |
|
617 |
|
|
618 |
#15.17 (on 09/08) |
|
619 |
##10.41 (on 05/22) |
|
620 |
#took about 7h for 5262 layers, maybe can be sped up later |
|
621 |
df_points_extracted <- extract(r_stack,df_points,df=T,sp=T) #attach back to the original data... |
|
622 |
|
|
623 |
#17.19 (on 05/23) |
|
624 |
#22.27 (on 09/08) |
|
625 |
#df_points_extracted_fname <- paste0("df_points_day_extracted_fname2",".txt") |
|
626 |
#17.27 (on 05/23) |
|
627 |
|
|
628 |
df_points_extracted_fname <- file.path(out_dir,paste0("df_points_extracted_",out_suffix,".txt")) |
|
629 |
write.table(df_points_extracted,file= df_points_extracted_fname,sep=",",row.names = F) |
|
630 |
#17.19 (on 05/23) |
|
631 |
|
|
632 |
#### Now check for missing dates |
|
633 |
|
|
634 |
#debug(extract_date) |
|
635 |
#test <- extract_date(6431,lf_mosaic_list,12) #extract item number 12 from the name of files to get the data |
|
636 |
#list_dates_produced <- unlist(mclapply(1:length(lf_raster),FUN=extract_date,x=lf_raster,item_no=13,mc.preschedule=FALSE,mc.cores = num_cores)) |
|
637 |
#list_dates_produced <- mclapply(1:2,FUN=extract_date,x=lf_mosaic_list,item_no=13,mc.preschedule=FALSE,mc.cores = 2) |
|
638 |
list_dates_produced <- unlist(mclapply(1:length(lf_raster),FUN=extract_date,x=lf_raster,item_no=item_no, |
|
639 |
mc.preschedule=FALSE,mc.cores = num_cores)) |
|
640 |
|
|
641 |
list_dates_produced_date_val <- as.Date(strptime(list_dates_produced,"%Y%m%d")) |
|
642 |
month_str <- format(list_dates_produced_date_val, "%b") ## Month, char, abbreviated |
|
643 |
year_str <- format(list_dates_produced_date_val, "%Y") ## Year with century |
|
644 |
day_str <- as.numeric(format(list_dates_produced_date_val, "%d")) ## numeric month |
|
645 |
|
|
646 |
df_raster <- data.frame(lf=basename(lf_raster), |
|
647 |
date=list_dates_produced_date_val, |
|
648 |
month_str=month_str, |
|
649 |
year=year_str, |
|
650 |
day=day_str, |
|
651 |
dir=dirname(lf_mosaic_list)) |
|
652 |
|
|
653 |
df_raster_fname <- file.path(out_dir,paste0("df_raster_",out_suffix,".txt")) |
|
654 |
write.table(df_raster,file= df_raster_fname,sep=",",row.names = F) |
|
655 |
|
|
656 |
missing_dates_obj <- finding_missing_dates (date_start,date_end,list_dates_produced_date_val) |
|
657 |
|
|
658 |
df_time_series <- missing_dates_obj$df_dates |
|
659 |
df_time_series$date <- as.character(df_time_series$date) |
|
660 |
df_raster$date <- as.character(df_raster$date) |
|
661 |
|
|
662 |
df_time_series <- merge(df_time_series,df_raster,by="date",all=T) #outer join to keep missing dates |
|
663 |
|
|
664 |
df_time_series_fname <- file.path(out_dir,paste0("df_time_series_",out_suffix,".txt")) #add the name of var later (tmax) |
|
665 |
write.table(df_time_series,file= df_time_series_fname,sep=",",row.names = F) |
|
666 |
|
|
667 |
extract_obj <- list(df_points_extracted_fname,df_raster_fname,df_time_series_fname) |
|
668 |
names(extract_obj) <- c("df_points_extracted_fname","df_raster_fname","df_time_series_fname") |
|
669 |
|
|
670 |
return(extract_obj) |
|
671 |
} |
|
672 |
|
|
673 |
|
|
674 |
combine_measurements_and_predictions_df <- function(i,df_raster,df_time_series,df_ts_pix,data_var,list_selected_ID,r_ts_name,var_name,plot_fig=T){ |
|
675 |
|
|
676 |
# Input arguments: |
|
677 |
# i : selected station |
|
678 |
# df_ts_pix_data : data extracted from raster layer |
|
679 |
# data_var : data with station measurements (tmin,tmax or precip) |
|
680 |
# list_selected_ID : list of selected station |
|
681 |
# plot_fig : if T, figures are plotted |
|
682 |
# Output |
|
683 |
# |
|
684 |
|
|
685 |
##### START FUNCTION ############ |
|
686 |
|
|
687 |
#get the relevant station |
|
688 |
id_name <- list_selected_ID[i] # e.g. WS037.00,1238099999 |
|
689 |
#id_selected <- df_ts_pix[[var_ID]]==id_name |
|
690 |
id_selected <- df_ts_pix[["id"]]== id_name |
|
691 |
|
|
692 |
### Not get the data from the time series |
|
693 |
data_pixel <- df_ts_pix[id_selected,] #this should be a unique row!!! |
|
694 |
#data_pixel <- data_pixel[1,] |
|
695 |
data_pixel <- as.data.frame(data_pixel) |
|
696 |
|
|
697 |
##Transpose data to have rows as date and one unique column |
|
698 |
pix_ts <- t(as.data.frame(subset(data_pixel,select=r_ts_name))) #can subset to range later |
|
699 |
#pix_ts <- subset(as.data.frame(pix_ts),select=r_ts_name) |
|
700 |
pix_ts <- (as.data.frame(pix_ts)) |
|
701 |
names(pix_ts) <- paste(var_pred,"_mosaic",sep="") |
|
702 |
#add scaling option |
|
703 |
#!is.null(scaling) |
|
704 |
## Process the measurements data (with tmax/tmin/precip) |
|
705 |
|
|
706 |
#there are several measurements per day for some stations !!! |
|
707 |
#id_name <- data_pixel[[var_ID]] |
|
708 |
|
|
709 |
#df_tmp <-data_var[data_var$LOCATION_ID==id_name,] |
|
710 |
df_tmp <- subset(data_var,data_var$id==id_name) |
|
711 |
#if(da) |
|
712 |
#aggregate(df_tmp |
|
713 |
#if(nrow(df_tmp)>1){ |
|
714 |
# |
|
715 |
# formula_str <- paste(var_name," ~ ","TRIP_START_DATE_f",sep="") |
|
716 |
# #var_pix <- aggregate(COL_SCORE ~ TRIP_START_DATE_f, data = df_tmp, mean) #aggregate by date |
|
717 |
# var_pix <- try(aggregate(as.formula(formula_str), data = df_tmp, FUN=mean)) #aggregate by date |
|
718 |
# #length(unique(test$TRIP_START_DATE_f)) |
|
719 |
# #var_pix_ts <- t(as.data.frame(subset(data_pixel,select=var_name))) |
|
720 |
# #pix <- t(data_pixel[1,24:388])#can subset to range later |
|
721 |
#}else{ |
|
722 |
# var_pix <- as.data.frame(df_tmp) #select only dates and var_name!!! |
|
723 |
#} |
|
724 |
#var_pix <- subset(as.data.frame(data_id_selected,c(var_name,"TRIP_START_DATE_f")])) #,select=var_name) |
|
725 |
var_pix <- as.data.frame(df_tmp) #select only dates and var_name!!! |
|
726 |
var_pix$date_str <- as.character(var_pix$date) |
|
727 |
#match from 20011231 to 2001-12-31 to date format |
|
728 |
var_pix$date <- as.character(as.Date(var_pix$date_str,"%Y%m%d")) #format back to the relevant date format for files |
|
729 |
|
|
730 |
#dates_val <- df_time_series$date |
|
731 |
dates_val <- df_raster$date |
|
732 |
pix_ts$date <- dates_val |
|
733 |
#pix_ts <- merge(df_raster,pix_ts,by="date") |
|
734 |
|
|
735 |
pix_ts$lf <- df_raster$lf |
|
736 |
#pix_ts$ |
|
737 |
pix_ts <- merge(df_time_series,pix_ts,by="date",all=T) |
|
738 |
|
|
739 |
#check for duplicates in extracted values (this can happen if there is a test layer or repetition |
|
740 |
if(nrow(pix_ts)!=length(unique(pix_ts$date))){ |
|
741 |
var_pred_tmp <- paste0(var_pred,"_mosaic") |
|
742 |
md <- melt(pix_ts, id=(c("date")),measure.vars=c(var_pred_tmp, "missing")) #c("x","y","dailyTmax","mod1","res_mod1")) |
|
743 |
#formula_str <- "id + date ~ x + y + dailyTmax + mod1 + res_mod1" |
|
744 |
pix_ts <- cast(md, date ~ variable, fun.aggregate = mean, |
|
745 |
na.rm = TRUE) |
|
746 |
|
|
747 |
} |
|
748 |
|
|
749 |
#if(nrow(var_pix)!=length(unique(var_pix$date))){ |
|
750 |
# |
|
751 |
# md <- melt(var_pix, id=(c("date")),measure.vars=c(var_pred, "missing")) #c("x","y","dailyTmax","mod1","res_mod1")) |
|
752 |
# #formula_str <- "id + date ~ x + y + dailyTmax + mod1 + res_mod1" |
|
753 |
# test <- cast(md, date ~ variable, fun.aggregate = mean, |
|
754 |
# na.rm = TRUE) |
|
755 |
# |
|
756 |
# |
|
757 |
#} |
|
758 |
df_pix_ts <- merge(pix_ts,var_pix,by="date",all=T) |
|
759 |
#Create time series object from extract pixel time series |
|
760 |
|
|
761 |
nb_zero <- sum( df_pix_ts[[var_pred_tmp]]==0) #relevant for precip |
|
762 |
#nb_NA <- sum(is.na(df2$COL_SCORE)) |
|
763 |
nb_NA <- sum(is.na( df_pix_ts[[var_pred_tmp]])) #for ID 394 DMR it is 361 missing values for 2012!! |
|
764 |
##Add quantile, and range info later on... |
|
765 |
|
|
766 |
## Cumulated precip and lag? |
|
767 |
#Keep number of 0 for every year for rainfall |
|
768 |
#summarize by month |
|
769 |
#Kepp number of NA for scores... |
|
770 |
#Summarize by season... |
|
771 |
## Threshold? |
|
772 |
station_summary_obj <- list(nb_zero,nb_NA, df_pix_ts) |
|
773 |
names(station_summary_obj) <- c("nb_zero_precip","nb_NA_var"," df_pix_ts") |
|
774 |
return(station_summary_obj) |
|
775 |
} |
|
776 |
|
|
547 | 777 |
############################ END OF SCRIPT ################################## |
Also available in: Unified diff
adding functions to product assessment part1: extracting from raster and combining data to assessment stage