Revision e6ce3b29
Added by Benoit Parmentier over 8 years ago
climate/research/oregon/interpolation/global_product_assessment_part1.R | ||
---|---|---|
4 | 4 |
#Combining tables and figures for individual runs for years and tiles. |
5 | 5 |
#AUTHOR: Benoit Parmentier |
6 | 6 |
#CREATED ON: 05/15/2016 |
7 |
#MODIFIED ON: 09/17/2016
|
|
7 |
#MODIFIED ON: 09/18/2016
|
|
8 | 8 |
#Version: 1 |
9 | 9 |
#PROJECT: Environmental Layers project |
10 | 10 |
#COMMENTS: Initial commit, script based on part NASA biodiversity conferenc |
... | ... | |
19 | 19 |
# |
20 | 20 |
#setfacl -Rmd user:aguzman4:rwx /nobackupp8/bparmen1/output_run10_1500x4500_global_analyses_pred_1992_10052015 |
21 | 21 |
|
22 |
#COMMIT: clean up and first testing of extraction function
|
|
22 |
#COMMIT: combine extracted and stations information from assessment
|
|
23 | 23 |
|
24 | 24 |
################################################################################################# |
25 | 25 |
|
... | ... | |
149 | 149 |
|
150 | 150 |
day_start <- "1984101" #PARAM 12 arg 12 |
151 | 151 |
day_end <- "19991231" #PARAM 13 arg 13 |
152 |
#date_start <- |
|
153 |
#date_end <- |
|
152 |
#date_start <- day_start
|
|
153 |
#date_end <- day_end
|
|
154 | 154 |
|
155 | 155 |
#infile_mask <- "/nobackupp8/bparmen1/NEX_data/regions_input_files/r_mask_LST_reg4.tif" |
156 | 156 |
#infile_mask <- "/data/project/layers/commons/NEX_data/regions_input_files/r_mask_LST_reg5.tif" |
... | ... | |
506 | 506 |
#started at 16.51, 09/07 |
507 | 507 |
|
508 | 508 |
|
509 |
##### This could be moved in a separate file!! |
|
509 | 510 |
############### PART4: Checking for mosaic produced for given region ############## |
510 | 511 |
## From list of mosaic files predicted extract dates |
511 | 512 |
## Check dates predicted against date range for a given date range |
... | ... | |
526 | 527 |
in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg1/mosaics/mosaic" |
527 | 528 |
#in_dir_mosaic_rmse <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg1/mosaicsRMSE/mosaic" |
528 | 529 |
pattern_str <- ".*.tif" |
529 |
df_points <- #this contains the location of points to be used for extraction |
|
530 |
|
|
530 |
#df_points <- #this contains the location of points to be used for extraction |
|
531 |
#df_points <- data_stations_var_pred_data_s #collected previously |
|
532 |
|
|
531 | 533 |
#extract from var pred mosaic, tmax in this case: |
532 | 534 |
extract_obj_var_pred <- extract_from_time_series_raster_stack(df_points,date_start,date_end,lf_raster=NULL,item_no=13, |
533 | 535 |
num_cores=11,pattern_str=NULL,in_dir=in_dir_mosaic,out_dir=out_dir,out_suffix=out_suffix) |
... | ... | |
551 | 553 |
#### Now combined with the station data extracted from the assessment stage |
552 | 554 |
|
553 | 555 |
#combine |
554 |
data_stations_var_pred |
|
555 |
|
|
556 |
##write function to combine data!!! |
|
557 |
|
|
558 |
pix_ts <- as.data.frame(t(df_points_day_extracted)) |
|
559 |
pix_ts <- pix_ts[-1,] |
|
560 |
#var_names <- rownames(df_points_day_extracted) #same as lf_mosaic_list but without (*.tif) |
|
561 |
var_names <- rownames(pix_ts) #same as lf_mosaic_list but without (*.tif) |
|
562 |
var_id <- df_points_day$id |
|
563 |
df_points_day_extracted$id <- var_id |
|
564 |
|
|
565 |
#lf_var <- names(r_mosaic) |
|
566 |
### Not get the data from the time series |
|
567 |
#data_pixel <- df_ts_pix[id_selected,] |
|
568 |
#data_pixel <- as.data.frame(data_pixel) |
|
569 |
#pix_ts <- t(as.data.frame(subset(data_pixel,select=r_ts_name))) #can subset to range later |
|
570 |
#pix_ts <- subset(as.data.frame(pix_ts),select=r_ts_name) |
|
571 |
|
|
572 |
combine_measurements_and_predictions_df <- function(i,dates_val,df_ts_pix,data_var,list_selected_ID,r_ts_name,var_name,dates_str,plot_fig=T){ |
|
556 |
head(data_stations_var_pred) |
|
557 |
|
|
558 |
|
|
559 |
# id date x y dailyTmax mod1 res_mod1 id date training testing |
|
560 |
#1 71238099999 19930703 -112.867 53.683 20.2 18.19909 -2.000915 71238099999 19930703 4 0 |
|
561 |
#2 71238099999 19930704 -112.867 53.683 23.4 17.74476 -5.655237 71238099999 19930704 3 1 |
|
562 |
#3 71238099999 19930705 -112.867 53.683 21.7 19.22313 -2.476870 71238099999 19930705 3 1 |
|
563 |
#4 71238099999 19930706 -112.867 53.683 22.0 19.53294 -2.467061 71238099999 19930706 3 1 |
|
564 |
#5 71238099999 19930707 -112.867 53.683 20.9 17.84168 -3.058324 71238099999 19930707 2 2 |
|
565 |
#6 71238099999 19930708 -112.867 53.683 21.2 16.50887 -4.691128 71238099999 19930708 1 3 |
|
566 |
|
|
567 |
#need to combine: |
|
568 |
#data_stations_var_pred: constains id, x, y, dailyTmax, mod1, res_mod1, date, training, testing |
|
569 |
#df_time_series |
|
570 |
#df_points_extracted #contains id,x,y of stations and extracted values from raster stack |
|
571 |
dim(df_time_series) |
|
572 |
#dim(data_stations_var_pred) |
|
573 |
df_points_extracted_tmp <- df_points_extracted |
|
574 |
#df_points_extracted <- cbind(df_points,df_points_extracted) |
|
575 |
|
|
576 |
df_points_extracted$id <- df_points$id #this should have been done earlier in the extraction function |
|
577 |
df_points_extracted$x <- df_points$x #this should have been done earlier in the extraction function |
|
578 |
df_points_extracted$y <- df_points$y #this should have been done earlier in the extraction function |
|
579 |
|
|
580 |
dim(df_time_series) |
|
581 |
|
|
582 |
list_selected_ID <- unique(data_stations_var_pred$id) #11 stations selected |
|
583 |
data_var <- data_stations_var_pred |
|
584 |
df_ts_pix <- df_points_extracted |
|
585 |
r_ts_name <- sub(extension(lf_raster),"",basename(lf_raster)) |
|
586 |
var_name <- "dailyTmax" #observed measurements |
|
587 |
var_pred <- "mod1" #predictions |
|
588 |
#dates_str <- |
|
589 |
#dates_val <- |
|
590 |
df_raster |
|
591 |
df_time_series |
|
592 |
plot_fig <- false |
|
593 |
|
|
594 |
combine_measurements_and_predictions_df <- function(i,df_raster,df_time_series,df_ts_pix,data_var,list_selected_ID,r_ts_name,var_name,plot_fig=T){ |
|
573 | 595 |
|
574 | 596 |
# Input arguments: |
575 | 597 |
# i : selected station |
... | ... | |
583 | 605 |
##### START FUNCTION ############ |
584 | 606 |
|
585 | 607 |
#get the relevant station |
586 |
id_name <- list_selected_ID[i] # e.g. WS037.00 |
|
608 |
id_name <- list_selected_ID[i] # e.g. WS037.00,1238099999
|
|
587 | 609 |
#id_selected <- df_ts_pix[[var_ID]]==id_name |
588 |
id_selected <- df_ts_pix[["ID_stat"]]==id_name
|
|
610 |
id_selected <- df_ts_pix[["id"]]== id_name
|
|
589 | 611 |
|
590 | 612 |
### Not get the data from the time series |
591 |
data_pixel <- df_ts_pix[id_selected,] |
|
613 |
data_pixel <- df_ts_pix[id_selected,] #this should be a unique row!!! |
|
614 |
#data_pixel <- data_pixel[1,] |
|
592 | 615 |
data_pixel <- as.data.frame(data_pixel) |
593 | 616 |
|
617 |
##Transpose data to have rows as date and one unique column |
|
594 | 618 |
pix_ts <- t(as.data.frame(subset(data_pixel,select=r_ts_name))) #can subset to range later |
595 | 619 |
#pix_ts <- subset(as.data.frame(pix_ts),select=r_ts_name) |
596 | 620 |
pix_ts <- (as.data.frame(pix_ts)) |
597 |
## Process the coliform data |
|
621 |
names(pix_ts) <- paste(var_pred,"_mosaic",sep="") |
|
622 |
#add scaling option |
|
623 |
#!is.null(scaling) |
|
624 |
## Process the measurements data (with tmax/tmin/precip) |
|
598 | 625 |
|
599 | 626 |
#there are several measurements per day for some stations !!! |
600 | 627 |
#id_name <- data_pixel[[var_ID]] |
601 | 628 |
|
602 | 629 |
#df_tmp <-data_var[data_var$LOCATION_ID==id_name,] |
603 |
df_tmp <- subset(data_var,data_var$ID_stat==id_name)
|
|
630 |
df_tmp <- subset(data_var,data_var$id==id_name)
|
|
604 | 631 |
#if(da) |
605 | 632 |
#aggregate(df_tmp |
606 |
if(nrow(df_tmp)>1){ |
|
607 |
|
|
608 |
formula_str <- paste(var_name," ~ ","TRIP_START_DATE_f",sep="") |
|
609 |
#var_pix <- aggregate(COL_SCORE ~ TRIP_START_DATE_f, data = df_tmp, mean) #aggregate by date |
|
610 |
var_pix <- try(aggregate(as.formula(formula_str), data = df_tmp, FUN=mean)) #aggregate by date |
|
611 |
#length(unique(test$TRIP_START_DATE_f)) |
|
612 |
#var_pix_ts <- t(as.data.frame(subset(data_pixel,select=var_name))) |
|
613 |
#pix <- t(data_pixel[1,24:388])#can subset to range later |
|
614 |
}else{ |
|
615 |
var_pix <- as.data.frame(df_tmp) #select only dates and var_name!!! |
|
616 |
} |
|
633 |
#if(nrow(df_tmp)>1){
|
|
634 |
# |
|
635 |
# formula_str <- paste(var_name," ~ ","TRIP_START_DATE_f",sep="")
|
|
636 |
# #var_pix <- aggregate(COL_SCORE ~ TRIP_START_DATE_f, data = df_tmp, mean) #aggregate by date
|
|
637 |
# var_pix <- try(aggregate(as.formula(formula_str), data = df_tmp, FUN=mean)) #aggregate by date
|
|
638 |
# #length(unique(test$TRIP_START_DATE_f))
|
|
639 |
# #var_pix_ts <- t(as.data.frame(subset(data_pixel,select=var_name)))
|
|
640 |
# #pix <- t(data_pixel[1,24:388])#can subset to range later
|
|
641 |
#}else{
|
|
642 |
# var_pix <- as.data.frame(df_tmp) #select only dates and var_name!!!
|
|
643 |
#}
|
|
617 | 644 |
#var_pix <- subset(as.data.frame(data_id_selected,c(var_name,"TRIP_START_DATE_f")])) #,select=var_name) |
645 |
var_pix <- as.data.frame(df_tmp) #select only dates and var_name!!! |
|
646 |
var_pix$date_str <- as.character(var_pix$date) |
|
647 |
#match from 20011231 to 2001-12-31 to date format |
|
648 |
var_pix$date <- as.character(as.Date(var_pix$date_str,"%Y%m%d")) #format back to the relevant date format for files |
|
618 | 649 |
|
619 |
#Create time series object from extract pixel time series |
|
620 |
d_z <- zoo(pix_ts,dates_val) #make a time series ... |
|
621 |
names(d_z)<- "rainfall" |
|
622 |
#Create date object for data from stations |
|
623 |
|
|
624 |
d_var <- zoo(var_pix,var_pix$TRIP_START_DATE_f) |
|
625 |
#plot(d_var,pch=10) |
|
626 |
|
|
627 |
d_z2 <- merge(d_z,d_var) |
|
628 |
##Now subset? |
|
629 |
d_z2 <- window(d_z2,start=dates_val[1],end=dates_val[length(dates_val)]) |
|
630 |
|
|
631 |
d_z2$TRIP_START_DATE_f <- NULL |
|
650 |
#dates_val <- df_time_series$date |
|
651 |
dates_val <- df_raster$date |
|
652 |
pix_ts$date <- dates_val |
|
653 |
#pix_ts <- merge(df_raster,pix_ts,by="date") |
|
632 | 654 |
|
633 |
df2 <- as.data.frame(d_z2) |
|
634 |
df2$date <- rownames(df2) |
|
635 |
rownames(df2) <- NULL |
|
636 |
df2[[var_name]] <- as.numeric(as.character(df2[[var_name]])) |
|
637 |
|
|
638 |
#df2$COL_SCORE <- as.numeric(as.character(df2$COL_SCORE)) |
|
639 |
df2$rainfall <- as.numeric(as.character(df2$rainfall)) |
|
640 |
df2$ID_stat <- id_name |
|
641 |
|
|
642 |
#plot(df2$rainfall) |
|
643 |
#list_pix[[i]] <- pix_ts |
|
655 |
pix_ts$lf <- df_raster$lf |
|
656 |
#pix_ts$ |
|
657 |
pix_ts <- merge(df_time_series,pix_ts,by="date",all=T) |
|
644 | 658 |
|
645 |
if(plot_fig==T){ |
|
646 |
|
|
647 |
res_pix <- 480 |
|
648 |
col_mfrow <- 2 |
|
649 |
row_mfrow <- 1 |
|
650 |
|
|
651 |
### |
|
652 |
#Figure 3b |
|
653 |
png(filename=paste("Figure3b_","pixel_profile_var_combined_",id_name,"_",out_suffix,".png",sep=""), |
|
654 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
655 |
|
|
656 |
#plot(d_z,lty=2,ylab="rainfall",xlab="Time",main="") |
|
657 |
#points(d_z2$COL_SCORE,col="red",pch=10,cex=2) |
|
658 |
plot(d_z,lty=2,ylab="rainfall",xlab="Time",main="") |
|
659 |
abline(h=threshold_val,col="green") |
|
660 |
|
|
661 |
par(new=TRUE) # key: ask for new plot without erasing old |
|
662 |
#plot(x,y,type="l",col=t_col[k],xlab="",ylab="",lty="dotted",axes=F) #plotting fusion profile |
|
663 |
plot(df2[[var_name]],pch=10,cex=2.5,col="red", axes=F,ylab="",xlab="") |
|
664 |
#points(d_z2$COL_SCORE,col="red",pch=10,cex=2) |
|
665 |
legend("topleft",legend=c("stations"), |
|
666 |
cex=1.2,col="red",pch =10,bty="n") |
|
667 |
|
|
668 |
axis(4,cex=1.2) |
|
669 |
mtext(4, text = "coliform scores", line = 3) |
|
670 |
|
|
671 |
title(paste("Station time series",id_name,sep=" ")) |
|
672 |
|
|
673 |
dev.off() |
|
674 |
|
|
675 |
#Figure 3c |
|
676 |
png(filename=paste("Figure3c_","pixel_profile_var_combined_log_scale_",id_name,"_",out_suffix,".png",sep=""), |
|
677 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
678 |
|
|
679 |
#plot(d_z,lty=2,ylab="rainfall",xlab="Time",main="") |
|
680 |
#points(d_z2$COL_SCORE,col="red",pch=10,cex=2) |
|
681 |
plot(d_z,lty=2,ylab="rainfall",xlab="Time",main="") |
|
682 |
abline(h=threshold_val,col="green") |
|
683 |
par(new=TRUE) # key: ask for new plot without erasing old |
|
684 |
#plot(x,y,type="l",col=t_col[k],xlab="",ylab="",lty="dotted",axes=F) #plotting fusion profile |
|
685 |
#plot(log(df2$COL_SCORE),pch=10,cex=2.5,col="red", axes=F,ylab="",xlab="") |
|
686 |
plot(log(df2[[var_name]]),pch=10,cex=2.5,col="red", axes=F,ylab="",xlab="") |
|
687 |
|
|
688 |
#points(d_z2$COL_SCORE,col="red",pch=10,cex=2) |
|
689 |
legend("topleft",legend=c("stations"), |
|
690 |
cex=1.2,col="red",pch =10,bty="n") |
|
691 |
|
|
692 |
axis(4,cex=1.2) |
|
693 |
mtext(4, text = "coliform scores", line = 3) |
|
694 |
|
|
695 |
title(paste("Station time series",id_name,sep=" ")) |
|
696 |
|
|
697 |
dev.off() |
|
698 |
|
|
699 |
####Histogram of values |
|
700 |
|
|
701 |
res_pix <- 480 |
|
702 |
col_mfrow <- 2 |
|
703 |
row_mfrow <- 1 |
|
704 |
|
|
705 |
png(filename=paste("Figure4_","histogram_measurements_",year_processed,"_",id_name,"_",out_suffix,".png",sep=""), |
|
706 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
707 |
|
|
708 |
hist_val <- hist(df2[[var_name]],main="",xlab="COLIFORM SCORES") |
|
709 |
#hist_val <- hist(df2$COL_SCORE,main="",xlab="COLIFORM SCORES") |
|
710 |
title(paste("Histrogram of coliform scores for station",id_name,"in",year_processed,sep=" ")) |
|
711 |
#abline(v=threshold_val,col="green" ) |
|
712 |
legend("topright",legend=c("treshold val"), |
|
713 |
cex=1.2, col="green",lty =1,bty="n") |
|
714 |
|
|
715 |
y_loc <- max(hist_val$counts)/2 |
|
716 |
|
|
717 |
#text(threshold_val,y_loc,paste(as.character(threshold_val)),pos=1,offset=0.1) |
|
718 |
|
|
719 |
dev.off() |
|
720 |
|
|
721 |
#res_pix <- 480 |
|
722 |
#col_mfrow <- 2 |
|
723 |
#row_mfrow <- 1 |
|
724 |
|
|
725 |
#png(filename=paste("Figure4_","histogram_coliform_measurements_",year_processed,"_",id_name,"_",out_suffix,".png",sep=""), |
|
726 |
# width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
727 |
|
|
728 |
plot(df2$rainfall) |
|
729 |
#plot(df2$rainfall,df2$COL_SCORE) |
|
730 |
#plot(log(df2$rainfall),log(df2$COL_SCORE)) |
|
731 |
plot(df2$rainfall,df2[[var_name]]) |
|
732 |
plot(df2$rainfall,log(df2[[var_name]])) |
|
659 |
#check for duplicates in extracted values (this can happen if there is a test layer or repetition |
|
660 |
if(nrow(pix_ts)!=length(unique(pix_ts$date))){ |
|
661 |
var_pred_tmp <- paste0(var_pred,"_mosaic") |
|
662 |
md <- melt(pix_ts, id=(c("date")),measure.vars=c(var_pred_tmp, "missing")) #c("x","y","dailyTmax","mod1","res_mod1")) |
|
663 |
#formula_str <- "id + date ~ x + y + dailyTmax + mod1 + res_mod1" |
|
664 |
pix_ts <- cast(md, date ~ variable, fun.aggregate = mean, |
|
665 |
na.rm = TRUE) |
|
733 | 666 |
|
734 |
|
|
735 |
|
|
736 | 667 |
} |
737 | 668 |
|
738 |
## Now correlation. |
|
739 |
#sum(is.na(df2$rainfall)) |
|
740 |
#[1] 0 |
|
741 |
nb_zero <- sum((df2$rainfall==0)) #203 |
|
669 |
#if(nrow(var_pix)!=length(unique(var_pix$date))){ |
|
670 |
# |
|
671 |
# md <- melt(var_pix, id=(c("date")),measure.vars=c(var_pred, "missing")) #c("x","y","dailyTmax","mod1","res_mod1")) |
|
672 |
# #formula_str <- "id + date ~ x + y + dailyTmax + mod1 + res_mod1" |
|
673 |
# test <- cast(md, date ~ variable, fun.aggregate = mean, |
|
674 |
# na.rm = TRUE) |
|
675 |
# |
|
676 |
# |
|
677 |
#} |
|
678 |
df_pix_ts <- merge(pix_ts,var_pix,by="date",all=T) |
|
679 |
#Create time series object from extract pixel time series |
|
680 |
|
|
681 |
nb_zero <- sum( df_pix_ts[[var_pred_tmp]]==0) #relevant for precip |
|
742 | 682 |
#nb_NA <- sum(is.na(df2$COL_SCORE)) |
743 |
nb_NA <- sum(is.na(df2[[var_name]])) #for ID 394 DMR it is 361 missing values for 2012!!
|
|
683 |
nb_NA <- sum(is.na( df_pix_ts[[var_pred_tmp]])) #for ID 394 DMR it is 361 missing values for 2012!!
|
|
744 | 684 |
## Cumulated precip and lag? |
745 | 685 |
#Keep number of 0 for every year for rainfall |
746 | 686 |
#summarize by month |
747 | 687 |
#Kepp number of NA for scores... |
748 | 688 |
#Summarize by season... |
749 | 689 |
## Threshold? |
750 |
station_summary_obj <- list(nb_zero,nb_NA,df2)
|
|
751 |
names(station_summary_obj) <- c("nb_zero_precip","nb_NA_var","df_combined")
|
|
690 |
station_summary_obj <- list(nb_zero,nb_NA, df_pix_ts)
|
|
691 |
names(station_summary_obj) <- c("nb_zero_precip","nb_NA_var"," df_pix_ts")
|
|
752 | 692 |
return(station_summary_obj) |
753 | 693 |
} |
754 | 694 |
|
... | ... | |
771 | 711 |
|
772 | 712 |
#head(pix_ts) |
773 | 713 |
|
714 |
|
|
715 |
##################### plot time series: make this a function!!! #### |
|
774 | 716 |
###start of plotting |
775 | 717 |
### makes this a function: |
776 | 718 |
id_selected <- "82111099999" |
Also available in: Unified diff
removing code related to time series and combine extracted and stations information from assessment