Revision ed76cbaa
Added by Benoit Parmentier over 8 years ago
climate/research/oregon/interpolation/global_product_assessment_part1.R | ||
---|---|---|
352 | 352 |
|
353 | 353 |
#id_selected <- "82111099999" |
354 | 354 |
#dim(df_points) |
355 |
list_id_data_v <- unique(data_stations_var_pred_data_v$id) |
|
356 |
list_id_data_s <- unique(data_stations_var_pred_data_s$id) |
|
355 |
############### PART5: Extraction of temporal profile ############# |
|
356 |
#### Extract time series from raster stack |
|
357 |
### Add dates to make it a date object? |
|
358 |
#-72,42.24 #near Spencer MA |
|
359 |
#48.281724, -71.401815 near Saguenay Quebec |
|
360 |
#39.805293, -89.872626 near Springfield Illinois |
|
361 |
#32.009676, -106.990266 near El Paso Us Texas |
|
362 |
#39.955529, -105.263851: near Boulder Colorado |
|
363 |
#45.384051, -121.859146 : near lost Creek near Mount Hood Oregon |
|
364 |
#53.283685, -113.341702: leduc Canada near Edmonton |
|
365 |
#39.009052, -77.026875: Silver Spring Sligo Creek, MD |
|
366 |
#36.627806, -119.928901 : Raisin City, Fresno CA |
|
367 |
#36.677139, -115.330122: Las Vegas |
|
368 |
#35.696143, -78.396011: near Raleigh NC |
|
369 |
|
|
370 |
## Query points (x = longitude, y = latitude, decimal degrees), one per
## target location: Spencer MA, Saguenay QC, Springfield IL, El Paso TX,
## Boulder CO, Mount Hood OR, Leduc AB, Silver Spring MD, Raisin City CA,
## Las Vegas NV, Raleigh NC.
list_lat_long <- local({
  xy <- matrix(c(
    -72.000000, 42.240000,
    -71.401815, 48.281724,
    -89.872626, 39.805293,
    -106.990266, 32.009676,
    -105.263851, 39.955529,
    -121.859146, 45.384051,
    -113.341702, 53.283685,
    -77.026875, 39.009052,
    -119.928901, 36.627806,
    -115.330122, 36.677139,
    -78.396011, 35.696143
  ), ncol = 2, byrow = TRUE)
  lapply(seq_len(nrow(xy)), function(i) xy[i, ])
})
|
382 |
|
|
383 |
|
|
384 |
query_for_station_lat_long <- function(point_val,df_points_spdf,step_x=0.25,step_y=0.25){
  ## Select one station record located within a small rectangular window
  ## adjacent to a given lon/lat point.
  ##
  ## point_val:      numeric vector c(x, y), i.e. c(longitude, latitude)
  ## df_points_spdf: data.frame (or SpatialPointsDataFrame -- TODO confirm
  ##                 with caller) with numeric columns x and y holding
  ##                 station coordinates
  ## step_x, step_y: size of the search window in degrees
  ##
  ## Returns the first row falling inside the window, or NULL when no
  ## station matches.
  ## Improve: circular query (e.g. gBuffer) + random pick within it.

  x_val <- point_val[1]
  y_val <- point_val[2]

  ## The window extends step_y northward of the point; along x it extends
  ## toward the Greenwich meridian (eastward for negative longitudes,
  ## westward for positive ones).
  if (x_val > 0) {
    x_val_tmp <- x_val - step_x
  } else {
    x_val_tmp <- x_val + step_x
  }
  ## Bug fix: order the x bounds explicitly. The original compared
  ## x_val < x < x_val_tmp, which is an empty interval whenever
  ## x_val > 0 (x_val_tmp < x_val there), so eastern-hemisphere queries
  ## always returned NULL. For negative longitudes the window is unchanged.
  x_min <- min(x_val, x_val_tmp)
  x_max <- max(x_val, x_val_tmp)
  y_min <- y_val
  y_max <- y_val + step_y

  test_df <- subset(df_points_spdf, (y_min < df_points_spdf$y & df_points_spdf$y < y_max))
  test_df2 <- subset(test_df, (x_min < test_df$x & test_df$x < x_max))
  #plot(test_df2)

  if (nrow(test_df2) > 0) {
    df_selected <- test_df2[1, ]  # keep only the first station found
  } else {
    df_selected <- NULL           # no station inside the window
  }

  return(df_selected)
}
|
409 |
|
|
410 |
test_day_query2 <- lapply(list_lat_long,FUN=query_for_station_lat_long,df_points_spdf=df_point_day,step_x=1,step_y=1) |
|
411 |
#test_day_query <-query_for_station_lat_long(c(-72,42),df_points_spdf=df_point_day,step_x=1,step_y=0.25) |
|
412 |
df_stations_selected <- do.call(rbind,test_day_query2) |
|
413 |
proj4string(df_stations_selected) <- proj_str |
|
414 |
#debug(query_for_station_lat_long) |
|
415 |
|
|
416 |
##Next use the day results and select a mean station, quartile and min and max? |
|
417 |
|
|
418 |
#list_id_data_v <- unique(data_stations_var_pred_data_v$id) |
|
419 |
#list_id_data_s <- unique(data_stations_var_pred_data_s$id) |
|
420 |
|
|
421 |
#Started at 4pm: on sept 9 |
|
422 |
list_id_data_v <- df_stations_selected$id |
|
423 |
list_id_data_s <- df_stations_selected$id |
|
357 | 424 |
|
358 | 425 |
### loop through all files and get the time series |
359 | 426 |
|
... | ... | |
382 | 449 |
data_s_subset$testing <- 0 |
383 | 450 |
data_v_subset$testing <- 1 |
384 | 451 |
# a station can be used multiple times as training or testing within a day because of the overlap of tiles. |
452 |
write.table(data_v_subset,"data_v_subset_test.txt") |
|
453 |
write.table(data_s_subset,"data_s_subset_test.txt") |
|
454 |
##finish at 16.10pm on 09/09 |
|
455 |
|
|
385 | 456 |
|
386 | 457 |
#data_stations <- rbind(data_s_subset,data_v_subset) |
387 | 458 |
dim(data_s_subset) |
... | ... | |
405 | 476 |
|
406 | 477 |
##Add tile id here...and check if data stations was training or testing. |
407 | 478 |
|
479 |
#16.30 pm on 09/09 |
|
480 |
#data_stations_var_pred <- aggregate(id2 + date ~ x + y + dailyTmax + mod1 + res_mod1 ,data = data_stations, FUN=mean ) #+ mod1 + res_mod1 , data = data_stations, min) |
|
481 |
dim(data_stations_var_pred) |
|
482 |
#md <- melt(mydata, id=(c("id", "time")),) |
|
483 |
md <- melt(data_stations, id=(c("id", "date")),measure.vars=c("x","y","dailyTmax","mod1","res_mod1")) |
|
484 |
#formula_str <- "id + date ~ x + y + dailyTmax + mod1 + res_mod1" |
|
485 |
data_stations_var_pred <- cast(md, id + date ~ variable, fun.aggregate = mean, |
|
486 |
na.rm = TRUE) |
|
487 |
|
|
488 |
write.table(data_stations_var_pred, |
|
489 |
file=file.path(out_dir,paste0("data_stations_var_pred_tmp_",out_suffix,".txt", |
|
490 |
sep=","))) |
|
491 |
write.table(data_stations_var_pred_training_testing,"data_stations_var_pred_training_testing.txt") |
|
492 |
|
|
493 |
md <- melt(data_stations, id=(c("id", "date")),measure.vars=c("training","testing")) |
|
494 |
data_stations_training_testing <- cast(md, id + date ~ variable, fun.aggregate = sum, |
|
495 |
na.rm = TRUE) |
|
496 |
|
|
497 |
write.table(data_stations_training_testing, |
|
498 |
file=file.path(out_dir,paste0("data_stations_training_testing_",out_suffix,".txt", |
|
499 |
sep=","))) |
|
500 |
|
|
501 |
#data_stations_var_pred <- aggregate(id2 ~ x + y + date + dailyTmax + mod1 + res_mod1 ,data = data_stations, mean ) #+ mod1 + res_mod1 , data = data_stations, min) |
|
502 |
#data_stations$id2 <- as.numeric(data_stations$id) |
|
503 |
#data_stations$date <- as.character(data_stations$date) |
|
408 | 504 |
|
409 |
data_stations_var_pred <- aggregate(id ~ x + y + date + dailyTmax + mod1 + res_mod1 ,data = data_stations, mean ) #+ mod1 + res_mod1 , data = data_stations, min) |
|
410 | 505 |
dim(data_stations_var_pred) |
411 | 506 |
#> dim(data_stations_var_pred) |
412 |
#[1] 11171 5
|
|
413 |
write.table(data_stations_var_pred,"data_stations_var_pred_tmp.txt")
|
|
414 |
data_stations_var_pred_training_testing <- aggregate(id ~ training + testing ,data = data_stations, sum ) #+ mod1 + res_mod1 , data = data_stations, min)
|
|
415 |
write.table(data_stations_var_pred_training_testing,"data_stations_var_pred_training_testing.txt")
|
|
507 |
#[1] 57154 7
|
|
508 |
unique(data_stations_var_pred$id)
|
|
509 |
dim(data_stations_training_testing)
|
|
510 |
#[1] 57154 4
|
|
416 | 511 |
|
417 | 512 |
data_stations_var_pred$date_str <- data_stations_var_pred$date |
418 | 513 |
data_stations_var_pred$date <- as.Date(strptime(data_stations_var_pred$date_str,"%Y%m%d")) |
... | ... | |
422 | 517 |
#data_stations_var_pred2 <- aggregate(date ~ training,data = data_stations, sum ) #+ mod1 + res_mod1 , data = data_stations, min) |
423 | 518 |
|
424 | 519 |
data_stations_var_pred <- cbind(data_stations_var_pred,data_stations_var_pred_training_testing) |
520 |
|
|
425 | 521 |
write.table(data_stations_var_pred,"data_stations_var_pred.txt") |
426 | 522 |
#started at 16.51, 09/07 |
427 | 523 |
|
428 | 524 |
############### PART3: Make raster stack and display maps ############# |
429 | 525 |
#### Extract corresponding raster for given dates and plot stations used |
430 | 526 |
|
431 |
##Now grab year year 1992 or matching year...maybe put this in a data.frame?? |
|
432 |
|
|
433 |
if(is.null(r_mosaic_fname)){ |
|
434 |
pattern_str <-"*.tif" |
|
435 |
lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=F,full.names=T) |
|
436 |
r_mosaic <- stack(lf_mosaic_list,quick=T) |
|
437 |
save(r_mosaic,file="r_mosaic.RData") |
|
438 |
|
|
439 |
}else{ |
|
440 |
r_mosaic <- load_obj(r_mosaic_fname) #load raster stack of images |
|
441 |
} |
|
442 | 527 |
|
443 | 528 |
#start_date <- day_to_mosaic_range[1] |
444 | 529 |
#end_date <- day_to_mosaic_range[2] |
... | ... | |
447 | 532 |
|
448 | 533 |
#date_to_plot <- seq(as.Date(strptime(start_date,"%Y%m%d")), as.Date(strptime(end_date,"%Y%m%d")), 'day') |
449 | 534 |
#l_dates <- format(date_to_plot,"%Y%m%d") #format back to the relevant date format for files |
450 |
mask_pred <- FALSE |
|
451 |
matching <- FALSE #to be added after mask_pred option |
|
452 |
list_param_pre_process <- list(raster_name_lf,python_bin,infile_mask,scaling,mask_pred,NA_flag_val,out_suffix,out_dir) |
|
453 |
names(list_param_pre_process) <- c("lf","python_bin","infile_mask","scaling","mask_pred","NA_flag_val","out_suffix","out_dir") |
|
535 |
#mask_pred <- FALSE
|
|
536 |
#matching <- FALSE #to be added after mask_pred option
|
|
537 |
#list_param_pre_process <- list(raster_name_lf,python_bin,infile_mask,scaling,mask_pred,NA_flag_val,out_suffix,out_dir)
|
|
538 |
#names(list_param_pre_process) <- c("lf","python_bin","infile_mask","scaling","mask_pred","NA_flag_val","out_suffix","out_dir")
|
|
454 | 539 |
|
455 | 540 |
#debug(pre_process_raster_mosaic_fun) |
456 | 541 |
|
457 |
lf_mosaic_scaled <- mclapply(1:length(raster_name_lf),FUN=pre_process_raster_mosaic_fun,list_param=list_param_pre_process,mc.preschedule=FALSE,mc.cores = num_cores) |
|
542 |
#lf_mosaic_scaled <- mclapply(1:length(raster_name_lf),FUN=pre_process_raster_mosaic_fun,list_param=list_param_pre_process,mc.preschedule=FALSE,mc.cores = num_cores)
|
|
458 | 543 |
#lf_mosaic_scaled <- mclapply(1:length(raster_name_lf),FUN=pre_process_raster_mosaic_fun,list_param=list_param_pre_process,mc.preschedule=FALSE,mc.cores = num_cores) |
459 | 544 |
|
460 | 545 |
#test <- pre_process_raster_mosaic_fun(2,list_param_pre_process) |
... | ... | |
490 | 575 |
## Check dates predicted against date range for a given date range |
491 | 576 |
## Join file information to centroids of tiles data.frame |
492 | 577 |
|
493 |
## Checking new files: |
|
494 |
#in_dir_mosaic <- "/nobackupp6/aguzman4/climateLayers/out/reg4/mosaics2/mosaic" |
|
495 |
#/nobackupp6/aguzman4/climateLayers/out/reg4/mosaics2/mosaic/output_reg4_*/r_m_use_edge_weights_weighted_mean_mask_gam_CAI_dailyTmax_*_reg4_*.tif |
|
578 |
##Now grab year year 1992 or matching year...maybe put this in a data.frame?? |
|
579 |
|
|
496 | 580 |
pattern_str <- "r_m_use_edge_weights_weighted_mean_mask_gam_CAI_dailyTmax_.*._reg4_.*.tif" |
497 | 581 |
searchStr = paste(in_dir_mosaic,"/output_reg4_2014",year_processed,"/gam_CAI_dailyTmax_predicted_",pred_mod_name,"*",day_to_mosaic[i],"*.tif",sep="") |
498 | 582 |
pattern_str <- ".*.tif" |
499 | 583 |
in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg1/mosaics/mosaic" |
500 | 584 |
#lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern="*.tif",recursive=T) |
501 |
lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=T) |
|
502 | 585 |
|
503 |
writeRaster() |
|
586 |
if(is.null(r_mosaic_fname)){ |
|
587 |
pattern_str <-"*.tif" |
|
588 |
lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=F,full.names=T) |
|
589 |
r_mosaic <- stack(lf_mosaic_list,quick=T) |
|
590 |
save(r_mosaic,file="r_mosaic.RData") |
|
591 |
|
|
592 |
}else{ |
|
593 |
r_mosaic <- load_obj(r_mosaic_fname) #load raster stack of images |
|
594 |
} |
|
504 | 595 |
|
505 |
lf_mosaic_list <- lapply(1:length(day_to_mosaic), |
|
506 |
FUN=function(i){ |
|
507 |
searchStr = paste(in_dir_tiles_tmp,"/*/",year_processed,"/gam_CAI_dailyTmax_predicted_",pred_mod_name,"*",day_to_mosaic[i],"*.tif",sep="") |
|
508 |
Sys.glob(searchStr)}) |
|
509 | 596 |
|
510 | 597 |
#r_mosaic_ts <- stack(lf_mosaic_list) |
511 | 598 |
#df_centroids <- extract(df_centroids,r_mosaic_ts) |
... | ... | |
525 | 612 |
df_produced <- data.frame(lf_mosaic_list,list_dates_produced_date_val,month_str,year_str,day_str) |
526 | 613 |
|
527 | 614 |
date_start <- "19840101" |
528 |
date_end <- "20141231"
|
|
615 |
date_end <- "19991231"
|
|
529 | 616 |
date1 <- as.Date(strptime(date_start,"%Y%m%d")) |
530 | 617 |
date2 <- as.Date(strptime(date_end,"%Y%m%d")) |
531 | 618 |
dates_range <- seq.Date(date1, date2, by="1 day") #sequence of dates |
... | ... | |
541 | 628 |
df_points$year <- year_str |
542 | 629 |
df_points$day <- day_str |
543 | 630 |
|
544 |
############### PART5: Extraction of temporal profile ############# |
|
545 |
#### Extract time series from raster stack |
|
546 |
### Add dates to make it a date object? |
|
547 |
#-65,-22 |
|
631 |
|
|
548 | 632 |
|
549 | 633 |
#Use the global output?? |
550 | 634 |
|
... | ... | |
553 | 637 |
#df_points_day_extracted_fname <- paste0("df_points_day_extracted_fname",".txt") |
554 | 638 |
#write.table(df_points_day_extracted,file=df_points_day_extracted_fname) #5.1 Giga |
555 | 639 |
#4.51 (on 05/23) |
556 |
|
|
640 |
df_points_day <- data_stations_var_pred_data_s |
|
557 | 641 |
if(is.null(df_points_extracted_fname)){ |
558 |
|
|
642 |
#15.17 (on 09/08) |
|
559 | 643 |
##10.41 (on 05/22) |
560 | 644 |
df_points_day_extracted <- extract(r_mosaic,df_points_day,df=T) |
561 | 645 |
#17.19 (on 05/23) |
646 |
#22.27 (on 09/08) |
|
562 | 647 |
df_points_day_extracted_fname <- paste0("df_points_day_extracted_fname2",".txt") |
563 | 648 |
#17.27 (on 05/23) |
564 | 649 |
write.table(df_points_day_extracted,file=df_points_day_extracted_fname,sep=",",row.names = F) |
... | ... | |
571 | 656 |
head(df_points_day) #contains ID, dates and coordinates |
572 | 657 |
df_points_day$id |
573 | 658 |
|
659 |
max_idst <- 0.009*5 #5km in degree |
|
660 |
min_dist <- 0 #minimum distance to start with |
|
661 |
|
|
662 |
###this needs be modified |
|
663 |
## target number |
|
664 |
target_max_nb <- 1 #this is not actually used yet in the current implementation |
|
665 |
target_min_nb <- 5 #this is the target number of stations we would like |
|
666 |
max_dist <- 1000 # the maximum distance used for pruning ie removes stations that are closer than 1000m |
|
667 |
min_dist <- 0 #minimum distance to start with |
|
668 |
step_dist <- 1000 #iteration step to remove the stations |
|
669 |
target_range_nb <- c(target_min_nb,target_max_nb) #target range of number of stations |
|
670 |
#First increase distance till 5km |
|
671 |
#then use random sampling...to get the extact target? |
|
672 |
#First test with maximum distance of 100m |
|
673 |
test1 <- sub_sampling_by_dist(target_range_nb,dist=min_dist,max_dist=max_dist,step=step_dist,data_in=df_points_day) |
|
674 |
|
|
675 |
|
|
676 |
|
|
574 | 677 |
pix_ts <- as.data.frame(t(df_points_day_extracted)) |
575 | 678 |
pix_ts <- pix_ts[-1,] |
576 | 679 |
#var_names <- rownames(df_points_day_extracted) #same as lf_mosaic_list but without (*.tif) |
Also available in: Unified diff
Adding a function to match a specific set of station locations from the assembled stations of training and testing.