Project

General

Profile

« Previous | Next » 

Revision ed76cbaa

Added by Benoit Parmentier over 8 years ago

Adding a function to match a specific set of station locations from the assembled training and testing stations

View differences:

climate/research/oregon/interpolation/global_product_assessment_part1.R
352 352

  
353 353
#id_selected <- "82111099999"
354 354
#dim(df_points)
355
list_id_data_v <- unique(data_stations_var_pred_data_v$id)
356
list_id_data_s <- unique(data_stations_var_pred_data_s$id)
355
############### PART5: Extraction of temporal profile #############
356
#### Extract time series from raster stack
357
### Add dates to make it a date object?
358
#-72,42.24 #near Spencer MA
359
#48.281724, -71.401815 near Saguenay Quebec
360
#39.805293, -89.872626 near Springfield Illinois
361
#32.009676, -106.990266 near El Paso Us Texas
362
#39.955529, -105.263851: near Boulder Colorado
363
#45.384051, -121.859146 : near lost Creek near Mount Hood Oregon
364
#53.283685, -113.341702: leduc Canada near Edmonton
365
#39.009052, -77.026875: Silver Spring Sligo Creek, MD
366
#36.627806, -119.928901 : Raisin City, Fresno CA
367
#36.677139, -115.330122: Las Vegas
368
#35.696143, -78.396011: near Raleigh NC
369

  
370
list_lat_long <- list(
371
c( -72, 42.24),
372
c( -71.401815, 48.281724),
373
c( -89.872626, 39.805293),
374
c( -106.990266, 32.009676),
375
c( -105.263851, 39.955529), 
376
c( -121.859146,45.384051),
377
c( -113.341702,53.283685),
378
c( -77.026875,39.009052),
379
c( -119.928901,36.627806),
380
c(  -115.330122,36.677139),
381
c( -78.396011,35.696143)) 
382

  
383

  
384
query_for_station_lat_long <- function(point_val,df_points_spdf,step_x=0.25,step_y=0.25){
  # Select one station record near a query location from a set of points.
  #
  # point_val: numeric vector c(longitude, latitude) of the query location.
  # df_points_spdf: object with numeric columns/fields x (longitude) and
  #   y (latitude) that supports subset() and nrow(), e.g. a data.frame or
  #   a SpatialPointsDataFrame.
  # step_x, step_y: search-window sizes in degrees.
  #
  # Returns the first row falling inside the rectangular search window
  # (same class as df_points_spdf), or NULL when no point matches.
  #
  # TODO: make this function better using a circular buffer (e.g. gBuffer)
  # plus random sampling within it; the rectangular query works for now.
  x_val <- point_val[1]
  y_val <- point_val[2]

  # Latitude window: (y_val, y_val + step_y), i.e. strictly above the query.
  y_val_tmp <- y_val + step_y

  # Longitude window: step toward zero for positive longitudes, away from
  # zero otherwise (preserves the original direction of the search).
  if(x_val>0){
    x_val_tmp <- x_val - step_x
  }else{
    x_val_tmp <- x_val + step_x
  }
  # Bug fix: order the longitude bounds explicitly. The original tested
  # x_val < x & x < x_val_tmp, which can never be TRUE when x_val > 0
  # (there x_val_tmp < x_val), so eastern-hemisphere queries always
  # returned NULL.
  x_lo <- min(x_val, x_val_tmp)
  x_hi <- max(x_val, x_val_tmp)

  test_df <- subset(df_points_spdf,(y_val < df_points_spdf$y & df_points_spdf$y < y_val_tmp))
  test_df2 <- subset(test_df,(x_lo < test_df$x & test_df$x < x_hi))
  #plot(test_df2)

  if(nrow(test_df2)>0){
    # Arbitrary tie-break: keep the first candidate inside the window.
    df_selected <- test_df2[1,]
  }else{
    df_selected <- NULL
  }

  return(df_selected)
}
409

  
410
test_day_query2 <- lapply(list_lat_long,FUN=query_for_station_lat_long,df_points_spdf=df_point_day,step_x=1,step_y=1)
411
#test_day_query <-query_for_station_lat_long(c(-72,42),df_points_spdf=df_point_day,step_x=1,step_y=0.25)
412
df_stations_selected <- do.call(rbind,test_day_query2)
413
proj4string(df_stations_selected) <- proj_str
414
#debug(query_for_station_lat_long)
415

  
416
##Next use the day results and select a mean station, quartile and min and max?
417

  
418
#list_id_data_v <- unique(data_stations_var_pred_data_v$id)
419
#list_id_data_s <- unique(data_stations_var_pred_data_s$id)
420

  
421
#Started at 4pm: on sept 9
422
list_id_data_v <- df_stations_selected$id
423
list_id_data_s <- df_stations_selected$id
357 424

  
358 425
### loop through all files and get the time series
359 426

  
......
382 449
data_s_subset$testing <- 0
383 450
data_v_subset$testing <- 1
384 451
# a station can be used multiple times as training or testing within a day because of the overlap of tiles.
452
write.table(data_v_subset,"data_v_subset_test.txt")
453
write.table(data_s_subset,"data_s_subset_test.txt")
454
##finish at 16.10pm on 09/09
455

  
385 456

  
386 457
#data_stations <- rbind(data_s_subset,data_v_subset)
387 458
dim(data_s_subset)
......
405 476

  
406 477
##Add tile id here...and check if data stations was training or testing.
407 478

  
479
#16.30 pm on 09/09
480
#data_stations_var_pred <- aggregate(id2 + date ~ x + y + dailyTmax + mod1 + res_mod1 ,data = data_stations, FUN=mean ) #+ mod1 + res_mod1 , data = data_stations, min)
481
dim(data_stations_var_pred)
482
#md <- melt(mydata, id=(c("id", "time")),)
483
md <- melt(data_stations, id=(c("id", "date")),measure.vars=c("x","y","dailyTmax","mod1","res_mod1"))
484
#formula_str <- "id + date ~ x + y + dailyTmax + mod1 + res_mod1"
485
data_stations_var_pred <- cast(md, id + date ~ variable, fun.aggregate = mean, 
486
  na.rm = TRUE)
487

  
488
write.table(data_stations_var_pred,
489
            file=file.path(out_dir,paste0("data_stations_var_pred_tmp_",out_suffix,".txt",
490
                                                                 sep=",")))
491
write.table(data_stations_var_pred_training_testing,"data_stations_var_pred_training_testing.txt")
492

  
493
md <- melt(data_stations, id=(c("id", "date")),measure.vars=c("training","testing"))
494
data_stations_training_testing <- cast(md, id + date ~ variable, fun.aggregate = sum, 
495
  na.rm = TRUE)
496

  
497
write.table(data_stations_training_testing,
498
            file=file.path(out_dir,paste0("data_stations_training_testing_",out_suffix,".txt",
499
                                                                 sep=",")))
500

  
501
#data_stations_var_pred <- aggregate(id2 ~ x + y + date + dailyTmax + mod1 + res_mod1 ,data = data_stations, mean ) #+ mod1 + res_mod1 , data = data_stations, min)
502
#data_stations$id2 <- as.numeric(data_stations$id)
503
#data_stations$date <- as.character(data_stations$date)
408 504

  
409
data_stations_var_pred <- aggregate(id ~ x + y + date + dailyTmax + mod1 + res_mod1 ,data = data_stations, mean ) #+ mod1 + res_mod1 , data = data_stations, min)
410 505
dim(data_stations_var_pred)
411 506
#> dim(data_stations_var_pred)
412
#[1] 11171     5
413
write.table(data_stations_var_pred,"data_stations_var_pred_tmp.txt")
414
data_stations_var_pred_training_testing <- aggregate(id ~ training + testing ,data = data_stations, sum ) #+ mod1 + res_mod1 , data = data_stations, min)
415
write.table(data_stations_var_pred_training_testing,"data_stations_var_pred_training_testing.txt")
507
#[1] 57154     7
508
unique(data_stations_var_pred$id)
509
dim(data_stations_training_testing)
510
#[1] 57154     4
416 511

  
417 512
data_stations_var_pred$date_str <- data_stations_var_pred$date
418 513
data_stations_var_pred$date <- as.Date(strptime(data_stations_var_pred$date_str,"%Y%m%d"))
......
422 517
#data_stations_var_pred2 <- aggregate(date ~ training,data = data_stations, sum ) #+ mod1 + res_mod1 , data = data_stations, min)
423 518

  
424 519
data_stations_var_pred <- cbind(data_stations_var_pred,data_stations_var_pred_training_testing)
520

  
425 521
write.table(data_stations_var_pred,"data_stations_var_pred.txt")
426 522
#started at 16.51, 09/07
427 523

  
428 524
############### PART3: Make raster stack and display maps #############
429 525
#### Extract corresponding raster for given dates and plot stations used
430 526

  
431
##Now grab year 1992 or matching year...maybe put this in a data.frame??
432

  
433
if(is.null(r_mosaic_fname)){
434
  pattern_str <-"*.tif"
435
  lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=F,full.names=T)
436
  r_mosaic <- stack(lf_mosaic_list,quick=T)
437
  save(r_mosaic,file="r_mosaic.RData")
438
  
439
}else{
440
  r_mosaic <- load_obj(r_mosaic_fname) #load raster stack of images
441
}
442 527

  
443 528
#start_date <- day_to_mosaic_range[1]
444 529
#end_date <- day_to_mosaic_range[2]
......
447 532

  
448 533
#date_to_plot <- seq(as.Date(strptime(start_date,"%Y%m%d")), as.Date(strptime(end_date,"%Y%m%d")), 'day')
449 534
#l_dates <- format(date_to_plot,"%Y%m%d") #format back to the relevant date format for files
450
mask_pred <- FALSE
451
matching <- FALSE #to be added after mask_pred option
452
list_param_pre_process <- list(raster_name_lf,python_bin,infile_mask,scaling,mask_pred,NA_flag_val,out_suffix,out_dir) 
453
names(list_param_pre_process) <- c("lf","python_bin","infile_mask","scaling","mask_pred","NA_flag_val","out_suffix","out_dir") 
535
#mask_pred <- FALSE
536
#matching <- FALSE #to be added after mask_pred option
537
#list_param_pre_process <- list(raster_name_lf,python_bin,infile_mask,scaling,mask_pred,NA_flag_val,out_suffix,out_dir) 
538
#names(list_param_pre_process) <- c("lf","python_bin","infile_mask","scaling","mask_pred","NA_flag_val","out_suffix","out_dir") 
454 539
  
455 540
#debug(pre_process_raster_mosaic_fun)
456 541

  
457
lf_mosaic_scaled <- mclapply(1:length(raster_name_lf),FUN=pre_process_raster_mosaic_fun,list_param=list_param_pre_process,mc.preschedule=FALSE,mc.cores = num_cores)                         
542
#lf_mosaic_scaled <- mclapply(1:length(raster_name_lf),FUN=pre_process_raster_mosaic_fun,list_param=list_param_pre_process,mc.preschedule=FALSE,mc.cores = num_cores)                         
458 543
#lf_mosaic_scaled <- mclapply(1:length(raster_name_lf),FUN=pre_process_raster_mosaic_fun,list_param=list_param_pre_process,mc.preschedule=FALSE,mc.cores = num_cores)                         
459 544

  
460 545
#test <- pre_process_raster_mosaic_fun(2,list_param_pre_process)
......
490 575
## Check dates predicted against date range for a given date range
491 576
## Join file information to centroids of tiles data.frame
492 577

  
493
## Checking new files:
494
#in_dir_mosaic <- "/nobackupp6/aguzman4/climateLayers/out/reg4/mosaics2/mosaic"
495
#/nobackupp6/aguzman4/climateLayers/out/reg4/mosaics2/mosaic/output_reg4_*/r_m_use_edge_weights_weighted_mean_mask_gam_CAI_dailyTmax_*_reg4_*.tif
578
##Now grab year 1992 or matching year...maybe put this in a data.frame??
579

  
496 580
pattern_str <- "r_m_use_edge_weights_weighted_mean_mask_gam_CAI_dailyTmax_.*._reg4_.*.tif"
497 581
searchStr = paste(in_dir_mosaic,"/output_reg4_2014",year_processed,"/gam_CAI_dailyTmax_predicted_",pred_mod_name,"*",day_to_mosaic[i],"*.tif",sep="")
498 582
pattern_str <- ".*.tif"
499 583
in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg1/mosaics/mosaic"
500 584
#lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern="*.tif",recursive=T)
501
lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=T)
502 585

  
503
writeRaster()
586
if(is.null(r_mosaic_fname)){
587
  pattern_str <-"*.tif"
588
  lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=F,full.names=T)
589
  r_mosaic <- stack(lf_mosaic_list,quick=T)
590
  save(r_mosaic,file="r_mosaic.RData")
591
  
592
}else{
593
  r_mosaic <- load_obj(r_mosaic_fname) #load raster stack of images
594
}
504 595

  
505
lf_mosaic_list <- lapply(1:length(day_to_mosaic),
506
                         FUN=function(i){
507
                           searchStr = paste(in_dir_tiles_tmp,"/*/",year_processed,"/gam_CAI_dailyTmax_predicted_",pred_mod_name,"*",day_to_mosaic[i],"*.tif",sep="")
508
                           Sys.glob(searchStr)})
509 596

  
510 597
#r_mosaic_ts <- stack(lf_mosaic_list)
511 598
#df_centroids <- extract(df_centroids,r_mosaic_ts)
......
525 612
df_produced <- data.frame(lf_mosaic_list,list_dates_produced_date_val,month_str,year_str,day_str)
526 613

  
527 614
date_start <- "19840101"
528
date_end <- "20141231"
615
date_end <- "19991231"
529 616
date1 <- as.Date(strptime(date_start,"%Y%m%d"))
530 617
date2 <- as.Date(strptime(date_end,"%Y%m%d"))
531 618
dates_range <- seq.Date(date1, date2, by="1 day") #sequence of dates
......
541 628
df_points$year <- year_str
542 629
df_points$day <- day_str
543 630

  
544
############### PART5: Extraction of temporal profile #############
545
#### Extract time series from raster stack
546
### Add dates to make it a date object?
547
#-65,-22
631

  
548 632

  
549 633
#Use the global output??
550 634

  
......
553 637
#df_points_day_extracted_fname <- paste0("df_points_day_extracted_fname",".txt") 
554 638
#write.table(df_points_day_extracted,file=df_points_day_extracted_fname) #5.1 Giga
555 639
#4.51 (on 05/23)
556

  
640
df_points_day <- data_stations_var_pred_data_s
557 641
if(is.null(df_points_extracted_fname)){
558
  
642
  #15.17 (on 09/08)
559 643
  ##10.41 (on 05/22)
560 644
  df_points_day_extracted <- extract(r_mosaic,df_points_day,df=T)
561 645
  #17.19 (on 05/23)
646
  #22.27 (on 09/08)
562 647
  df_points_day_extracted_fname <- paste0("df_points_day_extracted_fname2",".txt")
563 648
  #17.27 (on 05/23)
564 649
  write.table(df_points_day_extracted,file=df_points_day_extracted_fname,sep=",",row.names = F) 
......
571 656
head(df_points_day) #contains ID, dates and coordinates
572 657
df_points_day$id
573 658

  
659
max_idst <- 0.009*5 #5km in degree
660
min_dist <- 0    #minimum distance to start with
661

  
662
###this needs be modified
663
## target number
664
target_max_nb <- 1 #this is not actually used yet in the current implementation
665
target_min_nb <- 5 #this is the target number of stations we would like
666
max_dist <- 1000 # the maximum distance used for pruning ie removes stations that are closer than 1000m 
667
min_dist <- 0    #minimum distance to start with
668
step_dist <- 1000 #iteration step to remove the stations
669
target_range_nb <- c(target_min_nb,target_max_nb) #target range of number of stations
670
#First increase distance till 5km
671
#then use random sampling...to get the exact target?
672
#First test with maximum distance of 100m
673
test1 <- sub_sampling_by_dist(target_range_nb,dist=min_dist,max_dist=max_dist,step=step_dist,data_in=df_points_day)
674

  
675

  
676

  
574 677
pix_ts <- as.data.frame(t(df_points_day_extracted))
575 678
pix_ts <- pix_ts[-1,]
576 679
#var_names <- rownames(df_points_day_extracted) #same as lf_mosaic_list but without (*.tif)

Also available in: Unified diff