Project

General

Profile

« Previous | Next » 

Revision 4543f113

Added by Benoit Parmentier over 8 years ago

gathering stations data for building time series, clean up of script for product assessment part1

View differences:

climate/research/oregon/interpolation/global_product_assessment_part1.R
4 4
#Combining tables and figures for individual runs for years and tiles.
5 5
#AUTHOR: Benoit Parmentier 
6 6
#CREATED ON: 05/15/2016  
7
#MODIFIED ON: 09/05/2016            
7
#MODIFIED ON: 09/06/2016            
8 8
#Version: 1
9 9
#PROJECT: Environmental Layers project     
10 10
#COMMENTS: Initial commit, script based on part NASA biodiversity conferenc 
......
85 85
source(file.path(script_path,function_assessment_part3)) #source all functions used in this script 
86 86

  
87 87
#Product assessment
88
function_product_assessment_part1_functions <- "global_product_assessment_part1_functions_09052016.R"
88
function_product_assessment_part1_functions <- "global_product_assessment_part1_functions_09062016.R"
89 89
source(file.path(script_path,function_product_assessment_part1_functions)) #source all functions used in this script 
90 90

  
91 91
###############################
......
163 163
df_centroids_fname <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg1/mosaic/output_reg1_1984/df_centroids_19840101_reg1_1984.txt"
164 164
#/nobackupp6/aguzman4/climateLayers/out/reg1/assessment//output_reg1_1984/df_assessment_files_reg1_1984_reg1_1984.txt
165 165

  
166
raster_name_lf <- c("/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920101_reg4_1992_m_gam_CAI_dailyTmax_19920101_reg4_1992.tif",
167
                    "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920102_reg4_1992_m_gam_CAI_dailyTmax_19920102_reg4_1992.tif",
168
                    "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920103_reg4_1992_m_gam_CAI_dailyTmax_19920103_reg4_1992.tif",
169
                    "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920701_reg4_1992_m_gam_CAI_dailyTmax_19920701_reg4_1992.tif",
170
                    "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920702_reg4_1992_m_gam_CAI_dailyTmax_19920702_reg4_1992.tif",
171
                    "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920703_reg4_1992_m_gam_CAI_dailyTmax_19920703_reg4_1992.tif")
166
#raster_name_lf <- c("/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920101_reg4_1992_m_gam_CAI_dailyTmax_19920101_reg4_1992.tif",
167
#                    "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920102_reg4_1992_m_gam_CAI_dailyTmax_19920102_reg4_1992.tif",
168
#                    "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920103_reg4_1992_m_gam_CAI_dailyTmax_19920103_reg4_1992.tif",
169
#                    "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920701_reg4_1992_m_gam_CAI_dailyTmax_19920701_reg4_1992.tif",
170
#                    "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920702_reg4_1992_m_gam_CAI_dailyTmax_19920702_reg4_1992.tif",
171
#                    "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920703_reg4_1992_m_gam_CAI_dailyTmax_19920703_reg4_1992.tif")
172 172

  
173 173
#l_dates <- c("19990101","19990102","19990103","19990701","19990702","19990703")
174 174
l_dates <- c("19990101","19990102","19990103","19990104","19990105") #dates to plot and analze
......
326 326
## do training
327 327

  
328 328
##plot summary of staistics
329
list_plot_obj_data_v[[1]]$df_basic_stat
330
names(list_plot_obj_data_v[[1]])
331
list_data_stations_var_pred_data_v <- lapply(list_plot_obj_data_v,FUN=function(x){x$data_stations_var_pred})
332
list_data_stations_var_pred_data_s <- lapply(list_plot_obj_data_s,FUN=function(x){x$data_stations_var_pred})
333

  
334
data_stations_var_pred_data_v <- do.call(rbind,list_data_stations_var_pred_data_v)
335
data_stations_var_pred_data_s <- do.call(rbind,list_data_stations_var_pred_data_s)
336
dim(data_stations_var_pred_data_v)
337
dim(data_stations_var_pred_data_s)
338

  
339
write.table(data_stations_var_pred_data_v,"data_stations_var_pred_data_v")
340
write.table(data_stations_var_pred_data_s,"data_stations_var_pred_data_s")
341

  
329 342

  
330 343
############### PART2: Select stations by ID to build a time series #############
331 344
#### Extract corresponding stationsfor given dates and plot stations used
332 345
## Use station from specific year and date?
333 346

  
334
#####################
347
###################### make this part a function!!!
335 348
#select one station based on id or coordinates and find that in the list of data.frame??
336 349

  
337
id_selected <- "82111099999"
338
dim(df_points)
350
#Make a function to find the closest stations to a givine coordinates?
351
#42.262003, -71.965866 #this is near Spencer MA
352

  
353
#id_selected <- "82111099999"
354
#dim(df_points)
355
list_id_data_v <- unique(data_stations_var_pred_data_v$id)
356
list_id_data_s <- unique(data_stations_var_pred_data_s$id)
339 357

  
340 358
### loop through all files and get the time series
341
extract_from_df <- function(x,col_selected,val_selected){
342
  df_tmp <- read.table(x,stringsAsFactors=F,sep=",")
343
  #data_subset <- subset(data_stations,col_selected==val_selected)
344
  data_subset <- subset(df_tmp,df_tmp$id%in%val_selected)
345
  return(data_subset)
346
}
347 359

  
348 360
lf_data_s_subset <- mclapply(list_data_s_fname,
349 361
                           FUN=extract_from_df,
350 362
                           col_selected="id",
351
                           val_selected=id_selected,
363
                           val_selected=list_id_data_s,
352 364
                           mc.preschedule=FALSE,
353
                           mc.cores = num_cores)                         
365
                           mc.cores = num_cores)   
366
#took less than 8 minutes for 1839 stations
367

  
354 368
lf_data_v_subset <- mclapply(list_data_v_fname,
355 369
                           FUN=extract_from_df,
356 370
                           col_selected="id",
357
                           val_selected=id_selected,
371
                           val_selected=list_id_data_v,
358 372
                           mc.preschedule=FALSE,
359 373
                           mc.cores = num_cores)   
360 374

  
......
364 378
data_s_subset$training <- 1
365 379
data_v_subset$training <- 0
366 380

  
367
data_stations <- rbind(data_s_subset,data_v_subset)
381
## Need a testing variable to  count  later the use of  a station
382
data_s_subset$testing <- 0
383
data_v_subset$testing <- 1
384
# a station can be used multipel times as trainin gor testing within a day because of the overlap of tiles.
385

  
386
#data_stations <- rbind(data_s_subset,data_v_subset)
368 387
dim(data_s_subset)
388
#[1] 21991826       9
369 389
dim(data_v_subset)
390
#[1] 9319967      85
370 391

  
392
##36 minutes to get here
371 393
#rbind.fill(mtcars[c("mpg", "wt")], mtcars[c("wt", "cyl")])
372 394
data_stations <- rbind.fill(data_v_subset, data_s_subset)
395
#dim(data_stations)#one station only but repetition of records because of tiles and dates!!!
396
#[1] 31311793       90
397
dim(data_stations)#one station only but repetition of records because of tiles and dates!!!
398
#[1] 30202891       91
373 399

  
374
coordinates(data_stations) <- cbind(data_stations$x,data_stations$y)
375
proj4string(data_stations) <- CRS_locs_WGS84
376

  
377
dim(data_stations) #one station only but repitition of records because of tiles and dates!!!
378
#> dim(data_stations)
379
#[1] 100458     90
400
#coordinates(data_stations) <- cbind(data_stations$x,data_stations$y)
401
#proj4string(data_stations) <- CRS_locs_WGS84
380 402

  
381 403
#data_stations_var_pred <- aggregate(id ~ date, data = data_stations, min)
382 404
#data_stations_var_pred <- aggregate(id ~ x + y + date + dailyTmax + mod1 + res_mod1 , data = data_stations, min)
......
384 406
##Add tile id here...and check if data stations was training or testing.
385 407

  
386 408

  
387
data_stations_var_pred <- aggregate(id ~ x + y + date + dailyTmax,data = data_stations, min ) #+ mod1 + res_mod1 , data = data_stations, min)
409
data_stations_var_pred <- aggregate(id ~ x + y + date + dailyTmax + mod1 + res_mod1 ,data = data_stations, mean ) #+ mod1 + res_mod1 , data = data_stations, min)
388 410
dim(data_stations_var_pred)
389 411
#> dim(data_stations_var_pred)
390 412
#[1] 11171     5
413
write.table(data_stations_var_pred,"data_stations_var_pred_tmp.txt")
414
data_stations_var_pred_training_testing <- aggregate(id ~ training + testing ,data = data_stations, sum ) #+ mod1 + res_mod1 , data = data_stations, min)
415
write.table(data_stations_var_pred_training_testing,"data_stations_var_pred_training_testing.txt")
391 416

  
392 417
data_stations_var_pred$date_str <- data_stations_var_pred$date
393 418
data_stations_var_pred$date <- as.Date(strptime(data_stations_var_pred$date_str,"%Y%m%d"))
......
396 421
#data_stations_var_pred2 <- aggregate(id ~ training,data = data_stations, sum ) #+ mod1 + res_mod1 , data = data_stations, min)
397 422
#data_stations_var_pred2 <- aggregate(date ~ training,data = data_stations, sum ) #+ mod1 + res_mod1 , data = data_stations, min)
398 423

  
424
data_stations_var_pred <- cbind(data_stations_var_pred,data_stations_var_pred_training_testing)
425
write.table(data_stations_var_pred,"data_stations_var_pred.txt")
426
#started at 16.51, 09/07
427

  
399 428
############### PART3: Make raster stack and display maps #############
400 429
#### Extract corresponding raster for given dates and plot stations used
401 430

  
......
404 433
if(is.null(r_mosaic_fname)){
405 434
  pattern_str <-"*.tif"
406 435
  lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=F,full.names=T)
407
  r_mosaic <- stack(lf_mosaic_list)
436
  r_mosaic <- stack(lf_mosaic_list,quick=T)
408 437
  save(r_mosaic,file="r_mosaic.RData")
438
  
409 439
}else{
410 440
  r_mosaic <- load_obj(r_mosaic_fname) #load raster stack of images
411 441
}
......
465 495
#/nobackupp6/aguzman4/climateLayers/out/reg4/mosaics2/mosaic/output_reg4_*/r_m_use_edge_weights_weighted_mean_mask_gam_CAI_dailyTmax_*_reg4_*.tif
466 496
pattern_str <- "r_m_use_edge_weights_weighted_mean_mask_gam_CAI_dailyTmax_.*._reg4_.*.tif"
467 497
searchStr = paste(in_dir_mosaic,"/output_reg4_2014",year_processed,"/gam_CAI_dailyTmax_predicted_",pred_mod_name,"*",day_to_mosaic[i],"*.tif",sep="")
468

  
498
pattern_str <- ".*.tif"
499
in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg1/mosaics/mosaic"
469 500
#lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern="*.tif",recursive=T)
470 501
lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=T)
502

  
503
writeRaster()
504

  
471 505
lf_mosaic_list <- lapply(1:length(day_to_mosaic),
472 506
                         FUN=function(i){
473 507
                           searchStr = paste(in_dir_tiles_tmp,"/*/",year_processed,"/gam_CAI_dailyTmax_predicted_",pred_mod_name,"*",day_to_mosaic[i],"*.tif",sep="")
......
792 826
#### PLOT ACCURACY METRICS: First test ####
793 827
##this will be cleaned up later:
794 828

  
795
dir_ac_mosaics <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg4/mosaic/output_reg4_1999"
829
#dir_ac_mosaics <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg4/mosaic/output_reg4_1999"
796 830
lf_tmp <-list.files(path=dir_ac_mosaics,pattern="r_m_use_edge_weights_weighted_mean_mask_gam_CAI_.*.ac.*._reg4_1999.tif")
797 831

  
798 832
#lf_tmp1 <- lf_tmp[21:24]

Also available in: Unified diff