Revision ee86120d

Added by Benoit Parmentier over 8 years ago

testing plot for both training and testing and introducing a new function

View differences:

climate/research/oregon/interpolation/global_product_assessment_part1.R
 #Combining tables and figures for individual runs for years and tiles.
 #AUTHOR: Benoit Parmentier
 #CREATED ON: 05/15/2016
-#MODIFIED ON: 08/31/2016
+#MODIFIED ON: 09/02/2016
 #Version: 1
 #PROJECT: Environmental Layers project
 #COMMENTS: Initial commit, script based on part NASA biodiversity conferenc
......
 #
 #setfacl -Rmd user:aguzman4:rwx /nobackupp8/bparmen1/output_run10_1500x4500_global_analyses_pred_1992_10052015
 
+#COMMIT: testing plot for both training and testing and introducing a new function
+
 #################################################################################################
 
 
......
 script_path <- "/home/parmentier/Data/IPLANT_project/env_layers_scripts" #path to script
 
 ## NASA poster and paper related
-source(file.path(script_path,"NASA2016_conference_temperature_predictions_function_05032016b.R"))
+#source(file.path(script_path,"NASA2016_conference_temperature_predictions_function_05032016b.R"))
 
 #Mosaic related on NEX
 #script_path <- "/home/parmentier/Data/IPLANT_project/env_layers_scripts"
-function_mosaicing_functions <- "global_run_scalingup_mosaicing_function_04232016.R" #PARAM12
-function_mosaicing <-"global_run_scalingup_mosaicing_05012016.R"
+function_mosaicing_functions <- "global_run_scalingup_mosaicing_function_08232016.R" #Functions used to mosaic predicted tiles
+function_mosaicing <-"global_run_scalingup_mosaicing_08222016.R" #main scripts for mosaicing predicted tiles
+
 source(file.path(script_path,function_mosaicing)) #source all functions used in this script
 source(file.path(script_path,function_mosaicing_functions)) #source all functions used in this script
 
 #Assessment on NEX
-function_assessment_part1_functions <- "global_run_scalingup_assessment_part1_functions_02112015.R" #PARAM12
+function_assessment_part1_functions <- "global_run_scalingup_assessment_part1_functions_12282015.R" #PARAM12
 function_assessment_part1a <-"global_run_scalingup_assessment_part1a_01042016.R"
 function_assessment_part2 <- "global_run_scalingup_assessment_part2_02092016.R"
 function_assessment_part2_functions <- "global_run_scalingup_assessment_part2_functions_01032016.R"
-function_assessment_part3 <- "global_run_scalingup_assessment_part3_05162016.R"
+function_assessment_part3 <- "global_run_scalingup_assessment_part3_07292016.R"
+
 source(file.path(script_path,function_assessment_part1_functions)) #source all functions used in this script
 source(file.path(script_path,function_assessment_part1a)) #source all functions used in this script
 source(file.path(script_path,function_assessment_part2)) #source all functions used in this script
......
                     "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/comp_r_m_use_edge_weights_weighted_mean_gam_CAI_dailyTmax_19920703_reg4_1992_m_gam_CAI_dailyTmax_19920703_reg4_1992.tif")
 
 #l_dates <- c("19990101","19990102","19990103","19990701","19990702","19990703")
-l_dates <- c("19840110","19840111","19840112","19840113","19840114") #dates to plot and analyze
+l_dates <- c("19990101","19990102","19990103","19990104","19990105") #dates to plot and analyze
 
 #df_points_extracted_fname <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/data_points_extracted.txt"
 df_points_extracted_fname <- NULL #if NULL, compute on the fly
......
 df_data_s_fname <- data.frame(year_str,region_name,dataset="data_s",file=list_data_s_fname)
 
 year_str <- as.character(unlist(lapply(list_data_v_fname, function(x){unlist(strsplit(basename(x),"_"))[5]})))
-df_data_v_fname <- data.frame(year_str,region_name,dataset="data_v",file=list_data_s_fname)
+df_data_v_fname <- data.frame(year_str,region_name,dataset="data_v",file=list_data_v_fname)
 
 df_data_v_fname$year <- df_data_v_fname$year_str
 df_data_v_fname$year <- as.character(df_data_v_fname$year_str)
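The hunk above fixes a copy-paste slip: the testing (data_v) lookup table was built from the training file list (list_data_s_fname), so every data_v row pointed at a data_s file. Since the year extraction is repeated for both lists, it could be factored into a small helper. A minimal sketch, assuming list_data_s_fname and list_data_v_fname are character vectors of file paths whose basenames carry the year as the fifth underscore-separated token; make_fname_df is a hypothetical helper, not part of the script:

make_fname_df <- function(list_fname, dataset_label, region_name){
  #pull the year token (5th underscore-separated element of the basename), as done above
  year_str <- sapply(list_fname, function(x){unlist(strsplit(basename(x), "_"))[5]})
  data.frame(year_str = as.character(year_str),
             region_name = region_name,
             dataset = dataset_label,
             file = list_fname,
             stringsAsFactors = FALSE)
}
#df_data_s_fname <- make_fname_df(list_data_s_fname, "data_s", region_name) #training files
#df_data_v_fname <- make_fname_df(list_data_v_fname, "data_v", region_name) #testing files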
......
 l_dates_day_str <- as.numeric(format(list_dates_val, "%d")) ## numeric day
 
 
+##start new function
 ### Needs to make this a function!!!
 #Step 1: for a list of dates, load the relevant files with matching years
 #Step 2: for the given dates, subset the data.frame
......
   list_df_s_stations[[i]] <- read.table(filename_tmp,stringsAsFactors=F,sep=",")
 }
 
-df_combined <- do.call(rbind,list_df_v_stations)
+df_combined_data_v <- do.call(rbind,list_df_v_stations) #read only the years related to the dates, e.g. 1999
+df_combined_data_s <- do.call(rbind,list_df_s_stations)
 
 #### Get df points for specific dates
 #lapply(1:length(l_dates)list_df_v_stations,function(x){x$date==l_dates})
 #dim(list_df_v_stations[[1]])
-list_df_points_dates <- vector("list",length=length(l_dates))
+list_df_points_data_v_dates <- vector("list",length=length(l_dates))
+list_df_points_data_s_dates <- vector("list",length=length(l_dates))
 for(i in 1:length(l_dates)){
   #year_str <- list_year_str[[i]]
-  list_df_points_dates[[i]] <- subset(df_combined,df_combined$date==l_dates[i])
+  list_df_points_data_v_dates[[i]] <- subset(df_combined_data_v,df_combined_data_v$date==l_dates[i])
+  list_df_points_data_s_dates[[i]] <- subset(df_combined_data_s,df_combined_data_s$date==l_dates[i])
+
 }
 
-df_combined_dates <- do.call(rbind,list_df_points_dates)
+df_combined_data_v_dates <- do.call(rbind,list_df_points_data_v_dates)
+df_combined_data_s_dates <- do.call(rbind,list_df_points_data_s_dates)
+
+####
+
 
 #function(x){x$date==}
 #df_points <- subset(df_stations,df_stations_tmp$date==l_dates[1])
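The loop added above builds one subset per date and then rbinds the pieces back together. Since both combined tables already hold a date column, a single %in% filter gives the same result in one step. A minimal alternative sketch, assuming date is stored as a character column in %Y%m%d form, matching l_dates:

#subset the testing (data_v) and training (data_s) tables to the dates of interest in one step
df_combined_data_v_dates <- subset(df_combined_data_v, date %in% l_dates)
df_combined_data_s_dates <- subset(df_combined_data_s, date %in% l_dates)
#quick check: number of station records available per date and per dataset
table(df_combined_data_v_dates$date)
table(df_combined_data_s_dates$date)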
......
 variable_name
 #8 minutes for 18 dates and reg1 ?
 station_type_name <- "testing"
-
-for(i in 1:length(l_dates)){
-  #d
-
+proj_str <- CRS_locs_WGS84
+list_param_plot_stations_val_by_date <- list(l_dates,df_combined_data_v_dates,countries_shp_tmp,CRS_locs_WGS84,
+                                             station_type_name,model_name,
+                                             var_selected,y_var_name,out_suffix,out_dir)
+names(list_param_plot_stations_val_by_date) <- c("l_dates","df_combined_data_points","countries_shp","proj_str",
+                                                 "station_type_name","model_name",
+                                                 "var_selected","y_var_name","out_suffix","out_dir")
+
+debug(plot_stations_val_by_date)
+test <- plot_stations_val_by_date(1,list_param = list_param_plot_stations_val_by_date)
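The new function follows the named parameter-list convention used throughout these assessment scripts: bundle the inputs in a list, then call the function with an index. Once the single-date test call above returns cleanly, the natural next step is to drop the debugger and run it over every date. A minimal sketch (list_plot_obj is an illustrative name; mclapply from the parallel package would be the usual swap-in on NEX if the per-date runs are slow):

undebug(plot_stations_val_by_date) #stop stepping through once the test call works
list_plot_obj <- lapply(seq_along(l_dates),
                        FUN = plot_stations_val_by_date,
                        list_param = list_param_plot_stations_val_by_date)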
+
+plot_stations_val_by_date <- function(i,list_param){
+  #
+  #
+  #function to plot residuals by date
+  #
+  
+  #####
+  
+  l_dates <- list_param$l_dates
+  model_name <- list_param$model_name
+  station_type_name <- list_param$station_type_name
+  var_selected <- list_param$var_selected
+  #list_df_points_dates <- 
+  df_combined_data_points <- list_param$df_combined_data_points
+  countries_shp <- list_param$countries_shp
+  proj_str <- list_param$proj_str
+  out_suffix <- list_param$out_suffix
+  out_dir <- list_param$out_dir
+  
+  ### STARTFUNCTION ####
 
   date_processed <- l_dates[i]
 
-  df_points <- list_df_points_dates[[i]]
+  df_points <- subset(df_combined_data_points,date==date_processed)
+  #df_points <- list_df_points_dates[[i]]
   #plot(df_points)
-  table(df_points$tile_id)
+  freq_tile_id <- as.data.frame(table(df_points$tile_id))
+  freq_tile_id$date <- date_processed
+  names(freq_tile_id) <- c("tile_id","freq","date")
+  
   #plot(df_points,add=T)
   coordinates(df_points) <- cbind(df_points$x,df_points$y)
-  proj4string(df_points) <- CRS_locs_WGS84
+  #proj4string(df_points) <- CRS_locs_WGS84
+  proj4string(df_points) <- proj_str
   # # No spatial duplicates
   df_points_day <- remove.duplicates(df_points) #remove duplicates...
   #plot(df_points_day)
-  dim(df_points_day)
-  dim(df_points)
+  #dim(df_points_day)
+  #dim(df_points)
   #plot(df_points)
 
   ##layer to be clipped
-  if(class(countries_shp)=!"SpatialPolygonsDataFrame"){
+  if(class(countries_shp)!="SpatialPolygonsDataFrame"){
     reg_layer <- readOGR(dsn=dirname(countries_shp),sub(".shp","",basename(countries_shp)))
+  }else{
+    reg_layer <- countries_shp
   }
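The added else branch lets countries_shp arrive either as a path to a shapefile or as an object already loaded in memory. In current R idiom, inherits() is a safer test than comparing class() to a single string, since class() can return more than one element. A hedged sketch of the same guard, not part of the commit:

  #accept either a file path or an in-memory SpatialPolygonsDataFrame
  if(!inherits(countries_shp, "SpatialPolygonsDataFrame")){
    reg_layer <- readOGR(dsn = dirname(countries_shp),
                         layer = sub(".shp", "", basename(countries_shp)))
  }else{
    reg_layer <- countries_shp
  }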
 
   #extent_df_points_day <- extent(df_points_day)
 
   reg_layer_clipped <- gClip(shp=reg_layer, bb=df_points_day , keep.attribs=TRUE,outDir=NULL,outSuffix=NULL)
 
-  #data_stations_temp <- aggregate(id ~ date, data = data_stations, min)
-  #data_stations_temp <- aggregate(id ~ x + y + date + dailyTmax + mod1 + res_mod1 , data = data_stations, min)
-  data_stations_temp <- aggregate(id ~ x + y + date + dailyTmax + res_mod1,data = df_points, mean ) #+ mod1 + res_mod1 , data = data_stations, min)
-  dim(data_stations_temp)
-  #> dim(data_stations_temp)
+  #data_stations_var_pred <- aggregate(id ~ date, data = data_stations, min)
+  #data_stations_var_pred <- aggregate(id ~ x + y + date + dailyTmax + mod1 + res_mod1 , data = data_stations, min)
+  
+  #change the formula later on to use different y_var_name (tmin and precip)
+  data_stations_var_pred <- aggregate(id ~ x + y + date + dailyTmax + res_mod1 + tile_id + reg ,data = df_points, mean ) #+ mod1 + res_mod1 , data = data_stations, min)
+  dim(data_stations_var_pred)
+  #> dim(data_stations_var_pred)
   #[1] 11171     5
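In the aggregate() call added above, id sits on the left of the formula, so rows sharing the same x, y, date, dailyTmax, res_mod1, tile_id and reg are collapsed and the mean of id is kept; the practical effect is dropping exact duplicates of a station record. If the intent were instead to average the residual for stations that fall in several overlapping tiles, the residual would go on the left-hand side. A hedged sketch of that variant (not what the commit does), working off the data slot of df_points:

  #one row per station and date, residual averaged across overlapping tiles
  res_by_station <- aggregate(res_mod1 ~ x + y + date + dailyTmax,
                              data = df_points@data,
                              FUN = mean)
  nrow(res_by_station) #compare with nrow(df_points) to see how many tile duplicates were merged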
 
-  data_stations_temp$date_str <- data_stations_temp$date
-  data_stations_temp$date <- as.Date(strptime(data_stations_temp$date_str,"%Y%m%d"))
-  coordinates(data_stations_temp) <- cbind(data_stations_temp$x,data_stations_temp$y)
-  data_stations_temp$constant <- c(1000,2000)
-  #plot(data_stations_temp)
+  data_stations_var_pred$date_str <- data_stations_var_pred$date
+  data_stations_var_pred$date <- as.Date(strptime(data_stations_var_pred$date_str,"%Y%m%d"))
+  coordinates(data_stations_var_pred) <- cbind(data_stations_var_pred$x,data_stations_var_pred$y)
+  #data_stations_var_pred$constant <- c(1000,2000)
+  #plot(data_stations_var_pred)
   #plot(reg_layer)
   #res_pix <- 1200
-  res_pix <- 400
+  res_pix <- 800
   col_mfrow <- 1
   row_mfrow <- 1
 
......
                        "_",out_suffix,".png",sep="")
   png(filename=png_filename,
         width=col_mfrow*res_pix,height=row_mfrow*res_pix)
-  #plot(data_stations_temp
+  #plot(data_stations_var_pred
   #p_shp <- spplot(reg_layer_clipped,"ISO3" ,col.regions=NA, col="black") #ok problem solved!!
   #title("(a) Mean for 1 January")
-  #p <- bubble(data_stations_temp,"constant",main=paste0("Average Residuals by validation stations ",
+  #p <- bubble(data_stations_var_pred,"constant",main=paste0("Average Residuals by validation stations ",
   #                                                      date_processed,
   #                                                      "for ",var_selected))
-  #p <- spplot(data_stations_temp,"constant",col.regions=NA, col="black",
+  #p <- spplot(data_stations_var_pred,"constant",col.regions=NA, col="black",
   #            main=paste0("Average Residuals by validation stations ",pch=3,cex=10,
   #            date_processed, "for ",var_selected))
 
   #p1 <- p+p_shp
   #try(print(p1)) #error raised if number of missing values below a threshold does not exist
 
-  title_str <- paste0("Average Residuals by ",station_type_name," stations ",date_processed, " for ",y_var_name," ", var_selected)
+  title_str <- paste0("Stations ",station_type_name," on ",date_processed, " for ",y_var_name," ", var_selected)
   plot(reg_layer_clipped,main=title_str)
-  plot(data_stations_temp,add=T,cex=0.5,col="blue")
-  legend("topleft",legend=paste("n= ", nrow(data_stations_temp)),bty = "n")
+  plot(data_stations_var_pred,add=T,cex=0.5,col="blue")
+  legend("topleft",legend=paste("n= ", nrow(data_stations_var_pred)),bty = "n")
 
   dev.off()
 
   res_pix <- 800
   col_mfrow <- 1
   row_mfrow <- 1
-  png_filename <- paste("Figure_ac_metrics_map_stations",station_type_name,"averaged",model_name,y_var_name,date_processed,
+  png_filename <- paste("Figure_ac_metrics_map_stations",station_type_name,"averaged","_fixed_intervals_",model_name,y_var_name,date_processed,
                        out_suffix,".png",sep="_")
   png(filename=png_filename,
         width=col_mfrow*res_pix,height=row_mfrow*res_pix)
......
   #p_shp <- layer(sp.polygons(reg_layer, lwd=1, col='black'))
   p_shp <- spplot(reg_layer,"ISO3" ,col.regions=NA, col="black") #ok problem solved!!
   #title("(a) Mean for 1 January")
-  p <- bubble(data_stations_temp,"res_mod1",main=title_str)
-  p1 <- p+p_shp
+  class_intervals <- c(-20,-10,-5,-4,-3,-2,-1,0,1,2,3,4,5,10,20)
+  p <- bubble(data_stations_var_pred,zcol="res_mod1",key.entries = class_intervals ,
+              fill=F, #plot open circles (no fill)
+              #col= matlab.like(length(class_intervals)),
+              main=title_str)
+  p1 <- p + p_shp
   try(print(p1)) #error raised if number of missing values below a threshold does not exist
 
   dev.off()
 
+  #### Add additional plot with quantiles and min max?...
+  
+  
+  #library(classInt)
+  #breaks.qt <- classIntervals(palo_alto$PrCpInc, n = 6, style = "quantile", intervalClosure = "right")
+  #spplot(palo_alto, "PrCpInc", col = "transparent", col.regions = my.palette, 
+  #  at = breaks.qt$brks)
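The commented classInt lines above are lifted from a generic example (palo_alto / PrCpInc). Adapted to the residuals plotted here, quantile-based breaks would replace the hand-picked class_intervals vector. A minimal sketch, assuming the classInt package is available; the commit itself only hints at this in comments:

  library(classInt)
  #quantile-based class breaks for the residuals, instead of the fixed -20..20 vector
  breaks.qt <- classIntervals(data_stations_var_pred$res_mod1, n = 6,
                              style = "quantile", intervalClosure = "right")
  p <- bubble(data_stations_var_pred, zcol = "res_mod1",
              key.entries = breaks.qt$brks, main = title_str)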
+
   #### histogram of values
   res_pix <- 800
   col_mfrow <- 1
......
   png(filename=png_filename,
         width=col_mfrow*res_pix,height=row_mfrow*res_pix)
 
-  h<- histogram(data_stations_temp$res_mod1,breaks=seq(-50,50,5))
+  title_str <- paste0("Stations ",station_type_name," on ",date_processed, " for ",y_var_name," ", var_selected)
+
+  h<- histogram(data_stations_var_pred$res_mod1,breaks=seq(-50,50,5),
+                main=title_str)
 
   print(h)
   dev.off()
 
   ##Make data.frame with dates for later use!!
   #from library mosaic
-  df_basic_stat <- fav_stats(data_stations_temp$res_mod1)
-  #quantile(data_stations_temp$res_mod1,c(1,5,10,90,95,99))
-  df_quantile_val <- (quantile(data_stations_temp$res_mod1,c(0.01,0.05,0.10,0.45,0.50,0.55,0.90,0.95,0.99)))
-  #quantile(c(1,5,10,90,95,99),data_stations_temp$res_mod1,)
-  #rmse(data_stations_temp$res_mod1)
+  df_basic_stat <- fav_stats(data_stations_var_pred$res_mod1)
+  df_basic_stat$date <- date_processed
+  #df_basic_stat$reg <- reg
+  #quantile(data_stations_var_pred$res_mod1,c(1,5,10,90,95,99))
+  df_quantile_val <- quantile(data_stations_var_pred$res_mod1,c(0.01,0.05,0.10,0.45,0.50,0.55,0.90,0.95,0.99))
+  #names(df_quantile_val)
+  #as.list(df_quantile_val)
+  #df_test <- data.frame(names(df_quantile_val))[numeric(0), ]
+
+
+  #quantile(c(1,5,10,90,95,99),data_stations_var_pred$res_mod1,)
+  #rmse(data_stations_var_pred$res_mod1)
+
+  plot_obj <- list(data_stations_var_pred,df_basic_stat,df_quantile_val,freq_tile_id)
+  names(plot_obj) <- c("data_stations_var_pred","df_basic_stat","df_quantile_val","freq_tile_id")
+
+  return(plot_obj)
 }
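plot_obj now returns, for each date, the station points plus a one-row fav_stats summary, the residual quantiles, and the per-tile station counts. Collecting those pieces across dates is a simple rbind over the list produced by lapply. A minimal sketch, assuming list_plot_obj holds the return values of plot_stations_val_by_date for every date (the collector names are illustrative):

#one row of summary statistics per date
df_stats_by_date <- do.call(rbind, lapply(list_plot_obj, function(x){x$df_basic_stat}))
#station counts per tile and date, useful to spot tiles with few validation points
df_freq_tile <- do.call(rbind, lapply(list_plot_obj, function(x){x$freq_tile_id}))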
 
......
 #> dim(data_stations)
 #[1] 100458     90
 
-#data_stations_temp <- aggregate(id ~ date, data = data_stations, min)
-#data_stations_temp <- aggregate(id ~ x + y + date + dailyTmax + mod1 + res_mod1 , data = data_stations, min)
-data_stations_temp <- aggregate(id ~ x + y + date + dailyTmax,data = data_stations, min ) #+ mod1 + res_mod1 , data = data_stations, min)
-dim(data_stations_temp)
-#> dim(data_stations_temp)
+#data_stations_var_pred <- aggregate(id ~ date, data = data_stations, min)
+#data_stations_var_pred <- aggregate(id ~ x + y + date + dailyTmax + mod1 + res_mod1 , data = data_stations, min)
+
+##Add tile id here...and check if data stations was training or testing.
+
+
+data_stations_var_pred <- aggregate(id ~ x + y + date + dailyTmax,data = data_stations, min ) #+ mod1 + res_mod1 , data = data_stations, min)
+dim(data_stations_var_pred)
+#> dim(data_stations_var_pred)
 #[1] 11171     5
 
-data_stations_temp$date_str <- data_stations_temp$date
-data_stations_temp$date <- as.Date(strptime(data_stations_temp$date_str,"%Y%m%d"))
+data_stations_var_pred$date_str <- data_stations_var_pred$date
+data_stations_var_pred$date <- as.Date(strptime(data_stations_var_pred$date_str,"%Y%m%d"))
 
 ##Find stations used for training and testing
-#data_stations_temp2 <- aggregate(id ~ training,data = data_stations, sum ) #+ mod1 + res_mod1 , data = data_stations, min)
-#data_stations_temp2 <- aggregate(date ~ training,data = data_stations, sum ) #+ mod1 + res_mod1 , data = data_stations, min)
+#data_stations_var_pred2 <- aggregate(id ~ training,data = data_stations, sum ) #+ mod1 + res_mod1 , data = data_stations, min)
+#data_stations_var_pred2 <- aggregate(date ~ training,data = data_stations, sum ) #+ mod1 + res_mod1 , data = data_stations, min)
 
 ############### PART3: Make raster stack and display maps #############
 #### Extract corresponding raster for given dates and plot stations used
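The commented lines under "Find stations used for training and testing" point at counting records by the training flag rather than summing it. A hedged sketch, assuming data_stations carries the training indicator column referenced in those comments (the column name training is taken from them, not confirmed elsewhere):

#number of station records flagged as training vs testing, overall and per date
table(data_stations$training)
counts_by_date <- aggregate(id ~ date + training, data = data_stations, FUN = length)
head(counts_by_date)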
......
 #scaling
 #start_date: subset dates
 #end_date: subset dates
-df2 <- data_stations_temp
+df2 <- data_stations_var_pred
 
 
 df_selected <- subset(df,select=station_id)
......
779 859
####################
780 860
###### Now add figures with additional met stations?
781 861

  
782
#df_selected2 <- data_stations_temp
862
#df_selected2 <- data_stations_var_pred
783 863

  
784 864
#d_z_tmp <- zoo(df_selected[[station_id]],df_selected$date)
785 865
#d_z_tmp2 <- zoo(df_selected2$dailyTmax,df_selected2$date)
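The commented zoo() calls above sketch the time-series view of a single station. A minimal example of that idea, assuming df_selected2 is a data.frame with a Date column date and a dailyTmax column, as in the commented lines:

library(zoo)
#daily Tmax as a time series indexed by date, then a quick line plot
d_z_tmp2 <- zoo(df_selected2$dailyTmax, order.by = df_selected2$date)
plot(d_z_tmp2, xlab = "date", ylab = "dailyTmax", type = "l")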
......
 #dev.off()
 
 #This is a lot of replication!!! okay cut it down
-data_stations$date <- as.Date(strptime(data_stations_temp$date_str,"%Y%m%d"))
+data_stations$date <- as.Date(strptime(data_stations_var_pred$date_str,"%Y%m%d"))
 data_stations_tmp <- data_stations
 data_stations_tmp <- data_stations
 
