Project

General

Profile

« Previous | Next » 

Revision de0e6966

Added by Benoit Parmentier over 8 years ago

subsetting day and extracting pixel time series profiles for stations

View differences:

climate/research/oregon/interpolation/global_product_assessment_part1.R
4 4
#Combining tables and figures for individual runs for years and tiles.
5 5
#AUTHOR: Benoit Parmentier 
6 6
#CREATED ON: 05/15/2016  
7
#MODIFIED ON: 05/22/2016            
7
#MODIFIED ON: 05/23/2016            
8 8
#Version: 1
9 9
#PROJECT: Environmental Layers project     
10 10
#COMMENTS: Initial commit, script based on part NASA biodiversity conferenc 
......
212 212

  
213 213
df_data_v_fname$year <- df_data_v_fname$year_str
214 214
df_data_v_fname$year <- as.character(df_data_v_fname$year_str)
215

  
215
df_data_v_fname$file <- as.character(df_data_v_fname$file)
216
df_data_s_fname$file <- as.character(df_data_s_fname$file)
216 217

  
217 218
############### PART1: Select stations for specific list of dates #############
218 219
#### Extract corresponding stationsfor given dates and plot stations used
......
223 224
l_dates_year_str <- format(list_dates_val, "%Y") ## Year with century
224 225
l_dates_day_str <- as.numeric(format(list_dates_val, "%d")) ## numeric month
225 226

  
227

  
228
### Needs to make this a function!!!
229
#Step 1 for a list of dates, load relevant files with year matching,
230
#Step 2 for giving dates subset the data.frame
231
#Step 3: find duplicates, create return object and return given station data for the date
232

  
226 233
list_year_str <- unique(l_dates_year_str)
227
list_df_stations <- vector("list",length=length(list_year_str))
234
list_df_v_stations <- vector("list",length=length(list_year_str))
235
list_df_s_stations <- vector("list",length=length(list_year_str))
228 236

  
229 237
for(i in 1:length(list_year_str)){
230
  subset(df_data_v_fname,year=list_year_str)
231
  df_data_v_fname$data_date==l_dates_year_str[i]
232
  filename_tmp <- as.character(df_data_v_fname$file[9])
233
  df_stations_tmp <- read.table(filename_tmp,stringsAsFactors=F,sep=",")
238
  filename_tmp<- df_data_v_fname$file[df_data_v_fname$year==list_year_str[i]]
239
  list_df_v_stations[[i]] <- read.table(filename_tmp,stringsAsFactors=F,sep=",")
240
  filename_tmp<- df_data_s_fname$file[df_data_s_fname$year==list_year_str[i]]
241
  list_df_s_stations[[i]] <- read.table(filename_tmp,stringsAsFactors=F,sep=",")
234 242
}
235 243

  
244
#function(x){x$date==}
236 245
df_points <- subset(df_stations,df_stations_tmp$date==l_dates[1])
237 246
table(df_points$tile_id)
238 247
plot(df_points,add=T)
239 248
coordinates(df_points) <- cbind(df_points$x,df_points$y)
240 249
proj4string(df_points) <- CRS_locs_WGS84
241 250
## No spatial duplicates
242
df_points_day <- remove.duplicates(df_points)
251
df_points_day <- remove.duplicates(df_points) #remove duplicates...
243 252

  
244 253
############### PART2: Select stations by ID to build a time series #############
245 254
#### Extract corresponding stationsfor given dates and plot stations used
......
251 260
id_selected <- "82111099999"
252 261
dim(df_points)
253 262

  
254

  
255 263
### loop through all files and get the time series
256 264
extract_from_df <- function(x,col_selected,val_selected){
257 265
  df_tmp <- read.table(x,stringsAsFactors=F,sep=",")
......
286 294
#rbind.fill(mtcars[c("mpg", "wt")], mtcars[c("wt", "cyl")])
287 295
data_stations <- rbind.fill(data_v_subset, data_s_subset)
288 296

  
289
#aggregate(breaks ~ wool + tension, data = warpbreaks, mean)
290
#aggregate(df_points ~ wool + tension, data = warpbreaks, mean)
291

  
292 297
coordinates(data_stations) <- cbind(data_stations$x,data_stations$y)
293 298
proj4string(data_stations) <- CRS_locs_WGS84
294 299

  
295
dim(data_stations)
300
dim(data_stations) #one station only but repitition of records because of tiles and dates!!!
301
#> dim(data_stations)
302
#[1] 100458     90
303
#This is a lot of replication!!! okay cut it down
296 304

  
305
#data_stations_temp <- aggregate(id ~ date, data = data_stations, min)
306
#data_stations_temp <- aggregate(id ~ x + y + date + dailyTmax + mod1 + res_mod1 , data = data_stations, min)
307
data_stations_temp <- aggregate(id ~ x + y + date + dailyTmax,data = data_stations, min ) #+ mod1 + res_mod1 , data = data_stations, min)
308
dim(data_stations_temp)
309
#> dim(data_stations_temp)
310
#[1] 11171     5
297 311

  
298
############### PART1: Make raster stack and display maps #############
312
############### PART3: Make raster stack and display maps #############
299 313
#### Extract corresponding raster for given dates and plot stations used
300 314

  
301 315
##Now grab year year 1992 or matching year...maybe put this in a data.frame??
302 316

  
303
if(is.null(r_mosaic=)
304
pattern_str <-"*.tif"
305
lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=F,full.names=T)
306
r_mosaic <- stack(lf_mosaic_list)
307
save(r_mosaic,file="r_mosaic.RData")
317
if(is.null(r_mosaic_fname)){
318
  pattern_str <-"*.tif"
319
  lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=F,full.names=T)
320
  r_mosaic <- stack(lf_mosaic_list)
321
  save(r_mosaic,file="r_mosaic.RData")
322
}else{
323
  r_mosaic <- load_obj(r_mosaic_fname) #load raster stack of images
324
}
325

  
308 326
#start_date <- day_to_mosaic_range[1]
309 327
#end_date <- day_to_mosaic_range[2]
310 328
#start_date <- day_start #PARAM 12 arg 12
......
313 331
#date_to_plot <- seq(as.Date(strptime(start_date,"%Y%m%d")), as.Date(strptime(end_date,"%Y%m%d")), 'day')
314 332
#l_dates <- format(date_to_plot,"%Y%m%d") #format back to the relevant date format for files
315 333
mask_pred <- FALSE
334
matching <- FALSE #to be added after mask_pred option
316 335
list_param_pre_process <- list(raster_name_lf,python_bin,infile_mask,scaling,mask_pred,NA_flag_val,out_suffix,out_dir) 
317 336
names(list_param_pre_process) <- c("lf","python_bin","infile_mask","scaling","mask_pred","NA_flag_val","out_suffix","out_dir") 
318 337
  
......
349 368

  
350 369
lf_mosaic_plot_fig <- mclapply(1:length(lf_mosaic_scaled),FUN=plot_raster_mosaic,list_param=list_param_plot_raster_mosaic,mc.preschedule=FALSE,mc.cores = num_cores)                         
351 370

  
352
############### PART2: temporal profile #############
353
#### Extract time series from raster stack
354
###
355
#-65,-22
356

  
357
#Use the global output??
358

  
359
dim(data_stations) #
360
#> dim(data_stations)
361
#[1] 100458     90
362
#This is a lot of replication!!! okay cut it down
363

  
364
##23.09 (on 05/22)
365
#df_points_day_extracted <- extract(r_mosaic,data_stations,df=T)
366
#df_points_day_extracted_fname <- paste0("df_points_day_extracted_fname",".txt") 
367
#write.table(df_points_day_extracted,file=df_points_day_extracted_fname) #5.1 Giga
368
#4.51 (on 05/23)
369

  
370
if(is.null(df_points_extracted_fname)){
371
  
372
  ##10.41 (on 05/22)
373
  df_points_day_extracted <- extract(r_mosaic,df_points_day,df=T)
374
  #17.19 (on 05/23)
375
  df_points_day_extracted_fname <- paste0("df_points_day_extracted_fname2",".txt")
376
  #17.27 (on 05/23)
377
  write.table(df_points_day_extracted,file=df_points_day_extracted_fname,sep=",",row.names = F) 
378
  #17.19 (on 05/23)
371
###############  PART4: Checking for mosaic produced for given region ##############
372
## From list of mosaic files predicted extract dates
373
## Check dates predicted against date range for a given date range
374
## Join file information to centroids of tiles data.frame
379 375

  
380
}else{
381
  df_points_extracted <- read.table(df_points_extracted_fname,sep=",")
382
}
383

  
384

  
385
#df_points_extracted <- extract(r_mosaic,data_stations,df=T)
386
#df_points_extracted_fname <- paste0("df_points_extracted_fname",".txt")
387
#write.table(df_points_extracted,file=df_points_extracted_fname) 
388

  
389
#df_points <- read.table(df_points_extracted_fname,sep=",") 
390
#df_points_tmp <- df_points
391
#df_points <- as.data.frame(t(df_points))
392
#names(df_points) <- paste0("ID_",1:ncol(df_points))
393

  
394
#df_centroids <- read.table(df_centroids_fname,sep=",")
395

  
396
#coordinates(df_centroids)<- c("long","lat")
397
#proj4string(df_centroids) <- CRS_locs_WGS84
398 376
## Checking new files:
399
in_dir_mosaic <- "/nobackupp6/aguzman4/climateLayers/out/reg4/mosaics2/mosaic"
377
#in_dir_mosaic <- "/nobackupp6/aguzman4/climateLayers/out/reg4/mosaics2/mosaic"
400 378
#/nobackupp6/aguzman4/climateLayers/out/reg4/mosaics2/mosaic/output_reg4_*/r_m_use_edge_weights_weighted_mean_mask_gam_CAI_dailyTmax_*_reg4_*.tif
401 379
pattern_str <- "r_m_use_edge_weights_weighted_mean_mask_gam_CAI_dailyTmax_.*._reg4_.*.tif"
402 380
searchStr = paste(in_dir_mosaic,"/output_reg4_2014",year_processed,"/gam_CAI_dailyTmax_predicted_",pred_mod_name,"*",day_to_mosaic[i],"*.tif",sep="")
......
442 420
df_points$year <- year_str
443 421
df_points$day <- day_str
444 422

  
423

  
424
############### PART5: Extraction of temporal profile #############
425
#### Extract time series from raster stack
426
### Add dates to mkae it a date object?
427
#-65,-22
428

  
429
#Use the global output??
430

  
431
##23.09 (on 05/22)
432
#df_points_day_extracted <- extract(r_mosaic,data_stations,df=T)
433
#df_points_day_extracted_fname <- paste0("df_points_day_extracted_fname",".txt") 
434
#write.table(df_points_day_extracted,file=df_points_day_extracted_fname) #5.1 Giga
435
#4.51 (on 05/23)
436

  
437
if(is.null(df_points_extracted_fname)){
438
  
439
  ##10.41 (on 05/22)
440
  df_points_day_extracted <- extract(r_mosaic,df_points_day,df=T)
441
  #17.19 (on 05/23)
442
  df_points_day_extracted_fname <- paste0("df_points_day_extracted_fname2",".txt")
443
  #17.27 (on 05/23)
444
  write.table(df_points_day_extracted,file=df_points_day_extracted_fname,sep=",",row.names = F) 
445
  #17.19 (on 05/23)
446

  
447
}else{
448
  df_points_day_extracted <- read.table(df_points_extracted_fname,sep=",")
449
}
450

  
451
pix_ts <- as.data.frame(t(df_points_day_extracted))
452
rownames(df_points_day_extracted)
453

  
454
lf_var <- names(r_mosaic)
455
### Not get the data from the time series
456
data_pixel <- df_ts_pix[id_selected,]
457
data_pixel <- as.data.frame(data_pixel)
458
  
459
pix_ts <- t(as.data.frame(subset(data_pixel,select=r_ts_name))) #can subset to range later
460
#pix_ts <- subset(as.data.frame(pix_ts),select=r_ts_name)
461
pix_ts <- (as.data.frame(pix_ts))
462
## Process the coliform data
463
  
464
  #there are several measurements per day for some stations !!!
465
  #id_name <- data_pixel[[var_ID]]
466
  
467
  #df_tmp  <-data_var[data_var$LOCATION_ID==id_name,]
468
  df_tmp <- subset(data_var,data_var$ID_stat==id_name)
469

  
470
#df_points_extracted <- extract(r_mosaic,data_stations,df=T)
471
#df_points_extracted_fname <- paste0("df_points_extracted_fname",".txt")
472
#write.table(df_points_extracted,file=df_points_extracted_fname) 
473

  
474
#df_points <- read.table(df_points_extracted_fname,sep=",") 
475
#df_points_tmp <- df_points
476
#df_points <- as.data.frame(t(df_points))
477
#names(df_points) <- paste0("ID_",1:ncol(df_points))
478

  
479
#df_centroids <- read.table(df_centroids_fname,sep=",")
480

  
445 481
unique_date_tb <-table(df_points$date)
446 482
unique_date <- unique(df_points$date)
447 483

  

Also available in: Unified diff