Revision de0e6966
Added by Benoit Parmentier over 8 years ago
climate/research/oregon/interpolation/global_product_assessment_part1.R | ||
---|---|---|
4 | 4 |
#Combining tables and figures for individual runs for years and tiles. |
5 | 5 |
#AUTHOR: Benoit Parmentier |
6 | 6 |
#CREATED ON: 05/15/2016 |
7 |
#MODIFIED ON: 05/22/2016
|
|
7 |
#MODIFIED ON: 05/23/2016
|
|
8 | 8 |
#Version: 1 |
9 | 9 |
#PROJECT: Environmental Layers project |
10 | 10 |
#COMMENTS: Initial commit, script based on part NASA biodiversity conferenc |
... | ... | |
212 | 212 |
|
213 | 213 |
df_data_v_fname$year <- df_data_v_fname$year_str |
214 | 214 |
df_data_v_fname$year <- as.character(df_data_v_fname$year_str) |
215 |
|
|
215 |
df_data_v_fname$file <- as.character(df_data_v_fname$file) |
|
216 |
df_data_s_fname$file <- as.character(df_data_s_fname$file) |
|
216 | 217 |
|
217 | 218 |
############### PART1: Select stations for specific list of dates ############# |
218 | 219 |
#### Extract corresponding stations for given dates and plot stations used |
... | ... | |
223 | 224 |
l_dates_year_str <- format(list_dates_val, "%Y") ## Year with century |
224 | 225 |
l_dates_day_str <- as.numeric(format(list_dates_val, "%d")) ## numeric month |
225 | 226 |
|
227 |
|
|
228 |
### Needs to make this a function!!! |
|
229 |
#Step 1 for a list of dates, load relevant files with year matching, |
|
230 |
#Step 2 for giving dates subset the data.frame |
|
231 |
#Step 3: find duplicates, create return object and return given station data for the date |
|
232 |
|
|
226 | 233 |
list_year_str <- unique(l_dates_year_str) |
227 |
list_df_stations <- vector("list",length=length(list_year_str)) |
|
234 |
list_df_v_stations <- vector("list",length=length(list_year_str)) |
|
235 |
list_df_s_stations <- vector("list",length=length(list_year_str)) |
|
228 | 236 |
|
229 | 237 |
for(i in 1:length(list_year_str)){ |
230 |
subset(df_data_v_fname,year=list_year_str)
|
|
231 |
df_data_v_fname$data_date==l_dates_year_str[i]
|
|
232 |
filename_tmp <- as.character(df_data_v_fname$file[9])
|
|
233 |
df_stations_tmp <- read.table(filename_tmp,stringsAsFactors=F,sep=",")
|
|
238 |
filename_tmp<- df_data_v_fname$file[df_data_v_fname$year==list_year_str[i]]
|
|
239 |
list_df_v_stations[[i]] <- read.table(filename_tmp,stringsAsFactors=F,sep=",")
|
|
240 |
filename_tmp<- df_data_s_fname$file[df_data_s_fname$year==list_year_str[i]]
|
|
241 |
list_df_s_stations[[i]] <- read.table(filename_tmp,stringsAsFactors=F,sep=",")
|
|
234 | 242 |
} |
235 | 243 |
|
244 |
#function(x){x$date==} |
|
236 | 245 |
df_points <- subset(df_stations,df_stations_tmp$date==l_dates[1]) |
237 | 246 |
table(df_points$tile_id) |
238 | 247 |
plot(df_points,add=T) |
239 | 248 |
coordinates(df_points) <- cbind(df_points$x,df_points$y) |
240 | 249 |
proj4string(df_points) <- CRS_locs_WGS84 |
241 | 250 |
## No spatial duplicates |
242 |
df_points_day <- remove.duplicates(df_points) |
|
251 |
df_points_day <- remove.duplicates(df_points) #remove duplicates...
|
|
243 | 252 |
|
244 | 253 |
############### PART2: Select stations by ID to build a time series ############# |
245 | 254 |
#### Extract corresponding stations for given dates and plot stations used |
... | ... | |
251 | 260 |
id_selected <- "82111099999" |
252 | 261 |
dim(df_points) |
253 | 262 |
|
254 |
|
|
255 | 263 |
### loop through all files and get the time series |
256 | 264 |
extract_from_df <- function(x,col_selected,val_selected){ |
257 | 265 |
df_tmp <- read.table(x,stringsAsFactors=F,sep=",") |
... | ... | |
286 | 294 |
#rbind.fill(mtcars[c("mpg", "wt")], mtcars[c("wt", "cyl")]) |
287 | 295 |
data_stations <- rbind.fill(data_v_subset, data_s_subset) |
288 | 296 |
|
289 |
#aggregate(breaks ~ wool + tension, data = warpbreaks, mean) |
|
290 |
#aggregate(df_points ~ wool + tension, data = warpbreaks, mean) |
|
291 |
|
|
292 | 297 |
coordinates(data_stations) <- cbind(data_stations$x,data_stations$y) |
293 | 298 |
proj4string(data_stations) <- CRS_locs_WGS84 |
294 | 299 |
|
295 |
dim(data_stations) |
|
300 |
dim(data_stations) #one station only but repetition of records because of tiles and dates!!! |
|
301 |
#> dim(data_stations) |
|
302 |
#[1] 100458 90 |
|
303 |
#This is a lot of replication!!! okay cut it down |
|
296 | 304 |
|
305 |
#data_stations_temp <- aggregate(id ~ date, data = data_stations, min) |
|
306 |
#data_stations_temp <- aggregate(id ~ x + y + date + dailyTmax + mod1 + res_mod1 , data = data_stations, min) |
|
307 |
data_stations_temp <- aggregate(id ~ x + y + date + dailyTmax,data = data_stations, min ) #+ mod1 + res_mod1 , data = data_stations, min) |
|
308 |
dim(data_stations_temp) |
|
309 |
#> dim(data_stations_temp) |
|
310 |
#[1] 11171 5 |
|
297 | 311 |
|
298 |
############### PART1: Make raster stack and display maps #############
|
|
312 |
############### PART3: Make raster stack and display maps #############
|
|
299 | 313 |
#### Extract corresponding raster for given dates and plot stations used |
300 | 314 |
|
301 | 315 |
##Now grab year year 1992 or matching year...maybe put this in a data.frame?? |
302 | 316 |
|
303 |
if(is.null(r_mosaic=) |
|
304 |
pattern_str <-"*.tif" |
|
305 |
lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=F,full.names=T) |
|
306 |
r_mosaic <- stack(lf_mosaic_list) |
|
307 |
save(r_mosaic,file="r_mosaic.RData") |
|
317 |
if(is.null(r_mosaic_fname)){ |
|
318 |
pattern_str <-"*.tif" |
|
319 |
lf_mosaic_list <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=F,full.names=T) |
|
320 |
r_mosaic <- stack(lf_mosaic_list) |
|
321 |
save(r_mosaic,file="r_mosaic.RData") |
|
322 |
}else{ |
|
323 |
r_mosaic <- load_obj(r_mosaic_fname) #load raster stack of images |
|
324 |
} |
|
325 |
|
|
308 | 326 |
#start_date <- day_to_mosaic_range[1] |
309 | 327 |
#end_date <- day_to_mosaic_range[2] |
310 | 328 |
#start_date <- day_start #PARAM 12 arg 12 |
... | ... | |
313 | 331 |
#date_to_plot <- seq(as.Date(strptime(start_date,"%Y%m%d")), as.Date(strptime(end_date,"%Y%m%d")), 'day') |
314 | 332 |
#l_dates <- format(date_to_plot,"%Y%m%d") #format back to the relevant date format for files |
315 | 333 |
mask_pred <- FALSE |
334 |
matching <- FALSE #to be added after mask_pred option |
|
316 | 335 |
list_param_pre_process <- list(raster_name_lf,python_bin,infile_mask,scaling,mask_pred,NA_flag_val,out_suffix,out_dir) |
317 | 336 |
names(list_param_pre_process) <- c("lf","python_bin","infile_mask","scaling","mask_pred","NA_flag_val","out_suffix","out_dir") |
318 | 337 |
|
... | ... | |
349 | 368 |
|
350 | 369 |
lf_mosaic_plot_fig <- mclapply(1:length(lf_mosaic_scaled),FUN=plot_raster_mosaic,list_param=list_param_plot_raster_mosaic,mc.preschedule=FALSE,mc.cores = num_cores) |
351 | 370 |
|
352 |
############### PART2: temporal profile ############# |
|
353 |
#### Extract time series from raster stack |
|
354 |
### |
|
355 |
#-65,-22 |
|
356 |
|
|
357 |
#Use the global output?? |
|
358 |
|
|
359 |
dim(data_stations) # |
|
360 |
#> dim(data_stations) |
|
361 |
#[1] 100458 90 |
|
362 |
#This is a lot of replication!!! okay cut it down |
|
363 |
|
|
364 |
##23.09 (on 05/22) |
|
365 |
#df_points_day_extracted <- extract(r_mosaic,data_stations,df=T) |
|
366 |
#df_points_day_extracted_fname <- paste0("df_points_day_extracted_fname",".txt") |
|
367 |
#write.table(df_points_day_extracted,file=df_points_day_extracted_fname) #5.1 Giga |
|
368 |
#4.51 (on 05/23) |
|
369 |
|
|
370 |
if(is.null(df_points_extracted_fname)){ |
|
371 |
|
|
372 |
##10.41 (on 05/22) |
|
373 |
df_points_day_extracted <- extract(r_mosaic,df_points_day,df=T) |
|
374 |
#17.19 (on 05/23) |
|
375 |
df_points_day_extracted_fname <- paste0("df_points_day_extracted_fname2",".txt") |
|
376 |
#17.27 (on 05/23) |
|
377 |
write.table(df_points_day_extracted,file=df_points_day_extracted_fname,sep=",",row.names = F) |
|
378 |
#17.19 (on 05/23) |
|
371 |
############### PART4: Checking for mosaic produced for given region ############## |
|
372 |
## From list of mosaic files predicted extract dates |
|
373 |
## Check dates predicted against date range for a given date range |
|
374 |
## Join file information to centroids of tiles data.frame |
|
379 | 375 |
|
380 |
}else{ |
|
381 |
df_points_extracted <- read.table(df_points_extracted_fname,sep=",") |
|
382 |
} |
|
383 |
|
|
384 |
|
|
385 |
#df_points_extracted <- extract(r_mosaic,data_stations,df=T) |
|
386 |
#df_points_extracted_fname <- paste0("df_points_extracted_fname",".txt") |
|
387 |
#write.table(df_points_extracted,file=df_points_extracted_fname) |
|
388 |
|
|
389 |
#df_points <- read.table(df_points_extracted_fname,sep=",") |
|
390 |
#df_points_tmp <- df_points |
|
391 |
#df_points <- as.data.frame(t(df_points)) |
|
392 |
#names(df_points) <- paste0("ID_",1:ncol(df_points)) |
|
393 |
|
|
394 |
#df_centroids <- read.table(df_centroids_fname,sep=",") |
|
395 |
|
|
396 |
#coordinates(df_centroids)<- c("long","lat") |
|
397 |
#proj4string(df_centroids) <- CRS_locs_WGS84 |
|
398 | 376 |
## Checking new files: |
399 |
in_dir_mosaic <- "/nobackupp6/aguzman4/climateLayers/out/reg4/mosaics2/mosaic" |
|
377 |
#in_dir_mosaic <- "/nobackupp6/aguzman4/climateLayers/out/reg4/mosaics2/mosaic"
|
|
400 | 378 |
#/nobackupp6/aguzman4/climateLayers/out/reg4/mosaics2/mosaic/output_reg4_*/r_m_use_edge_weights_weighted_mean_mask_gam_CAI_dailyTmax_*_reg4_*.tif |
401 | 379 |
pattern_str <- "r_m_use_edge_weights_weighted_mean_mask_gam_CAI_dailyTmax_.*._reg4_.*.tif" |
402 | 380 |
searchStr = paste(in_dir_mosaic,"/output_reg4_2014",year_processed,"/gam_CAI_dailyTmax_predicted_",pred_mod_name,"*",day_to_mosaic[i],"*.tif",sep="") |
... | ... | |
442 | 420 |
df_points$year <- year_str |
443 | 421 |
df_points$day <- day_str |
444 | 422 |
|
423 |
|
|
424 |
############### PART5: Extraction of temporal profile ############# |
|
425 |
#### Extract time series from raster stack |
|
426 |
### Add dates to make it a date object? |
|
427 |
#-65,-22 |
|
428 |
|
|
429 |
#Use the global output?? |
|
430 |
|
|
431 |
##23.09 (on 05/22) |
|
432 |
#df_points_day_extracted <- extract(r_mosaic,data_stations,df=T) |
|
433 |
#df_points_day_extracted_fname <- paste0("df_points_day_extracted_fname",".txt") |
|
434 |
#write.table(df_points_day_extracted,file=df_points_day_extracted_fname) #5.1 Giga |
|
435 |
#4.51 (on 05/23) |
|
436 |
|
|
437 |
if(is.null(df_points_extracted_fname)){ |
|
438 |
|
|
439 |
##10.41 (on 05/22) |
|
440 |
df_points_day_extracted <- extract(r_mosaic,df_points_day,df=T) |
|
441 |
#17.19 (on 05/23) |
|
442 |
df_points_day_extracted_fname <- paste0("df_points_day_extracted_fname2",".txt") |
|
443 |
#17.27 (on 05/23) |
|
444 |
write.table(df_points_day_extracted,file=df_points_day_extracted_fname,sep=",",row.names = F) |
|
445 |
#17.19 (on 05/23) |
|
446 |
|
|
447 |
}else{ |
|
448 |
df_points_day_extracted <- read.table(df_points_extracted_fname,sep=",") |
|
449 |
} |
|
450 |
|
|
451 |
pix_ts <- as.data.frame(t(df_points_day_extracted)) |
|
452 |
rownames(df_points_day_extracted) |
|
453 |
|
|
454 |
lf_var <- names(r_mosaic) |
|
455 |
### Not get the data from the time series |
|
456 |
data_pixel <- df_ts_pix[id_selected,] |
|
457 |
data_pixel <- as.data.frame(data_pixel) |
|
458 |
|
|
459 |
pix_ts <- t(as.data.frame(subset(data_pixel,select=r_ts_name))) #can subset to range later |
|
460 |
#pix_ts <- subset(as.data.frame(pix_ts),select=r_ts_name) |
|
461 |
pix_ts <- (as.data.frame(pix_ts)) |
|
462 |
## Process the coliform data |
|
463 |
|
|
464 |
#there are several measurements per day for some stations !!! |
|
465 |
#id_name <- data_pixel[[var_ID]] |
|
466 |
|
|
467 |
#df_tmp <-data_var[data_var$LOCATION_ID==id_name,] |
|
468 |
df_tmp <- subset(data_var,data_var$ID_stat==id_name) |
|
469 |
|
|
470 |
#df_points_extracted <- extract(r_mosaic,data_stations,df=T) |
|
471 |
#df_points_extracted_fname <- paste0("df_points_extracted_fname",".txt") |
|
472 |
#write.table(df_points_extracted,file=df_points_extracted_fname) |
|
473 |
|
|
474 |
#df_points <- read.table(df_points_extracted_fname,sep=",") |
|
475 |
#df_points_tmp <- df_points |
|
476 |
#df_points <- as.data.frame(t(df_points)) |
|
477 |
#names(df_points) <- paste0("ID_",1:ncol(df_points)) |
|
478 |
|
|
479 |
#df_centroids <- read.table(df_centroids_fname,sep=",") |
|
480 |
|
|
445 | 481 |
unique_date_tb <-table(df_points$date) |
446 | 482 |
unique_date <- unique(df_points$date) |
447 | 483 |
|
Also available in: Unified diff
subsetting day and extracting pixel time series profiles for stations