Project

General

Profile

« Previous | Next » 

Revision 76fa8b31

Added by Benoit Parmentier about 8 years ago

more edit and adding test files checking for tiles

View differences:

climate/research/oregon/interpolation/global_product_assessment_part0.R
56 56

  
57 57
###### Function used in the script #######
58 58
  
59
#script_path <- "/nobackupp8/bparmen1/env_layers_scripts" #path to script
60
script_path <- "/home/parmentier/Data/IPLANT_project/env_layers_scripts" #path to script
59
script_path <- "/nobackupp8/bparmen1/env_layers_scripts" #path to script, NEX
60
#script_path <- "/home/parmentier/Data/IPLANT_project/env_layers_scripts" #path to script
61 61

  
62 62
## NASA poster and paper related
63 63
#source(file.path(script_path,"NASA2016_conference_temperature_predictions_function_05032016b.R"))
......
96 96

  
97 97
var<-"TMAX" # variable being interpolated #param 1, arg 1
98 98

  
99

  
100
#interpolation_method<-c("gam_fusion") #other otpions to be added later
101
interpolation_method<-c("gam_CAI") #param 2
102
CRS_interp <- "+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0" #param 3
103
#CRS_interp <-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
104

  
105
#out_region_name<-""
106
#list_models<-c("y_var ~ s(lat,lon,k=5) + s(elev_s,k=3) + s(LST,k=3)") #param 4
107
metric_name <- "var_pred" #use RMSE if accuracy
108

  
109
item_no <- 13
110
date_start <- day_start
111
date_end <- day_end
112
#day_start <- "1984101" #PARAM 12 arg 12
113
#day_end <- "20141231" #PARAM 13 arg 13
114

  
115
############################################
116
#### Parameters and constants  
117

  
99 118
##Add for precip later...
100 119
if (var == "TMAX") {
101 120
  y_var_name <- "dailyTmax"
......
114 133
  variable_name <- "minimum temperature"
115 134
}
116 135

  
117
#interpolation_method<-c("gam_fusion") #other otpions to be added later
118
interpolation_method<-c("gam_CAI") #param 2
119
CRS_interp <- "+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0" #param 3
120
#CRS_interp <-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
121

  
122
#out_region_name<-""
123
#list_models<-c("y_var ~ s(lat,lon,k=5) + s(elev_s,k=3) + s(LST,k=3)") #param 4
124
metric_name <- "var_pred" #use RMSE if accuracy
136
#on ATLAS
137
#in_dir1 <- "/data/project/layers/commons/NEX_data/test_run1_03232014/output" #On Atlas
138
#parent output dir : contains subset of the data produced on NEX
139
#in_dir1 <- "/data/project/layers/commons/NEX_data/output_run6_global_analyses_09162014/output20Deg2"
140
# parent output dir for the curent script analyes
141
#out_dir <-"/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/" #On NCEAS Atlas
142
# input dir containing shapefiles defining tiles
143
#in_dir_shp <- "/data/project/layers/commons/NEX_data/output_run5_global_analyses_08252014/output/subset/shapefiles"
144
in_dir <- "/nobackupp6/aguzman4/climateLayers/out/reg6/assessment"
145
in_dir <- "/nobackupp8/bparmen1/climateLayers/out/reg6/assessment"
125 146

  
126
#reg1 (North Am), reg2(Europe),reg3(Asia), reg4 (South Am), reg5 (Africa), reg6 (Australia-Asia)
127
#master directory containing the definition of tile size and tiles predicted
128
#in_dir <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/assessment"
129
#in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/mosaic"
130
in_dir <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg6/assessment"
131
in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg6/mosaics/mosaic" #predicted mosaic
147
#in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg6/mosaics/mosaic" #predicted mosaic
132 148
#in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg1/mosaics/mosaic"
133 149
#in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaics/mosaic"
134 150
#in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg4/mosaic/mosaic" #note dropped the s in mosaics
......
152 168
#python_bin <- "/nobackupp6/aguzman4/climateLayers/sharedModules2/bin" #PARAM 30
153 169
python_bin <- "/usr/bin" #PARAM 30
154 170

  
155
day_start <- "1984101" #PARAM 12 arg 12
156
day_end <- "20141231" #PARAM 13 arg 13
171
day_start <- "2000101" #PARAM 12 arg 12
172
day_end <- "20001231" #PARAM 13 arg 13
157 173
#date_start <- day_start
158 174
#date_end <- day_end
159 175

  
160
#infile_mask <- "/nobackupp8/bparmen1/NEX_data/regions_input_files/r_mask_LST_reg4.tif"
161
#infile_mask <- "/data/project/layers/commons/NEX_data/regions_input_files/r_mask_LST_reg5.tif"
162
#infile_mask <- "/data/project/layers/commons/NEX_data/regions_input_files/r_mask_LST_reg4.tif"
163
#infile_mask <- "/data/project/layers/commons/NEX_data/regions_input_files/r_mask_LST_reg6.tif"
164

  
165
#run_figure_by_year <- TRUE # param 10, arg 7
166
#list_year_predicted <- "1984,2014"
167
scaling <- 0.01 #was scaled on 100 
168
#if scaling is null then perform no scaling!!
169

  
170
#df_centroids_fname <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/output_reg5_1999/df_centroids_19990701_reg5_1999.txt"
171
#df_centroids_fname <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg4/mosaic/output_reg4_1999/df_centroids_19990701_reg4_1999.txt"
172
#df_centroids_fname <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg6/mosaic/output_reg6_1984/df_centroids_19840101_reg6_1984.txt"
173
#/nobackupp6/aguzman4/climateLayers/out/reg1/assessment//output_reg1_1984/df_assessment_files_reg1_1984_reg1_1984.txt
174

  
175
#dates to plot and analyze
176

  
177
#l_dates <- c("19990101","19990102","19990103","19990701","19990702","19990703")
178
#l_dates <- c("19990101","19990102","19990103","19990104","19990105") 
179
#df_points_extracted_fname <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/data_points_extracted.txt"
180
#df_points_extracted_fname <- NULL #if null extract on the fly
181
#r_mosaic_fname <- "r_mosaic.RData"
182
#r_mosaic_fname <- NULL #if null create a stack from input dir
183

  
184 176
#NA_flag_val_mosaic <- -3399999901438340239948148078125514752.000
185 177
NA_flag_val_mosaic <- -32768
186 178
in_dir_list_filename <- NULL #if NULL, use the in_dir directory to search for info
......
188 180
lf_raster <- NULL #list of raster to consider
189 181
item_no <- 13
190 182

  
191
##################### START SCRIPT #################
192

  
193
####### PART 1: Read in data ########
194
out_dir <- in_dir
195
if (create_out_dir_param == TRUE) {
196
  out_dir <- create_dir_fun(out_dir,out_suffix)
197
  setwd(out_dir)
198
}else{
199
  setwd(out_dir) #use previoulsy defined directory
200
}
201

  
202
#setwd(out_dir)
203

  
204
###########  ####################
205

  
206
############ Using predicting first ##########
207

  
208
## using predictions
209
#pattern_str <- ".*.tif"
210
pattern_str <-"*.tif"
211
lf_raster <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=F,full.names=T)
212
r_stack <- stack(lf_raster,quick=T) #this is very fast now with the quick option!
213
#save(r_mosaic,file="r_mosaic.RData")
214

  
215
#### check for missing dates from list of tif
216

  
217
###This should be a function!!
218

  
219
#####  This could be moved in a separate file!!
220
###############  PART4: Checking for mosaic produced for given region ##############
221
## From list of mosaic files predicted extract dates
222
## Check dates predicted against date range for a given date range
223
## Join file information to centroids of tiles data.frame
224
#list_dates_produced <- unlist(mclapply(1:length(lf_mosaic_list),FUN=extract_date,x=lf_mosaic_list,item_no=13,mc.preschedule=FALSE,mc.cores = num_cores))                         
225
#list_dates_produced <-  mclapply(1:2,FUN=extract_date,x=lf_mosaic_list,item_no=13,mc.preschedule=FALSE,mc.cores = 2)                         
226
item_no <- 13
227
date_start <- day_start
228
date_end <- day_end
229
#day_start <- "1984101" #PARAM 12 arg 12
230
#day_end <- "20141231" #PARAM 13 arg 13
231

  
232
#Using default values for parameters exectpt for num_cores=11 instead of 1
233
#debug(check_missing)
234
#test_missing <- check_missing(lf=lf_raster, 
235
#                              pattern_str=NULL,
236
#                              in_dir=".", #this is not used if lf is given
237
#                              date_start="1984101",
238
#                              date_end="20141231",
239
#                              item_no=13,
240
#                              out_suffix="",
241
#                              num_cores=num_cores,
242
#                              out_dir=".")
243
  
244
##Run this on reg4 and reg5 after
245
#Add report by year in text file?
246
#Using specified values for parameters
247
test_missing <- check_missing(lf=lf_raster, 
248
                              pattern_str=NULL,
249
                              in_dir=in_dir_mosaic,
250
                              date_start="1984101",
251
                              date_end="20141231",
252
                              item_no=13,
253
                              out_suffix=out_suffix,
254
                              num_cores=num_cores,
255
                              out_dir=".")
256

  
257
df_time_series <- test_missing$df_time_series
258
head(df_time_series)
259

  
260
table(df_time_series$missing)
261
table(df_time_series$year)
262

  
263

  
264
#combine polygon
265
#http://gis.stackexchange.com/questions/155328/merging-multiple-spatialpolygondataframes-into-1-spdf-in-r
266

  
267
#http://gis.stackexchange.com/questions/116388/count-overlapping-polygons-in-single-shape-file-with-r
268

  
269
#### Use the predictions directory
270
#By region
271
#For each polygon/tile find polygon overlapping with count and ID (like list w)
272
#for each polygon/tile and date find if there is a prediction using the tif (multiply number)
273
#for each date of year report data in table.
274

  
275
#go through table and hsow if there are missing data (no prediction) or report min predictions for tile set?
276

  
277
############################################
278
#### Parameters and constants  
279

  
280
#on ATLAS
281
#in_dir1 <- "/data/project/layers/commons/NEX_data/test_run1_03232014/output" #On Atlas
282
#parent output dir : contains subset of the data produced on NEX
283
#in_dir1 <- "/data/project/layers/commons/NEX_data/output_run6_global_analyses_09162014/output20Deg2"
284
# parent output dir for the curent script analyes
285
#out_dir <-"/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/" #On NCEAS Atlas
286
# input dir containing shapefiles defining tiles
287
#in_dir_shp <- "/data/project/layers/commons/NEX_data/output_run5_global_analyses_08252014/output/subset/shapefiles"
288

  
289 183
#On NEX
290 184
#contains all data from the run by Alberto
291 185
in_dir1 <- "/nobackupp6/aguzman4/climateLayers/out" #On NEX
292 186
#parent output dir for the current script analyes
293
#out_dir <- "/nobackup/bparmen1/" #on NEX
294
#in_dir_shp <- "/nobackupp4/aguzman4/climateLayers/output4/subset/shapefiles/"
295

  
296
#in_dir <- "/data/project/layers/commons/NEX_data/reg4_assessment"
297
#list_in_dir_run <-
298
#in_dir_list <-  c("output_run_global_analyses_pred_2009_reg4","output_run_global_analyses_pred_2010_reg4",
299
#                  "output_run_global_analyses_pred_2011_reg4","output_run_global_analyses_pred_2012_reg4",
300
#                  "output_run_global_analyses_pred_2013_reg4","output_run_global_analyses_pred_2014_reg4")
301
#in_dir_list_filename <- "/data/project/layers/commons/NEX_data/reg4_assessment/stage6_reg4_in_dir_list_02072016.txt"
302
in_dir <- "" #PARAM 0
187

  
303 188
y_var_name <- "dailyTmax" #PARAM1
304 189
interpolation_method <- c("gam_CAI") #PARAM2
305 190
out_suffix <- "predictions_assessment_reg6_10302016"
......
373 258
  #for specific each find prediction...
374 259
  year_predicted <- list_param_run_assessment_prediction$list_year_predicted[i] 
375 260

  
376
  in_dir1 <- file.path(in_dir1,region_name)
261
  in_dir1_reg <- file.path(in_dir1,region_name)
377 262
  
378 263
  list_outfiles <- vector("list", length=14) #collect names of output files
379 264
  
380
  in_dir_list <- list.dirs(path=in_dir1,recursive=FALSE) #get the list regions processed for this run
265
  in_dir_list <- list.dirs(path=in_dir1_reg,recursive=FALSE) #get the list regions processed for this run
381 266
  #basename(in_dir_list)
382 267
  #                       y=in_dir_list) 
383 268
  
......
386 271
  in_dir_subset <- in_dir_list_all[grep("subset",basename(in_dir_list_all),invert=FALSE)] #select directory with shapefiles...
387 272
  in_dir_shp <- file.path(in_dir_subset,"shapefiles")
388 273
  
389
  
390 274
  #select only directories used for predictions
391 275
  #nested structure, we need to go to higher level to obtain the tiles...
392 276
  in_dir_reg <- in_dir_list[grep(".*._.*.",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles...
393 277
  #in_dir_reg <- in_dir_list[grep("july_tiffs",basename(in_dir_reg),invert=TRUE)] #select directory with shapefiles...
394 278
  in_dir_list <- in_dir_reg
395 279
  
396
  
397 280
  in_dir_list <- in_dir_list[grep("bak",basename(basename(in_dir_list)),invert=TRUE)] #the first one is the in_dir1
398 281
  #list of shapefiles used to define tiles
399 282
  in_dir_shp_list <- list.files(in_dir_shp,".shp",full.names=T)
......
404 287
  #system("ls /nobackup/bparmen1")
405 288
  out_dir <- in_dir
406 289
  if(create_out_dir_param==TRUE){
407
    out_dir <- create_dir_fun(out_dir,out_prefix)
290
    out_dir <- create_dir_fun(out_dir,out_suffix)
408 291
    setwd(out_dir)
409 292
  }else{
410 293
    setwd(out_dir) #use previoulsy defined directory
......
412 295
  
413 296
  setwd(out_dir)
414 297

  
298
  
299
  ##raster_prediction object : contains testing and training stations with RMSE and model object
300
  in_dir_list_tmp <- file.path(in_dir_list,year_predicted)
301
  list_raster_obj_files <- try(lapply(in_dir_list_tmp,FUN=function(x){list.files(path=x,pattern="^raster_prediction_obj.*.RData",full.names=T)}))
302
  #Add stop message here...if no raster object in any tiles then break from the function
303
  
304
  list_names_tile_coord <- lapply(list_raster_obj_files,FUN=function(x){basename(dirname(x))})
305
  list_names_tile_id <- paste("tile",1:length(list_raster_obj_files),sep="_")
306
  names(list_raster_obj_files)<- list_names_tile_id
307
  
308
  #one level up
309
  lf_covar_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="covar_obj.*.RData",full.names=T)})
310
  lf_covar_tif <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="covar.*.tif",full.names=T)})
311
  
312
  lf_sub_sampling_obj_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern=paste("^sub_sampling_obj_",interpolation_method,".*.RData",sep=""),full.names=T)})
313
  lf_sub_sampling_obj_daily_files <- lapply(in_dir_list_tmp,FUN=function(x){list.files(path=x,pattern="^sub_sampling_obj_daily.*.RData",full.names=T)})
314

  
315
  if(is.null(day_to_mosaic_range)){
316
  #  start_date <- #first date
317
     start_date <- paste0(year_processed,"0101") #change this later!!
318
     end_date <-   paste0(year_processed,"1231") #change this later!!
319
     day_to_mosaic <- seq(as.Date(strptime(start_date,"%Y%m%d")), as.Date(strptime(end_date,"%Y%m%d")), 'day')
320
     day_to_mosaic <- format(day_to_mosaic,"%Y%m%d") #format back to the relevant date format for files
321
  }else{
322
    start_date <- day_to_mosaic_range[1]
323
    end_date <- day_to_mosaic_range[2]
324
    day_to_mosaic <- seq(as.Date(strptime(start_date,"%Y%m%d")), as.Date(strptime(end_date,"%Y%m%d")), 'day')
325
    day_to_mosaic <- format(day_to_mosaic,"%Y%m%d") #format back to the relevant date format for files
326
  }
327
  
328
  in_dir_tiles_tmp <- in_dir1 #
329
  #in_dir_tiles_tmp <- in_dir_reg
330
  
331
  ### Do this by tile!!!
332
  
333
  # Making listing of files faster with multicores use
334
  lf_mosaic <- mclapply(1:length(day_to_mosaic),FUN=function(i){
335
    searchStr = paste(in_dir_tiles_tmp,"/*/",year_processed,"/gam_CAI_dailyTmax_predicted_",pred_mod_name,"*",day_to_mosaic[i],"*.tif",sep="")
336
    Sys.glob(searchStr)},mc.preschedule=FALSE,mc.cores = num_cores)
337
  
338
  lf_mosaic <- mclapply(1:length(day_to_mosaic),FUN=function(i){
339
    searchStr = paste(in_dir_tiles_tmp,"/*/",year_processed,"/gam_CAI_dailyTmax_predicted_",pred_mod_name,"*",day_to_mosaic[i],"*.tif",sep="")
340
    Sys.glob(searchStr)},mc.preschedule=FALSE,mc.cores = num_cores)
341

  
342
  ##Run this on reg4 and reg5 after
343
  #Add report by year in text file?
344
  #Using specified values for parameters
345
  test_missing <- check_missing(lf=lf_mosaic[[1]], 
346
                              pattern_str=NULL,
347
                              in_dir=in_dir_mosaic,
348
                              date_start="1984101",
349
                              date_end="20141231",
350
                              item_no=13,
351
                              out_suffix=out_suffix,
352
                              num_cores=num_cores,
353
                              out_dir=".")
354

  
355
  df_time_series <- test_missing$df_time_series
356
  head(df_time_series)
357

  
358
  table(df_time_series$missing)
359
  table(df_time_series$year)
360
  
361
  
362
  #combine polygon
363
  #http://gis.stackexchange.com/questions/155328/merging-multiple-spatialpolygondataframes-into-1-spdf-in-r
364

  
365
  #http://gis.stackexchange.com/questions/116388/count-overlapping-polygons-in-single-shape-file-with-r
366

  
367
  #### Use the predictions directory
368
  #By region
369
  #For each polygon/tile find polygon overlapping with count and ID (like list w)
370
  #for each polygon/tile and date find if there is a prediction using the tif (multiply number)
371
  #for each date of year report data in table.
372

  
373
  #go through table and hsow if there are missing data (no prediction) or report min predictions for tile set?
374

  
375
  
376
  
415 377
############################ END OF SCRIPT ##################################

Also available in: Unified diff