Revision 76fa8b31
Added by Benoit Parmentier about 8 years ago
climate/research/oregon/interpolation/global_product_assessment_part0.R | ||
---|---|---|
56 | 56 |
|
57 | 57 |
###### Function used in the script ####### |
58 | 58 |
|
59 |
#script_path <- "/nobackupp8/bparmen1/env_layers_scripts" #path to script
|
|
60 |
script_path <- "/home/parmentier/Data/IPLANT_project/env_layers_scripts" #path to script |
|
59 |
script_path <- "/nobackupp8/bparmen1/env_layers_scripts" #path to script, NEX
|
|
60 |
#script_path <- "/home/parmentier/Data/IPLANT_project/env_layers_scripts" #path to script
|
|
61 | 61 |
|
62 | 62 |
## NASA poster and paper related |
63 | 63 |
#source(file.path(script_path,"NASA2016_conference_temperature_predictions_function_05032016b.R")) |
... | ... | |
96 | 96 |
|
97 | 97 |
var<-"TMAX" # variable being interpolated #param 1, arg 1 |
98 | 98 |
|
99 |
|
|
100 |
#interpolation_method<-c("gam_fusion") #other options to be added later |
|
101 |
interpolation_method<-c("gam_CAI") #param 2 |
|
102 |
CRS_interp <- "+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0" #param 3 |
|
103 |
#CRS_interp <-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs"; |
|
104 |
|
|
105 |
#out_region_name<-"" |
|
106 |
#list_models<-c("y_var ~ s(lat,lon,k=5) + s(elev_s,k=3) + s(LST,k=3)") #param 4 |
|
107 |
metric_name <- "var_pred" #use RMSE if accuracy |
|
108 |
|
|
109 |
item_no <- 13 |
|
110 |
date_start <- day_start |
|
111 |
date_end <- day_end |
|
112 |
#day_start <- "1984101" #PARAM 12 arg 12 |
|
113 |
#day_end <- "20141231" #PARAM 13 arg 13 |
|
114 |
|
|
115 |
############################################ |
|
116 |
#### Parameters and constants |
|
117 |
|
|
99 | 118 |
##Add for precip later... |
100 | 119 |
if (var == "TMAX") { |
101 | 120 |
y_var_name <- "dailyTmax" |
... | ... | |
114 | 133 |
variable_name <- "minimum temperature" |
115 | 134 |
} |
116 | 135 |
|
117 |
#interpolation_method<-c("gam_fusion") #other options to be added later |
|
118 |
interpolation_method<-c("gam_CAI") #param 2 |
|
119 |
CRS_interp <- "+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0" #param 3 |
|
120 |
#CRS_interp <-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs"; |
|
121 |
|
|
122 |
#out_region_name<-"" |
|
123 |
#list_models<-c("y_var ~ s(lat,lon,k=5) + s(elev_s,k=3) + s(LST,k=3)") #param 4 |
|
124 |
metric_name <- "var_pred" #use RMSE if accuracy |
|
136 |
#on ATLAS |
|
137 |
#in_dir1 <- "/data/project/layers/commons/NEX_data/test_run1_03232014/output" #On Atlas |
|
138 |
#parent output dir : contains subset of the data produced on NEX |
|
139 |
#in_dir1 <- "/data/project/layers/commons/NEX_data/output_run6_global_analyses_09162014/output20Deg2" |
|
140 |
# parent output dir for the current script analyses |
|
141 |
#out_dir <-"/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/" #On NCEAS Atlas |
|
142 |
# input dir containing shapefiles defining tiles |
|
143 |
#in_dir_shp <- "/data/project/layers/commons/NEX_data/output_run5_global_analyses_08252014/output/subset/shapefiles" |
|
144 |
in_dir <- "/nobackupp6/aguzman4/climateLayers/out/reg6/assessment" |
|
145 |
in_dir <- "/nobackupp8/bparmen1/climateLayers/out/reg6/assessment" |
|
125 | 146 |
|
126 |
#reg1 (North Am), reg2(Europe),reg3(Asia), reg4 (South Am), reg5 (Africa), reg6 (Australia-Asia) |
|
127 |
#master directory containing the definition of tile size and tiles predicted |
|
128 |
#in_dir <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/assessment" |
|
129 |
#in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/mosaic" |
|
130 |
in_dir <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg6/assessment" |
|
131 |
in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg6/mosaics/mosaic" #predicted mosaic |
|
147 |
#in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg6/mosaics/mosaic" #predicted mosaic |
|
132 | 148 |
#in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg1/mosaics/mosaic" |
133 | 149 |
#in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaics/mosaic" |
134 | 150 |
#in_dir_mosaic <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg4/mosaic/mosaic" #note dropped the s in mosaics |
... | ... | |
152 | 168 |
#python_bin <- "/nobackupp6/aguzman4/climateLayers/sharedModules2/bin" #PARAM 30 |
153 | 169 |
python_bin <- "/usr/bin" #PARAM 30 |
154 | 170 |
|
155 |
day_start <- "1984101" #PARAM 12 arg 12
|
|
156 |
day_end <- "20141231" #PARAM 13 arg 13
|
|
171 |
day_start <- "2000101" #PARAM 12 arg 12
|
|
172 |
day_end <- "20001231" #PARAM 13 arg 13
|
|
157 | 173 |
#date_start <- day_start |
158 | 174 |
#date_end <- day_end |
159 | 175 |
|
160 |
#infile_mask <- "/nobackupp8/bparmen1/NEX_data/regions_input_files/r_mask_LST_reg4.tif" |
|
161 |
#infile_mask <- "/data/project/layers/commons/NEX_data/regions_input_files/r_mask_LST_reg5.tif" |
|
162 |
#infile_mask <- "/data/project/layers/commons/NEX_data/regions_input_files/r_mask_LST_reg4.tif" |
|
163 |
#infile_mask <- "/data/project/layers/commons/NEX_data/regions_input_files/r_mask_LST_reg6.tif" |
|
164 |
|
|
165 |
#run_figure_by_year <- TRUE # param 10, arg 7 |
|
166 |
#list_year_predicted <- "1984,2014" |
|
167 |
scaling <- 0.01 #was scaled on 100 |
|
168 |
#if scaling is null then perform no scaling!! |
|
169 |
|
|
170 |
#df_centroids_fname <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/output_reg5_1999/df_centroids_19990701_reg5_1999.txt" |
|
171 |
#df_centroids_fname <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg4/mosaic/output_reg4_1999/df_centroids_19990701_reg4_1999.txt" |
|
172 |
#df_centroids_fname <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg6/mosaic/output_reg6_1984/df_centroids_19840101_reg6_1984.txt" |
|
173 |
#/nobackupp6/aguzman4/climateLayers/out/reg1/assessment//output_reg1_1984/df_assessment_files_reg1_1984_reg1_1984.txt |
|
174 |
|
|
175 |
#dates to plot and analyze |
|
176 |
|
|
177 |
#l_dates <- c("19990101","19990102","19990103","19990701","19990702","19990703") |
|
178 |
#l_dates <- c("19990101","19990102","19990103","19990104","19990105") |
|
179 |
#df_points_extracted_fname <- "/data/project/layers/commons/NEX_data/climateLayers/out/reg5/mosaic/int_mosaics/data_points_extracted.txt" |
|
180 |
#df_points_extracted_fname <- NULL #if null extract on the fly |
|
181 |
#r_mosaic_fname <- "r_mosaic.RData" |
|
182 |
#r_mosaic_fname <- NULL #if null create a stack from input dir |
|
183 |
|
|
184 | 176 |
#NA_flag_val_mosaic <- -3399999901438340239948148078125514752.000 |
185 | 177 |
NA_flag_val_mosaic <- -32768 |
186 | 178 |
in_dir_list_filename <- NULL #if NULL, use the in_dir directory to search for info |
... | ... | |
188 | 180 |
lf_raster <- NULL #list of raster to consider |
189 | 181 |
item_no <- 13 |
190 | 182 |
|
191 |
##################### START SCRIPT ################# |
|
192 |
|
|
193 |
####### PART 1: Read in data ######## |
|
194 |
out_dir <- in_dir |
|
195 |
if (create_out_dir_param == TRUE) { |
|
196 |
out_dir <- create_dir_fun(out_dir,out_suffix) |
|
197 |
setwd(out_dir) |
|
198 |
}else{ |
|
199 |
setwd(out_dir) #use previously defined directory |
|
200 |
} |
|
201 |
|
|
202 |
#setwd(out_dir) |
|
203 |
|
|
204 |
########### #################### |
|
205 |
|
|
206 |
############ Using predicting first ########## |
|
207 |
|
|
208 |
## using predictions |
|
209 |
#pattern_str <- ".*.tif" |
|
210 |
pattern_str <-"*.tif" |
|
211 |
lf_raster <- list.files(path=in_dir_mosaic,pattern=pattern_str,recursive=F,full.names=T) |
|
212 |
r_stack <- stack(lf_raster,quick=T) #this is very fast now with the quick option! |
|
213 |
#save(r_mosaic,file="r_mosaic.RData") |
|
214 |
|
|
215 |
#### check for missing dates from list of tif |
|
216 |
|
|
217 |
###This should be a function!! |
|
218 |
|
|
219 |
##### This could be moved in a separate file!! |
|
220 |
############### PART4: Checking for mosaic produced for given region ############## |
|
221 |
## From list of mosaic files predicted extract dates |
|
222 |
## Check dates predicted against date range for a given date range |
|
223 |
## Join file information to centroids of tiles data.frame |
|
224 |
#list_dates_produced <- unlist(mclapply(1:length(lf_mosaic_list),FUN=extract_date,x=lf_mosaic_list,item_no=13,mc.preschedule=FALSE,mc.cores = num_cores)) |
|
225 |
#list_dates_produced <- mclapply(1:2,FUN=extract_date,x=lf_mosaic_list,item_no=13,mc.preschedule=FALSE,mc.cores = 2) |
|
226 |
item_no <- 13 |
|
227 |
date_start <- day_start |
|
228 |
date_end <- day_end |
|
229 |
#day_start <- "1984101" #PARAM 12 arg 12 |
|
230 |
#day_end <- "20141231" #PARAM 13 arg 13 |
|
231 |
|
|
232 |
#Using default values for parameters except for num_cores=11 instead of 1 |
|
233 |
#debug(check_missing) |
|
234 |
#test_missing <- check_missing(lf=lf_raster, |
|
235 |
# pattern_str=NULL, |
|
236 |
# in_dir=".", #this is not used if lf is given |
|
237 |
# date_start="1984101", |
|
238 |
# date_end="20141231", |
|
239 |
# item_no=13, |
|
240 |
# out_suffix="", |
|
241 |
# num_cores=num_cores, |
|
242 |
# out_dir=".") |
|
243 |
|
|
244 |
##Run this on reg4 and reg5 after |
|
245 |
#Add report by year in text file? |
|
246 |
#Using specified values for parameters |
|
247 |
test_missing <- check_missing(lf=lf_raster, |
|
248 |
pattern_str=NULL, |
|
249 |
in_dir=in_dir_mosaic, |
|
250 |
date_start="1984101", |
|
251 |
date_end="20141231", |
|
252 |
item_no=13, |
|
253 |
out_suffix=out_suffix, |
|
254 |
num_cores=num_cores, |
|
255 |
out_dir=".") |
|
256 |
|
|
257 |
df_time_series <- test_missing$df_time_series |
|
258 |
head(df_time_series) |
|
259 |
|
|
260 |
table(df_time_series$missing) |
|
261 |
table(df_time_series$year) |
|
262 |
|
|
263 |
|
|
264 |
#combine polygon |
|
265 |
#http://gis.stackexchange.com/questions/155328/merging-multiple-spatialpolygondataframes-into-1-spdf-in-r |
|
266 |
|
|
267 |
#http://gis.stackexchange.com/questions/116388/count-overlapping-polygons-in-single-shape-file-with-r |
|
268 |
|
|
269 |
#### Use the predictions directory |
|
270 |
#By region |
|
271 |
#For each polygon/tile find polygon overlapping with count and ID (like list w) |
|
272 |
#for each polygon/tile and date find if there is a prediction using the tif (multiply number) |
|
273 |
#for each date of year report data in table. |
|
274 |
|
|
275 |
#go through table and show if there are missing data (no prediction) or report min predictions for tile set? |
|
276 |
|
|
277 |
############################################ |
|
278 |
#### Parameters and constants |
|
279 |
|
|
280 |
#on ATLAS |
|
281 |
#in_dir1 <- "/data/project/layers/commons/NEX_data/test_run1_03232014/output" #On Atlas |
|
282 |
#parent output dir : contains subset of the data produced on NEX |
|
283 |
#in_dir1 <- "/data/project/layers/commons/NEX_data/output_run6_global_analyses_09162014/output20Deg2" |
|
284 |
# parent output dir for the current script analyses |
|
285 |
#out_dir <-"/data/project/layers/commons/NEX_data/output_run3_global_analyses_06192014/" #On NCEAS Atlas |
|
286 |
# input dir containing shapefiles defining tiles |
|
287 |
#in_dir_shp <- "/data/project/layers/commons/NEX_data/output_run5_global_analyses_08252014/output/subset/shapefiles" |
|
288 |
|
|
289 | 183 |
#On NEX |
290 | 184 |
#contains all data from the run by Alberto |
291 | 185 |
in_dir1 <- "/nobackupp6/aguzman4/climateLayers/out" #On NEX |
292 | 186 |
#parent output dir for the current script analyses |
293 |
#out_dir <- "/nobackup/bparmen1/" #on NEX |
|
294 |
#in_dir_shp <- "/nobackupp4/aguzman4/climateLayers/output4/subset/shapefiles/" |
|
295 |
|
|
296 |
#in_dir <- "/data/project/layers/commons/NEX_data/reg4_assessment" |
|
297 |
#list_in_dir_run <- |
|
298 |
#in_dir_list <- c("output_run_global_analyses_pred_2009_reg4","output_run_global_analyses_pred_2010_reg4", |
|
299 |
# "output_run_global_analyses_pred_2011_reg4","output_run_global_analyses_pred_2012_reg4", |
|
300 |
# "output_run_global_analyses_pred_2013_reg4","output_run_global_analyses_pred_2014_reg4") |
|
301 |
#in_dir_list_filename <- "/data/project/layers/commons/NEX_data/reg4_assessment/stage6_reg4_in_dir_list_02072016.txt" |
|
302 |
in_dir <- "" #PARAM 0 |
|
187 |
|
|
303 | 188 |
y_var_name <- "dailyTmax" #PARAM1 |
304 | 189 |
interpolation_method <- c("gam_CAI") #PARAM2 |
305 | 190 |
out_suffix <- "predictions_assessment_reg6_10302016" |
... | ... | |
373 | 258 |
#for specific each find prediction... |
374 | 259 |
year_predicted <- list_param_run_assessment_prediction$list_year_predicted[i] |
375 | 260 |
|
376 |
in_dir1 <- file.path(in_dir1,region_name) |
|
261 |
in_dir1_reg <- file.path(in_dir1,region_name)
|
|
377 | 262 |
|
378 | 263 |
list_outfiles <- vector("list", length=14) #collect names of output files |
379 | 264 |
|
380 |
in_dir_list <- list.dirs(path=in_dir1,recursive=FALSE) #get the list regions processed for this run |
|
265 |
in_dir_list <- list.dirs(path=in_dir1_reg,recursive=FALSE) #get the list regions processed for this run
|
|
381 | 266 |
#basename(in_dir_list) |
382 | 267 |
# y=in_dir_list) |
383 | 268 |
|
... | ... | |
386 | 271 |
in_dir_subset <- in_dir_list_all[grep("subset",basename(in_dir_list_all),invert=FALSE)] #select directory with shapefiles... |
387 | 272 |
in_dir_shp <- file.path(in_dir_subset,"shapefiles") |
388 | 273 |
|
389 |
|
|
390 | 274 |
#select only directories used for predictions |
391 | 275 |
#nested structure, we need to go to higher level to obtain the tiles... |
392 | 276 |
in_dir_reg <- in_dir_list[grep(".*._.*.",basename(in_dir_list),invert=FALSE)] #select directory with shapefiles... |
393 | 277 |
#in_dir_reg <- in_dir_list[grep("july_tiffs",basename(in_dir_reg),invert=TRUE)] #select directory with shapefiles... |
394 | 278 |
in_dir_list <- in_dir_reg |
395 | 279 |
|
396 |
|
|
397 | 280 |
in_dir_list <- in_dir_list[grep("bak",basename(basename(in_dir_list)),invert=TRUE)] #the first one is the in_dir1 |
398 | 281 |
#list of shapefiles used to define tiles |
399 | 282 |
in_dir_shp_list <- list.files(in_dir_shp,".shp",full.names=T) |
... | ... | |
404 | 287 |
#system("ls /nobackup/bparmen1") |
405 | 288 |
out_dir <- in_dir |
406 | 289 |
if(create_out_dir_param==TRUE){ |
407 |
out_dir <- create_dir_fun(out_dir,out_prefix)
|
|
290 |
out_dir <- create_dir_fun(out_dir,out_suffix)
|
|
408 | 291 |
setwd(out_dir) |
409 | 292 |
}else{ |
410 | 293 |
setwd(out_dir) #use previously defined directory |
... | ... | |
412 | 295 |
|
413 | 296 |
setwd(out_dir) |
414 | 297 |
|
298 |
|
|
299 |
##raster_prediction object : contains testing and training stations with RMSE and model object |
|
300 |
in_dir_list_tmp <- file.path(in_dir_list,year_predicted) |
|
301 |
list_raster_obj_files <- try(lapply(in_dir_list_tmp,FUN=function(x){list.files(path=x,pattern="^raster_prediction_obj.*.RData",full.names=T)})) |
|
302 |
#Add stop message here...if no raster object in any tiles then break from the function |
|
303 |
|
|
304 |
list_names_tile_coord <- lapply(list_raster_obj_files,FUN=function(x){basename(dirname(x))}) |
|
305 |
list_names_tile_id <- paste("tile",1:length(list_raster_obj_files),sep="_") |
|
306 |
names(list_raster_obj_files)<- list_names_tile_id |
|
307 |
|
|
308 |
#one level up |
|
309 |
lf_covar_obj <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="covar_obj.*.RData",full.names=T)}) |
|
310 |
lf_covar_tif <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern="covar.*.tif",full.names=T)}) |
|
311 |
|
|
312 |
lf_sub_sampling_obj_files <- lapply(in_dir_list,FUN=function(x){list.files(path=x,pattern=paste("^sub_sampling_obj_",interpolation_method,".*.RData",sep=""),full.names=T)}) |
|
313 |
lf_sub_sampling_obj_daily_files <- lapply(in_dir_list_tmp,FUN=function(x){list.files(path=x,pattern="^sub_sampling_obj_daily.*.RData",full.names=T)}) |
|
314 |
|
|
315 |
if(is.null(day_to_mosaic_range)){ |
|
316 |
# start_date <- #first date |
|
317 |
start_date <- paste0(year_processed,"0101") #change this later!! |
|
318 |
end_date <- paste0(year_processed,"1231") #change this later!! |
|
319 |
day_to_mosaic <- seq(as.Date(strptime(start_date,"%Y%m%d")), as.Date(strptime(end_date,"%Y%m%d")), 'day') |
|
320 |
day_to_mosaic <- format(day_to_mosaic,"%Y%m%d") #format back to the relevant date format for files |
|
321 |
}else{ |
|
322 |
start_date <- day_to_mosaic_range[1] |
|
323 |
end_date <- day_to_mosaic_range[2] |
|
324 |
day_to_mosaic <- seq(as.Date(strptime(start_date,"%Y%m%d")), as.Date(strptime(end_date,"%Y%m%d")), 'day') |
|
325 |
day_to_mosaic <- format(day_to_mosaic,"%Y%m%d") #format back to the relevant date format for files |
|
326 |
} |
|
327 |
|
|
328 |
in_dir_tiles_tmp <- in_dir1 # |
|
329 |
#in_dir_tiles_tmp <- in_dir_reg |
|
330 |
|
|
331 |
### Do this by tile!!! |
|
332 |
|
|
333 |
# Making listing of files faster with multicores use |
|
334 |
lf_mosaic <- mclapply(1:length(day_to_mosaic),FUN=function(i){ |
|
335 |
searchStr = paste(in_dir_tiles_tmp,"/*/",year_processed,"/gam_CAI_dailyTmax_predicted_",pred_mod_name,"*",day_to_mosaic[i],"*.tif",sep="") |
|
336 |
Sys.glob(searchStr)},mc.preschedule=FALSE,mc.cores = num_cores) |
|
337 |
|
|
338 |
lf_mosaic <- mclapply(1:length(day_to_mosaic),FUN=function(i){ |
|
339 |
searchStr = paste(in_dir_tiles_tmp,"/*/",year_processed,"/gam_CAI_dailyTmax_predicted_",pred_mod_name,"*",day_to_mosaic[i],"*.tif",sep="") |
|
340 |
Sys.glob(searchStr)},mc.preschedule=FALSE,mc.cores = num_cores) |
|
341 |
|
|
342 |
##Run this on reg4 and reg5 after |
|
343 |
#Add report by year in text file? |
|
344 |
#Using specified values for parameters |
|
345 |
test_missing <- check_missing(lf=lf_mosaic[[1]], |
|
346 |
pattern_str=NULL, |
|
347 |
in_dir=in_dir_mosaic, |
|
348 |
date_start="1984101", |
|
349 |
date_end="20141231", |
|
350 |
item_no=13, |
|
351 |
out_suffix=out_suffix, |
|
352 |
num_cores=num_cores, |
|
353 |
out_dir=".") |
|
354 |
|
|
355 |
df_time_series <- test_missing$df_time_series |
|
356 |
head(df_time_series) |
|
357 |
|
|
358 |
table(df_time_series$missing) |
|
359 |
table(df_time_series$year) |
|
360 |
|
|
361 |
|
|
362 |
#combine polygon |
|
363 |
#http://gis.stackexchange.com/questions/155328/merging-multiple-spatialpolygondataframes-into-1-spdf-in-r |
|
364 |
|
|
365 |
#http://gis.stackexchange.com/questions/116388/count-overlapping-polygons-in-single-shape-file-with-r |
|
366 |
|
|
367 |
#### Use the predictions directory |
|
368 |
#By region |
|
369 |
#For each polygon/tile find polygon overlapping with count and ID (like list w) |
|
370 |
#for each polygon/tile and date find if there is a prediction using the tif (multiply number) |
|
371 |
#for each date of year report data in table. |
|
372 |
|
|
373 |
#go through table and show if there are missing data (no prediction) or report min predictions for tile set? |
|
374 |
|
|
375 |
|
|
376 |
|
|
415 | 377 |
############################ END OF SCRIPT ################################## |
Also available in: Unified diff
more edit and adding test files checking for tiles