Revision ed14e5a5
Added by Benoit Parmentier over 9 years ago
climate/research/oregon/interpolation/global_run_scalingup_mosaicing_function.R | ||
---|---|---|
4 | 4 |
#Different options to explore mosaicing are tested. This script only contains functions. |
5 | 5 |
#AUTHOR: Benoit Parmentier |
6 | 6 |
#CREATED ON: 04/14/2015 |
7 |
#MODIFIED ON: 10/05/2015
|
|
7 |
#MODIFIED ON: 10/10/2015
|
|
8 | 8 |
#Version: 1 |
9 | 9 |
#PROJECT: Environmental Layers project |
10 | 10 |
#COMMENTS: first commit of function script to test mosaicing using 1500x4500km and other tiles |
... | ... | |
88 | 88 |
return(y) |
89 | 89 |
} |
90 | 90 |
|
91 |
## Add numcores |
|
92 |
## use mclapply |
|
93 |
create_accuracy_metric_raster <- function(i, list_param){ |
|
94 |
#This function generates weights from a point location on a raster layer. |
|
95 |
#Note that the weights are normatlized on 0-1 scale using max and min values. |
|
96 |
#Inputs: |
|
97 |
#lf: list of raster files |
|
98 |
#tb: data.frame table #fitting or validation table with all days |
|
99 |
#metric_name: accuracy metric selected to be mapped, RMSE, MAE etc. |
|
100 |
#pred_mod_name: model selected, such as mod1, mod_kr etc. |
|
101 |
#y_var_name: variable being modeled e.g."dailyTmax", dailyTmin, precip |
|
102 |
#interpolation_method <- list_param$interpolation_method #c("gam_CAI") #PARAM3 |
|
103 |
#date_processed: day being processed , e.g. 19920101 |
|
104 |
#NA_flag_val: value used as flag in the raster |
|
105 |
#file_format: e.g. tif., .rst |
|
106 |
#out_dir_str: output directory |
|
107 |
#out_suffix_str: <- list_param$out_suffix |
|
108 |
#Outputs: |
|
109 |
#raster list of weights and product of wegihts and inuts |
|
110 |
#TODO: |
|
111 |
|
|
112 |
# - improve efficiency |
|
113 |
# |
|
114 |
############ |
|
115 |
|
|
116 |
lf <- list_param$lf #list of files to mosaic |
|
117 |
tb <- list_param$tb #fitting or validation table with all days |
|
118 |
metric_name <- list_param$metric_name #RMSE, MAE etc. |
|
119 |
pred_mod_name <- list_param$pred_mod_name #mod1, mod_kr etc. |
|
120 |
y_var_name <- list_param$y_var_name #"dailyTmax" #PARAM2 |
|
121 |
interpolation_method <- list_param$interpolation_method #c("gam_CAI") #PARAM3 |
|
122 |
date_processed <- list_param$days_to_process[i] |
|
123 |
NA_flag_val <- list_param$NA_flag_val |
|
124 |
#NAflag,file_format,out_suffix etc... |
|
125 |
file_format <- list_param$file_format |
|
126 |
out_dir_str <- list_param$out_dir |
|
127 |
out_suffix_str <- list_param$out_suffix |
|
128 |
|
|
129 |
####### START SCRIPT ##### |
|
130 |
|
|
131 |
#r_in <- raster(lf[i]) #input image |
|
132 |
|
|
133 |
#date_processed <- day_to_mosaic[i] |
|
134 |
#lf_to_mosaic <-list.files(path=file.path(in_dir_tiles), |
|
135 |
# pattern=paste(".*.",date_processed,".*.tif$",sep=""),full.names=T) #choosing date 2...20100901 |
|
136 |
|
|
137 |
lf_tmp <- gsub(file_format,"",lf) |
|
138 |
tx<-strsplit(as.character(lf_tmp),"_") |
|
139 |
#deal with the fact that we have number "1" attached to the out_suffix (centroids of tiles) |
|
140 |
pos_lat <- lapply(1:length(tx),function(i,x){length(x[[i]])-1},x=tx) |
|
141 |
pos_lon <- lapply(1:length(tx),function(i,x){length(x[[i]])},x=tx) |
|
142 |
lat_val <- unlist(lapply(1:length(tx),function(i,x,y){x[[i]][pos_lat[[i]]]},x=tx,y=pos_lat)) |
|
143 |
lat <- as.character(lapply(1:length(lat_val),function(i,x){substr(x[[i]],2,nchar(x[i]))},x=lat_val)) #first number not in the coordinates |
|
144 |
long <- as.character(lapply(1:length(tx),function(i,x,y){x[[i]][pos_lon[[i]]]},x=tx,y=lon_lat)) |
|
145 |
|
|
146 |
df_centroids <- data.frame(long=as.numeric(long),lat=as.numeric(lat)) |
|
147 |
df_centroids$ID <- as.numeric(1:nrow(df_centroids)) |
|
148 |
df_centroids$tile_coord <- paste(lat,long,sep="_") |
|
149 |
df_centroids$files <- lf |
|
150 |
df_centroids$date <- date_processed |
|
151 |
write.table(df_centroids,paste("df_centroids_",date_processed,"_",out_suffix,".txt",sep=""),sep=',') |
|
152 |
|
|
153 |
#sprintf(" %3.1f", df_centroids$lat) |
|
154 |
|
|
155 |
tb_date <- subset(tb,date==date_processed & pred_mod==pred_mod_name) |
|
156 |
tb_date$tile_coord <- as.character(tb_date$tile_coord) |
|
157 |
df_centroids <- merge(df_centroids,tb_date,by="tile_coord") |
|
158 |
|
|
159 |
#r1 <- raster(lf[i]) |
|
160 |
|
|
161 |
#function(j,df_centroids,metric_name,date_processed,interpolation,) |
|
162 |
#loop over files, make this a function later, works for now |
|
163 |
#use mclapply |
|
164 |
list_raster_name <- vector("list",length=length(lf)) |
|
165 |
|
|
166 |
for(j in 1:length(lf)){ |
|
167 |
|
|
168 |
inFilename <- df_centroids$files[j] |
|
169 |
r1 <- raster(inFilename) |
|
170 |
r1[] <- df_centroids[[metric_name]][j] #improve this |
|
171 |
#set1f <- function(x){rep(NA, x)} |
|
172 |
#r_init <- init(r_in, fun=set1f) |
|
173 |
lf_tmp <- gsub(file_format,"",lf) |
|
174 |
|
|
175 |
extension_str <- extension(inFilename) |
|
176 |
raster_name_tmp <- gsub(extension_str,"",basename(inFilename)) |
|
177 |
outFilename <- file.path(out_dir,paste(raster_name_tmp,"_",metric_name,"_",out_suffix,file_format,sep="")) #for use in function later... |
|
178 |
|
|
179 |
#raster_name <- file.path(out_dir_str,paste(raster_name_tmp,"_",out_suffix,file_format,sep=""))#output file |
|
180 |
|
|
181 |
#raster_name_tmp <- paste("r_",metric_name,"_",interpolation_method,"_",date_processed,"_",out_suffix_str,sep="") |
|
182 |
#raster_name <- file.path(out_dir_str,raster_name_tmp) |
|
183 |
writeRaster(r1, NAflag=NA_flag_val,filename=outFilename,overwrite=TRUE) |
|
184 |
list_raster_name[[j]] <- outFilename |
|
185 |
} |
|
186 |
|
|
187 |
|
|
188 |
raster_created_obj <- list(list_raster_name,df_centroids) |
|
189 |
names(raster_created_obj) <- c("list_raster_name","df_centroids") |
|
190 |
return(raster_created_obj) |
|
191 |
|
|
192 |
} |
|
193 |
|
|
194 |
#### end of function |
|
195 |
|
|
91 | 196 |
create_weights_fun <- function(i, list_param){ |
92 | 197 |
#This function generates weights from a point location on a raster layer. |
93 | 198 |
#Note that the weights are normatlized on 0-1 scale using max and min values. |
... | ... | |
299 | 404 |
r_ref <- init(r_m, fun=set1f, filename=raster_name, overwrite=TRUE) |
300 | 405 |
#NAvalue(r_ref) <- -9999 |
301 | 406 |
|
302 |
cmd_str <- paste("/usr/bin/gdalwarp",inFilename,raster_name,sep=" ") |
|
407 |
cmd_str <- paste("/usr/bin/gdalwarp",inFilename,raster_name,sep=" ") #this may be a problem
|
|
303 | 408 |
#gdalwarp -t_srs '+proj=utm +zone=11 +datum=WGS84' raw_spot.tif utm11.tif |
304 | 409 |
|
305 | 410 |
system(cmd_str) |
... | ... | |
308 | 413 |
return(raster_name) |
309 | 414 |
} |
310 | 415 |
|
311 |
mosaicFiles <- function(lf_mosaic,mosaic_method,num_cores,python_bin=NULL,df_points=NULL,NA_flag_val=-9999,file_format=".tif",out_suffix=NULL,out_dir=NULL){ |
|
312 |
#Add documentation!!!! |
|
313 |
## |
|
314 |
|
|
416 |
mosaicFiles <- function(lf_mosaic,mosaic_method="unweighted",num_cores=1,r_mask_raster_name=NULL,python_bin=NULL,df_points=NULL,NA_flag_val=-9999,file_format=".tif",out_suffix=NULL,out_dir=NULL){ |
|
417 |
#This functions mosaics tiles/files give a list of files |
|
418 |
#There are four options to mosaic: use_sine_weights,use_edge,use_linear_weights, unweighted |
|
419 |
#Sine weights fits sine fuctions across rows and column producing elliptical/spherical patterns from center |
|
420 |
#Use edge uses the distance from the edge of the tiles/fies, higher weights towards the center |
|
421 |
#Linear weights use simple linear average from distance point feature (usually centroid) |
|
422 |
#Unweighted: average without and weigthing surface |
|
423 |
# |
|
424 |
#INPUT Arguments: |
|
425 |
#1)lf_mosaic: list of files to mosaic |
|
426 |
#2)mosaic_method: mosaic methods availbable:use_sine_weights,use_edge,use_linear_weights |
|
427 |
#3)num_cores: number of cores used in parallilization in mclapply |
|
428 |
#4)r_mask_raster_name: mask rference raster image |
|
429 |
#4)python_bin: location of python executables, defaut is NULL |
|
430 |
#5)df_points: point location used in weighting, defaul is NULL |
|
431 |
#6)NA_flag_val: raster flag values, e.g. -9999 |
|
432 |
#7)file_format: raster format used, default is ".tif" |
|
433 |
#8)out_suffix: output suffix, default is NULL, it is recommended to add the variable name |
|
434 |
# here e.g. dailyTmax and date!! |
|
435 |
#9)out_dir: output directory, default is NULL |
|
436 |
# |
|
437 |
#OUTPUT: |
|
438 |
# Ojbec is produced with 3 components: |
|
439 |
# 1) mean_mosaic: list of raster files from produced mosaic , |
|
440 |
# 2) r_weights: list of raster files from weights |
|
441 |
# 3) r_weights_prod: list of raster files from product weights (weights*value) |
|
442 |
# 4) method: weighting average used |
|
443 |
# |
|
444 |
|
|
445 |
#################################### |
|
315 | 446 |
### BEGIN #### |
447 |
|
|
316 | 448 |
out_dir_str <- out_dir |
317 | 449 |
|
318 | 450 |
lf_r_weights <- vector("list",length=length(lf_mosaic)) |
... | ... | |
373 | 505 |
} |
374 | 506 |
|
375 | 507 |
if(mosaic_method=="use_edge_weights"){ |
508 |
#this took 5 minutes for 28 tiles for reg4, South America, 4*28 |
|
376 | 509 |
|
377 | 510 |
method <- "use_edge" |
378 | 511 |
df_points <- NULL |
... | ... | |
381 | 514 |
names(list_param_create_weights) <- c("lf","df_points","r_feature","method","out_dir_str") |
382 | 515 |
#num_cores <- 11 |
383 | 516 |
|
384 |
weights_obj <- create_weights_fun(1,list_param=list_param_create_weights) |
|
517 |
#weights_obj <- create_weights_fun(1,list_param=list_param_create_weights)
|
|
385 | 518 |
|
386 | 519 |
#This is the function creating the weights by tile. Distance from the centroids needs to be change from distance to |
387 | 520 |
#the edges...can use rows and columsn to set edges to 1 and 0 for the others. |
388 | 521 |
use_edge_weights_obj_list <- mclapply(1:length(lf_mosaic),FUN=create_weights_fun,list_param=list_param_create_weights,mc.preschedule=FALSE,mc.cores = num_cores) |
389 | 522 |
|
523 |
#extract the list of files for weights and product weights |
|
390 | 524 |
list_edge_r_weights <- lapply(1:length(use_edge_weights_obj_list), FUN=function(i,x){x[[i]]$r_weights},x=use_edge_weights_obj_list) |
391 | 525 |
list_edge_r_weights_prod <- lapply(1:length(use_edge_weights_obj_list), FUN=function(i,x){x[[i]]$r_weights_prod},x=use_edge_weights_obj_list) |
392 | 526 |
|
... | ... | |
401 | 535 |
### PART 3: prepare weightsfor mosaicing by matching extent ############ |
402 | 536 |
|
403 | 537 |
## Rasters tiles vary slightly in resolution, they need to be matched for the mosaic. Resolve issue in the |
404 |
#mosaic funciton using gdal_merge to compute a reference image to mach.
|
|
538 |
#mosaic function using gdal_merge to compute a reference image to mach.
|
|
405 | 539 |
|
406 | 540 |
rast_ref <- file.path(out_dir,paste("avg_",out_suffix,file_format,sep="")) #this is a the ref |
407 |
|
|
408 | 541 |
cmd_str <- paste("python","/usr/bin/gdal_merge.py","-o ",rast_ref,paste(lf_mosaic,collapse=" ")) |
409 | 542 |
system(cmd_str) |
410 | 543 |
|
... | ... | |
446 | 579 |
list_args_weights_prod <- list_weights_prod_m |
447 | 580 |
method_str <- method |
448 | 581 |
|
449 |
#list_args_weights <- (mixedsort(list.files(pattern="r_weights_m_.*.tif")))
|
|
582 |
#making a list of raster object before mosaicing
|
|
450 | 583 |
list_args_weights <- lapply(1:length(list_args_weights), FUN=function(i,x){raster(x[[i]])},x=list_args_weights) |
451 | 584 |
|
452 | 585 |
#get the list of weights product into raster objects |
453 |
#list_args_weights_prod <- list_weights_prod_m |
|
454 |
#list_args_weights_prod <- (mixedsort(list.files(pattern="r_weights_prod_m_.*.tif"))) |
|
455 |
list_args_weights_prod <- lapply(1:length(list_args_weights_prod), FUN=function(i,x){raster(x[[i]])},x=list_args_weights_prod) |
|
456 | 586 |
|
587 |
list_args_weights_prod <- lapply(1:length(list_args_weights_prod), FUN=function(i,x){raster(x[[i]])},x=list_args_weights_prod) |
|
457 | 588 |
list_args_weights_prod$fun <- "sum" |
458 | 589 |
list_args_weights_prod$na.rm <- TRUE |
590 |
r_weights_sum_raster_name <- file.path(out_dir,paste("r_weights_sum_m_",method_str,"_weighted_mean_",out_suffix,".tif",sep="")) |
|
591 |
list_args_weights$filename <- r_weights_sum_raster_name |
|
592 |
list_args_weights$overwrite<- TRUE |
|
593 |
|
|
459 | 594 |
|
460 | 595 |
list_args_weights$fun <- "sum" #we want the sum to compute the weighted mean |
461 | 596 |
list_args_weights$na.rm <- TRUE |
597 |
r_prod_sum_raster_name <- file.path(out_dir,paste("r_prod_sum_m_",method_str,"_weighted_mean_",out_suffix,".tif",sep="")) |
|
598 |
list_args_weights_prod$filename <- r_prod_sum_raster_name |
|
462 | 599 |
|
463 |
#Mosaic files |
|
600 |
#Mosaic files: this is where we can use Alberto Python function but modified with option for |
|
601 |
#sum in addition ot the current option for mean. |
|
602 |
#This took 23 minutes! |
|
464 | 603 |
r_weights_sum <- do.call(mosaic,list_args_weights) #weights sum image mosaiced |
465 | 604 |
r_prod_sum <- do.call(mosaic,list_args_weights_prod) #weights sum product image mosacied |
466 | 605 |
|
467 |
r_m_weighted_mean <- r_prod_sum/r_weights_sum #this is the mosaic using weighted mean... |
|
468 |
raster_name <- file.path(out_dir,paste("r_m_",method_str,"_weighted_mean_",out_suffix,".tif",sep="")) |
|
469 |
writeRaster(r_m_weighted_mean, NAflag=NA_flag_val,filename=raster_name,overwrite=TRUE) |
|
606 |
#r_m_weighted_mean <- r_prod_sum/r_weights_sum #this is the mosaic using weighted mean... |
|
607 |
#"gdal_calc.py -A input1.tif -B input2.tif --outfile=result.tif --calc="A+B"" |
|
608 |
|
|
609 |
r_m_weighted_mean_raster_name <- file.path(out_dir,paste("r_m_",method_str,"_weighted_mean_",out_suffix,".tif",sep="")) |
|
610 |
|
|
611 |
#cmd_str <- paste("gdal_calc.py" |
|
612 |
# " "-A input1.tif", |
|
613 |
# "-B input2.tif", |
|
614 |
# paste("--outfile=",r_m_weighted_mean_name,sep=""), |
|
615 |
# --calc="A+B" |
|
616 |
#--NoDataValue=0) |
|
617 |
if(is.null(python_bin)){ |
|
618 |
python_bin="" |
|
619 |
} |
|
620 |
|
|
621 |
python_cmd <- file.path(python_bin,"gdal_calc.py") |
|
622 |
cmd_str <- paste(python_cmd, |
|
623 |
paste("-A ", r_prod_sum_raster_name,sep=""), |
|
624 |
paste("-B ", r_weights_sum_raster_name,sep=""), |
|
625 |
paste("--outfile=",r_m_weighted_mean_raster_name,sep=""), |
|
626 |
"--calc='A/B'","--overwrite",sep=" ") |
|
627 |
system(cmd_str) |
|
628 |
|
|
629 |
#writeRaster(r_m_weighted_mean, NAflag=NA_flag_val,filename=raster_name,overwrite=TRUE) |
|
630 |
#now use the mask |
|
631 |
r_m_weighted_mean_mask_raster_name <- file.path(out_dir,paste("r_m_",method_str,"_weighted_mean_mask_",out_suffix,".tif",sep="")) |
|
632 |
|
|
633 |
#mask(raster(r_m_weighted_mean_raster_name),mask=r_mask_raster_name,filename=r_m_weighted_mean_mask_raster_name) |
|
470 | 634 |
|
471 | 635 |
} |
472 | 636 |
|
... | ... | |
492 | 656 |
|
493 | 657 |
list_args_pred_m$fun <- "mean" |
494 | 658 |
list_args_pred_m$na.rm <- TRUE |
659 |
#list_args_pred_m$filename <- |
|
495 | 660 |
|
496 | 661 |
#Mosaic files |
497 | 662 |
r_m_mean <- do.call(mosaic,list_args_pred_m) #this is unweighted mean from the predicted raster |
Also available in: Unified diff
script mosaicing function, adding raster accuracy metric funtion