Project

General

Profile

« Previous | Next » 

Revision ed14e5a5

Added by Benoit Parmentier over 9 years ago

script mosaicing function, adding raster accuracy metric funtion

View differences:

climate/research/oregon/interpolation/global_run_scalingup_mosaicing_function.R
4 4
#Different options to explore mosaicing are tested. This script only contains functions.
5 5
#AUTHOR: Benoit Parmentier 
6 6
#CREATED ON: 04/14/2015  
7
#MODIFIED ON: 10/05/2015            
7
#MODIFIED ON: 10/10/2015            
8 8
#Version: 1
9 9
#PROJECT: Environmental Layers project     
10 10
#COMMENTS: first commit of function script to test mosaicing using 1500x4500km and other tiles
......
88 88
  return(y)
89 89
}
90 90

  
91
## Add numcores
92
## use mclapply
93
create_accuracy_metric_raster <- function(i, list_param){
94
  #This function generates weights from a point location on a raster layer.
95
  #Note that the weights are normatlized on 0-1 scale using max and min values.
96
  #Inputs:
97
  #lf: list of raster files
98
  #tb: data.frame table #fitting or validation table with all days
99
  #metric_name: accuracy metric selected to be mapped, RMSE, MAE etc.
100
  #pred_mod_name: model selected, such as mod1, mod_kr etc.
101
  #y_var_name: variable being modeled e.g."dailyTmax", dailyTmin, precip  
102
  #interpolation_method <- list_param$interpolation_method #c("gam_CAI") #PARAM3
103
  #date_processed: day being processed , e.g. 19920101
104
  #NA_flag_val: value used as flag in the raster 
105
  #file_format: e.g. tif., .rst
106
  #out_dir_str: output directory
107
  #out_suffix_str: <- list_param$out_suffix
108
  #Outputs:
109
  #raster list of weights and product of wegihts and inuts
110
  #TODO: 
111

  
112
  # - improve efficiency
113
  #
114
  ############
115
  
116
  lf <- list_param$lf #list of files to mosaic
117
  tb <- list_param$tb #fitting or validation table with all days
118
  metric_name <- list_param$metric_name #RMSE, MAE etc.
119
  pred_mod_name <- list_param$pred_mod_name #mod1, mod_kr etc.
120
  y_var_name <- list_param$y_var_name #"dailyTmax" #PARAM2
121
  interpolation_method <- list_param$interpolation_method #c("gam_CAI") #PARAM3
122
  date_processed <- list_param$days_to_process[i]
123
  NA_flag_val <- list_param$NA_flag_val
124
  #NAflag,file_format,out_suffix etc...
125
  file_format <- list_param$file_format
126
  out_dir_str <- list_param$out_dir
127
  out_suffix_str <- list_param$out_suffix
128
   
129
  ####### START SCRIPT #####
130
  
131
  #r_in <- raster(lf[i]) #input image
132

  
133
  #date_processed <- day_to_mosaic[i]
134
  #lf_to_mosaic <-list.files(path=file.path(in_dir_tiles),    
135
  #         pattern=paste(".*.",date_processed,".*.tif$",sep=""),full.names=T) #choosing date 2...20100901
136

  
137
  lf_tmp <- gsub(file_format,"",lf)
138
  tx<-strsplit(as.character(lf_tmp),"_")
139
  #deal with the fact that we have number "1" attached to the out_suffix (centroids of tiles)
140
  pos_lat <- lapply(1:length(tx),function(i,x){length(x[[i]])-1},x=tx)
141
  pos_lon <- lapply(1:length(tx),function(i,x){length(x[[i]])},x=tx)
142
  lat_val <- unlist(lapply(1:length(tx),function(i,x,y){x[[i]][pos_lat[[i]]]},x=tx,y=pos_lat))
143
  lat <- as.character(lapply(1:length(lat_val),function(i,x){substr(x[[i]],2,nchar(x[i]))},x=lat_val)) #first number not in the coordinates
144
  long <- as.character(lapply(1:length(tx),function(i,x,y){x[[i]][pos_lon[[i]]]},x=tx,y=lon_lat))
145

  
146
  df_centroids <- data.frame(long=as.numeric(long),lat=as.numeric(lat))
147
  df_centroids$ID <- as.numeric(1:nrow(df_centroids))
148
  df_centroids$tile_coord <- paste(lat,long,sep="_")
149
  df_centroids$files <- lf
150
  df_centroids$date <- date_processed
151
  write.table(df_centroids,paste("df_centroids_",date_processed,"_",out_suffix,".txt",sep=""),sep=',')
152

  
153
  #sprintf(" %3.1f", df_centroids$lat)
154

  
155
  tb_date <- subset(tb,date==date_processed & pred_mod==pred_mod_name)
156
  tb_date$tile_coord <- as.character(tb_date$tile_coord)
157
  df_centroids <- merge(df_centroids,tb_date,by="tile_coord")
158

  
159
  #r1 <- raster(lf[i])
160
  
161
  #function(j,df_centroids,metric_name,date_processed,interpolation,)
162
  #loop over files, make this a function later, works for now
163
  #use mclapply  
164
  list_raster_name <- vector("list",length=length(lf))
165
  
166
  for(j in 1:length(lf)){ 
167
    
168
    inFilename <- df_centroids$files[j]
169
    r1 <- raster(inFilename)
170
    r1[] <- df_centroids[[metric_name]][j] #improve this
171
    #set1f <- function(x){rep(NA, x)}
172
    #r_init <- init(r_in, fun=set1f)
173
    lf_tmp <- gsub(file_format,"",lf)
174
  
175
    extension_str <- extension(inFilename)
176
    raster_name_tmp <- gsub(extension_str,"",basename(inFilename))
177
    outFilename <- file.path(out_dir,paste(raster_name_tmp,"_",metric_name,"_",out_suffix,file_format,sep="")) #for use in function later...
178
  
179
    #raster_name <- file.path(out_dir_str,paste(raster_name_tmp,"_",out_suffix,file_format,sep=""))#output file
180

  
181
    #raster_name_tmp <- paste("r_",metric_name,"_",interpolation_method,"_",date_processed,"_",out_suffix_str,sep="")
182
    #raster_name <- file.path(out_dir_str,raster_name_tmp)
183
    writeRaster(r1, NAflag=NA_flag_val,filename=outFilename,overwrite=TRUE)  
184
    list_raster_name[[j]] <- outFilename
185
  }
186
  
187
    
188
  raster_created_obj <- list(list_raster_name,df_centroids)
189
  names(raster_created_obj) <- c("list_raster_name","df_centroids")
190
  return(raster_created_obj)
191

  
192
}
193

  
194
#### end of function
195

  
91 196
create_weights_fun <- function(i, list_param){
92 197
  #This function generates weights from a point location on a raster layer.
93 198
  #Note that the weights are normatlized on 0-1 scale using max and min values.
......
299 404
  r_ref <- init(r_m, fun=set1f, filename=raster_name, overwrite=TRUE)
300 405
  #NAvalue(r_ref) <- -9999
301 406

  
302
  cmd_str <- paste("/usr/bin/gdalwarp",inFilename,raster_name,sep=" ") 
407
  cmd_str <- paste("/usr/bin/gdalwarp",inFilename,raster_name,sep=" ") #this may be a problem
303 408
  #gdalwarp -t_srs '+proj=utm +zone=11 +datum=WGS84' raw_spot.tif utm11.tif
304 409

  
305 410
  system(cmd_str)
......
308 413
  return(raster_name)
309 414
}
310 415

  
311
mosaicFiles <- function(lf_mosaic,mosaic_method,num_cores,python_bin=NULL,df_points=NULL,NA_flag_val=-9999,file_format=".tif",out_suffix=NULL,out_dir=NULL){
312
  #Add documentation!!!!
313
  ##
314
  
416
mosaicFiles <- function(lf_mosaic,mosaic_method="unweighted",num_cores=1,r_mask_raster_name=NULL,python_bin=NULL,df_points=NULL,NA_flag_val=-9999,file_format=".tif",out_suffix=NULL,out_dir=NULL){
417
  #This functions mosaics tiles/files give a list of files
418
  #There are four options to mosaic:   use_sine_weights,use_edge,use_linear_weights, unweighted
419
  #Sine weights fits sine fuctions across rows and column producing elliptical/spherical patterns from center
420
  #Use edge uses the distance from the edge of the tiles/fies, higher weights towards the center
421
  #Linear weights use simple linear average from distance point feature (usually centroid)
422
  #Unweighted: average without and weigthing surface
423
  #
424
  #INPUT Arguments: 
425
  #1)lf_mosaic: list of files to mosaic
426
  #2)mosaic_method: mosaic methods availbable:use_sine_weights,use_edge,use_linear_weights
427
  #3)num_cores: number of cores used in parallilization in mclapply
428
  #4)r_mask_raster_name: mask rference raster image
429
  #4)python_bin: location of python executables, defaut is NULL
430
  #5)df_points: point location used in weighting, defaul is NULL
431
  #6)NA_flag_val: raster flag values, e.g. -9999
432
  #7)file_format: raster format used, default is ".tif"
433
  #8)out_suffix: output suffix, default is NULL, it is recommended to add the variable name
434
  #             here e.g. dailyTmax and date!!
435
  #9)out_dir: output directory, default is NULL
436
  #
437
  #OUTPUT:
438
  # Ojbec is produced with 3 components:
439
  # 1) mean_mosaic: list of raster files from produced mosaic ,
440
  # 2) r_weights: list of raster files from weights 
441
  # 3) r_weights_prod: list of raster files from product weights (weights*value)
442
  # 4) method: weighting average used
443
  #
444

  
445
  ####################################
315 446
  ### BEGIN ####
447
  
316 448
  out_dir_str <- out_dir
317 449

  
318 450
  lf_r_weights <- vector("list",length=length(lf_mosaic))
......
373 505
  }
374 506
  
375 507
  if(mosaic_method=="use_edge_weights"){
508
    #this took 5 minutes for 28 tiles for reg4, South America,  4*28
376 509
    
377 510
    method <- "use_edge"
378 511
    df_points <- NULL
......
381 514
    names(list_param_create_weights) <- c("lf","df_points","r_feature","method","out_dir_str") 
382 515
    #num_cores <- 11
383 516

  
384
    weights_obj <- create_weights_fun(1,list_param=list_param_create_weights)
517
    #weights_obj <- create_weights_fun(1,list_param=list_param_create_weights)
385 518

  
386 519
    #This is the function creating the weights by tile. Distance from the centroids needs to be change from distance to
387 520
    #the edges...can use rows and columsn to set edges to 1 and 0 for the others.
388 521
    use_edge_weights_obj_list <- mclapply(1:length(lf_mosaic),FUN=create_weights_fun,list_param=list_param_create_weights,mc.preschedule=FALSE,mc.cores = num_cores)                           
389 522

  
523
    #extract the list of files for weights and product weights
390 524
    list_edge_r_weights <- lapply(1:length(use_edge_weights_obj_list), FUN=function(i,x){x[[i]]$r_weights},x=use_edge_weights_obj_list)
391 525
    list_edge_r_weights_prod <- lapply(1:length(use_edge_weights_obj_list), FUN=function(i,x){x[[i]]$r_weights_prod},x=use_edge_weights_obj_list)
392 526
    
......
401 535
  ### PART 3: prepare weightsfor mosaicing by matching extent ############
402 536

  
403 537
  ## Rasters tiles vary slightly in resolution, they need to be matched for the mosaic. Resolve issue in the 
404
  #mosaic funciton using gdal_merge to compute a reference image to mach.
538
  #mosaic function using gdal_merge to compute a reference image to mach.
405 539

  
406 540
  rast_ref <- file.path(out_dir,paste("avg_",out_suffix,file_format,sep="")) #this is a the ref
407

  
408 541
  cmd_str <- paste("python","/usr/bin/gdal_merge.py","-o ",rast_ref,paste(lf_mosaic,collapse=" ")) 
409 542
  system(cmd_str)
410 543

  
......
446 579
    list_args_weights_prod <- list_weights_prod_m
447 580
    method_str <- method
448 581
  
449
    #list_args_weights <- (mixedsort(list.files(pattern="r_weights_m_.*.tif")))
582
    #making a list of raster object before mosaicing
450 583
    list_args_weights <- lapply(1:length(list_args_weights), FUN=function(i,x){raster(x[[i]])},x=list_args_weights)
451 584

  
452 585
    #get the list of weights product into raster objects
453
    #list_args_weights_prod <- list_weights_prod_m
454
    #list_args_weights_prod <- (mixedsort(list.files(pattern="r_weights_prod_m_.*.tif")))
455
    list_args_weights_prod <- lapply(1:length(list_args_weights_prod), FUN=function(i,x){raster(x[[i]])},x=list_args_weights_prod)
456 586

  
587
    list_args_weights_prod <- lapply(1:length(list_args_weights_prod), FUN=function(i,x){raster(x[[i]])},x=list_args_weights_prod)
457 588
    list_args_weights_prod$fun <- "sum"
458 589
    list_args_weights_prod$na.rm <- TRUE
590
    r_weights_sum_raster_name <- file.path(out_dir,paste("r_weights_sum_m_",method_str,"_weighted_mean_",out_suffix,".tif",sep=""))
591
    list_args_weights$filename <- r_weights_sum_raster_name
592
    list_args_weights$overwrite<- TRUE
593
    
459 594
    
460 595
    list_args_weights$fun <- "sum" #we want the sum to compute the weighted mean
461 596
    list_args_weights$na.rm <- TRUE
597
    r_prod_sum_raster_name <- file.path(out_dir,paste("r_prod_sum_m_",method_str,"_weighted_mean_",out_suffix,".tif",sep=""))
598
    list_args_weights_prod$filename <- r_prod_sum_raster_name
462 599

  
463
    #Mosaic files
600
    #Mosaic files: this is where we can use Alberto Python function but modified with option for
601
    #sum in addition ot the current option for mean.
602
    #This took 23 minutes!
464 603
    r_weights_sum <- do.call(mosaic,list_args_weights) #weights sum image mosaiced
465 604
    r_prod_sum <- do.call(mosaic,list_args_weights_prod) #weights sum product image mosacied
466 605

  
467
    r_m_weighted_mean <- r_prod_sum/r_weights_sum #this is the mosaic using weighted mean...
468
    raster_name <- file.path(out_dir,paste("r_m_",method_str,"_weighted_mean_",out_suffix,".tif",sep=""))
469
    writeRaster(r_m_weighted_mean, NAflag=NA_flag_val,filename=raster_name,overwrite=TRUE)  
606
    #r_m_weighted_mean <- r_prod_sum/r_weights_sum #this is the mosaic using weighted mean...
607
    #"gdal_calc.py -A input1.tif -B input2.tif --outfile=result.tif --calc="A+B""
608
    
609
    r_m_weighted_mean_raster_name <- file.path(out_dir,paste("r_m_",method_str,"_weighted_mean_",out_suffix,".tif",sep=""))
610

  
611
    #cmd_str <- paste("gdal_calc.py" 
612
    #            "     "-A input1.tif",
613
    #                 "-B input2.tif",
614
    #                 paste("--outfile=",r_m_weighted_mean_name,sep=""),
615
    #                 --calc="A+B"
616
    #--NoDataValue=0)
617
    if(is.null(python_bin)){
618
      python_bin=""
619
    }
620
    
621
    python_cmd <- file.path(python_bin,"gdal_calc.py")
622
    cmd_str <- paste(python_cmd, 
623
                     paste("-A ", r_prod_sum_raster_name,sep=""),
624
                     paste("-B ", r_weights_sum_raster_name,sep=""),
625
                     paste("--outfile=",r_m_weighted_mean_raster_name,sep=""),
626
                     "--calc='A/B'","--overwrite",sep=" ")
627
    system(cmd_str)
628
    
629
    #writeRaster(r_m_weighted_mean, NAflag=NA_flag_val,filename=raster_name,overwrite=TRUE)  
630
    #now use the mask
631
    r_m_weighted_mean_mask_raster_name <- file.path(out_dir,paste("r_m_",method_str,"_weighted_mean_mask_",out_suffix,".tif",sep=""))
632

  
633
    #mask(raster(r_m_weighted_mean_raster_name),mask=r_mask_raster_name,filename=r_m_weighted_mean_mask_raster_name)
470 634
    
471 635
  }
472 636

  
......
492 656

  
493 657
    list_args_pred_m$fun <- "mean"
494 658
    list_args_pred_m$na.rm <- TRUE
659
    #list_args_pred_m$filename <- 
495 660

  
496 661
    #Mosaic files
497 662
    r_m_mean <- do.call(mosaic,list_args_pred_m) #this is unweighted mean from the predicted raster

Also available in: Unified diff