Project

General

Profile

Download (21.1 KB) Statistics
| Branch: | Revision:
1
#########################    Raster prediction    ####################################
2
############################ Interpolation of temperature for given processing region ##########################################
3
#This script interpolates temperature values using MODIS LST, covariates and GHCND station data.                      
4
#It requires the text file of stations and a shape file of the study area.           
5
#Note that the projection for both GHCND and study area is lonlat WGS84.       
6
#Options to run this program are:
7
#1) Multisampling: vary the porportions of hold out and use random samples for each run
8
#2)Constant sampling: use the same sample over the runs
9
#3)over dates: run over for example 365 dates without mulitsampling
10
#4)use seed number: use seed if random samples must be repeatable
11
#5)possibilty of running single and multiple time scale methods:
12
   # gam_daily, kriging_daily,gwr_daily,gam_CAI,gam_fusion,kriging_fusion,gwr_fusion and other options added.
13
#For multiple time scale methods, the interpolation is done first at the monthly time scale then delta surfaces are added.
14
#AUTHOR: Benoit Parmentier                                                                        
15
#DATE: 07/30/2013                                                                                 
16
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#568--     
17
#
18
# TO DO:
19
#Add methods to for CAI
20

    
21
###################################################################################################
22

    
23
raster_prediction_fun <-function(list_param_raster_prediction){
24

    
25
  ##Function to predict temperature interpolation with 21 input parameters
26
  #9 parameters used in the data preparation stage and input in the current script
27
  #1)list_param_data_prep: used in earlier code for the query from the database and extraction for raster brick
28
  #2)infile_monthly: monthly averages with covariates for GHCND stations obtained after query
29
  #3)infile_daily: daily GHCND stations with covariates, obtained after query
30
  #4)infile_locs: vector file with station locations for the processing/study area (ESRI shapefile)
31
  #5)infile_covariates: raster covariate brick, tif file
32
  #6)covar_names: covar_names #remove at a later stage...
33
  #7)var: variable being interpolated-TMIN or TMAX
34
  #8)out_prefix
35
  #9)CRS_locs_WGS84
36
  #10)screen_data_training
37
  #
38
  #6 parameters for sampling function
39
  #10)seed_number
40
  #11)nb_sample
41
  #12)step
42
  #13)constant
43
  #14)prop_minmax
44
  #15)dates_selected
45
  #
46
  #6 additional parameters for monthly climatology and more
47
  #16)list_models: model formulas in character vector
48
  #17)lst_avg: LST climatology name in the brick of covariate--change later
49
  #18)n_path
50
  #19)out_path
51
  #20)script_path: path to script
52
  #21)interpolation_method: c("gam_fusion","gam_CAI") #other otpions to be added later
53
  
54
  ###Loading R library and packages     
55
  
56
  library(gtools)                                         # loading some useful tools 
57
  library(mgcv)                                           # GAM package by Simon Wood
58
  library(sp)                                             # Spatial pacakge with class definition by Bivand et al.
59
  library(spdep)                               # Spatial pacakge with methods and spatial stat. by Bivand et al.
60
  library(rgdal)                               # GDAL wrapper for R, spatial utilities
61
  library(gstat)                               # Kriging and co-kriging by Pebesma et al.
62
  library(fields)                             # NCAR Spatial Interpolation methods such as kriging, splines
63
  library(raster)                              # Hijmans et al. package for raster processing
64
  library(rasterVis)
65
  library(parallel)                            # Urbanek S. and Ripley B., package for multi cores & parralel processing
66
  library(reshape)
67
  library(plotrix)
68
  library(maptools)
69
  library(gdata) #Nesssary to use cbindX
70
  library(automap)  #autokrige function
71
  library(spgwr)   #GWR method
72
  
73
  ### Parameters and arguments
74
  #PARSING INPUTS/ARGUMENTS
75
#   
76
#   names(list_param_raster_prediction)<-c("list_param_data_prep",
77
#                                          "seed_number","nb_sample","step","constant","prop_minmax","dates_selected",
78
#                                          "list_models","lst_avg","in_path","out_path","script_path",
79
#                                          "interpolation_method")
80
  
81
  #9 parameters used in the data preparation stage and input in the current script
82
  list_param_data_prep<-list_param_raster_prediction$list_param_data_prep
83
  infile_monthly<-list_param_data_prep$infile_monthly
84
  infile_daily<-list_param_data_prep$infile_daily
85
  infile_locs<-list_param_data_prep$infile_locs
86
  infile_covariates<-list_param_data_prep$infile_covariates #raster covariate brick, tif file
87
  covar_names<- list_param_data_prep$covar_names #remove at a later stage...
88
  var<-list_param_data_prep$var
89
  out_prefix<-list_param_data_prep$out_prefix
90
  CRS_locs_WGS84<-list_param_data_prep$CRS_locs_WGS84
91

    
92
  
93
  #6 parameters for sampling function
94
  seed_number<-list_param_raster_prediction$seed_number
95
  nb_sample<-list_param_raster_prediction$nb_sample
96
  step<-list_param_raster_prediction$step
97
  constant<-list_param_raster_prediction$constant
98
  prop_minmax<-list_param_raster_prediction$prop_minmax
99
  dates_selected<-list_param_raster_prediction$dates_selected
100
  
101
  #6 additional parameters for monthly climatology and more
102
  list_models<-list_param_raster_prediction$list_models
103
  lst_avg<-list_param_raster_prediction$lst_avg
104
  out_path<-list_param_raster_prediction$out_path
105
  script_path<-list_param_raster_prediction$script_path
106
  interpolation_method<-list_param_raster_prediction$interpolation_method
107
  screen_data_training <-list_param_raster_prediction$screen_data_training
108
  
109
  setwd(out_path)
110
  
111
  ###################### START OF THE SCRIPT ########################
112
   
113
  if (var=="TMAX"){
114
    y_var_name<-"dailyTmax"                                       
115
  }
116
  
117
  if (var=="TMIN"){
118
    y_var_name<-"dailyTmin"                                       
119
  }
120
  
121
  ################# CREATE LOG FILE #####################
122
  
123
  #create log file to keep track of details such as processing times and parameters.
124
  
125
  #log_fname<-paste("R_log_raster_prediction",out_prefix, ".log",sep="")
126
  log_fname<-paste("R_log_raster_prediction",out_prefix, ".log",sep="")
127
  #sink(log_fname) #create new log file
128
  file.create(file.path(out_path,log_fname)) #create new log file
129
  
130
  time1<-proc.time()    #Start stop watch
131
  
132
  cat(paste("Starting script at this local Date and Time: ",as.character(Sys.time()),sep=""),
133
             file=log_fname,sep="\n")
134
  cat("Starting script process time:",file=log_fname,sep="\n",append=TRUE)
135
  cat(as.character(time1),file=log_fname,sep="\n",append=TRUE)    
136
  
137
  ############### READING INPUTS: DAILY STATION DATA AND OTEHR DATASETS  #################
138
  
139
  ghcn<-readOGR(dsn=dirname(infile_daily),layer=sub(".shp","",basename(infile_daily)))
140
  CRS_interp<-proj4string(ghcn)                       #Storing projection information (ellipsoid, datum,etc.)
141
  stat_loc<-readOGR(dsn=dirname(infile_locs),layer=sub(".shp","",basename(infile_locs)))
142
  #dates2 <-readLines(file.path(in_path,dates_selected)) #dates to be predicted, now read directly from the file
143
  if (dates_selected==""){
144
    dates<-as.character(sort(unique(ghcn$date))) #dates to be predicted 
145
  }
146
  if (dates_selected!=""){
147
    dates<-dates_selected #dates to be predicted 
148
  }
149
  
150
  #Reading in covariate brickcan be changed...
151
  
152
  s_raster<-brick(infile_covariates)                   #read in the data brck
153
  names(s_raster)<-covar_names               #Assigning names to the raster layers: making sure it is included in the extraction
154
    
155
  #Reading monthly data
156
  dst<-readOGR(dsn=dirname(infile_monthly),layer=sub(".shp","",basename(infile_monthly)))
157
    
158
  ########### CREATE SAMPLING -TRAINING AND TESTING STATIONS ###########
159
  
160
  #Input for sampling function...
161
  
162
  #dates #list of dates for prediction
163
  #ghcn_name<-"ghcn" #infile daily data 
164
  
165
  list_param_sampling<-list(seed_number,nb_sample,step,constant,prop_minmax,dates,ghcn)
166
  #list_param_sampling<-list(seed_number,nb_sample,step,constant,prop_minmax,dates,ghcn_name)
167
  names(list_param_sampling)<-c("seed_number","nb_sample","step","constant","prop_minmax","dates","ghcn")
168
  
169
  #run function, note that dates must be a character vector!!
170
  sampling_obj<-sampling_training_testing(list_param_sampling)
171
  
172
  ########### PREDICT FOR MONTHLY SCALE  ##################
173
  
174
  #First predict at the monthly time scale: climatology
175
  cat("Predictions at monthly scale:",file=log_fname,sep="\n", append=TRUE)
176
  cat(paste("Local Date and Time: ",as.character(Sys.time()),sep=""),
177
      file=log_fname,sep="\n")
178
  t1<-proc.time()
179
  j=12
180
  #browser() #Missing out_path for gam_fusion list param!!!
181
  #if (interpolation_method=="gam_fusion"){
182
  if (interpolation_method %in% c("gam_fusion","kriging_fusion","gwr_fusion")){
183
    list_param_runClim_KGFusion<-list(j,s_raster,covar_names,lst_avg,list_models,dst,var,y_var_name, out_prefix,out_path)
184
    names(list_param_runClim_KGFusion)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","var","y_var_name","out_prefix","out_path")
185
    #source(file.path(script_path,"GAM_fusion_function_multisampling_03122013.R"))
186
    clim_method_mod_obj<-mclapply(1:12, list_param=list_param_runClim_KGFusion, runClim_KGFusion,mc.preschedule=FALSE,mc.cores = 6) #This is the end bracket from mclapply(...) statement
187
    #clim_method_mod_obj<-mclapply(1:6, list_param=list_param_runClim_KGFusion, runClim_KGFusion,mc.preschedule=FALSE,mc.cores = 6) #This is the end bracket from mclapply(...) statement
188
    #test<-runClim_KGFusion(1,list_param=list_param_runClim_KGFusion)
189
    save(clim_method_mod_obj,file= file.path(out_path,paste(interpolation_method,"_mod_",y_var_name,out_prefix,".RData",sep="")))
190
    list_tmp<-vector("list",length(clim_method_mod_obj))
191
    for (i in 1:length(clim_method_mod_obj)){
192
      tmp<-clim_method_mod_obj[[i]]$clim
193
      list_tmp[[i]]<-tmp
194
    }
195
    clim_yearlist<-list_tmp
196
  }
197
  
198
  if (interpolation_method %in% c("gam_CAI","kriging_CAI", "gwr_CAI")){
199
    list_param_runClim_KGCAI<-list(j,s_raster,covar_names,lst_avg,list_models,dst,var,y_var_name, out_prefix,out_path)
200
    names(list_param_runClim_KGCAI)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","var","y_var_name","out_prefix","out_path")
201
    clim_method_mod_obj<-mclapply(1:12, list_param=list_param_runClim_KGCAI, runClim_KGCAI,mc.preschedule=FALSE,mc.cores = 6) #This is the end bracket from mclapply(...) statement
202
    #test<-runClim_KGCAI(1,list_param=list_param_runClim_KGCAI)
203
    #gamclim_fus_mod<-mclapply(1:6, list_param=list_param_runClim_KGFusion, runClim_KGFusion,mc.preschedule=FALSE,mc.cores = 6) 
204
    save(clim_method_mod_obj,file= file.path(out_path,paste(interpolation_method,"_mod_",y_var_name,out_prefix,".RData",sep="")))
205
    list_tmp<-vector("list",length(clim_method_mod_obj))
206
    for (i in 1:length(clim_method_mod_obj)){
207
      tmp<-clim_method_mod_obj[[i]]$clim
208
      list_tmp[[i]]<-tmp
209
    }
210
    clim_yearlist<-list_tmp
211
  }
212
  t2<-proc.time()-t1
213
  cat(as.character(t2),file=log_fname,sep="\n", append=TRUE)
214

    
215
  ################## PREDICT AT DAILY TIME SCALE #################
216
  #Predict at daily time scale from single time scale or multiple time scale methods: 2 methods availabe now
217
  
218
  #put together list of clim models per month...
219
  #rast_clim_yearlist<-list_tmp
220
  
221
  #Second predict at the daily time scale: delta
222
  
223
  #method_mod_obj<-mclapply(1:1, runGAMFusion,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement
224
  cat("Predictions at the daily scale:",file=log_fname,sep="\n", append=TRUE)
225
  t1<-proc.time()
226
  cat(paste("Local Date and Time: ",as.character(Sys.time()),sep=""),
227
      file=log_fname,sep="\n")
228
  
229
  #TODO : Same call for all functions!!! Replace by one "if" for all multi time scale methods...
230
  #The methods could be defined earlier as constant??
231
  if (interpolation_method %in% c("gam_CAI","kriging_CAI","gwr_CAI","gam_fusion","kriging_fusion","gwr_fusion")){
232
    #input a list:note that ghcn.subsets is not sampling_obj$data_day_ghcn
233
    i<-1
234
    list_param_run_prediction_daily_deviation <-list(i,clim_yearlist,sampling_obj,dst,var,y_var_name, interpolation_method,out_prefix,out_path)
235
    names(list_param_run_prediction_daily_deviation)<-c("list_index","clim_yearlist","sampling_obj","dst","var","y_var_name","interpolation_method","out_prefix","out_path")
236
    #test<-mclapply(1:18, runGAMFusion,list_param=list_param_runGAMFusion,mc.preschedule=FALSE,mc.cores = 9)
237
    #test<-runGAMFusion(1,list_param=list_param_runGAMFusion)
238
    
239
    method_mod_obj<-mclapply(1:length(sampling_obj$ghcn_data_day),list_param=list_param_run_prediction_daily_deviation,run_prediction_daily_deviation,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement
240
    save(method_mod_obj,file= file.path(out_path,paste("method_mod_obj_",interpolation_method,"_",y_var_name,out_prefix,".RData",sep="")))
241
  }
242
  
243
  #TODO : Same call for all functions!!! Replace by one "if" for all daily single time scale methods...
244
  if (interpolation_method=="gam_daily"){
245
    #input a list:note that ghcn.subsets is not sampling_obj$data_day_ghcn
246
    i<-1
247
    list_param_run_prediction_gam_daily <-list(i,s_raster,covar_names,lst_avg,list_models,dst,screen_data_training,var,y_var_name, sampling_obj,interpolation_method,out_prefix,out_path)
248
    names(list_param_run_prediction_gam_daily)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","screen_data_training","var","y_var_name","sampling_obj","interpolation_method","out_prefix","out_path")
249
    #test <- runGAM_day_fun(1,list_param_run_prediction_gam_daily)
250
    
251
    method_mod_obj<-mclapply(1:length(sampling_obj$ghcn_data_day),list_param=list_param_run_prediction_gam_daily,runGAM_day_fun,mc.preschedule=FALSE,mc.cores = 11) #This is the end bracket from mclapply(...) statement
252
    #method_mod_obj<-mclapply(1:11,list_param=list_param_run_prediction_gam_daily,runGAM_day_fun,mc.preschedule=FALSE,mc.cores = 11) #This is the end bracket from mclapply(...) statement
253
    
254
    save(method_mod_obj,file= file.path(out_path,paste("method_mod_obj_",interpolation_method,"_",y_var_name,out_prefix,".RData",sep="")))
255
    
256
  }
257
  
258
  if (interpolation_method=="kriging_daily"){
259
    #input a list:note that ghcn.subsets is not sampling_obj$data_day_ghcn
260
    i<-1
261
    list_param_run_prediction_kriging_daily <-list(i,s_raster,covar_names,lst_avg,list_models,dst,var,y_var_name, sampling_obj,interpolation_method,out_prefix,out_path)
262
    names(list_param_run_prediction_kriging_daily)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","var","y_var_name","sampling_obj","interpolation_method","out_prefix","out_path")
263
    #test <- runKriging_day_fun(1,list_param_run_prediction_kriging_daily)
264
    method_mod_obj<-mclapply(1:length(sampling_obj$ghcn_data_day),list_param=list_param_run_prediction_kriging_daily,runKriging_day_fun,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement
265
    #method_mod_obj<-mclapply(1:18,list_param=list_param_run_prediction_kriging_daily,runKriging_day_fun,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement
266
    
267
    save(method_mod_obj,file= file.path(out_path,paste("method_mod_obj_",interpolation_method,"_",y_var_name,out_prefix,".RData",sep="")))
268
    
269
  }
270
  
271
  if (interpolation_method=="gwr_daily"){
272
    #input a list:note that ghcn.subsets is not sampling_obj$data_day_ghcn
273
    i<-1
274
    list_param_run_prediction_gwr_daily <-list(i,s_raster,covar_names,lst_avg,list_models,dst,var,y_var_name, sampling_obj,interpolation_method,out_prefix,out_path)
275
    names(list_param_run_prediction_gwr_daily)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","var","y_var_name","sampling_obj","interpolation_method","out_prefix","out_path")
276
    #test <- run_interp_day_fun(1,list_param_run_prediction_gwr_daily)
277
    method_mod_obj<-mclapply(1:length(sampling_obj$ghcn_data_day),list_param=list_param_run_prediction_gwr_daily,run_interp_day_fun,mc.preschedule=FALSE,mc.cores = 11) #This is the end bracket from mclapply(...) statement
278
    #method_mod_obj<-mclapply(1:9,list_param=list_param_run_prediction_gwr_daily,run_interp_day_fun,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement
279
    #method_mod_obj<-mclapply(1:18,list_param=list_param_run_prediction_kriging_daily,runKriging_day_fun,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement
280
    
281
    save(method_mod_obj,file= file.path(out_path,paste("method_mod_obj_",interpolation_method,"_",y_var_name,out_prefix,".RData",sep="")))
282
    
283
  }
284
  t2<-proc.time()-t1
285
  cat(as.character(t2),file=log_fname,sep="\n", append=TRUE)
286
  #browser()
287
  
288
  ############### NOW RUN VALIDATION #########################
289
  #SIMPLIFY THIS PART: one call
290
  
291
  list_tmp<-vector("list",length(method_mod_obj))
292
  for (i in 1:length(method_mod_obj)){
293
    tmp<-method_mod_obj[[i]][[y_var_name]]  #y_var_name is the variable predicted (dailyTmax or dailyTmin)
294
    list_tmp[[i]]<-tmp
295
  }
296
  rast_day_yearlist<-list_tmp #list of predicted images over full year...
297
  
298
  cat("Validation step:",file=log_fname,sep="\n", append=TRUE)
299
  t1<-proc.time()
300
  cat(paste("Local Date and Time: ",as.character(Sys.time()),sep=""),
301
      file=log_fname,sep="\n")
302
  
303
  list_param_validation<-list(i,rast_day_yearlist,method_mod_obj,y_var_name, out_prefix, out_path)
304
  names(list_param_validation)<-c("list_index","rast_day_year_list","method_mod_obj","y_var_name","out_prefix", "out_path") #same names for any method
305
  
306
  validation_mod_obj <-mclapply(1:length(method_mod_obj), list_param=list_param_validation, calculate_accuracy_metrics,mc.preschedule=FALSE,mc.cores = 9) 
307
      #test_val<-calculate_accuracy_metrics(1,list_param_validation)
308
  save(validation_mod_obj,file= file.path(out_path,paste(interpolation_method,"_validation_mod_obj_",y_var_name,out_prefix,".RData",sep="")))
309
  t2<-proc.time()-t1
310
  cat(as.character(t2),file=log_fname,sep="\n", append=TRUE)
311
  
312
  #################### ASSESSMENT OF PREDICTIONS: PLOTS OF ACCURACY METRICS ###########
313
  
314
  ##Create data.frame with validation and fit metrics for a full year/full numbe of runs
315
  tb_diagnostic_v<-extract_from_list_obj(validation_mod_obj,"metrics_v") 
316
  #tb_diagnostic_v contains accuracy metrics for models sample and proportion for every run...if full year then 365 rows maximum
317
  rownames(tb_diagnostic_v)<-NULL #remove row names
318
  tb_diagnostic_v$method_interp <- interpolation_method
319
  tb_diagnostic_s<-extract_from_list_obj(validation_mod_obj,"metrics_s")
320
  rownames(tb_diagnostic_s)<-NULL #remove row names
321
  tb_diagnostic_s$method_interp <- interpolation_method #add type of interpolation...out_prefix too??
322
  
323
  #Call functions to create plots of metrics for validation dataset
324
  metric_names<-c("rmse","mae","me","r","m50")
325
  summary_metrics_v<- boxplot_from_tb(tb_diagnostic_v,metric_names,out_prefix,out_path) #if adding for fit need to change outprefix
326
  names(summary_metrics_v)<-c("avg","median")
327
  summary_month_metrics_v<- boxplot_month_from_tb(tb_diagnostic_v,metric_names,out_prefix,out_path)
328
  
329
  #################### CLOSE LOG FILE  ####################
330
  
331
  #close log_file connection and add meta data
332
  cat("Finished script process time:",file=log_fname,sep="\n", append=TRUE)
333
  time2<-proc.time()-time1
334
  cat(as.character(time2),file=log_fname,sep="\n", append=TRUE)
335
  #later on add all the parameters used in the script...
336
  cat(paste("Finished script at this local Date and Time: ",as.character(Sys.time()),sep=""),
337
             file=log_fname,sep="\n", append=TRUE)
338
  cat("End of script",file=log_fname,sep="\n", append=TRUE)
339
  #close(log_fname)
340
  
341
  ################### PREPARE RETURN OBJECT ###############
342
  #Will add more information to be returned
343
  
344
  if (interpolation_method %in% c("gam_CAI","kriging_CAI","gwr_CAI","gam_fusion","kriging_fusion","gwr_fusion")){
345
    raster_prediction_obj<-list(clim_method_mod_obj,method_mod_obj,validation_mod_obj,tb_diagnostic_v,
346
                                tb_diagnostic_s,summary_metrics_v,summary_month_metrics_v)
347
    names(raster_prediction_obj)<-c("clim_method_mod_obj","method_mod_obj","validation_mod_obj","tb_diagnostic_v",
348
                                    "tb_diagnostic_s","summary_metrics_v","summary_month_metrics_v")  
349
    save(raster_prediction_obj,file= file.path(out_path,paste("raster_prediction_obj_",interpolation_method,"_", y_var_name,out_prefix,".RData",sep="")))
350
    
351
  }
352
  
353
  #use %in% instead of "|" operator
354
  if (interpolation_method=="gam_daily" | interpolation_method=="kriging_daily" | interpolation_method=="gwr_daily"){
355
    raster_prediction_obj<-list(method_mod_obj,validation_mod_obj,tb_diagnostic_v,
356
                                tb_diagnostic_s,summary_metrics_v,summary_month_metrics_v)
357
    names(raster_prediction_obj)<-c("method_mod_obj","validation_mod_obj","tb_diagnostic_v",
358
                                    "tb_diagnostic_s","summary_metrics_v","summary_month_metrics_v")  
359
    save(raster_prediction_obj,file= file.path(out_path,paste("raster_prediction_obj_",interpolation_method,"_", y_var_name,out_prefix,".RData",sep="")))
360
    
361
  }
362
  
363
  return(raster_prediction_obj)
364
}
365

    
366
####################################################################
367
######################## END OF SCRIPT/FUNCTION #####################
(11-11/52)