Project

General

Profile

« Previous | Next » 

Revision 47cfa1ce

Added by Alberto Guzman almost 11 years ago

Adding files from test server

View differences:

climate/research/world/interpolation/GAM_fusion_analysis_raster_prediction_multisampling_01062014.R
1
#########################    Raster prediction    ####################################
2
############################ Interpolation of temperature for given processing region ##########################################
3
#This script interpolates temperature values using MODIS LST, covariates and GHCND station data.                      
4
#It requires the text file of stations and a shape file of the study area.           
5
#Note that the projection for both GHCND and study area is lonlat WGS84.       
6
#Options to run this program are:
7
#1) Multisampling: vary the porportions of hold out and use random samples for each run
8
#2)Constant sampling: use the same sample over the runs
9
#3)over dates: run over for example 365 dates without mulitsampling
10
#4)use seed number: use seed if random samples must be repeatable
11
#5)possibilty of running single and multiple time scale methods:
12
   # gam_daily, kriging_daily,gwr_daily,gam_CAI,gam_fusion,kriging_fusion,gwr_fusion and other options added.
13
#For multiple time scale methods, the interpolation is done first at the monthly time scale then delta surfaces are added.
14
#AUTHOR: Benoit Parmentier                                                                        
15
#DATE: 11/03/2013                                                                                 
16
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#568--     
17
#
18
# TO DO:
19

  
20
###################################################################################################
21

  
22
raster_prediction_fun <-function(list_param_raster_prediction){
23

  
24
  ##Function to predict temperature interpolation with 21 input parameters
25
  #9 parameters used in the data preparation stage and input in the current script
26
  #1)list_param_data_prep: used in earlier code for the query from the database and extraction for raster brick
27
  #2)infile_monthly: monthly averages with covariates for GHCND stations obtained after query
28
  #3)infile_daily: daily GHCND stations with covariates, obtained after query
29
  #4)infile_locs: vector file with station locations for the processing/study area (ESRI shapefile)
30
  #5)infile_covariates: raster covariate brick, tif file
31
  #6)covar_names: covar_names #remove at a later stage...
32
  #7)var: variable being interpolated-TMIN or TMAX
33
  #8)out_prefix
34
  #9)CRS_locs_WGS84
35
  #10)screen_data_training
36
  #
37
  #6 parameters for sampling function
38
  #10)seed_number
39
  #11)nb_sample
40
  #12)step
41
  #13)constant
42
  #14)prop_minmax
43
  #15)seed_number_month
44
  #16)nb_sample_month
45
  #17)step_month
46
  #18)constant_month
47
  #19)prop_minmax_month
48
  #20)dates_selected
49
  #
50
  #6 additional parameters for monthly climatology and more
51
  #21)list_models: model formulas in character vector
52
  #22)lst_avg: LST climatology name in the brick of covariate--change later
53
  #23)n_path
54
  #24)out_path
55
  #25)script_path: path to script
56
  #26)interpolation_method: c("gam_fusion","gam_CAI") #other otpions to be added later
57
  #27) use_clim_image
58
  #28) join_daily
59
  #29)list_models2: models' formulas as string vector for daily devation
60
  #30)interp_method2: intepolation method for daily devation step
61
  #31)num_cores: How many cores to use
62
  #32)max_mem: Max memory to use for predict step
63
  
64
  ###Loading R library and packages     
65
  
66
  library(gtools)                                         # loading some useful tools 
67
  library(mgcv)                                           # GAM package by Simon Wood
68
  library(sp)                                             # Spatial pacakge with class definition by Bivand et al.
69
  library(spdep)                               # Spatial pacakge with methods and spatial stat. by Bivand et al.
70
  library(rgdal)                               # GDAL wrapper for R, spatial utilities
71
  library(gstat)                               # Kriging and co-kriging by Pebesma et al.
72
  library(fields)                             # NCAR Spatial Interpolation methods such as kriging, splines
73
  library(raster)                              # Hijmans et al. package for raster processing
74
  library(rasterVis)
75
  library(parallel)                            # Urbanek S. and Ripley B., package for multi cores & parralel processing
76
  library(reshape)
77
  library(plotrix)
78
  library(maptools)
79
  library(gdata) #Nesssary to use cbindX
80
  library(automap)  #autokrige function
81
  library(spgwr)   #GWR method
82
  
83
  ### Parameters and arguments
84
  #PARSING INPUTS/ARGUMENTS
85
#   
86
#   names(list_param_raster_prediction)<-c("list_param_data_prep",
87
#                                          "seed_number","nb_sample","step","constant","prop_minmax","dates_selected",
88
#                                          "list_models","lst_avg","in_path","out_path","script_path",
89
#                                          "interpolation_method")
90
  
91
  #9 parameters used in the data preparation stage and input in the current script
92
  list_param_data_prep<-list_param_raster_prediction$list_param_data_prep
93
  infile_monthly<-list_param_data_prep$infile_monthly
94
  infile_daily<-list_param_data_prep$infile_daily
95
  infile_locs<-list_param_data_prep$infile_locs
96
  infile_covariates<-list_param_data_prep$infile_covariates #raster covariate brick, tif file
97
  covar_names<- list_param_data_prep$covar_names #remove at a later stage...
98
  var<-list_param_data_prep$var
99
  out_prefix<-list_param_data_prep$out_prefix
100
  CRS_locs_WGS84<-list_param_data_prep$CRS_locs_WGS84
101

  
102
  
103
  #6 parameters for sampling function
104
  seed_number<-list_param_raster_prediction$seed_number
105
  nb_sample<-list_param_raster_prediction$nb_sample
106
  step<-list_param_raster_prediction$step
107
  constant<-list_param_raster_prediction$constant
108
  prop_minmax<-list_param_raster_prediction$prop_minmax
109
  dates_selected<-list_param_raster_prediction$dates_selected
110
  
111
  seed_number_month <-list_param_raster_prediction$seed_number_month
112
  nb_sample_month <-list_param_raster_prediction$nb_sample_month
113
  step_month <-list_param_raster_prediction$step_month
114
  constant_month <-list_param_raster_prediction$constant_month
115
  prop_minmax_month <-list_param_raster_prediction$prop_minmax_month
116
  
117
  #6 additional parameters for monthly climatology and more
118
  list_models<-list_param_raster_prediction$list_models
119
  list_models2<-list_param_raster_prediction$list_models2
120
  interp_method2 <- list_param_raster_prediction$interp_method2
121
  
122
  lst_avg<-list_param_raster_prediction$lst_avg
123
  out_path<-list_param_raster_prediction$out_path
124
  script_path<-list_param_raster_prediction$script_path
125
  interpolation_method<-list_param_raster_prediction$interpolation_method
126
  screen_data_training <-list_param_raster_prediction$screen_data_training
127
  
128
  use_clim_image <- list_param_raster_prediction$use_clim_image # use predicted image as a base...rather than average Tmin at the station for delta
129
  join_daily <- list_param_raster_prediction$join_daily # join monthly and daily station before calucating delta
130

  
131
  #cores and memory usage options
132
  num_cores <- list_param_raster_prediction$num_cores
133
  max_mem<- as.numeric(list_param_raster_prediction$max_mem)
134
 
135
  rasterOptions(maxmemory=max_mem,timer=TRUE)
136
  
137
  setwd(out_path)
138
  
139
  ###################### START OF THE SCRIPT ########################
140
   
141
  #This should not be set here...? master script, modify for precip
142
  if (var=="TMAX"){
143
    y_var_name<-"dailyTmax"
144
    y_var_month<-"TMax"
145
  }
146
  if (var=="TMIN"){
147
    y_var_name<-"dailyTmin"
148
    y_var_month <-"TMin"
149
  }
150
  
151
  ################# CREATE LOG FILE #####################
152
  
153
  #create log file to keep track of details such as processing times and parameters.
154
  
155
  #log_fname<-paste("R_log_raster_prediction",out_prefix, ".log",sep="")
156
  log_fname<-paste("R_log_raster_prediction",out_prefix, ".log",sep="")
157
  #sink(log_fname) #create new log file
158
  file.create(file.path(out_path,log_fname)) #create new log file
159
  
160
  time1<-proc.time()    #Start stop watch
161
  
162
  cat(paste("Starting script at this local Date and Time: ",as.character(Sys.time()),sep=""),
163
             file=log_fname,sep="\n")
164
  cat("Starting script process time:",file=log_fname,sep="\n",append=TRUE)
165
  cat(as.character(time1),file=log_fname,sep="\n",append=TRUE)    
166
  
167
  ############### READING INPUTS: DAILY STATION DATA AND OTHER DATASETS  #################
168
  
169
  ghcn<-readOGR(dsn=dirname(infile_daily),layer=sub(".shp","",basename(infile_daily)))
170
  CRS_interp<-proj4string(ghcn)                       #Storing projection information (ellipsoid, datum,etc.)
171
  stat_loc<-readOGR(dsn=dirname(infile_locs),layer=sub(".shp","",basename(infile_locs)))
172
  #dates2 <-readLines(file.path(in_path,dates_selected)) #dates to be predicted, now read directly from the file
173
  
174
  #Should clean this up, reduce the number of if
175
  if (dates_selected==""){
176
    dates<-as.character(sort(unique(ghcn$date))) #dates to be predicted 
177
  }
178
  if (dates_selected!=""){
179
    dates<-dates_selected #dates to be predicted 
180
  }
181
  if(class(dates_selected)=="numeric"){ #select n every  observation, may change this later.
182
    dates<-as.character(sort(unique(ghcn$date))) #dates to be predicted 
183
    dates <- dates[seq(1, length(dates), dates_selected)]
184
  }
185
  #Reading in covariate brickcan be changed...
186
  
187
  s_raster<-brick(infile_covariates)                   #read in the data brck
188
  names(s_raster)<-covar_names               #Assigning names to the raster layers: making sure it is included in the extraction
189
    
190
  #Reading monthly data
191
  dst<-readOGR(dsn=dirname(infile_monthly),layer=sub(".shp","",basename(infile_monthly)))
192
    
193
  #construct date based on input end_year !!!
194
  day_tmp <- rep("15",length=nrow(dst))
195
  year_tmp <- rep(as.character(end_year),length=nrow(dst))
196
  #dates_month <-do.call(paste,c(list(day_tmp,sprintf( "%02d", dst$month ),year_tmp),sep="")) #reformat integer using formatC or sprintf
197
  dates_month <-do.call(paste,c(list(year_tmp,sprintf( "%02d", dst$month ),day_tmp),sep="")) #reformat integer using formatC or sprintf
198
  
199
  dst$date <- dates_month
200
  
201
  ########### CREATE SAMPLING -TRAINING AND TESTING STATIONS ###########
202

  
203
  #dates #list of dates for prediction
204
  #ghcn_name<-"ghcn" #infile daily data 
205
  
206
  list_param_sampling<-list(seed_number,nb_sample,step,constant,prop_minmax,dates,ghcn)
207
  #list_param_sampling<-list(seed_number,nb_sample,step,constant,prop_minmax,dates,ghcn_name)
208
  names(list_param_sampling)<-c("seed_number","nb_sample","step","constant","prop_minmax","dates","ghcn")
209
  
210
  #run function, note that dates must be a character vector!! Daily sampling
211
  sampling_obj<-sampling_training_testing(list_param_sampling)
212

  
213
  #Now run monthly sampling
214
  dates_month<-as.character(sort(unique(dst$date)))
215
  list_param_sampling<-list(seed_number_month,nb_sample_month,step_month,constant_month,prop_minmax_month,dates_month,dst)
216
  #list_param_sampling<-list(seed_number,nb_sample,step,constant,prop_minmax,dates,ghcn_name)
217
  names(list_param_sampling)<-c("seed_number","nb_sample","step","constant","prop_minmax","dates","ghcn")
218
  
219
  sampling_month_obj <- sampling_training_testing(list_param_sampling)
220
  
221
  ########### PREDICT FOR MONTHLY SCALE  ##################
222
  
223
  #First predict at the monthly time scale: climatology
224
  cat("Predictions at monthly scale:",file=log_fname,sep="\n", append=TRUE)
225
  cat(paste("Local Date and Time: ",as.character(Sys.time()),sep=""),
226
      file=log_fname,sep="\n")
227
  t1<-proc.time()
228
  j=6 #12
229
  
230
  #browser() #Missing out_path for gam_fusion list param!!!
231
  #if (interpolation_method=="gam_fusion"){
232
  if (interpolation_method %in% c("gam_fusion","kriging_fusion","gwr_fusion")){
233
    list_param_runClim_KGFusion<-list(j,s_raster,covar_names,lst_avg,list_models,dst,sampling_month_obj,var,y_var_name, out_prefix,out_path)
234
    names(list_param_runClim_KGFusion)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","sampling_month_obj","var","y_var_name","out_prefix","out_path")
235
    #debug(runClim_KGFusion)
236
    #test<-runClim_KGFusion(1,list_param=list_param_runClim_KGFusion)
237
    clim_method_mod_obj<-mclapply(1:length(sampling_month_obj$ghcn_data), list_param=list_param_runClim_KGFusion, runClim_KGFusion,mc.preschedule=FALSE,mc.cores = num_cores) #This is the end bracket from mclapply(...) statement
238
   
239
    
240
    save(clim_method_mod_obj,file= file.path(out_path,paste(interpolation_method,"_mod_",y_var_name,out_prefix,".RData",sep="")))
241
    #Use function to extract list
242
    list_tmp<-vector("list",length(clim_method_mod_obj))
243

  
244
    for (i in 1:length(clim_method_mod_obj)){
245
      tmp<-clim_method_mod_obj[[i]]$clim
246
      list_tmp[[i]]<-tmp
247
    }
248
    
249
    clim_yearlist<-list_tmp
250
  }
251
  
252
  if (interpolation_method %in% c("gam_CAI","kriging_CAI", "gwr_CAI")){
253
    list_param_runClim_KGCAI<-list(j,s_raster,covar_names,lst_avg,list_models,dst,sampling_month_obj,var,y_var_name, out_prefix,out_path)
254
    names(list_param_runClim_KGCAI)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","sampling_month_obj","var","y_var_name","out_prefix","out_path")
255
    clim_method_mod_obj<-mclapply(1:length(sampling_month_obj$ghcn_data), list_param=list_param_runClim_KGCAI, runClim_KGCAI,mc.preschedule=FALSE,mc.cores = num_cores) #This is the end bracket from mclapply(...) statement
256
    #test<-runClim_KGCAI(1,list_param=list_param_runClim_KGCAI)
257
    save(clim_method_mod_obj,file= file.path(out_path,paste(interpolation_method,"_mod_",y_var_name,out_prefix,".RData",sep="")))
258
    list_tmp<-vector("list",length(clim_method_mod_obj))
259
    for (i in 1:length(clim_method_mod_obj)){
260
      tmp<-clim_method_mod_obj[[i]]$clim
261
      list_tmp[[i]]<-tmp
262
    }
263
    clim_yearlist<-list_tmp
264
  }
265
  t2<-proc.time()-t1
266
  cat(as.character(t2),file=log_fname,sep="\n", append=TRUE)
267
  
268
  #Getting rid of raster temp files
269
  removeTmpFiles(h=0)
270
  #quit()
271

  
272
  ################## PREDICT AT DAILY TIME SCALE #################
273
  #Predict at daily time scale from single time scale or multiple time scale methods: 2 methods availabe now
274
  #put together list of clim models per month...
275
  #rast_clim_yearlist<-list_tmp
276
  
277
  #Second predict at the daily time scale: delta
278
  
279
  #method_mod_obj<-mclapply(1:1, runGAMFusion,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement
280
  cat("Predictions at the daily scale:",file=log_fname,sep="\n", append=TRUE)
281
  t1<-proc.time()
282
  cat(paste("Local Date and Time: ",as.character(Sys.time()),sep=""),
283
      file=log_fname,sep="\n")
284
  
285
  #TODO : Same call for all functions!!! Replace by one "if" for all multi time scale methods...
286
  #The methods could be defined earlier as constant??
287
  #Create data.frame combining sampling at daily and monthly time scales:
288
  
289
  daily_dev_sampling_dat <- combine_sampling_daily_monthly_for_daily_deviation_pred(sampling_obj,sampling_month_obj)
290
    
291
  #use_clim_image<- TRUE
292
  #use_clim_image<-FALSE
293
  #join_daily <- FALSE
294
  #join_daily <- TRUE
295
  
296
  if (interpolation_method %in% c("gam_CAI","kriging_CAI","gwr_CAI","gam_fusion","kriging_fusion","gwr_fusion")){
297
    #input a list:note that ghcn.subsets is not sampling_obj$data_day_ghcn
298
    i<-1
299
    list_param_run_prediction_daily_deviation <-list(i,clim_yearlist,daily_dev_sampling_dat,sampling_month_obj,sampling_obj,dst,list_models2,interp_method2,
300
                                                     s_raster,use_clim_image,join_daily,var,y_var_name, interpolation_method,out_prefix,out_path)
301
    names(list_param_run_prediction_daily_deviation)<-c("list_index","clim_yearlist","daily_dev_sampling_dat","sampling_month_obj","sampling_obj","dst","list_models2","interp_method2",
302
                                                        "s_raster","use_clim_image","join_daily","var","y_var_name","interpolation_method","out_prefix","out_path")
303
    #method_mod_obj<-mclapply(1:length(sampling_obj$ghcn_data),list_param=list_param_run_prediction_daily_deviation,run_prediction_daily_deviation,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement
304
    #debug(run_prediction_daily_deviation)
305
    #test <- run_prediction_daily_deviation(1,list_param=list_param_run_prediction_daily_deviation) #This is the end bracket from mclapply(...) statement
306
    #test <- mclapply(1:9,list_param=list_param_run_prediction_daily_deviation,run_prediction_daily_deviation,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement
307
    
308
    method_mod_obj<-mclapply(1:nrow(daily_dev_sampling_dat),list_param=list_param_run_prediction_daily_deviation,run_prediction_daily_deviation,mc.preschedule=FALSE,mc.cores = num_cores) #This is the end bracket from mclapply(...) statement
309
    save(method_mod_obj,file= file.path(out_path,paste("method_mod_obj_",interpolation_method,"_",y_var_name,out_prefix,".RData",sep="")))
310
    
311
  }
312
  
313
  #TODO : Same call for all functions!!! Replace by one "if" for all daily single time scale methods...
314
  if (interpolation_method=="gam_daily"){
315
    #input a list:note that ghcn.subsets is not sampling_obj$data_day_ghcn
316
    i<-1
317
    list_param_run_prediction_gam_daily <-list(i,s_raster,covar_names,lst_avg,list_models,dst,screen_data_training,var,y_var_name, sampling_obj,interpolation_method,out_prefix,out_path)
318
    names(list_param_run_prediction_gam_daily)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","screen_data_training","var","y_var_name","sampling_obj","interpolation_method","out_prefix","out_path")
319
    #test <- runGAM_day_fun(1,list_param_run_prediction_gam_daily)
320
    
321
    method_mod_obj<-mclapply(1:length(sampling_obj$ghcn_data),list_param=list_param_run_prediction_gam_daily,runGAM_day_fun,mc.preschedule=FALSE,mc.cores = num_cores) #This is the end bracket from mclapply(...) statement
322
    #method_mod_obj<-mclapply(1:22,list_param=list_param_run_prediction_gam_daily,runGAM_day_fun,mc.preschedule=FALSE,mc.cores = 11) #This is the end bracket from mclapply(...) statement
323
    
324
    save(method_mod_obj,file= file.path(out_path,paste("method_mod_obj_",interpolation_method,"_",y_var_name,out_prefix,".RData",sep="")))
325
    
326
  }
327
  
328
  if (interpolation_method=="kriging_daily"){
329
    #input a list:note that ghcn.subsets is not sampling_obj$data_day_ghcn
330
    i<-1
331
    list_param_run_prediction_kriging_daily <-list(i,s_raster,covar_names,lst_avg,list_models,dst,var,y_var_name, sampling_obj,interpolation_method,out_prefix,out_path)
332
    names(list_param_run_prediction_kriging_daily)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","var","y_var_name","sampling_obj","interpolation_method","out_prefix","out_path")
333
    #test <- runKriging_day_fun(1,list_param_run_prediction_kriging_daily)
334
    method_mod_obj<-mclapply(1:length(sampling_obj$ghcn_data),list_param=list_param_run_prediction_kriging_daily,runKriging_day_fun,mc.preschedule=FALSE,mc.cores = num_cores) #This is the end bracket from mclapply(...) statement
335
    #method_mod_obj<-mclapply(1:18,list_param=list_param_run_prediction_kriging_daily,runKriging_day_fun,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement
336
    
337
    save(method_mod_obj,file= file.path(out_path,paste("method_mod_obj_",interpolation_method,"_",y_var_name,out_prefix,".RData",sep="")))
338
    
339
  }
340
  
341
  if (interpolation_method=="gwr_daily"){
342
    #input a list:note that ghcn.subsets is not sampling_obj$data_day_ghcn
343
    i<-1
344
    list_param_run_prediction_gwr_daily <-list(i,s_raster,covar_names,lst_avg,list_models,dst,var,y_var_name, sampling_obj,interpolation_method,out_prefix,out_path)
345
    names(list_param_run_prediction_gwr_daily)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","var","y_var_name","sampling_obj","interpolation_method","out_prefix","out_path")
346
    #test <- run_interp_day_fun(1,list_param_run_prediction_gwr_daily)
347
    method_mod_obj<-mclapply(1:length(sampling_obj$ghcn_data),list_param=list_param_run_prediction_gwr_daily,run_interp_day_fun,mc.preschedule=FALSE,mc.cores = num_cores) #This is the end bracket from mclapply(...) statement
348
    #method_mod_obj<-mclapply(1:22,list_param=list_param_run_prediction_gwr_daily,run_interp_day_fun,mc.preschedule=FALSE,mc.cores = 11) #This is the end bracket from mclapply(...) statement
349
    #method_mod_obj<-mclapply(1:18,list_param=list_param_run_prediction_kriging_daily,runKriging_day_fun,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement
350
    
351
    save(method_mod_obj,file= file.path(out_path,paste("method_mod_obj_",interpolation_method,"_",y_var_name,out_prefix,".RData",sep="")))
352
    
353
  }
354
  t2<-proc.time()-t1
355
  cat(as.character(t2),file=log_fname,sep="\n", append=TRUE)
356
  #browser()
357
  
358
  ############### NOW RUN VALIDATION #########################
359
  #SIMPLIFY THIS PART: one call
360
  cat("Validation step:",file=log_fname,sep="\n", append=TRUE)
361
  t1<-proc.time()
362
  cat(paste("Local Date and Time: ",as.character(Sys.time()),sep=""),
363
      file=log_fname,sep="\n")
364
  
365
  if (interpolation_method=="gam_daily" | interpolation_method=="kriging_daily" | interpolation_method=="gwr_daily"){
366
    multi_time_scale <- FALSE
367
    
368
    list_data_v <- extract_list_from_list_obj(method_mod_obj,"data_v")
369
    list_data_s <- extract_list_from_list_obj(method_mod_obj,"data_s")
370
    rast_day_yearlist <- extract_list_from_list_obj(method_mod_obj,y_var_name) #list_tmp #list of predicted images over full year...
371
    list_sampling_dat <- extract_list_from_list_obj(method_mod_obj,"sampling_dat")
372
    
373
    list_param_validation<-list(i,rast_day_yearlist,list_data_v,list_data_s,list_sampling_dat,y_var_name, multi_time_scale,out_prefix, out_path)
374
    names(list_param_validation)<-c("list_index","rast_day_year_list",
375
                                  "list_data_v","list_data_s","list_sampling_dat","y_ref","multi_time_scale","out_prefix", "out_path") #same names for any method
376
    #debug(calculate_accuracy_metrics)
377
    #test_val2 <-calculate_accuracy_metrics(1,list_param_validation)
378
  
379
    validation_mod_obj <-mclapply(1:length(method_mod_obj), list_param=list_param_validation, calculate_accuracy_metrics,mc.preschedule=FALSE,mc.cores = num_cores) 
380
    save(validation_mod_obj,file= file.path(out_path,paste(interpolation_method,"_validation_mod_obj_",y_var_name,out_prefix,".RData",sep="")))
381
    t2<-proc.time()-t1
382
    cat(as.character(t2),file=log_fname,sep="\n", append=TRUE)
383
  }
384
    
385
  ### Run monthly validation if multi-time scale methods and add information to daily...
386
  if (interpolation_method %in% c("gam_CAI","kriging_CAI","gwr_CAI","gam_fusion","kriging_fusion","gwr_fusion")){
387
    multi_time_scale <- TRUE
388
    i<-1
389
    
390
    ## daily time scale
391
    list_data_v <- extract_list_from_list_obj(method_mod_obj,"data_v")
392
    list_data_s <- extract_list_from_list_obj(method_mod_obj,"data_s")
393
    rast_day_yearlist <- extract_list_from_list_obj(method_mod_obj,y_var_name) #list_tmp #list of predicted images over full year...
394
    list_sampling_dat <- extract_list_from_list_obj(method_mod_obj,"daily_dev_sampling_dat")
395
    
396
    list_param_validation<-list(i,rast_day_yearlist,list_data_v,list_data_s,list_sampling_dat,y_var_name, multi_time_scale,out_prefix, out_path)
397
    names(list_param_validation)<-c("list_index","rast_day_year_list",
398
                                    "list_data_v","list_data_s","list_sampling_dat","y_ref","multi_time_scale","out_prefix", "out_path") #same names for any method
399
    #debug(calculate_accuracy_metrics)
400
    #test_val2 <-calculate_accuracy_metrics(1,list_param_validation)
401
    
402
    validation_mod_obj <-mclapply(1:length(method_mod_obj), list_param=list_param_validation, calculate_accuracy_metrics,mc.preschedule=FALSE,mc.cores = num_cores) 
403
    save(validation_mod_obj,file= file.path(out_path,paste(interpolation_method,"_validation_mod_obj_",y_var_name,out_prefix,".RData",sep="")))
404
    
405
    ### monthly time scale
406
    list_data_v <- extract_list_from_list_obj(clim_method_mod_obj,"data_month_v") #extract monthly testing/validation dataset
407
    list_data_s <- extract_list_from_list_obj(clim_method_mod_obj,"data_month") #extract monthly training/fitting dataset
408
    rast_day_yearlist <- extract_list_from_list_obj(clim_method_mod_obj,"clim") #list_tmp #list of predicted images over full year at monthly time scale
409
    list_sampling_dat <- extract_list_from_list_obj(clim_method_mod_obj,"sampling_month_dat")
410
    
411
    #list_param_validation_month <-list(i,clim_yearlist,clim_method_mod_obj,y_var_name, multi_time_scale ,out_prefix, out_path)
412
    #names(list_param_validation_month)<-c("list_index","rast_day_year_list","method_mod_obj","y_var_name","multi_time_scale","out_prefix", "out_path") #same names for any method
413
    
414
    list_param_validation_month <-list(i,rast_day_yearlist,list_data_v,list_data_s,list_sampling_dat,y_var_month, multi_time_scale,out_prefix, out_path)
415
    names(list_param_validation_month)<-c("list_index","rast_day_year_list",
416
                                    "list_data_v","list_data_s","list_sampling_dat","y_ref","multi_time_scale","out_prefix", "out_path") #same names for any method
417
    #debug(calculate_accuracy_metrics)    
418
    #test_val2 <-calculate_accuracy_metrics(1,list_param_validation_month)
419
    
420
    validation_mod_month_obj <- mclapply(1:length(clim_method_mod_obj), list_param=list_param_validation_month, calculate_accuracy_metrics,mc.preschedule=FALSE,mc.cores = num_cores) 
421
    #test_val<-calculate_accuracy_metrics(1,list_param_validation)
422
    save(validation_mod_month_obj,file= file.path(out_path,paste(interpolation_method,"_validation_mod_month_obj_",y_var_name,out_prefix,".RData",sep="")))
423
  
424
    ##Create data.frame with validation and fit metrics for a full year/full numbe of runs
425
    tb_month_diagnostic_v <- extract_from_list_obj(validation_mod_month_obj,"metrics_v") 
426
    #tb_diagnostic_v contains accuracy metrics for models sample and proportion for every run...if full year then 365 rows maximum
427
    rownames(tb_month_diagnostic_v)<-NULL #remove row names
428
    tb_month_diagnostic_v$method_interp <- interpolation_method
429
    tb_month_diagnostic_s<-extract_from_list_obj(validation_mod_month_obj,"metrics_s")
430
    rownames(tb_month_diagnostic_s)<-NULL #remove row names
431
    tb_month_diagnostic_s$method_interp <- interpolation_method #add type of interpolation...out_prefix too??
432
    
433
  }
434

  
435
  #Cleaning raster temp files
436
  removeTmpFiles(h=0)
437

  
438
  #################### ASSESSMENT OF PREDICTIONS: PLOTS OF ACCURACY METRICS ###########
439
  ##Create data.frame with validation and fit metrics for a full year/full numbe of runs
440
  tb_diagnostic_v<-extract_from_list_obj(validation_mod_obj,"metrics_v") 
441
  #tb_diagnostic_v contains accuracy metrics for models sample and proportion for every run...if full year then 365 rows maximum
442
  rownames(tb_diagnostic_v)<-NULL #remove row names
443
  tb_diagnostic_v$method_interp <- interpolation_method
444
  tb_diagnostic_s<-extract_from_list_obj(validation_mod_obj,"metrics_s")
445
  rownames(tb_diagnostic_s)<-NULL #remove row names
446
  tb_diagnostic_s$method_interp <- interpolation_method #add type of interpolation...out_prefix too??
447
  
448
  #Call functions to create plots of metrics for validation dataset
449
  metric_names<-c("rmse","mae","me","r","m50")
450
  summary_metrics_v<- boxplot_from_tb(tb_diagnostic_v,metric_names,out_prefix,out_path) #if adding for fit need to change outprefix
451
  names(summary_metrics_v)<-c("avg","median")
452
  summary_month_metrics_v<- boxplot_month_from_tb(tb_diagnostic_v,metric_names,out_prefix,out_path)
453
  
454
  #################### CLOSE LOG FILE  ####################
455
  
456
  #close log_file connection and add meta data
457
  cat("Finished script process time:",file=log_fname,sep="\n", append=TRUE)
458
  time2<-proc.time()-time1
459
  cat(as.character(time2),file=log_fname,sep="\n", append=TRUE)
460
  #later on add all the parameters used in the script...
461
  cat(paste("Finished script at this local Date and Time: ",as.character(Sys.time()),sep=""),
462
             file=log_fname,sep="\n", append=TRUE)
463
  cat("End of script",file=log_fname,sep="\n", append=TRUE)
464
  #close(log_fname)
465
  
466
  ################### PREPARE RETURN OBJECT ###############
467
  #Will add more information to be returned
468
  if (interpolation_method %in% c("gam_CAI","kriging_CAI","gwr_CAI","gam_fusion","kriging_fusion","gwr_fusion")){
469
    raster_prediction_obj<-list(clim_method_mod_obj,method_mod_obj,validation_mod_obj,validation_mod_month_obj, tb_diagnostic_v,
470
                                tb_diagnostic_s,tb_month_diagnostic_v,tb_month_diagnostic_s,summary_metrics_v,summary_month_metrics_v)
471
    names(raster_prediction_obj)<-c("clim_method_mod_obj","method_mod_obj","validation_mod_obj","validation_mod_month_obj","tb_diagnostic_v",
472
                                    "tb_diagnostic_s","tb_month_diagnostic_v","tb_month_diagnostic_s","summary_metrics_v","summary_month_metrics_v")  
473
    save(raster_prediction_obj,file= file.path(out_path,paste("raster_prediction_obj_",interpolation_method,"_", y_var_name,out_prefix,".RData",sep="")))
474
    
475
  }
476
  
477
  #use %in% instead of "|" operator
478
  if (interpolation_method=="gam_daily" | interpolation_method=="kriging_daily" | interpolation_method=="gwr_daily"){
479
    raster_prediction_obj<-list(method_mod_obj,validation_mod_obj,tb_diagnostic_v,
480
                                tb_diagnostic_s,summary_metrics_v,summary_month_metrics_v)
481
    names(raster_prediction_obj)<-c("method_mod_obj","validation_mod_obj","tb_diagnostic_v",
482
                                    "tb_diagnostic_s","summary_metrics_v","summary_month_metrics_v")  
483
    save(raster_prediction_obj,file= file.path(out_path,paste("raster_prediction_obj_",interpolation_method,"_", y_var_name,out_prefix,".RData",sep="")))
484
    
485
  }
486
  
487
  print("stage 4 DONE")
488
  return(raster_prediction_obj)
489
}
490

  
491
####################################################################
492
######################## END OF SCRIPT/FUNCTION #####################
climate/research/world/interpolation/GAM_fusion_function_multisampling_01062014.R
1
##################  Functions for use in the raster prediction stage   #######################################
2
############################ Interpolation in a given tile/region ##########################################
3
#This script contains 5 functions used in the interpolation of temperature in the specfied study/processing area:                             
4
# 1)predict_raster_model<-function(in_models,r_stack,out_filename)                                                             
5
# 2)fit_models<-function(list_formulas,data_training)           
6
# 3)runClim_KGCAI<-function(j,list_param) : function that peforms GAM CAI method
7
# 4)runClim_KGFusion<-function(j,list_param) function for monthly step (climatology) in the fusion method
8
# 5)runGAMFusion <- function(i,list_param) : daily step for fusion method, perform daily prediction
9
#
10
#AUTHOR: Benoit Parmentier                                                                       
11
#DATE: 10/03/2013                                                                                 
12
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--   
13

  
14
##Comments and TODO:
15
#This script is meant to be for general processing tile by tile or region by region.
16
# Note that the functions are called from GAM_fusion_analysis_raster_prediction_mutlisampling.R.
17
# This will be expanded to other methods.
18
##################################################################################################
19

  
20

  
21
predict_raster_model<-function(in_models,r_stack,out_filename){
22
  #This functions performs predictions on a raster grid given input models.
23
  #Arguments: list of fitted models, raster stack of covariates
24
  #Output: spatial grid data frame of the subset of tiles
25
  list_rast_pred<-vector("list",length(in_models))
26
  for (i in 1:length(in_models)){
27
    mod <-in_models[[i]] #accessing GAM model ojbect "j"
28
    raster_name<-out_filename[[i]]
29
    if (inherits(mod,"gam")) {           #change to c("gam","autoKrige")
30
      raster_pred<- predict(object=r_stack,model=mod,na.rm=FALSE,block.size=1000) #Using the coeff to predict new values.
31
      raster_pred<- predict(object=r_stack,model=mod,na.rm=FALSE) #Using the coeff to predict new values.
32
      names(raster_pred)<-"y_pred"  
33
      writeRaster(raster_pred, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
34
      #print(paste("Interpolation:","mod", j ,sep=" "))
35
      list_rast_pred[[i]]<-raster_name
36
    }
37
  }
38
  if (inherits(mod,"try-error")) {
39
    print(paste("no gam model fitted:",mod[1],sep=" ")) #change message for any model type...
40
  }
41
  return(list_rast_pred)
42
}
43

  
44
fit_models<-function(list_formulas,data_training){
45
  #This functions several models and returns model objects.
46
  #Arguments: - list of formulas for GAM models
47
  #           - fitting data in a data.frame or SpatialPointDataFrame
48
  #Output: list of model objects 
49
  list_fitted_models<-vector("list",length(list_formulas))
50
  for (k in 1:length(list_formulas)){
51
    formula<-list_formulas[[k]]
52
    mod<- try(gam(formula, data=data_training)) #change to any model!!
53
    #mod<- try(autoKrige(formula, input_data=data_s,new_data=s_sgdf,data_variogram=data_s))
54
    model_name<-paste("mod",k,sep="")
55
    assign(model_name,mod) 
56
    list_fitted_models[[k]]<-mod
57
  }
58
  return(list_fitted_models) 
59
}
60

  
61
#Function to glue all methods together...still need to separate fit and training for gwr and kriging, ok for now
62
interpolate_area_fun <- function(method_interp,list_models,s_raster,list_out_filename,data_df){
63
  ##Function to fit and predict an interpolation surface
64
  ##Author: Benoit Parmentier
65
  ##Function depends on other functions!!!
66
  #inpputs:
67
  #method_interp: interpolation method with value "gam","gwr","kriging"
68
  #list_models: models to fit and predict as string (i.e.vector char)
69
  #s_raster: stack with covariate variables, must match in name the data.frame input
70
  #data_df: spatial point data.frame with covariates, must be projected match names of covariates
71
  #list_out_filename: list of char containing output names for models
72
  
73
  #Conver to formula object
74
  list_formulas<-lapply(list_models,as.formula,env=.GlobalEnv) #mulitple arguments passed to lapply!!
75
  cname<-paste("mod",1:length(list_formulas),sep="") #change to more meaningful name?
76
  
77
  names(list_out_filename)<-cname  
78
  
79
  ##Now carry out prediction
80
  if(method_interp=="gam"){    
81
    
82
    #First fitting
83
    mod_list<-fit_models(list_formulas,data_df) #only gam at this stage
84
    names(mod_list)<-cname
85
    
86
    #if raster provided then predict surface
87
    if(!is.null(s_raster)){
88
      #Second predict values for raster image...by providing fitted model list, raster brick and list of output file names
89
      rast_pred_list<-predict_raster_model(mod_list,s_raster,list_out_filename)
90
      names(rast_pred_list)<-cname
91
    }
92
  }
93
  
94
  if(method_interp%in%c("gwr","kriging")){
95
    
96
    #Call funciton to fit and predict gwr and/or kriging
97
    #month_prediction_obj<-predict_auto_krige_raster_model(list_formulas,s_raster,data_month,list_out_filename)
98
    rast_prediction_obj<-predict_autokrige_gwr_raster_model(method_interp,list_formulas,s_raster,data_df,list_out_filename)
99
    
100
    mod_list <-rast_prediction_obj$list_fitted_models
101
    rast_pred_list <-rast_prediction_obj$list_rast_pred
102
    names(rast_pred_list)<-cname
103
  }
104
  
105
  #Now prepare to return object
106
  interp_area_obj <-list(mod_list,list_formulas,rast_pred_list)
107
  names(interp_area_obj) <- c("mod_list","list_formulas","rast_pred_list")
108
  return(interp_area_obj)
109
} 
110

  
111
####
112
#TODO:
113
#Add log file and calculate time and sizes for processes-outputs
114
#Can combine runClim_KGFusion and runClim_KGCAI
115
runClim_KGCAI <-function(j,list_param){
116

  
117
  #Make this a function with multiple argument that can be used by mcmapply??
118
  #Arguments: 
119
  #1)list_index: j 
120
  #2)covar_rast: covariates raster images used in the modeling
121
  #3)covar_names: names of input variables 
122
  #4)lst_avg: list of LST climatogy names, may be removed later on
123
  #5)list_models: list input models for bias calculation
124
  #6)dst: data at the monthly time scale
125
  #7)var: TMAX or TMIN, variable being interpolated
126
  #8)y_var_name: output name, not used at this stage
127
  #9)out_prefix
128
  #10) out_path
129
  
130
  #The output is a list of four shapefile names produced by the function:
131
  #1) clim: list of output names for raster climatogies 
132
  #2) data_month: monthly training data for bias surface modeling
133
  #3) mod: list of model objects fitted 
134
  #4) formulas: list of formulas used in bias modeling
135
    
136
  ### PARSING INPUT ARGUMENTS
137
  #list_param_runGAMFusion<-list(i,clim_yearlist,sampling_obj,var,y_var_name, out_prefix)
138
    
139
  index<-list_param$j
140
  s_raster<-list_param$covar_rast
141
  covar_names<-list_param$covar_names
142
  lst_avg<-list_param$lst_avg
143
  list_models<-list_param$list_models
144
  dst<-list_param$dst #monthly station dataset
145
  var<-list_param$var
146
  y_var_name<-list_param$y_var_name
147
  out_prefix<-list_param$out_prefix
148
  out_path<-list_param$out_path
149

  
150
  #inserted #
151
  sampling_month_obj<-list_param$sampling_month_obj
152
  ghcn.month.subsets<-sampling_month_obj$ghcn_data
153
  sampling_month_dat <- sampling_month_obj$sampling_dat
154
  sampling_month_index <- sampling_month_obj$sampling_index
155
  
156
  #Model and response variable can be changed without affecting the script
157
  #prop_month<-0 #proportion retained for validation...
158
  #run_samp<-1 #sample number, can be introduced later...
159

  
160
  prop_month <- sampling_month_dat$prop[j] #proportion retained for validation...
161
  run_samp <- sampling_month_dat$run_samp[j] #sample number if multisampling...will need create mulitple prediction at daily!!! could be complicated
162
                                       #possibility is to average per proportion !!!
163
  
164
  date_month <-strptime(sampling_month_dat$date[j], "%Y%m%d")   # interpolation date being processed
165
  month_no <-strftime(date_month, "%m")          # current month of the date being processed
166
  LST_month<-paste("mm_",month_no,sep="") # name of LST month to be matched
167
  LST_name <-LST_month  
168
  #### STEP 2: PREPARE DATA
169
    
170
  #change here...use training data...
171
  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
172
  
173
  #LST_name <-lst_avg[j] # name of LST month to be matched
174
  #data_month$LST<-data_month[[LST_name]]
175
  
176
  dataset_month <-ghcn.month.subsets[[j]]
177
  mod_LST <- ghcn.month.subsets[[j]][,match(LST_month, names(ghcn.month.subsets[[j]]))]  #Match interpolation date and monthly LST average
178
  dataset_month$LST <- as.data.frame(mod_LST)[,1] #Add the variable LST to the dataset
179
  #change here...
180
  dst$LST<-dst[[LST_month]] #Add the variable LST to the monthly dataset
181
  proj_str<-proj4string(dst) #get the local projection information from monthly data
182
  
183
  #TMax to model..., add precip later
184
  if (var=="TMAX"){   
185
    dataset_month$y_var<-dataset_month$TMax #Adding TMax as the variable modeled
186
  }
187
  if (var=="TMIN"){   
188
    dataset_month$y_var<-dataset_month$TMin #Adding TMin as the variable modeled
189
  }
190
  
191
  ind.training <- sampling_month_index[[j]]
192
  ind.testing  <- setdiff(1:nrow(dataset_month), ind.training)
193
  data_month_s <- dataset_month[ind.training, ]   #Training dataset currently used in the modeling
194
  data_month_v <- dataset_month[ind.testing, ]    #Testing/validation dataset using input sampling
195
  
196
  data_month <- data_month_s #training data for  monthhly predictions...
197

  
198
  #date_proc<-strptime(sampling_dat$date[i], "%Y%m%d")   # interpolation date being processed
199
  #mo<-as.integer(strftime(date_proc, "%m"))          # current month of the date being processed
200
  #day<-as.integer(strftime(date_proc, "%d"))
201
  #year<-as.integer(strftime(date_proc, "%Y"))
202
  ## end of pasted
203
    
204
  #end of insert...
205
  
206
  #Fit gam models using data and list of formulas
207
  
208
  list_formulas<-lapply(list_models,as.formula,env=.GlobalEnv) #mulitple arguments passed to lapply!!
209
  cname<-paste("mod",1:length(list_formulas),sep="") #change to more meaningful name?
210
  
211
  #mod_list<-fit_models(list_formulas,data_month) #only gam at this stage
212
  #cname<-paste("mod",1:length(mod_list),sep="") #change to more meaningful name?
213
  
214
  #Adding layer LST to the raster stack  
215
  
216
  pos<-match("LST",names(s_raster)) #Find the position of the layer with name "LST", if not present pos=NA
217
  s_raster<-dropLayer(s_raster,pos)      # If it exists drop layer
218
  LST<-subset(s_raster,LST_name)
219
  names(LST)<-"LST"
220
  s_raster<-addLayer(s_raster,LST)            #Adding current month
221
  
222
  #Now generate file names for the predictions...
223
  list_out_filename<-vector("list",length(list_formulas))
224
  names(list_out_filename)<-cname  
225
  
226
  for (k in 1:length(list_out_filename)){
227
    #j indicate which month is predicted
228
    data_name<-paste(var,"_clim_month_",as.integer(month_no),"_",cname[k],"_",prop_month,
229
                     "_",run_samp,sep="")
230
    raster_name<-file.path(out_path,paste("CAI_",data_name,out_prefix,".tif", sep=""))
231
    list_out_filename[[k]]<-raster_name
232
  }
233
  
234
  ## Select the relevant method...
235
  
236
  if (interpolation_method=="gam_CAI"){
237
    
238
    #First fitting
239
    mod_list<-fit_models(list_formulas,data_month) #only gam at this stage
240
    names(mod_list)<-cname
241
    
242
    #Second predict values for raster image...by providing fitted model list, raster brick and list of output file names
243
    #now predict values for raster image...
244
    rast_clim_list<-predict_raster_model(mod_list,s_raster,list_out_filename)
245
    names(rast_clim_list)<-cname
246
    #Some models will not be predicted because of the lack of training data...remove empty string from list of models
247
        
248
  }
249
  
250
  
251
  if (interpolation_method %in% c("kriging_CAI","gwr_CAI")){
252
    if(interpolation_method=="kriging_CAI"){
253
      method_interp <- "kriging"
254
    }else{
255
      method_interp <- "gwr"
256
    }
257
    #Call function to fit and predict gwr and/or kriging
258
    #month_prediction_obj<-predict_auto_krige_raster_model(list_formulas,s_raster,data_month,list_out_filename)
259
    month_prediction_obj<-predict_autokrige_gwr_raster_model(method_interp,list_formulas,s_raster,data_month,list_out_filename)
260
    
261
    mod_list <-month_prediction_obj$list_fitted_models
262
    rast_clim_list <-month_prediction_obj$list_rast_pred
263
    names(rast_clim_list)<-cname
264
  }
265
  
266
  rast_clim_list<-rast_clim_list[!sapply(rast_clim_list,is.null)] #remove NULL elements in list
267
  
268
  #Adding Kriging for Climatology options 
269

  
270
  clim_xy<-coordinates(data_month)
271
  fitclim<-Krig(clim_xy,data_month$y_var,theta=1e5) #use TPS or krige 
272
  #fitclim<-Krig(clim_xy,data_month$TMax,theta=1e5) #use TPS or krige 
273
  mod_krtmp1<-fitclim
274
  model_name<-"mod_kr"
275
  
276
  clim_rast<-interpolate(LST,fitclim) #interpolation using function from raster package
277
  
278
  #Write out modeled layers
279
  data_name<-paste(var,"_clim_month_",as.integer(month_no),"_",model_name,"_",prop_month,
280
                   "_",run_samp,sep="")
281
  raster_name_clim<-file.path(out_path,paste("CAI_",data_name,out_prefix,".tif", sep=""))
282
  writeRaster(clim_rast, filename=raster_name_clim,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
283
  
284
  #Adding to current objects
285
  mod_list[[model_name]]<-mod_krtmp1
286
  #rast_bias_list[[model_name]]<-raster_name_bias
287
  rast_clim_list[[model_name]]<-raster_name_clim
288
  
289
  #Prepare object to return
290
  clim_obj<-list(rast_clim_list,data_month,data_month_v,sampling_month_dat[j,],mod_list,list_formulas)
291
  names(clim_obj)<-c("clim","data_month","data_month_v","sampling_month_dat","mod","formulas")
292
  
293
  save(clim_obj,file= file.path(out_path,paste("clim_obj_CAI_month_",as.integer(month_no),"_",var,"_",prop_month,
294
                                               "_",run_samp,"_",out_prefix,".RData",sep="")))
295
  
296
  return(clim_obj) 
297
}
298
#
299

  
300
runClim_KGFusion<-function(j,list_param){
301
  
302
  #Make this a function with multiple argument that can be used by mcmapply??
303
  #Arguments: 
304
  #1)list_index: j 
305
  #2)covar_rast: covariates raster images used in the modeling
306
  #3)covar_names: names of input variables 
307
  #4)lst_avg: list of LST climatogy names, may be removed later on
308
  #5)list_models: list input models for bias calculation
309
  #6)dst: data at the monthly time scale
310
  #7)var: TMAX or TMIN, variable being interpolated
311
  #8)y_var_name: output name, not used at this stage
312
  #9)out_prefix
313
  #
314
  #The output is a list of four shapefile names produced by the function:
315
  #1) clim: list of output names for raster climatogies 
316
  #2) data_month: monthly training data for bias surface modeling
317
  #3) mod: list of model objects fitted 
318
  #4) formulas: list of formulas used in bias modeling
319
  
320
  ### PARSING INPUT ARGUMENTS
321
  #list_param_runGAMFusion<-list(i,clim_yearlist,sampling_obj,var,y_var_name, out_prefix)
322
  
323
  index<-list_param$j
324
  s_raster<-list_param$covar_rast
325
  covar_names<-list_param$covar_names
326
  lst_avg<-list_param$lst_avg
327
  list_models<-list_param$list_models
328
  dst<-list_param$dst #monthly station dataset
329
  var<-list_param$var
330
  y_var_name<-list_param$y_var_name
331
  out_prefix<-list_param$out_prefix
332
  out_path<-list_param$out_path
333
  
334
  #inserted #
335
  sampling_month_obj<-list_param$sampling_month_obj
336
  ghcn.month.subsets<-sampling_month_obj$ghcn_data
337
  sampling_month_dat <- sampling_month_obj$sampling_dat
338
  sampling_month_index <- sampling_month_obj$sampling_index
339
  
340
  #Model and response variable can be changed without affecting the script
341
  #prop_month<-0 #proportion retained for validation...
342
  #run_samp<-1 #sample number, can be introduced later...
343
  
344
  prop_month <- sampling_month_dat$prop[j] #proportion retained for validation...
345
  run_samp <- sampling_month_dat$run_samp[j] #sample number if multisampling...
346
  #will need create mulitple prediction at daily!!! could be complicated
347
  #possibility is to average per proportion !!!
348
  
349
  date_month <-strptime(sampling_month_dat$date[j], "%Y%m%d")   # interpolation date being processed
350
  month_no <-strftime(date_month, "%m")          # current month of the date being processed
351
  LST_month<-paste("mm_",month_no,sep="") # name of LST month to be matched
352
  LST_name <-LST_month  
353
  #### STEP 2: PREPARE DATA
354
  
355
  #change here...use training data...
356
  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
357
  
358
  #LST_name <-lst_avg[j] # name of LST month to be matched
359
  #data_month$LST<-data_month[[LST_name]]
360
  
361
  dataset_month <-ghcn.month.subsets[[j]]
362
  mod_LST <- ghcn.month.subsets[[j]][,match(LST_month, names(ghcn.month.subsets[[j]]))]  #Match interpolation date and monthly LST average
363
  dataset_month$LST <- as.data.frame(mod_LST)[,1] #Add the variable LST to the dataset
364
  #change here...
365
  dst$LST<-dst[[LST_month]] #Add the variable LST to the monthly dataset
366
  proj_str<-proj4string(dst) #get the local projection information from monthly data
367
    
368
  ind.training <- sampling_month_index[[j]]
369
  ind.testing  <- setdiff(1:nrow(dataset_month), ind.training)
370
  data_month_s <- dataset_month[ind.training, ]   #Training dataset currently used in the modeling
371
  data_month_v <- dataset_month[ind.testing, ]    #Testing/validation dataset using input sampling
372
  
373
  data_month <- data_month_s #training data for  monthhly predictions...
374
  
375
  #date_proc<-strptime(sampling_dat$date[i], "%Y%m%d")   # interpolation date being processed
376
  #mo<-as.integer(strftime(date_proc, "%m"))          # current month of the date being processed
377
  #day<-as.integer(strftime(date_proc, "%d"))
378
  #year<-as.integer(strftime(date_proc, "%Y"))
379
  ## end of pasted
380
  
381
  #end of insert...09/04
382
  
383
  #### STEP 2: PREPARE DATA
384
  
385
  #data_month<-dst[dst$month==j,] #Subsetting dataset for the relevant month of the date being processed
386
  #LST_name<-lst_avg[j] # name of LST month to be matched
387
  #data_month$LST<-data_month[[LST_name]]
388
  
389
  #Adding layer LST to the raster stack  
390
  covar_rast<-s_raster
391
  #names(s_raster)<-covar_names
392
  pos<-match("LST",names(s_raster)) #Find the position of the layer with name "LST", if not present pos=NA
393
  s_raster<-dropLayer(s_raster,pos)      # If it exists drop layer
394
  LST<-subset(s_raster,LST_name)
395
  names(LST)<-"LST"
396
  s_raster<-addLayer(s_raster,LST)            #Adding current month
397
  
398
  #LST bias to model...
399
  if (var=="TMAX"){
400
    data_month$LSTD_bias<-data_month$LST-data_month$TMax
401
    data_month$y_var<-data_month$LSTD_bias #Adding bias as the variable modeled
402
  }
403
  if (var=="TMIN"){
404
    data_month$LSTD_bias<-data_month$LST-data_month$TMin
405
    data_month$y_var<-data_month$LSTD_bias #Adding bias as the variable modeled
406
  }
407
  
408
  #If CAI model then...
409
  #TMax to model..., add precip later
410
  #if (var=="TMAX"){   
411
  #  dataset_month$y_var<-dataset_month$TMax #Adding TMax as the variable modeled
412
  #}
413
  #if (var=="TMIN"){   
414
  #  dataset_month$y_var<-dataset_month$TMin #Adding TMin as the variable modeled
415
  #}
416
  
417
  #### STEP3:  NOW FIT AND PREDICT  MODEL
418
  
419
  list_formulas<-lapply(list_models,as.formula,env=.GlobalEnv) #mulitple arguments passed to lapply!!
420
  cname<-paste("mod",1:length(list_formulas),sep="") #change to more meaningful name?
421
  
422
  #Now generate file names for the predictions...
423
  list_out_filename<-vector("list",length(list_formulas))
424
  names(list_out_filename)<-cname  
425
  
426
  ##Change name...
427
  for (k in 1:length(list_out_filename)){
428
    #j indicate which month is predicted, var indicates TMIN or TMAX
429
    data_name<-paste(var,"_bias_LST_month_",as.integer(month_no),"_",cname[k],"_",prop_month,
430
                     "_",run_samp,sep="")
431
    raster_name<-file.path(out_path,paste("fusion_",interpolation_method,"_",data_name,out_prefix,".tif", sep=""))
432
    list_out_filename[[k]]<-raster_name
433
  }
434
  
435
  #for (k in 1:length(list_out_filename)){
436
  #  #j indicate which month is predicted
437
  #  data_name<-paste(var,"_clim_month_",as.integer(month_no),"_",cname[k],"_",prop_month,
438
  #                   "_",run_samp,sep="")
439
  #  raster_name<-file.path(out_path,paste("CAI_",data_name,out_prefix,".tif", sep=""))
440
  #  list_out_filename[[k]]<-raster_name
441
  #}
442
  
443
  ## Select the relevant method...
444
  
445
  if (interpolation_method=="gam_fusion"){
446
    #First fitting
447
    mod_list<-fit_models(list_formulas,data_month) #only gam at this stage
448
    names(mod_list)<-cname
449
  
450
    #Second predict values for raster image...by providing fitted model list, raster brick and list of output file names
451
    rast_bias_list<-predict_raster_model(mod_list,s_raster,list_out_filename)
452
    names(rast_bias_list)<-cname
453
  }
454
  
455
  if (interpolation_method %in% c("kriging_fusion","gwr_fusion")){
456
    if(interpolation_method=="kriging_fusion"){
457
      method_interp <- "kriging"
458
    }else{
459
      method_interp <- "gwr"
460
    }
461
    #Call funciton to fit and predict gwr and/or kriging
462
    #month_prediction_obj<-predict_auto_krige_raster_model(list_formulas,s_raster,data_month,list_out_filename)
463
    month_prediction_obj<-predict_autokrige_gwr_raster_model(method_interp,list_formulas,s_raster,data_month,list_out_filename)
464
    
465
    mod_list <-month_prediction_obj$list_fitted_models
466
    rast_bias_list <-month_prediction_obj$list_rast_pred
467
    names(rast_bias_list)<-cname
468
  }
469
  
470
  #Some modles will not be predicted...remove them
471
  rast_bias_list<-rast_bias_list[!sapply(rast_bias_list,is.null)] #remove NULL elements in list
472

  
473
  mod_rast<-stack(rast_bias_list)  #stack of bias raster images from models
474
 
475
  rast_clim_list<-vector("list",nlayers(mod_rast))
476
  
477

  
478
  names(rast_clim_list)<-names(rast_bias_list)
479

  
480
  for (k in 1:nlayers(mod_rast)){
481
    clim_fus_rast<-LST-subset(mod_rast,k)
482
    data_name<-paste(var,"_clim_LST_month_",as.integer(month_no),"_",names(rast_clim_list)[k],"_",prop_month,
483
                     "_",run_samp,sep="")
484
    raster_name<-file.path(out_path,paste("fusion_",interpolation_method,"_",data_name,out_prefix,".tif", sep=""))
485
    rast_clim_list[[k]]<-raster_name
486
    writeRaster(clim_fus_rast, filename=raster_name,overwrite=TRUE)  #Wri
487
  }
488
  
489
  #### STEP 4:Adding Kriging for Climatology options
490
  bias_xy<-coordinates(data_month)
491
  #fitbias<-Krig(bias_xy,data_month$LSTD_bias,theta=1e5) #use TPS or krige 
492
  fitbias<-try(Krig(bias_xy,data_month$LSTD_bias,theta=1e5)) #use TPS or krige 
493

  
494
  model_name<-"mod_kr"
495

  
496
  if (inherits(fitbias,"Krig")){
497
    #Saving kriged surface in raster images
498
    bias_rast<-bias_rast<-interpolate(LST,fitbias) #interpolation using function from raster package
499
    data_name<-paste(var,"_bias_LST_month_",as.integer(month_no),"_",model_name,"_",prop_month,
500
                     "_",run_samp,sep="")
501
    raster_name_bias<-file.path(out_path,paste("fusion_",interpolation_method,"_",data_name,out_prefix,".tif", sep=""))
502
    writeRaster(bias_rast, filename=raster_name_bias,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
503
    
504
    #now climatology layer
505
    clim_rast<-LST-bias_rast
506
    data_name<-paste(var,"_clim_LST_month_",as.integer(month_no),"_",model_name,"_",prop_month,
507
                     "_",run_samp,sep="")
508
    raster_name_clim<-file.path(out_path,paste("fusion_",interpolation_method,"_",data_name,out_prefix,".tif", sep=""))
509
    writeRaster(clim_rast, filename=raster_name_clim,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
510
    #Adding to current objects
511
    mod_list[[model_name]]<-fitbias
512
    rast_bias_list[[model_name]]<-raster_name_bias
513
    rast_clim_list[[model_name]]<-raster_name_clim
514
  }
515
  
516
  if (inherits(fitbias,"try-error")){
517
    #NEED TO DEAL WITH THIS!!!
518
    
519
    #Adding to current objects
520
    mod_list[[model_name]]<-NULL
521
    rast_bias_list[[model_name]]<-NULL
522
    rast_clim_list[[model_name]]<-NULL
523
  }
524

  
525
  #### STEP 5: Prepare object and return
526
  #Prepare object to return
527
  clim_obj<-list(rast_bias_list,rast_clim_list,data_month,data_month_v,sampling_month_dat[j,],mod_list,list_formulas)
528
  names(clim_obj)<-c("bias","clim","data_month","data_month_v","sampling_month_dat","mod","formulas")
529
  
530
  save(clim_obj,file= file.path(out_path,paste("clim_obj_fusion_month_",as.integer(month_no),"_",var,"_",prop_month,
531
                                               "_",run_samp,"_",out_prefix,".RData",sep=""))) 
532
  return(clim_obj)
533
}
534

  
535
## Run function for kriging...?
536

  
537
#runGAMFusion <- function(i,list_param) {            # loop over dates
538
run_prediction_daily_deviation <- function(i,list_param) {            # loop over dates
539
  #This function produce daily prediction using monthly predicted clim surface.
540
  #The output is both daily prediction and daily deviation from monthly steps.
541
  
542
  #### Change this to allow explicitly arguments...
543
  #Arguments: 
544
  #1)index: loop list index for individual run/fit
545
  #2)clim_year_list: list of climatology files for all models...(12*nb of models)
546
  #3)sampling_obj: contains, data per date/fit, sampling information
547
  #4)dst: data at the monthly time scale
548
  #5)var: variable predicted -TMAX or TMIN
549
  #6)y_var_name: name of the variable predicted - dailyTMax, dailyTMin
550
  #7)out_prefix
551
  #8)out_path
552
  #9)list_models2 : interpolation model's formulas as string
553
  #10)interp_methods2: "gam","gwr","kriging"
554
  #11)s_raster: stack for covariates and toher variables
555
  
556
  #The output is a list of four shapefile names produced by the function:
557
  #1) list_temp: y_var_name
558
  #2) rast_clim_list: list of files for temperature climatology predictions
559
  #3) delta: list of files for temperature delta predictions
560
  #4) data_s: training data
561
  #5) data_v: testing data
562
  #6) sampling_dat: sampling information for the current prediction (date,proportion of holdout and sample number)
563
  #7) mod_kr: kriging delta fit, field package model object
564
  
565
  ### PARSING INPUT ARGUMENTS
566
  
567
  #list_param_runGAMFusion<-list(i,clim_yearlist,sampling_obj,var,y_var_name, out_prefix)
568
  rast_clim_yearlist<-list_param$clim_yearlist
569
  sampling_obj<-list_param$sampling_obj
570
  ghcn.subsets<-sampling_obj$ghcn_data
571
  sampling_dat <- sampling_obj$sampling_dat
572
  sampling <- sampling_obj$sampling_index
573
  var<-list_param$var
574
  y_var_name<-list_param$y_var_name
575
  out_prefix<-list_param$out_prefix
576
  dst<-list_param$dst #monthly station dataset
577
  out_path <-list_param$out_path
578
  list_models2 <-list_param$list_models2
579
  interp_method2 <- list_param$interp_method2
580
  s_raster <- list_param$s_raster
581
  
582
  sampling_month_obj <- list_param$sampling_month_obj
583
  daily_dev_sampling_dat <- list_param$daily_dev_sampling_dat
584
  
585
  index_d <- daily_dev_sampling_dat$index_d[i]
586
  index_m <- daily_dev_sampling_dat$index_m[i]
587
  
588
  use_clim_image <- list_param$use_clim_image # use predicted image as a base...rather than average Tmin at the station for delta
589
  join_daily <- list_param$join_daily # join monthly and daily station before calucating delta
590
  
591
  #use_clim_image
592
  ##########
593
  # STEP 1 - Read in information and get traing and testing stations
594
  ############# 
595
  
596
  #use index_d and index_m
597
  
598
  date<-strptime(daily_dev_sampling_dat$date[i], "%Y%m%d")   # interpolation date being processed
599
  month<-strftime(date, "%m")          # current month of the date being processed
600
  LST_month<-paste("mm_",month,sep="") # name of LST month to be matched
601
  proj_str<-proj4string(dst) #get the local projection information from monthly data
602

  
603
  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
604
  
605
  data_day<-ghcn.subsets[[index_d]]
606
  mod_LST <- ghcn.subsets[[index_d]][,match(LST_month, names(ghcn.subsets[[index_d]]))]  #Match interpolation date and monthly LST average
607
  data_day$LST <- as.data.frame(mod_LST)[,1] #Add the variable LST to the dataset
608
  dst$LST<-dst[[LST_month]] #Add the variable LST to the monthly dataset
609
  
610
  ind.training<-sampling[[index_d]]
611
  ind.testing <- setdiff(1:nrow(data_day), ind.training)
612
  data_s <- data_day[ind.training, ]   #Training dataset currently used in the modeling
613
  data_v <- data_day[ind.testing, ]    #Testing/validation dataset using input sampling
614
  
615
  ns<-nrow(data_s)
616
  nv<-nrow(data_v)
617
  #i=1
618
  date_proc<-sampling_dat$date[index_d]
619
  date_proc<-strptime(sampling_dat$date[index_d], "%Y%m%d")   # interpolation date being processed
620
  mo<-as.integer(strftime(date_proc, "%m"))          # current month of the date being processed
621
  day<-as.integer(strftime(date_proc, "%d"))
622
  year<-as.integer(strftime(date_proc, "%Y"))
623
  
624
  #Adding layer LST to the raster stack  
625
  #names(s_raster)<-covar_names
626
  pos<-match("LST",names(s_raster)) #Find the position of the layer with name "LST", if not present pos=NA
627
  s_raster<-dropLayer(s_raster,pos)      # If it exists drop layer
628
  LST<-subset(s_raster,LST_month)
629
  names(LST)<-"LST"
630
  s_raster<-addLayer(s_raster,LST)            #Adding current month
631
  
632
  #Now get monthly data...
633
  
634
  ghcn.month.subsets<-sampling_month_obj$ghcn_data
635
  sampling_month_dat <- sampling_month_obj$sampling_dat
636
  sampling_month_index <- sampling_month_obj$sampling_index
637
  
638
  dataset_month <-ghcn.month.subsets[[index_m]]
639
  mod_LST <- ghcn.month.subsets[[index_m]][,match(LST_month, names(ghcn.month.subsets[[index_m]]))]  #Match interpolation date and monthly LST average
640
  dataset_month$LST <- as.data.frame(mod_LST)[,1] #Add the variable LST to the dataset
641
  #change here...
642
  dst$LST<-dst[[LST_month]] #Add the variable LST to the monthly dataset
643
  proj_str<-proj4string(dst) #get the local projection information from monthly data
644
  
645
  ind.training_month <- sampling_month_index[[index_m]]
646
  ind.testing_month  <- setdiff(1:nrow(dataset_month), ind.training_month)
647
  data_month_s <- dataset_month[ind.training_month, ]   #Training dataset currently used in the modeling
648
  data_month_v <- dataset_month[ind.testing_month, ]    #Testing/validation dataset using input sampling
649
  
650
  modst <- data_month_s #training data for  monthhly predictions...
651
    
652
  ##########
653
  # STEP 2 - CLEAN DATA AND JOIN DAILY TO MONTHLY STATION INFORMATION
654
  ##########
655
  
656
  #if use join
657
  #modst<-dst[dst$month==mo,] #Subsetting dataset for the relevant month of the date being processed
658
    
659
  if (var=="TMIN"){
660
    modst$LSTD_bias <- modst$LST-modst$TMin; #That is the difference between the monthly LST mean and monthly station mean
661
  }
662
  if (var=="TMAX"){
663
    modst$LSTD_bias <- modst$LST-modst$TMax; #That is the difference between the monthly LST mean and monthly station mean    
664
  }
665
  #This may be unnecessary since LSTD_bias is already in dst?? check the info
666
  #Some loss of observations: LSTD_bias for January has only 56 out of 66 possible TMIN!!! We may need to look into this issue
667
  #to avoid some losses of station data...
668
  
669
  #Clearn out this part: make this a function call
670
  x<-as.data.frame(data_v)
671
  d<-as.data.frame(data_s)
672
  for (j in 1:nrow(x)){
673
    if (x$value[j]== -999.9){
674
      x$value[j]<-NA
675
    }
676
  }
677
  for (j in 1:nrow(d)){
678
    if (d$value[j]== -999.9){
679
      d$value[j]<-NA
680
    }
681
  }
682
  pos<-match("value",names(d)) #Find column with name "value"
683
  #names(d)[pos]<-c("dailyTmax")
684
  names(d)[pos]<-y_var_name
685
  pos<-match("value",names(x)) #Find column with name "value"
686
  names(x)[pos]<-y_var_name
687
  pos<-match("station",names(d)) #Find column with station ID
688
  names(d)[pos]<-c("id")
689
  pos<-match("station",names(x)) #Find column with name station ID
690
  names(x)[pos]<-c("id")
691
  pos<-match("station",names(modst)) #Find column with name station ID
692
  names(modst)[pos]<-c("id")       #modst contains the average tmax per month for every stations...
693

  
694
  ##########
695
  # STEP 3 - interpolate daily delta across space
696
  ##########
697
  
698
  #if used images
699
  # extract from image 
700
  #Change to take into account TMin and TMax
701
  
702
  if(use_clim_image==FALSE){
703
    
704
    #must join daily and monthly data first...
705
    
706
    dmoday <-merge(modst,d,by="id",suffixes=c("",".y2"))  
707
    xmoday <-merge(modst,x,by="id",suffixes=c("",".y2"))  
708
    mod_pat<-glob2rx("*.y2")   #remove duplicate columns that have ".y2" in their names
709
    var_pat<-grep(mod_pat,names(dmoday),value=FALSE) # using grep with "value" extracts the matching names
710
    dmoday<-dmoday[,-var_pat] #dropping relevant columns
711
    mod_pat<-glob2rx("*.y2")   
712
    var_pat<-grep(mod_pat,names(xmoday),value=FALSE) # using grep with "value" extracts the matching names
713
    xmoday<-xmoday[,-var_pat] #Removing duplicate columns
714
      
715
    data_v<-xmoday
716
    
717
    #coords <-dmoday[,c("coords.x1","coords.x2")]
718
    coords <-dmoday[,c("x","y")]
719
    coordinates(dmoday)<-coords
720
    proj4string(dmoday)<-proj_str
721
    
722
    #dmoday contains the daily tmax values for training with TMax/TMin being the monthly station tmax/tmin mean
723
    #xmoday contains the daily tmax values for validation with TMax/TMin being the monthly station tmax/tmin mean
724
    
725
    if (var=="TMIN"){
726
      daily_delta <-dmoday$dailyTmin-dmoday$TMin #daily detl is the difference between monthly and daily temperatures
727
    }
728
    if (var=="TMAX"){
729
      daily_delta <- dmoday$dailyTmax-dmoday$TMax
730
    }  
731
    #daily_delta <- dmoday[[y_var_name]] - 
732
    #only one delta in this case!!!
733
    #list(mod)
734
    
735
    if(is.null(list_models2)){ #change here...
736
      
737
      list_daily_delta_rast <- vector("list",length=1) #only one delta surface in this case!!
738
      list_mod_krtmp2 <- vector("list",length=1) #only one delta model in this case!!
739
      
740
      model_name<-paste("mod_stat_kr",sep="_")
741
      daily_delta_xy<-as.matrix(cbind(dmoday$x,dmoday$y))
742
      fitdelta<-Krig(daily_delta_xy,daily_delta,theta=1e5) #use TPS or krige
743
      mod_krtmp2 <- fitdelta
744
      #names(mod_krtmp2)[k] <- model_name
745
      #data_s$daily_delta<-daily_delta
746
      #rast_clim_list<-rast_clim_yearlist[[index_m]]  #select relevant monthly climatology image ...
747
      rast_clim_list<-rast_clim_yearlist[[index_m]]  #select relevant monthly climatology image ...
748
      rast_clim_mod <- stack(rast_clim_list)
749
      names(rast_clim_mod) <- names(rast_clim_list)
750
      rast_clim_month <- subset(rast_clim_mod,1) #example layer to interpolate to
751
      
752
      daily_delta_rast<-interpolate(rast_clim_month,fitdelta) #Interpolation of the of the daily devation
753
      #there is only one daily devation (delta) sruface in this case
754
      
755
      #To many I/O out of swap memory on atlas
756
      #Saving kriged surface in raster images
757
      data_name<-paste("daily_delta_",y_var_name,"_",model_name,"_",sampling_month_dat$prop[index_m],"_",sampling_month_dat$run_samp[index_m],"_",
758
                       sampling_dat$date[index_d],"_",sampling_dat$prop[index_d],"_",sampling_dat$run_samp[index_d],sep="")
759
      raster_name_delta<-file.path(out_path,paste(interpolation_method,"_",var,"_",data_name,out_prefix,".tif", sep=""))
760
      writeRaster(daily_delta_rast, filename=raster_name_delta,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
761
      
762
      list_daily_delta_rast[[1]] <- raster_name_delta
763
      list_mod_krtmp2[[1]] <- mod_krtmp2
764
    }
765
    
766
    if(!is.null(list_models2)){ #change here...
767
      list_daily_delta_rast <- vector("list",length=1) #several delta surfaces in this case but stored as one list!!
768
      list_mod_krtmp2 <- vector("list",length=1) #several delta model in this case but stored as one list!!
769
      
770
      dev_mod_name<-paste("dev_mod",1:length(list_models2),sep="") #change to more meaningful name?
771
      model_name<-paste("mod_stat_",sep="_")
772
      #Now generate file names for the predictions...
773
      list_out_filename<-vector("list",length(list_models2))
774
      names(list_out_filename)<- dev_mod_name  
775
      
776
      ##Change name...
777
      for (j in 1:length(list_out_filename)){
778
        #j indicate which month is predicted, var indicates TMIN or TMAX
779
        data_name<-paste("daily_delta_",y_var_name,"_",model_name,"_",sampling_month_dat$prop[index_m],"_",sampling_month_dat$run_samp[index_m],"_",
780
                         sampling_dat$date[index_d],"_",sampling_dat$prop[index_d],"_",sampling_dat$run_samp[index_d],
781
                         "_",interp_method2,"_",dev_mod_name[j],sep="")
782
        raster_name_delta<-file.path(out_path,paste(interpolation_method,"_",var,"_",data_name,out_prefix,".tif", sep=""))
783
        
784
        list_out_filename[[j]]<-raster_name_delta
785
      }
786
      
787
      #Now call function
788
      
789
      #for (j in 1:length(list_models2)){
790
      dmoday$y_var <- daily_delta
791
      #coordinates(data_s)<-cbind(data_s$x,data_s$y)
792
      #proj4string(data_s)<-proj_str
793
      #coordinates(data_v)<-cbind(data_v$x,data_v$y)
794
      #proj4string(data_v)<-proj_str
795
      
796
      interp_area_obj <-interpolate_area_fun(interp_method2,list_models2,s_raster,list_out_filename,dmoday)
797
      rast_pred_list <- interp_area_obj$rast_pred_list
798
      rast_pred_list <-rast_pred_list[!sapply(rast_pred_list,is.null)] #remove NULL elements in list
799
      list_daily_delta_rast[[1]] <-rast_pred_list
800
      #names(list_daily_delta_rast) <- names(daily_delta_df)
801
      list_mod_krtmp2[[1]] <-interp_area_obj$mod_list      
802
    }
803
  }
804
  
805
  if(use_clim_image==TRUE){
806
    
807
    # User can choose to join daily and monthly station before interpolation:
808
    #-this ensures that the delta difference is "more" exact since its starting point is basesd on average value but there is risk to loose some stations
809
    #may need to change this option later!!
810
    #if jion_Daily is true then daily station used as training will match monthly station used as training
811
    
812
    if(join_daily==TRUE){
813
      dmoday <-merge(modst,d,by="id",suffixes=c("",".y2"))  
814
      xmoday <-merge(modst,x,by="id",suffixes=c("",".y2"))  
815
      mod_pat<-glob2rx("*.y2")   #remove duplicate columns that have ".y2" in their names
816
      var_pat<-grep(mod_pat,names(dmoday),value=FALSE) # using grep with "value" extracts the matching names
817
      dmoday<-dmoday[,-var_pat] #dropping relevant columns
818
      mod_pat<-glob2rx("*.y2")   
819
      var_pat<-grep(mod_pat,names(xmoday),value=FALSE) # using grep with "value" extracts the matching names
820
      xmoday<-xmoday[,-var_pat] #Removing duplicate columns
821
      
822
      data_v<-xmoday
823
      
824
    }else{
825
      dmoday<-d
826
      data_v<-x
827
    }
828
    
829
    
830
    #dmoday contains the daily tmax values for training with TMax/TMin being the monthly station tmax/tmin mean
831
    #xmoday contains the daily tmax values for validation with TMax/TMin being the monthly station tmax/tmin mean
832
    
833
    #coords <-dmoday[,c("coords.x1","coords.x2")]
834
    coords <-dmoday[,c("x","y")]
835
    coordinates(dmoday)<-coords
836
    proj4string(dmoday)<-proj_str
837
    
838
    #Now compute daily delta deviation from climatology layer:
839
    
840
    rast_clim_list<-rast_clim_yearlist[[index_m]]  #select relevant monthly climatology image ...
841
    rast_clim_mod <- stack(rast_clim_list)
842
    names(rast_clim_mod) <- names(rast_clim_list)
843
    extract_data_s <-extract(rast_clim_mod,dmoday,df=TRUE)
844
    #list_daily_delta
845
    daily_delta_df <- dmoday[[y_var_name]] - extract_data_s  
846
    daily_delta_df <- daily_delta_df[,-1]
847
    names(daily_delta_df) <- paste(names(daily_delta_df),"_del",sep="")
848
    
849
    names(extract_data_s) <- paste(names(extract_data_s),"_m",sep="") # "m" for monthly predictions...
850
    dmoday <-spCbind(dmoday,extract_data_s) #contains the predicted clim at locations
851
    dmoday <-spCbind(dmoday,daily_delta_df) #contains the predicted clim at locations
852
    #Now krige  forevery model !! loop
853
    list_mod_krtmp2 <- vector("list",length=nlayers(rast_clim_mod)) 
854
    list_daily_delta_rast <- vector("list",length=nlayers(rast_clim_mod)) 
855
    names(list_daily_delta_rast) <- names(daily_delta_df)
856
    names(list_mod_krtmp2) <- names(daily_delta_df)
857
    for(k in 1:nlayers(rast_clim_mod)){
858
      
859
      daily_delta <- daily_delta_df[[k]] #Current daily deviation being process: the reference monthly prediction varies...
860
      #model_name<-paste("mod_kr","day",sep="_")
861
      model_name<- names(daily_delta_df)[k]
862
      
863
      if(is.null(list_models2)){
864
        daily_delta_xy<-as.matrix(cbind(dmoday$x,dmoday$y))
865
        fitdelta<-Krig(daily_delta_xy,daily_delta,theta=1e5) #use TPS or krige
866
        list_mod_krtmp2[[k]] <-fitdelta
867
        names(list_mod_krtmp2)[k] <- model_name
868
        #data_s$daily_delta<-daily_delta
869
        #rast_clim_list<-rast_clim_yearlist[[index_m]]  #select relevant monthly climatology image ...
870
        rast_clim_month <- subset(rast_clim_mod,1) #example layer to interpolate to
871
        
872
        daily_delta_rast<-interpolate(rast_clim_month,fitdelta) #Interpolation of the bias surface...
873
        
874
        #list_daily_delta_rast[[k]] <- raster_name_delta
875
        
876
        data_name<-paste("daily_delta_",y_var_name,"_",model_name,"_",sampling_month_dat$prop[index_m],"_",sampling_month_dat$run_samp[index_m],"_",
877
                         sampling_dat$date[index_d],"_",sampling_dat$prop[index_d],"_",sampling_dat$run_samp[index_d],sep="")
878
        raster_name_delta<-file.path(out_path,paste(interpolation_method,"_",var,"_",data_name,out_prefix,".tif", sep=""))
879
        
880
        writeRaster(daily_delta_rast, filename=raster_name_delta,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
881
        #writeRaster(r_spat, NAflag=NA_flag_val,filename=raster_name,bylayer=TRUE,bandorder="BSQ",overwrite=TRUE)   
882
        
883
        #raster_name_delta <- list_daily_delta_rast
884
        #mod_krtmp2 <- list_mod_krtmp2
885
        list_daily_delta_rast[[k]] <- raster_name_delta
886
        
887
      }
888
      
889
      if (!is.null(list_models2)){
890
        
891
        #list_formulas<-lapply(list_models,as.formula,env=.GlobalEnv) #mulitple arguments passed to lapply!!
892
        dev_mod_name<-paste("dev_mod",1:length(list_models2),sep="") #change to more meaningful name?
893
        
894
        #Now generate file names for the predictions...
895
        list_out_filename<-vector("list",length(list_models2))
896
        names(list_out_filename)<- dev_mod_name  
897
        
898
        ##Change name...
899
        for (j in 1:length(list_out_filename)){
900
          #j indicate which month is predicted, var indicates TMIN or TMAX
901
          data_name<-paste("daily_delta_",y_var_name,"_",model_name,"_",sampling_month_dat$prop[index_m],"_",sampling_month_dat$run_samp[index_m],"_",
902
                           sampling_dat$date[index_d],"_",sampling_dat$prop[index_d],"_",sampling_dat$run_samp[index_d],
903
                           "_",interp_method2,"_",dev_mod_name[j],sep="")
904
          raster_name_delta<-file.path(out_path,paste(interpolation_method,"_",var,"_",data_name,out_prefix,".tif", sep=""))
905
          
906
          list_out_filename[[j]]<-raster_name_delta
907
        }
908
        
909
        #Now call function
910
        
911
        #for (j in 1:length(list_models2)){
912
        dmoday$y_var <- daily_delta
913
        #coordinates(data_s)<-cbind(data_s$x,data_s$y)
914
        #proj4string(data_s)<-proj_str
915
        #coordinates(data_v)<-cbind(data_v$x,data_v$y)
916
        #proj4string(data_v)<-proj_str
917
        
918
        interp_area_obj <-interpolate_area_fun(interp_method2,list_models2,s_raster,list_out_filename,dmoday)
919
        rast_pred_list <- interp_area_obj$rast_pred_list
920
        names(rast_pred_list) <- dev_mod_name
921
        rast_pred_list <-rast_pred_list[!sapply(rast_pred_list,is.null)] #remove NULL elements in list
922
        list_daily_delta_rast[[k]] <-rast_pred_list
923
        names(list_daily_delta_rast) <- names(daily_delta_df)
924
        mod_list <-interp_area_obj$mod_list
925
        names(mod_list) <- dev_mod_name
926
        list_mod_krtmp2[[k]] <-interp_area_obj$mod_list
927
      }
928
      
929
    }
930
    
931
    #Too many I/O out of swap memory on atlas
932
    #Saving kriged surface in raster images
933
    #delta_rast_s <-stack(list_daily_delta_rast)
934
    #names(delta_rast_s) <- names(daily_delta_df)
935
    
936
    #Should check that all delta images have been created for every model!!! remove from list empty elements!!
937
    
938
    #data_name<-paste("daily_delta_",y_var_name,"_",model_name,"_",sampling_month_dat$prop[index_m],"_",sampling_month_dat$run_samp[index_m],"_",
939
    #                 sampling_dat$date[index_d],"_",sampling_dat$prop[index_d],"_",sampling_dat$run_samp[index_d],sep="")
940
    #raster_name_delta<-file.path(out_path,paste(interpolation_method,"_",var,"_",data_name,out_prefix,".tif", sep=""))
941
    #writeRaster(daily_delta_rast, filename=raster_name_delta,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
942
    
943
    #data_name<-paste("daily_delta_",y_var_name,"_",sampling_month_dat$prop[index_m],"_",sampling_month_dat$run_samp[index_m],"_",
944
    #                 sampling_dat$date[index_d],"_",sampling_dat$prop[index_d],"_",sampling_dat$run_samp[index_d],sep="")
945
    #raster_name_delta<-file.path(out_path,paste(interpolation_method,"_",var,"_",data_name,out_prefix,".tif", sep=""))
946
    
947
    #writeRaster(delta_rast_s, filename=raster_name_delta,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
948
    #writeRaster(r_spat, NAflag=NA_flag_val,filename=raster_name,bylayer=TRUE,bandorder="BSQ",overwrite=TRUE)   
949
    
950
    #raster_name_delta <- list_daily_delta_rast
951
    #mod_krtmp2 <- list_mod_krtmp2
952
  }
953
  
954
  #########
955
  # STEP 4 - Calculate daily predictions - T(day) = clim(month) + delta(day)
956
  #########
957
  
958
  #if(use_clim_image==FALSE){
959
  #  list_daily_delta_rast <- rep(raster_name_delta,length=nlayers(rast_clim_mod))
960
  #}
961
  #Now predict daily after having selected the relevant month
962
  temp_list<-vector("list",nlayers(rast_clim_mod))  
963
  for (k in 1:nlayers(rast_clim_mod)){
964
    if(use_clim_image==TRUE){
965
      if (is.null(list_models2)){ 
966
        daily_delta_rast <- raster(list_daily_delta_rast[[k]]) #There is only one image of deviation per model if list_models2 is NULL
967
      }
968
      if (!is.null(list_models2)){ #then possible multiple daily dev predictions
969
        daily_delta_rast <- stack(unlist(list_daily_delta_rast[[k]]))
970
      }
971
      #daily_delta_rast <- subset(delta_rast_s,k)
972
    }
973
    #if use_clim_image==FALSE then daily__delta_rast already defined earlier...
974
    
975
    if(use_clim_image==FALSE){
976
      if (is.null(list_models2)){ 
977
        daily_delta_rast <- raster(list_daily_delta_rast[[1]]) #There is only one image of deviation per model if list_models2 is NULL
978
      }
979
      if (!is.null(list_models2)){ #then possible multiple daily dev predictions hence use stack
980
        daily_delta_rast <- stack(unlist(list_daily_delta_rast[[1]]))
981
      }
982
      #daily_delta_rast <- subset(delta_rast_s,k)
983
    }
984
    
985
    #rast_clim_month<-raster(rast_clim_list[[k]])
986
    rast_clim_month <- subset(rast_clim_mod,k) #long term monthly prediction
987
    if (is.null(list_models2)){ 
988
      temp_predicted<-rast_clim_month + daily_delta_rast
989
      data_name<-paste(y_var_name,"_predicted_",names(rast_clim_mod)[k],"_",sampling_month_dat$prop[index_m],"_",sampling_month_dat$run_samp[index_m],"_",
990
                       sampling_dat$date[index_d],"_",sampling_dat$prop[index_d],"_",sampling_dat$run_samp[index_d],sep="")
991
      raster_name<-file.path(out_path,paste(interpolation_method,"_",data_name,out_prefix,".tif", sep=""))
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff