Project

General

Profile

Download (30.8 KB) Statistics
| Branch: | Revision:
1
##################  Functions for use in the raster prediction stage   #######################################
2
############################ Interpolation in a given tile/region ##########################################
3
#This script contains 5 functions used in the interpolation of temperature in the specfied study/processing area:                             
4
# 1)predict_raster_model<-function(in_models,r_stack,out_filename)                                                             
5
# 2)fit_models<-function(list_formulas,data_training)           
6
# 3)runClim_KGCAI<-function(j,list_param) : function that peforms GAM CAI method
7
# 4)runClim_KGFusion<-function(j,list_param) function for monthly step (climatology) in the fusion method
8
# 5)runGAMFusion <- function(i,list_param) : daily step for fusion method, perform daily prediction
9
#
10
#AUTHOR: Benoit Parmentier                                                                       
11
#DATE: 06/05/2013                                                                                 
12
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--   
13

    
14
##Comments and TODO:
15
#This script is meant to be for general processing tile by tile or region by region.
16
# Note that the functions are called from GAM_fusion_analysis_raster_prediction_mutlisampling.R.
17
# This will be expanded to other methods.
18
##################################################################################################
19

    
20

    
21
predict_raster_model<-function(in_models,r_stack,out_filename){
22
  #This functions performs predictions on a raster grid given input models.
23
  #Arguments: list of fitted models, raster stack of covariates
24
  #Output: spatial grid data frame of the subset of tiles
25
  list_rast_pred<-vector("list",length(in_models))
26
  for (i in 1:length(in_models)){
27
    mod <-in_models[[i]] #accessing GAM model ojbect "j"
28
    raster_name<-out_filename[[i]]
29
    if (inherits(mod,"gam")) {           #change to c("gam","autoKrige")
30
      raster_pred<- predict(object=r_stack,model=mod,na.rm=FALSE) #Using the coeff to predict new values.
31
      names(raster_pred)<-"y_pred"  
32
      writeRaster(raster_pred, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
33
      #print(paste("Interpolation:","mod", j ,sep=" "))
34
      list_rast_pred[[i]]<-raster_name
35
    }
36
  }
37
  if (inherits(mod,"try-error")) {
38
    print(paste("no gam model fitted:",mod[1],sep=" ")) #change message for any model type...
39
  }
40
  return(list_rast_pred)
41
}
42

    
43
fit_models<-function(list_formulas,data_training){
44
  #This functions several models and returns model objects.
45
  #Arguments: - list of formulas for GAM models
46
  #           - fitting data in a data.frame or SpatialPointDataFrame
47
  #Output: list of model objects 
48
  list_fitted_models<-vector("list",length(list_formulas))
49
  for (k in 1:length(list_formulas)){
50
    formula<-list_formulas[[k]]
51
    mod<- try(gam(formula, data=data_training)) #change to any model!!
52
    #mod<- try(autoKrige(formula, input_data=data_s,new_data=s_sgdf,data_variogram=data_s))
53
    model_name<-paste("mod",k,sep="")
54
    assign(model_name,mod) 
55
    list_fitted_models[[k]]<-mod
56
  }
57
  return(list_fitted_models) 
58
}
59

    
60
select_var_stack <-function(r_stack,formula_mod,spdf=TRUE){
61
  ##Write function to return only the relevant layers!!
62
  #Note that default behaviour of the function is to remove na values in the subset 
63
  #of raster layers and return a spdf
64
  
65
  ### Start
66
  
67
  covar_terms<-all.vars(formula_mod) #all covariates terms...+ y_var
68
  if (length(covar_terms)==1){
69
    r_stack_covar<-subset(r_stack,1)
70
  } #use one layer
71
  if (length(covar_terms)> 1){
72
    r_stack_covar <-subset(r_stack,covar_terms[-1])
73
  }
74
  if (spdf==TRUE){
75
    s_sgdf<-as(r_stack_covar,"SpatialGridDataFrame") #Conversion to spatial grid data frame, only convert the necessary layers!!
76
    s_spdf<-as.data.frame(s_sgdf) #Note that this automatically removes all NA rows
77
    s_spdf<-na.omit(s_spdf) #removes all rows that have na...
78
    coords<- s_spdf[,c('s1','s2')]
79
    coordinates(s_spdf)<-coords
80
    proj4string(s_spdf)<-proj4string(s_sgdf)  #Need to assign coordinates...
81
    #raster_pred <- rasterize(s_spdf,r1,"pred",fun=mean)
82
    covar_obj<-s_spdf
83
  } else{
84
    covar_obj<-r_stack_covar
85
  }
86
  
87
  return(covar_obj)
88
}
89

    
90
remove_na_spdf<-function(col_names,d_spdf){
91
  #Purpose: remote na items from a subset of a SpatialPointsDataFrame
92
  x<-d_spdf
93
  coords <-coordinates(x)
94
  x$s1<-coords[,1]
95
  x$s2<-coords[,2]
96
  
97
  x1<-x[c(col_names,"s1","s2")]
98
  #x1$y_var <-data_training$y_var
99
  #names(x1)
100
  x1<-na.omit(as.data.frame(x1))
101
  coordinates(x1)<-x1[c("s1","s2")]
102
  proj4string(x1)<-proj4string(d_spdf)
103
  return(x1)
104
}
105

    
106
predict_auto_krige_raster_model<-function(list_formulas,r_stack,data_training,out_filename){
107
  #This functions performs predictions on a raster grid given input models.
108
  #Arguments: list of fitted models, raster stack of covariates
109
  #Output: spatial grid data frame of the subset of tiles
110
  
111
  list_fitted_models<-vector("list",length(list_formulas))
112
  list_rast_pred<-vector("list",length(list_formulas))
113
  #s_sgdf<-as(r_stack,"SpatialGridDataFrame") #Conversion to spatial grid data frame, only convert the necessary layers!!
114
  proj4string(data_training) <- projection(r_stack)
115
  for (k in 1:length(list_formulas)){
116
    formula_mod<-list_formulas[[k]]
117
    raster_name<-out_filename[[k]]
118
    #mod<- try(gam(formula, data=data_training)) #change to any model!!
119
    s_spdf<-select_var_stack(r_stack,formula_mod,spdf=TRUE)
120
    col_names<-all.vars(formula_mod)
121
    if (length(col_names)==1){
122
      data_fit <-data_training
123
    }else{
124
      data_fit <- remove_na_spdf(col_names,data_training)
125
    }
126
    
127
    mod <- try(autoKrige(formula_mod, input_data=data_fit,new_data=s_spdf,data_variogram=data_fit))
128
    #mod <- try(autoKrige(formula_mod, input_data=data_training,new_data=s_spdf,data_variogram=data_training))
129
    model_name<-paste("mod",k,sep="")
130
    assign(model_name,mod) 
131
    
132
    if (inherits(mod,"autoKrige")) {           #change to c("gam","autoKrige")
133
      rpred<-mod$krige_output  #Extracting the SptialGriDataFrame from the autokrige object
134
      y_pred<-rpred$var1.pred                  #is the order the same?
135
      raster_pred <- rasterize(rpred,r_stack,"var1.pred",fun=mean)
136
      names(raster_pred)<-"y_pred" 
137
      writeRaster(raster_pred, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...
138
      #print(paste("Interpolation:","mod", j ,sep=" "))
139
      list_rast_pred[[k]]<-raster_name
140
      mod$krige_output<-NULL
141
      list_fitted_models[[k]]<-mod
142
      
143
    }
144
    if (inherits(mod,"try-error")) {
145
      print(paste("no autokrige model fitted:",mod,sep=" ")) #change message for any model type...
146
      list_fitted_models[[k]]<-mod
147
    }
148
  }
149
  day_prediction_obj <-list(list_fitted_models,list_rast_pred)
150
  names(day_prediction_obj) <-c("list_fitted_models","list_rast_pred")
151
  return(day_prediction_obj)
152
}
153

    
154
#Could merge both auto?
155
predict_autokrige_gwr_raster_model<-function(method_interp,list_formulas,r_stack,data_training,out_filename){
156
  #This functions performs predictions on a raster grid given input models.
157
  #It can be used at the daily or/and monthly time scale...
158
  #Arguments: list of fitted models, raster stack of covariates
159
  # method_interp must be equal to "gwr" or "kriging"
160
  #Output: spatial grid data frame of the subset of tiles
161
  
162
  list_fitted_models<-vector("list",length(list_formulas))
163
  list_rast_pred<-vector("list",length(list_formulas))
164
  #s_sgdf<-as(r_stack,"SpatialGridDataFrame") #Conversion to spatial grid data frame, only convert the necessary layers!!
165
  proj4string(data_training) <- projection(r_stack)
166
  for (k in 1:length(list_formulas)){
167
    formula_mod<-list_formulas[[k]]
168
    raster_name<-out_filename[[k]]
169
    #mod<- try(gam(formula, data=data_training)) #change to any model!!
170
    s_spdf<-select_var_stack(r_stack,formula_mod,spdf=TRUE)
171
    col_names<-all.vars(formula_mod) #extract terms names from formula object
172
    if (length(col_names)==1){
173
      data_fit <-data_training
174
    }else{
175
      data_fit <- remove_na_spdf(col_names,data_training)
176
    }
177
    
178
    if(method_interp=="kriging"){
179
      mod <- try(autoKrige(formula_mod, input_data=data_fit,new_data=s_spdf,data_variogram=data_fit))
180
    }
181
    
182
    if(method_interp=="gwr"){
183
      bwGm <-try(gwr.sel(formula_mod,data=data_fit,gweight=gwr.Gauss, verbose = FALSE))
184
      mod <- try(gwr(formula_mod, data=data_fit, bandwidth=bwGm, gweight=gwr.Gauss, hatmatrix=TRUE))
185
    }
186
    #mod <- try(autoKrige(formula_mod, input_data=data_training,new_data=s_spdf,data_variogram=data_training))
187
    
188
    model_name<-paste("mod",k,sep="")
189
    assign(model_name,mod) 
190
    
191
    if (inherits(mod,"autoKrige") | inherits(mod,"gwr")){           #change to c("gam","autoKrige")
192
      if(method_interp=="kriging"){
193
        rpred<-mod$krige_output  #Extracting the SptialGriDataFrame from the autokrige object
194
        y_pred<-rpred$var1.pred                  #is the order the same?
195
        raster_pred <- rasterize(rpred,r_stack,"var1.pred",fun=mean)
196
        mod$krige_output<-NULL
197
      }
198
      if(method_interp=="gwr"){
199
        rpred <- gwr(formula_mod, data_fit, bandwidth=bwGm, fit.points =s_spdf,predict=TRUE, se.fit=TRUE,fittedGWRobject=mod) 
200
        #y_pred<-rpred$var1.pred                  #is the order the same?
201
        raster_pred<-rasterize(rpred$SDF,r_stack,"pred",fun=mean)
202
      }
203
      
204
      names(raster_pred)<-"y_pred" 
205
      writeRaster(raster_pred, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...
206
      #print(paste("Interpolation:","mod", j ,sep=" "))
207
      list_rast_pred[[k]]<-raster_name
208
      list_fitted_models[[k]]<-mod
209
      
210
    }
211
    if (inherits(mod,"try-error")) {
212
      print(paste("no autokrige/gwr model fitted:",mod,sep=" ")) #change message for any model type...
213
      list_fitted_models[[k]]<-mod
214
    }
215
  }
216
  day_prediction_obj <-list(list_fitted_models,list_rast_pred)
217
  names(day_prediction_obj) <-c("list_fitted_models","list_rast_pred")
218
  return(day_prediction_obj)
219
}
220

    
221
fit_models<-function(list_formulas,data_training){
222
  #This functions several models and returns model objects.
223
  #Arguments: - list of formulas for GAM models
224
  #           - fitting data in a data.frame or SpatialPointDataFrame
225
  #Output: list of model objects 
226
  list_fitted_models<-vector("list",length(list_formulas))
227
  for (k in 1:length(list_formulas)){
228
    formula<-list_formulas[[k]]
229
    mod<- try(gam(formula, data=data_training)) #change to any model!!
230
    #mod<- try(autoKrige(formula, input_data=data_s,new_data=s_sgdf,data_variogram=data_s))
231
    model_name<-paste("mod",k,sep="")
232
    assign(model_name,mod) 
233
    list_fitted_models[[k]]<-mod
234
  }
235
  return(list_fitted_models) 
236
}
237

    
238
####
239
#TODO:Should use interp_day_fun!!
240
#Add log file and calculate time and sizes for processes-outputs
241
runGAM_day_fun <-function(i,list_param){
242

    
243
  #Make this a function with multiple argument that can be used by mcmapply??
244
  #Arguments: 
245
  #1)list_index: j 
246
  #2)covar_rast: covariates raster images used in the modeling
247
  #3)covar_names: names of input variables 
248
  #4)lst_avg: list of LST climatogy names, may be removed later on
249
  #5)list_models: list input models for bias calculation
250
  #6)sampling_obj: data at the daily time scale
251
  #7)var: TMAX or TMIN, variable being interpolated
252
  #8)y_var_name: output name, not used at this stage
253
  #9)out_prefix
254
  #10) out_path
255
  
256
  #The output is a list of four shapefile names produced by the function:
257
  #1) clim: list of output names for raster climatogies 
258
  #2) data_month: monthly training data for bias surface modeling
259
  #3) mod: list of model objects fitted 
260
  #4) formulas: list of formulas used in bias modeling
261
    
262
  ### PARSING INPUT ARGUMENTS
263
  #list_param_runGAMFusion<-list(i,clim_yearlist,sampling_obj,var,y_var_name, out_prefix)
264
    
265
  index<-list_param$list_index
266
  s_raster<-list_param$covar_rast
267
  covar_names<-list_param$covar_names
268
  lst_avg<-list_param$lst_avg
269
  list_models<-list_param$list_models
270
  dst<-list_param$dst #monthly station dataset
271
  sampling_obj<-list_param$sampling_obj
272
  var<-list_param$var
273
  y_var_name<-list_param$y_var_name
274
  interpolation_method <-list_param$interpolation_method
275
  out_prefix<-list_param$out_prefix
276
  out_path<-list_param$out_path
277
  screen_data_training<-list_param$screen_data_training
278

    
279
  ghcn.subsets<-sampling_obj$ghcn_data_day
280
  sampling_dat <- sampling_obj$sampling_dat
281
  sampling <- sampling_obj$sampling_index
282
    
283
  ##########
284
  # STEP 1 - Read in information and get traing and testing stations
285
  ############# 
286
  
287
  date<-strptime(sampling_dat$date[i], "%Y%m%d")   # interpolation date being processed
288
  month<-strftime(date, "%m")          # current month of the date being processed
289
  LST_month<-paste("mm_",month,sep="") # name of LST month to be matched
290
  proj_str<-proj4string(dst) #get the local projection information from monthly data
291
  
292
  #Adding layer LST to the raster stack  
293
  #names(s_raster)<-covar_names
294
  pos<-match("LST",names(s_raster)) #Find the position of the layer with name "LST", if not present pos=NA
295
  s_raster<-dropLayer(s_raster,pos)      # If it exists drop layer
296
  LST<-subset(s_raster,LST_month)
297
  names(LST)<-"LST"
298
  s_raster<-addLayer(s_raster,LST)            #Adding current month
299
  
300
  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
301
  data_day<-ghcn.subsets[[i]]
302
  mod_LST <- ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))]  #Match interpolation date and monthly LST average
303
  data_day$LST <- as.data.frame(mod_LST)[,1] #Add the variable LST to the daily dataset
304
  dst$LST<-dst[[LST_month]] #Add the variable LST to the monthly dataset
305
  
306
  ind.training<-sampling[[i]]
307
  ind.testing <- setdiff(1:nrow(data_day), ind.training)
308
  data_s <- data_day[ind.training, ]   #Training dataset currently used in the modeling
309
  data_v <- data_day[ind.testing, ]    #Testing/validation dataset using input sampling
310
  
311
  ns<-nrow(data_s)
312
  nv<-nrow(data_v)
313
  #i=1
314
  date_proc<-sampling_dat$date[i]
315
  date_proc<-strptime(sampling_dat$date[i], "%Y%m%d")   # interpolation date being processed
316
  mo<-as.integer(strftime(date_proc, "%m"))          # current month of the date being processed
317
  day<-as.integer(strftime(date_proc, "%d"))
318
  year<-as.integer(strftime(date_proc, "%Y"))
319
  
320
  #### STEP 2: PREPARE DATA
321
    
322
  #Clean out this part: make this a function call
323
  x<-as.data.frame(data_v)
324
  d<-as.data.frame(data_s)
325
  for (j in 1:nrow(x)){
326
    if (x$value[j]== -999.9){
327
      x$value[j]<-NA
328
    }
329
  }
330
  for (j in 1:nrow(d)){
331
    if (d$value[j]== -999.9){
332
      d$value[j]<-NA
333
    }
334
  }
335
  pos<-match("value",names(d)) #Find column with name "value"
336
  names(d)[pos]<-y_var_name
337
  pos<-match("value",names(x)) #Find column with name "value"
338
  names(x)[pos]<-y_var_name
339
  pos<-match("station",names(d)) #Find column with station ID
340
  names(d)[pos]<-c("id")
341
  pos<-match("station",names(x)) #Find column with name station ID
342
  names(x)[pos]<-c("id")
343
  
344
  data_s<-d
345
  data_v<-x
346

    
347
  data_s$y_var <- data_s[[y_var_name]] #Adding the variable modeled
348
  data_v$y_var <- data_v[[y_var_name]]
349
  
350
  #Adding back spatal definition
351
  
352
  coordinates(data_s)<-cbind(data_s$x,data_s$y)
353
  proj4string(data_s)<-proj_str
354
  coordinates(data_v)<-cbind(data_v$x,data_v$y)
355
  proj4string(data_v)<-proj_str
356
  #### STEP3:  NOW FIT AND PREDICT  MODEL
357
  
358
  list_formulas<-lapply(list_models,as.formula,env=.GlobalEnv) #mulitple arguments passed to lapply!!
359
  
360
  if(screen_data_training==TRUE){
361
    col_names <-unlist(lapply(list_formulas,all.vars)) #extract all covariates names used in the models
362
    col_names<-unique(col_names)
363
    data_fit <- remove_na_spdf(col_names,data_s)
364
  }else{
365
    data_fit <- data_s
366
  }
367
  mod_list<-fit_models(list_formulas,data_fit) #only gam at this stage
368
  #mod_list<-fit_models(list_formulas,data_s) #only gam at this stage
369
  cname<-paste("mod",1:length(mod_list),sep="") #change to more meaningful name?
370
  names(mod_list)<-cname
371
  
372
  #Now generate file names for the predictions...
373
  list_out_filename<-vector("list",length(mod_list))
374
  names(list_out_filename)<-cname  
375
  
376
  for (k in 1:length(list_out_filename)){
377
    #i indicate which day is predicted, y_var_name indicates TMIN or TMAX
378
    data_name<-paste(y_var_name,"_predicted_",names(mod_list)[k],"_",
379
                     sampling_dat$date[i],"_",sampling_dat$prop[i],
380
                     "_",sampling_dat$run_samp[i],sep="")
381
    raster_name<-file.path(out_path,paste(interpolation_method,"_",data_name,out_prefix,".tif", sep=""))
382
    list_out_filename[[k]]<-raster_name 
383
  }
384
  
385
  #now predict values for raster image...
386
  rast_day_list<-predict_raster_model(mod_list,s_raster,list_out_filename)
387
  names(rast_day_list)<-cname
388
  #Some models will not be predicted...remove them
389
  rast_day_list<-rast_day_list[!sapply(rast_day_list,is.null)] #remove NULL elements in list
390
    
391
  #Prepare object to return
392
  
393
  day_obj<- list(rast_day_list,data_s,data_v,sampling_dat[i,],mod_list,list_models)
394
  obj_names<-c(y_var_name,"data_s","data_v","sampling_dat","mod","formulas")
395
  names(day_obj)<-obj_names 
396
  save(day_obj,file= file.path(out_path,paste("day_obj_",interpolation_method,"_",var,"_",sampling_dat$date[i],"_",sampling_dat$prop[i],
397
                                                "_",sampling_dat$run_samp[i],out_prefix,".RData",sep="")))
398
  return(day_obj)
399
  
400
}
401

    
402
#Maybe should just use the same code...
403

    
404
runKriging_day_fun <-function(i,list_param){
405
  
406
  #Make this a function with multiple argument that can be used by mcmapply??
407
  #Arguments: 
408
  #1)list_index: j 
409
  #2)covar_rast: covariates raster images used in the modeling
410
  #3)covar_names: names of input variables 
411
  #4)lst_avg: list of LST climatogy names, may be removed later on
412
  #5)list_models: list input models for bias calculation
413
  #6)sampling_obj: data at the daily time scale
414
  #7)var: TMAX or TMIN, variable being interpolated
415
  #8)y_var_name: output name, not used at this stage
416
  #9)out_prefix
417
  #10) out_path
418
  
419
  #The output is a list of four shapefile names produced by the function:
420
  #1) clim: list of output names for raster climatogies 
421
  #2) data_month: monthly training data for bias surface modeling
422
  #3) mod: list of model objects fitted 
423
  #4) formulas: list of formulas used in bias modeling
424
  
425
  ### PARSING INPUT ARGUMENTS
426
  #list_param_runGAMFusion<-list(i,clim_yearlist,sampling_obj,var,y_var_name, out_prefix)
427
  
428
  index<-list_param$list_index
429
  s_raster<-list_param$covar_rast
430
  covar_names<-list_param$covar_names
431
  lst_avg<-list_param$lst_avg
432
  list_models<-list_param$list_models
433
  dst<-list_param$dst #monthly station dataset
434
  sampling_obj<-list_param$sampling_obj
435
  var<-list_param$var
436
  y_var_name<-list_param$y_var_name
437
  interpolation_method <-list_param$interpolation_method
438
  out_prefix<-list_param$out_prefix
439
  out_path<-list_param$out_path
440
  
441
  
442
  ghcn.subsets<-sampling_obj$ghcn_data_day
443
  sampling_dat <- sampling_obj$sampling_dat
444
  sampling <- sampling_obj$sampling_index
445
  
446
  ##########
447
  # STEP 1 - Read in information and get traing and testing stations
448
  ############# 
449
  
450
  date<-strptime(sampling_dat$date[i], "%Y%m%d")   # interpolation date being processed
451
  month<-strftime(date, "%m")          # current month of the date being processed
452
  LST_month<-paste("mm_",month,sep="") # name of LST month to be matched
453
  proj_str<-proj4string(dst) #get the local projection information from monthly data
454
  
455
  #Adding layer LST to the raster stack  
456
  #names(s_raster)<-covar_names
457
  pos<-match("LST",names(s_raster)) #Find the position of the layer with name "LST", if not present pos=NA
458
  s_raster<-dropLayer(s_raster,pos)      # If it exists drop layer
459
  LST<-subset(s_raster,LST_month)
460
  names(LST)<-"LST"
461
  s_raster<-addLayer(s_raster,LST)            #Adding current month
462
  
463
  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
464
  data_day<-ghcn.subsets[[i]]
465
  mod_LST <- ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))]  #Match interpolation date and monthly LST average
466
  data_day$LST <- as.data.frame(mod_LST)[,1] #Add the variable LST to the daily dataset
467
  dst$LST<-dst[[LST_month]] #Add the variable LST to the monthly dataset
468
  
469
  ind.training<-sampling[[i]]
470
  ind.testing <- setdiff(1:nrow(data_day), ind.training)
471
  data_s <- data_day[ind.training, ]   #Training dataset currently used in the modeling
472
  data_v <- data_day[ind.testing, ]    #Testing/validation dataset using input sampling
473
  
474
  ns<-nrow(data_s)
475
  nv<-nrow(data_v)
476
  #i=1
477
  date_proc<-sampling_dat$date[i]
478
  date_proc<-strptime(sampling_dat$date[i], "%Y%m%d")   # interpolation date being processed
479
  mo<-as.integer(strftime(date_proc, "%m"))          # current month of the date being processed
480
  day<-as.integer(strftime(date_proc, "%d"))
481
  year<-as.integer(strftime(date_proc, "%Y"))
482
  
483
  #### STEP 2: PREPARE DATA
484
  
485
  #Clean out this part: make this a function call
486
  x<-as.data.frame(data_v)
487
  d<-as.data.frame(data_s)
488
  for (j in 1:nrow(x)){
489
    if (x$value[j]== -999.9){
490
      x$value[j]<-NA
491
    }
492
  }
493
  for (j in 1:nrow(d)){
494
    if (d$value[j]== -999.9){
495
      d$value[j]<-NA
496
    }
497
  }
498
  pos<-match("value",names(d)) #Find column with name "value"
499
  names(d)[pos]<-y_var_name
500
  pos<-match("value",names(x)) #Find column with name "value"
501
  names(x)[pos]<-y_var_name
502
  pos<-match("station",names(d)) #Find column with station ID
503
  names(d)[pos]<-c("id")
504
  pos<-match("station",names(x)) #Find column with name station ID
505
  names(x)[pos]<-c("id")
506
  
507
  data_s<-d
508
  data_v<-x
509
  
510
  data_s$y_var <- data_s[[y_var_name]] #Adding the variable modeled
511
  data_v$y_var <- data_v[[y_var_name]]
512
  
513
  #Adding back spatal definition
514
  
515
  coordinates(data_s)<-cbind(data_s$x,data_s$y)
516
  proj4string(data_s)<-proj_str
517
  coordinates(data_v)<-cbind(data_v$x,data_v$y)
518
  proj4string(data_v)<-proj_str
519
  #### STEP3:  NOW FIT AND PREDICT  MODEL
520
  
521
  list_formulas<-lapply(list_models,as.formula,env=.GlobalEnv) #mulitple arguments passed to lapply!!
522
  #models names
523
  cname<-paste("mod",1:length(list_formulas),sep="") #change to more meaningful name?
524
  names(list_formulas) <- cname
525
  #Now generate output file names for the predictions...
526
  list_out_filename<-vector("list",length(list_formulas))
527
  names(list_out_filename)<-cname  
528
  
529
  for (k in 1:length(list_out_filename)){
530
    #i indicate which day is predicted, y_var_name indicates TMIN or TMAX
531
    data_name<-paste(y_var_name,"_predicted_",names(list_formulas)[k],"_",
532
                     sampling_dat$date[i],"_",sampling_dat$prop[i],
533
                     "_",sampling_dat$run_samp[i],sep="")
534
    raster_name<-file.path(out_path,paste(interpolation_method,"_",data_name,out_prefix,".tif", sep=""))
535
    list_out_filename[[k]]<-raster_name 
536
  }
537
  
538
  #now fit and predict values for raster image...
539
  
540
  if (interpolation_method=="gam_daily"){
541
    mod_list<-fit_models(list_formulas,data_s) #only gam at this stage
542
    names(mod_list)<-cname
543
    rast_day_list<-predict_raster_model(mod_list,s_raster,list_out_filename)
544
    names(rast_day_list)<-cname
545
  }
546
  
547
  if (interpolation_method=="kriging_daily"){
548
    day_prediction_obj<-predict_auto_krige_raster_model(list_formulas,s_raster,data_s,list_out_filename)
549
    mod_list <-day_prediction_obj$list_fitted_models
550
    rast_day_list <-day_prediction_obj$list_rast_pred
551
    names(rast_day_list)<-cname
552
  }
553
    
554
  #Some models will not be predicted...remove them
555
  rast_day_list<-rast_day_list[!sapply(rast_day_list,is.null)] #remove NULL elements in list
556
  
557
  #Prepare object to return
558
  
559
  day_obj<- list(rast_day_list,data_s,data_v,sampling_dat[i,],mod_list,list_models)
560
  obj_names<-c(y_var_name,"data_s","data_v","sampling_dat","mod","formulas")
561
  names(day_obj)<-obj_names 
562
  save(day_obj,file= file.path(out_path,paste("day_obj_",interpolation_method,"_",var,"_",sampling_dat$date[i],"_",sampling_dat$prop[i],
563
                                              "_",sampling_dat$run_samp[i],out_prefix,".RData",sep="")))
564
  return(day_obj)
565
  
566
}
567

    
568
run_interp_day_fun <-function(i,list_param){
569
  
570
  #Make this a function with multiple argument that can be used by mcmapply??
571
  #This function performs interpolation at daily time scale. Modifications made
572
  #to run three possible methods: gwr, kriging and gam.
573
  #Arguments: 
574
  #1)list_index: j 
575
  #2)covar_rast: covariates raster images used in the modeling
576
  #3)covar_names: names of input variables 
577
  #4)lst_avg: list of LST climatogy names, may be removed later on
578
  #5)list_models: list input models for bias calculation
579
  #6)sampling_obj: data at the daily time scale
580
  #7)var: TMAX or TMIN, variable being interpolated
581
  #8)y_var_name: output name, not used at this stage
582
  #9)out_prefix
583
  #10) out_path
584
  
585
  #The output is a list of four shapefile names produced by the function:
586
  #1) clim: list of output names for raster climatologies 
587
  #2) data_month: monthly training data for bias surface modeling
588
  #3) mod: list of model objects fitted 
589
  #4) formulas: list of formulas used in bias modeling
590
  
591
  ### PARSING INPUT ARGUMENTS
592
  #list_param_runGAMFusion<-list(i,clim_yearlist,sampling_obj,var,y_var_name, out_prefix)
593
  
594
  index<-list_param$list_index
595
  s_raster<-list_param$covar_rast
596
  covar_names<-list_param$covar_names
597
  lst_avg<-list_param$lst_avg
598
  list_models<-list_param$list_models
599
  dst<-list_param$dst #monthly station dataset
600
  sampling_obj<-list_param$sampling_obj
601
  var<-list_param$var
602
  y_var_name<-list_param$y_var_name
603
  interpolation_method <-list_param$interpolation_method
604
  out_prefix<-list_param$out_prefix
605
  out_path<-list_param$out_path
606
  
607
  
608
  ghcn.subsets<-sampling_obj$ghcn_data_day
609
  sampling_dat <- sampling_obj$sampling_dat
610
  sampling <- sampling_obj$sampling_index
611
  
612
  ##########
613
  # STEP 1 - Read in information and get traing and testing stations
614
  ############# 
615
  
616
  date<-strptime(sampling_dat$date[i], "%Y%m%d")   # interpolation date being processed
617
  month<-strftime(date, "%m")          # current month of the date being processed
618
  LST_month<-paste("mm_",month,sep="") # name of LST month to be matched
619
  proj_str<-proj4string(dst) #get the local projection information from monthly data
620
  
621
  #Adding layer LST to the raster stack  
622
  #names(s_raster)<-covar_names
623
  pos<-match("LST",names(s_raster)) #Find the position of the layer with name "LST", if not present pos=NA
624
  s_raster<-dropLayer(s_raster,pos)      # If it exists drop layer
625
  LST<-subset(s_raster,LST_month)
626
  names(LST)<-"LST"
627
  s_raster<-addLayer(s_raster,LST)            #Adding current month
628
  
629
  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
630
  data_day<-ghcn.subsets[[i]]
631
  mod_LST <- ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))]  #Match interpolation date and monthly LST average
632
  data_day$LST <- as.data.frame(mod_LST)[,1] #Add the variable LST to the daily dataset
633
  dst$LST<-dst[[LST_month]] #Add the variable LST to the monthly dataset
634
  
635
  ind.training<-sampling[[i]]
636
  ind.testing <- setdiff(1:nrow(data_day), ind.training)
637
  data_s <- data_day[ind.training, ]   #Training dataset currently used in the modeling
638
  data_v <- data_day[ind.testing, ]    #Testing/validation dataset using input sampling
639
  
640
  ns<-nrow(data_s)
641
  nv<-nrow(data_v)
642
  #i=1
643
  date_proc<-sampling_dat$date[i]
644
  date_proc<-strptime(sampling_dat$date[i], "%Y%m%d")   # interpolation date being processed
645
  mo<-as.integer(strftime(date_proc, "%m"))          # current month of the date being processed
646
  day<-as.integer(strftime(date_proc, "%d"))
647
  year<-as.integer(strftime(date_proc, "%Y"))
648
  
649
  #### STEP 2: PREPARE DATA
650
  
651
  #Clean out this part: make this a function call, should be done ine data preparation to retain the generality of the function
652
  
653
  x<-as.data.frame(data_v)
654
  d<-as.data.frame(data_s)
655
  for (j in 1:nrow(x)){
656
    if (x$value[j]== -999.9){
657
      x$value[j]<-NA
658
    }
659
  }
660
  for (j in 1:nrow(d)){
661
    if (d$value[j]== -999.9){
662
      d$value[j]<-NA
663
    }
664
  }
665
  pos<-match("value",names(d)) #Find column with name "value"
666
  names(d)[pos]<-y_var_name
667
  pos<-match("value",names(x)) #Find column with name "value"
668
  names(x)[pos]<-y_var_name
669
  pos<-match("station",names(d)) #Find column with station ID
670
  names(d)[pos]<-c("id")
671
  pos<-match("station",names(x)) #Find column with name station ID
672
  names(x)[pos]<-c("id")
673
  
674
  data_s<-d
675
  data_v<-x
676
  
677
  data_s$y_var <- data_s[[y_var_name]] #Adding the variable modeled
678
  data_v$y_var <- data_v[[y_var_name]]
679
  
680
  #Adding back spatal definition
681
  
682
  coordinates(data_s)<-cbind(data_s$x,data_s$y)
683
  proj4string(data_s)<-proj_str
684
  coordinates(data_v)<-cbind(data_v$x,data_v$y)
685
  proj4string(data_v)<-proj_str
686
  #### STEP3:  NOW FIT AND PREDICT  MODEL
687
  
688
  list_formulas<-lapply(list_models,as.formula,env=.GlobalEnv) #mulitple arguments passed to lapply!!
689
  #models names
690
  cname<-paste("mod",1:length(list_formulas),sep="") #change to more meaningful name?
691
  names(list_formulas) <- cname
692
  #Now generate output file names for the predictions...
693
  list_out_filename<-vector("list",length(list_formulas))
694
  names(list_out_filename)<-cname  
695
  
696
  for (k in 1:length(list_out_filename)){
697
    #i indicate which day is predicted, y_var_name indicates TMIN or TMAX
698
    data_name<-paste(y_var_name,"_predicted_",names(list_formulas)[k],"_",
699
                     sampling_dat$date[i],"_",sampling_dat$prop[i],
700
                     "_",sampling_dat$run_samp[i],sep="")
701
    raster_name<-file.path(out_path,paste(interpolation_method,"_",data_name,out_prefix,".tif", sep=""))
702
    list_out_filename[[k]]<-raster_name 
703
  }
704
  
705
  #now fit and predict values for raster image...
706
  
707
  if (interpolation_method=="gam_daily"){
708
    if(screen_data_training==TRUE){
709
      col_names <-unlist(lapply(list_formulas,all.vars)) #extract all covariates names used in the models
710
      col_names<-unique(col_names)
711
      data_fit <- remove_na_spdf(col_names,data_s)
712
    }else{
713
      data_fit <- data_s
714
    }
715
    #mod_list<-fit_models(list_formulas,data_s) #only gam at this stage
716
    mod_list<-fit_models(list_formulas,data_fit) #only gam at this stage
717
    names(mod_list)<-cname
718
    rast_day_list<-predict_raster_model(mod_list,s_raster,list_out_filename)
719
    names(rast_day_list)<-cname
720
  }
721
  
722
  ## need to change to use combined gwr autokrige function
723
  if (interpolation_method=="kriging_daily"){
724
    day_prediction_obj<-predict_auto_krige_raster_model(list_formulas,s_raster,data_s,list_out_filename)
725
    mod_list <-day_prediction_obj$list_fitted_models
726
    rast_day_list <-day_prediction_obj$list_rast_pred
727
    names(rast_day_list)<-cname
728
  }
729
  
730
  if (interpolation_method=="gwr_daily"){
731
    method_interp <- "gwr"
732
    day_prediction_obj<-predict_autokrige_gwr_raster_model(method_interp,list_formulas,s_raster,data_s,list_out_filename)
733
    mod_list <-day_prediction_obj$list_fitted_models
734
    rast_day_list <-day_prediction_obj$list_rast_pred
735
    names(rast_day_list)<-cname
736
  }
737
  #Some models will not be predicted...remove them
738
  rast_day_list<-rast_day_list[!sapply(rast_day_list,is.null)] #remove NULL elements in list
739
  
740
  #Prepare object to return
741
  
742
  day_obj<- list(rast_day_list,data_s,data_v,sampling_dat[i,],mod_list,list_models)
743
  obj_names<-c(y_var_name,"data_s","data_v","sampling_dat","mod","formulas")
744
  names(day_obj)<-obj_names 
745
  save(day_obj,file= file.path(out_path,paste("day_obj_",interpolation_method,"_",var,"_",sampling_dat$date[i],"_",sampling_dat$prop[i],
746
                                              "_",sampling_dat$run_samp[i],out_prefix,".RData",sep="")))
747
  return(day_obj)
748
  
749
}
750

    
(37-37/53)