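# NOTE: the next three lines are navigation text left over from the web
# repository viewer this file was copied from; they are not R code.
# Project | General | Profile
# Download (19.4 KB) Statistics
# | Branch: | Revision: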
##################  Functions for use in the raster prediction stage   #######################################
############################ Interpolation in a given tile/region ##########################################
#This script contains 5 functions used in the interpolation of temperature in the specified study/processing area:
# 1)predict_raster_model<-function(in_models,r_stack,out_filename)
# 2)fit_models<-function(list_formulas,data_training)
# 3)runClimCAI<-function(j) : not working yet
# 4)runClim_KGFusion<-function(j,list_param)
# 5)runGAMFusion <- function(i,list_param)
#
#AUTHOR: Benoit Parmentier
#DATE: 03/12/2013
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--

##Comments and TODO:
#This script is meant for general processing, tile by tile or region by region.
# Note that the functions are called from GAM_fusion_analysis_raster_prediction_mutlisampling.R.
# This will be expanded to other methods.

##################################################################################################

predict_raster_model <- function(in_models, r_stack, out_filename) {
  #This function performs predictions on a raster grid given input models
  #and writes each prediction to a raster file.
  #Arguments:
  # - in_models: list of fitted models; elements that are not of class "gam"
  #              (e.g. "try-error" objects from a failed fit) are skipped
  # - r_stack: raster stack of covariates passed to predict()
  # - out_filename: list (or vector) of output file names, one per model
  #Output: list of the same length as in_models holding the name of the file
  #        written for each "gam" model and NULL for every skipped model.
  list_rast_pred <- vector("list", length(in_models))
  #seq_along() so that an empty model list yields an empty result
  for (i in seq_along(in_models)) {
    mod <- in_models[[i]] #model object "i"
    raster_name <- out_filename[[i]]
    if (inherits(mod, "gam")) {           #change to c("gam","autoKrige")
      #Using the fitted model to predict new values over the covariate stack
      raster_pred <- predict(object = r_stack, model = mod, na.rm = FALSE)
      names(raster_pred) <- "y_pred"
      writeRaster(raster_pred, filename = raster_name, overwrite = TRUE)
      list_rast_pred[[i]] <- raster_name
    } else if (inherits(mod, "try-error")) {
      #Report every failed fit, not only the last model in the list
      print(paste("no gam model fitted:", mod[1], sep = " ")) #change message for any model type...
    }
  }
  return(list_rast_pred)
}
43

    
44
fit_models <- function(list_formulas, data_training) {
  #This function fits several models and returns the model objects.
  #Arguments: - list_formulas: list of formulas for GAM models
  #           - data_training: fitting data, passed as the data argument of gam()
  #Output: list of the same length as list_formulas; each element is the
  #        value of try(gam(...)), i.e. a fitted model or a "try-error" object.
  list_fitted_models <- vector("list", length(list_formulas))
  #seq_along() so that an empty formula list returns an empty list
  for (k in seq_along(list_formulas)) {
    formula <- list_formulas[[k]]
    #try() keeps one failed fit from aborting the remaining fits
    mod <- try(gam(formula, data = data_training)) #change to any model!!
    #mod<- try(autoKrige(formula, input_data=data_s,new_data=s_sgdf,data_variogram=data_s))
    list_fitted_models[[k]] <- mod
  }
  return(list_fitted_models)
}
60

    
61
####
62
#TODO:
63
#Add log file and calculate time and sizes for processes-outputs
64
runClimCAI <- function(j) {
  #Creates monthly climatology layers for month j: one GAM prediction per
  #formula plus one kriged surface. Still needs more code testing.
  #Make this a function with multiple argument that can be used by mcmapply??
  #
  #Argument:
  # - j: month index, used to subset dst and to pick the LST layer name
  #
  #NOTE: unlike runClim_KGFusion, this function does not receive a list_param;
  #it reads list_models, dst, lst_avg, s_raster and out_prefix from the
  #calling environment.
  #
  #Output: list with elements
  # 1) clim: names of the climatology raster files written
  # 2) data_month: monthly training data
  # 3) mod: list of model objects fitted
  # 4) formulas: list of formulas used
  #The same list is also saved to an .RData file.

  #Model and response variable can be changed without affecting the script
  prop_month <- 0 #proportion retained for validation
  run_samp <- 1

  list_formulas <- lapply(list_models, as.formula, env = .GlobalEnv)

  data_month <- dst[dst$month == j, ] #Subsetting dataset for the relevant month
  LST_name <- lst_avg[j] # name of LST month to be matched
  data_month$LST <- data_month[[LST_name]]

  #TMax to model...
  data_month$y_var <- data_month$TMax #Adding TMax as the variable modeled
  mod_list <- fit_models(list_formulas, data_month) #only gam at this stage
  cname <- paste0("mod", seq_along(mod_list)) #change to more meaningful name?
  names(mod_list) <- cname

  #Rename layer "elev" if present; names() is used as everywhere else in
  #this file instead of the older layerNames() accessor
  pos <- match("elev", names(s_raster))
  if (!is.na(pos)) {
    names(s_raster)[pos] <- "elev_1"
  }

  #Adding layer LST to the raster stack
  pos <- match("LST", names(s_raster)) #position of layer "LST", NA if absent
  s_raster <- dropLayer(s_raster, pos)      # If it exists drop layer
  LST <- subset(s_raster, LST_name)
  names(LST) <- "LST"
  #Screen for extreme values: this needs more thought, min and max val vary with regions
  #min_val<-(-15+273.16) #if values less than -15C then screen out (note the Kelvin units that will need to be changed later in all datasets)
  #r1[r1 < (min_val)]<-NA
  s_raster <- addLayer(s_raster, LST)            #Adding current month

  #Now generate file names for the predictions (j indicates the month predicted)
  list_out_filename <- lapply(cname, function(model_id) {
    data_name <- paste0("clim_month_", j, "_", model_id, "_", prop_month,
                        "_", run_samp)
    paste0("fusion_", data_name, out_prefix, ".tif")
  })
  names(list_out_filename) <- cname

  #now predict values for raster image...
  rast_clim_list <- predict_raster_model(mod_list, s_raster, list_out_filename)
  names(rast_clim_list) <- cname
  #Some models will not be predicted...remove the NULL elements
  rast_clim_list <- rast_clim_list[!vapply(rast_clim_list, is.null, logical(1))]

  #Adding Kriging for Climatology options
  clim_xy <- coordinates(data_month)
  fitclim <- Krig(clim_xy, data_month$TMax, theta = 1e5) #use TPS or krige
  mod_krtmp1 <- fitclim
  model_name <- "mod_kr"

  clim_rast <- interpolate(LST, fitclim) #interpolation using function from raster package

  #now climatology layer: saving kriged surface in raster image
  #clim_rast<-LST-bias_rast
  data_name <- paste0("clim_month_", j, "_", model_name, "_", prop_month,
                      "_", run_samp)
  raster_name_clim <- paste0("fusion_", data_name, out_prefix, ".tif")
  writeRaster(clim_rast, filename = raster_name_clim, overwrite = TRUE)

  #Adding to current objects
  mod_list[[model_name]] <- mod_krtmp1
  #rast_bias_list[[model_name]]<-raster_name_bias
  rast_clim_list[[model_name]] <- raster_name_clim

  #Prepare object to return
  clim_obj <- list(rast_clim_list, data_month, mod_list, list_formulas)
  names(clim_obj) <- c("clim", "data_month", "mod", "formulas")

  save(clim_obj, file = paste0("clim_obj_month_", j, "_", out_prefix, ".RData"))

  return(clim_obj)
}
148
#
149

    
150
runClim_KGFusion <- function(j, list_param) {
  #Models the LST bias (LST - TMax) for month j with GAMs and kriging and
  #derives the corresponding climatology layers (LST - bias).
  #Make this a function with multiple argument that can be used by mcmapply??
  #
  #Arguments:
  #1)j: month index
  #2)list_param: list with the following elements
  #  - covar_rast: covariates raster images used in the modeling
  #  - lst_avg: list of LST climatology names, may be removed later on
  #  - list_models: list of input models for bias calculation
  #  - dst: data at the monthly time scale
  #  - out_prefix: suffix appended to all output file names
  #  (j, covar_names, var and y_var_name may also be present in list_param
  #   but are not used at this stage)
  #
  #The output is a list of five elements, also saved to an .RData file:
  #1) bias: list of output names for bias rasters
  #2) clim: list of output names for raster climatologies
  #3) data_month: monthly training data for bias surface modeling
  #4) mod: list of model objects fitted
  #5) formulas: list of formulas used in bias modeling

  ### PARSING INPUT ARGUMENTS

  s_raster <- list_param$covar_rast
  lst_avg <- list_param$lst_avg
  list_models <- list_param$list_models
  dst <- list_param$dst #monthly station dataset
  out_prefix <- list_param$out_prefix

  #Model and response variable can be changed without affecting the script
  prop_month <- 0 #proportion retained for validation
  run_samp <- 1 #This option can be added later on if/when needed

  #### STEP 2: PREPARE DATA

  data_month <- dst[dst$month == j, ] #Subsetting dataset for the relevant month
  LST_name <- lst_avg[j] # name of LST month to be matched
  data_month$LST <- data_month[[LST_name]]

  #Adding layer LST to the raster stack
  pos <- match("LST", names(s_raster)) #position of layer "LST", NA if absent
  s_raster <- dropLayer(s_raster, pos)      # If it exists drop layer
  LST <- subset(s_raster, LST_name)
  names(LST) <- "LST"
  s_raster <- addLayer(s_raster, LST)            #Adding current month

  #LST bias to model...
  #if (var==TMAX): will need to modify to take into account TMAX, TMIN and LST_day,LST_night
  data_month$LSTD_bias <- data_month$LST - data_month$TMax
  data_month$y_var <- data_month$LSTD_bias #Adding bias as the variable modeled

  #### STEP3:  NOW FIT AND PREDICT  MODEL

  list_formulas <- lapply(list_models, as.formula, env = .GlobalEnv)

  mod_list <- fit_models(list_formulas, data_month) #only gam at this stage
  cname <- paste0("mod", seq_along(mod_list)) #change to more meaningful name?
  names(mod_list) <- cname

  #Now generate file names for the predictions (j indicates the month predicted)
  list_out_filename <- lapply(cname, function(model_id) {
    data_name <- paste0("bias_LST_month_", j, "_", model_id, "_", prop_month,
                        "_", run_samp)
    paste0("fusion_", data_name, out_prefix, ".tif")
  })
  names(list_out_filename) <- cname

  #now predict values for raster image...
  rast_bias_list <- predict_raster_model(mod_list, s_raster, list_out_filename)
  names(rast_bias_list) <- cname
  #Some models will not be predicted...remove the NULL elements
  rast_bias_list <- rast_bias_list[!vapply(rast_bias_list, is.null, logical(1))]

  #Climatology for each predicted bias surface: clim = LST - bias.
  #Skipped entirely when no model could be predicted, so that the kriging
  #step below still runs.
  rast_clim_list <- list()
  if (length(rast_bias_list) > 0) {
    mod_rast <- stack(rast_bias_list)  #stack of bias raster images from models
    rast_clim_list <- vector("list", nlayers(mod_rast))
    names(rast_clim_list) <- names(rast_bias_list)
    for (k in seq_len(nlayers(mod_rast))) {
      clim_fus_rast <- LST - subset(mod_rast, k)
      data_name <- paste0("clim_LST_month_", j, "_", names(rast_clim_list)[k],
                          "_", prop_month, "_", run_samp)
      raster_name <- paste0("fusion_", data_name, out_prefix, ".tif")
      rast_clim_list[[k]] <- raster_name
      writeRaster(clim_fus_rast, filename = raster_name, overwrite = TRUE)
    }
  }

  #### STEP 4:Adding Kriging for Climatology options

  bias_xy <- coordinates(data_month)
  fitbias <- Krig(bias_xy, data_month$LSTD_bias, theta = 1e5) #use TPS or krige
  mod_krtmp1 <- fitbias
  model_name <- "mod_kr"

  bias_rast <- interpolate(LST, fitbias) #interpolation using function from raster package
  #Saving kriged surface in raster images
  data_name <- paste0("bias_LST_month_", j, "_", model_name, "_", prop_month,
                      "_", run_samp)
  raster_name_bias <- paste0("fusion_", data_name, out_prefix, ".tif")
  writeRaster(bias_rast, filename = raster_name_bias, overwrite = TRUE)

  #now climatology layer
  clim_rast <- LST - bias_rast
  data_name <- paste0("clim_LST_month_", j, "_", model_name, "_", prop_month,
                      "_", run_samp)
  raster_name_clim <- paste0("fusion_", data_name, out_prefix, ".tif")
  writeRaster(clim_rast, filename = raster_name_clim, overwrite = TRUE)

  #Adding to current objects
  mod_list[[model_name]] <- mod_krtmp1
  rast_bias_list[[model_name]] <- raster_name_bias
  rast_clim_list[[model_name]] <- raster_name_clim

  #### STEP 5: Prepare object and return

  clim_obj <- list(rast_bias_list, rast_clim_list, data_month, mod_list,
                   list_formulas)
  names(clim_obj) <- c("bias", "clim", "data_month", "mod", "formulas")

  save(clim_obj, file = paste0("clim_obj_month_", j, "_", out_prefix, ".RData"))

  return(clim_obj)
}
281

    
282
## Run function for kriging...?
283

    
284
#runGAMFusion <- function(i) {            # loop over dates
285
runGAMFusion <- function(i, list_param) {            # loop over dates
  #Daily prediction for the date with index i:
  #T(day) = clim(month) + delta(day), where delta is the kriged difference
  #between the daily station value and the monthly station mean.
  #
  #Arguments:
  #1)i: loop list index for individual run/fit
  #2)list_param: list with the following elements
  #  - clim_yearlist: list of climatology files for all models...(12*nb of models)
  #  - sampling_obj: contains data per date/fit (ghcn_data_day), sampling
  #                  information (sampling_dat) and training indices (sampling_index)
  #  - dst: data at the monthly time scale
  #  - y_var_name: name of the variable predicted - dailyTMax, dailyTMin
  #  - out_prefix: suffix appended to all output file names
  #  (var may also be present in list_param but is not used at this stage)
  #
  #The output is a list of seven elements, also saved to an .RData file:
  #1) <y_var_name>: list of files for the daily temperature predictions
  #2) clim: list of files for temperature climatology predictions
  #3) delta: file name of the daily delta prediction
  #4) data_s: training data
  #5) data_v: testing data
  #6) sampling_dat: sampling information for the current prediction (date,proportion of holdout and sample number)
  #7) mod_kr_day: kriging delta fit, fields package model object

  ### PARSING INPUT ARGUMENTS

  rast_clim_yearlist <- list_param$clim_yearlist
  sampling_obj <- list_param$sampling_obj
  ghcn.subsets <- sampling_obj$ghcn_data_day
  sampling_dat <- sampling_obj$sampling_dat
  sampling <- sampling_obj$sampling_index
  y_var_name <- list_param$y_var_name
  out_prefix <- list_param$out_prefix
  dst <- list_param$dst #monthly station dataset

  ##########
  # STEP 1 - Read in information and get training and testing stations
  ##########

  date_proc <- strptime(sampling_dat$date[i], "%Y%m%d") # interpolation date being processed
  month_str <- strftime(date_proc, "%m")   # current month, zero padded
  mo <- as.integer(month_str)
  LST_month <- paste0("mm_", month_str)    # name of LST month to be matched
  proj_str <- proj4string(dst) #get the local projection information from monthly data

  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
  data_day <- ghcn.subsets[[i]]
  #Match interpolation date and monthly LST average
  mod_LST <- data_day[, match(LST_month, names(data_day))]
  data_day$LST <- as.data.frame(mod_LST)[, 1] #Add the variable LST to the dataset
  dst$LST <- dst[[LST_month]] #Add the variable LST to the monthly dataset

  ind.training <- sampling[[i]]
  ind.testing <- setdiff(seq_len(nrow(data_day)), ind.training)
  data_s <- data_day[ind.training, ]   #Training dataset currently used in the modeling
  data_v <- data_day[ind.testing, ]    #Testing/validation dataset using input sampling

  ##########
  # STEP 2 - JOIN DAILY AND MONTHLY STATION INFORMATION
  ##########

  modst <- dst[dst$month == mo, ] #Subsetting dataset for the relevant month
  #Change to y_var...could be TMin
  #That is the difference between the monthly LST mean and monthly station mean
  modst$LSTD_bias <- modst$LST - modst$TMax

  #Recode the -999.9 missing-value flag as NA. Vectorised so that rows that
  #are already NA and empty data sets (no holdout) are handled.
  recode_missing <- function(df) {
    is_flagged <- !is.na(df$value) & df$value == -999.9
    df$value[is_flagged] <- NA
    df
  }
  x <- recode_missing(as.data.frame(data_v))
  d <- recode_missing(as.data.frame(data_s))

  pos <- match("value", names(d)) #Find column with name "value"
  names(d)[pos] <- y_var_name
  names(x)[pos] <- y_var_name
  pos <- match("station", names(d)) #Find column with name "station"
  names(d)[pos] <- c("id")
  names(x)[pos] <- c("id")
  names(modst)[1] <- c("id")       #modst contains the average tmax per month for every stations...

  dmoday <- merge(modst, d, by = "id", suffixes = c("", ".y2"))
  xmoday <- merge(modst, x, by = "id", suffixes = c("", ".y2"))
  #Removing duplicate columns (suffix ".y2"). A logical mask is used because
  #negative indexing with an empty match would drop every column.
  mod_pat <- glob2rx("*.y2")
  dmoday <- dmoday[, !grepl(mod_pat, names(dmoday)), drop = FALSE]
  xmoday <- xmoday[, !grepl(mod_pat, names(xmoday)), drop = FALSE]

  data_v <- xmoday

  #dmoday contains the daily values for training with TMax being the monthly station tmax mean
  #xmoday contains the daily values for validation with TMax being the monthly station tmax mean

  ##########
  # STEP 3 - interpolate daily delta across space
  ##########

  #The daily column was renamed to y_var_name above, so it is read by that name.
  #Change to take into account TMin and TMax (monthly mean is still TMax)
  daily_delta <- dmoday[[y_var_name]] - dmoday$TMax
  daily_delta_xy <- as.matrix(cbind(dmoday$x, dmoday$y))
  fitdelta <- Krig(daily_delta_xy, daily_delta, theta = 1e5) #use TPS or krige
  mod_krtmp2 <- fitdelta
  model_name <- paste("mod_kr", "day", sep = "_")
  data_s <- dmoday
  data_s$daily_delta <- daily_delta

  #########
  # STEP 4 - Calculate daily predictions - T(day) = clim(month) + delta(day)
  #########

  rast_clim_list <- rast_clim_yearlist[[mo]]  #select relevant month
  rast_clim_month <- raster(rast_clim_list[[1]])

  #Interpolation of the delta surface on the grid of the first climatology
  daily_delta_rast <- interpolate(rast_clim_month, fitdelta)

  #Saving kriged surface in raster images
  data_name <- paste0("daily_delta_", sampling_dat$date[i], "_",
                      sampling_dat$prop[i], "_", sampling_dat$run_samp[i])
  raster_name_delta <- paste0("fusion_", data_name, out_prefix, ".tif")
  writeRaster(daily_delta_rast, filename = raster_name_delta, overwrite = TRUE)

  #Now predict daily after having selected the relevant month
  temp_list <- vector("list", length(rast_clim_list))
  for (m in seq_along(rast_clim_list)) {
    rast_clim_month <- raster(rast_clim_list[[m]])
    temp_predicted <- rast_clim_month + daily_delta_rast

    data_name <- paste0(y_var_name, "_predicted_", names(rast_clim_list)[m], "_",
                        sampling_dat$date[i], "_", sampling_dat$prop[i],
                        "_", sampling_dat$run_samp[i])
    raster_name <- paste0("fusion_", data_name, out_prefix, ".tif")
    writeRaster(temp_predicted, filename = raster_name, overwrite = TRUE)
    temp_list[[m]] <- raster_name
  }

  ##########
  # STEP 5 - Prepare output object to return
  ##########

  names(temp_list) <- names(rast_clim_list)
  coordinates(data_s) <- cbind(data_s$x, data_s$y)
  proj4string(data_s) <- proj_str
  coordinates(data_v) <- cbind(data_v$x, data_v$y)
  proj4string(data_v) <- proj_str

  delta_obj <- list(temp_list, rast_clim_list, raster_name_delta, data_s,
                    data_v, sampling_dat[i, ], mod_krtmp2)

  obj_names <- c(y_var_name, "clim", "delta", "data_s", "data_v",
                 "sampling_dat", model_name)
  names(delta_obj) <- obj_names
  save(delta_obj, file = paste0("delta_obj_", sampling_dat$date[i], "_",
                                sampling_dat$prop[i], "_",
                                sampling_dat$run_samp[i], out_prefix, ".RData"))
  return(delta_obj)
}
461
 
(11-11/40)