Project

General

Profile

Download (16.9 KB) Statistics
| Branch: | Revision:
1
#Function to be used with GAM_fusion_analysis_raster_prediction_mutlisampling.R
2
#runClimFusion<-function(r_stack,data_training,data_testing,data_training){
3

    
4
#Functions used in the script
5

    
6
predict_raster_model<-function(in_models,r_stack,out_filename){
7
  #This functions performs predictions on a raster grid given input models.
8
  #Arguments: list of fitted models, raster stack of covariates
9
  #Output: spatial grid data frame of the subset of tiles
10
  list_rast_pred<-vector("list",length(in_models))
11
  for (i in 1:length(in_models)){
12
    mod <-in_models[[i]] #accessing GAM model ojbect "j"
13
    raster_name<-out_filename[[i]]
14
    if (inherits(mod,"gam")) {           #change to c("gam","autoKrige")
15
      raster_pred<- predict(object=r_stack,model=mod,na.rm=FALSE) #Using the coeff to predict new values.
16
      names(raster_pred)<-"y_pred"  
17
      writeRaster(raster_pred, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
18
      #print(paste("Interpolation:","mod", j ,sep=" "))
19
      list_rast_pred[[i]]<-raster_name
20
    }
21
  }
22
  if (inherits(mod,"try-error")) {
23
    print(paste("no gam model fitted:",mod[1],sep=" ")) #change message for any model type...
24
  }
25
  return(list_rast_pred)
26
}
27

    
28
fit_models<-function(list_formulas,data_training){
29
  #This functions several models and returns model objects.
30
  #Arguments: - list of formulas for GAM models
31
  #           - fitting data in a data.frame or SpatialPointDataFrame
32
  #Output: list of model objects 
33
  list_fitted_models<-vector("list",length(list_formulas))
34
  for (k in 1:length(list_formulas)){
35
    formula<-list_formulas[[k]]
36
    mod<- try(gam(formula, data=data_training)) #change to any model!!
37
    #mod<- try(autoKrige(formula, input_data=data_s,new_data=s_sgdf,data_variogram=data_s))
38
    model_name<-paste("mod",k,sep="")
39
    assign(model_name,mod) 
40
    list_fitted_models[[k]]<-mod
41
  }
42
  return(list_fitted_models) 
43
}
44

    
45
####
46
#TODO:
47
#Add log file and calculate time and sizes for processes-outputs
48
runClimCAI<-function(j){
49
  #Make this a function with multiple argument that can be used by mcmapply??
50
  #This creates clim fusion layers...still needs more code testing
51
  
52
  #Model and response variable can be changed without affecting the script
53
  prop_month<-0 #proportion retained for validation
54
  run_samp<-1
55
  
56
  list_formulas<-lapply(list_models,as.formula,env=.GlobalEnv) #mulitple arguments passed to lapply!!
57
  
58
  data_month<-dst[dst$month==j,] #Subsetting dataset for the relevant month of the date being processed
59
  LST_name<-lst_avg[j] # name of LST month to be matched
60
  data_month$LST<-data_month[[LST_name]]
61
  
62
  #TMax to model...
63
  data_month$y_var<-data_month$TMax #Adding TMax as the variable modeled
64
  mod_list<-fit_models(list_formulas,data_month) #only gam at this stage
65
  cname<-paste("mod",1:length(mod_list),sep="") #change to more meaningful name?
66
  names(mod_list)<-cname
67
  #Adding layer LST to the raster stack  
68
  pos<-match("elev",names(s_raster))
69
  layerNames(s_raster)[pos]<-"elev_1"
70
  
71
  pos<-match("LST",names(s_raster)) #Find the position of the layer with name "LST", if not present pos=NA
72
  s_raster<-dropLayer(s_raster,pos)      # If it exists drop layer
73
  LST<-subset(s_raster,LST_name)
74
  names(LST)<-"LST"
75
  #Screen for extreme values": this needs more thought, min and max val vary with regions
76
  #min_val<-(-15+273.16) #if values less than -15C then screen out (note the Kelvin units that will need to be changed later in all datasets)
77
  #r1[r1 < (min_val)]<-NA
78
  s_raster<-addLayer(s_raster,LST)            #Adding current month
79
  
80
  #Now generate file names for the predictions...
81
  list_out_filename<-vector("list",length(mod_list))
82
  names(list_out_filename)<-cname  
83
  
84
  for (k in 1:length(list_out_filename)){
85
    #j indicate which month is predicted
86
    data_name<-paste("clim_month_",j,"_",cname[k],"_",prop_month,
87
                     "_",run_samp,sep="")
88
    raster_name<-paste("fusion_",data_name,out_prefix,".tif", sep="")
89
    list_out_filename[[k]]<-raster_name
90
  }
91
  
92
  #now predict values for raster image...
93
  rast_clim_list<-predict_raster_model(mod_list,s_raster,list_out_filename)
94
  names(rast_clim_list)<-cname
95
  #Some modles will not be predicted...remove them
96
  rast_clim_list<-rast_clim_list[!sapply(rast_clim_list,is.null)] #remove NULL elements in list
97
  
98
  #Adding Kriging for Climatology options
99
  
100
  clim_xy<-coordinates(data_month)
101
  fitclim<-Krig(clim_xy,data_month$TMax,theta=1e5) #use TPS or krige 
102
  mod_krtmp1<-fitclim
103
  model_name<-"mod_kr"
104
  
105
  clim_rast<-interpolate(LST,fitclim) #interpolation using function from raster package
106
  #Saving kriged surface in raster images
107
  #data_name<-paste("clim_month_",j,"_",model_name,"_",prop_month,
108
  #                 "_",run_samp,sep="")
109
  #raster_name_clim<-paste("fusion_",data_name,out_prefix,".tif", sep="")
110
  #writeRaster(clim_rast, filename=raster_name_clim,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
111
  
112
  #now climatology layer
113
  #clim_rast<-LST-bias_rast
114
  data_name<-paste("clim_month_",j,"_",model_name,"_",prop_month,
115
                   "_",run_samp,sep="")
116
  raster_name_clim<-paste("fusion_",data_name,out_prefix,".tif", sep="")
117
  writeRaster(clim_rast, filename=raster_name_clim,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
118
  
119
  #Adding to current objects
120
  mod_list[[model_name]]<-mod_krtmp1
121
  #rast_bias_list[[model_name]]<-raster_name_bias
122
  rast_clim_list[[model_name]]<-raster_name_clim
123
  
124
  #Prepare object to return
125
  clim_obj<-list(rast_clim_list,data_month,mod_list,list_formulas)
126
  names(clim_obj)<-c("clim","data_month","mod","formulas")
127
  
128
  save(clim_obj,file= paste("clim_obj_month_",j,"_",out_prefix,".RData",sep=""))
129
  
130
  return(clim_obj) 
131
}
132
#
133

    
134
runClim_KGFusion<-function(j){
135
  
136
  #Make this a function with multiple argument that can be used by mcmapply??
137
  #This creates clim fusion layers...
138
  #Parameters:
139
  
140
  #1)s_raster: brick of covariates   : could pass as argument only specific variables??
141
  #2)dst: monthly data (infile_monthly): could pass only the subset from the month??
142
  #3)list_models
143
  #4)brick names covarnames
144
  #5)out_prefix
145
  
146
  
147
  ## STEP 1: PARSE PARAMETERS AND ARGUMENTS
148
  
149
  #Model and response variable can be changed without affecting the script
150
  prop_month<-0 #proportion retained for validation
151
  run_samp<-1
152
  
153
  ## STEP 2: PREPARE DATA
154
  
155
  data_month<-dst[dst$month==j,] #Subsetting dataset for the relevant month of the date being processed
156
  LST_name<-lst_avg[j] # name of LST month to be matched
157
  data_month$LST<-data_month[[LST_name]]
158
  
159
  #Adding layer LST to the raster stack  
160
  covar_rast<-s_raster
161
  pos<-match("LST",names(s_raster)) #Find the position of the layer with name "LST", if not present pos=NA
162
  s_raster<-dropLayer(s_raster,pos)      # If it exists drop layer
163
  LST<-subset(s_raster,LST_name)
164
  names(LST)<-"LST"
165
  s_raster<-addLayer(s_raster,LST)            #Adding current month
166
  
167
  #LST bias to model...
168
  #if (var==TMAX): will need to modify to take into account TMAX, TMIN and LST_day,LST_night
169
  data_month$LSTD_bias<-data_month$LST-data_month$TMax
170
  data_month$y_var<-data_month$LSTD_bias #Adding bias as the variable modeled
171
  
172
  #### STEP3:  NOW FIT AND PREDICT  MODEL
173
  
174
  list_formulas<-lapply(list_models,as.formula,env=.GlobalEnv) #mulitple arguments passed to lapply!!
175
  
176
  mod_list<-fit_models(list_formulas,data_month) #only gam at this stage
177
  cname<-paste("mod",1:length(mod_list),sep="") #change to more meaningful name?
178
  names(mod_list)<-cname
179
  
180
  #Now generate file names for the predictions...
181
  list_out_filename<-vector("list",length(mod_list))
182
  names(list_out_filename)<-cname  
183
  
184
  for (k in 1:length(list_out_filename)){
185
    #j indicate which month is predicted
186
    data_name<-paste("bias_LST_month_",j,"_",cname[k],"_",prop_month,
187
                     "_",run_samp,sep="")
188
    raster_name<-paste("fusion_",data_name,out_prefix,".tif", sep="")
189
    list_out_filename[[k]]<-raster_name
190
  }
191

    
192
  #now predict values for raster image...
193
  rast_bias_list<-predict_raster_model(mod_list,s_raster,list_out_filename)
194
  names(rast_bias_list)<-cname
195
  #Some modles will not be predicted...remove them
196
  rast_bias_list<-rast_bias_list[!sapply(rast_bias_list,is.null)] #remove NULL elements in list
197

    
198
  mod_rast<-stack(rast_bias_list)  #stack of bias raster images from models
199
  rast_clim_list<-vector("list",nlayers(mod_rast))
200
  names(rast_clim_list)<-names(rast_bias_list)
201
  for (k in 1:nlayers(mod_rast)){
202
    clim_fus_rast<-LST-subset(mod_rast,k)
203
    data_name<-paste("clim_LST_month_",j,"_",names(rast_clim_list)[k],"_",prop_month,
204
                     "_",run_samp,sep="")
205
    raster_name<-paste("fusion_",data_name,out_prefix,".tif", sep="")
206
    rast_clim_list[[k]]<-raster_name
207
    writeRaster(clim_fus_rast, filename=raster_name,overwrite=TRUE)  #Wri
208
  }
209
  
210
  #### STEP 4:Adding Kriging for Climatology options
211
  
212
  bias_xy<-coordinates(data_month)
213
  fitbias<-Krig(bias_xy,data_month$LSTD_bias,theta=1e5) #use TPS or krige 
214
  mod_krtmp1<-fitbias
215
  model_name<-"mod_kr"
216
  
217
   
218
  bias_rast<-interpolate(LST,fitbias) #interpolation using function from raster package
219
  #Saving kriged surface in raster images
220
  data_name<-paste("bias_LST_month_",j,"_",model_name,"_",prop_month,
221
                   "_",run_samp,sep="")
222
  raster_name_bias<-paste("fusion_",data_name,out_prefix,".tif", sep="")
223
  writeRaster(bias_rast, filename=raster_name_bias,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
224
  
225
  #now climatology layer
226
  clim_rast<-LST-bias_rast
227
  data_name<-paste("clim_LST_month_",j,"_",model_name,"_",prop_month,
228
                   "_",run_samp,sep="")
229
  raster_name_clim<-paste("fusion_",data_name,out_prefix,".tif", sep="")
230
  writeRaster(clim_rast, filename=raster_name_clim,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
231
  
232
  #Adding to current objects
233
  mod_list[[model_name]]<-mod_krtmp1
234
  rast_bias_list[[model_name]]<-raster_name_bias
235
  rast_clim_list[[model_name]]<-raster_name_clim
236
  
237
  #### STEP 5: Prepare object and return
238
  
239
  clim_obj<-list(rast_bias_list,rast_clim_list,data_month,mod_list,list_formulas)
240
  names(clim_obj)<-c("bias","clim","data_month","mod","formulas")
241
  
242
  save(clim_obj,file= paste("clim_obj_month_",j,"_",out_prefix,".RData",sep=""))
243
  
244
  return(clim_obj)
245
}
246

    
247
## Run function for kriging...?
248

    
249
#runGAMFusion <- function(i) {            # loop over dates
250
runGAMFusion <- function(i,list_param) {            # loop over dates
251
    #### Change this to allow explicitly arguments...
252
  #Arguments: 
253
  #1)list of climatology files for all models...(12*nb of models)
254
  #2)sampling_obj$data_day_gcn: ghcn.subsets (data per date )
255
  #4)sampling_dat: list of sampling information for every run (proporation, month,sample)
256
  #5)ampling_index : list of training
257
  #6)dst: data at the monthly time scale
258
  #7)var:
259
  #8)y_var_name:
260
  #9)out_prefix
261
  
262
  ### PARSING INPUT ARGUMENTS
263
  
264
  #list_param_runGAMFusion<-list(i,clim_yearlist,sampling_obj,var,y_var_name, out_prefix)
265
  rast_clim_yearlist<-list_param$clim_yearlist
266
  sampling_obj<-list_param$sampling_obj
267
  ghcn.subsets<-sampling_obj$ghcn_data_day
268
  sampling_dat <- sampling_obj$sampling_dat
269
  sampling <- sampling_obj$sampling_index
270
  var<-list_param$var
271
  y_var_name<-list_param$y_var_name
272
  out_prefix<-list_param$out_prefix
273
  dst<-list_param$dst #monthly station dataset
274
  
275
  ##########
276
  # STEP 1 - interpolate delta across space
277
  ############# Read in information and get traiing and testing stations
278
  
279
  date<-strptime(sampling_dat$date[i], "%Y%m%d")   # interpolation date being processed
280
  month<-strftime(date, "%m")          # current month of the date being processed
281
  LST_month<-paste("mm_",month,sep="") # name of LST month to be matched
282
  proj_str<-proj4string(dst) #get the local projection information from monthly data
283

    
284
  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
285
  data_day<-ghcn.subsets[[i]]
286
  mod_LST <- ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))]  #Match interpolation date and monthly LST average
287
  data_day$LST <- as.data.frame(mod_LST)[,1] #Add the variable LST to the dataset
288
  dst$LST<-dst[[LST_month]] #Add the variable LST to the monthly dataset
289
  
290
  ind.training<-sampling[[i]]
291
  ind.testing <- setdiff(1:nrow(data_day), ind.training)
292
  data_s <- data_day[ind.training, ]   #Training dataset currently used in the modeling
293
  data_v <- data_day[ind.testing, ]    #Testing/validation dataset using input sampling
294
  
295
  ns<-nrow(data_s)
296
  nv<-nrow(data_v)
297
  #i=1
298
  date_proc<-sampling_dat$date[i]
299
  date_proc<-strptime(sampling_dat$date[i], "%Y%m%d")   # interpolation date being processed
300
  mo<-as.integer(strftime(date_proc, "%m"))          # current month of the date being processed
301
  day<-as.integer(strftime(date_proc, "%d"))
302
  year<-as.integer(strftime(date_proc, "%Y"))
303
  
304
  ##########
305
  # STEP 2 - JOIN DAILY AND MONTHLY STATION INFORMATION
306
  ##########
307
  
308
  modst<-dst[dst$month==mo,] #Subsetting dataset for the relevant month of the date being processed
309
  #Change to y_var...could be TMin
310
  #modst$LSTD_bias <- modst$LST-modst$y_var
311
  modst$LSTD_bias <- modst$LST-modst$TMax; #That is the difference between the monthly LST mean and monthly station mean
312
  
313
  #Clearn out this part: make this a function call
314
  x<-as.data.frame(data_v)
315
  d<-as.data.frame(data_s)
316
  #x[x$value==-999.9]<-NA
317
  for (j in 1:nrow(x)){
318
    if (x$value[j]== -999.9){
319
      x$value[j]<-NA
320
    }
321
  }
322
  for (j in 1:nrow(d)){
323
    if (d$value[j]== -999.9){
324
      d$value[j]<-NA
325
    }
326
  }
327
  #x[x$value==-999.9]<-NA
328
  #d[d$value==-999.9]<-NA
329
  pos<-match("value",names(d)) #Find column with name "value"
330
  #names(d)[pos]<-c("dailyTmax")
331
  names(d)[pos]<-y_var_name
332
  names(x)[pos]<-y_var_name
333
  #names(x)[pos]<-c("dailyTmax")
334
  pos<-match("station",names(d)) #Find column with name "value"
335
  names(d)[pos]<-c("id")
336
  names(x)[pos]<-c("id")
337
  names(modst)[1]<-c("id")       #modst contains the average tmax per month for every stations...
338
  
339
  dmoday <-merge(modst,d,by="id",suffixes=c("",".y2"))  
340
  xmoday <-merge(modst,x,by="id",suffixes=c("",".y2"))  
341
  mod_pat<-glob2rx("*.y2")   
342
  var_pat<-grep(mod_pat,names(dmoday),value=FALSE) # using grep with "value" extracts the matching names
343
  dmoday<-dmoday[,-var_pat]
344
  mod_pat<-glob2rx("*.y2")   
345
  var_pat<-grep(mod_pat,names(xmoday),value=FALSE) # using grep with "value" extracts the matching names
346
  xmoday<-xmoday[,-var_pat] #Removing duplicate columns
347
  
348
  data_v<-xmoday
349
  
350
  #dmoday contains the daily tmax values for training with TMax being the monthly station tmax mean
351
  #xmoday contains the daily tmax values for validation with TMax being the monthly station tmax mean
352
  
353
  ##########
354
  # STEP 3 - interpolate daily delta across space
355
  ##########
356
  
357
  #Change to take into account TMin and TMax
358
  daily_delta<-dmoday$dailyTmax-dmoday$TMax
359
  daily_delta_xy<-as.matrix(cbind(dmoday$x,dmoday$y))
360
  fitdelta<-Krig(daily_delta_xy,daily_delta,theta=1e5) #use TPS or krige
361
  mod_krtmp2<-fitdelta
362
  model_name<-paste("mod_kr","day",sep="_")
363
  data_s<-dmoday #put the 
364
  data_s$daily_delta<-daily_delta
365
  
366
  #########
367
  # STEP 4 - Calculate daily predictions - T(day) = clim(month) + delta(day)
368
  #########
369
  
370
  rast_clim_list<-rast_clim_yearlist[[mo]]  #select relevant month
371
  rast_clim_month<-raster(rast_clim_list[[1]])
372
  
373
  daily_delta_rast<-interpolate(rast_clim_month,fitdelta) #Interpolation of the bias surface...
374
  
375
  #Saving kriged surface in raster images
376
  data_name<-paste("daily_delta_",sampling_dat$date[i],"_",sampling_dat$prop[i],
377
                   "_",sampling_dat$run_samp[i],sep="")
378
  raster_name_delta<-paste("fusion_",data_name,out_prefix,".tif", sep="")
379
  writeRaster(daily_delta_rast, filename=raster_name_delta,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
380
  
381
  #Now predict daily after having selected the relevant month
382
  temp_list<-vector("list",length(rast_clim_list))  
383
  for (j in 1:length(rast_clim_list)){
384
    rast_clim_month<-raster(rast_clim_list[[j]])
385
    temp_predicted<-rast_clim_month+daily_delta_rast
386
    
387
    data_name<-paste(y_var_name,"_predicted_",names(rast_clim_list)[j],"_",
388
                     sampling_dat$date[i],"_",sampling_dat$prop[i],
389
                     "_",sampling_dat$run_samp[i],sep="")
390
    raster_name<-paste("fusion_",data_name,out_prefix,".tif", sep="")
391
    writeRaster(temp_predicted, filename=raster_name,overwrite=TRUE) 
392
    temp_list[[j]]<-raster_name
393
  }
394
  
395
  ##########
396
  # STEP 5 - Prepare output object to return
397
  ##########
398
  
399
  mod_krtmp2<-fitdelta
400
  model_name<-paste("mod_kr","day",sep="_")
401
  names(temp_list)<-names(rast_clim_list)
402
  coordinates(data_s)<-cbind(data_s$x,data_s$y)
403
  proj4string(data_s)<-proj_str
404
  coordinates(data_v)<-cbind(data_v$x,data_v$y)
405
  proj4string(data_v)<-proj_str
406
  
407
  delta_obj<-list(temp_list,rast_clim_list,raster_name_delta,data_s,
408
                  data_v,sampling_dat[i,],mod_krtmp2)
409
  
410
  obj_names<-c(y_var_name,"clim","delta","data_s","data_v",
411
               "sampling_dat",model_name)
412
  names(delta_obj)<-obj_names
413
  save(delta_obj,file= paste("delta_obj_",sampling_dat$date[i],"_",sampling_dat$prop[i],
414
                                "_",sampling_dat$run_samp[i],out_prefix,".RData",sep=""))
415
  return(delta_obj)
416
  
417
}
418
 
(10-10/39)