1
|
#Function to be used with GAM_fusion_analysis_raster_prediction_mutlisampling.R
|
2
|
#runClimFusion<-function(r_stack,data_training,data_testing,data_training){
|
3
|
|
4
|
#Functions used in the script
|
5
|
|
6
|
predict_raster_model<-function(in_models,r_stack,out_filename){
|
7
|
#This functions performs predictions on a raster grid given input models.
|
8
|
#Arguments: list of fitted models, raster stack of covariates
|
9
|
#Output: spatial grid data frame of the subset of tiles
|
10
|
list_rast_pred<-vector("list",length(in_models))
|
11
|
for (i in 1:length(in_models)){
|
12
|
mod <-in_models[[i]] #accessing GAM model ojbect "j"
|
13
|
raster_name<-out_filename[[i]]
|
14
|
if (inherits(mod,"gam")) { #change to c("gam","autoKrige")
|
15
|
raster_pred<- predict(object=r_stack,model=mod,na.rm=FALSE) #Using the coeff to predict new values.
|
16
|
names(raster_pred)<-"y_pred"
|
17
|
writeRaster(raster_pred, filename=raster_name,overwrite=TRUE) #Writing the data in a raster file format...(IDRISI)
|
18
|
#print(paste("Interpolation:","mod", j ,sep=" "))
|
19
|
list_rast_pred[[i]]<-raster_name
|
20
|
}
|
21
|
}
|
22
|
if (inherits(mod,"try-error")) {
|
23
|
print(paste("no gam model fitted:",mod[1],sep=" ")) #change message for any model type...
|
24
|
}
|
25
|
return(list_rast_pred)
|
26
|
}
|
27
|
|
28
|
fit_models<-function(list_formulas,data_training){
|
29
|
#This functions several models and returns model objects.
|
30
|
#Arguments: - list of formulas for GAM models
|
31
|
# - fitting data in a data.frame or SpatialPointDataFrame
|
32
|
#Output: list of model objects
|
33
|
list_fitted_models<-vector("list",length(list_formulas))
|
34
|
for (k in 1:length(list_formulas)){
|
35
|
formula<-list_formulas[[k]]
|
36
|
mod<- try(gam(formula, data=data_training)) #change to any model!!
|
37
|
#mod<- try(autoKrige(formula, input_data=data_s,new_data=s_sgdf,data_variogram=data_s))
|
38
|
model_name<-paste("mod",k,sep="")
|
39
|
assign(model_name,mod)
|
40
|
list_fitted_models[[k]]<-mod
|
41
|
}
|
42
|
return(list_fitted_models)
|
43
|
}
|
44
|
|
45
|
####
|
46
|
#TODO:
|
47
|
#Add log file and calculate time and sizes for processes-outputs
|
48
|
runClimCAI<-function(j){
|
49
|
#Make this a function with multiple argument that can be used by mcmapply??
|
50
|
#This creates clim fusion layers...still needs more code testing
|
51
|
|
52
|
#Model and response variable can be changed without affecting the script
|
53
|
prop_month<-0 #proportion retained for validation
|
54
|
run_samp<-1
|
55
|
|
56
|
list_formulas<-lapply(list_models,as.formula,env=.GlobalEnv) #mulitple arguments passed to lapply!!
|
57
|
|
58
|
data_month<-dst[dst$month==j,] #Subsetting dataset for the relevant month of the date being processed
|
59
|
LST_name<-lst_avg[j] # name of LST month to be matched
|
60
|
data_month$LST<-data_month[[LST_name]]
|
61
|
|
62
|
#TMax to model...
|
63
|
data_month$y_var<-data_month$TMax #Adding TMax as the variable modeled
|
64
|
mod_list<-fit_models(list_formulas,data_month) #only gam at this stage
|
65
|
cname<-paste("mod",1:length(mod_list),sep="") #change to more meaningful name?
|
66
|
names(mod_list)<-cname
|
67
|
#Adding layer LST to the raster stack
|
68
|
pos<-match("elev",names(s_raster))
|
69
|
layerNames(s_raster)[pos]<-"elev_1"
|
70
|
|
71
|
pos<-match("LST",names(s_raster)) #Find the position of the layer with name "LST", if not present pos=NA
|
72
|
s_raster<-dropLayer(s_raster,pos) # If it exists drop layer
|
73
|
LST<-subset(s_raster,LST_name)
|
74
|
names(LST)<-"LST"
|
75
|
#Screen for extreme values": this needs more thought, min and max val vary with regions
|
76
|
#min_val<-(-15+273.16) #if values less than -15C then screen out (note the Kelvin units that will need to be changed later in all datasets)
|
77
|
#r1[r1 < (min_val)]<-NA
|
78
|
s_raster<-addLayer(s_raster,LST) #Adding current month
|
79
|
|
80
|
#Now generate file names for the predictions...
|
81
|
list_out_filename<-vector("list",length(mod_list))
|
82
|
names(list_out_filename)<-cname
|
83
|
|
84
|
for (k in 1:length(list_out_filename)){
|
85
|
#j indicate which month is predicted
|
86
|
data_name<-paste("clim_month_",j,"_",cname[k],"_",prop_month,
|
87
|
"_",run_samp,sep="")
|
88
|
raster_name<-paste("fusion_",data_name,out_prefix,".tif", sep="")
|
89
|
list_out_filename[[k]]<-raster_name
|
90
|
}
|
91
|
|
92
|
#now predict values for raster image...
|
93
|
rast_clim_list<-predict_raster_model(mod_list,s_raster,list_out_filename)
|
94
|
names(rast_clim_list)<-cname
|
95
|
#Some modles will not be predicted...remove them
|
96
|
rast_clim_list<-rast_clim_list[!sapply(rast_clim_list,is.null)] #remove NULL elements in list
|
97
|
|
98
|
#Adding Kriging for Climatology options
|
99
|
|
100
|
clim_xy<-coordinates(data_month)
|
101
|
fitclim<-Krig(clim_xy,data_month$TMax,theta=1e5) #use TPS or krige
|
102
|
mod_krtmp1<-fitclim
|
103
|
model_name<-"mod_kr"
|
104
|
|
105
|
clim_rast<-interpolate(LST,fitclim) #interpolation using function from raster package
|
106
|
#Saving kriged surface in raster images
|
107
|
#data_name<-paste("clim_month_",j,"_",model_name,"_",prop_month,
|
108
|
# "_",run_samp,sep="")
|
109
|
#raster_name_clim<-paste("fusion_",data_name,out_prefix,".tif", sep="")
|
110
|
#writeRaster(clim_rast, filename=raster_name_clim,overwrite=TRUE) #Writing the data in a raster file format...(IDRISI)
|
111
|
|
112
|
#now climatology layer
|
113
|
#clim_rast<-LST-bias_rast
|
114
|
data_name<-paste("clim_month_",j,"_",model_name,"_",prop_month,
|
115
|
"_",run_samp,sep="")
|
116
|
raster_name_clim<-paste("fusion_",data_name,out_prefix,".tif", sep="")
|
117
|
writeRaster(clim_rast, filename=raster_name_clim,overwrite=TRUE) #Writing the data in a raster file format...(IDRISI)
|
118
|
|
119
|
#Adding to current objects
|
120
|
mod_list[[model_name]]<-mod_krtmp1
|
121
|
#rast_bias_list[[model_name]]<-raster_name_bias
|
122
|
rast_clim_list[[model_name]]<-raster_name_clim
|
123
|
|
124
|
#Prepare object to return
|
125
|
clim_obj<-list(rast_clim_list,data_month,mod_list,list_formulas)
|
126
|
names(clim_obj)<-c("clim","data_month","mod","formulas")
|
127
|
|
128
|
save(clim_obj,file= paste("clim_obj_month_",j,"_",out_prefix,".RData",sep=""))
|
129
|
|
130
|
return(clim_obj)
|
131
|
}
|
132
|
#
|
133
|
|
134
|
runClim_KGFusion<-function(j){
|
135
|
|
136
|
#Make this a function with multiple argument that can be used by mcmapply??
|
137
|
#This creates clim fusion layers...
|
138
|
#Parameters:
|
139
|
|
140
|
#1)s_raster: brick of covariates : could pass as argument only specific variables??
|
141
|
#2)dst: monthly data (infile_monthly): could pass only the subset from the month??
|
142
|
#3)list_models
|
143
|
#4)brick names covarnames
|
144
|
#5)out_prefix
|
145
|
|
146
|
|
147
|
## STEP 1: PARSE PARAMETERS AND ARGUMENTS
|
148
|
|
149
|
#Model and response variable can be changed without affecting the script
|
150
|
prop_month<-0 #proportion retained for validation
|
151
|
run_samp<-1
|
152
|
|
153
|
## STEP 2: PREPARE DATA
|
154
|
|
155
|
data_month<-dst[dst$month==j,] #Subsetting dataset for the relevant month of the date being processed
|
156
|
LST_name<-lst_avg[j] # name of LST month to be matched
|
157
|
data_month$LST<-data_month[[LST_name]]
|
158
|
|
159
|
#Adding layer LST to the raster stack
|
160
|
covar_rast<-s_raster
|
161
|
pos<-match("LST",names(s_raster)) #Find the position of the layer with name "LST", if not present pos=NA
|
162
|
s_raster<-dropLayer(s_raster,pos) # If it exists drop layer
|
163
|
LST<-subset(s_raster,LST_name)
|
164
|
names(LST)<-"LST"
|
165
|
s_raster<-addLayer(s_raster,LST) #Adding current month
|
166
|
|
167
|
#LST bias to model...
|
168
|
#if (var==TMAX): will need to modify to take into account TMAX, TMIN and LST_day,LST_night
|
169
|
data_month$LSTD_bias<-data_month$LST-data_month$TMax
|
170
|
data_month$y_var<-data_month$LSTD_bias #Adding bias as the variable modeled
|
171
|
|
172
|
#### STEP3: NOW FIT AND PREDICT MODEL
|
173
|
|
174
|
list_formulas<-lapply(list_models,as.formula,env=.GlobalEnv) #mulitple arguments passed to lapply!!
|
175
|
|
176
|
mod_list<-fit_models(list_formulas,data_month) #only gam at this stage
|
177
|
cname<-paste("mod",1:length(mod_list),sep="") #change to more meaningful name?
|
178
|
names(mod_list)<-cname
|
179
|
|
180
|
#Now generate file names for the predictions...
|
181
|
list_out_filename<-vector("list",length(mod_list))
|
182
|
names(list_out_filename)<-cname
|
183
|
|
184
|
for (k in 1:length(list_out_filename)){
|
185
|
#j indicate which month is predicted
|
186
|
data_name<-paste("bias_LST_month_",j,"_",cname[k],"_",prop_month,
|
187
|
"_",run_samp,sep="")
|
188
|
raster_name<-paste("fusion_",data_name,out_prefix,".tif", sep="")
|
189
|
list_out_filename[[k]]<-raster_name
|
190
|
}
|
191
|
|
192
|
#now predict values for raster image...
|
193
|
rast_bias_list<-predict_raster_model(mod_list,s_raster,list_out_filename)
|
194
|
names(rast_bias_list)<-cname
|
195
|
#Some modles will not be predicted...remove them
|
196
|
rast_bias_list<-rast_bias_list[!sapply(rast_bias_list,is.null)] #remove NULL elements in list
|
197
|
|
198
|
mod_rast<-stack(rast_bias_list) #stack of bias raster images from models
|
199
|
rast_clim_list<-vector("list",nlayers(mod_rast))
|
200
|
names(rast_clim_list)<-names(rast_bias_list)
|
201
|
for (k in 1:nlayers(mod_rast)){
|
202
|
clim_fus_rast<-LST-subset(mod_rast,k)
|
203
|
data_name<-paste("clim_LST_month_",j,"_",names(rast_clim_list)[k],"_",prop_month,
|
204
|
"_",run_samp,sep="")
|
205
|
raster_name<-paste("fusion_",data_name,out_prefix,".tif", sep="")
|
206
|
rast_clim_list[[k]]<-raster_name
|
207
|
writeRaster(clim_fus_rast, filename=raster_name,overwrite=TRUE) #Wri
|
208
|
}
|
209
|
|
210
|
#### STEP 4:Adding Kriging for Climatology options
|
211
|
|
212
|
bias_xy<-coordinates(data_month)
|
213
|
fitbias<-Krig(bias_xy,data_month$LSTD_bias,theta=1e5) #use TPS or krige
|
214
|
mod_krtmp1<-fitbias
|
215
|
model_name<-"mod_kr"
|
216
|
|
217
|
|
218
|
bias_rast<-interpolate(LST,fitbias) #interpolation using function from raster package
|
219
|
#Saving kriged surface in raster images
|
220
|
data_name<-paste("bias_LST_month_",j,"_",model_name,"_",prop_month,
|
221
|
"_",run_samp,sep="")
|
222
|
raster_name_bias<-paste("fusion_",data_name,out_prefix,".tif", sep="")
|
223
|
writeRaster(bias_rast, filename=raster_name_bias,overwrite=TRUE) #Writing the data in a raster file format...(IDRISI)
|
224
|
|
225
|
#now climatology layer
|
226
|
clim_rast<-LST-bias_rast
|
227
|
data_name<-paste("clim_LST_month_",j,"_",model_name,"_",prop_month,
|
228
|
"_",run_samp,sep="")
|
229
|
raster_name_clim<-paste("fusion_",data_name,out_prefix,".tif", sep="")
|
230
|
writeRaster(clim_rast, filename=raster_name_clim,overwrite=TRUE) #Writing the data in a raster file format...(IDRISI)
|
231
|
|
232
|
#Adding to current objects
|
233
|
mod_list[[model_name]]<-mod_krtmp1
|
234
|
rast_bias_list[[model_name]]<-raster_name_bias
|
235
|
rast_clim_list[[model_name]]<-raster_name_clim
|
236
|
|
237
|
#### STEP 5: Prepare object and return
|
238
|
|
239
|
clim_obj<-list(rast_bias_list,rast_clim_list,data_month,mod_list,list_formulas)
|
240
|
names(clim_obj)<-c("bias","clim","data_month","mod","formulas")
|
241
|
|
242
|
save(clim_obj,file= paste("clim_obj_month_",j,"_",out_prefix,".RData",sep=""))
|
243
|
|
244
|
return(clim_obj)
|
245
|
}
|
246
|
|
247
|
## Run function for kriging...?
|
248
|
|
249
|
#runGAMFusion <- function(i) { # loop over dates
|
250
|
runGAMFusion <- function(i,list_param) { # loop over dates
|
251
|
#### Change this to allow explicitly arguments...
|
252
|
#Arguments:
|
253
|
#1)list of climatology files for all models...(12*nb of models)
|
254
|
#2)sampling_obj$data_day_gcn: ghcn.subsets (data per date )
|
255
|
#4)sampling_dat: list of sampling information for every run (proporation, month,sample)
|
256
|
#5)ampling_index : list of training
|
257
|
#6)dst: data at the monthly time scale
|
258
|
#7)var:
|
259
|
#8)y_var_name:
|
260
|
#9)out_prefix
|
261
|
|
262
|
### PARSING INPUT ARGUMENTS
|
263
|
|
264
|
#list_param_runGAMFusion<-list(i,clim_yearlist,sampling_obj,var,y_var_name, out_prefix)
|
265
|
rast_clim_yearlist<-list_param$clim_yearlist
|
266
|
sampling_obj<-list_param$sampling_obj
|
267
|
ghcn.subsets<-sampling_obj$ghcn_data_day
|
268
|
sampling_dat <- sampling_obj$sampling_dat
|
269
|
sampling <- sampling_obj$sampling_index
|
270
|
var<-list_param$var
|
271
|
y_var_name<-list_param$y_var_name
|
272
|
out_prefix<-list_param$out_prefix
|
273
|
dst<-list_param$dst #monthly station dataset
|
274
|
|
275
|
##########
|
276
|
# STEP 1 - interpolate delta across space
|
277
|
############# Read in information and get traiing and testing stations
|
278
|
|
279
|
date<-strptime(sampling_dat$date[i], "%Y%m%d") # interpolation date being processed
|
280
|
month<-strftime(date, "%m") # current month of the date being processed
|
281
|
LST_month<-paste("mm_",month,sep="") # name of LST month to be matched
|
282
|
proj_str<-proj4string(dst) #get the local projection information from monthly data
|
283
|
|
284
|
###Regression part 1: Creating a validation dataset by creating training and testing datasets
|
285
|
data_day<-ghcn.subsets[[i]]
|
286
|
mod_LST <- ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))] #Match interpolation date and monthly LST average
|
287
|
data_day$LST <- as.data.frame(mod_LST)[,1] #Add the variable LST to the dataset
|
288
|
dst$LST<-dst[[LST_month]] #Add the variable LST to the monthly dataset
|
289
|
|
290
|
ind.training<-sampling[[i]]
|
291
|
ind.testing <- setdiff(1:nrow(data_day), ind.training)
|
292
|
data_s <- data_day[ind.training, ] #Training dataset currently used in the modeling
|
293
|
data_v <- data_day[ind.testing, ] #Testing/validation dataset using input sampling
|
294
|
|
295
|
ns<-nrow(data_s)
|
296
|
nv<-nrow(data_v)
|
297
|
#i=1
|
298
|
date_proc<-sampling_dat$date[i]
|
299
|
date_proc<-strptime(sampling_dat$date[i], "%Y%m%d") # interpolation date being processed
|
300
|
mo<-as.integer(strftime(date_proc, "%m")) # current month of the date being processed
|
301
|
day<-as.integer(strftime(date_proc, "%d"))
|
302
|
year<-as.integer(strftime(date_proc, "%Y"))
|
303
|
|
304
|
##########
|
305
|
# STEP 2 - JOIN DAILY AND MONTHLY STATION INFORMATION
|
306
|
##########
|
307
|
|
308
|
modst<-dst[dst$month==mo,] #Subsetting dataset for the relevant month of the date being processed
|
309
|
#Change to y_var...could be TMin
|
310
|
#modst$LSTD_bias <- modst$LST-modst$y_var
|
311
|
modst$LSTD_bias <- modst$LST-modst$TMax; #That is the difference between the monthly LST mean and monthly station mean
|
312
|
|
313
|
#Clearn out this part: make this a function call
|
314
|
x<-as.data.frame(data_v)
|
315
|
d<-as.data.frame(data_s)
|
316
|
#x[x$value==-999.9]<-NA
|
317
|
for (j in 1:nrow(x)){
|
318
|
if (x$value[j]== -999.9){
|
319
|
x$value[j]<-NA
|
320
|
}
|
321
|
}
|
322
|
for (j in 1:nrow(d)){
|
323
|
if (d$value[j]== -999.9){
|
324
|
d$value[j]<-NA
|
325
|
}
|
326
|
}
|
327
|
#x[x$value==-999.9]<-NA
|
328
|
#d[d$value==-999.9]<-NA
|
329
|
pos<-match("value",names(d)) #Find column with name "value"
|
330
|
#names(d)[pos]<-c("dailyTmax")
|
331
|
names(d)[pos]<-y_var_name
|
332
|
names(x)[pos]<-y_var_name
|
333
|
#names(x)[pos]<-c("dailyTmax")
|
334
|
pos<-match("station",names(d)) #Find column with name "value"
|
335
|
names(d)[pos]<-c("id")
|
336
|
names(x)[pos]<-c("id")
|
337
|
names(modst)[1]<-c("id") #modst contains the average tmax per month for every stations...
|
338
|
|
339
|
dmoday <-merge(modst,d,by="id",suffixes=c("",".y2"))
|
340
|
xmoday <-merge(modst,x,by="id",suffixes=c("",".y2"))
|
341
|
mod_pat<-glob2rx("*.y2")
|
342
|
var_pat<-grep(mod_pat,names(dmoday),value=FALSE) # using grep with "value" extracts the matching names
|
343
|
dmoday<-dmoday[,-var_pat]
|
344
|
mod_pat<-glob2rx("*.y2")
|
345
|
var_pat<-grep(mod_pat,names(xmoday),value=FALSE) # using grep with "value" extracts the matching names
|
346
|
xmoday<-xmoday[,-var_pat] #Removing duplicate columns
|
347
|
|
348
|
data_v<-xmoday
|
349
|
|
350
|
#dmoday contains the daily tmax values for training with TMax being the monthly station tmax mean
|
351
|
#xmoday contains the daily tmax values for validation with TMax being the monthly station tmax mean
|
352
|
|
353
|
##########
|
354
|
# STEP 3 - interpolate daily delta across space
|
355
|
##########
|
356
|
|
357
|
#Change to take into account TMin and TMax
|
358
|
daily_delta<-dmoday$dailyTmax-dmoday$TMax
|
359
|
daily_delta_xy<-as.matrix(cbind(dmoday$x,dmoday$y))
|
360
|
fitdelta<-Krig(daily_delta_xy,daily_delta,theta=1e5) #use TPS or krige
|
361
|
mod_krtmp2<-fitdelta
|
362
|
model_name<-paste("mod_kr","day",sep="_")
|
363
|
data_s<-dmoday #put the
|
364
|
data_s$daily_delta<-daily_delta
|
365
|
|
366
|
#########
|
367
|
# STEP 4 - Calculate daily predictions - T(day) = clim(month) + delta(day)
|
368
|
#########
|
369
|
|
370
|
rast_clim_list<-rast_clim_yearlist[[mo]] #select relevant month
|
371
|
rast_clim_month<-raster(rast_clim_list[[1]])
|
372
|
|
373
|
daily_delta_rast<-interpolate(rast_clim_month,fitdelta) #Interpolation of the bias surface...
|
374
|
|
375
|
#Saving kriged surface in raster images
|
376
|
data_name<-paste("daily_delta_",sampling_dat$date[i],"_",sampling_dat$prop[i],
|
377
|
"_",sampling_dat$run_samp[i],sep="")
|
378
|
raster_name_delta<-paste("fusion_",data_name,out_prefix,".tif", sep="")
|
379
|
writeRaster(daily_delta_rast, filename=raster_name_delta,overwrite=TRUE) #Writing the data in a raster file format...(IDRISI)
|
380
|
|
381
|
#Now predict daily after having selected the relevant month
|
382
|
temp_list<-vector("list",length(rast_clim_list))
|
383
|
for (j in 1:length(rast_clim_list)){
|
384
|
rast_clim_month<-raster(rast_clim_list[[j]])
|
385
|
temp_predicted<-rast_clim_month+daily_delta_rast
|
386
|
|
387
|
data_name<-paste(y_var_name,"_predicted_",names(rast_clim_list)[j],"_",
|
388
|
sampling_dat$date[i],"_",sampling_dat$prop[i],
|
389
|
"_",sampling_dat$run_samp[i],sep="")
|
390
|
raster_name<-paste("fusion_",data_name,out_prefix,".tif", sep="")
|
391
|
writeRaster(temp_predicted, filename=raster_name,overwrite=TRUE)
|
392
|
temp_list[[j]]<-raster_name
|
393
|
}
|
394
|
|
395
|
##########
|
396
|
# STEP 5 - Prepare output object to return
|
397
|
##########
|
398
|
|
399
|
mod_krtmp2<-fitdelta
|
400
|
model_name<-paste("mod_kr","day",sep="_")
|
401
|
names(temp_list)<-names(rast_clim_list)
|
402
|
coordinates(data_s)<-cbind(data_s$x,data_s$y)
|
403
|
proj4string(data_s)<-proj_str
|
404
|
coordinates(data_v)<-cbind(data_v$x,data_v$y)
|
405
|
proj4string(data_v)<-proj_str
|
406
|
|
407
|
delta_obj<-list(temp_list,rast_clim_list,raster_name_delta,data_s,
|
408
|
data_v,sampling_dat[i,],mod_krtmp2)
|
409
|
|
410
|
obj_names<-c(y_var_name,"clim","delta","data_s","data_v",
|
411
|
"sampling_dat",model_name)
|
412
|
names(delta_obj)<-obj_names
|
413
|
save(delta_obj,file= paste("delta_obj_",sampling_dat$date[i],"_",sampling_dat$prop[i],
|
414
|
"_",sampling_dat$run_samp[i],out_prefix,".RData",sep=""))
|
415
|
return(delta_obj)
|
416
|
|
417
|
}
|
418
|
|