1
|
#Functions to be used with GAM_fusion_analysis_raster_prediction_mutlisampling.R
|
2
|
#runClimFusion<-function(r_stack,data_training,data_testing,data_training){
|
3
|
|
4
|
####
|
5
|
#TODO:
|
6
|
#Add log file and calculate time and sizes for processes-outputs
|
7
|
|
8
|
runClim_KGFusion <- function(j) {
  # Creates the monthly "climatology fusion" layers for month j.
  #
  # Steps:
  #  1) fit every formula in list_models to the monthly LST bias (LST - TMax);
  #  2) predict each fitted GAM over the covariate stack and write one bias
  #     raster per model;
  #  3) write one climatology raster per model (LST - predicted bias);
  #  4) fit a Krig surface to the station bias, interpolate it and write the
  #     corresponding bias and climatology rasters ("mod_kr");
  #  5) save the result list to an .RData file and return it.
  #
  # Argument:
  #  j: month index, used to subset dst (dst$month == j) and to pick the LST
  #     layer name from lst_avg.
  #
  # Objects read from the enclosing/global environment (not passed in):
  #  list_models, dst, lst_avg, s_raster, out_prefix.
  # gam(), Krig(), interpolate(), stack(), writeRaster()... must already be
  # attached by the calling script (presumably mgcv, fields, raster, sp --
  # TODO confirm against the main script).
  #
  # Returns a list with elements "bias", "clim" (named lists of raster file
  # names), "data_month", "mod" (fitted models) and "formulas".
  #
  # TODO (from original author): make this a function with explicit arguments
  # so that it can be used with mcmapply.

  # Functions used in the script

  predict_raster_model <- function(in_models, r_stack, out_filename) {
    # Predicts each fitted model over a raster stack and writes the result.
    # Arguments: in_models    - list of fitted models (or "try-error" objects)
    #            r_stack      - raster stack of covariates used for prediction
    #            out_filename - list of output file names, one per model
    # Output: list of the same length as in_models holding the written file
    #         name for each "gam" model and NULL for anything else.
    list_rast_pred <- vector("list", length(in_models))
    for (i in seq_along(in_models)) {
      mod <- in_models[[i]]
      raster_name <- out_filename[[i]]
      if (inherits(mod, "gam")) { # change to c("gam","autoKrige")
        # Predict from the stack passed in, not from a variable of the caller.
        raster_pred <- predict(object = r_stack, model = mod, na.rm = FALSE)
        names(raster_pred) <- "y_pred"
        writeRaster(raster_pred, filename = raster_name, overwrite = TRUE)
        print(paste("Interpolation:", "mod", i, sep = " "))
        list_rast_pred[[i]] <- raster_name
      } else if (inherits(mod, "try-error")) {
        # Checked inside the loop so that every failed fit is reported.
        print(paste("no gam model fitted:", mod[1], sep = " ")) # change message for any model type...
      }
    }
    list_rast_pred
  }

  fit_models <- function(list_formulas, data_training) {
    # Fits one GAM per formula and returns the model objects.
    # Arguments: - list of formulas for GAM models
    #            - fitting data in a data.frame or SpatialPointDataFrame
    # Output: list of model objects; a failed fit is kept as a "try-error"
    #         object at the same position.
    list_fitted_models <- vector("list", length(list_formulas))
    for (k in seq_along(list_formulas)) {
      formula <- list_formulas[[k]]
      list_fitted_models[[k]] <- try(gam(formula, data = data_training)) # change to any model!!
    }
    list_fitted_models
  }

  # Model and response variable can be changed without affecting the script
  prop_month <- 0 # proportion retained for validation
  run_samp <- 1

  make_raster_name <- function(kind, model_label) {
    # Builds "fusion_<kind>_LST_month_<j>_<model>_<prop>_<run><prefix>.tif".
    paste0("fusion_", kind, "_LST_month_", j, "_", model_label, "_",
           prop_month, "_", run_samp, out_prefix, ".tif")
  }

  list_formulas <- lapply(list_models, as.formula, env = .GlobalEnv)

  # Subset the monthly dataset for the month being processed
  data_month <- dst[dst$month == j, ]
  LST_name <- lst_avg[j] # name of the LST layer/column for this month
  data_month$LST <- data_month[[LST_name]]

  # LST bias is the variable being modeled
  data_month$LSTD_bias <- data_month$LST - data_month$TMax
  data_month$y_var <- data_month$LSTD_bias
  mod_list <- fit_models(list_formulas, data_month) # only gam at this stage
  cname <- sprintf("mod%d", seq_along(mod_list)) # change to more meaningful name?
  names(mod_list) <- cname

  # Rename layer "elev" to "elev_1" (presumably the name used in the model
  # formulas -- TODO confirm). names() is used as elsewhere in this file.
  pos <- match("elev", names(s_raster))
  if (!is.na(pos)) {
    names(s_raster)[pos] <- "elev_1"
  }

  # Replace any existing "LST" layer by the LST layer of the current month
  pos <- match("LST", names(s_raster))
  if (!is.na(pos)) {
    s_raster <- dropLayer(s_raster, pos)
  }
  LST <- subset(s_raster, LST_name)
  names(LST) <- "LST"
  # Screening for extreme values needs more thought: min and max values vary
  # with regions (and units are Kelvin at this stage).
  s_raster <- addLayer(s_raster, LST)

  # Output file names for the bias predictions, one per model
  list_out_filename <- lapply(cname, function(m) make_raster_name("bias", m))
  names(list_out_filename) <- cname

  # Predict bias for every model over the raster stack
  rast_bias_list <- predict_raster_model(mod_list, s_raster, list_out_filename)
  names(rast_bias_list) <- cname
  # Models that were not predicted leave NULL entries: remove them
  rast_bias_list <- rast_bias_list[!vapply(rast_bias_list, is.null, logical(1))]

  # Climatology per model: LST minus predicted bias
  rast_clim_list <- list()
  if (length(rast_bias_list) > 0) {
    mod_rast <- stack(rast_bias_list) # stack of bias rasters from models
    rast_clim_list <- vector("list", nlayers(mod_rast))
    names(rast_clim_list) <- names(rast_bias_list)
    for (k in seq_len(nlayers(mod_rast))) {
      clim_fus_rast <- LST - subset(mod_rast, k)
      raster_name <- make_raster_name("clim", names(rast_clim_list)[k])
      rast_clim_list[[k]] <- raster_name
      writeRaster(clim_fus_rast, filename = raster_name, overwrite = TRUE)
    }
  }

  # Kriging option for the climatology
  bias_xy <- coordinates(data_month)
  fitbias <- Krig(bias_xy, data_month$LSTD_bias, theta = 1e5) # use TPS or krige
  model_name <- "mod_kr"

  bias_rast <- interpolate(LST, fitbias) # interpolation using the raster package
  raster_name_bias <- make_raster_name("bias", model_name)
  writeRaster(bias_rast, filename = raster_name_bias, overwrite = TRUE)

  clim_rast <- LST - bias_rast
  raster_name_clim <- make_raster_name("clim", model_name)
  writeRaster(clim_rast, filename = raster_name_clim, overwrite = TRUE)

  # Add the kriging results to the current objects
  mod_list[[model_name]] <- fitbias
  rast_bias_list[[model_name]] <- raster_name_bias
  rast_clim_list[[model_name]] <- raster_name_clim

  # Prepare, save and return the result object
  clim_obj <- list(bias = rast_bias_list,
                   clim = rast_clim_list,
                   data_month = data_month,
                   mod = mod_list,
                   formulas = list_formulas)

  save(clim_obj, file = paste0("clim_obj_month_", j, "_", out_prefix, ".RData"))

  clim_obj
}
|
144
|
|
145
|
## Run function for kriging...?
|
146
|
|
147
|
runGAMFusion <- function(i) { # loop over dates
  # Produces the daily predictions for sampling run i:
  #  1) splits the daily station data into training and testing sets;
  #  2) merges both sets with the monthly station data of the same month;
  #  3) fits a Krig surface to the daily deviation from the monthly mean
  #     (daily value - TMax) and interpolates it to a raster;
  #  4) adds this delta raster to every monthly climatology raster and writes
  #     one predicted raster per climatology model;
  #  5) saves the result list to an .RData file and returns it.
  #
  # Argument:
  #  i: index into sampling_dat, sampling and ghcn.subsets.
  #
  # Objects read from the enclosing/global environment (not passed in):
  #  sampling_dat (columns date, prop, run_samp), sampling, ghcn.subsets, dst,
  #  rast_clim_yearlist, y_var_name, out_prefix.
  #
  # Returns a list named c(y_var_name, "clim", "delta", "data_s", "data_v",
  # "sampling_dat", "mod_kr_day").
  #
  # TODO (from original author): change this to use explicit arguments:
  #  1) list of climatology files for all models (12 * nb of models)
  #  2) data_s: training  3) data_v: testing  4) list of dates
  #  5) stack of covariates (not needed at this stage)
  #  6) dst: data at the monthly time scale

  date_proc <- strptime(sampling_dat$date[i], "%Y%m%d") # date being processed
  mo <- as.integer(strftime(date_proc, "%m")) # month of that date
  LST_month <- paste0("mm_", strftime(date_proc, "%m")) # name of the monthly LST column
  proj_str <- proj4string(dst)

  # Tag shared by all output file names of this run: <date>_<prop>_<run_samp>
  sample_tag <- paste0(sampling_dat$date[i], "_", sampling_dat$prop[i],
                       "_", sampling_dat$run_samp[i])

  ### Regression part 1: create training and testing datasets
  data_day <- ghcn.subsets[[i]]
  # Match interpolation date and monthly LST average
  mod_LST <- data_day[, match(LST_month, names(data_day))]
  data_day$LST <- as.data.frame(mod_LST)[, 1]
  dst$LST <- dst[[LST_month]] # local copy of dst only

  ind.training <- sampling[[i]]
  # seq_len() keeps the testing index empty when data_day has no rows
  ind.testing <- setdiff(seq_len(nrow(data_day)), ind.training)
  data_s <- data_day[ind.training, ] # training dataset
  data_v <- data_day[ind.testing, ] # testing/validation dataset

  # Monthly data for the month being processed
  modst <- dst[dst$month == mo, ]
  # Difference between the monthly LST mean and the monthly station mean.
  # Change to y_var... could be TMin.
  modst$LSTD_bias <- modst$LST - modst$TMax

  x <- as.data.frame(data_v)
  d <- as.data.frame(data_s)
  # Recode the -999.9 missing-value flag. Vectorized so that empty testing
  # sets and values that are already NA do not raise an error.
  x$value[x$value %in% -999.9] <- NA
  d$value[d$value %in% -999.9] <- NA

  # Rename "value" to the modeled variable name and "station" to "id"
  names(d)[names(d) == "value"] <- y_var_name
  names(x)[names(x) == "value"] <- y_var_name
  names(d)[names(d) == "station"] <- "id"
  names(x)[names(x) == "station"] <- "id"
  # modst contains the average tmax per month for every station; its first
  # column is assumed to be the station identifier -- TODO confirm.
  names(modst)[1] <- "id"

  dmoday <- merge(modst, d, by = "id", suffixes = c("", ".y2"))
  xmoday <- merge(modst, x, by = "id", suffixes = c("", ".y2"))
  # Remove the duplicated columns coming from the daily data (suffix ".y2").
  # A logical mask is used because a negative index built from an empty
  # grep() result would drop every column.
  mod_pat <- glob2rx("*.y2")
  dmoday <- dmoday[, !grepl(mod_pat, names(dmoday)), drop = FALSE]
  xmoday <- xmoday[, !grepl(mod_pat, names(xmoday)), drop = FALSE]

  # dmoday: daily values for training, TMax being the monthly station mean
  # xmoday: daily values for validation, TMax being the monthly station mean
  data_v <- xmoday

  ##########
  # STEP 7 - interpolate delta across space
  ##########

  # The daily column was renamed to y_var_name above, so look it up by name.
  daily_delta <- dmoday[[y_var_name]] - dmoday$TMax
  daily_delta_xy <- as.matrix(cbind(dmoday$x, dmoday$y))
  fitdelta <- Krig(daily_delta_xy, daily_delta, theta = 1e5) # use TPS or krige
  model_name <- paste("mod_kr", "day", sep = "_")
  data_s <- dmoday
  data_s$daily_delta <- daily_delta

  #########
  # STEP 8 - assemble final answer - T=LST+Bias(interpolated)+delta(interpolated)
  #########

  rast_clim_list <- rast_clim_yearlist[[mo]] # climatology files of the month
  rast_clim_month <- raster(rast_clim_list[[1]]) # template grid for interpolation

  daily_delta_rast <- interpolate(rast_clim_month, fitdelta)

  # Save the kriged delta surface
  raster_name_delta <- paste0("fusion_", "daily_delta_", sample_tag,
                              out_prefix, ".tif")
  writeRaster(daily_delta_rast, filename = raster_name_delta, overwrite = TRUE)

  # Daily prediction for every climatology model: climatology + delta
  temp_list <- vector("list", length(rast_clim_list))
  for (k in seq_along(rast_clim_list)) {
    temp_predicted <- raster(rast_clim_list[[k]]) + daily_delta_rast
    raster_name <- paste0("fusion_", y_var_name, "_predicted_",
                          names(rast_clim_list)[k], "_", sample_tag,
                          out_prefix, ".tif")
    writeRaster(temp_predicted, filename = raster_name, overwrite = TRUE)
    temp_list[[k]] <- raster_name
  }
  names(temp_list) <- names(rast_clim_list)

  # Turn the merged training and testing tables into spatial objects
  coordinates(data_s) <- cbind(data_s$x, data_s$y)
  proj4string(data_s) <- proj_str
  coordinates(data_v) <- cbind(data_v$x, data_v$y)
  proj4string(data_v) <- proj_str

  delta_obj <- list(temp_list, rast_clim_list, raster_name_delta, data_s,
                    data_v, sampling_dat[i, ], fitdelta)
  names(delta_obj) <- c(y_var_name, "clim", "delta", "data_s", "data_v",
                        "sampling_dat", model_name)

  save(delta_obj, file = paste0("delta_obj_", sample_tag, out_prefix, ".RData"))

  delta_obj
}
|
288
|
|