|
1 |
#Function to be used with GAM_fusion_analysis_raster_prediction_mutlisampling.R
|
|
2 |
#runClimFusion<-function(r_stack,data_training,data_testing,data_training){
|
|
3 |
|
|
4 |
####
|
|
5 |
#TODO:
|
|
6 |
#Add log file and calculate time and sizes for processes-outputs
|
|
7 |
|
|
8 |
|
|
9 |
runClimFusion<-function(j){
|
|
10 |
#Make this a function with multiple argument that can be used by mcmapply??
|
|
11 |
#This creates clim fusion layers...
|
|
12 |
|
|
13 |
#Functions used in the script
|
|
14 |
predict_raster_model<-function(in_models,r_stack,out_filename){
|
|
15 |
#This functions performs predictions on a raster grid given input models.
|
|
16 |
#Arguments: list of fitted models, raster stack of covariates
|
|
17 |
#Output: spatial grid data frame of the subset of tiles
|
|
18 |
#s_sgdf<-as(r_stack,"SpatialGridDataFrame") #Conversion to spatial grid data frame
|
|
19 |
list_rast_pred<-vector("list",length(in_models))
|
|
20 |
for (i in 1:length(in_models)){
|
|
21 |
mod <-in_models[[i]] #accessing GAM model ojbect "j"
|
|
22 |
raster_name<-out_filename[[i]]
|
|
23 |
if (inherits(mod,"gam")) {
|
|
24 |
#rpred<- predict(mod, newdata=s_sgdf, se.fit = TRUE) #Using the coeff to predict new values.
|
|
25 |
#rast_pred2<- predict(object=s_raster,model=mod,na.rm=TRUE) #Using the coeff to predict new values.
|
|
26 |
raster_pred<- predict(object=s_raster,model=mod,na.rm=FALSE) #Using the coeff to predict new values.
|
|
27 |
names(raster_pred)<-"y_pred"
|
|
28 |
writeRaster(raster_pred, filename=raster_name,overwrite=TRUE) #Writing the data in a raster file format...(IDRISI)
|
|
29 |
print(paste("Interpolation:","mod", j ,sep=" "))
|
|
30 |
list_rast_pred[[i]]<-raster_name
|
|
31 |
}
|
|
32 |
}
|
|
33 |
if (inherits(mod,"try-error")) {
|
|
34 |
print(paste("no gam model fitted:",mod[1],sep=" "))
|
|
35 |
}
|
|
36 |
return(list_rast_pred)
|
|
37 |
}
|
|
38 |
|
|
39 |
fit_models<-function(list_formulas,data_training){
|
|
40 |
#This functions several models and returns model objects.
|
|
41 |
#Arguments: - list of formulas for GAM models
|
|
42 |
# - fitting data in a data.frame or SpatialPointDataFrame
|
|
43 |
#Output: list of model objects
|
|
44 |
list_fitted_models<-vector("list",length(list_formulas))
|
|
45 |
for (k in 1:length(list_formulas)){
|
|
46 |
formula<-list_formulas[[k]]
|
|
47 |
mod<- try(gam(formula, data=data_training))
|
|
48 |
model_name<-paste("mod",k,sep="")
|
|
49 |
assign(model_name,mod)
|
|
50 |
list_fitted_models[[k]]<-mod
|
|
51 |
}
|
|
52 |
return(list_fitted_models)
|
|
53 |
}
|
|
54 |
#Model and response variable can be changed without affecting the script
|
|
55 |
prop_month<-0 #proportion retained for validation
|
|
56 |
run_samp<-1
|
|
57 |
list_formulas<-vector("list",nmodels)
|
|
58 |
|
|
59 |
list_formulas[[1]] <- as.formula("y_var ~ s(elev_1)", env=.GlobalEnv)
|
|
60 |
list_formulas[[2]] <- as.formula("y_var ~ s(LST)", env=.GlobalEnv)
|
|
61 |
list_formulas[[3]] <- as.formula("y_var ~ s(elev_1,LST)", env=.GlobalEnv)
|
|
62 |
list_formulas[[4]] <- as.formula("y_var ~ s(lat) + s(lon)+ s(elev_1)", env=.GlobalEnv)
|
|
63 |
list_formulas[[5]] <- as.formula("y_var ~ s(lat,lon,elev_1)", env=.GlobalEnv)
|
|
64 |
list_formulas[[6]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST)", env=.GlobalEnv)
|
|
65 |
list_formulas[[7]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(LC2)", env=.GlobalEnv)
|
|
66 |
list_formulas[[8]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(LC6)", env=.GlobalEnv)
|
|
67 |
list_formulas[[9]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(DISTOC)", env=.GlobalEnv)
|
|
68 |
lst_avg<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12")
|
|
69 |
|
|
70 |
data_month<-dst[dst$month==j,] #Subsetting dataset for the relevant month of the date being processed
|
|
71 |
LST_name<-lst_avg[j] # name of LST month to be matched
|
|
72 |
data_month$LST<-data_month[[LST_name]]
|
|
73 |
|
|
74 |
#LST bias to model...
|
|
75 |
data_month$LSTD_bias<-data_month$LST-data_month$TMax
|
|
76 |
data_month$y_var<-data_month$LSTD_bias #Adding bias as the variable modeled
|
|
77 |
mod_list<-fit_models(list_formulas,data_month) #only gam at this stage
|
|
78 |
cname<-paste("mod",1:length(mod_list),sep="")
|
|
79 |
names(mod_list)<-cname
|
|
80 |
#Adding layer LST to the raster stack
|
|
81 |
pos<-match("elev",layerNames(s_raster))
|
|
82 |
layerNames(s_raster)[pos]<-"elev_1"
|
|
83 |
|
|
84 |
pos<-match("LST",names(s_raster)) #Find the position of the layer with name "LST", if not present pos=NA
|
|
85 |
s_raster<-dropLayer(s_raster,pos) # If it exists drop layer
|
|
86 |
LST<-subset(s_raster,LST_name)
|
|
87 |
names(LST)<-"LST"
|
|
88 |
#Screen for extreme values": this needs more thought, min and max val vary with regions
|
|
89 |
#min_val<-(-15+273.16) #if values less than -15C then screen out (note the Kelvin units that will need to be changed later in all datasets)
|
|
90 |
#r1[r1 < (min_val)]<-NA
|
|
91 |
s_raster<-addLayer(s_raster,LST) #Adding current month
|
|
92 |
|
|
93 |
#Now generate file names for the predictions...
|
|
94 |
list_out_filename<-vector("list",length(in_models))
|
|
95 |
|
|
96 |
for (k in 1:length(list_out_filename)){
|
|
97 |
data_name<-paste("bias_LST_month_",j,"_mod_",k,"_",prop_month,
|
|
98 |
"_",run_samp,sep="")
|
|
99 |
raster_name<-paste("fusion_",data_name,out_prefix,".tif", sep="")
|
|
100 |
list_out_filename[[k]]<-raster_name
|
|
101 |
}
|
|
102 |
|
|
103 |
#now predict values for raster image...
|
|
104 |
rast_list<-predict_raster_model(mod_list,s_raster,out_filename)
|
|
105 |
rast_list<-rast_list[!sapply(rast_list,is.null)] #remove NULL elements in list
|
|
106 |
|
|
107 |
mod_rast<-stack(rast_list)
|
|
108 |
rast_clim_list<-vector("list",nlayers(mod_rast))
|
|
109 |
for (k in 1:nlayers(mod_rast)){
|
|
110 |
clim_fus_rast<-LST-subset(mod_rast,k)
|
|
111 |
data_name<-paste("clim_LST_month_",j,"_mod_",k,"_",prop_month,
|
|
112 |
"_",run_samp,sep="")
|
|
113 |
raster_name<-paste("fusion_",data_name,out_prefix,".tif", sep="")
|
|
114 |
rast_clim_list[[k]]<-raster_name
|
|
115 |
writeRaster(clim_fus_rast, filename=raster_name,overwrite=TRUE) #Wri
|
|
116 |
}
|
|
117 |
clim_obj<-list(rast_list,rast_clim_list,data_month,mod_list,list_formulas)
|
|
118 |
names(clim_obj)<-c("bias","clim","data_month","mod","formulas")
|
|
119 |
return(clim_obj)
|
|
120 |
}
|
|
121 |
|
|
122 |
## Run function for kriging...?
|
|
123 |
|
|
124 |
|
1 |
125 |
runGAMFusion <- function(i) { # loop over dates
|
2 |
126 |
|
|
127 |
#Function used in the script
|
|
128 |
|
|
129 |
lookup<-function(r,lat,lon) {
|
|
130 |
#This functions extracts values in a projected raster
|
|
131 |
#given latitude and longitude vector locations
|
|
132 |
#Output: matrix with values
|
|
133 |
xy<-project(cbind(lon,lat),proj_str);
|
|
134 |
cidx<-cellFromXY(r,xy);
|
|
135 |
return(r[cidx])
|
|
136 |
}
|
|
137 |
|
|
138 |
|
3 |
139 |
date<-strptime(sampling_dat$date[i], "%Y%m%d") # interpolation date being processed
|
4 |
140 |
month<-strftime(date, "%m") # current month of the date being processed
|
5 |
141 |
LST_month<-paste("mm_",month,sep="") # name of LST month to be matched
|
... | ... | |
62 |
198 |
#sta_lola=modst[,c("lon","lat")] #Extracting locations of stations for the current month..
|
63 |
199 |
|
64 |
200 |
#proj_str="+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
|
65 |
|
lookup<-function(r,lat,lon) {
|
66 |
|
xy<-project(cbind(lon,lat),proj_str);
|
67 |
|
cidx<-cellFromXY(r,xy);
|
68 |
|
return(r[cidx])
|
69 |
|
}
|
|
201 |
|
70 |
202 |
#sta_tmax_from_lst=lookup(themolst,sta_lola$lat,sta_lola$lon) #Extracted values of LST for the stations
|
71 |
203 |
sta_tmax_from_lst<-modst$LST
|
72 |
204 |
#########
|
... | ... | |
74 |
206 |
#########
|
75 |
207 |
|
76 |
208 |
sta_bias=sta_tmax_from_lst-modst$TMax; #That is the difference between the monthly LST mean and monthly station mean
|
77 |
|
#Added by Benoit
|
78 |
209 |
modst$LSTD_bias<-sta_bias #Adding bias to data frame modst containning the monthly average for 10 years
|
79 |
210 |
|
80 |
211 |
#bias_xy=project(as.matrix(sta_lola),proj_str)
|
... | ... | |
138 |
269 |
# STEP 5 - interpolate bias
|
139 |
270 |
########
|
140 |
271 |
|
141 |
|
# ?? include covariates like elev, distance to coast, cloud frequency, tree heig
|
142 |
|
#fitbias<-Tps(bias_xy,sta_bias) #use TPS or krige
|
143 |
|
|
144 |
272 |
#Adding options to use only training stations : 07/11/2012
|
145 |
273 |
#bias_xy=project(as.matrix(sta_lola),proj_str)
|
146 |
274 |
#bias_xy2=project(as.matrix(c(dmoday$lon,dmoday$lat),proj_str)
|
... | ... | |
298 |
426 |
list_formulas[[8]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(LC6)", env=.GlobalEnv)
|
299 |
427 |
list_formulas[[9]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(DISTOC)", env=.GlobalEnv)
|
300 |
428 |
|
301 |
|
#list_formulas[[1]] <- as.formula("y_var ~ s(ELEV_SRTM)", env=.GlobalEnv)
|
302 |
|
#list_formulas[[2]] <- as.formula("y_var ~ s(lat,lon)", env=.GlobalEnv)
|
303 |
|
#list_formulas[[3]] <- as.formula("y_var~ s(lat,lon,ELEV_SRTM)", env=.GlobalEnv)
|
304 |
|
#list_formulas[[4]] <- as.formula("y_var~ s(lat) + s (lon) + s (ELEV_SRTM) + s(DISTOC)", env=.GlobalEnv)
|
305 |
|
#list_formulas[[5]] <- as.formula("y_var~ s(lat,lon,ELEV_SRTM) + s(Northness) + s (Eastness) + s(DISTOC)", env=.GlobalEnv)
|
306 |
|
#list_formulas[[6]] <- as.formula("y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC)", env=.GlobalEnv)
|
307 |
|
|
308 |
|
if (bias_prediction==1){
|
309 |
|
#mod1<- try(gam(formula1, data=data_month))
|
310 |
|
#mod2<- try(gam(formula2, data=data_month)) #modified nesting....from 3 to 2
|
311 |
|
#mod3<- try(gam(formula3, data=data_month))
|
312 |
|
#mod4<- try(gam(formula4, data=data_month))
|
313 |
|
#mod5<- try(gam(formula5, data=data_month))
|
314 |
|
#mod6<- try(gam(formula6, data=data_month))
|
315 |
|
#mod7<- try(gam(formula7, data=data_month))
|
316 |
|
#mod8<- try(gam(formula8, data=data_month))
|
317 |
|
|
318 |
|
for (j in 1:nmodels){
|
319 |
|
formula<-list_formulas[[j]]
|
320 |
|
mod<- try(gam(formula, data=data_month))
|
321 |
|
model_name<-paste("mod",j,sep="")
|
322 |
|
assign(model_name,mod)
|
323 |
|
}
|
324 |
|
|
325 |
|
} else if (bias_prediction==0){ #Use daily data for direct prediction using GAM
|
326 |
|
|
327 |
|
#mod1<- try(gam(formula1, data=data_s))
|
328 |
|
#mod2<- try(gam(formula2, data=data_s)) #modified nesting....from 3 to 2
|
329 |
|
#mod3<- try(gam(formula3, data=data_s))
|
330 |
|
#mod4<- try(gam(formula4, data=data_s))
|
331 |
|
#mod5<- try(gam(formula5, data=data_s))
|
332 |
|
#mod6<- try(gam(formula6, data=data_s))
|
333 |
|
#mod7<- try(gam(formula7, data=data_s))
|
334 |
|
#mod8<- try(gam(formula8, data=data_s))
|
335 |
|
|
336 |
|
for (j in 1:nmodels){
|
337 |
|
formula<-list_formulas[[j]]
|
338 |
|
mod<- try(gam(formula, data=data_s))
|
339 |
|
model_name<-paste("mod",j,sep="")
|
340 |
|
assign(model_name,mod)
|
341 |
|
}
|
342 |
|
|
343 |
|
}
|
|
429 |
mod_list<-fit_models(list_formulas,data_month) #only gam at this stage
|
344 |
430 |
|
345 |
431 |
#Added
|
346 |
432 |
#tmax_predicted=themolst+daily_delta_rast-bias_rast #Final surface?? but daily_rst
|
... | ... | |
399 |
485 |
mod_obj<-vector("list",nmodels+2) #This will contain the model objects fitting: 10/30
|
400 |
486 |
mod_obj[[nmodels+1]]<-mod_kr_month #Storing climatology object
|
401 |
487 |
mod_obj[[nmodels+2]]<-mod_kr_day #Storing delta object
|
|
488 |
pred_sgdf<-as(raster_pred,"SpatialGridDataFrame") #Conversion to spatial grid data frame
|
402 |
489 |
|
403 |
490 |
for (j in 1:nmodels){
|
404 |
491 |
|
405 |
492 |
##Model assessment: specific diagnostic/metrics for GAM
|
406 |
493 |
|
407 |
494 |
name<-paste("mod",j,sep="") #modj is the name of The "j" model (mod1 if j=1)
|
408 |
|
mod<-get(name) #accessing GAM model ojbect "j"
|
|
495 |
#mod<-get(name) #accessing GAM model ojbect "j"
|
|
496 |
mod<-mod_list[[j]]
|
409 |
497 |
mod_obj[[j]]<-mod #storing current model object
|
410 |
|
|
411 |
498 |
#If mod "j" is not a model object
|
412 |
499 |
if (inherits(mod,"try-error")) {
|
413 |
500 |
results_AIC[1]<- sampling_dat$date[i] #storing the interpolation dates in the first column
|
... | ... | |
524 |
611 |
writeRaster(raster_pred, filename=raster_name,overwrite=TRUE) #Writing the data in a raster file format...(IDRISI)
|
525 |
612 |
|
526 |
613 |
}
|
527 |
|
|
528 |
|
if (bias_prediction==0){
|
529 |
|
data_name<-paste("predicted_mod",j,"_",sampling_dat$date[i],"_",sampling_dat$prop[i],
|
530 |
|
"_",sampling_dat$run_samp[i],sep="")
|
531 |
|
raster_name<-paste("GAM_",data_name,out_prefix,".rst", sep="")
|
532 |
|
writeRaster(raster_pred, filename=raster_name,overwrite=TRUE) #Writing the data in a raster file format...(IDRISI)
|
533 |
|
#writeRaster(r2, filename=raster_name,overwrite=TRUE) #Writing the data in a raster file format...(IDRISI)
|
534 |
|
|
535 |
|
}
|
536 |
|
|
537 |
|
|
538 |
|
pred_sgdf<-as(raster_pred,"SpatialGridDataFrame") #Conversion to spatial grid data frame
|
|
614 |
|
539 |
615 |
#rpred_val_s <- overlay(raster_pred,data_s) #This overlays the kriged surface tmax and the location of weather stations
|
540 |
616 |
|
541 |
617 |
rpred_val_s <- overlay(pred_sgdf,data_s) #This overlays the interpolated surface tmax and the location of weather stations
|
GAM Fusion fusion initial reorganization of code to reduce time and predict for Venezuela