Project

General

Profile

Download (13.1 KB) Statistics
| Branch: | Revision:
1
#Function to be used with GAM_fusion_analysis_raster_prediction_mutlisampling.R
2
#runClimFusion<-function(r_stack,data_training,data_testing,data_training){
3

    
4
####
5
#TODO:
#Add a log file and record the run time and output sizes of each process.
7

    
8
runClim_KGFusion<-function(j){
  # Creates the monthly climatology ("clim") fusion layers for month j.
  #
  # For month j the function:
  #   1) fits a set of GAM models to the station bias (monthly LST - TMax),
  #   2) predicts each fitted GAM over the covariate stack and writes the
  #      bias surface to a GeoTIFF,
  #   3) writes the climatology surface (LST - predicted bias) for each model,
  #   4) repeats 2-3 with a kriged bias surface (fields::Krig), stored under
  #      the name "mod_kr".
  #
  # Argument:
  #   j: month index; used to select rows of dst (dst$month==j), to pick the
  #      LST layer name ("mm_01".."mm_12") and in the output file names.
  #
  # Objects read from the calling/global environment (not passed as arguments):
  #   dst        : monthly station data (coordinates() is called on a subset of
  #                it, so it is presumably a SpatialPointsDataFrame - confirm)
  #   s_raster   : raster stack of covariates; only a local copy is modified
  #   nmodels    : initial length of the formula list
  #   out_prefix : string appended to every output file name
  #
  # Side effects: writes "fusion_*.tif" rasters and "clim_obj_month_*.RData"
  #               in the current working directory.
  # Returns: list with elements "bias", "clim" (file names), "data_month",
  #          "mod" (fitted models) and "formulas".

  #Functions used in the script
  predict_raster_model<-function(in_models,r_stack,out_filename){
    #This function performs predictions on a raster grid given input models.
    #Arguments: list of fitted models, raster stack of covariates,
    #           list of output file names (one per model)
    #Output: list of the raster file names written; the entry stays NULL for
    #        every model that is not a "gam" object (e.g. a failed fit)
    list_rast_pred<-vector("list",length(in_models))
    for (i in seq_along(in_models)){
      mod<-in_models[[i]] #accessing GAM model object "i"
      raster_name<-out_filename[[i]]
      if (inherits(mod,"gam")) {
        #Predict on the stack passed as argument (not on an outer variable)
        raster_pred<-predict(object=r_stack,model=mod,na.rm=FALSE)
        names(raster_pred)<-"y_pred"
        writeRaster(raster_pred, filename=raster_name,overwrite=TRUE)  #Writing the prediction to file
        print(paste("Interpolation:","mod", i ,sep=" "))
        list_rast_pred[[i]]<-raster_name
      } else if (inherits(mod,"try-error")) {
        #Checked for every model, not only for the last one of the list
        print(paste("no gam model fitted:",mod[1],sep=" "))
      }
    }
    return(list_rast_pred)
  }

  fit_models<-function(list_formulas,data_training){
    #This function fits several models and returns the model objects.
    #Arguments: - list of formulas for GAM models
    #           - fitting data in a data.frame or SpatialPointDataFrame
    #Output: list of model objects; a failed fit is kept as a "try-error"
    list_fitted_models<-vector("list",length(list_formulas))
    for (k in seq_along(list_formulas)){
      model_formula<-list_formulas[[k]]
      list_fitted_models[[k]]<-try(gam(model_formula, data=data_training))
    }
    return(list_fitted_models)
  }

  #Model and response variable can be changed without affecting the script
  prop_month<-0 #proportion retained for validation
  run_samp<-1
  list_formulas<-vector("list",nmodels)

  list_formulas[[1]] <- as.formula("y_var ~ s(elev_1)", env=.GlobalEnv)
  list_formulas[[2]] <- as.formula("y_var ~ s(LST)", env=.GlobalEnv)
  list_formulas[[3]] <- as.formula("y_var ~ s(elev_1,LST)", env=.GlobalEnv)
  list_formulas[[4]] <- as.formula("y_var ~ s(lat) + s(lon)+ s(elev_1)", env=.GlobalEnv)
  list_formulas[[5]] <- as.formula("y_var ~ s(lat,lon,elev_1)", env=.GlobalEnv)
  list_formulas[[6]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST)", env=.GlobalEnv)
  list_formulas[[7]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(LC2)", env=.GlobalEnv)
  list_formulas[[8]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(LC6)", env=.GlobalEnv)
  list_formulas[[9]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(DISTOC)", env=.GlobalEnv)
  lst_avg<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12")

  data_month<-dst[dst$month==j,] #Subsetting dataset for the relevant month
  LST_name<-lst_avg[j] # name of LST month to be matched
  data_month$LST<-data_month[[LST_name]]

  #LST bias to model...
  data_month$LSTD_bias<-data_month$LST-data_month$TMax
  data_month$y_var<-data_month$LSTD_bias #Adding bias as the variable modeled
  mod_list<-fit_models(list_formulas,data_month) #only gam at this stage
  cname<-paste("mod",seq_along(mod_list),sep="") #change to more meaningful name?
  names(mod_list)<-cname

  #Rename "elev" to "elev_1" so the layer matches the formulas.
  #The assignment creates a local copy of s_raster; the global is untouched.
  pos<-match("elev",names(s_raster))
  if (!is.na(pos)){
    names(s_raster)[pos]<-"elev_1"
  }

  #Adding layer LST to the raster stack
  pos<-match("LST",names(s_raster)) #position of a layer named "LST", NA if absent
  if (!is.na(pos)){
    s_raster<-dropLayer(s_raster,pos)  #drop a stale LST layer only if it exists
  }
  LST<-subset(s_raster,LST_name)
  names(LST)<-"LST"
  #Screen for extreme values": this needs more thought, min and max val vary with regions
  #min_val<-(-15+273.16) #if values less than -15C then screen out (note the Kelvin units that will need to be changed later in all datasets)
  #r1[r1 < (min_val)]<-NA
  s_raster<-addLayer(s_raster,LST)            #Adding current month

  #Now generate file names for the predictions (j indicates the month predicted)
  data_names<-paste("bias_LST_month_",j,"_",cname,"_",prop_month,
                    "_",run_samp,sep="")
  list_out_filename<-as.list(paste("fusion_",data_names,out_prefix,".tif", sep=""))
  names(list_out_filename)<-cname

  #now predict values for raster image...
  rast_bias_list<-predict_raster_model(mod_list,s_raster,list_out_filename)
  names(rast_bias_list)<-cname
  #Some models will not be predicted...remove them
  not_predicted<-vapply(rast_bias_list,is.null,logical(1))
  rast_bias_list<-rast_bias_list[!not_predicted] #remove NULL elements in list

  #Climatology per GAM model: LST minus the predicted bias surface.
  #Skipped entirely when no GAM could be predicted (nothing to stack).
  rast_clim_list<-list()
  if (length(rast_bias_list)>0){
    mod_rast<-stack(rast_bias_list)  #stack of bias raster images from models
    rast_clim_list<-vector("list",nlayers(mod_rast))
    names(rast_clim_list)<-names(rast_bias_list)
    for (k in seq_len(nlayers(mod_rast))){
      clim_fus_rast<-LST-subset(mod_rast,k)
      data_name<-paste("clim_LST_month_",j,"_",names(rast_clim_list)[k],"_",prop_month,
                       "_",run_samp,sep="")
      raster_name<-paste("fusion_",data_name,out_prefix,".tif", sep="")
      rast_clim_list[[k]]<-raster_name
      writeRaster(clim_fus_rast, filename=raster_name,overwrite=TRUE)
    }
  }

  #Adding Kriging for Climatology options
  bias_xy<-coordinates(data_month)
  fitbias<-Krig(bias_xy,data_month$LSTD_bias,theta=1e5) #use TPS or krige
  model_name<-"mod_kr"

  bias_rast<-interpolate(LST,fitbias) #interpolation using function from raster package
  #Saving kriged surface in raster images
  data_name<-paste("bias_LST_month_",j,"_",model_name,"_",prop_month,
                   "_",run_samp,sep="")
  raster_name_bias<-paste("fusion_",data_name,out_prefix,".tif", sep="")
  writeRaster(bias_rast, filename=raster_name_bias,overwrite=TRUE)

  #now climatology layer
  clim_rast<-LST-bias_rast
  data_name<-paste("clim_LST_month_",j,"_",model_name,"_",prop_month,
                   "_",run_samp,sep="")
  raster_name_clim<-paste("fusion_",data_name,out_prefix,".tif", sep="")
  writeRaster(clim_rast, filename=raster_name_clim,overwrite=TRUE)

  #Adding to current objects
  mod_list[[model_name]]<-fitbias
  rast_bias_list[[model_name]]<-raster_name_bias
  rast_clim_list[[model_name]]<-raster_name_clim

  #Prepare object to return
  clim_obj<-list(rast_bias_list,rast_clim_list,data_month,mod_list,list_formulas)
  names(clim_obj)<-c("bias","clim","data_month","mod","formulas")

  save(clim_obj,file= paste("clim_obj_month_",j,"_",out_prefix,".RData",sep=""))

  return(clim_obj)
}
156

    
157
## Run function for kriging...?
158

    
159
runGAMFusion <- function(i) {            # loop over dates
  # Produces the daily fusion predictions for sampling run i.
  #
  # The daily deviation (daily station value - monthly station mean TMax) is
  # kriged over space and added to every monthly climatology raster of the
  # relevant month: T = climatology + delta(interpolated).
  #
  # Argument:
  #   i: index into sampling_dat, ghcn.subsets and sampling.
  #
  # Objects read from the calling/global environment (not passed as arguments):
  #   sampling_dat       : table with columns date ("%Y%m%d"), prop, run_samp
  #   ghcn.subsets       : list of daily station datasets, one per run
  #   sampling           : list of training row indices, one per run
  #   dst                : monthly station data (proj4string() is called on it)
  #   rast_clim_yearlist : per-month lists of climatology raster file names
  #   y_var_name         : name given to the daily "value" column
  #   out_prefix         : string appended to every output file name
  #
  # Side effects: writes "fusion_*.tif" rasters and a "delta_obj_*.RData" file
  #               in the current working directory.
  # Returns: list named c(y_var_name,"clim","delta","data_s","data_v",
  #          "sampling_dat","mod_kr_day").
  #
  #Change this to allow explicitly arguments...
  #Arguments:
  #1)list of climatology files for all models...(12*nb of models)
  #2)data_s:training
  #3)data_v:testing
  #4)list of dates??
  #5)stack of covariates: not needed at this this stage
  #6)dst: data at the monthly time scale

  date_proc<-strptime(sampling_dat$date[i], "%Y%m%d")   # interpolation date being processed
  month<-strftime(date_proc, "%m")      # current month as two-digit string
  mo<-as.integer(month)                 # current month as integer
  LST_month<-paste("mm_",month,sep="")  # name of LST month to be matched
  proj_str<-proj4string(dst)

  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
  data_day<-ghcn.subsets[[i]]
  mod_LST <- data_day[,match(LST_month, names(data_day))]  #Match interpolation date and monthly LST average
  data_day$LST <- as.data.frame(mod_LST)[,1] #Add the variable LST to the dataset
  dst$LST<-dst[[LST_month]] #Add the variable LST to the monthly dataset (local copy of dst)

  ind.training<-sampling[[i]]
  ind.testing <- setdiff(seq_len(nrow(data_day)), ind.training)
  data_s <- data_day[ind.training, ]   #Training dataset currently used in the modeling
  data_v <- data_day[ind.testing, ]    #Testing/validation dataset using input sampling

  modst<-dst[dst$month==mo,] #Subsetting dataset for the relevant month of the date being processed
  #Change to y_var...could be TMin
  #modst$LSTD_bias <- modst$LST-modst$y_var
  modst$LSTD_bias <- modst$LST-modst$TMax #difference between the monthly LST mean and monthly station mean

  x<-as.data.frame(data_v)
  d<-as.data.frame(data_s)
  #Recode the -999.9 sentinel as NA. which() ignores values that are already
  #NA and copes with zero rows, where a row-by-row if() would stop.
  x$value[which(x$value == -999.9)]<-NA
  d$value[which(d$value == -999.9)]<-NA

  #x and d come from the same data_day, so they share the column layout and
  #the positions found in d are reused for x.
  pos<-match("value",names(d)) #Find column with name "value"
  names(d)[pos]<-y_var_name
  names(x)[pos]<-y_var_name
  pos<-match("station",names(d)) #Find column with name "station"
  names(d)[pos]<-c("id")
  names(x)[pos]<-c("id")
  names(modst)[1]<-c("id")       #modst contains the average tmax per month for every stations...

  drop_dup_cols<-function(df){
    #Removes the duplicate columns that merge() tagged with suffix ".y2".
    #Guarded because df[,-integer(0)] would drop every column.
    dup_idx<-grep(glob2rx("*.y2"),names(df),value=FALSE)
    if (length(dup_idx)>0){
      df<-df[,-dup_idx]
    }
    df
  }

  #Stations absent from either table are lost in these inner joins
  dmoday<-drop_dup_cols(merge(modst,d,by="id",suffixes=c("",".y2")))
  xmoday<-drop_dup_cols(merge(modst,x,by="id",suffixes=c("",".y2")))

  data_v<-xmoday

  #dmoday contains the daily values for training with TMax being the monthly station tmax mean
  #xmoday contains the daily values for validation with TMax being the monthly station tmax mean

  ##########
  # STEP 7 - interpolate delta across space
  ##########

  #The daily column was renamed to y_var_name above, so it is looked up by
  #that name rather than by a hard-coded "dailyTmax".
  daily_delta<-dmoday[[y_var_name]]-dmoday$TMax
  daily_delta_xy<-as.matrix(cbind(dmoday$x,dmoday$y))
  fitdelta<-Krig(daily_delta_xy,daily_delta,theta=1e5) #use TPS or krige
  mod_krtmp2<-fitdelta
  model_name<-paste("mod_kr","day",sep="_")
  data_s<-dmoday #training data now carry the monthly attributes
  data_s$daily_delta<-daily_delta

  #########
  # STEP 8 - assemble final answer - T=LST+Bias(interpolated)+delta(interpolated)
  #########

  rast_clim_list<-rast_clim_yearlist[[mo]]  #select relevant month
  rast_clim_month<-raster(rast_clim_list[[1]]) #first layer is used as the grid template

  daily_delta_rast<-interpolate(rast_clim_month,fitdelta) #Interpolation of the daily delta surface

  #Saving kriged surface in raster images
  data_name<-paste("daily_delta_",sampling_dat$date[i],"_",sampling_dat$prop[i],
                   "_",sampling_dat$run_samp[i],sep="")
  raster_name_delta<-paste("fusion_",data_name,out_prefix,".tif", sep="")
  writeRaster(daily_delta_rast, filename=raster_name_delta,overwrite=TRUE)

  #Now predict daily after having selected the relevant month
  temp_list<-vector("list",length(rast_clim_list))
  for (k in seq_along(rast_clim_list)){
    rast_clim_month<-raster(rast_clim_list[[k]])
    temp_predicted<-rast_clim_month+daily_delta_rast

    data_name<-paste(y_var_name,"_predicted_",names(rast_clim_list)[k],"_",
                     sampling_dat$date[i],"_",sampling_dat$prop[i],
                     "_",sampling_dat$run_samp[i],sep="")
    raster_name<-paste("fusion_",data_name,out_prefix,".tif", sep="")
    writeRaster(temp_predicted, filename=raster_name,overwrite=TRUE)
    temp_list[[k]]<-raster_name
  }
  names(temp_list)<-names(rast_clim_list)

  #Turn training and testing tables into spatial objects
  coordinates(data_s)<-cbind(data_s$x,data_s$y)
  proj4string(data_s)<-proj_str
  coordinates(data_v)<-cbind(data_v$x,data_v$y)
  proj4string(data_v)<-proj_str

  delta_obj<-list(temp_list,rast_clim_list,raster_name_delta,data_s,
                  data_v,sampling_dat[i,],mod_krtmp2)

  obj_names<-c(y_var_name,"clim","delta","data_s","data_v",
               "sampling_dat",model_name)
  names(delta_obj)<-obj_names
  save(delta_obj,file= paste("delta_obj_",sampling_dat$date[i],"_",sampling_dat$prop[i],
                                "_",sampling_dat$run_samp[i],out_prefix,".RData",sep=""))
  return(delta_obj)

}
300
 
(10-10/36)