Project

General

Profile

Download (12.1 KB) Statistics
| Branch: | Revision:
#Function to be used with GAM_fusion_analysis_raster_prediction_mutlisampling.R
#runClimFusion<-function(r_stack,data_training,data_testing,data_training){

####
#TODO:
#Add log file and calculate time and sizes for processes-outputs

# Build the monthly LST-bias and climatology rasters for month `j`.
#
# The bias (monthly LST minus station TMax) of the stations of month `j` is
# modelled with every formula in `list_models` (fitted with gam()) and, in
# addition, with Krig() on the station coordinates. Every bias surface is
# written to a .tif file, subtracted from the monthly LST layer, and the
# resulting climatology raster is written to a .tif file as well.
#
# Argument:
#   j - month index; used to subset `dst` (dst$month == j), to select the LST
#       layer name (lst_avg[j]) and in every output file name.
# Objects read from the enclosing/global environment (not passed in):
#   list_models, dst, lst_avg, s_raster, out_prefix.
# Returns:
#   A list with elements "bias" and "clim" (raster file names, one per model
#   that could be predicted, plus "mod_kr"), "data_month", "mod" (fitted model
#   objects) and "formulas". The same list is saved to
#   "clim_obj_month_<j>_<out_prefix>.RData".
runClim_KGFusion <- function(j) {
  # Make this a function with multiple argument that can be used by mcmapply??

  # ---- Functions used in the script ----

  # Predict each fitted model on a raster stack of covariates and write the
  # prediction to the matching file name.
  # Arguments: in_models    - list of fitted models (or "try-error" objects)
  #            r_stack      - raster stack of covariates used for prediction
  #            out_filename - list of output file names, parallel to in_models
  # Output: list parallel to in_models holding the file name written for each
  #         "gam" model; entries of models that were not predicted stay NULL.
  predict_raster_model <- function(in_models, r_stack, out_filename) {
    list_rast_pred <- vector("list", length(in_models))
    for (i in seq_along(in_models)) {
      mod <- in_models[[i]]
      raster_name <- out_filename[[i]]
      if (inherits(mod, "gam")) { # change to c("gam","autoKrige")
        # Predict on the stack that was passed in, not on a variable captured
        # from the enclosing function.
        raster_pred <- predict(object = r_stack, model = mod, na.rm = FALSE)
        names(raster_pred) <- "y_pred"
        writeRaster(raster_pred, filename = raster_name, overwrite = TRUE)
        print(paste("Interpolation:", "mod", i, sep = " "))
        list_rast_pred[[i]] <- raster_name
      } else if (inherits(mod, "try-error")) {
        # Checked for every model, so a failed fit anywhere in the list is
        # reported (not only when it happens to be the last one).
        print(paste("no gam model fitted:", mod[1], sep = " ")) # change message for any model type...
      }
    }
    list_rast_pred
  }

  # Fit one model per formula and return the model objects.
  # Arguments: list_formulas - list of formulas for GAM models
  #            data_training - fitting data (data.frame or
  #                            SpatialPointsDataFrame)
  # Output: list of model objects; a failed fit is kept as its "try-error"
  #         object so positions still match list_formulas.
  fit_models <- function(list_formulas, data_training) {
    list_fitted_models <- vector("list", length(list_formulas))
    for (k in seq_along(list_formulas)) {
      model_formula <- list_formulas[[k]]
      list_fitted_models[[k]] <- try(gam(model_formula, data = data_training)) # change to any model!!
      # mod <- try(autoKrige(formula, input_data=data_s, new_data=s_sgdf, data_variogram=data_s))
    }
    list_fitted_models
  }

  # ---- Settings ----
  # Model and response variable can be changed without affecting the script
  prop_month <- 0 # proportion retained for validation
  run_samp <- 1

  list_formulas <- lapply(list_models, as.formula, env = .GlobalEnv)

  # ---- Monthly training data ----
  data_month <- dst[dst$month == j, ] # stations of the month being processed
  LST_name <- lst_avg[j]              # name of the LST layer/column for month j
  data_month$LST <- data_month[[LST_name]]

  # LST bias to model...
  data_month$LSTD_bias <- data_month$LST - data_month$TMax
  data_month$y_var <- data_month$LSTD_bias # bias is the variable modelled
  mod_list <- fit_models(list_formulas, data_month) # only gam at this stage
  cname <- paste0("mod", seq_along(mod_list)) # change to more meaningful name?
  names(mod_list) <- cname

  # ---- Covariate stack for prediction ----
  # Rename layer "elev" to "elev_1". names<- is used for consistency with the
  # rest of this function (layerNames is the older accessor for the same thing).
  pos <- match("elev", names(s_raster))
  names(s_raster)[pos] <- "elev_1"

  # Replace any existing "LST" layer by the LST layer of the current month.
  pos <- match("LST", names(s_raster)) # NA when no layer is called "LST"
  s_raster <- dropLayer(s_raster, pos)
  LST <- subset(s_raster, LST_name)
  names(LST) <- "LST"
  # Screen for extreme values: this needs more thought, min and max val vary with regions
  # min_val <- (-15 + 273.16) # if values less than -15C then screen out (note the Kelvin units that will need to be changed later in all datasets)
  # r1[r1 < (min_val)] <- NA
  s_raster <- addLayer(s_raster, LST)

  # ---- Output file names for the model predictions ----
  list_out_filename <- vector("list", length(mod_list))
  names(list_out_filename) <- cname
  for (k in seq_along(list_out_filename)) {
    # j indicates which month is predicted
    data_name <- paste0("bias_LST_month_", j, "_", cname[k], "_", prop_month,
                        "_", run_samp)
    list_out_filename[[k]] <- paste0("fusion_", data_name, out_prefix, ".tif")
  }

  # ---- Predict bias surfaces ----
  rast_bias_list <- predict_raster_model(mod_list, s_raster, list_out_filename)
  names(rast_bias_list) <- cname
  # Some models will not be predicted...remove their NULL entries
  rast_bias_list <- rast_bias_list[!vapply(rast_bias_list, is.null, logical(1))]

  # ---- Climatology = LST - predicted bias, one raster per model ----
  rast_clim_list <- vector("list", length(rast_bias_list))
  names(rast_clim_list) <- names(rast_bias_list)
  # Skip stacking when no model could be predicted; the kriged surface below
  # is still produced in that case.
  if (length(rast_bias_list) > 0) {
    mod_rast <- stack(rast_bias_list) # stack of bias raster images from models
    for (k in seq_len(nlayers(mod_rast))) {
      clim_fus_rast <- LST - subset(mod_rast, k)
      data_name <- paste0("clim_LST_month_", j, "_", names(rast_clim_list)[k],
                          "_", prop_month, "_", run_samp)
      raster_name <- paste0("fusion_", data_name, out_prefix, ".tif")
      rast_clim_list[[k]] <- raster_name
      writeRaster(clim_fus_rast, filename = raster_name, overwrite = TRUE)
    }
  }

  # ---- Kriging option for the climatology ----
  bias_xy <- coordinates(data_month)
  fitbias <- Krig(bias_xy, data_month$LSTD_bias, theta = 1e5) # use TPS or krige
  model_name <- "mod_kr"

  bias_rast <- interpolate(LST, fitbias) # interpolation using function from raster package
  # Saving kriged surface in raster images
  data_name <- paste0("bias_LST_month_", j, "_", model_name, "_", prop_month,
                      "_", run_samp)
  raster_name_bias <- paste0("fusion_", data_name, out_prefix, ".tif")
  writeRaster(bias_rast, filename = raster_name_bias, overwrite = TRUE)

  # now climatology layer
  clim_rast <- LST - bias_rast
  data_name <- paste0("clim_LST_month_", j, "_", model_name, "_", prop_month,
                      "_", run_samp)
  raster_name_clim <- paste0("fusion_", data_name, out_prefix, ".tif")
  writeRaster(clim_rast, filename = raster_name_clim, overwrite = TRUE)

  # Adding the kriging results to the current objects
  mod_list[[model_name]] <- fitbias
  rast_bias_list[[model_name]] <- raster_name_bias
  rast_clim_list[[model_name]] <- raster_name_clim

  # ---- Prepare object to return ----
  clim_obj <- list(rast_bias_list, rast_clim_list, data_month, mod_list,
                   list_formulas)
  names(clim_obj) <- c("bias", "clim", "data_month", "mod", "formulas")

  save(clim_obj, file = paste0("clim_obj_month_", j, "_", out_prefix, ".RData"))

  clim_obj
}

## Run function for kriging...?

# Predict the daily surfaces for sampling run `i`.
#
# The deviation of the daily station value from the monthly station mean
# (TMax) is interpolated with Krig() and added to every monthly climatology
# raster of the matching month. The deviation raster and one prediction
# raster per climatology model are written to .tif files.
#
# Argument:
#   i - index into `sampling_dat`, `ghcn.subsets` and `sampling`
#       (one date / sampling run).
# Objects read from the enclosing/global environment (not passed in):
#   sampling_dat, ghcn.subsets, sampling, dst, y_var_name,
#   rast_clim_yearlist, out_prefix.
# Returns:
#   A list named c(y_var_name, "clim", "delta", "data_s", "data_v",
#   "sampling_dat", "mod_kr_day"): predicted raster file names, climatology
#   file names, deviation raster file name, training and testing data,
#   the row of sampling_dat used, and the fitted Krig object. The same list is
#   saved to an .RData file named after the date, proportion and run.
#
# TODO: change this to take explicit arguments:
#   1) list of climatology files for all models...(12*nb of models)
#   2) data_s: training
#   3) data_v: testing
#   4) list of dates??
#   5) stack of covariates: not needed at this stage
#   6) dst: data at the monthly time scale
runGAMFusion <- function(i) { # called once per date
  # Replace the missing-value code by NA in one vectorised step. which() drops
  # NA comparisons, so columns that already contain NA (or have zero rows) are
  # handled without error.
  missing_code <- -999.9
  recode_missing <- function(values) {
    values[which(values == missing_code)] <- NA
    values
  }

  # Remove the duplicated ".y2" columns created by merge(); when there are
  # none the data is returned unchanged (a negative empty index would drop
  # every column).
  drop_merge_duplicates <- function(df) {
    dup_cols <- grep(glob2rx("*.y2"), names(df), value = FALSE)
    if (length(dup_cols) > 0) {
      df <- df[, -dup_cols, drop = FALSE]
    }
    df
  }

  # ---- Date being processed ----
  date_proc <- strptime(sampling_dat$date[i], "%Y%m%d") # interpolation date
  mo <- as.integer(strftime(date_proc, "%m"))           # month as integer
  LST_month <- paste0("mm_", strftime(date_proc, "%m")) # name of LST month to be matched
  proj_str <- proj4string(dst)

  # ---- Regression part 1: training and testing datasets ----
  data_day <- ghcn.subsets[[i]]
  # Match interpolation date and monthly LST average
  mod_LST <- data_day[, match(LST_month, names(data_day))]
  data_day$LST <- as.data.frame(mod_LST)[, 1] # add the variable LST to the dataset
  dst$LST <- dst[[LST_month]]                 # add LST to the (local copy of the) monthly dataset

  ind.training <- sampling[[i]]
  ind.testing <- setdiff(seq_len(nrow(data_day)), ind.training)
  data_s <- data_day[ind.training, ] # training dataset currently used in the modeling
  data_v <- data_day[ind.testing, ]  # testing/validation dataset using input sampling

  # Monthly station data for the month of the date being processed
  modst <- dst[dst$month == mo, ]
  # Change to y_var...could be TMin
  # modst$LSTD_bias <- modst$LST - modst$y_var
  # Difference between the monthly LST mean and the monthly station mean
  modst$LSTD_bias <- modst$LST - modst$TMax

  x <- as.data.frame(data_v)
  d <- as.data.frame(data_s)
  x$value <- recode_missing(x$value)
  d$value <- recode_missing(d$value)

  # Rename "value" to the response name and "station" to "id"; positions are
  # looked up separately in each data frame.
  names(d)[match("value", names(d))] <- y_var_name
  names(x)[match("value", names(x))] <- y_var_name
  names(d)[match("station", names(d))] <- "id"
  names(x)[match("station", names(x))] <- "id"
  names(modst)[1] <- "id" # modst contains the average tmax per month for every station

  # dmoday / xmoday: daily values for training / validation joined to the
  # monthly station record (TMax being the monthly station mean).
  dmoday <- drop_merge_duplicates(merge(modst, d, by = "id", suffixes = c("", ".y2")))
  xmoday <- drop_merge_duplicates(merge(modst, x, by = "id", suffixes = c("", ".y2")))

  data_v <- xmoday

  ##########
  # STEP 7 - interpolate delta across space
  ##########

  # The daily response column carries the name in y_var_name (set above), so it
  # is looked up by that name rather than by a fixed column name.
  daily_delta <- dmoday[[y_var_name]] - dmoday$TMax
  daily_delta_xy <- as.matrix(cbind(dmoday$x, dmoday$y))
  fitdelta <- Krig(daily_delta_xy, daily_delta, theta = 1e5) # use TPS or krige
  model_name <- paste("mod_kr", "day", sep = "_")
  data_s <- dmoday
  data_s$daily_delta <- daily_delta

  #########
  # STEP 8 - assemble final answer - T=LST+Bias(interpolated)+delta(interpolated)
  #########

  rast_clim_list <- rast_clim_yearlist[[mo]] # climatology file names of the relevant month
  # The first climatology raster is the template grid for the interpolation.
  rast_clim_month <- raster(rast_clim_list[[1]])
  daily_delta_rast <- interpolate(rast_clim_month, fitdelta)

  # Saving kriged surface in raster images
  data_name <- paste0("daily_delta_", sampling_dat$date[i], "_",
                      sampling_dat$prop[i], "_", sampling_dat$run_samp[i])
  raster_name_delta <- paste0("fusion_", data_name, out_prefix, ".tif")
  writeRaster(daily_delta_rast, filename = raster_name_delta, overwrite = TRUE)

  # Now predict daily values: climatology of each model + daily deviation
  temp_list <- vector("list", length(rast_clim_list))
  for (j in seq_along(rast_clim_list)) {
    rast_clim_month <- raster(rast_clim_list[[j]])
    temp_predicted <- rast_clim_month + daily_delta_rast

    data_name <- paste0(y_var_name, "_predicted_", names(rast_clim_list)[j],
                        "_", sampling_dat$date[i], "_", sampling_dat$prop[i],
                        "_", sampling_dat$run_samp[i])
    raster_name <- paste0("fusion_", data_name, out_prefix, ".tif")
    writeRaster(temp_predicted, filename = raster_name, overwrite = TRUE)
    temp_list[[j]] <- raster_name
  }
  names(temp_list) <- names(rast_clim_list)

  # Turn training and testing data back into spatial objects
  coordinates(data_s) <- cbind(data_s$x, data_s$y)
  proj4string(data_s) <- proj_str
  coordinates(data_v) <- cbind(data_v$x, data_v$y)
  proj4string(data_v) <- proj_str

  # ---- Prepare object to return ----
  delta_obj <- list(temp_list, rast_clim_list, raster_name_delta, data_s,
                    data_v, sampling_dat[i, ], fitdelta)
  names(delta_obj) <- c(y_var_name, "clim", "delta", "data_s", "data_v",
                        "sampling_dat", model_name)

  save(delta_obj,
       file = paste0("delta_obj_", sampling_dat$date[i], "_",
                     sampling_dat$prop[i], "_", sampling_dat$run_samp[i],
                     out_prefix, ".RData"))

  delta_obj
}
288
 
(10-10/37)