Project

General

Profile

« Previous | Next » 

Revision 55056785

Added by Benoit Parmentier over 11 years ago

GAM fusion function, added GAM models for bias surface and extraction of monthly mean tmax OR

View differences:

climate/research/oregon/interpolation/GAM_fusion_function_multisampling.R
8 8
  
9 9
  #Adding layer LST to the raster stack
10 10
  
11
  pos<-match("LST",layerNames(s_raster)) #Find column with name "LST"
12
  s_raster<-dropLayer(s_raster,pos) # If it exists drop layer
11 13
  pos<-match(LST_month,layerNames(s_raster)) #Find column with the current month for instance mm12
12 14
  r1<-raster(s_raster,layer=pos)             #Select layer from stack
13 15
  layerNames(r1)<-"LST"
14
  s_raster<-addLayer(s_raster,r1)            #Adding current month
16
  s_raster<-addLayer(s_raster,r1)            #Adding current month as "LST"
15 17
  
16 18
  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
17 19
  
18 20
  mod_LST <-ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))]  #Match interpolation date and monthly LST average
19 21
  ghcn.subsets[[i]] = transform(ghcn.subsets[[i]],LST = mod_LST)            #Add the variable LST to the subset dataset
22
  dst$LST<-dst[[LST_month]]
20 23
  #n<-nrow(ghcn.subsets[[i]])
21 24
  #ns<-n-round(n*prop)   #Create a sample from the data frame with 70% of the rows
22 25
  #nv<-n-ns              #create a sample for validation with prop of the rows
......
48 51
  # STEP 2 - Weather station means across same days: Monthly mean calculation
49 52
  ###########
50 53
  
51
  modst=dst[dst$month==mo,] #Subsetting dataset for the relevant month of the date being processed
54
  modst<-dst[dst$month==mo,] #Subsetting dataset for the relevant month of the date being processed
52 55
  
53 56
  ##########
54
  # STEP 3 - get LST at stations
57
  # STEP 3 - get LST at stations  
55 58
  ##########
56 59
  
57 60
  sta_lola=modst[,c("lon","lat")] #Extracting locations of stations for the current month..
......
94 97
  names(d)[pos]<-c("id")
95 98
  names(x)[pos]<-c("id")
96 99
  names(modst)[1]<-c("id")       #modst contains the average tmax per month for every stations...
97
  dmoday=merge(modst,d,by="id")  #LOOSING DATA HERE!!! from 113 t0 103
98
  xmoday=merge(modst,x,by="id")  #LOOSING DATA HERE!!! from 48 t0 43
99
  names(dmoday)[4]<-c("lat")
100
  names(dmoday)[5]<-c("lon")     #dmoday contains all the the information: BIAS, monn
101
  names(xmoday)[4]<-c("lat")
102
  names(xmoday)[5]<-c("lon")     #dmoday contains all the the information: BIAS, monn
100
  
101
  dmoday=merge(modst,d,by="id",suffixes=c("",".y2"))  #LOOSING DATA HERE!!! from 113 t0 103
102
  xmoday=merge(modst,x,by="id",suffixes=c("",".y2"))  #LOOSING DATA HERE!!! from 48 t0 43
103
  mod_pat<-glob2rx("*.y2")   
104
  var_pat<-grep(mod_pat,names(dmoday),value=FALSE) # using grep with "value" extracts the matching names
105
  dmoday<-dmoday[,-var_pat]
106
  mod_pat<-glob2rx("*.y2")   
107
  var_pat<-grep(mod_pat,names(xmoday),value=FALSE) # using grep with "value" extracts the matching names
108
  xmoday<-xmoday[,-var_pat] #Removing duplicate columns
103 109
  
104 110
  data_v<-xmoday
105 111
  ###
......
107 113
  #dmoday contains the daily tmax values for training with TMax being the monthly station tmax mean
108 114
  #xmoday contains the daily tmax values for validation with TMax being the monthly station tmax mean
109 115
  
110
  # windows()
111
  #png(paste("LST_TMax_scatterplot_",sampling_dat$date[i],out_prefix,".png", sep=""))
112 116
  png(paste("Daily_tmax_monthly_TMax_scatterplot_",sampling_dat$date[i],"_",sampling_dat$prop[i],
113 117
            "_",sampling_dat$run_samp[i],out_prefix,".png", sep=""))
114 118
  plot(dailyTmax~TMax,data=dmoday,xlab="Mo Tmax",ylab=paste("Daily for",datelabel),main="across stations in OR")
115
  #savePlot(paste("Daily_tmax_monthly_TMax_scatterplot_",sampling_dat$date[i],out_prefix,".png", sep=""), type="png")
116
  #png(paste("LST_TMax_scatterplot_",sampling_dat$date[i],out_prefix,".png", sep=""))
117 119
  dev.off()
118 120
  
119 121
  ########
120 122
  # STEP 5 - interpolate bias
121 123
  ########
122 124
  
123
  # ?? include covariates like elev, distance to coast, cloud frequency, tree height
124
  #library(fields)
125
  #windows()
126
  quilt.plot(sta_lola,sta_bias,main="Bias at stations",asp=1)
127
  US(add=T,col="magenta",lwd=2)
125
  # ?? include covariates like elev, distance to coast, cloud frequency, tree heig
128 126
  #fitbias<-Tps(bias_xy,sta_bias) #use TPS or krige
129 127
  
130
  #Adding options to use only training stations: 07/11/2012
128
  #Adding options to use only training stations : 07/11/2012
131 129
  bias_xy=project(as.matrix(sta_lola),proj_str)
132 130
  #bias_xy2=project(as.matrix(c(dmoday$lon,dmoday$lat),proj_str)
133 131
  if(bias_val==1){
......
138 136
  fitbias<-Krig(bias_xy,sta_bias,theta=1e5) #use TPS or krige 
139 137
  #The output is a krig object using fields
140 138
  mod9a<-fitbias
141
  # Creating plot of bias surface and saving it
142
  #X11()
143
  png(paste("Bias_surface_LST_TMax_",sampling_dat$date[i],"_",sampling_dat$prop[i],
144
            "_",sampling_dat$run_samp[i],out_prefix,".png", sep="")) #Create file to write a plot
145
  datelabel2=format(ISOdate(year,mo,day),"%B ") #added by Benoit, label
146
  surface(fitbias,col=rev(terrain.colors(100)),asp=1,main=paste("Interpolated bias for",datelabel2,sep=" ")) #Plot to file
147
  #savePlot(paste("Bias_surface_LST_TMax_",sampling_dat$date[i],out_prefix,".png", sep=""), type="png")
148
  dev.off()  #Release the hold to the file
149
  
150
  #US(add=T,col="magenta",lwd=2)
151
  
139
    
152 140
  ##########
153 141
  # STEP 7 - interpolate delta across space
154 142
  ##########
......
156 144
  daily_sta_lola=dmoday[,c("lon","lat")] #could be same as before but why assume merge does this - assume not
157 145
  daily_sta_xy=project(as.matrix(daily_sta_lola),proj_str)
158 146
  daily_delta=dmoday$dailyTmax-dmoday$TMax
159
  #windows()
160
  quilt.plot(daily_sta_lola,daily_delta,asp=1,main="Station delta for Jan 15")
161
  US(add=T,col="magenta",lwd=2)
147
  
162 148
  #fitdelta<-Tps(daily_sta_xy,daily_delta) #use TPS or krige
163 149
  fitdelta<-Krig(daily_sta_xy,daily_delta,theta=1e5) #use TPS or krige
164 150
  #Kriging using fields package
165 151
  mod9b<-fitdelta
166
  # Creating plot of bias surface and saving it
167
  #X11()
152

  
168 153
  png(paste("Delta_surface_LST_TMax_",sampling_dat$date[i],"_",sampling_dat$prop[i],
169 154
            "_",sampling_dat$run_samp[i],out_prefix,".png", sep=""))
170 155
  surface(fitdelta,col=rev(terrain.colors(100)),asp=1,main=paste("Interpolated delta for",datelabel,sep=" "))
171
  #savePlot(paste("Delta_surface_LST_TMax_",sampling_dat$date[i],out_prefix,".png", sep=""), type="png")
172 156
  dev.off()
173
  #US(add=T,col="magenta",lwd=2)
174 157
  #
175 158
  
176 159
  #### Added by Benoit on 06/19
......
237 220
  mae<-MAE_fun(sta_pred,tmax)
238 221
  r2<-cor(sta_pred,tmax)^2              #R2, coef. of var
239 222
  me<-mean(sta_pred-tmax)
240
 
241
  #plot(sta_pred~dmoday$dailyTmax,xlab=paste("Actual daily for",datelabel),ylab="Pred daily",main=paste("RMSE=",rmse))
242
  
223
   
243 224
  png(paste("Predicted_tmax_versus_observed_scatterplot_",sampling_dat$date[i],"_",sampling_dat$prop[i],
244 225
            "_",sampling_dat$run_samp[i],out_prefix,".png", sep=""))
245 226
  plot(sta_pred~tmax,xlab=paste("Actual daily for",datelabel),ylab="Pred daily",main=paste("RMSE=",rmse))
246 227
  abline(0,1)
247
  #savePlot(paste("Predicted_tmax_versus_observed_scatterplot_",sampling_dat$date[i],out_prefix,".png", sep=""), type="png")
248 228
  dev.off()
249 229
  #resid=sta_pred-dmoday$dailyTmax
250 230
  resid=sta_pred-tmax
251
  #quilt.plot(daily_sta_lola,resid)
252
  
253
  ### END OF BRIAN's code
254
  
255
  ### Added by benoit
256 231
  
257 232
  ###BEFORE GAM prediction the data object must be transformed to SDF
258 233
  
......
262 237
  coords<- data_s[,c('x_OR83M','y_OR83M')]
263 238
  coordinates(data_s)<-coords
264 239
  proj4string(data_s)<-CRS  #Need to assign coordinates..
240
  coords<- modst[,c('x_OR83M','y_OR83M')]
241
  coordinates(modst)<-coords
242
  proj4string(modst)<-CRS  #Need to assign coordinates..
265 243
  
266 244
  ns<-nrow(data_s) #This is added to because some loss of data might have happened because of the averaging...
267 245
  nv<-nrow(data_v)
268 246
  
269 247
  ###GAM PREDICTION
270 248
  
271
  #data_s$y_var<-data_s$dailyTmax  #This shoudl be changed for any variable!!!
272
  #data_v$y_var<-data_v$dailyTmax
273
  data_v$y_var<-data_v[[y_var_name]]
274
  data_s$y_var<-data_s[[y_var_name]]
249
  if (bias_prediction==1){
250
    data_s$y_var<-data_s$LSTD_bias  #This shoudl be changed for any variable!!!
251
    data_v$y_var<-data_v$LSTD_bias
252
    data_month<-modst
253
    data_month$y_var<-modst$LSTD_bias
254
  }
255

  
256
  if (bias_prediction==0){
257
    data_v$y_var<-data_v[[y_var_name]]
258
    data_s$y_var<-data_s[[y_var_name]]
259
  }
275 260
  
276 261
  #Model and response variable can be changed without affecting the script
277 262
  
......
282 267
  formula5 <- as.formula("y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST)", env=.GlobalEnv)
283 268
  formula6 <- as.formula("y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST)+s(LC1)", env=.GlobalEnv)
284 269
  formula7 <- as.formula("y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST)+s(LC3)", env=.GlobalEnv)
285
  formula8 <- as.formula("y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST) + s(LST,LC1)", env=.GlobalEnv)
286
  
287
  #formula1 <- as.formula("y_var ~ s(lat,lon,ELEV_SRTM)", env=.GlobalEnv)
288
  #formula2 <- as.formula("y_var~ s(lat,lon) +s(ELEV_SRTM)  + s(Northness,Eastness) + s(DISTOC) + s(LST) + s(CANHEIGHT)", env=.GlobalEnv)
289
  #formula3 <- as.formula("y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,CANHEIGHT)", env=.GlobalEnv)
290
  #formula4 <- as.formula("y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC1)", env=.GlobalEnv)
291
  #formula5 <- as.formula("y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC3)", env=.GlobalEnv)
292
  #formula6 <- as.formula("y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST)+s(LC1)", env=.GlobalEnv)
293
  #formula7 <- as.formula("y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST)+s(LC3)", env=.GlobalEnv)
294
  #formula8 <- as.formula("y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC1) + s(LST,LC3)", env=.GlobalEnv)
295
  
296
  mod1<- try(gam(formula1, data=data_s))
297
  mod2<- try(gam(formula2, data=data_s)) #modified nesting....from 3 to 2
298
  mod3<- try(gam(formula3, data=data_s))
299
  mod4<- try(gam(formula4, data=data_s))
300
  mod5<- try(gam(formula5, data=data_s))
301
  mod6<- try(gam(formula6, data=data_s))
302
  mod7<- try(gam(formula7, data=data_s))
303
  mod8<- try(gam(formula8, data=data_s))
304
  
305
#   mod1<- try(gam(formula1, data=data_s))
306
#   mod2<- try(gam(formula2, data=data_s)) #modified nesting....from 3 to 2
307
#   mod3<- try(gam(y_var~ s(lat) + s (lon) + s (ELEV_SRTM) +  s (Northness)+ s (Eastness) + s(DISTOC), data=data_s))
308
#   mod4<- try(gam(y_var~ s(lat) + s (lon) + s(ELEV_SRTM) + s(Northness) + s (Eastness) + s(DISTOC) + s(LST), data=data_s))
309
#   mod5<- try(gam(y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST), data=data_s))
310
#   mod6<- try(gam(y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST)+s(LC1), data=data_s))
311
#   mod7<- try(gam(y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST)+s(LC3), data=data_s))
312
#   mod8<- try(gam(y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST) + s(LC1,LC3), data=data_s))
313
#   
270
  formula8 <- as.formula("y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST) + s(LC1,LC3)", env=.GlobalEnv)
271
  
272
  if (bias_prediction==1){
273
    mod1<- try(gam(formula1, data=data_month))
274
    mod2<- try(gam(formula2, data=data_month)) #modified nesting....from 3 to 2
275
    mod3<- try(gam(formula3, data=data_month))
276
    mod4<- try(gam(formula4, data=data_month))
277
    mod5<- try(gam(formula5, data=data_month))
278
    mod6<- try(gam(formula6, data=data_month))
279
    mod7<- try(gam(formula7, data=data_month))
280
    mod8<- try(gam(formula8, data=data_month)) 
281
    
282
  } else if (bias_prediction==0){
283
    
284
    mod1<- try(gam(formula1, data=data_s))
285
    mod2<- try(gam(formula2, data=data_s)) #modified nesting....from 3 to 2
286
    mod3<- try(gam(formula3, data=data_s))
287
    mod4<- try(gam(formula4, data=data_s))
288
    mod5<- try(gam(formula5, data=data_s))
289
    mod6<- try(gam(formula6, data=data_s))
290
    mod7<- try(gam(formula7, data=data_s))
291
    mod8<- try(gam(formula8, data=data_s))
292
  }
293

  
314 294
  #Added
315 295
  #tmax_predicted=themolst+daily_delta_rast-bias_rast #Final surface?? but daily_rst
316 296
  
......
351 331
  #ns<-nrow(data_s) #This is added to because some loss of data might have happened because of the averaging...
352 332
  #nv<-nrow(data_v)
353 333
  
334
  pred_mod<-paste("pred_mod",j,sep="")
335
  #Adding the results back into the original dataframes.
336
  data_s[[pred_mod]]<-sta_pred_data_s
337
  data_v[[pred_mod]]<-sta_pred 
338
  
339
  #Model assessment: RMSE and then krig the residuals....!
340
  
341
  res_mod_s<- data_s$dailyTmax - data_s[[pred_mod]]           #Residuals from kriging training
342
  res_mod_v<- data_v$dailyTmax - data_v[[pred_mod]]           #Residuals from kriging validation
343
  
344
  name2<-paste("res_mod",j,sep="")
345
  data_v[[name2]]<-as.numeric(res_mod_v)
346
  data_s[[name2]]<-as.numeric(res_mod_s)
347
  
354 348
  
355 349
  for (j in 1:nmodels){
356 350
    
......
453 447
        s_sgdf<-as(s_raster,"SpatialGridDataFrame") #Conversion to spatial grid data frame
454 448
        
455 449
        rpred<- predict(mod, newdata=s_sgdf, se.fit = TRUE) #Using the coeff to predict new values.
456
        y_pred<-rpred$fit
450
        y_pred<-rpred$fit #rpred is a list with fit being and array
457 451
        raster_pred<-r1
458 452
        layerNames(raster_pred)<-"y_pred"
459 453
        values(raster_pred)<-as.numeric(y_pred)
460
        data_name<-paste("predicted_mod",j,"_",sampling_dat$date[i],"_",sampling_dat$prop[i],
461
                         "_",sampling_dat$run_samp[i],sep="")
462
        raster_name<-paste("GAM_",data_name,out_prefix,".rst", sep="")
463
        writeRaster(raster_pred, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
464
        #writeRaster(r2, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
454
        
455
        if (bias_prediction==1){
456
          data_name<-paste("predicted_mod",j,"_",sampling_dat$date[i],"_",sampling_dat$prop[i],
457
                           "_",sampling_dat$run_samp[i],sep="")
458
          raster_name<-paste("GAM_bias_",data_name,out_prefix,".rst", sep="")
459
          writeRaster(raster_pred, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
460
          bias_rast<-raster_pred
461
          
462
          raster_pred=themolst+daily_delta_rast-bias_rast #Final surface  as a raster layer...
463
          layerNames(raster_pred)<-"y_pred"
464
          #=themolst+daily_delta_rast-bias_rast #Final surface  as a raster layer...
465
          
466
          data_name<-paste("predicted_mod",j,"_",sampling_dat$date[i],"_",sampling_dat$prop[i],
467
                           "_",sampling_dat$run_samp[i],sep="")
468
          raster_name<-paste("GAM_bias_tmax_",data_name,out_prefix,".rst", sep="")
469
          writeRaster(raster_pred, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
470
          
471
        }
472
        
473
        if (bias_prediction==0){
474
          data_name<-paste("predicted_mod",j,"_",sampling_dat$date[i],"_",sampling_dat$prop[i],
475
                           "_",sampling_dat$run_samp[i],sep="")
476
          raster_name<-paste("GAM_",data_name,out_prefix,".rst", sep="")
477
          writeRaster(raster_pred, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
478
          #writeRaster(r2, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
479
          
480
        }
481
        
465 482
        
466 483
        pred_sgdf<-as(raster_pred,"SpatialGridDataFrame") #Conversion to spatial grid data frame
467 484
        #rpred_val_s <- overlay(raster_pred,data_s)             #This overlays the kriged surface tmax and the location of weather stations
468 485
        
469
        rpred_val_s <- overlay(pred_sgdf,data_s)             #This overlays the kriged surface tmax and the location of weather stations
470
        rpred_val_v <- overlay(pred_sgdf,data_v)             #This overlays the kriged surface tmax and the location of weather stations
486
        rpred_val_s <- overlay(pred_sgdf,data_s)             #This overlays the interpolated surface tmax and the location of weather stations
487
        rpred_val_v <- overlay(pred_sgdf,data_v)             #This overlays the interpolated surface tmax and the location of weather stations
471 488
        
472 489
        pred_mod<-paste("pred_mod",j,sep="")
473 490
        #Adding the results back into the original dataframes.
......
477 494
        
478 495
        #Model assessment: RMSE and then krig the residuals....!
479 496
        
480
        res_mod_s<- data_s$y_var - data_s[[pred_mod]]           #Residuals from kriging training
481
        res_mod_v<- data_v$y_var - data_v[[pred_mod]]           #Residuals from kriging validation
497
        res_mod_s<-data_s[[y_var_name]] - data_s[[pred_mod]] #residuals from modeling training
498
        res_mod_v<-data_v[[y_var_name]] - data_v[[pred_mod]] #residuals from modeling validation
482 499
        
483 500
      }
484 501
      
......
493 510
        
494 511
        #Model assessment: RMSE and then krig the residuals....!
495 512
        
496
        res_mod_s<- data_s$y_var - data_s[[pred_mod]]           #Residuals from kriging training
497
        res_mod_v<- data_v$y_var - data_v[[pred_mod]]           #Residuals from kriging validation
513
        #res_mod_s<- data_s$y_var - data_s[[pred_mod]]           #Residuals from modeling training
514
        #res_mod_v<- data_v$y_var - data_v[[pred_mod]]           #Residuals from modeling validation
515
        res_mod_s<-data_s[[y_var_name]] - data_s[[pred_mod]]
516
        res_mod_v<-data_v[[y_var_name]] - data_v[[pred_mod]]
517
        
498 518
      }
499 519

  
500 520
      ####ADDED ON JULY 20th
......
568 588
  mod_obj<-list(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8,mod9a,mod9b)
569 589
  names(mod_obj)<-c("mod1","mod2","mod3","mod4","mod5","mod6","mod7","mod8","mod9a","mod9b")
570 590
  #results_list<-list(data_s,data_v,tb_metrics1,tb_metrics2)
571
  results_list<-list(data_s,data_v,tb_metrics1,tb_metrics2,mod_obj)
572
  names(results_list)<-c("data_s","data_v","tb_metrics1","tb_metrics2","mod_obj")
591
  results_list<-list(data_s,data_v,tb_metrics1,tb_metrics2,mod_obj,sampling_dat[i,],data_month)
592
  names(results_list)<-c("data_s","data_v","tb_metrics1","tb_metrics2","mod_obj","sampling_dat","data_month")
573 593
  save(results_list,file= paste(path,"/","results_list_metrics_objects_",sampling_dat$date[i],"_",sampling_dat$prop[i],
574 594
                                "_",sampling_dat$run_samp[i],out_prefix,".RData",sep=""))
575 595
  return(results_list)

Also available in: Unified diff