Project

General

Profile

« Previous | Next » 

Revision d13c7dc1

Added by Benoit Parmentier over 11 years ago

GAM CAI function added climatology GAM models for OR tmax interpolation

View differences:

climate/research/oregon/interpolation/GAM_CAI_function_multisampling.R
1 1
runGAMCAI <- function(i) {            # loop over dates
2 2
  
3 3
  #date<-strptime(dates[i], "%Y%m%d")   # interpolation date being processed
4
  date<-strptime(sampling_dat$date[i], "%Y%m%d")   # interpolation date being processed
4
  date<-strptime(sampling_dat$date[i], "%Y%m%d")   # interpolation date being processed, converting the string using specific format
5 5
  month<-strftime(date, "%m")          # current month of the date being processed
6
  LST_month<-paste("mm_",month,sep="") # name of LST month to be matched
6
  LST_month<-paste("mm_",month,sep="") # name of LST month to be matched in the raster stack of covariates and data.frame
7 7
  
8 8
  #Adding layer LST to the raster stack
9 9
  
10
  pos<-match("LST",layerNames(s_raster)) #Find the position of the layer with name "LST", if not present pos=NA
11
  s_raster<-dropLayer(s_raster,pos)      # If it exists drop layer
10 12
  pos<-match(LST_month,layerNames(s_raster)) #Find column with the current month for instance mm12
11 13
  r1<-raster(s_raster,layer=pos)             #Select layer from stack
12 14
  layerNames(r1)<-"LST"
......
15 17
  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
16 18
  
17 19
  mod_LST <-ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))]  #Match interpolation date and monthly LST average
18
  ghcn.subsets[[i]] = transform(ghcn.subsets[[i]],LST = mod_LST)            #Add the variable LST to the subset dataset
20
  ghcn.subsets[[i]] <- transform(ghcn.subsets[[i]],LST = mod_LST)            #Add the variable LST to the subset dataset
21
  dst$LST<-dst[[LST_month]] #Add also to monthly dataset
22
  
19 23
  #n<-nrow(ghcn.subsets[[i]])
20 24
  #ns<-n-round(n*prop)   #Create a sample from the data frame with 70% of the rows
21 25
  #nv<-n-ns              #create a sample for validation with prop of the rows
......
37 41
  datelabel=format(ISOdate(year,mo,day),"%b %d, %Y")
38 42
  
39 43
  ###########
40
  #  STEP 1 - LST 10 year monthly averages: THIS IS NOT USED IN CAE
44
  #  STEP 1 - LST 10 year monthly averages: THIS IS NOT USED IN CAI method
41 45
  ###########
42 46

  
43 47
  themolst<-raster(molst,mo) #current month being processed saved in a raster image
......
92 96
  pos<-match("station",names(d)) #Find column with name "value"
93 97
  names(d)[pos]<-c("id")
94 98
  names(x)[pos]<-c("id")
95
  names(modst)[1]<-c("id")       #modst contains the average tmax per month for every stations...
96
  dmoday=merge(modst,d,by="id")  #LOOSING DATA HERE!!! from 113 t0 103
97
  xmoday=merge(modst,x,by="id")  #LOOSING DATA HERE!!! from 48 t0 43
98
  names(dmoday)[4]<-c("lat")
99
  names(dmoday)[5]<-c("lon")     #dmoday contains all the the information: BIAS, monn
100
  names(xmoday)[4]<-c("lat")
101
  names(xmoday)[5]<-c("lon")     #dmoday contains all the the information: BIAS, monn
99
  names(modst)[1]<-c("id")       #modst contains the average tmax per month for every stations...it has 193 rows
100
  
101
  dmoday=merge(modst,d,by="id",suffixes=c("",".y2"))  #LOOSING DATA HERE!!! from 113 t0 103
102
  xmoday=merge(modst,x,by="id",suffixes=c("",".y2"))  #LOOSING DATA HERE!!! from 48 t0 43
103
  mod_pat<-glob2rx("*.y2")   
104
  var_pat<-grep(mod_pat,names(dmoday),value=FALSE) # using grep with "value" extracts the matching names
105
  dmoday<-dmoday[,-var_pat]
106
  mod_pat<-glob2rx("*.y2")   
107
  var_pat<-grep(mod_pat,names(xmoday),value=FALSE) # using grep with "value" extracts the matching names
108
  xmoday<-xmoday[,-var_pat] #Removing duplicate columns
109
  
110
  #dmoday=merge(modst,d,by="id")  #LOOSING DATA HERE!!! from 113 t0 103
111
  #xmoday=merge(modst,x,by="id")  #LOOSING DATA HERE!!! from 48 t0 43
112
  #names(dmoday)[4]<-c("lat")
113
  #names(dmoday)[5]<-c("lon")     #dmoday contains all the the information: BIAS, monn
114
  #names(xmoday)[4]<-c("lat")
115
  #names(xmoday)[5]<-c("lon")     #dmoday contains all the the information: BIAS, monn
102 116
  
103 117
  data_v<-xmoday
104 118
  ###
......
128 142
  
129 143
  #Adding options to use only training stations: 07/11/2012
130 144
  bias_xy<-project(as.matrix(sta_lola),proj_str)
131
  clim_xy<-project(as.matrix(sta_lola),proj_str)
145
  clim_xy<-project(as.matrix(sta_lola),proj_str)     #This is the coordinates of monthly station location (193)
132 146
  #bias_xy2=project(as.matrix(c(dmoday$lon,dmoday$lat),proj_str)
133 147
  if(bias_val==1){
134
    sta_bias<-dmoday$LSTD_bias
135
    bias_xy<-cbind(dmoday$x_OR83M,dmoday$y_OR83M)
148
    sta_bias<-dmoday$LSTD_bias         
149
    bias_xy<-cbind(dmoday$x_OR83M,dmoday$y_OR83M) #This will use only stations from training daily samples for climatology step if bias_val=1
136 150
  }
137 151
  
138
  sta_clim<-modst$TMax #This contains the monthly climatology...
152
  sta_clim<-modst$TMax #This contains the monthly climatology...used in the prediction of the monthly surface
139 153
  
140 154
  #fitbias<-Krig(bias_xy,sta_bias,theta=1e5) #use TPS or krige 
141 155
  fitclim<-Krig(clim_xy,sta_clim,theta=1e5)
......
156 170
  #US(add=T,col="magenta",lwd=2)
157 171
  
158 172
  ##########
159
  # STEP 7 - interpolate delta across space
173
  # STEP 7 - interpolate delta across space: this is the daily deviation from the monthly average
160 174
  ##########
161 175
  
162 176
  daily_sta_lola=dmoday[,c("lon","lat")] #could be same as before but why assume merge does this - assume not
163 177
  daily_sta_xy=project(as.matrix(daily_sta_lola),proj_str)
164 178
  daily_delta=dmoday$dailyTmax-dmoday$TMax
165 179
  
166
  daily_deltaclim<-dmoday$dailyTmax-dmoday$TMax
167
  daily_deltaclim_v<-data_v$dailyTmax-data_v$TMax  #For validation
180
  daily_deltaclim<-dmoday$dailyTmax-dmoday$TMax    #For daily surface interpolation...
181
  daily_deltaclim_v<-data_v$dailyTmax-data_v$TMax  #For validation...
168 182
  #dmoday$daily_deltaclim <-daily_deltaclim
169 183
  #fitdelta<-Tps(daily_sta_xy,daily_delta) #use TPS or krige
170 184
  fitdelta<-Krig(daily_sta_xy,daily_delta,theta=1e5) #use TPS or krige
......
193 207
  #### Added by Benoit ends
194 208
  
195 209
  #########
196
  # STEP 8 - assemble final answer - T= LST+Bias(interpolated)+delta(interpolated)
197
  #                                  T= clim(interpolated) + deltaclim(interpolated)
210
  # STEP 8 - assemble final answer - T= LST-Bias(interpolated)+delta(interpolated)    (This is for fusion not implemented in this script...)
211
  #                                  T= clim(interpolated) + deltaclim(interpolated)  (This is for CAI)
198 212
  #########
199 213

  
200 214
  #bias_rast=interpolate(themolst,fitbias) #interpolation using function from raster package
......
263 277
  resid=sta_pred-tmax
264 278
  #quilt.plot(daily_sta_lola,resid)
265 279
  
266
  
267
  
280

  
268 281
  ###BEFORE GAM prediction the data object must be transformed to SDF
269 282
  
270 283
  coords<- data_v[,c('x_OR83M','y_OR83M')]
......
273 286
  coords<- data_s[,c('x_OR83M','y_OR83M')]
274 287
  coordinates(data_s)<-coords
275 288
  proj4string(data_s)<-CRS  #Need to assign coordinates..
289
  coords<- modst[,c('x_OR83M','y_OR83M')]
290
  coordinates(modst)<-coords
291
  proj4string(modst)<-CRS  #Need to assign coordinates..
276 292
  
277 293
  ns<-nrow(data_s) #This is added to because some loss of data might have happened because of the averaging...
278 294
  nv<-nrow(data_v)
......
285 301
  data_s$y_var<-data_s$daily_deltaclim
286 302
  data_v$y_var<-data_v$daily_deltaclim
287 303
  
288
  if (climgam==1){
304
  if (climgam==1){          #This is an option to use covariates in the daily surface...
289 305
    data_s$y_var<-data_s$TMax
290 306
    data_v$y_var<-data_v$TMax
307
    data_month<-modst
308
    data_month$y_var<-modst$TMax
291 309
  }
292 310
  
293 311
  #Model and response variable can be changed without affecting the script
......
301 319
  formula7 <- as.formula("y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST)+s(LC3)", env=.GlobalEnv)
302 320
  formula8 <- as.formula("y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST) + s(LC1,LC3)", env=.GlobalEnv)
303 321
  
304
  mod1<- try(gam(formula1, data=data_s))
305
  mod2<- try(gam(formula2, data=data_s)) #modified nesting....from 3 to 2
306
  mod3<- try(gam(formula3, data=data_s))
307
  mod4<- try(gam(formula4, data=data_s))
308
  mod5<- try(gam(formula5, data=data_s))
309
  mod6<- try(gam(formula6, data=data_s))
310
  mod7<- try(gam(formula7, data=data_s))
311
  mod8<- try(gam(formula8, data=data_s))
322
  #mod1<- try(gam(formula1, data=data_s))
323
  #mod2<- try(gam(formula2, data=data_s)) #modified nesting....from 3 to 2
324
  #mod3<- try(gam(formula3, data=data_s))
325
  #mod4<- try(gam(formula4, data=data_s))
326
  #mod5<- try(gam(formula5, data=data_s))
327
  #mod6<- try(gam(formula6, data=data_s))
328
  #mod7<- try(gam(formula7, data=data_s))
329
  #mod8<- try(gam(formula8, data=data_s))
312 330

  
331
  if (climgam==1){          #This will automatically use monthly station data in the second step
332
    mod1<- try(gam(formula1, data=data_month))
333
    mod2<- try(gam(formula2, data=data_month)) #modified nesting....from 3 to 2
334
    mod3<- try(gam(formula3, data=data_month))
335
    mod4<- try(gam(formula4, data=data_month))
336
    mod5<- try(gam(formula5, data=data_month))
337
    mod6<- try(gam(formula6, data=data_month))
338
    mod7<- try(gam(formula7, data=data_month))
339
    mod8<- try(gam(formula8, data=data_month)) 
340
    
341
  } else if (climgam==0){ #This will use daily delta in the second step
342
    
343
    mod1<- try(gam(formula1, data=data_s))
344
    mod2<- try(gam(formula2, data=data_s)) #modified nesting....from 3 to 2
345
    mod3<- try(gam(formula3, data=data_s))
346
    mod4<- try(gam(formula4, data=data_s))
347
    mod5<- try(gam(formula5, data=data_s))
348
    mod6<- try(gam(formula6, data=data_s))
349
    mod7<- try(gam(formula7, data=data_s))
350
    mod8<- try(gam(formula8, data=data_s))
351
  }
352
  
313 353
  ### Added by benoit
314 354
  #Store results using TPS
315 355
  j=nmodels+1
......
360 400
  
361 401
  #ns<-nrow(data_s) #This is added to because some loss of data might have happened because of the averaging...
362 402
  #nv<-nrow(data_v)
363
  browser()
403
  #browser()
364 404
  
365 405
  for (j in 1:nmodels){
366 406
    
......
426 466
    #If mod "j" is not a model object
427 467
    if (inherits(mod,"gam")) {
428 468
      
469
      # model specific metrics
429 470
      results_m1[1,1]<- sampling_dat$date[i]  #storing the interpolation dates in the first column
430 471
      results_m1[1,2]<- ns        #number of stations used in the training stage
431 472
      results_m1[1,3]<- "AIC"
......
441 482
      results_m3[1,3]<- "DEV"
442 483
      results_m3[1,j+3]<- mod$deviance
443 484
      
444
      y_var_fit= mod$fit
445
      R2_mod_f<- cor(data_s$dailyTmax,y_var_fit, use="complete")^2
446
      #RMSE_mod_f<- sqrt(mean(res_mod_s^2,na.rm=TRUE))
447
      
448
      results_RMSE_f[1,1]<- sampling_dat$date[i]  #storing the interpolation dates in the first column
449
      results_RMSE_f[1,2]<- ns        #number of stations used in the training stage
450
      results_RMSE_f[1,3]<- "RSME_f"
451
      #results_RMSE_f[j+3]<- sqrt(sum((y_var_fit-data_s$y_var)^2)/ns)
452
      results_RMSE_f[1,j+3]<-sqrt(mean(mod$residuals^2,na.rm=TRUE))
453
      
454
      results_MAE_f[1,1]<- sampling_dat$date[i]  #storing the interpolation dates in the first column
455
      results_MAE_f[1,2]<- ns        #number of stations used in the training stage
456
      results_MAE_f[1,3]<- "MAE_f"
457
      #results_MAE_f[j+3]<-sum(abs(y_var_fit-data_s$y_var))/ns
458
      results_MAE_f[1,j+3]<-mean(abs(mod$residuals),na.rm=TRUE)
459
      
460
      results_R2_f[1,1]<- sampling_dat$date[i]      #storing the interpolation dates in the first column
461
      results_R2_f[1,2]<- ns            #number of stations used in the training stage
462
      results_R2_f[1,3]<- "R2_f"
463
      results_R2_f[1,j+3]<- R2_mod_f      #Storing R2 for the model j
464
      
465 485
      ##Model assessment: general diagnostic/metrics
466 486
      ##validation: using the testing data
467 487
      if (predval==1) {
......
483 503
        writeRaster(raster_pred, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
484 504
        #writeRaster(r2, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
485 505
        
486
        tmax_predicted_CAI<-raster_pred + clim_rast #Final surface  as a raster layer...
506
        tmax_predicted_CAI<-raster_pred + clim_rast #Final surface  as a raster layer...taht is if daily prediction with GAM
487 507
        if (climgam==1){
488 508
          tmax_predicted_CAI<-raster_pred + daily_deltaclim_rast #Final surface  as a raster layer...
489 509
        }
490 510
          
491 511
        layerNames(tmax_predicted_CAI)<-"y_pred"
492 512
        data_name<-paste("predicted_mod",j,"_",sampling_dat$date[i],"_",sampling_dat$prop[i],"_",sampling_dat$run_samp[i],sep="")
493
        raster_name<-paste("GAMCAI_tmax_pred_",data_name,out_prefix,".rst", sep="")
513
        raster_name<-paste("GAMCAI_tmax_predicted_",data_name,out_prefix,".rst", sep="")
494 514
        writeRaster(tmax_predicted_CAI, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
495 515
        #writeRaster(r2, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
496 516
        
......
513 533
      }
514 534
      
515 535
      if (predval==0) {
516
      
536
        
517 537
        y_mod<- predict(mod, newdata=data_v, se.fit = TRUE) #Using the coeff to predict new values.
518 538
        
519 539
        pred_mod<-paste("pred_mod",j,sep="")
......
527 547
        res_mod_v<- data_v$dailyTmax - data_v[[pred_mod]]           #Residuals from kriging validation
528 548
      }
529 549

  
530
      ####ADDED ON JULY 20th
550
      #y_var_fit= mod$fit #move it
551
      #Use res_mod_s so the R2 is based on daily station training
552
      R2_mod_f<- cor(data_s$dailyTmax,res_mod_s, use="complete")^2
553
      RMSE_mod_f<- sqrt(mean(res_mod_s^2,na.rm=TRUE))
554
      
555
      results_RMSE_f[1,1]<- sampling_dat$date[i]  #storing the interpolation dates in the first column
556
      results_RMSE_f[1,2]<- ns        #number of stations used in the training stage
557
      results_RMSE_f[1,3]<- "RSME_f"
558
      #results_RMSE_f[1,j+3]<-sqrt(mean(mod$residuals^2,na.rm=TRUE))
559
      results_RMSE_f[1,j+3]<-sqrt(mean(res_mod_s^2,na.rm=TRUE))
560
      
561
      results_MAE_f[1,1]<- sampling_dat$date[i]  #storing the interpolation dates in the first column
562
      results_MAE_f[1,2]<- ns        #number of stations used in the training stage
563
      results_MAE_f[1,3]<- "MAE_f"
564
      #results_MAE_f[j+3]<-sum(abs(y_var_fit-data_s$y_var))/ns
565
      results_MAE_f[1,j+3]<-mean(abs(res_mod_s),na.rm=TRUE)
566
      
567
      results_R2_f[1,1]<- sampling_dat$date[i]      #storing the interpolation dates in the first column
568
      results_R2_f[1,2]<- ns            #number of stations used in the training stage
569
      results_R2_f[1,3]<- "R2_f"
570
      results_R2_f[1,j+3]<- R2_mod_f      #Storing R2 for the model j
571
      
572
      #### Now calculate validation metrics
531 573
      res_mod<-res_mod_v
532 574
      
533 575
      #RMSE_mod <- sqrt(sum(res_mod^2)/nv)                 #RMSE FOR REGRESSION STEP 1: GAM  
......
604 646
  mod_obj<-list(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8,mod9a,mod9b)
605 647
  names(mod_obj)<-c("mod1","mod2","mod3","mod4","mod5","mod6","mod7","mod8","mod9a","mod9b") #generate names automatically??
606 648
  #results_list<-list(data_s,data_v,tb_metrics1,tb_metrics2)
607
  results_list<-list(data_s,data_v,tb_metrics1,tb_metrics2,mod_obj)
608
  names(results_list)<-c("data_s","data_v","tb_metrics1","tb_metrics2","mod_obj")
649
  #results_list<-list(data_s,data_v,tb_metrics1,tb_metrics2,mod_obj)
650
  results_list<-list(data_s,data_v,tb_metrics1,tb_metrics2,mod_obj,data_month)
651
  names(results_list)<-c("data_s","data_v","tb_metrics1","tb_metrics2","mod_obj","data_month")
609 652
  save(results_list,file= paste(path,"/","results_list_metrics_objects_",sampling_dat$date[i],"_",sampling_dat$prop[i],"_",sampling_dat$run_samp[i],
610 653
                                out_prefix,".RData",sep=""))
611 654
  return(results_list)

Also available in: Unified diff