Project

General

Profile

« Previous | Next » 

Revision d2272742

Added by Benoit Parmentier over 12 years ago

FUSION, training and testing for year 2010 read from the GHCND database

View differences:

climate/research/oregon/interpolation/fusion_reg.R
19 19
library(raster)                              # Hijmans et al. package for raster processing
20 20
### Parameters and argument
21 21

  
22
infile1<- "ghcn_or_tmax_b_04142012_OR83M.shp"             #GHCN shapefile containing variables for modeling 2010                 
22
infile1<- "ghcn_or_tmax_covariates_06262012_OR83M.shp"             #GHCN shapefile containing variables for modeling 2010                 
23 23
infile2<-"list_10_dates_04212012.txt"                     #List of 10 dates for the regression
24 24
#infile2<-"list_365_dates_04212012.txt"
25 25
infile3<-"LST_dates_var_names.txt"                        #LST dates name
......
36 36

  
37 37
prop<-0.3                                                                           #Proportion of testing retained for validation   
38 38
seed_number<- 100                                                                   #Seed number for random sampling
39
out_prefix<-"_06192012_10d_fusion5"                                                   #User defined output prefix
39
out_prefix<-"_07022012_10d_fusion8"                                                   #User defined output prefix
40 40
setwd(path)
41 41
############ START OF THE SCRIPT ##################
42 42

  
......
84 84
# cor_LST_LC3<-matrix(1,10,1)      #correlation LST-LC3
85 85
# cor_LST_tmax<-matrix(1,10,1)     #correlation LST-tmax
86 86

  
87
#Screening for bad values
87
#Screening for bad values: element is tmax in this case
88
ghcn$element<-as.numeric(ghcn$element)
88 89
ghcn_all<-ghcn
89
ghcn_test<-subset(ghcn,ghcn$tmax>-150 & ghcn$tmax<400)
90
ghcn_test<-subset(ghcn,ghcn$element>-150 & ghcn$element<400)
90 91
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0)
91 92
ghcn<-ghcn_test2
92 93
#coords<- ghcn[,c('x_OR83M','y_OR83M')]
......
248 249
  #added by Benoit 
249 250
  #d<-ghcn.subsets[[i]]
250 251
  d<-data_s
251
  names(d)[8]<-c("dailyTmax")
252
  d$dailyTmax=d$dailyTmax/10 #stored as 1/10 degree C to allow integer storage
252
  names(d)[5]<-c("dailyTmax")
253
  d$dailyTmax=(as.numeric(d$dailyTmax))/10 #stored as 1/10 degree C to allow integer storage
253 254
  names(d)[1]<-c("id")
254
  names(modst)[1]<-c("id")
255
  names(modst)[1]<-c("id")       #modst contains the average tmax per month for every stations...
255 256
  dmoday=merge(modst,d,by="id")  #LOOSING DATA HERE!!! from 162 t0 146
256 257
  names(dmoday)[4]<-c("lat")
257 258
  names(dmoday)[5]<-c("lon")
258 259
  ###
260
  
259 261
  #dmoday contains the daily tmax values with TMax being the monthly station tmax mean
260 262
  
261 263
  # windows()
......
288 290
  #### Added by Benoit on 06/19
289 291
  data_s<-dmoday #put the 
290 292
  data_s$daily_delta<-daily_delta
291
  data_s$y_var<-daily_delta  #y_var is the variable currently being modeled, may be better with BIAS!!
292
  
293
  #data_s$y_var<-daily_delta  #y_var is the variable currently being modeled, may be better with BIAS!!
294
  data_s$y_var<-data_s$dailyTmax
293 295
  #Model and response variable can be changed without affecting the script
294 296
  
295 297
  mod1<- gam(y_var~ s(lat) + s (lon) + s (ELEV_SRTM), data=data_s)
......
390 392
    ##validation: using the testing data
391 393
    
392 394
    #This was modified on 06192012
395
    
396
    #data_v$y_var<-data_v$tmax/10
397
    data_v$y_var<-tmax
393 398
    y_mod<- predict(mod, newdata=data_v, se.fit = TRUE) #Using the coeff to predict new values.
394
    sta_LST=lookup(themolst,data_v$lat,data_v$lon)
395
    sta_bias=lookup(bias_rast,data_v$lat,data_v$lon)
396
    tmax_predicted=sta_LST+sta_bias-y_mod$fit
397 399
    
398
    data_v$tmax<-(data_v$tmax)/10
399
    res_mod<- data_v$tmax - tmax_predicted              #Residuals for the model
400
    #res_mod<- data_v$tmax - y_mod$fit                  #Residuals for the model
400
    #sta_LST=lookup(themolst,data_v$lat,data_v$lon)
401
    #sta_bias=lookup(bias_rast,data_v$lat,data_v$lon)
402
    #tmax_predicted=sta_LST+sta_bias-y_mod$fit
403
    
404
    #data_v$tmax<-(data_v$tmax)/10
405
    #res_mod<- data_v$tmax - tmax_predicted              #Residuals for the model for fusion
406
    res_mod<- data_v$y_var - y_mod$fit                  #Residuals for the model
401 407
    
402 408
    RMSE_mod <- sqrt(sum(res_mod^2)/nv)                 #RMSE FOR REGRESSION STEP 1: GAM     
403 409
    MAE_mod<- sum(abs(res_mod))/nv                     #MAE, Mean abs. Error FOR REGRESSION STEP 1: GAM   

Also available in: Unified diff