Project

General

Profile

« Previous | Next » 

Revision aac7c36c

Added by Benoit Parmentier over 12 years ago

  • ID aac7c36c047121785d502578086e556a3a353ea5
  • Child 23ed3053

Added 3 new models and nesting to GAM, task #361

View differences:

climate/research/oregon/interpolation/Linear_reg.R
19 19
infile2<-"dates_interpolation_03052012.txt"
20 20
prop<-0.3                                                                            #Proportion of testing retained for validation   
21 21
out_prefix<-"_03042012_r1"
22
infile3<-"models_interpolation_03052012.txt"
23

  
22 24

  
23 25
#######START OF THE SCRIPT #############
24 26

  
......
31 33
ghcn = transform(ghcn,Eastness_w = sin(slope)*sin(ASPECT))  #adding variable to the dataframe.
32 34
set.seed(100)
33 35
dates <-readLines(paste(path,"/",infile2, sep=""))
36
models <-readLines(paste(path,"/",infile3, sep=""))
34 37

  
35 38
results <- matrix(1,length(dates),14)            #This is a matrix containing the diagnostic measures from the GAM models.
36 39

  
40
results_AIC<- matrix(1,length(dates),length(models)+3)  
41
results_GCV<- matrix(1,length(dates),length(models)+3)
42
results_RMSE<- matrix(1,length(dates),length(models)+3)
43

  
37 44
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==d)) #this creates a list of 10 subsets data
38 45
#note that compare to the previous version date_ column was changed to date
39 46

  
40 47
## looping through the dates...
48
#Change this into  a nested loop, looping through the number of models
41 49

  
42 50
for(i in 1:length(dates)){            # start of the for loop #1
43 51
  
......
54 62
  
55 63
  ####Regression part 2: GAM models
56 64

  
57
  GAM_ANUSPLIN1<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM), data=data_s)
58
  GAM_PRISM1<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) +  s (Northness)+ s (Eastness) + s(DISTOC), data=data_s)
59
  GAM_PRISM2<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) + s (Northness_w)+ s (Eastness_w) + s(DISTOC), data=data_s)
65
  mod1<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM), data=data_s)
66
  mod2<- gam(tmax~ s(lat,lon,ELEV_SRTM), data=data_s)
67
  mod3<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) +  s (Northness)+ s (Eastness) + s(DISTOC), data=data_s)
68
  mod4<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) + s (Northness_w)+ s (Eastness_w) + s(DISTOC), data=data_s)
69
  mod5<- gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM, Northness) + s (Eastness) + s(DISTOC), data=data_s)
70
  mod6<- gam(tmax~ s(lat,lon) + s (ELEV_SRTM, Northness) + s (Eastness) + s(DISTOC), data=data_s)
60 71
  
61 72
  
62 73
  ####Regression part 3: Calculating and storing diagnostic measures
63 74
  
64
  results[i,1]<- dates[i]  #storing the interpolation dates in the first column
65
  results[i,2]<- ns        #number of stations used in the training stage
75
  results_AIC[i,1]<- dates[i]  #storing the interpolation dates in the first column
76
  results_AIC[i,2]<- ns        #number of stations used in the training stage
77
  results_AIC[i,3]<- AIC (mod1)
78
  results_AIC[i,4]<- AIC (mod2)
79
  results_AIC[i,5]<- AIC (mod3)
80
  results_AIC[i,6]<- AIC (mod4)
81
  results_AIC[i,7]<- AIC (mod5)
82
  results_AIC[i,8]<- AIC (mod6)
66 83
  
67
  results[i,6]<- AIC (GAM_ANUSPLIN1)
68
  results[i,7]<- AIC (GAM_PRISM1)
69
  results[i,8]<- AIC (GAM_PRISM2)
84
  results_GCV[i,1]<- dates[i]  #storing the interpolation dates in the first column
85
  results_GCV[i,2]<- ns        #number of stations used in the training stage
86
  results_GCV[i,3]<- mod1$gcv.ubre
87
  results_GCV[i,4]<- mod2$gcv.ubre
88
  results_GCV[i,5]<- mod3$gcv.ubre
89
  results_GCV[i,6]<- mod4$gcv.ubre
90
  results_GCV[i,7]<- mod5$gcv.ubre
91
  results_GCV[i,8]<- mod6$gcv.ubre
70 92
  
71
  results[i,9]<- GAM_ANUSPLIN1$gcv.ubre
72
  results[i,10]<- GAM_PRISM1$gcv.ubre
73
  results[i,11]<- GAM_PRISM2$gcv.ubre
74
  
75
  results[i,12]<-GAM_ANUSPLIN1$deviance
76
  results[i,13]<-GAM_PRISM1$deviance
77
  results[i,14]<-GAM_PRISM2$deviance
93
  results_DEV[i,1]<- dates[i]  #storing the interpolation dates in the first column
94
  results_DEV[i,2]<- ns        #number of stations used in the training stage
95
  results_DEV[i,3]<- mod1$deviance
96
  results_DEV[i,4]<- mod2$deviance
97
  results_DEV[i,5]<- mod3$deviance
98
  results_DEV[i,6]<- mod4$deviance
99
  results_DEV[i,7]<- mod5$deviance
100
  results_DEV[i,8]<- mod6$deviance
78 101
  
79 102
  #####VALIDATION: Prediction checking the results using the testing data########
80 103
 
81
  y_pgANUSPLIN1<- predict(GAM_ANUSPLIN1, newdata=data_v, se.fit = TRUE) #Using the coeff to predict new values.
82
  y_pgPRISM1<- predict(GAM_PRISM1, newdata=data_v, se.fit = TRUE)            
83
  y_pgPRISM2<- predict(GAM_PRISM2, newdata=data_v, se.fit = TRUE) 
104
  y_mod1<- predict(mod1, newdata=data_v, se.fit = TRUE) #Using the coeff to predict new values.
105
  y_mod2<- predict(mod2, newdata=data_v, se.fit = TRUE)            
106
  y_mod3<- predict(mod3, newdata=data_v, se.fit = TRUE) 
107
  y_mod4<- predict(mod4, newdata=data_v, se.fit = TRUE) 
108
  y_mod5<- predict(mod5, newdata=data_v, se.fit = TRUE) 
109
  y_mod5<- predict(mod6, newdata=data_v, se.fit = TRUE)
84 110
  
85
  res_ypgA1<- data_v$tmax - y_pgANUSPLIN1$fit #Residuals for GMA model that resembles the ANUSPLIN interpolation
86
  res_ypgP1<- data_v$tmax - y_pgPRISM1$fit   #Residuals for GAM model that resembles the PRISM interpolation                               
87
  res_ypgP2<- data_v$tmax - y_pgPRISM2$fit  
111
  res_mod1<- data_v$tmax - y_mod1$fit #Residuals for GMA model that resembles the ANUSPLIN interpolation
112
  res_mod2<- data_v$tmax - y_mod2$fit   #Residuals for GAM model that resembles the PRISM interpolation                               
113
  res_mod3<- data_v$tmax - y_mod3$fit  
114
  res_mod4<- data_v$tmax - y_mod4$fit
115
  res_mod5<- data_v$tmax - y_mod5$fit
116
  res_mod6<- data_v$tmax - y_mod6$fit
88 117
  
89
  RMSE_ypgA1 <- sqrt(sum(res_ypgA1^2)/nv)          
90
  RMSE_ypgP1 <- sqrt(sum(res_ypgP1^2)/nv)
91
  RMSE_ypgP2 <- sqrt(sum(res_ypgP2^2)/nv)
118
  RMSE_mod1 <- sqrt(sum(res_mod1^2)/nv)          
119
  RMSE_mod2 <- sqrt(sum(res_mod2^2)/nv)
120
  RMSE_mod3 <- sqrt(sum(res_mod3^2)/nv)
121
  RMSE_mod4 <- sqrt(sum(res_mod4^2)/nv)
122
  RMSE_mod5 <- sqrt(sum(res_mod5^2)/nv)
123
  RMSE_mod6 <- sqrt(sum(res_mod6^2)/nv)
92 124
  
93
  results[i,3]<-RMSE_ypgA1
94
  results[i,4]<-RMSE_ypgP1
95
  results[i,5]<-RMSE_ypgP2
125

  
126
  results_RMSE[i,1]<- dates[i]  #storing the interpolation dates in the first column
127
  results_RMSE[i,2]<- ns        #number of stations used in the training stage
128
  results_RMSE[i,3]<- RMSE_mod1
129
  results_RMSE[i,4]<- RMSE_mod2
130
  results_RMSE[i,5]<- RMSE_mod3
131
  results_RMSE[i,6]<- RMSE_mod4
132
  results_RMSE[i,7]<- RMSE_mod5
133
  results_RMSE[i,8]<- RMSE_mod6
96 134
  
97 135
  data_name<-paste("ghcn_v_",dates[[i]],sep="")
98 136
  assign(data_name,data_v)
......
103 141

  
104 142
## Plotting and saving diagnostic measures
105 143

  
106
results_num <-results
107
mode(results_num)<- "numeric"
144
results_RMSEnum <-results_RMSE
145
mode(results_RMSEnum)<- "numeric"
108 146
# Make it numeric first
109 147
# Now turn it into a data.frame...
110 148

  
111
results_table<-as.data.frame(results_num)
112
colnames(results_table)<-c("dates","ns","RMSE_A1", "RMSE_P1","RMSE_P2", "AIC_A1", "AIC_P1", "AIC_P2", "GCV_A1", "GCV_P1", "GCV_P2", "Deviance_A1", "Deviance_P1", "Deviance_P2")
149
results_table_RMSE<-as.data.frame(results_RMSEnum)
150
colnames(results_table_RMSE)<-c("dates","ns","mod1", "mod2","mod3", "mod4", "mod5", "mod6")
113 151

  
114 152
win.graph()
115 153
barplot(results_table$RMSE_A1/10,main="RMSE for the A1 models",names.arg=results_table$dates,ylab="Temp (deg. C)",xlab="interpolated date")

Also available in: Unified diff