Project

General

Profile

« Previous | Next » 

Revision fbc95c44

Added by Benoit Parmentier over 12 years ago

GAM, modification of nesting of models and clean up, task #361

View differences:

climate/research/oregon/interpolation/Linear_reg.R
13 13
###Parameters and arguments
14 14
#infile1<-"ghcn_or_b_02122012_OR83M.csv"
15 15
infile1<-"ghcn_or_tmax_b_03032012_OR83M.csv"
16
path<-"C:/Data/Benoit/NCEAS/window_Oregon_data"
16
#path<-"C:/Data/Benoit/NCEAS/window_Oregon_data
17
path<-"/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations"
17 18
setwd(path)
18 19
#infile2<-"dates_interpolation_03012012.txt"  # list of 10 dates for the regression
19 20
infile2<-"dates_interpolation_03052012.txt"
20 21
prop<-0.3                                                                            #Proportion of testing retained for validation   
21
out_prefix<-"_03042012_r1"
22
out_prefix<-"_03042012_interaction_r1"
22 23
infile3<-"models_interpolation_03052012.txt"
23 24

  
24 25

  
......
37 38

  
38 39
results <- matrix(1,length(dates),14)            #This is a matrix containing the diagnostic measures from the GAM models.
39 40

  
40
results_AIC<- matrix(1,length(dates),length(models)+3)  
41
results_GCV<- matrix(1,length(dates),length(models)+3)
42
results_RMSE<- matrix(1,length(dates),length(models)+3)
41
results_AIC<- matrix(1,length(dates),length(models)+2)  
42
results_GCV<- matrix(1,length(dates),length(models)+2)
43
results_DEV<- matrix(1,length(dates),length(models)+2)
44
results_RMSE<- matrix(1,length(dates),length(models)+2)
45

  
46
#Screening for bad values
47

  
48

  
49
#tmax~ lon + lat + ELEV_SRTM + Eastness + Northness + DISTOC
50
#tmax range: min max)
51
ghcn_all<-ghcn
52
ghcn_test<-subset(ghcn,ghcn$tmax>0 & ghcn$tmax<400)
53
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0)
54
ghcn<-ghcn_test2
55
#lon range
56
#lat range
57
#ELEV_SRTM
58
#Eastness
59
#Northness
60

  
43 61

  
44 62
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==d)) #this creates a list of 10 subsets data
45 63
#note that compare to the previous version date_ column was changed to date
......
47 65
## looping through the dates...
48 66
#Change this into  a nested loop, looping through the number of models
49 67

  
68

  
50 69
for(i in 1:length(dates)){            # start of the for loop #1
51 70
  
52 71
  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
......
63 82
  ####Regression part 2: GAM models
64 83

  
65 84
  mod1<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM), data=data_s)
66
  mod2<- gam(tmax~ s(lat,lon,ELEV_SRTM), data=data_s)
85
  mod2<- gam(tmax~ s(lat,lon) + s(ELEV_SRTM), data=data_s)
67 86
  mod3<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) +  s (Northness)+ s (Eastness) + s(DISTOC), data=data_s)
68 87
  mod4<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) + s (Northness_w)+ s (Eastness_w) + s(DISTOC), data=data_s)
69
  mod5<- gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM, Northness) + s (Eastness) + s(DISTOC), data=data_s)
70
  mod6<- gam(tmax~ s(lat,lon) + s (ELEV_SRTM, Northness) + s (Eastness) + s(DISTOC), data=data_s)
88
  mod5<- gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC), data=data_s)
89
  mod6<- gam(tmax~ s(lat,lon) + s (ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC), data=data_s)
71 90
  
72 91
  
73 92
  ####Regression part 3: Calculating and storing diagnostic measures
74
  
75 93
  results_AIC[i,1]<- dates[i]  #storing the interpolation dates in the first column
76 94
  results_AIC[i,2]<- ns        #number of stations used in the training stage
77 95
  results_AIC[i,3]<- AIC (mod1)
......
106 124
  y_mod3<- predict(mod3, newdata=data_v, se.fit = TRUE) 
107 125
  y_mod4<- predict(mod4, newdata=data_v, se.fit = TRUE) 
108 126
  y_mod5<- predict(mod5, newdata=data_v, se.fit = TRUE) 
109
  y_mod5<- predict(mod6, newdata=data_v, se.fit = TRUE)
127
  y_mod6<- predict(mod6, newdata=data_v, se.fit = TRUE)
110 128
  
111 129
  res_mod1<- data_v$tmax - y_mod1$fit #Residuals for GMA model that resembles the ANUSPLIN interpolation
112 130
  res_mod2<- data_v$tmax - y_mod2$fit   #Residuals for GAM model that resembles the PRISM interpolation                               
......
149 167
results_table_RMSE<-as.data.frame(results_RMSEnum)
150 168
colnames(results_table_RMSE)<-c("dates","ns","mod1", "mod2","mod3", "mod4", "mod5", "mod6")
151 169

  
152
win.graph()
153
barplot(results_table$RMSE_A1/10,main="RMSE for the A1 models",names.arg=results_table$dates,ylab="Temp (deg. C)",xlab="interpolated date")
154
savePlot(paste("GAM_ANUSPLIN1_RMSE",out_prefix,".emf", sep=""), type="emf")
155
win.graph()
156
barplot(results_table$RMSE_P1/10,main="RMSE for the P1 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date")
157
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf")
158
win.graph()
159
barplot(results_table$RMSE_P2/10,main="RMSE for the P2 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date")
160
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf")
161
win.graph()
162
barplot(results_table$AIC_A1,main="AIC for the A1 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date")
163
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf")
164
win.graph()
165
barplot(results_table$AIC_P1/10,main="AIC for the P1 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date")
166
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf")
167
win.graph()
168
barplot(results_table$AIC_P2/10,main="AIC for the P2 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date")
169
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf")
170
win.graph()
171
barplot(results_table$Deviance_A1/10,main="Deviance for the A1 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date")
172
savePlot(paste("GAM_ANUSPLIN1_Deviance",out_prefix,".emf", sep=""), type="emf")
173
win.graph()
174
barplot(results_table$Deviance_P1/10,main="Deviance for the P1 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date")
175
savePlot(paste("GAM_PRISM1_Deviance",out_prefix,".emf", sep=""), type="emf")
176
win.graph()
177
barplot(results_table$Deviance_P2/10,main="Deviance for the P2 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date")
178
savePlot(paste("GAM_PRISM2_Deviance",out_prefix,".emf", sep=""), type="emf")
179

  
180
write.csv(results_table, file= paste(path,"/","results_GAM_Assessment",out_prefix,".txt",sep=""))
170
# win.graph()
171
# barplot(results_table$RMSE_A1/10,main="RMSE for the A1 models",names.arg=results_table$dates,ylab="Temp (deg. C)",xlab="interpolated date")
172
# savePlot(paste("GAM_ANUSPLIN1_RMSE",out_prefix,".emf", sep=""), type="emf")
173
# win.graph()
174
# barplot(results_table$RMSE_P1/10,main="RMSE for the P1 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date")
175
# savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf")
176
# win.graph()
177
# barplot(results_table$RMSE_P2/10,main="RMSE for the P2 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date")
178
# savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf")
179
# win.graph()
180
# barplot(results_table$AIC_A1,main="AIC for the A1 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date")
181
# savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf")
182
# win.graph()
183
# barplot(results_table$AIC_P1/10,main="AIC for the P1 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date")
184
# savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf")
185
# win.graph()
186
# barplot(results_table$AIC_P2/10,main="AIC for the P2 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date")
187
# savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", s‰‰ep=""), type="emf")
188
# win.graph()
189
# barplot(results_table$Deviance_A1/10,main="Deviance for the A1 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date")
190
# savePlot(paste("GAM_ANUSPLIN1_Deviance",out_prefix,".emf", sep=""), type="emf")
191
# win.graph()
192
# barplot(results_table$Deviance_P1/10,main="Deviance for the P1 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date")
193
# savePlot(paste("GAM_PRISM1_Deviance",out_prefix,".emf", sep=""), type="emf")
194
# win.graph()
195
# barplot(results_table$Deviance_P2/10,main="Deviance for the P2 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date")
196
# savePlot(paste("GAM_PRISM2_Deviance",out_prefix,".emf", sep=""), type="emf")
197

  
198
#results_table_RMSE
199
write.csv(results_table_RMSE, file= paste(path,"/","results_GAM_Assessment",out_prefix,".txt",sep=""))
181 200

  
182 201

  
183 202
# End of script##########

Also available in: Unified diff