Revision fbc95c44
Added by Benoit Parmentier over 12 years ago
climate/research/oregon/interpolation/Linear_reg.R | ||
---|---|---|
13 | 13 |
###Parameters and arguments |
14 | 14 |
#infile1<-"ghcn_or_b_02122012_OR83M.csv" |
15 | 15 |
infile1<-"ghcn_or_tmax_b_03032012_OR83M.csv" |
16 |
path<-"C:/Data/Benoit/NCEAS/window_Oregon_data" |
|
16 |
#path<-"C:/Data/Benoit/NCEAS/window_Oregon_data |
|
17 |
path<-"/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations" |
|
17 | 18 |
setwd(path) |
18 | 19 |
#infile2<-"dates_interpolation_03012012.txt" # list of 10 dates for the regression |
19 | 20 |
infile2<-"dates_interpolation_03052012.txt" |
20 | 21 |
prop<-0.3 #Proportion of testing retained for validation |
21 |
out_prefix<-"_03042012_r1" |
|
22 |
out_prefix<-"_03042012_interaction_r1"
|
|
22 | 23 |
infile3<-"models_interpolation_03052012.txt" |
23 | 24 |
|
24 | 25 |
|
... | ... | |
37 | 38 |
|
38 | 39 |
results <- matrix(1,length(dates),14) #This is a matrix containing the diagnostic measures from the GAM models. |
39 | 40 |
|
40 |
results_AIC<- matrix(1,length(dates),length(models)+3) |
|
41 |
results_GCV<- matrix(1,length(dates),length(models)+3) |
|
42 |
results_RMSE<- matrix(1,length(dates),length(models)+3) |
|
41 |
results_AIC<- matrix(1,length(dates),length(models)+2) |
|
42 |
results_GCV<- matrix(1,length(dates),length(models)+2) |
|
43 |
results_DEV<- matrix(1,length(dates),length(models)+2) |
|
44 |
results_RMSE<- matrix(1,length(dates),length(models)+2) |
|
45 |
|
|
46 |
#Screening for bad values |
|
47 |
|
|
48 |
|
|
49 |
#tmax~ lon + lat + ELEV_SRTM + Eastness + Northness + DISTOC |
|
50 |
#tmax range: min max) |
|
51 |
ghcn_all<-ghcn |
|
52 |
ghcn_test<-subset(ghcn,ghcn$tmax>0 & ghcn$tmax<400) |
|
53 |
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0) |
|
54 |
ghcn<-ghcn_test2 |
|
55 |
#lon range |
|
56 |
#lat range |
|
57 |
#ELEV_SRTM |
|
58 |
#Eastness |
|
59 |
#Northness |
|
60 |
|
|
43 | 61 |
|
44 | 62 |
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==d)) #this creates a list of 10 subsets data |
45 | 63 |
#note that compare to the previous version date_ column was changed to date |
... | ... | |
47 | 65 |
## looping through the dates... |
48 | 66 |
#Change this into a nested loop, looping through the number of models |
49 | 67 |
|
68 |
|
|
50 | 69 |
for(i in 1:length(dates)){ # start of the for loop #1 |
51 | 70 |
|
52 | 71 |
###Regression part 1: Creating a validation dataset by creating training and testing datasets |
... | ... | |
63 | 82 |
####Regression part 2: GAM models |
64 | 83 |
|
65 | 84 |
mod1<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM), data=data_s) |
66 |
mod2<- gam(tmax~ s(lat,lon,ELEV_SRTM), data=data_s)
|
|
85 |
mod2<- gam(tmax~ s(lat,lon) + s(ELEV_SRTM), data=data_s)
|
|
67 | 86 |
mod3<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) + s (Northness)+ s (Eastness) + s(DISTOC), data=data_s) |
68 | 87 |
mod4<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) + s (Northness_w)+ s (Eastness_w) + s(DISTOC), data=data_s) |
69 |
mod5<- gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM, Northness) + s (Eastness) + s(DISTOC), data=data_s)
|
|
70 |
mod6<- gam(tmax~ s(lat,lon) + s (ELEV_SRTM, Northness) + s (Eastness) + s(DISTOC), data=data_s)
|
|
88 |
mod5<- gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC), data=data_s)
|
|
89 |
mod6<- gam(tmax~ s(lat,lon) + s (ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC), data=data_s)
|
|
71 | 90 |
|
72 | 91 |
|
73 | 92 |
####Regression part 3: Calculating and storing diagnostic measures |
74 |
|
|
75 | 93 |
results_AIC[i,1]<- dates[i] #storing the interpolation dates in the first column |
76 | 94 |
results_AIC[i,2]<- ns #number of stations used in the training stage |
77 | 95 |
results_AIC[i,3]<- AIC (mod1) |
... | ... | |
106 | 124 |
y_mod3<- predict(mod3, newdata=data_v, se.fit = TRUE) |
107 | 125 |
y_mod4<- predict(mod4, newdata=data_v, se.fit = TRUE) |
108 | 126 |
y_mod5<- predict(mod5, newdata=data_v, se.fit = TRUE) |
109 |
y_mod5<- predict(mod6, newdata=data_v, se.fit = TRUE)
|
|
127 |
y_mod6<- predict(mod6, newdata=data_v, se.fit = TRUE)
|
|
110 | 128 |
|
111 | 129 |
res_mod1<- data_v$tmax - y_mod1$fit #Residuals for GMA model that resembles the ANUSPLIN interpolation |
112 | 130 |
res_mod2<- data_v$tmax - y_mod2$fit #Residuals for GAM model that resembles the PRISM interpolation |
... | ... | |
149 | 167 |
results_table_RMSE<-as.data.frame(results_RMSEnum) |
150 | 168 |
colnames(results_table_RMSE)<-c("dates","ns","mod1", "mod2","mod3", "mod4", "mod5", "mod6") |
151 | 169 |
|
152 |
win.graph() |
|
153 |
barplot(results_table$RMSE_A1/10,main="RMSE for the A1 models",names.arg=results_table$dates,ylab="Temp (deg. C)",xlab="interpolated date") |
|
154 |
savePlot(paste("GAM_ANUSPLIN1_RMSE",out_prefix,".emf", sep=""), type="emf") |
|
155 |
win.graph() |
|
156 |
barplot(results_table$RMSE_P1/10,main="RMSE for the P1 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date") |
|
157 |
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf") |
|
158 |
win.graph() |
|
159 |
barplot(results_table$RMSE_P2/10,main="RMSE for the P2 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date") |
|
160 |
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf") |
|
161 |
win.graph() |
|
162 |
barplot(results_table$AIC_A1,main="AIC for the A1 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date") |
|
163 |
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf") |
|
164 |
win.graph() |
|
165 |
barplot(results_table$AIC_P1/10,main="AIC for the P1 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date") |
|
166 |
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf") |
|
167 |
win.graph() |
|
168 |
barplot(results_table$AIC_P2/10,main="AIC for the P2 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date") |
|
169 |
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf") |
|
170 |
win.graph() |
|
171 |
barplot(results_table$Deviance_A1/10,main="Deviance for the A1 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date") |
|
172 |
savePlot(paste("GAM_ANUSPLIN1_Deviance",out_prefix,".emf", sep=""), type="emf") |
|
173 |
win.graph() |
|
174 |
barplot(results_table$Deviance_P1/10,main="Deviance for the P1 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date") |
|
175 |
savePlot(paste("GAM_PRISM1_Deviance",out_prefix,".emf", sep=""), type="emf") |
|
176 |
win.graph() |
|
177 |
barplot(results_table$Deviance_P2/10,main="Deviance for the P2 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date") |
|
178 |
savePlot(paste("GAM_PRISM2_Deviance",out_prefix,".emf", sep=""), type="emf") |
|
179 |
|
|
180 |
write.csv(results_table, file= paste(path,"/","results_GAM_Assessment",out_prefix,".txt",sep="")) |
|
170 |
# win.graph() |
|
171 |
# barplot(results_table$RMSE_A1/10,main="RMSE for the A1 models",names.arg=results_table$dates,ylab="Temp (deg. C)",xlab="interpolated date") |
|
172 |
# savePlot(paste("GAM_ANUSPLIN1_RMSE",out_prefix,".emf", sep=""), type="emf") |
|
173 |
# win.graph() |
|
174 |
# barplot(results_table$RMSE_P1/10,main="RMSE for the P1 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date") |
|
175 |
# savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf") |
|
176 |
# win.graph() |
|
177 |
# barplot(results_table$RMSE_P2/10,main="RMSE for the P2 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date") |
|
178 |
# savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf") |
|
179 |
# win.graph() |
|
180 |
# barplot(results_table$AIC_A1,main="AIC for the A1 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date") |
|
181 |
# savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf") |
|
182 |
# win.graph() |
|
183 |
# barplot(results_table$AIC_P1/10,main="AIC for the P1 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date") |
|
184 |
# savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf") |
|
185 |
# win.graph() |
|
186 |
# barplot(results_table$AIC_P2/10,main="AIC for the P2 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date") |
|
187 |
# savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", s‰‰ep=""), type="emf") |
|
188 |
# win.graph() |
|
189 |
# barplot(results_table$Deviance_A1/10,main="Deviance for the A1 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date") |
|
190 |
# savePlot(paste("GAM_ANUSPLIN1_Deviance",out_prefix,".emf", sep=""), type="emf") |
|
191 |
# win.graph() |
|
192 |
# barplot(results_table$Deviance_P1/10,main="Deviance for the P1 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date") |
|
193 |
# savePlot(paste("GAM_PRISM1_Deviance",out_prefix,".emf", sep=""), type="emf") |
|
194 |
# win.graph() |
|
195 |
# barplot(results_table$Deviance_P2/10,main="Deviance for the P2 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date") |
|
196 |
# savePlot(paste("GAM_PRISM2_Deviance",out_prefix,".emf", sep=""), type="emf") |
|
197 |
|
|
198 |
#results_table_RMSE |
|
199 |
write.csv(results_table_RMSE, file= paste(path,"/","results_GAM_Assessment",out_prefix,".txt",sep="")) |
|
181 | 200 |
|
182 | 201 |
|
183 | 202 |
# End of script########## |
Also available in: Unified diff
GAM, modification of nesting of models and clean up, task #361