Revision 80402fdf
Added by Benoit Parmentier over 12 years ago
climate/research/oregon/interpolation/GAM_LST_reg.R | ||
---|---|---|
24 | 24 |
#infile2<-"dates_interpolation_03012012.txt" # list of 10 dates for the regression |
25 | 25 |
infile2<-"dates_interpolation_03052012.txt" |
26 | 26 |
prop<-0.3 #Proportion of testing retained for validation |
27 |
out_prefix<-"_04032012_LST_r1"
|
|
27 |
out_prefix<-"_04032012_LST_r4"
|
|
28 | 28 |
infile3<-"LST_dates_var_names.txt" |
29 |
infile4<-"models_interpolation_04032012.txt" |
|
29 |
infile4<-"models_interpolation_04032012b.txt"
|
|
30 | 30 |
|
31 | 31 |
|
32 | 32 |
#######START OF THE SCRIPT ############# |
... | ... | |
70 | 70 |
|
71 | 71 |
mod <-ghcn.subsets[[i]][,match(LST_dates[i], names(ghcn.subsets[[i]]))] |
72 | 72 |
ghcn.subsets[[i]] = transform(ghcn.subsets[[i]],LST = mod) |
73 |
#Screening LST values |
|
74 |
#ghcn.subsets[[i]]<-subset(ghcn.subsets[[i]],ghcn.subsets[[i]]$LST> 258 & ghcn.subsets[[i]]$LST<313) |
|
73 | 75 |
n<-nrow(ghcn.subsets[[i]]) |
74 | 76 |
ns<-n-round(n*prop) #Create a sample from the data frame with 70% of the rows |
75 | 77 |
nv<-n-ns #create a sample for validation with prop of the rows |
... | ... | |
89 | 91 |
mod4<- gam(tmax~ s(lat) + s (lon) + s(ELEV_SRTM) + s(Northness) + s (Eastness) + s(DISTOC) + s(LST), data=data_s) |
90 | 92 |
mod5<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST), data=data_s) |
91 | 93 |
mod6<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC1), data=data_s) |
92 |
|
|
94 |
mod7<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC3), data=data_s) |
|
93 | 95 |
|
94 | 96 |
####Regression part 3: Calculating and storing diagnostic measures |
95 | 97 |
results_AIC[i,1]<- dates[i] #storing the interpolation dates in the first column |
... | ... | |
100 | 102 |
results_AIC[i,6]<- AIC (mod4) |
101 | 103 |
results_AIC[i,7]<- AIC (mod5) |
102 | 104 |
results_AIC[i,8]<- AIC (mod6) |
105 |
results_AIC[i,9]<- AIC (mod7) |
|
103 | 106 |
|
104 | 107 |
results_GCV[i,1]<- dates[i] #storing the interpolation dates in the first column |
105 | 108 |
results_GCV[i,2]<- ns #number of stations used in the training stage |
... | ... | |
109 | 112 |
results_GCV[i,6]<- mod4$gcv.ubre |
110 | 113 |
results_GCV[i,7]<- mod5$gcv.ubre |
111 | 114 |
results_GCV[i,8]<- mod6$gcv.ubre |
115 |
results_GCV[i,9]<- mod7$gcv.ubre |
|
112 | 116 |
|
113 | 117 |
results_DEV[i,1]<- dates[i] #storing the interpolation dates in the first column |
114 | 118 |
results_DEV[i,2]<- ns #number of stations used in the training stage |
... | ... | |
118 | 122 |
results_DEV[i,6]<- mod4$deviance |
119 | 123 |
results_DEV[i,7]<- mod5$deviance |
120 | 124 |
results_DEV[i,8]<- mod6$deviance |
125 |
results_DEV[i,9]<- mod6$deviance |
|
121 | 126 |
|
122 | 127 |
#####VALIDATION: Prediction checking the results using the testing data######## |
123 | 128 |
|
... | ... | |
127 | 132 |
y_mod4<- predict(mod4, newdata=data_v, se.fit = TRUE) |
128 | 133 |
y_mod5<- predict(mod5, newdata=data_v, se.fit = TRUE) |
129 | 134 |
y_mod6<- predict(mod6, newdata=data_v, se.fit = TRUE) |
135 |
y_mod7<- predict(mod7, newdata=data_v, se.fit = TRUE) |
|
130 | 136 |
|
131 | 137 |
res_mod1<- data_v$tmax - y_mod1$fit #Residuals for GMA model that resembles the ANUSPLIN interpolation |
132 | 138 |
res_mod2<- data_v$tmax - y_mod2$fit #Residuals for GAM model that resembles the PRISM interpolation |
... | ... | |
134 | 140 |
res_mod4<- data_v$tmax - y_mod4$fit |
135 | 141 |
res_mod5<- data_v$tmax - y_mod5$fit |
136 | 142 |
res_mod6<- data_v$tmax - y_mod6$fit |
143 |
res_mod7<- data_v$tmax - y_mod7$fit |
|
137 | 144 |
|
138 | 145 |
RMSE_mod1 <- sqrt(sum(res_mod1^2)/nv) |
139 | 146 |
RMSE_mod2 <- sqrt(sum(res_mod2^2)/nv) |
... | ... | |
141 | 148 |
RMSE_mod4 <- sqrt(sum(res_mod4^2)/nv) |
142 | 149 |
RMSE_mod5 <- sqrt(sum(res_mod5^2)/nv) |
143 | 150 |
RMSE_mod6 <- sqrt(sum(res_mod6^2)/nv) |
144 |
|
|
151 |
RMSE_mod7 <- sqrt(sum(res_mod7^2)/nv) |
|
145 | 152 |
|
146 | 153 |
results_RMSE[i,1]<- dates[i] #storing the interpolation dates in the first column |
147 | 154 |
results_RMSE[i,2]<- ns #number of stations used in the training stage |
... | ... | |
151 | 158 |
results_RMSE[i,6]<- RMSE_mod4 |
152 | 159 |
results_RMSE[i,7]<- RMSE_mod5 |
153 | 160 |
results_RMSE[i,8]<- RMSE_mod6 |
154 |
|
|
161 |
results_RMSE[i,9]<- RMSE_mod7 |
|
155 | 162 |
#Saving dataset in dataframes |
156 | 163 |
data_name<-paste("ghcn_v_",dates[[i]],sep="") |
157 | 164 |
assign(data_name,data_v) |
... | ... | |
165 | 172 |
## Plotting and saving diagnostic measures |
166 | 173 |
|
167 | 174 |
results_RMSEnum <-results_RMSE |
175 |
results_AICnum <-results_AIC |
|
168 | 176 |
mode(results_RMSEnum)<- "numeric" |
177 |
mode(results_AICnum)<- "numeric" |
|
169 | 178 |
# Make it numeric first |
170 | 179 |
# Now turn it into a data.frame... |
171 | 180 |
|
172 | 181 |
results_table_RMSE<-as.data.frame(results_RMSEnum) |
173 |
colnames(results_table_RMSE)<-c("dates","ns","mod1", "mod2","mod3", "mod4", "mod5", "mod6") |
|
174 |
|
|
175 |
# win.graph() |
|
176 |
# barplot(results_table$RMSE_A1/10,main="RMSE for the A1 models",names.arg=results_table$dates,ylab="Temp (deg. C)",xlab="interpolated date") |
|
177 |
# savePlot(paste("GAM_ANUSPLIN1_RMSE",out_prefix,".emf", sep=""), type="emf") |
|
178 |
# win.graph() |
|
179 |
# barplot(results_table$RMSE_P1/10,main="RMSE for the P1 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date") |
|
180 |
# savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf") |
|
181 |
# win.graph() |
|
182 |
# barplot(results_table$RMSE_P2/10,main="RMSE for the P2 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date") |
|
183 |
# savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf") |
|
184 |
# win.graph() |
|
185 |
# barplot(results_table$AIC_A1,main="AIC for the A1 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date") |
|
186 |
# savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf") |
|
187 |
# win.graph() |
|
188 |
# barplot(results_table$AIC_P1/10,main="AIC for the P1 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interpolated date") |
|
189 |
# savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf") |
|
190 |
# win.graph() |
|
191 |
# barplot(results_table$AIC_P2/10,main="AIC for the P2 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date") |
|
192 |
# savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", s‰‰ep=""), type="emf") |
|
193 |
# win.graph() |
|
194 |
# barplot(results_table$Deviance_A1/10,main="Deviance for the A1 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date") |
|
195 |
# savePlot(paste("GAM_ANUSPLIN1_Deviance",out_prefix,".emf", sep=""), type="emf") |
|
196 |
# win.graph() |
|
197 |
# barplot(results_table$Deviance_P1/10,main="Deviance for the P1 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date") |
|
198 |
# savePlot(paste("GAM_PRISM1_Deviance",out_prefix,".emf", sep=""), type="emf") |
|
199 |
# win.graph() |
|
200 |
# barplot(results_table$Deviance_P2/10,main="Deviance for the P2 models",names.arg=results_table$dates,ylab="Temp (10 X deg. C)",xlab="interolated date") |
|
201 |
# savePlot(paste("GAM_PRISM2_Deviance",out_prefix,".emf", sep=""), type="emf") |
|
182 |
results_table_AIC<-as.data.frame(results_AICnum) |
|
183 |
colnames(results_table_RMSE)<-c("dates","ns","mod1", "mod2","mod3", "mod4", "mod5", "mod6", "mod7") |
|
184 |
colnames(results_table_AIC)<-c("dates","ns","mod1", "mod2","mod3", "mod4", "mod5", "mod6", "mod7") |
|
202 | 185 |
|
203 | 186 |
#results_table_RMSE |
204 |
write.csv(results_table_RMSE, file= paste(path,"/","results_GAM_Assessment",out_prefix,".txt",sep="")) |
|
187 |
#write.csv(results_table_RMSE, file= paste(path,"/","results_GAM_Assessment",out_prefix,".txt",sep="")) |
|
188 |
#write.csv(results_table_AIC, file= paste(path,"/","results_GAM_Assessment",out_prefix,".txt",sep=""),append=TRUE) |
|
205 | 189 |
|
190 |
write.table(results_table_RMSE, file= paste(path,"/","results_GAM_Assessment",out_prefix,".txt",sep=""), sep=",") |
|
191 |
write.table(results_table_AIC, file= paste(path,"/","results_GAM_Assessment",out_prefix,".txt",sep=""),sep=",", append=TRUE) |
|
206 | 192 |
|
193 |
#Can also use file connection |
|
194 |
f<-paste(path,"/","results_GAM_Assessment",out_prefix,"2.txt",sep="") |
|
195 |
write(results_table_RMSE, file=f) |
|
196 |
close(f) |
|
207 | 197 |
# End of script########## |
208 | 198 |
|
209 |
# ############################### |
|
210 |
|
|
211 |
# |
|
212 |
# ############Diagnostic GAM plots############# |
|
213 |
# win.graph() |
|
214 |
# gam.check(GAM_ANUSPLIN1) |
|
215 |
# savePlot("GAM_ANUSPLIN1_diagnostic1.emf", type="emf") |
|
216 |
# win.graph() |
|
217 |
# gam.check(GAM_PRISM1) #This will produce basic plots of residuals |
|
218 |
# savePlot("GAM_PRISM_diagnostic1.emf", type="emf") |
|
219 |
# gam.check(GAM_ANUSPLIN1) |
|
220 |
# win.graph() |
|
221 |
# vis.gam(GAM_ANUSPLIN1) |
|
222 |
# savePlot("GAM_ANUSPLIN1_prediction.emf", type="emf") |
|
223 |
# win.graph() |
|
224 |
# vis.gam(GAM_PRISM1) |
|
225 |
# savePlot("GAM_PRISM1_prediction.emf", type="emf") |
|
226 |
# win.graph() |
|
227 |
# vis.gam(GAM_ANUSPLIN1, view=c("lat","ELEV_SRTM")) |
|
228 |
# #vis.gam(GAM_ANUSPLIN1, view=c("lat","ELEV_SRTM", theta=100,phi=200)) |
|
229 |
# savePlot("GAM_ANUSPLIN1_prediction2.emf", type="emf") |
|
230 |
# |
|
231 |
|
|
232 |
# |
|
233 |
|
|
234 |
|
|
235 |
|
|
199 |
#Selecting dates and files based on names |
|
200 |
#date<-strptime(dates[1], "%Y%m%d") |
|
201 |
#class(date) #This shows the type of the object |
|
202 |
#strftime(t, "%m") #This gives as output the month of the date... |
|
236 | 203 |
|
237 |
|
Also available in: Unified diff
GAM LST, adding new model with grass land cover, task #361