Revision f042de0f
Added by Benoit Parmentier over 12 years ago
climate/research/oregon/interpolation/GAM_LST_reg.R | ||
---|---|---|
18 | 18 |
###Parameters and arguments |
19 | 19 |
|
20 | 20 |
infile1<-"ghcn_or_tmax_b_04142012_OR83M.shp" |
21 |
#path<-"/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations"
|
|
22 |
path<-"H:/Data/IPLANT_project/data_Oregon_stations" |
|
21 |
path<-"/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations" |
|
22 |
#path<-"H:/Data/IPLANT_project/data_Oregon_stations"
|
|
23 | 23 |
setwd(path) |
24 | 24 |
infile2<-"dates_interpolation_03052012.txt" #List of 10 dates for the regression |
25 |
infile2<-"list_365_dates_04212012.txt" |
|
25 | 26 |
prop<-0.3 #Proportion of testing retained for validation |
26 |
out_prefix<-"_04252012_LST_residuals"
|
|
27 |
out_prefix<-"_05012012_mod8_LST"
|
|
27 | 28 |
infile3<-"LST_dates_var_names.txt" |
28 | 29 |
infile4<-"models_interpolation_04032012b.txt" |
29 | 30 |
|
... | ... | |
33 | 34 |
filename<-sub(".shp","",infile1) #Removing the extension from file. |
34 | 35 |
ghcn<-readOGR(".", filename) #reading shapefile |
35 | 36 |
|
36 |
CRS<-proj4string(ghcn)
|
|
37 |
|
|
37 |
proj4string(ghcn) #This retrieves the coordinate system for the SDF
|
|
38 |
CRS_ghcn<-proj4string(ghcn) #this can be assigned to mean_LST!!! |
|
38 | 39 |
|
39 | 40 |
ghcn = transform(ghcn,Northness = cos(ASPECT)) #Adding a variable to the dataframe |
40 | 41 |
ghcn = transform(ghcn,Eastness = sin(ASPECT)) #adding variable to the dataframe. |
... | ... | |
46 | 47 |
LST_dates <-readLines(paste(path,"/",infile3, sep="")) |
47 | 48 |
models <-readLines(paste(path,"/",infile4, sep="")) |
48 | 49 |
|
49 |
results <- matrix(1,length(dates),14) #This is a matrix containing the diagnostic measures from the GAM models.
|
|
50 |
results <- matrix(1,length(dates),15) #This is a matrix containing the diagnostic measures from the GAM models.
|
|
50 | 51 |
|
51 |
results_AIC<- matrix(1,length(dates),length(models)+2)
|
|
52 |
results_GCV<- matrix(1,length(dates),length(models)+2)
|
|
53 |
results_DEV<- matrix(1,length(dates),length(models)+2)
|
|
54 |
results_RMSE<- matrix(1,length(dates),length(models)+2)
|
|
55 |
cor_LST_LC1<-matrix(1,10,1) #correlation LST-LC1
|
|
56 |
cor_LST_LC3<-matrix(1,10,1) #correlation LST-LC3
|
|
57 |
cor_LST_tmax<-matrix(1,10,1) #correlation LST-tmax
|
|
52 |
results_AIC<- matrix(1,length(dates),length(models)+3)
|
|
53 |
results_GCV<- matrix(1,length(dates),length(models)+3)
|
|
54 |
results_DEV<- matrix(1,length(dates),length(models)+3)
|
|
55 |
results_RMSE<- matrix(1,length(dates),length(models)+3)
|
|
56 |
cor_LST_LC1<-matrix(1,length(dates),1) #correlation LST-LC1
|
|
57 |
cor_LST_LC3<-matrix(1,length(dates),1) #correlation LST-LC3
|
|
58 |
cor_LST_tmax<-matrix(1,length(dates),1) #correlation LST-tmax
|
|
58 | 59 |
#Screening for bad values |
59 | 60 |
|
60 | 61 |
ghcn_all<-ghcn |
61 | 62 |
ghcn_test<-subset(ghcn,ghcn$tmax>-150 & ghcn$tmax<400) |
62 | 63 |
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0) |
63 | 64 |
ghcn<-ghcn_test2 |
64 |
#coords<- ghcn[,c('x_OR83M','y_OR83M')] |
|
65 | 65 |
|
66 | 66 |
month_var<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09", "mm_10", "mm_11", "mm_12") |
67 | 67 |
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==d)) #this creates a list of 10 subsets data |
... | ... | |
94 | 94 |
####Regression part 2: GAM models |
95 | 95 |
|
96 | 96 |
mod1<- gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM), data=data_s) |
97 |
mod2<- gam(tmax~ s(lat,lon,ELEV_SRTM), data=data_s) |
|
97 |
#mod2<- gam(tmax~ s(lat,lon,ELEV_SRTM), data=data_s) |
|
98 |
mod2<- gam(tmax~ s(lat,lon) + s(ELEV_SRTM), data=data_s) |
|
98 | 99 |
mod3<- gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) + s (Northness)+ s (Eastness) + s(DISTOC), data=data_s) |
99 | 100 |
mod4<- gam(tmax~ s(lat) + s (lon) + s(ELEV_SRTM) + s(Northness) + s (Eastness) + s(DISTOC) + s(LST), data=data_s) |
100 | 101 |
mod5<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST), data=data_s) |
101 | 102 |
mod6<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC1), data=data_s) |
102 | 103 |
mod7<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC3), data=data_s) |
104 |
mod8<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST) + s(LC1), data=data_s) |
|
103 | 105 |
|
104 | 106 |
####Regression part 3: Calculating and storing diagnostic measures |
105 | 107 |
results_AIC[i,1]<- dates[i] #storing the interpolation dates in the first column |
... | ... | |
111 | 113 |
results_AIC[i,7]<- AIC (mod5) |
112 | 114 |
results_AIC[i,8]<- AIC (mod6) |
113 | 115 |
results_AIC[i,9]<- AIC (mod7) |
116 |
results_AIC[i,10]<- AIC (mod8) |
|
114 | 117 |
|
115 | 118 |
results_GCV[i,1]<- dates[i] #storing the interpolation dates in the first column |
116 | 119 |
results_GCV[i,2]<- ns #number of stations used in the training stage |
... | ... | |
121 | 124 |
results_GCV[i,7]<- mod5$gcv.ubre |
122 | 125 |
results_GCV[i,8]<- mod6$gcv.ubre |
123 | 126 |
results_GCV[i,9]<- mod7$gcv.ubre |
127 |
results_GCV[i,10]<- mod7$gcv.ubre |
|
124 | 128 |
|
125 | 129 |
results_DEV[i,1]<- dates[i] #storing the interpolation dates in the first column |
126 | 130 |
results_DEV[i,2]<- ns #number of stations used in the training stage |
... | ... | |
130 | 134 |
results_DEV[i,6]<- mod4$deviance |
131 | 135 |
results_DEV[i,7]<- mod5$deviance |
132 | 136 |
results_DEV[i,8]<- mod6$deviance |
133 |
results_DEV[i,9]<- mod6$deviance |
|
137 |
results_DEV[i,9]<- mod7$deviance |
|
138 |
results_DEV[i,10]<- mod8$deviance |
|
134 | 139 |
|
135 | 140 |
#####VALIDATION: Prediction checking the results using the testing data######## |
136 | 141 |
|
... | ... | |
141 | 146 |
y_mod5<- predict(mod5, newdata=data_v, se.fit = TRUE) |
142 | 147 |
y_mod6<- predict(mod6, newdata=data_v, se.fit = TRUE) |
143 | 148 |
y_mod7<- predict(mod7, newdata=data_v, se.fit = TRUE) |
149 |
y_mod8<- predict(mod8, newdata=data_v, se.fit = TRUE) |
|
144 | 150 |
|
145 | 151 |
res_mod1<- data_v$tmax - y_mod1$fit #Residuals for GMA model that resembles the ANUSPLIN interpolation |
146 | 152 |
res_mod2<- data_v$tmax - y_mod2$fit #Residuals for GAM model that resembles the PRISM interpolation |
... | ... | |
149 | 155 |
res_mod5<- data_v$tmax - y_mod5$fit |
150 | 156 |
res_mod6<- data_v$tmax - y_mod6$fit |
151 | 157 |
res_mod7<- data_v$tmax - y_mod7$fit |
158 |
res_mod8<- data_v$tmax - y_mod8$fit |
|
152 | 159 |
|
153 | 160 |
RMSE_mod1 <- sqrt(sum(res_mod1^2)/nv) |
154 | 161 |
RMSE_mod2 <- sqrt(sum(res_mod2^2)/nv) |
... | ... | |
157 | 164 |
RMSE_mod5 <- sqrt(sum(res_mod5^2)/nv) |
158 | 165 |
RMSE_mod6 <- sqrt(sum(res_mod6^2)/nv) |
159 | 166 |
RMSE_mod7 <- sqrt(sum(res_mod7^2)/nv) |
167 |
RMSE_mod8 <- sqrt(sum(res_mod8^2)/nv) |
|
160 | 168 |
|
161 | 169 |
results_RMSE[i,1]<- dates[i] #storing the interpolation dates in the first column |
162 | 170 |
results_RMSE[i,2]<- ns #number of stations used in the training stage |
... | ... | |
167 | 175 |
results_RMSE[i,7]<- RMSE_mod5 |
168 | 176 |
results_RMSE[i,8]<- RMSE_mod6 |
169 | 177 |
results_RMSE[i,9]<- RMSE_mod7 |
178 |
results_RMSE[i,10]<- RMSE_mod8 |
|
170 | 179 |
|
171 |
#Saving dataset in dataframes: residuals from RMSE |
|
172 |
|
|
173 |
# data_v$mod1<-y_mod1$fit |
|
174 |
# data_v$mod2<-y_mod2$fit |
|
175 |
# data_v$mod3<-y_mod3$fit |
|
176 |
# data_v$mod4<-y_mod4$fit |
|
177 |
# data_v$mod5<-y_mod5$fit |
|
178 |
# data_v$mod6<-y_mod6$fit |
|
179 |
# data_v$mod7<-y_mod7$fit |
|
180 |
# |
|
181 |
# data_s$mod1<-mod1$fit |
|
182 |
# data_s$mod2<-mod2$fit |
|
183 |
# data_s$mod3<-mod3$fit |
|
184 |
# data_s$mod4<-mod4$fit |
|
185 |
# data_s$mod5<-mod5$fit |
|
186 |
# data_s$mod6<-mod6$fit |
|
187 |
# data_s$mod7<-mod7$fit |
|
188 |
# |
|
189 |
data_v$res_mod1<-as.numeric(res_mod1) |
|
190 |
data_v$res_mod2<-as.numeric(res_mod2) |
|
191 |
data_v$res_mod3<-as.numeric(res_mod3) |
|
192 |
data_v$res_mod4<-as.numeric(res_mod4) |
|
193 |
data_v$res_mod5<-as.numeric(res_mod5) |
|
194 |
data_v$res_mod6<-as.numeric(res_mod6) |
|
195 |
data_v$res_mod7<-as.numeric(res_mod7) |
|
196 |
|
|
197 |
data_s$res_mod1<-as.numeric(mod1$residuals) |
|
198 |
data_s$res_mod2<-as.numeric(mod2$residuals) |
|
199 |
data_s$res_mod3<-as.numeric(mod3$residuals) |
|
200 |
data_s$res_mod4<-as.numeric(mod4$residuals) |
|
201 |
data_s$res_mod5<-as.numeric(mod5$residuals) |
|
202 |
data_s$res_mod6<-as.numeric(mod6$residuals) |
|
203 |
data_s$res_mod7<-as.numeric(mod7$residuals) |
|
204 |
|
|
180 |
#Saving dataset in dataframes |
|
205 | 181 |
data_name<-paste("ghcn_v_",dates[[i]],sep="") |
206 | 182 |
assign(data_name,data_v) |
207 |
write.table(data_v, file= paste(path,"/",data_name,".txt",sep=""), sep=" ") |
|
208 |
#write out a new shapefile (including .prj component) |
|
209 |
coords<- data_v[,c('x_OR83M','y_OR83M')] |
|
210 |
coordinates(data_v)<-coords |
|
211 |
proj4string(data_v)<-CRS #Need to assign coordinates... |
|
212 |
outfile<-sub(".shp","",data_name) #Removing extension if it is present |
|
213 |
writeOGR(data_v,".", outfile, driver ="ESRI Shapefile") |
|
214 |
|
|
215 | 183 |
data_name<-paste("ghcn_s_",dates[[i]],sep="") |
216 | 184 |
assign(data_name,data_s) |
217 |
write.table(data_s, file= paste(path,"/",data_name,".txt",sep=""), sep=" ") |
|
218 |
#write out a new shapefile (including .prj component) |
|
219 |
coords<- data_s[,c('x_OR83M','y_OR83M')] |
|
220 |
coordinates(data_s)<-coords |
|
221 |
proj4string(data_s)<-CRS #Need to assign coordinates.. |
|
222 |
outfile<-sub(".shp","",data_name) #Removing extension if it is present |
|
223 |
writeOGR(data_s,".", outfile, driver ="ESRI Shapefile") |
|
185 |
#ghcn_v<-ls(pattern="ghcn_v_") |
|
186 |
|
|
187 |
|
|
188 |
cor_LST_LC1[i]<-cor(ghcn.subsets[[i]]$LST,ghcn.subsets[[i]]$LC1) |
|
189 |
cor_LST_LC3[i]<-cor(ghcn.subsets[[i]]$LST,ghcn.subsets[[i]]$LC3) |
|
224 | 190 |
|
225 |
# end of the for loop #1
|
|
191 |
#end of the for loop #1 |
|
226 | 192 |
} |
227 | 193 |
|
228 | 194 |
## Plotting and saving diagnostic measures |
... | ... | |
250 | 216 |
f<-file(paste(path,"/","results_GAM_Assessment",out_prefix,"2.txt",sep=""),"w") |
251 | 217 |
write(results_table_RMSE, file=f) |
252 | 218 |
close(f) |
253 |
|
|
254 | 219 |
# End of script########## |
255 | 220 |
|
256 | 221 |
#Selecting dates and files based on names |
... | ... | |
262 | 227 |
# cor_LST_LC3[i]<-cor(ghcn.subsets[[i]]$LST,ghcn.subsets[[i]]$LC3) |
263 | 228 |
# } |
264 | 229 |
|
230 |
#mod9<- gam(tmax~ te(lat,lon,ELEV_SRTM), data=ghcn_s_20101031) |
Also available in: Unified diff
GAM LST, added model with forest only term -364 dates assessment, task #406