Revision 25a68ae3
Added by Benoit Parmentier over 12 years ago
- ID 25a68ae32a5fdc3ee3af1594e88f4743d617db37
climate/research/oregon/interpolation/Linear_reg.R | ||
---|---|---|
16 | 16 |
path<-"C:/Data/Benoit/NCEAS/window_Oregon_data" |
17 | 17 |
setwd(path) |
18 | 18 |
#infile2<-"dates_interpolation_03012012.txt" # list of 10 dates for the regression |
19 |
infile2<-"dates_interpolation_03032012.txt"
|
|
19 |
infile2<-"dates_interpolation_03052012.txt"
|
|
20 | 20 |
prop<-0.3 #Proportion of testing retained for validation |
21 | 21 |
out_prefix<-"_03042012_r1" |
22 | 22 |
|
... | ... | |
24 | 24 |
|
25 | 25 |
###Reading the station data and setting up for models' comparison |
26 | 26 |
ghcn<-read.csv(paste(path,"/",infile1, sep=""), header=TRUE) #The "paste" function concatenates the path and file name in common string. |
27 |
ghcn = transform(ghcn,Northness = cos(ASPECT)) #Adding a variable to the dataframe |
|
28 |
ghcn = transform(ghcn,Eastness = sin(ASPECT)) #adding variable to the dataframe. |
|
29 |
|
|
27 | 30 |
ghcn = transform(ghcn,Northness_w = sin(slope)*cos(ASPECT)) #Adding a variable to the dataframe |
28 | 31 |
ghcn = transform(ghcn,Eastness_w = sin(slope)*sin(ASPECT)) #adding variable to the dataframe. |
29 | 32 |
set.seed(100) |
30 | 33 |
dates <-readLines(paste(path,"/",infile2, sep="")) |
31 | 34 |
|
32 |
results <- matrix(1,length(dates),10) #This is a matrix containing the diagnostic measures from the GAM models. |
|
35 |
results <- matrix(1,length(dates),14) #This is a matrix containing the diagnostic measures from the GAM models. |
|
36 |
|
|
33 | 37 |
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==d)) #this creates a list of 10 subsets data |
34 | 38 |
#note that compare to the previous version date_ column was changed to date |
35 | 39 |
|
... | ... | |
51 | 55 |
####Regression part 2: GAM models |
52 | 56 |
|
53 | 57 |
GAM_ANUSPLIN1<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM), data=data_s) |
54 |
GAM_PRISM1<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) + s (ASPECT)+ s(DISTOC), data=data_s)
|
|
58 |
GAM_PRISM1<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) + s (Northness)+ s (Eastness) + s(DISTOC), data=data_s)
|
|
55 | 59 |
GAM_PRISM2<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) + s (Northness_w)+ s (Eastness_w) + s(DISTOC), data=data_s) |
56 | 60 |
|
57 | 61 |
|
... | ... | |
60 | 64 |
results[i,1]<- dates[i] #storing the interpolation dates in the first column |
61 | 65 |
results[i,2]<- ns #number of stations used in the training stage |
62 | 66 |
|
63 |
results[i,5]<- AIC (GAM_ANUSPLIN1) |
|
64 |
results[i,6]<- AIC (GAM_PRISM1) |
|
65 |
GCVA1<-GAM_ANUSPLIN1$gcv.ubre |
|
66 |
results[i,7]<- GCVA1 |
|
67 |
GCVP1<-GAM_PRISM1$gcv.ubre |
|
68 |
results[i,8]<- GCVP1 |
|
67 |
results[i,6]<- AIC (GAM_ANUSPLIN1) |
|
68 |
results[i,7]<- AIC (GAM_PRISM1) |
|
69 |
results[i,8]<- AIC (GAM_PRISM2) |
|
70 |
|
|
71 |
results[i,9]<- GAM_ANUSPLIN1$gcv.ubre |
|
72 |
results[i,10]<- GAM_PRISM1$gcv.ubre |
|
73 |
results[i,11]<- GAM_PRISM2$gcv.ubre |
|
69 | 74 |
|
70 |
results[i,9]<-GAM_ANUSPLIN1$deviance |
|
71 |
results[i,10]<-GAM_PRISM1$deviance |
|
75 |
results[i,12]<-GAM_ANUSPLIN1$deviance |
|
76 |
results[i,13]<-GAM_PRISM1$deviance |
|
77 |
results[i,14]<-GAM_PRISM2$deviance |
|
72 | 78 |
|
73 | 79 |
#####VALIDATION: Prediction checking the results using the testing data######## |
74 | 80 |
|
75 | 81 |
y_pgANUSPLIN1<- predict(GAM_ANUSPLIN1, newdata=data_v, se.fit = TRUE) #Using the coeff to predict new values. |
76 | 82 |
y_pgPRISM1<- predict(GAM_PRISM1, newdata=data_v, se.fit = TRUE) |
77 |
|
|
83 |
y_pgPRISM2<- predict(GAM_PRISM2, newdata=data_v, se.fit = TRUE) |
|
84 |
|
|
78 | 85 |
res_ypgA1<- data_v$tmax - y_pgANUSPLIN1$fit #Residuals for GMA model that resembles the ANUSPLIN interpolation |
79 | 86 |
res_ypgP1<- data_v$tmax - y_pgPRISM1$fit #Residuals for GAM model that resembles the PRISM interpolation |
80 |
|
|
87 |
res_ypgP2<- data_v$tmax - y_pgPRISM2$fit |
|
88 |
|
|
81 | 89 |
RMSE_ypgA1 <- sqrt(sum(res_ypgA1^2)/nv) |
82 | 90 |
RMSE_ypgP1 <- sqrt(sum(res_ypgP1^2)/nv) |
91 |
RMSE_ypgP2 <- sqrt(sum(res_ypgP2^2)/nv) |
|
83 | 92 |
|
84 | 93 |
results[i,3]<-RMSE_ypgA1 |
85 | 94 |
results[i,4]<-RMSE_ypgP1 |
95 |
results[i,5]<-RMSE_ypgP2 |
|
96 |
|
|
97 |
data_name<-paste("ghcn_v_",dates[[i]],sep="") |
|
98 |
assign(data_name,data_v) |
|
99 |
#ghcn_v<-ls(pattern="ghcn_v_") |
|
86 | 100 |
|
87 | 101 |
# end of the for loop #1 |
88 | 102 |
} |
... | ... | |
95 | 109 |
# Now turn it into a data.frame... |
96 | 110 |
|
97 | 111 |
results_table<-as.data.frame(results_num) |
98 |
colnames(results_table)<-c("dates","ns","RMSE_A1", "RMSE_P1", "AIC_A1", "AIC_P1", "GCV_A1", "GCV_P1", "Deviance_A1", "Deviance_P1")
|
|
112 |
colnames(results_table)<-c("dates","ns","RMSE_A1", "RMSE_P1","RMSE_P2", "AIC_A1", "AIC_P1", "AIC_P2", "GCV_A1", "GCV_P1", "GCV_P2", "Deviance_A1", "Deviance_P1", "Deviance_P2")
|
|
99 | 113 |
|
100 | 114 |
win.graph() |
101 |
barplot(results_table$RMSE_A1,main="RMSE for the A1 models",names.arg=results_table$dates,ylab="Temp (0.1 X deg. C)",xlab="interolated date")
|
|
115 |
barplot(results_table$RMSE_A1/10,main="RMSE for the A1 models",names.arg=results_table$dates,ylab="Temp (deg. C)",xlab="interolated date")
|
|
102 | 116 |
savePlot(paste("GAM_ANUSPLIN1_RMSE",out_prefix,".emf", sep=""), type="emf") |
103 | 117 |
win.graph() |
104 |
barplot(results_table$RMSE_P1,main="RMSE for the P1 models",names.arg=results_table$dates,ylab="Temp (0.1 X deg. C)",xlab="interolated date") |
|
118 |
barplot(results_table$RMSE_P1/10,main="RMSE for the P1 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interolated date") |
|
119 |
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf") |
|
120 |
win.graph() |
|
121 |
barplot(results_table$RMSE_P2/10,main="RMSE for the P2 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interolated date") |
|
105 | 122 |
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf") |
106 | 123 |
win.graph() |
107 |
barplot(results_table$AIC_P1,main="AIC for the P1 models",names.arg=results_table$dates,ylab="Temp (0.1 X deg. C)",xlab="interolated date")
|
|
124 |
barplot(results_table$AIC_A1,main="AIC for the A1 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interolated date")
|
|
108 | 125 |
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf") |
109 | 126 |
win.graph() |
110 |
barplot(results_table$AIC_A1,main="AIC for the A1 models",names.arg=results_table$dates,ylab="Temp (0.1 X deg. C)",xlab="interolated date")
|
|
127 |
barplot(results_table$AIC_P1/10,main="AIC for the P1 models",names.arg=results_table$dates,ylab="Temp ( deg. C)",xlab="interolated date")
|
|
111 | 128 |
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf") |
112 | 129 |
win.graph() |
113 |
barplot(results_table$Deviance_A1,main="Deviance for the A1 models",names.arg=results_table$dates,ylab="Temp (0.1 X deg. C)",xlab="interolated date") |
|
130 |
barplot(results_table$AIC_P2/10,main="AIC for the P2 models",names.arg=results_table$dates,ylab="Temp (0.1 X deg. C)",xlab="interolated date") |
|
131 |
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf") |
|
132 |
win.graph() |
|
133 |
barplot(results_table$Deviance_A1/10,main="Deviance for the A1 models",names.arg=results_table$dates,ylab="Temp (0.1 X deg. C)",xlab="interolated date") |
|
114 | 134 |
savePlot(paste("GAM_ANUSPLIN1_Deviance",out_prefix,".emf", sep=""), type="emf") |
115 | 135 |
win.graph() |
116 |
barplot(results_table$Deviance_P1,main="Deviance for the P1 models",names.arg=results_table$dates,ylab="Temp (0.1 X deg. C)",xlab="interolated date") |
|
136 |
barplot(results_table$Deviance_P1/10,main="Deviance for the P1 models",names.arg=results_table$dates,ylab="Temp (0.1 X deg. C)",xlab="interolated date")
|
|
117 | 137 |
savePlot(paste("GAM_PRISM1_Deviance",out_prefix,".emf", sep=""), type="emf") |
138 |
win.graph() |
|
139 |
barplot(results_table$Deviance_P2/10,main="Deviance for the P2 models",names.arg=results_table$dates,ylab="Temp (0.1 X deg. C)",xlab="interolated date") |
|
140 |
savePlot(paste("GAM_PRISM2_Deviance",out_prefix,".emf", sep=""), type="emf") |
|
141 |
|
|
118 | 142 |
write.csv(results_table, file= paste(path,"/","results_GAM_Assessment",out_prefix,".txt",sep="")) |
119 | 143 |
|
120 | 144 |
|
... | ... | |
142 | 166 |
# #vis.gam(GAM_ANUSPLIN1, view=c("lat","ELEV_SRTM", theta=100,phi=200)) |
143 | 167 |
# savePlot("GAM_ANUSPLIN1_prediction2.emf", type="emf") |
144 | 168 |
# |
145 |
# #results_val <-c(res_yplA1,res_ypgA1,res_yplP1,res_ypgP1) |
|
146 |
# results_val<-data.frame(res_yplA1=res_yplA1,res_ypgA1=res_ypgA1,res_yplP1=res_yplP1,res_ypgP1=res_ypgP1) |
|
147 |
# nv<- nrow(ghcn1507_v) |
|
148 |
# |
|
149 |
# mod_name<-c("yplA1","ypgA1","yplP1","ypgP1") |
|
150 |
# mod_type<-c("lm_ANUSPLIN1","GAM_ANUSPLIN1","lm_PRISM1","GAM_PRISM1") |
|
151 |
# |
|
152 |
# RMSE_all<-c(RMSE_yplA1,RMSE_ypgA1,RMSE_yplP1,RMSE_ypgP1) |
|
153 |
# AIC_all<-AIC(lm_ANUSPLIN1,GAM_ANUSPLIN1,lm_PRISM1,GAM_PRISM1) |
|
154 |
# GCV_all<-c(0,GCVA1,0,GCVP1) #This places the GCV values for each model in a vector |
|
155 |
# Deviance_all<-c(0,DevianceA1,0,DevianceP1) |
|
156 |
# |
|
157 |
# #results<-data.frame(model=mod_name,RMSE=RMSE_all,df=AIC_all$df,AIC=AIC_all$AIC) |
|
158 |
# #results_val<-data.frame(model=mod_name,RMSE=RMSE_all,df=AIC_all$df,AIC=AIC_all$AIC) |
|
159 |
# results<-data.frame(model=mod_name,RMSE=RMSE_all,df=AIC_all$df,AIC=AIC_all$AIC, GCV=GCV_all,Deviance=Deviance_all) |
|
160 |
# |
|
161 |
# #Add deviance |
|
162 |
# |
|
163 |
# barplot(results$RMSE,main="RMSE for the models",names.arg=c("yplA1","ypgA1","yplP1","ypgP1"),ylab="Temp (deg. C)") |
|
164 |
# barplot(results$AIC,main="AIC for the models",names.arg=c("yplA1","ypgA1","yplP1","ypgP1"),ylab="AIC") |
|
165 |
# #dump("results", file= paste(path,"/","results_reg1507.txt",sep="")) |
|
166 |
# write.csv(results, file= paste(path,"/","results_reg1507_Assessment.txt",sep="")) |
|
167 |
# #write.csv(results_val, file= paste(path,"/","results_reg1507_val.txt",sep="")) |
|
169 |
|
|
168 | 170 |
# |
169 | 171 |
|
170 | 172 |
|
Also available in: Unified diff
Transform aspect in Eastness and Northness variables in the GAM.