Project

General

Profile

« Previous | Next » 

Revision e2d673e4

Added by Benoit Parmentier about 12 years ago

  • ID e2d673e42b81d3fc56e248b6009ad94a9b5306b4

Corrected errors in RMSE calculation and added plots to view results

View differences:

climate/research/oregon/interpolation/Linear_reg.R
1
#This script interpolates station value for the Oregon case study. This program loads the station data from a csv file
1
#This script interpolates station value for the Oregon case study. This program loads the station data from a csv file 
2 2
#and perform two types  of regression: multiple linear model and general additive model (GAM). Note that this program:
3
#1)assumes that the csv file is in the current working
4
#2)extract relevant variables from raster images before performing the regressions.
3
#1)assumes that the csv file is in the current working 
4
#2)extract relevant variables from raster images before performing the regressions. 
5 5
#The user must provide the list of raster images in  a textile.
6
#Script created by Benoit Parmentier on February 15, 2012.
6
#Script created by Benoit Parmentier on February 28, 2012. 
7 7

  
8
###Loading r library and packages
9
library(raster)                                                                        # loading the raster package
8
###Loading r library and packages                                                                       # loading the raster package
10 9
library(gtools)                                                                        # loading ...
11
library(sp)
12 10
library(mgcv)
13 11

  
14 12
###Parameters and arguments
15 13
infile1<-"ghcn_or_b_02122012_OR83M.csv"
16
inlistf<-"list_files.txt"
17 14
path<-"C:/Data/Benoit/NCEAS/window_Oregon_data"
18
#path<-getwd()
19
#date<-"20101507"                                                                       #Date selected for the regression
20
#prop<-0.3                                                                              #Proportion of testing retained for validation
21
#list_var<-list.files()                                                                 # displaying the files in the current folder which should contain only the relevant raster images.
22
#dump("list_var", file="list_var.txt")                                                  # Saving the values of list_var in a ascii text file.
23
#inlistvar<-"ghcn_var_02122012.txt"
15
setwd(path)
16
#date<-"20101507"                                                                       #Date selected for the regression 
17
#prop<-0.3                                                                              #Proportion of testing retained for validation   
24 18

  
25 19
#######START OF THE SCRIPT #############
26 20

  
27 21
###Reading the station data
28
ghcn<-read.csv(paste(path,"/",infile1, sep=""), header=TRUE)                            #The "paste" function concatenates the path and file name in common string.
22
ghcn<-read.csv(paste(path,"/",infile1, sep=""), header=TRUE)                            #The "paste" function concatenates the path and file name in common string. 
29 23
#ghcn<-read.csv(infile1)                                                                 #Use read.table if the input file is space delimited or read.table(infile1, headername=TRUE, sep=',')
30 24
names(ghcn)                                                                             #Checking that the columns are correctly labelled.
31 25

  
32
###Extracting the variables values from the raster files
33
coords<- ghcn[,c('x_OR83M','y_OR83M')]                                                  #Selecting all rows for the x and y columns and packaging the output in a data frame.
34
lines <-readLines(paste(path,"/",inlistf, sep="")
35
inlistvar<-paste(path,"/",lines,sep="")
36

  
37
#s_raster<- stack(inlistvar)                                                            #Creating a stack of raster images from the list of variables.
38
#stat_val<- extract(s_raster, coords)                                                   #Extracting values from the raster stack for every point location in coords data frame.
39
#create a shape file and data_frame with names??
40

  
41 26
###Creating a validation dataset by creating training and testing datasets (%30)
42 27
#ghcn1507 <-subset(ghcn,ghcn$date_== date)
43 28
ghcn1507 <-subset(ghcn,ghcn$date_=="20100715")
......
69 54

  
70 55
AIC (lm_ANUSPLIN1,GAM_ANUSPLIN1) #list the AIC and for the results
71 56
AIC (lm_PRISM1,GAM_PRISM1) #list the AIC and for the results
72
anova(lm_ANUSPLIN1, GAM_ANUSPLIN1,test="F") #compare the different models in terms of F; a reference model should be set for comparison.
57

  
58
GCVA1<-GAM_ANUSPLIN1$gcv.ubre
59
GCVP1<-GAM_PRISM1$gcv.ubre
60

  
61
DevianceA1<-GAM_ANUSPLIN1$deviance
62
DevianceP1<-GAM_PRISM1$deviance
63

  
64
anova(lm_ANUSPLIN1, GAM_ANUSPLIN1,test="F") #compare the different models in terms of F; a reference model should be set for comparison. 
73 65
anova(lm_PRISM1, GAM_PRISM1,test="F")
74 66

  
75
#GAM plot of partial residuals
67
summary(lm_ANUSPLIN1)$r.squared
68
summary(GAM_ANUSPLIN1)$r.squared
69
summary(lm_PRISM1)$r.squared                
70
summary(GAM_PRISM1)$r.squared
71

  
72
############Diagnostic GAM plots#############
73
win.graph()
74
gam.check(GAM_ANUSPLIN1)
75
savePlot("GAM_ANUSPLIN1_diagnostic1.emf", type="emf")
76
win.graph()
76 77
gam.check(GAM_PRISM1)   #This will produce basic plots of residuals
78
savePlot("GAM_PRISM_diagnostic1.emf", type="emf")
77 79
gam.check(GAM_ANUSPLIN1)
78

  
79
#Prediction checking the results using the testing data
80
win.graph()
81
vis.gam(GAM_ANUSPLIN1)
82
savePlot("GAM_ANUSPLIN1_prediction.emf", type="emf")        
83
win.graph()
84
vis.gam(GAM_PRISM1)
85
savePlot("GAM_PRISM1_prediction.emf", type="emf")
86
win.graph()
87
vis.gam(GAM_ANUSPLIN1, view=c("lat","ELEV_SRTM"))
88
#vis.gam(GAM_ANUSPLIN1, view=c("lat","ELEV_SRTM", theta=100,phi=200))
89
savePlot("GAM_ANUSPLIN1_prediction2.emf", type="emf")
90

  
91
####VALIDATION: Prediction checking the results using the testing data########
80 92
y_plANUSPLIN1<- predict(lm_ANUSPLIN1, newdata=ghcn1507_v, se.fit = TRUE)
81
y_pgANUSPLIN1<- predict(GAM_ANUSPLIN1, newdata=ghcn1507_v, se.fit = TRUE)
93
y_pgANUSPLIN1<- predict(GAM_ANUSPLIN1, newdata=ghcn1507_v, se.fit = TRUE) #Using the coeff to predict new values.
82 94
y_plPRISM1<- predict(lm_PRISM1, newdata=ghcn1507_v, se.fit = TRUE)
83
y_pgPRISM1<- predict(GAM_PRISM1, newdata=ghcn1507_v, se.fit = TRUE)
84

  
95
y_pgPRISM1<- predict(GAM_PRISM1, newdata=ghcn1507_v, se.fit = TRUE)            
96
         
85 97
res_yplA1<- ghcn1507_v$tmax - y_plANUSPLIN1$fit
86 98
res_ypgA1<- ghcn1507_v$tmax - y_pgANUSPLIN1$fit
87 99
res_yplP1<- ghcn1507_v$tmax - y_plPRISM1$fit   #Residuals for lm model that resembles the PRISM interpolation
88
res_ypgP1<- ghcn1507_v$tmax - y_pgPRISM1$fit   #Residuals for GAM model that resembles the PRISM interpolation
89

  
90
RMSE_yplA1 <- sqrt(sum(res_yplA1^2))
91
#RMSE_ypgA1 <- sqrt(sum(res_ypgA1^2))
92
RMSE_ypgA1 <- sqrt(sum(res_ypgA1^2))
93
RMSE_yplP1 <- sqrt(sum(res_yplP1^2))
94
RMSE_ypgP1 <- sqrt(sum(res_ypgP1^2))
95

  
96
#Printing the RMSE values for the different models
97
RMSE_yplA1
98
RMSE_ypgA1
99
RMSE_yplP1
100
res_ypgP1<- ghcn1507_v$tmax - y_pgPRISM1$fit   #Residuals for GAM model that resembles the PRISM interpolation                               
101

  
102
#results_val <-c(res_yplA1,res_ypgA1,res_yplP1,res_ypgP1)
103
results_val<-data.frame(res_yplA1=res_yplA1,res_ypgA1=res_ypgA1,res_yplP1=res_yplP1,res_ypgP1=res_ypgP1)
104
nv<- nrow(ghcn1507_v)
105
                  
106
RMSE_yplA1 <- sqrt(sum(res_yplA1^2)*1/nv)            
107
#RMSE_ypgA1 <- sqrt(sum(res_ypgA1^2)/nv)
108
RMSE_ypgA1 <- sqrt(sum(res_ypgA1^2)/nv)
109
RMSE_yplP1 <- sqrt(sum(res_yplP1^2)/nv)            
110
RMSE_ypgP1 <- sqrt(sum(res_ypgP1^2)/nv)
111
      
112
#Printing the RMSE values for the different models                  
113
RMSE_yplA1             
114
RMSE_ypgA1 
115
RMSE_yplP1          
100 116
RMSE_ypgP1
101 117

  
102
RMSE_all<-c(RMSE_yplA1,RMSE_ypgA1,RMSE_yplP1,RMSE_ypgP1)
103 118
mod_name<-c("yplA1","ypgA1","yplP1","ypgP1")
104 119
mod_type<-c("lm_ANUSPLIN1","GAM_ANUSPLIN1","lm_PRISM1","GAM_PRISM1")
120

  
121
RMSE_all<-c(RMSE_yplA1,RMSE_ypgA1,RMSE_yplP1,RMSE_ypgP1)
105 122
AIC_all<-AIC(lm_ANUSPLIN1,GAM_ANUSPLIN1,lm_PRISM1,GAM_PRISM1)
106
results<-data.frame(model=mod_name,RMSE=RMSE_all,df=AIC_all$df,AIC=AIC_all$AIC)
123
GCV_all<-c(0,GCVA1,0,GCVP1)     #This places the GCV values for each model in a vector
124
Deviance_all<-c(0,DevianceA1,0,DevianceP1)
125

  
126
#results<-data.frame(model=mod_name,RMSE=RMSE_all,df=AIC_all$df,AIC=AIC_all$AIC)
127
#results_val<-data.frame(model=mod_name,RMSE=RMSE_all,df=AIC_all$df,AIC=AIC_all$AIC)  
128
results<-data.frame(model=mod_name,RMSE=RMSE_all,df=AIC_all$df,AIC=AIC_all$AIC, GCV=GCV_all,Deviance=Deviance_all)
129

  
130
#Add deviance
107 131

  
132
barplot(results$RMSE,main="RMSE for the models",names.arg=c("yplA1","ypgA1","yplP1","ypgP1"),ylab="Temp (deg. C)")                  
133
barplot(results$AIC,main="AIC for the models",names.arg=c("yplA1","ypgA1","yplP1","ypgP1"),ylab="AIC")
108 134
#dump("results", file= paste(path,"/","results_reg1507.txt",sep=""))
109
write.csv(results, file= paste(path,"/","results_reg1507.txt",sep=""))
135
write.csv(results, file= paste(path,"/","results_reg1507_Assessment.txt",sep=""))
136
#write.csv(results_val, file= paste(path,"/","results_reg1507_val.txt",sep=""))
110 137

  
111
##End of script
138
                
139
library(lattice)                 
140
g <- expand.grid(x = ghcn1507_v$lon, y = ghcn1507_v$lat)
141
g$z <- y_plANUSPLIN1$fit
142
wireframe(z ~ x * y, data = g)                   
112 143

  
144
#####End of script##########
145
###############################
113 146

  
114 147

  
115 148

  
116
 
149
 

Also available in: Unified diff