Project

General

Profile

« Previous | Next » 

Revision 2a89fa5f

Added by Benoit Parmentier over 12 years ago

  • ID 2a89fa5f5da86b42ecf7339408e46e75de9a4134

GAM prediction added to the loop with RMSE calculation

View differences:

climate/research/oregon/interpolation/Linear_reg.R
4 4
#1)assumes that the csv file is in the current working 
5 5
#2)extract relevant variables from raster images before performing the regressions. 
6 6
#The user must provide the list of raster images in  a textile.
7
#Script created by Benoit Parmentier on March 1, 2012. 
7
#Script created by Benoit Parmentier on March 3, 2012. 
8 8

  
9 9
###Loading r library and packages                                                                       # loading the raster package
10 10
library(gtools)                                                                        # loading ...
......
14 14
infile1<-"ghcn_or_b_02122012_OR83M.csv"
15 15
path<-"C:/Data/Benoit/NCEAS/window_Oregon_data"
16 16
setwd(path)
17
infile2<-"dates_interpolation_03012012.txt"  # list of 10 dates
18
#dates<-"20101507"  # list of 10 dates in a textfile                                   #Date selected for the regression 
19
#prop<-0.3                                                                            #Proportion of testing retained for validation   
17
#infile2<-"dates_interpolation_03012012.txt"  # list of 10 dates for the regression
18
infile2<-"dates_interpolation_03032012.txt"
19
prop<-0.3                                                                            #Proportion of testing retained for validation   
20
out_prefix<-"_03022012_r3"
20 21

  
21 22
#######START OF THE SCRIPT #############
22 23

  
23
###Reading the station data
24
###Reading the station data and setting up for models' comparison
24 25
ghcn<-read.csv(paste(path,"/",infile1, sep=""), header=TRUE)                            #The "paste" function concatenates the path and file name in common string. 
25 26
dates <-readLines(paste(path,"/",infile2, sep=""))
26
                  
27
###Creating a validation dataset by creating training and testing datasets (%30)
28 27

  
28
results <- matrix(1,length(dates),10)            #This is a matrix containing the diagnostic measures from the GAM models.
29
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date_==d)) #this creates a list of 10 subsets data
29 30

  
30
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date_==d))
31
## looping through the dates...
31 32

  
32
for(i in 1:length(dates)){
33
  #data_name<-cat("ghcn_",dates[[i]],sep="")
34
  data_name<-paste("ghcn_",dates[[i]],sep="")
35
  data<-subset(ghcn,ghcn$date_==dates[[i]])
36
  assign(data_name,data)
37
  }                                                   #This loops creates 2 subsets of ghcn based on dates and reassign the names using the date
33
for(i in 1:length(dates)){            # start of the for loop #1
34
  
35
  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
36
  
37
  n<-nrow(ghcn.subsets[[i]])
38
  ns<-n-round(n*prop)  #Create a sample from the data frame with 70% of the rows
39
  nv<-n-ns             #create a sample for validation with prop of the rows
40
  #ns<-n-round(n*prop)  #Create a sample from the data frame with 70% of the rows
41
  ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly
42
  ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training)
43
  data_s <- ghcn.subsets[[i]][ind.training, ]
44
  data_v <- ghcn.subsets[[i]][ind.testing, ]
45
  
46
  ####Regression part 2: GAM models
47

  
48
  GAM_ANUSPLIN1<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM), data=data_s)
49
  GAM_PRISM1<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) + s (ASPECT)+ s(DISTOC), data=data_s)
50
  
51
  ####Regression part 3: Calculating and storing diagnostic measures
52
  
53
  results[i,1]<- dates[i]  #storing the interpolation dates in the first column
54
  results[i,2]<- ns        #number of stations used in the training stage
55
  
56
  results[i,5]<- AIC (GAM_ANUSPLIN1)
57
  results[i,6]<- AIC (GAM_PRISM1)
58
  GCVA1<-GAM_ANUSPLIN1$gcv.ubre
59
  results[i,7]<- GCVA1
60
  GCVP1<-GAM_PRISM1$gcv.ubre
61
  results[i,8]<- GCVP1
62
  
63
  results[i,9]<-GAM_ANUSPLIN1$deviance
64
  results[i,10]<-GAM_PRISM1$deviance
65
  
66
  #####VALIDATION: Prediction checking the results using the testing data########
67
 
68
  y_pgANUSPLIN1<- predict(GAM_ANUSPLIN1, newdata=data_v, se.fit = TRUE) #Using the coeff to predict new values.
69
  y_pgPRISM1<- predict(GAM_PRISM1, newdata=data_v, se.fit = TRUE)            
70
           
71
  res_ypgA1<- data_v$tmax - y_pgANUSPLIN1$fit #Residuals for GMA model that resembles the ANUSPLIN interpolation
72
  res_ypgP1<- data_v$tmax - y_pgPRISM1$fit   #Residuals for GAM model that resembles the PRISM interpolation                               
73
                             
74
  RMSE_ypgA1 <- sqrt(sum(res_ypgA1^2)/nv)          
75
  RMSE_ypgP1 <- sqrt(sum(res_ypgP1^2)/nv)
76
  
77
  results[i,3]<-RMSE_ypgA1
78
  results[i,4]<-RMSE_ypgP1
79
  
80
  # end of the for loop #1
81
  }
82

  
83
## Plotting and saving diagnostic measures
84

  
85
results_num <-results
86
mode(results_num)<- "numeric"
87
# Make it numeric first
88
# Now turn it into a data.frame...
89

  
90
results_table<-as.data.frame(results_num)
91
colnames(results_table)<-c("dates","ns","RMSE_A1", "RMSE_P1", "AIC_A1", "AIC_P1", "GCV_A1", "GCV_P1", "Deviance_A1", "Deviance_P1")
92

  
93
win.graph()
94
barplot(results_table$RMSE_A1,main="RMSE for the A1 models",names.arg=results_table$dates,ylab="Temp (0.1 X deg. C)",xlab="interolated date")
95
savePlot(paste("GAM_ANUSPLIN1_RMSE",out_prefix,".emf", sep=""), type="emf")
96
win.graph()
97
barplot(results_table$RMSE_P1,main="RMSE for the P1 models",names.arg=results_table$dates,ylab="Temp (0.1 X deg. C)",xlab="interolated date")
98
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf")
99
win.graph()
100
barplot(results_table$AIC_P1,main="AIC for the P1 models",names.arg=results_table$dates,ylab="Temp (0.1 X deg. C)",xlab="interolated date")
101
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf")
102
win.graph()
103
barplot(results_table$AIC_A1,main="AIC for the A1 models",names.arg=results_table$dates,ylab="Temp (0.1 X deg. C)",xlab="interolated date")
104
savePlot(paste("GAM_PRISM1_RMSE",out_prefix,".emf", sep=""), type="emf")
105

  
106
write.csv(results_table, file= paste(path,"/","results_GAM_Assessment",out_prefix,".txt",sep=""))
107

  
108
# End of script##########
109

  
110
# ###############################
38 111

  
39
# ############ REGRESSION ###############
40
# 
41
# ###Regression part 1: linear models
42
# 
43
# lm_ANUSPLIN1<-lm(tmax~lat+lon+ELEV_SRTM, data=ghcn1507_s)
44
# lm_PRISM1<-lm(tmax~lat+lon+ELEV_SRTM+ASPECT+DISTOC, data=ghcn1507_s) #Note that a variable on inversion is missing
45
# summary(lm_ANUSPLIN1)
46
# summary(lm_PRISM1)
47
# 
48
# ###Regression part 2: GAM models
49
# GAM_ANUSPLIN1<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM), data=ghcn1507_s)
50
# GAM_PRISM1<-gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) + s (ASPECT)+ s(DISTOC), data=ghcn1507_s)
51
# #use the s() for smoothing function
52
# 
53
# ###Compare the models
54
# #Show AIC, Cook distance, p values and residuals plot
55
# ###Access the R2 and significance to give a report
56
# 
57
# AIC (lm_ANUSPLIN1,GAM_ANUSPLIN1) #list the AIC and for the results
58
# AIC (lm_PRISM1,GAM_PRISM1) #list the AIC and for the results
59
# 
60
# GCVA1<-GAM_ANUSPLIN1$gcv.ubre
61
# GCVP1<-GAM_PRISM1$gcv.ubre
62
# 
63
# DevianceA1<-GAM_ANUSPLIN1$deviance
64
# DevianceP1<-GAM_PRISM1$deviance
65
# 
66
# anova(lm_ANUSPLIN1, GAM_ANUSPLIN1,test="F") #compare the different models in terms of F; a reference model should be set for comparison. 
67
# anova(lm_PRISM1, GAM_PRISM1,test="F")
68
# 
69
# summary(lm_ANUSPLIN1)$r.squared
70
# summary(GAM_ANUSPLIN1)$r.squared
71
# summary(lm_PRISM1)$r.squared                
72
# summary(GAM_PRISM1)$r.squared
73 112
# 
74 113
# ############Diagnostic GAM plots#############
75 114
# win.graph()
......
89 128
# vis.gam(GAM_ANUSPLIN1, view=c("lat","ELEV_SRTM"))
90 129
# #vis.gam(GAM_ANUSPLIN1, view=c("lat","ELEV_SRTM", theta=100,phi=200))
91 130
# savePlot("GAM_ANUSPLIN1_prediction2.emf", type="emf")
92
# 
93
# ####VALIDATION: Prediction checking the results using the testing data########
94
# y_plANUSPLIN1<- predict(lm_ANUSPLIN1, newdata=ghcn1507_v, se.fit = TRUE)
95
# y_pgANUSPLIN1<- predict(GAM_ANUSPLIN1, newdata=ghcn1507_v, se.fit = TRUE) #Using the coeff to predict new values.
96
# y_plPRISM1<- predict(lm_PRISM1, newdata=ghcn1507_v, se.fit = TRUE)
97
# y_pgPRISM1<- predict(GAM_PRISM1, newdata=ghcn1507_v, se.fit = TRUE)            
98
#          
99
# res_yplA1<- ghcn1507_v$tmax - y_plANUSPLIN1$fit
100
# res_ypgA1<- ghcn1507_v$tmax - y_pgANUSPLIN1$fit
101
# res_yplP1<- ghcn1507_v$tmax - y_plPRISM1$fit   #Residuals for lm model that resembles the PRISM interpolation
102
# res_ypgP1<- ghcn1507_v$tmax - y_pgPRISM1$fit   #Residuals for GAM model that resembles the PRISM interpolation                               
103
# 
131
#
104 132
# #results_val <-c(res_yplA1,res_ypgA1,res_yplP1,res_ypgP1)
105 133
# results_val<-data.frame(res_yplA1=res_yplA1,res_ypgA1=res_ypgA1,res_yplP1=res_yplP1,res_ypgP1=res_ypgP1)
106 134
# nv<- nrow(ghcn1507_v)
107
#                   
108
# RMSE_yplA1 <- sqrt(sum(res_yplA1^2)*1/nv)            
109
# #RMSE_ypgA1 <- sqrt(sum(res_ypgA1^2)/nv)
110
# RMSE_ypgA1 <- sqrt(sum(res_ypgA1^2)/nv)
111
# RMSE_yplP1 <- sqrt(sum(res_yplP1^2)/nv)            
112
# RMSE_ypgP1 <- sqrt(sum(res_ypgP1^2)/nv)
113
#       
114
# #Printing the RMSE values for the different models                  
115
# RMSE_yplA1             
116
# RMSE_ypgA1 
117
# RMSE_yplP1          
118
# RMSE_ypgP1
119
# 
135
#                    
120 136
# mod_name<-c("yplA1","ypgA1","yplP1","ypgP1")
121 137
# mod_type<-c("lm_ANUSPLIN1","GAM_ANUSPLIN1","lm_PRISM1","GAM_PRISM1")
122 138
# 
......
136 152
# #dump("results", file= paste(path,"/","results_reg1507.txt",sep=""))
137 153
# write.csv(results, file= paste(path,"/","results_reg1507_Assessment.txt",sep=""))
138 154
# #write.csv(results_val, file= paste(path,"/","results_reg1507_val.txt",sep=""))
139
# 
140 155
#                 
141
# library(lattice)                 
142
# g <- expand.grid(x = ghcn1507_v$lon, y = ghcn1507_v$lat)
143
# g$z <- y_plANUSPLIN1$fit
144
# wireframe(z ~ x * y, data = g)                   
145
# 
146
# #####End of script##########
147
# ###############################
156

  
148 157

  
149 158

  
150 159

  

Also available in: Unified diff