Project

General

Profile

Download (13.7 KB) Statistics
| Branch: | Revision:
1 24eedf3a Benoit Parmentier
####################Interpolation of Tmax for 10 dates.#####################
2 75151566 Benoit Parmentier
#This script interpolates station values for the Oregon case study. This program loads the station data from a shapefile
3
#and perform 8 regressions using the general additive model (GAM). Note that this program:
4
#1)assumes that the shapefile in the current working 
5 24eedf3a Benoit Parmentier
#2)extract relevant variables from raster images before performing the regressions. 
6 63d8201d Benoit Parmentier
#This scripts predicts tmas xsing GAM and LST derived from MOD11A1.
7 24eedf3a Benoit Parmentier
#Interactions terms are also included and assessed using the RMSE from validation dataset.
8
#There are 10 dates used for the GAM interpolation. The dates must be provided as a textfile.
9
#Script created by Benoit Parmentier on April 4, 2012. 
10
11 63d8201d Benoit Parmentier
###Loading r library and packages                                                      # loading the raster package
12 24eedf3a Benoit Parmentier
library(gtools)                                                                        # loading ...
13
library(mgcv)
14
library(sp)
15
library(spdep)
16
library(rgdal)
17
18
###Parameters and arguments
19
20 63d8201d Benoit Parmentier
infile1<-"ghcn_or_tmax_b_04142012_OR83M.shp"
21 f042de0f Benoit Parmentier
path<-"/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations"
22
#path<-"H:/Data/IPLANT_project/data_Oregon_stations"
23 63d8201d Benoit Parmentier
setwd(path) 
24
infile2<-"dates_interpolation_03052012.txt"                                          #List of 10 dates for the regression
25 f042de0f Benoit Parmentier
infile2<-"list_365_dates_04212012.txt"
26 24eedf3a Benoit Parmentier
prop<-0.3                                                                            #Proportion of testing retained for validation   
27 f042de0f Benoit Parmentier
out_prefix<-"_05012012_mod8_LST"
28 24eedf3a Benoit Parmentier
infile3<-"LST_dates_var_names.txt"
29 80402fdf Benoit Parmentier
infile4<-"models_interpolation_04032012b.txt"
30 24eedf3a Benoit Parmentier
31
#######START OF THE SCRIPT #############
32
33
###Reading the station data and setting up for models' comparison
34 63d8201d Benoit Parmentier
filename<-sub(".shp","",infile1)              #Removing the extension from file.
35
ghcn<-readOGR(".", filename)                  #reading shapefile 
36 9c848d7a Benoit Parmentier
37 f042de0f Benoit Parmentier
proj4string(ghcn) #This retrieves the coordinate system for the SDF
38
CRS_ghcn<-proj4string(ghcn) #this can be assigned to mean_LST!!!
39 9c848d7a Benoit Parmentier
40 24eedf3a Benoit Parmentier
ghcn = transform(ghcn,Northness = cos(ASPECT)) #Adding a variable to the dataframe
41
ghcn = transform(ghcn,Eastness = sin(ASPECT))  #adding variable to the dataframe.
42
ghcn = transform(ghcn,Northness_w = sin(slope)*cos(ASPECT)) #Adding a variable to the dataframe
43
ghcn = transform(ghcn,Eastness_w = sin(slope)*sin(ASPECT))  #adding variable to the dataframe.
44 63d8201d Benoit Parmentier
45 24eedf3a Benoit Parmentier
set.seed(100)
46
dates <-readLines(paste(path,"/",infile2, sep=""))
47
LST_dates <-readLines(paste(path,"/",infile3, sep=""))
48
models <-readLines(paste(path,"/",infile4, sep=""))
49
50 f042de0f Benoit Parmentier
results <- matrix(1,length(dates),15)            #This is a matrix containing the diagnostic measures from the GAM models.
51 24eedf3a Benoit Parmentier
52 f042de0f Benoit Parmentier
results_AIC<- matrix(1,length(dates),length(models)+3)  
53
results_GCV<- matrix(1,length(dates),length(models)+3)
54
results_DEV<- matrix(1,length(dates),length(models)+3)
55
results_RMSE<- matrix(1,length(dates),length(models)+3)
56
cor_LST_LC1<-matrix(1,length(dates),1)      #correlation LST-LC1
57
cor_LST_LC3<-matrix(1,length(dates),1)      #correlation LST-LC3
58
cor_LST_tmax<-matrix(1,length(dates),1)    #correlation LST-tmax
59 24eedf3a Benoit Parmentier
#Screening for bad values
60
61
ghcn_all<-ghcn
62
ghcn_test<-subset(ghcn,ghcn$tmax>-150 & ghcn$tmax<400)
63
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0)
64
ghcn<-ghcn_test2
65
66 63d8201d Benoit Parmentier
month_var<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09", "mm_10", "mm_11", "mm_12")
67 24eedf3a Benoit Parmentier
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==d)) #this creates a list of 10 subsets data
68
#note that compare to the previous version date_ column was changed to date
69
70
## looping through the dates...
71
#Change this into  a nested loop, looping through the number of models
72
73
for(i in 1:length(dates)){            # start of the for loop #1
74 63d8201d Benoit Parmentier
  date<-strptime(dates[i], "%Y%m%d")
75
  month<-strftime(date, "%m")
76
  LST_month<-paste("mm_",month,sep="")
77 24eedf3a Benoit Parmentier
  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
78
  
79 63d8201d Benoit Parmentier
  mod <-ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))]
80 24eedf3a Benoit Parmentier
  ghcn.subsets[[i]] = transform(ghcn.subsets[[i]],LST = mod)
81 80402fdf Benoit Parmentier
  #Screening LST values
82
  #ghcn.subsets[[i]]<-subset(ghcn.subsets[[i]],ghcn.subsets[[i]]$LST> 258 & ghcn.subsets[[i]]$LST<313)
83 24eedf3a Benoit Parmentier
  n<-nrow(ghcn.subsets[[i]])
84
  ns<-n-round(n*prop)  #Create a sample from the data frame with 70% of the rows
85
  nv<-n-ns             #create a sample for validation with prop of the rows
86
  #ns<-n-round(n*prop)  #Create a sample from the data frame with 70% of the rows
87
  ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly
88
  ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training)
89
  data_s <- ghcn.subsets[[i]][ind.training, ]
90
  data_v <- ghcn.subsets[[i]][ind.testing, ]
91
  #mod <-data_s[,match(LST_dates[i], names(data_s))]
92
  #data_s = transform(data_s,LST = mod)
93
  #data_v = transform(data_v,LST = mod)
94
  ####Regression part 2: GAM models
95
96
  mod1<- gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM), data=data_s)
97 f042de0f Benoit Parmentier
  #mod2<- gam(tmax~ s(lat,lon,ELEV_SRTM), data=data_s)
98
  mod2<- gam(tmax~ s(lat,lon) + s(ELEV_SRTM), data=data_s)
99 24eedf3a Benoit Parmentier
  mod3<- gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) +  s (Northness)+ s (Eastness) + s(DISTOC), data=data_s)
100
  mod4<- gam(tmax~ s(lat) + s (lon) + s(ELEV_SRTM) + s(Northness) + s (Eastness) + s(DISTOC) + s(LST), data=data_s)
101
  mod5<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST), data=data_s)
102
  mod6<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC1), data=data_s)
103 80402fdf Benoit Parmentier
  mod7<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC3), data=data_s)
104 f042de0f Benoit Parmentier
  mod8<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST) + s(LC1), data=data_s)
105 24eedf3a Benoit Parmentier
  
106
  ####Regression part 3: Calculating and storing diagnostic measures
107
  results_AIC[i,1]<- dates[i]  #storing the interpolation dates in the first column
108
  results_AIC[i,2]<- ns        #number of stations used in the training stage
109
  results_AIC[i,3]<- AIC (mod1)
110
  results_AIC[i,4]<- AIC (mod2)
111
  results_AIC[i,5]<- AIC (mod3)
112
  results_AIC[i,6]<- AIC (mod4)
113
  results_AIC[i,7]<- AIC (mod5)
114
  results_AIC[i,8]<- AIC (mod6)
115 80402fdf Benoit Parmentier
  results_AIC[i,9]<- AIC (mod7)
116 f042de0f Benoit Parmentier
  results_AIC[i,10]<- AIC (mod8)
117 24eedf3a Benoit Parmentier
  
118
  results_GCV[i,1]<- dates[i]  #storing the interpolation dates in the first column
119
  results_GCV[i,2]<- ns        #number of stations used in the training stage
120
  results_GCV[i,3]<- mod1$gcv.ubre
121
  results_GCV[i,4]<- mod2$gcv.ubre
122
  results_GCV[i,5]<- mod3$gcv.ubre
123
  results_GCV[i,6]<- mod4$gcv.ubre
124
  results_GCV[i,7]<- mod5$gcv.ubre
125
  results_GCV[i,8]<- mod6$gcv.ubre
126 80402fdf Benoit Parmentier
  results_GCV[i,9]<- mod7$gcv.ubre
127 f042de0f Benoit Parmentier
  results_GCV[i,10]<- mod7$gcv.ubre
128 24eedf3a Benoit Parmentier
  
129
  results_DEV[i,1]<- dates[i]  #storing the interpolation dates in the first column
130
  results_DEV[i,2]<- ns        #number of stations used in the training stage
131
  results_DEV[i,3]<- mod1$deviance
132
  results_DEV[i,4]<- mod2$deviance
133
  results_DEV[i,5]<- mod3$deviance
134
  results_DEV[i,6]<- mod4$deviance
135
  results_DEV[i,7]<- mod5$deviance
136
  results_DEV[i,8]<- mod6$deviance
137 f042de0f Benoit Parmentier
  results_DEV[i,9]<- mod7$deviance
138
  results_DEV[i,10]<- mod8$deviance
139 24eedf3a Benoit Parmentier
  
140
  #####VALIDATION: Prediction checking the results using the testing data########
141
 
142
  y_mod1<- predict(mod1, newdata=data_v, se.fit = TRUE) #Using the coeff to predict new values.
143
  y_mod2<- predict(mod2, newdata=data_v, se.fit = TRUE)            
144
  y_mod3<- predict(mod3, newdata=data_v, se.fit = TRUE) 
145
  y_mod4<- predict(mod4, newdata=data_v, se.fit = TRUE) 
146
  y_mod5<- predict(mod5, newdata=data_v, se.fit = TRUE) 
147
  y_mod6<- predict(mod6, newdata=data_v, se.fit = TRUE)
148 80402fdf Benoit Parmentier
  y_mod7<- predict(mod7, newdata=data_v, se.fit = TRUE)
149 f042de0f Benoit Parmentier
  y_mod8<- predict(mod8, newdata=data_v, se.fit = TRUE)
150 24eedf3a Benoit Parmentier
  
151
  res_mod1<- data_v$tmax - y_mod1$fit #Residuals for GMA model that resembles the ANUSPLIN interpolation
152
  res_mod2<- data_v$tmax - y_mod2$fit   #Residuals for GAM model that resembles the PRISM interpolation                               
153
  res_mod3<- data_v$tmax - y_mod3$fit  
154
  res_mod4<- data_v$tmax - y_mod4$fit
155
  res_mod5<- data_v$tmax - y_mod5$fit
156
  res_mod6<- data_v$tmax - y_mod6$fit
157 80402fdf Benoit Parmentier
  res_mod7<- data_v$tmax - y_mod7$fit
158 f042de0f Benoit Parmentier
  res_mod8<- data_v$tmax - y_mod8$fit
159 24eedf3a Benoit Parmentier
  
160
  RMSE_mod1 <- sqrt(sum(res_mod1^2)/nv)          
161
  RMSE_mod2 <- sqrt(sum(res_mod2^2)/nv)
162
  RMSE_mod3 <- sqrt(sum(res_mod3^2)/nv)
163
  RMSE_mod4 <- sqrt(sum(res_mod4^2)/nv)
164
  RMSE_mod5 <- sqrt(sum(res_mod5^2)/nv)
165
  RMSE_mod6 <- sqrt(sum(res_mod6^2)/nv)
166 80402fdf Benoit Parmentier
  RMSE_mod7 <- sqrt(sum(res_mod7^2)/nv)
167 f042de0f Benoit Parmentier
  RMSE_mod8 <- sqrt(sum(res_mod8^2)/nv)
168 24eedf3a Benoit Parmentier
169
  results_RMSE[i,1]<- dates[i]  #storing the interpolation dates in the first column
170
  results_RMSE[i,2]<- ns        #number of stations used in the training stage
171
  results_RMSE[i,3]<- RMSE_mod1
172
  results_RMSE[i,4]<- RMSE_mod2
173
  results_RMSE[i,5]<- RMSE_mod3
174
  results_RMSE[i,6]<- RMSE_mod4
175
  results_RMSE[i,7]<- RMSE_mod5
176
  results_RMSE[i,8]<- RMSE_mod6
177 80402fdf Benoit Parmentier
  results_RMSE[i,9]<- RMSE_mod7
178 f042de0f Benoit Parmentier
  results_RMSE[i,10]<- RMSE_mod8
179 9c848d7a Benoit Parmentier
  
180 f042de0f Benoit Parmentier
  #Saving dataset in dataframes
181 24eedf3a Benoit Parmentier
  data_name<-paste("ghcn_v_",dates[[i]],sep="")
182
  assign(data_name,data_v)
183
  data_name<-paste("ghcn_s_",dates[[i]],sep="")
184
  assign(data_name,data_s)
185 f042de0f Benoit Parmentier
  #ghcn_v<-ls(pattern="ghcn_v_")
186
  
187
188
  cor_LST_LC1[i]<-cor(ghcn.subsets[[i]]$LST,ghcn.subsets[[i]]$LC1)
189
  cor_LST_LC3[i]<-cor(ghcn.subsets[[i]]$LST,ghcn.subsets[[i]]$LC3)
190 24eedf3a Benoit Parmentier
  
191 f042de0f Benoit Parmentier
  #end of the for loop #1
192 24eedf3a Benoit Parmentier
  }
193
194
## Plotting and saving diagnostic measures
195
196
results_RMSEnum <-results_RMSE
197 80402fdf Benoit Parmentier
results_AICnum <-results_AIC
198 24eedf3a Benoit Parmentier
mode(results_RMSEnum)<- "numeric"
199 80402fdf Benoit Parmentier
mode(results_AICnum)<- "numeric"
200 24eedf3a Benoit Parmentier
# Make it numeric first
201
# Now turn it into a data.frame...
202
203
results_table_RMSE<-as.data.frame(results_RMSEnum)
204 80402fdf Benoit Parmentier
results_table_AIC<-as.data.frame(results_AICnum)
205 75151566 Benoit Parmentier
colnames(results_table_RMSE)<-c("dates","ns","mod1", "mod2","mod3", "mod4", "mod5", "mod6", "mod7", "mod8")
206
colnames(results_table_AIC)<-c("dates","ns","mod1", "mod2","mod3", "mod4", "mod5", "mod6", "mod7", "mod8")
207 24eedf3a Benoit Parmentier
208
#results_table_RMSE
209 80402fdf Benoit Parmentier
write.table(results_table_RMSE, file= paste(path,"/","results_GAM_Assessment",out_prefix,".txt",sep=""), sep=",")
210
write.table(results_table_AIC, file= paste(path,"/","results_GAM_Assessment",out_prefix,".txt",sep=""),sep=",", append=TRUE)
211 24eedf3a Benoit Parmentier
212 75151566 Benoit Parmentier
###Analysing the results from the 365 days run: Summarize by month
213
214
for(i in 1:nrow(results_table_RMSE)){
215
  date<-results_table_RMSE$dates[i]
216
  date<-strptime(date, "%Y%m%d")
217
  results_table_RMSE$month[i]<-as.integer(strftime(date, "%m"))
218
}
219
220
average<-aggregate(cbind(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8)~month,data=results_table_RMSE,mean, na.rm=TRUE)
221
average<-aggregate(cbind(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8)~month,data=results_table_RMSE, FUN=mean)
222
#average on all the data.frame
223
averaget<-aggregate(results_table_RMSE, by=list(results_table_RMSE$month),FUN=mean, na.rm=TRUE)
224
#mediant<-aggregate(results_table_RMSE, by=list(results_table_RMSE$month),FUN=median, na.rm=TRUE)
225
#average_lowt<-aggregate(results_table_RMSE, by=list(results_table_RMSE$month), FUN=function(v) t.test(v)$conf.int[1])
226
#average_up<-aggregate(cbind(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8)~month,data=results_table_RMSE, function(v) t.test(v)$conf.int[2])
227
228
median<-aggregate(cbind(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8)~month,data=results_table_RMSE, median, na.rm=TRUE)
229
average_low<-aggregate(cbind(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8)~month,data=results_table_RMSE, function(v) t.test(v)$conf.int[1])
230
average_up<-aggregate(cbind(mod1,mod2,mod3,mod4,mod5,mod6,mod7,mod8)~month,data=results_table_RMSE, function(v) t.test(v)$conf.int[2])
231 24eedf3a Benoit Parmentier
232 75151566 Benoit Parmentier
mod<-names(averaget)
233
mod<-mod[4:11]
234
#Saving graphic plots
235
for(i in 1:length(mod)){
236
  X11(width=14,height=10)
237
  name<-mod[i]
238
  barplot2(average[[name]],plot.ci=TRUE, ci.l=average_low[[name]], ci.u=average_up[[name]],main="Mean RMSE per month", names.arg=c("Jan", "Feb", "Mar", "Apr", "May", "Jun","Jul", "Aug", "Sep","Oct", "Nov", "Dec"),ylim=c(20,30),ylab="RMSE in tenth deg C",xlab=name)
239
  #title(paste("Sampling RMSE for mod",i,sep=""))
240
  savePlot(paste("barplot_results_RMSE_month_",name,out_prefix,".png", sep=""), type="png")
241
  dev.off() 
242
}
243
244
245
for(i in 1:length(mod)){
246
  X11(width=14,height=10)
247
  name<-mod[i]
248
  barplot2(average[[name]],plot.ci=TRUE, ci.l=average_low[[name]], ci.u=average_up[[name]],main=paste(" Mean RMSE per month ",name, sep=""), names.arg=c("Jan", "Feb", "Mar", "Apr", "May", "Jun","Jul", "Aug", "Sep","Oct", "Nov", "Dec"),ylim=c(20,30),ylab="RMSE in tenth deg C",xlab=name)
249
  #title(paste("Sampling RMSE for mod",i,sep=""))
250
  savePlot(paste("barplot_results_RMSE_month_",name,out_prefix,".png", sep=""), type="png")
251
  dev.off() 
252
253
  X11(width=14,height=10)
254
  name<-mod[i]
255
  hist(results_table_RMSE[[name]],breaks=15, main=paste(" Histogram RMSE_",name, sep=""),xlab=paste("RMSE ",name, sep=""))
256
  savePlot(paste("Hist_results_RMSE_365_",name,out_prefix,".png", sep=""), type="png")
257
  dev.off()
258
  
259
}
260
261
for(i in 1:length(mod)){
262
  X11(width=14,height=10)
263
  name<-mod[i]
264
  hist(results_table_RMSE[[name]],breaks=15, main=paste(" Histogram RMSE_",name, sep=""),xlab=paste("RMSE ",name, sep=""))
265
  savePlot(paste("Hist_results_RMSE_365_",name,out_prefix,".png", sep=""), type="png")
266
  dev.off()
267
}
268
269
r<-(results_table_RMSE[,3:10]) #selecting only the columns related to models...
270
271
mean_r<-mean(r)
272
median_r<-sapply(r, median)
273
sd_r<-sapply(r, sd)
274
275
barplot(mean_r,ylim=c(23,26),ylab="RMSE in tenth deg C")
276
barplot(median_r,ylim=c(23,26),ylab="RMSE in tenth deg C",add=TRUE,inside=FALSE,beside=TRUE) # put both on the same plot
277
barplot(sd_r,ylim=c(6,8),ylab="RMSE in tenth deg C") # put both on the same plot
278
279
height<-rbind(mean_r,median_r)
280
barplot(height,ylim=c(23,26),ylab="RMSE in tenth deg C",beside=TRUE,legend=rownames(height))
281
barplot(height,ylim=c(23,26),ylab="RMSE in tenth deg C",beside=TRUE, col=c("darkblue","red"),legend=rownames(height)) # put both on the same plot
282
283
barplot2(mean_r,median_r,ylim=c(23,26),ylab="RMSE in tenth deg C") # put both on the same plot
284
#Collect var explained and p values for each var...
285
286
# End of script##########
287 24eedf3a Benoit Parmentier
288 75151566 Benoit Parmentier
names(results_table_RMSE)