Project

General

Profile

« Previous | Next » 

Revision f042de0f

Added by Benoit Parmentier over 12 years ago

GAM LST, added model with forest only term -364 dates assessment, task #406

View differences:

climate/research/oregon/interpolation/GAM_LST_reg.R
18 18
###Parameters and arguments
19 19

  
20 20
infile1<-"ghcn_or_tmax_b_04142012_OR83M.shp"
21
#path<-"/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations"
22
path<-"H:/Data/IPLANT_project/data_Oregon_stations"
21
path<-"/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations"
22
#path<-"H:/Data/IPLANT_project/data_Oregon_stations"
23 23
setwd(path) 
24 24
infile2<-"dates_interpolation_03052012.txt"                                          #List of 10 dates for the regression
25
infile2<-"list_365_dates_04212012.txt"
25 26
prop<-0.3                                                                            #Proportion of testing retained for validation   
26
out_prefix<-"_04252012_LST_residuals"
27
out_prefix<-"_05012012_mod8_LST"
27 28
infile3<-"LST_dates_var_names.txt"
28 29
infile4<-"models_interpolation_04032012b.txt"
29 30

  
......
33 34
filename<-sub(".shp","",infile1)              #Removing the extension from file.
34 35
ghcn<-readOGR(".", filename)                  #reading shapefile 
35 36

  
36
CRS<-proj4string(ghcn)
37

  
37
proj4string(ghcn) #This retrieves the coordinate system for the SDF
38
CRS_ghcn<-proj4string(ghcn) #this can be assigned to mean_LST!!!
38 39

  
39 40
ghcn = transform(ghcn,Northness = cos(ASPECT)) #Adding a variable to the dataframe
40 41
ghcn = transform(ghcn,Eastness = sin(ASPECT))  #adding variable to the dataframe.
......
46 47
LST_dates <-readLines(paste(path,"/",infile3, sep=""))
47 48
models <-readLines(paste(path,"/",infile4, sep=""))
48 49

  
49
results <- matrix(1,length(dates),14)            #This is a matrix containing the diagnostic measures from the GAM models.
50
results <- matrix(1,length(dates),15)            #This is a matrix containing the diagnostic measures from the GAM models.
50 51

  
51
results_AIC<- matrix(1,length(dates),length(models)+2)  
52
results_GCV<- matrix(1,length(dates),length(models)+2)
53
results_DEV<- matrix(1,length(dates),length(models)+2)
54
results_RMSE<- matrix(1,length(dates),length(models)+2)
55
cor_LST_LC1<-matrix(1,10,1)      #correlation LST-LC1
56
cor_LST_LC3<-matrix(1,10,1)      #correlation LST-LC3
57
cor_LST_tmax<-matrix(1,10,1)    #correlation LST-tmax
52
results_AIC<- matrix(1,length(dates),length(models)+3)  
53
results_GCV<- matrix(1,length(dates),length(models)+3)
54
results_DEV<- matrix(1,length(dates),length(models)+3)
55
results_RMSE<- matrix(1,length(dates),length(models)+3)
56
cor_LST_LC1<-matrix(1,length(dates),1)      #correlation LST-LC1
57
cor_LST_LC3<-matrix(1,length(dates),1)      #correlation LST-LC3
58
cor_LST_tmax<-matrix(1,length(dates),1)    #correlation LST-tmax
58 59
#Screening for bad values
59 60

  
60 61
ghcn_all<-ghcn
61 62
ghcn_test<-subset(ghcn,ghcn$tmax>-150 & ghcn$tmax<400)
62 63
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0)
63 64
ghcn<-ghcn_test2
64
#coords<- ghcn[,c('x_OR83M','y_OR83M')]
65 65

  
66 66
month_var<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09", "mm_10", "mm_11", "mm_12")
67 67
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==d)) #this creates a list of 10 subsets data
......
94 94
  ####Regression part 2: GAM models
95 95

  
96 96
  mod1<- gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM), data=data_s)
97
  mod2<- gam(tmax~ s(lat,lon,ELEV_SRTM), data=data_s)
97
  #mod2<- gam(tmax~ s(lat,lon,ELEV_SRTM), data=data_s)
98
  mod2<- gam(tmax~ s(lat,lon) + s(ELEV_SRTM), data=data_s)
98 99
  mod3<- gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) +  s (Northness)+ s (Eastness) + s(DISTOC), data=data_s)
99 100
  mod4<- gam(tmax~ s(lat) + s (lon) + s(ELEV_SRTM) + s(Northness) + s (Eastness) + s(DISTOC) + s(LST), data=data_s)
100 101
  mod5<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST), data=data_s)
101 102
  mod6<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC1), data=data_s)
102 103
  mod7<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC3), data=data_s)
104
  mod8<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST) + s(LC1), data=data_s)
103 105
  
104 106
  ####Regression part 3: Calculating and storing diagnostic measures
105 107
  results_AIC[i,1]<- dates[i]  #storing the interpolation dates in the first column
......
111 113
  results_AIC[i,7]<- AIC (mod5)
112 114
  results_AIC[i,8]<- AIC (mod6)
113 115
  results_AIC[i,9]<- AIC (mod7)
116
  results_AIC[i,10]<- AIC (mod8)
114 117
  
115 118
  results_GCV[i,1]<- dates[i]  #storing the interpolation dates in the first column
116 119
  results_GCV[i,2]<- ns        #number of stations used in the training stage
......
121 124
  results_GCV[i,7]<- mod5$gcv.ubre
122 125
  results_GCV[i,8]<- mod6$gcv.ubre
123 126
  results_GCV[i,9]<- mod7$gcv.ubre
127
  results_GCV[i,10]<- mod7$gcv.ubre
124 128
  
125 129
  results_DEV[i,1]<- dates[i]  #storing the interpolation dates in the first column
126 130
  results_DEV[i,2]<- ns        #number of stations used in the training stage
......
130 134
  results_DEV[i,6]<- mod4$deviance
131 135
  results_DEV[i,7]<- mod5$deviance
132 136
  results_DEV[i,8]<- mod6$deviance
133
  results_DEV[i,9]<- mod6$deviance
137
  results_DEV[i,9]<- mod7$deviance
138
  results_DEV[i,10]<- mod8$deviance
134 139
  
135 140
  #####VALIDATION: Prediction checking the results using the testing data########
136 141
 
......
141 146
  y_mod5<- predict(mod5, newdata=data_v, se.fit = TRUE) 
142 147
  y_mod6<- predict(mod6, newdata=data_v, se.fit = TRUE)
143 148
  y_mod7<- predict(mod7, newdata=data_v, se.fit = TRUE)
149
  y_mod8<- predict(mod8, newdata=data_v, se.fit = TRUE)
144 150
  
145 151
  res_mod1<- data_v$tmax - y_mod1$fit #Residuals for GMA model that resembles the ANUSPLIN interpolation
146 152
  res_mod2<- data_v$tmax - y_mod2$fit   #Residuals for GAM model that resembles the PRISM interpolation                               
......
149 155
  res_mod5<- data_v$tmax - y_mod5$fit
150 156
  res_mod6<- data_v$tmax - y_mod6$fit
151 157
  res_mod7<- data_v$tmax - y_mod7$fit
158
  res_mod8<- data_v$tmax - y_mod8$fit
152 159
  
153 160
  RMSE_mod1 <- sqrt(sum(res_mod1^2)/nv)          
154 161
  RMSE_mod2 <- sqrt(sum(res_mod2^2)/nv)
......
157 164
  RMSE_mod5 <- sqrt(sum(res_mod5^2)/nv)
158 165
  RMSE_mod6 <- sqrt(sum(res_mod6^2)/nv)
159 166
  RMSE_mod7 <- sqrt(sum(res_mod7^2)/nv)
167
  RMSE_mod8 <- sqrt(sum(res_mod8^2)/nv)
160 168

  
161 169
  results_RMSE[i,1]<- dates[i]  #storing the interpolation dates in the first column
162 170
  results_RMSE[i,2]<- ns        #number of stations used in the training stage
......
167 175
  results_RMSE[i,7]<- RMSE_mod5
168 176
  results_RMSE[i,8]<- RMSE_mod6
169 177
  results_RMSE[i,9]<- RMSE_mod7
178
  results_RMSE[i,10]<- RMSE_mod8
170 179
  
171
  #Saving dataset in dataframes: residuals from RMSE
172
  
173
#   data_v$mod1<-y_mod1$fit
174
#   data_v$mod2<-y_mod2$fit
175
#   data_v$mod3<-y_mod3$fit
176
#   data_v$mod4<-y_mod4$fit
177
#   data_v$mod5<-y_mod5$fit
178
#   data_v$mod6<-y_mod6$fit
179
#   data_v$mod7<-y_mod7$fit
180
#   
181
#   data_s$mod1<-mod1$fit
182
#   data_s$mod2<-mod2$fit
183
#   data_s$mod3<-mod3$fit
184
#   data_s$mod4<-mod4$fit
185
#   data_s$mod5<-mod5$fit
186
#   data_s$mod6<-mod6$fit
187
#   data_s$mod7<-mod7$fit
188
#   
189
  data_v$res_mod1<-as.numeric(res_mod1)
190
  data_v$res_mod2<-as.numeric(res_mod2)
191
  data_v$res_mod3<-as.numeric(res_mod3)
192
  data_v$res_mod4<-as.numeric(res_mod4)
193
  data_v$res_mod5<-as.numeric(res_mod5)
194
  data_v$res_mod6<-as.numeric(res_mod6)
195
  data_v$res_mod7<-as.numeric(res_mod7)
196
  
197
  data_s$res_mod1<-as.numeric(mod1$residuals)
198
  data_s$res_mod2<-as.numeric(mod2$residuals)
199
  data_s$res_mod3<-as.numeric(mod3$residuals)
200
  data_s$res_mod4<-as.numeric(mod4$residuals)
201
  data_s$res_mod5<-as.numeric(mod5$residuals)
202
  data_s$res_mod6<-as.numeric(mod6$residuals)
203
  data_s$res_mod7<-as.numeric(mod7$residuals)
204
  
180
  #Saving dataset in dataframes
205 181
  data_name<-paste("ghcn_v_",dates[[i]],sep="")
206 182
  assign(data_name,data_v)
207
  write.table(data_v, file= paste(path,"/",data_name,".txt",sep=""), sep=" ")
208
  #write out a new shapefile (including .prj component)
209
  coords<- data_v[,c('x_OR83M','y_OR83M')]
210
  coordinates(data_v)<-coords
211
  proj4string(data_v)<-CRS  #Need to assign coordinates...
212
  outfile<-sub(".shp","",data_name)   #Removing extension if it is present
213
  writeOGR(data_v,".", outfile, driver ="ESRI Shapefile")
214
  
215 183
  data_name<-paste("ghcn_s_",dates[[i]],sep="")
216 184
  assign(data_name,data_s)
217
  write.table(data_s, file= paste(path,"/",data_name,".txt",sep=""), sep=" ")
218
  #write out a new shapefile (including .prj component)
219
  coords<- data_s[,c('x_OR83M','y_OR83M')]
220
  coordinates(data_s)<-coords
221
  proj4string(data_s)<-CRS  #Need to assign coordinates..
222
  outfile<-sub(".shp","",data_name)   #Removing extension if it is present
223
  writeOGR(data_s,".", outfile, driver ="ESRI Shapefile")
185
  #ghcn_v<-ls(pattern="ghcn_v_")
186
  
187

  
188
  cor_LST_LC1[i]<-cor(ghcn.subsets[[i]]$LST,ghcn.subsets[[i]]$LC1)
189
  cor_LST_LC3[i]<-cor(ghcn.subsets[[i]]$LST,ghcn.subsets[[i]]$LC3)
224 190
  
225
  # end of the for loop #1
191
  #end of the for loop #1
226 192
  }
227 193

  
228 194
## Plotting and saving diagnostic measures
......
250 216
f<-file(paste(path,"/","results_GAM_Assessment",out_prefix,"2.txt",sep=""),"w")
251 217
write(results_table_RMSE, file=f)
252 218
close(f)
253

  
254 219
# End of script##########
255 220

  
256 221
#Selecting dates and files based on names
......
262 227
#   cor_LST_LC3[i]<-cor(ghcn.subsets[[i]]$LST,ghcn.subsets[[i]]$LC3)
263 228
# }
264 229

  
230
#mod9<- gam(tmax~ te(lat,lon,ELEV_SRTM), data=ghcn_s_20101031)

Also available in: Unified diff