 library(multicore) # if installed allows easy parallelization
 library(reshape) # very useful for switching from 'wide' to 'long' data formats
 library(lattice); library(latticeExtra)
-library(raster)
+library(raster); library(rasterVis)
 
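With multicore loaded, the serial date loop further down can be parallelized by swapping lapply for mclapply, which shares its interface; a minimal sketch, assuming the loop body were pulled into a hypothetical function fitOneDate(i):

    ## hypothetical: process one date per core instead of serially
    results=do.call(rbind.data.frame,
      mclapply(1:length(dates), fitOneDate, mc.cores=4))
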
 ### Parameters and arguments
 
...
 #path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations"
 #path<-"H:/Data/IPLANT_project/data_Oregon_stations"
 setwd(path)
-out_prefix<-"_20120713_precip"
+out_prefix<-"20120723PRCP"
 prop=0.7 # proportion of data used for fitting
 
+### load ROI
+roi <- readOGR("../Test_sites/","Oregon")
+roi=spTransform(roi,CRS("+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs"))
+
+
 ##################################
 ## Load covariate raster brick
 covar=brick(paste(path,"/covariates.gri",sep=""))
...
 
 ### Reading the station data and setting up for models' comparison
 filename<-sub(".shp","",infile1) #Removing the extension from file.
-ghcn<-readOGR(".", filename) #reading shapefile
+ghcn<-readOGR(".", filename) #reading shapefile
+ghcn@data[,c("x","y")]=coordinates(ghcn)
 
 #Note that "transform" output is a data.frame not spatial object
 #set.seed(100) #modify this to a seed variable allowing different runs.
...
 
 
 #### Define GAM models
-mods=data.frame(formula=c(
-  "value ~ s(x_OR83M,y_OR83M)",
-  "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev",
-  "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew",
-  "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew + s(CER_mean)",
-  "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew + s(CER_P20um)",
-  "value ~ s(x_OR83M,y_OR83M) + elev + ns + ew + s(CER_P20um)",
-  "value ~ s(x_OR83M,y_OR83M,CER_P20um) + elev + ns + ew",
-  "value ~ s(x_OR83M,y_OR83M) + s(distoc,CER_P20um)+elev + ns + ew",
-  "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew + s(CLD_mean)",
-  "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew + s(COT_mean)"
-),stringsAsFactors=F)
+mods=data.frame(
+  formula=c(
+    "value ~ s(x_OR83M,y_OR83M)",
+#   "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev",
+    "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew",
+    "value ~ s(distoc) + s(CER_P20um) + elev + ns + ew",
+    "value ~ s(distoc) + s(COT_mean) + elev + ns + ew",
+    "value ~ s(distoc) + s(CLD_mean) + elev + ns + ew",
+    "value ~ s(CLD_mean) + elev + ns + ew",
+    "value ~ s(COT_mean) + elev + ns + ew",
+    "value ~ s(CER_P20um) + elev + ns + ew",
+    "value ~ s(CER_mean) + elev + ns + ew"
+#   "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew + s(CER_P20um)",
+#   "value ~ s(x_OR83M,y_OR83M,CER_P20um) +s(x_OR83M,y_OR83M,CLD_mean) + elev + ns + ew",
+#   "value ~ s(x_OR83M,y_OR83M) + s(CER_P20um,CLD_mean) + elev + ns + ew",
+#   "value ~ x_OR83M + y_OR83M +s(CER_P20um) + elev + ns + ew",
+#   "value ~ s(x_OR83M,y_OR83M,CER_P20um) + elev + ns + ew",
+#   "value ~ s(x_OR83M,y_OR83M) + s(distoc,CER_P20um)+elev + ns + ew"
+#   "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew + s(CLD_mean)",
+#   "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew + s(COT_mean)"
+  ),stringsAsFactors=F)
 mods$model=paste("mod",1:nrow(mods),sep="")
 
 ## confirm all model terms are in covar raster object for prediction
...
 unique(do.call(c, # find unique terms
   lapply(mods$formula,function(i) # get terms from all models & split smoothed terms
     unlist(strsplit(attr(terms(as.formula(i)),"term.labels"),split=","))))))
-if(any(terms%in%layerNames(covarm)))
+if(all(terms%in%layerNames(covar)))
   print("All model terms are present in the raster object") else
   warning("Some model terms not present in raster object, prediction may fail for some models")
 
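A worked example of the term extraction above on one hypothetical formula; note that strsplit leaves the s( wrapper and whitespace attached, so (depending on the cleanup in the elided lines above) the labels may still need stripping before they match layer names:

    f="value ~ s(x_OR83M,y_OR83M) + elev"
    attr(terms(as.formula(f)),"term.labels") # "s(x_OR83M, y_OR83M)" "elev"
    unlist(strsplit(attr(terms(as.formula(f)),"term.labels"),split=",")) # "s(x_OR83M" " y_OR83M)" "elev"
    # e.g. gsub("^s\\(| |\\)$","",...) would reduce these to bare layer names
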
...
 
 
 ## loop through the dates...
-
+## default values, useful for stepping through the loop by hand (i indexes dates, m indexes models)
+i=1
+m=3
+savemodel=T;saveFullPrediction=T;scale=F;verbose=T
+
 ghcn.subsets <-lapply(dates, function(d) subset(ghcn@data, date==d)) # list with one subset of the station data per date
 
-results=do.call(rbind.data.frame, # Collect the results in a single data.frame
-  lapply(1:length(dates),function(i) { # loop over dates
+results=do.call(rbind.data.frame, # Collect the results in a single data.frame
+  lapply(1:length(dates),function(i,savemodel=T,saveFullPrediction=T,scale=F,verbose=T) { # loop over dates
+  if(verbose) print(paste("Starting Date:",dates[i]))
   date<-dates[i] # get date
   month<-strftime(date, "%m") # get month
-  # LST_month<-paste("mm_",month,sep="") # get LST_month name
-
-  ## subset full raster brick to include correct month of satellite data
-  covarm=subset(covar,subset=which(covar@z$month=="00"|covar@z$month==month))
-  ## update layer names to match those in ghcn table
-  layerNames(covarm)[getZ(covarm)!="00"]=sub(paste("_",month,sep=""),"",layerNames(covarm)[getZ(covarm)!="00"])
-
   ## extract subset of data for this day
   tdata=ghcn.subsets[[i]]
-  ##Regression part 1: Creating a validation dataset by creating training and testing datasets
-  # mod <-ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))]
-  # ghcn.subsets[[i]] = transform(ghcn.subsets[[i]],LST = mod)
-  ##Screening LST values
-  ##ghcn.subsets[[i]]<-subset(ghcn.subsets[[i]],ghcn.subsets[[i]]$LST> 258 & ghcn.subsets[[i]]$LST<313)
-
+  ## drop stations with no x coordinates (these are stations in the buffer region)
+  tdata=tdata[!is.na(tdata$x_OR83M),] ## REMOVE THIS WHEN FULL REGION IS INCLUDED
   ## transform the response
   tdata$value<-trans(tdata$value)
   tdata$weights=tdata$count/max(tdata$count)
 
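trans() and itrans() are defined in a part of the script not shown in this diff; purely as an illustration, a typical forward/inverse pair for right-skewed precipitation values would be:

    ## hypothetical stand-ins -- the real definitions live in the elided header
    trans=function(x) log1p(x)   # compress the long right tail
    itrans=function(x) expm1(x)  # exact inverse, so itrans(trans(x))==x
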
-  n<-nrow(tdata)
-  ns<-n-round(n*prop) #Create a sample from the data frame with 70% of the rows
-  nv<-n-ns #create a sample for validation with prop of the rows
-  ind.training <- sample(nrow(tdata), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly
-  ind.testing <- setdiff(1:nrow(tdata), ind.training)
-  data_s <- tdata[ind.training, ]
-  data_v <- tdata[ind.testing, ]
+  #######
+  if(saveFullPrediction){
+    if(verbose) print(paste("Extracting covar information for this date (",dates[i],")"))
+
+    ## subset full raster brick to include correct month of satellite data
+    covarm=subset(covar,subset=which(covar@z$month=="00"|covar@z$month==month))
+    covarm=setZ(covarm,getZ(covar)[which(covar@z$month=="00"|covar@z$month==month)])
+    ## update layer names to match those in ghcn table
+    layerNames(covarm)[getZ(covarm)!="00"]=sub(paste("_",month,sep=""),"",layerNames(covarm)[getZ(covarm)!="00"])
+    ## convert to matrix for prediction later
+    pred=extract(covarm,coordinates(covarm))
+  }
 
-  ## lapply loops through models for the ith day, calculates the validation metrics, and saves them as R objects
+
+  if(scale){
+    ## get list of variables that need to be scaled to assess relative importance
+    scalevars=!colnames(tdata)%in%c("station","month","count",
+                                    "value","latitude","longitude","date","weights")
+    tdata[,colnames(tdata)[scalevars]]=scale(tdata[,scalevars])
+  }
+
+
+  ## Create subsets for fitting and validation
+  n = nrow(tdata)
+  ns = round(n*prop) # number of rows used for fitting (prop of the data)
+  nv = n-ns # number of rows held out for validation (the remaining 1-prop)
+  ind.training = sample(nrow(tdata), size=ns, replace=FALSE) # randomly select row indices for the fitting sample
+  tdata$fit=F;tdata$fit[ind.training]=T # add column to tdata with fit/training status
+  tdata$class=as.factor(ifelse(tdata$fit,"Fitting","Validation"))
+
+
+  ## lapply loops through models for the ith day, fits models, calculates the validation metrics, and saves them as R objects (if desired)
+  if(verbose) print(paste("Starting the model fitting for this date (",dates[i],")"))
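A quick, purely illustrative sanity check on the split at this point:

    table(tdata$class) # with prop=0.7, expect roughly 70% Fitting and 30% Validation
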
   results=do.call(rbind.data.frame,
-    lapply(1:nrow(mods),function(m,savemodel=F,saveFullPrediction=F) {
+    lapply(1:nrow(mods),function(m,data=tdata[tdata$fit,],...) { # loop over models
+    if(verbose) print(paste("Fitting Model:",mods$model[m]))
     ## will gam() fail? If so, return NAs instead of crashing
-    err=try(gam(formula(mods$formula[m]),data=data_s),silent=T)
+    err=try(gam(formula(mods$formula[m]),data=tdata[tdata$fit,]),silent=T)
     if(length(attr(err,"class"))==1) if(attr(err,"class")=="try-error")
       return(## this table must match the results table below
         data.frame(date=dates[i],month=format(dates[i],"%m"),
                    model=mods$model[m],form=mods$formula[m],ns=ns,
-                   AIC=NA, GCV=NA,DEV=NA,RMSE=NA,R2=NA))
+                   ME=NA,MAE=NA,pME=NA,RMSE=NA,R2=NA,R2w=NA,R2lw=NA))
-    ## run the models
 
-    mod=gam(formula(mods$formula[m]),data=data_s,weights=data_s$weights)
+    ## define the spatial covariance structure
+    # library(geoR)
+    # plot(variog(coords=as.matrix(data_s[,c("x","y")]),data=tdata$value[tdata$fit]))
+    # plot(variofit(variog(coords=as.matrix(tdata[tdata$fit,c("x","y")]),data=tdata$value[tdata$fit]),cov.model="exponential"))
+    # vg=likfit(geodata=list(coords=tdata[tdata$fit,c("x","y")],data=tdata$value[tdata$fit]),cov.model="exponential",ini.cov.pars=expand.grid(seq(0.1,2,len=5),seq(1000,500000,len=5)))
+    # lines.variomodel(vg)
+    corr=corExp(c(100000,0.1),form=~x+y,nugget=T,fixed=F) # exponential correlation: starting range 100km, nugget 0.1
+    # corr=Initialize(corr,tdata[tdata$fit,])
+
+    ## run the model
+    mod=gamm(formula(mods$formula[m]),data=tdata[tdata$fit,],correlation=corr,weights=weights,method="ML")
+    # mod=gam(formula(mods$formula[m]),data=tdata[tdata$fit,],weights=tdata$weights[tdata$fit])
+
+
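gamm() returns a list holding both pieces of the fit, which is why later code references mod$gam; the estimated spatial parameters can be inspected like this (a sketch, assuming the fit above converged):

    summary(mod$gam)                     # smooth terms of the GAM component
    intervals(mod$lme, which="var-cov")  # estimated corExp range/nugget with confidence intervals
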
201 |
### generate knots
|
|
202 |
# knots=makegrid(bbox(ghcn),cellsize=100000)
|
|
203 |
# coordinates(knots)=c("x1","x2")
|
|
204 |
# proj4string(knots)=proj4string(ghcn)
|
|
205 |
# niter=1500
|
|
206 |
# t1=Sys.time()
|
|
207 |
# f1=formula(value~CER_P20um+elevation+ns+ew)
|
|
208 |
# m1=spLM(f1,data=tdata[tdata$fit,],coords=as.matrix(tdata[tdata$fit,c("x","y")]),
|
|
209 |
# starting=list("phi"=2.5,"sigma.sq"=4, "tau.sq"=1,"nu"=0.5),
|
|
210 |
# sp.tuning=list("phi"=0.8, "sigma.sq"=0.1, "tau.sq"=0.2,"nu"=0.5),
|
|
211 |
# priors=list("phi.Unif"=c(0.1, 10), "sigma.sq.IG"=c(2, 1),
|
|
212 |
# "tau.sq.IG"=c(2, 1),"nu.unif"=c(0.01,3)),
|
|
213 |
# cov.model="matern",
|
|
214 |
# knots=coordinates(knots),
|
|
215 |
# n.samples=niter, verbose=TRUE, n.report=100,sub.samples=c(500,niter,1))
|
|
216 |
# t2=Sys.time()
|
|
217 |
# y_mod<- spPredict(m1, tdata[,c("x","y")],pred.covars=model.matrix(lm(f1,data=tdata))) #Using the coeff to predict new values.
|
|
218 |
# y_mod<- spPredict(m1, pred[,c("x_OR83M","y_OR83M")],pred.covars=model.matrix(lm(f1,data=data.frame(value=0,pred)))) #Using the coeff to predict new values.
|
|
219 |
# y_mod$fit=apply(y_mod$y.pred,1,mean)
|
|
220 |
# tdata$pred=y_mod$fit
|
 
     ## VALIDATION: predict at all stations and check the fit against the held-out testing data ########
-    y_mod<- predict(mod, newdata=data_v, se.fit = TRUE) #Using the coeff to predict new values.
-    res_mod<- itrans(data_v$value) - itrans(y_mod$fit) #Residuals
-    plot(itrans(y_mod$fit)~itrans(data_v$value),cex=data_v$weights);abline(0,1)
+    tpred=predict(mod$gam, newdata=tdata, se.fit = TRUE) # predict at both fitting and validation stations
+    tdata$pred=as.vector(tpred$fit)
+    tdata$pred.se=as.vector(tpred$se.fit)
+
+    # y_mod$fit[y_mod$fit<0]=NA
+    tdata$resid<- itrans(tdata$value) - itrans(tdata$pred) # residuals on the original (back-transformed) scale
 
     ## Regression part 3: Calculating and storing diagnostic measures
     tresults=data.frame( # build 1-row dataframe for this model-date
...
       model=mods$model[m], # model name
       form=mods$formula[m], # model formula
       ns=ns, # number of stations used in the training stage
-      AIC=AIC(mod), # AIC
-      GCV=mod$gcv.ubre, # GCV
-      DEV=mod$deviance, # Deviance
-      RMSE=sqrt(sum(res_mod^2,na.rm=T)/nv), # RMSE
-      R2=summary(lm(itrans(y_mod$fit)~itrans(data_v$value)))$r.squared # R^2
-      )
-
+      # AIC=AIC(mod$gam), # AIC
+      # GCV=mod$gam$gcv.ubre, # GCV
+      # DEV=mod$gam$deviance, # Deviance
+      ME=mean(tdata$resid[!tdata$fit],na.rm=T), # Mean error
+      MAE=mean(abs(tdata$resid[!tdata$fit]),na.rm=T), # Mean absolute error
+      pME=mean((itrans(tdata$pred[!tdata$fit])/itrans(tdata$value[!tdata$fit]))[itrans(tdata$pred[!tdata$fit])/itrans(tdata$value[!tdata$fit])<Inf],na.rm=T), # mean predicted/observed ratio (infinite ratios dropped)
+      RMSE=sqrt(sum(tdata$resid[!tdata$fit]^2,na.rm=T)/nv), # RMSE
+      R2=summary(lm(itrans(tdata$pred[!tdata$fit])~itrans(tdata$value[!tdata$fit])))$r.squared, # R^2
+      R2w=summary(lm(itrans(tdata$pred[!tdata$fit])~itrans(tdata$value[!tdata$fit]),weights=tdata$weights[!tdata$fit]))$r.squared, # weighted R^2
+      R2lw=summary(lm(tdata$pred[!tdata$fit]~tdata$value[!tdata$fit],weights=tdata$weights[!tdata$fit]))$r.squared) # weighted R^2 on the transformed scale
+    if(verbose) print(tresults)
+    write.csv(tresults,file=paste(path,"/",out_prefix,"_",gsub("-","",date),"_",mods$model[m],".csv",sep=""))
 
     ## Save the model object if desired
-    if(savemodel) save(mod,file= paste(path,"/","results_GAM_Model","_",out_prefix,"_",
-                                       dates[i],"_",mods$model[m],".Rdata",sep=""))
+    if(savemodel) save(mod,file=paste(path,"/",out_prefix,"_",gsub("-","",date),"_",mods$model[m],".Rdata",sep=""))
 
     ## do the full prediction and save it if desired
     if(saveFullPrediction){
-      p1=predict(covarm,mod,progress="text",fun="predict")
-      p1=predict(mod,sdata@data,type="response",progress="text",fun="predict")
-
-      predict(covarm,mod,type="response",
-              filename=paste(outpath,"/",gsub("-","",date),"_",mods$model[m],"_prediction.tif",sep=""),
-              format="GTiff",progress="text",fun="predict",overwrite=T)
-      predict(covarm,mod,filename=paste(outpath,"/",gsub("-","",date),"_",mods$model[m],"_prediction.se.tif",sep=""),
-              format="GTiff",progress="text",fun="predict.se",overwrite=T)
+      if(verbose) print(paste("Doing predictions for model ",mods$model[m]," for this date (",dates[i],")"))
+      ## do the prediction
+      p1=predict(mod$gam,as.data.frame(pred),type="response",se.fit=T,block.size=10000)
+      ## convert to raster stack (mean and se)
+      p2=stack(SpatialPixelsDataFrame(coordinates(covarm),
+                                      data=data.frame(
+                                        pred=itrans(as.numeric(p1$fit)),
+                                        se=itrans(as.numeric(p1$se.fit)))))
+
+      writeRaster(p2,filename=paste(path,"/",out_prefix,"_",gsub("-","",date),"_",
+                                    mods$model[m],"_prediction.tif",sep=""),
+                  format="GTiff",overwrite=T)
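The written GeoTIFF keeps the two bands in stack order; a quick way to verify a saved prediction (illustrative, with a made-up date and model in the file name pattern above):

    chk=brick(paste(path,"/",out_prefix,"_20100601_mod1_prediction.tif",sep="")) # hypothetical date/model
    plot(chk) # band 1 = prediction, band 2 = standard error
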
+      ## draw map of full prediction
+      ncols=100
+      c.at=unique(quantile(ghcn$value,seq(0,1,len=ncols))) # quantile bins of the raw data for all months
+      c.ramp=colorRampPalette(c("brown","grey","lightgreen","darkgreen")) # assign color ramp
+      c.cols=c.ramp(length(c.at)-1) # length(c.at)-1 colors corresponding to the bins in c.at
+      ## define se colors
+      s.at=unique(quantile(subset(p2,"se"),seq(0,1,len=ncols),na.rm=T)) # quantile bins of the prediction standard errors
+      s.ramp=colorRampPalette(c("blue","red")) # assign color ramp
+      s.cols=s.ramp(length(s.at)-1) # length(s.at)-1 colors corresponding to the bins in s.at
+      ## define residual color maps
+      r.at=c(-Inf,-20,-5,-1,1,5,20,Inf)
+      r.ramp=colorRampPalette(c("blue","darkblue","grey","darkred","red"))
+      r.cols=r.ramp(length(r.at)-1)
+      r.cut=cut(tdata$resid,breaks=r.at)
281 |
## plot the estimated values
|
|
282 |
pdf(paste(path,"/",gsub("-","",date),"_",mods$model[m],"_prediction.pdf",sep=""),width=11,height=8.5)
|
|
283 |
|
|
284 |
print(levelplot(subset(p2,"pred"),col.regions=c.ramp(length(c.at)),at=c.at,main=paste("Mean Predictions (Month",m,")"),
|
|
285 |
sub=paste("Model:",mods$formula[m],"\n Circles indicate training stations, triangles indicate validation stations"),margin=F)+
|
|
286 |
layer(panel.points(tdata$x_OR83M,tdata$y_OR83M,
|
|
287 |
fill=as.character(cut(itrans(tdata$value),breaks=c.at,labels=c.cols)),col="black",
|
|
288 |
pch=21,cex=tdata$weights),data=list(tdata=tdata[tdata$fit,],c.at=c.at,c.cols=c.cols))+
|
|
289 |
layer(panel.points(tdata$x_OR83M,tdata$y_OR83M,
|
|
290 |
fill=as.character(cut(itrans(tdata$value),breaks=c.at,labels=c.cols)),col="black",pch=24,cex=tdata$weights),
|
|
291 |
data=list(tdata=tdata[!tdata$fit,],c.cols=c.cols,c.at=c.at))+
|
|
292 |
layer(sp.lines(as(roi,"SpatialLinesDataFrame"),col="black")))
|
|
293 |
## plot of prediction errors
|
|
294 |
print(levelplot(subset(p2,"se"),col.regions=s.ramp(length(s.at)),at=s.at,main=paste("Prediction Standard Errors (Month",m,")"),
|
|
295 |
sub=paste("Model:",mods$formula[m],"\n Circles indicate training stations, triangles indicate validation stations"),margin=F)+
|
|
296 |
layer(panel.points(tdata[,c("x_OR83M","y_OR83M")],fill=as.character(cut(itrans(tdata$pred.se),breaks=s.at,labels=s.cols)),col="black",pch=21),
|
|
297 |
data=list(tdata=tdata[tdata$fit,],s.at=s.at,s.cols=s.cols))+
|
|
298 |
layer(panel.points(tdata[,c("x_OR83M","y_OR83M")],fill=as.character(cut(itrans(tdata$pred.se),breaks=s.at,labels=s.cols)),col="black",pch=24),
|
|
299 |
data=list(tdata=tdata[!tdata$fit,],s.cols=s.cols,s.at=s.at))+
|
|
300 |
layer(sp.lines(as(roi,"SpatialLinesDataFrame"),col="black")))
|
|
301 |
## plot of residuals
|
|
302 |
print(levelplot(subset(p2,"pred"),col.regions=c.ramp(length(c.at)),at=c.at,main=paste("Mean Predictions with station residuals (month",m,")"),
|
|
303 |
sub=paste("Model:",mods$formula[m],"\n Circles indicate training stations, triangles indicate validation stations"),margin=F,
|
|
304 |
key=list(text=list(rev(levels(r.cut)),col=rev(r.cols)),points=list(pch=21,fill=rev(r.cols)),space="left",title="Station \n Residuals"))+
|
|
305 |
layer(panel.points(tdata[,c("x_OR83M","y_OR83M")],
|
|
306 |
fill=as.character(cut(itrans(tdata$resid),breaks=r.at,labels=r.cols)),col="black",pch=21,cex=tdata$weights),
|
|
307 |
data=list(tdata=tdata[tdata$fit,],r.cols=r.cols,r.at=r.at))+
|
|
308 |
layer(panel.points(tdata[,c("x_OR83M","y_OR83M")],
|
|
309 |
fill=as.character(cut(itrans(tdata$resid),breaks=r.at,labels=r.cols)),col="black",pch=24,cex=tdata$weights),
|
|
310 |
data=list(tdata=tdata[!tdata$fit,],r.cols=r.cols,r.at=r.at))+
|
|
311 |
layer(sp.lines(as(roi,"SpatialLinesDataFrame"),col="black")))
|
|
312 |
|
|
313 |
# pdata=data.frame(Predicted=apply(itrans(as.matrix(y_mod$fit)),1,function(x) max(x,0)),Observed=itrans(data_v$value))
|
|
314 |
print(xyplot(itrans(pred)~itrans(value),groups=class,ylab="Predicted",xlab="Observed",main=mods$formula[m],data=tdata,asp=.5,auto.key=T)+layer(panel.abline(0,1,col="red")))
|
|
315 |
# xyplot(pred~value,groups=class,ylab="Predicted",xlab="Observed",main=mods$formula[m],data=tdata,asp=.5,auto.key=T)+layer(panel.abline(0,1,col="red"))
|
|
316 |
plot(mod$gam,pages=1,residuals=T,scale=-1,pers=T,shade=T,seWithMean=T); mtext(mods$formula[m],3,padj=-2)
|
|
317 |
dev.off()
|
|
318 |
|
     }
 
     ## print progress
...
     ## return the results table
     return(tresults)
     # end of the model loop (nested within the date loop)
-}))
-## identify model with minium AIC
+})) #end of loop for this time step
+
+## identify model with minimum AIC
 results$lowAIC=F
 results$lowAIC[which.min(results$AIC)]=T # note: AIC is currently commented out of tresults above, so this is a silent no-op
 
...
 
 
 
-write.table(results_RMSE_all2, file= paste(path,"/","results_GAM_Assessment",out_prefix,"all.txt",sep=""), sep=",", col.names=TRUE)
+write.csv(results, file= paste(path,"/",out_prefix,"_results.csv",sep=""))
+
+
+
+## some plots
+myTheme <- standard.theme(col = FALSE)
+myTheme$superpose.symbol$pch <- 1:20
+myTheme$superpose.symbol$col <- 1:12
+
+pdf(paste(path,"/",out_prefix,"_ModelSummary.pdf",sep=""),width=11,height=8.5)
+
+## maps of the covariate climatologies
+for(i in c("CER_mean","CER_P20um","CLD","COT")) {
+  maps=subset(covar,grep(i,layerNames(covar)))
+  ncols=100
+  c.at=unique(quantile(as.matrix(maps),seq(0,1,len=ncols))) # quantile bins of raw data for all months
+  c.ramp=colorRampPalette(c("brown","grey","lightgreen","darkgreen")) # assign color ramp
+  print(levelplot(maps,col.regions=c.ramp(length(c.at)-1),at=c.at,main=i)+layer(sp.lines(as(roi,"SpatialLinesDataFrame"),col="black")))
+}
+
+## histogram of observed values by month
+histogram(~value|format(as.Date(date),"%m"),data=ghcn@data,points=F,auto.key=list(space="right"),
+          breaks=seq(0,50,len=50),fill="grey",col="grey",border="transparent",scales=list(x=list(log=F),y=list(relation="free",crt=90)),
+          main="Histograms of observed mean daily precipitation, by month",xlab="Mean Daily Precipitation (mm)")
+
+## plot scatterplots by month
+for(i in sprintf("%02d",1:12)) print(splom(subset(covar,grep(paste("(CER|CLD|COT).*",i,sep=""),layerNames(covar),value=T)),main=paste("Month",i),
+                                           sub="MOD06 cloud product climatologies for a single month"))
 
 resl=melt(results,id=c("date","month","model","form","ns","lowAIC"))
 
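melt() reshapes the wide results table into long format, one row per date-model-diagnostic, with the diagnostic name in `variable` and its value in `value`; cast() below pivots it back out, which is what enables the model-vs-model scatterplots:

    # resl columns: date, month, model, form, ns, lowAIC, variable, value
    # cast(resl, date+month+variable~model) yields one column per model (mod1, mod2, ...)
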
-xyplot(value~date|variable,groups=form,data=resl,scales=list(y=list(relation="free")),auto.key=T)
+xyplot(value~date|variable,groups=form,data=resl,scales=list(y=list(relation="free")),auto.key=T,par.settings = myTheme)
 
-xyplot(form~value|variable,groups=month,data=resl,scales=list(x=list(relation="free")),auto.key=T)
+resl2=cast(resl,date+month+variable~model)
+
+myTheme$superpose.symbol$col=c("blue",rep("green",3),rep("red",3),rep("purple",3),rep("blue",2))
+
+xyplot(mod6~mod8|variable,groups=month,data=resl2,scales=list(relation="free"),auto.key=list(space="right"),par.settings = myTheme)+layer(panel.abline(0,1))
+
+
+dev.off()
+
+print(xtable(mods),include.rownames=F,file=paste(path,"/",out_prefix,"_Models.tex",sep=""))
+
+xyplot(form~value|variable,groups=month,data=resl,scales=list(x=list(relation="free"),y=list(draw=T, alternating=1)),auto.key=list(space="right"),par.settings = myTheme)
 
 bwplot(form~value|variable,data=resl,scales=list(x=list(relation="free")))
 
-resl2=cast(resl,date+month+variable~model)
-xyplot(mod5~mod6|variable,groups=month,data=resl2,scales=list(relation="free"))+layer(panel.abline(0,1))
 
 
Updates to run 12 months of mean daily precipitation through a variety of models, now using gamm() to incorporate a spatial correlation structure in addition to the smooth terms. Not all clean and cozy yet; uploading as a work in progress...