Project

General

Profile

« Previous | Next » 

Revision 858fd2e7

Added by Adam Wilson over 12 years ago

Updates to run 12 months of mean daily precipitation using a variety of models. Includes using gamm() to incorporate spatial effects in addition to the smooth parameters. Not all clean and cozy yet, uploading as a work-in-progress...

View differences:

climate/procedures/MOD06_L2_data_compile.r
47 47

  
48 48
## download data from ftp site.  Unfortunately the selection has to be selected using the website and orders downloaded via ftp.
49 49

  
50
system("wget -r --retr-symlinks ftp://ladsweb.nascom.nasa.gov/orders/500676499/ -P /home/adamw/acrobates/projects/interp/data/modis/MOD06_L2_hdf")
50
#system("wget -r --retr-symlinks ftp://ladsweb.nascom.nasa.gov/orders/500676499/ -P /home/adamw/acrobates/projects/interp/data/modis/MOD06_L2_hdf")
51 51

  
52 52
## specify some working directories
53 53
gdir="output/"
......
330 330
table(done.vs)
331 331
vs=vs[!done.vs,]
332 332

  
333
tfs=fs2$path[which(fs2$month==vs$month[i]&fs2$type==vs$type[i])]
334
do.call(c,mclapply(2:length(tfs),function(f) identical(extent(raster(tfs[f-1])),extent(raster(tfs[f])))))
335
raster(tfs[23])
333
#tfs=fs2$path[which(fs2$month==vs$month[i]&fs2$type==vs$type[i])]
334
#do.call(c,mclapply(2:length(tfs),function(f) #identical(extent(raster(tfs[f-1])),extent(raster(tfs[f])))))
335
#raster(tfs[23])
336 336

  
337 337
## process the monthly summaries using the raster package
338 338
mclapply(1:nrow(vs),function(i){
climate/research/oregon/interpolation/GAM.R
21 21
library(multicore)  # if installed allows easy parallelization
22 22
library(reshape)    # very useful for switching from 'wide' to 'long' data formats
23 23
library(lattice); library(latticeExtra)
24
library(raster)
24
library(raster); library(rasterVis)
25 25

  
26 26
###Parameters and arguments
27 27

  
......
34 34
                                        #path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations"
35 35
                                        #path<-"H:/Data/IPLANT_project/data_Oregon_stations"
36 36
setwd(path) 
37
out_prefix<-"_20120713_precip"
37
out_prefix<-"20120723PRCP"
38 38
prop=0.7                                # proportion of data used for fitting
39 39

  
40
### load ROI
41
roi <- readOGR("../Test_sites/","Oregon")
42
roi=spTransform(roi,CRS("+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs"))
43

  
44

  
40 45
##################################
41 46
## Load covariate raster brick
42 47
covar=brick(paste(path,"/covariates.gri",sep=""))
......
47 52

  
48 53
###Reading the station data and setting up for models' comparison
49 54
filename<-sub(".shp","",infile1)              #Removing the extension from file.
50
ghcn<-readOGR(".", filename)                  #reading shapefile 
55
ghcn<-readOGR(".", filename)                  #reading shapefile
56
ghcn@data[,c("x","y")]=coordinates(ghcn)
51 57

  
52 58
                                              #Note that "transform" output is a data.frame not spatial object 
53 59
#set.seed(100) #modify this to a seed variable allowing different runs.
......
62 68

  
63 69

  
64 70
####  Define GAM models
65
mods=data.frame(formula=c(    
66
                  "value ~ s(x_OR83M,y_OR83M)",
67
                  "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev",
68
                  "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew",
69
                  "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew + s(CER_mean)",
70
                  "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew + s(CER_P20um)",
71
                  "value ~ s(x_OR83M,y_OR83M) + elev + ns + ew + s(CER_P20um)",
72
                  "value ~ s(x_OR83M,y_OR83M,CER_P20um) + elev + ns + ew",
73
                  "value ~ s(x_OR83M,y_OR83M) + s(distoc,CER_P20um)+elev + ns + ew",
74
                  "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew + s(CLD_mean)",
75
                  "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew + s(COT_mean)"
76
                  ),stringsAsFactors=F)
71
mods=data.frame(
72
  formula=c(    
73
    "value ~ s(x_OR83M,y_OR83M)", 
74
#    "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev",
75
    "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew",
76
    "value ~ s(distoc) + s(CER_P20um) + elev + ns + ew",
77
    "value ~ s(distoc) + s(COT_mean) + elev + ns + ew",
78
    "value ~ s(distoc) + s(CLD_mean) + elev + ns + ew",
79
    "value ~ s(CLD_mean) + elev + ns + ew",
80
    "value ~ s(COT_mean) + elev + ns + ew",
81
    "value ~ s(CER_P20um) + elev + ns + ew",
82
    "value ~ s(CER_mean) + elev + ns + ew"
83
                                        #    "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew + s(CER_P20um)",
84
#    "value ~ s(x_OR83M,y_OR83M,CER_P20um) +s(x_OR83M,y_OR83M,CLD_mean) + elev + ns + ew",
85
#    "value ~ s(x_OR83M,y_OR83M) + s(CER_P20um,CLD_mean) + elev + ns + ew",
86
#    "value ~ x_OR83M + y_OR83M +s(CER_P20um) + elev + ns + ew",
87
#    "value ~ s(x_OR83M,y_OR83M,CER_P20um) + elev + ns + ew",
88
#    "value ~ s(x_OR83M,y_OR83M) + s(distoc,CER_P20um)+elev + ns + ew"
89
#    "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew + s(CLD_mean)",
90
#    "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew + s(COT_mean)"
91
    ),stringsAsFactors=F)
77 92
mods$model=paste("mod",1:nrow(mods),sep="")
78 93

  
79 94
## confirm all model terms are in covar raster object for prediction
......
81 96
  unique(do.call(c,                                # find unique terms
82 97
                 lapply(mods$formula,function(i)   #get terms from all models & split smoothed terms
83 98
                        unlist(strsplit(attr(terms(as.formula(i)),"term.labels"),split=","))))))
84
if(any(terms%in%layerNames(covarm)))
99
if(any(terms%in%layerNames(covar)))
85 100
  print("All model terms are present in the raster object") else
86 101
warning("Some model terms not present in raster object, prediction may fail for some models")
87 102

  
......
106 121

  
107 122

  
108 123
## loop through the dates...
109
  
124
i=1
125
m=3
126
savemodel=T;saveFullPrediction=T;scale=F;verbose=T
127

  
110 128
ghcn.subsets <-lapply(dates, function(d) subset(ghcn@data, date==d)) #this creates a list of 10 subset data
111 129
  
112
 results=do.call(rbind.data.frame,                   # Collect the results in a single data.frame
113
   lapply(1:length(dates),function(i) {            # loop over dates
130
  results=do.call(rbind.data.frame,                   # Collect the results in a single data.frame
131
   lapply(1:length(dates),function(i,savemodel=T,saveFullPrediction=T,scale=F,verbose=T) {            # loop over dates
132
     if(verbose)      print(paste("Starting Date:",dates[i]))
114 133
     date<-dates[i]                                  # get date
115 134
     month<-strftime(date, "%m")                     # get month
116
#     LST_month<-paste("mm_",month,sep="")            # get LST_month name
117

  
118
     ## subset full raster brick to include correct month of satellite data
119
     covarm=subset(covar,subset=which(covar@z$month=="00"|covar@z$month==month))
120
     ## update layer names to match those in ghcn table
121
     layerNames(covarm)[getZ(covarm)!="00"]=sub(paste("_",month,sep=""),"",layerNames(covarm)[getZ(covarm)!="00"])
122

  
123 135
     ## extract subset of data for this day
124 136
     tdata=ghcn.subsets[[i]]
125
     ##Regression part 1: Creating a validation dataset by creating training and testing datasets
126
#     mod <-ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))]
127
#     ghcn.subsets[[i]] = transform(ghcn.subsets[[i]],LST = mod)
128
     ##Screening LST values
129
     ##ghcn.subsets[[i]]<-subset(ghcn.subsets[[i]],ghcn.subsets[[i]]$LST> 258 & ghcn.subsets[[i]]$LST<313)
130

  
137
     ## drop stations with no x coordinates (these are stations in the buffer region)
138
     tdata=tdata[!is.na(tdata$x_OR83M),]  ## REMOVE THIS WHEN FULL REGION IS INCLUDED
131 139
     ## transform the response
132 140
     tdata$value<-trans(tdata$value)
133 141
     tdata$weights=tdata$count/max(tdata$count)
134 142

  
135
     n<-nrow(tdata)
136
     ns<-n-round(n*prop)  #Create a sample from the data frame with 70% of the rows
137
     nv<-n-ns             #create a sample for validation with prop of the rows
138
     ind.training <- sample(nrow(tdata), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly
139
     ind.testing <- setdiff(1:nrow(tdata), ind.training)
140
     data_s <- tdata[ind.training, ]
141
     data_v <- tdata[ind.testing, ]
143
     #######
144
     if(saveFullPrediction){
145
       if(verbose) print(paste("Extracting covar information for this date (",dates[i],")"))
146
 
147
       ## subset full raster brick to include correct month of satellite data
148
       covarm=subset(covar,subset=which(covar@z$month=="00"|covar@z$month==month))
149
       covarm=setZ(covarm,getZ(covar)[which(covar@z$month=="00"|covar@z$month==month)])
150
       ## update layer names to match those in ghcn table
151
       layerNames(covarm)[getZ(covarm)!="00"]=sub(paste("_",month,sep=""),"",layerNames(covarm)[getZ(covarm)!="00"])
152
       ## convert to matrix for prediction later
153
       pred=extract(covarm,coordinates(covarm))
154
     }
142 155
     
143
     ## lapply loops through models for the ith day, calculates the validation metrics, and saves them as R objects
156

  
157
     if(scale){
158
       ## get list of variables that need to be scaled to assess relative importance
159
       scalevars=!colnames(tdata)%in%c("station","month","count",
160
                                       "value","latitude","longitude","date","weights")
161
       tdata[,colnames(tdata)[scalevars]]=scale(tdata[,scalevars])
162
     }
163

  
164

  
165
     ## Create subsets for fitting and validation
166
     n = nrow(tdata)
167
     ns = round(n*prop)  #Create a sample from the data frame with 70% of the rows
168
     nv = n-ns             #create a sample for validation with prop of the rows
169
     ind.training = sample(nrow(tdata), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly
170
     tdata$fit=F;tdata$fit[ind.training]=T   #add column to tdata with fit/training status
171
     tdata$class=as.factor(ifelse(tdata$fit,"Fitting","Validation"))
172

  
173

  
174
     ## lapply loops through models for the ith day, fits models, calculates the validation metrics, and saves them as R objects (if desired)
175
     if(verbose) print(paste("Starting the model fitting for this date (",dates[i],")"))
144 176
     results=do.call(rbind.data.frame,
145
       lapply(1:nrow(mods),function(m,savemodel=F,saveFullPrediction=F) {  
177
       lapply(1:nrow(mods),function(m,data=tdata[tdata$fit],...) {
178
         if(verbose) print(paste("Fitting Model:",mods$model[m]))
146 179
         ## will gam() fail?  If so, return NAs instead of crashing 
147
         err=try(gam(formula(mods$formula[m]),data=data_s),silent=T)
180
         err=try(gam(formula(mods$formula[m]),data=tdata[tdata$fit,]),silent=T)
148 181
         if(length(attr(err,"class"))==1) if(attr(err,"class")=="try-error")
149 182
           return(## this table must match the results table below
150 183
                  data.frame(date=dates[i],month=format(dates[i],"%m"),
151 184
                             model=mods$model[m],form=mods$formula[m],ns=ns,
152 185
                             AIC=NA, GCV=NA,DEV=NA,RMSE=NA,R2=NA))
153
         ## run the models
154 186

  
155
         mod=gam(formula(mods$formula[m]),data=data_s,weights=data_s$weights)
187
         ## define the spatial covariance structure
188
#         library(geoR)
189
#         plot(variog(coords=as.matrix(data_s[,c("x","y")]),data=tdata$value[tdata$fit]))
190
#         plot(variofit(variog(coords=as.matrix(tdata[tdata$fit,c("x","y")]),data=tdata$value[tdata$fit]),cov.model="exponential"))
191
#         vg=likfit(geodata=list(coords=tdata[tdata$fit,c("x","y")],data=tdata$value[tdata$fit]),cov.model="exponential",ini.cov.pars=expand.grid(seq(0.1,2,len=5),seq(1000,500000,len=5)))
192
#         lines.variomodel(vg)
193
         corr=corExp(c(100000,0.1),form=~x+y,nugget=T,fixed=F)
194
#         corr=Initialize(corr,tdata[tdata$fit,])
195

  
196
         ## run the model
197
         mod=gamm(formula(mods$formula[m]),data=tdata[tdata$fit,],correlation=corr,weights=weights,method="ML")
198
#         mod=gam(formula(mods$formula[m]),data=tdata[tdata$fit,],weights=tdata$weights[tdata$fit])
199

  
200

  
201
         ### generate knots
202
#         knots=makegrid(bbox(ghcn),cellsize=100000)
203
#         coordinates(knots)=c("x1","x2")
204
#         proj4string(knots)=proj4string(ghcn)
205
#        niter=1500
206
#         t1=Sys.time()
207
#         f1=formula(value~CER_P20um+elevation+ns+ew)
208
#         m1=spLM(f1,data=tdata[tdata$fit,],coords=as.matrix(tdata[tdata$fit,c("x","y")]), 
209
#           starting=list("phi"=2.5,"sigma.sq"=4, "tau.sq"=1,"nu"=0.5),
210
#           sp.tuning=list("phi"=0.8, "sigma.sq"=0.1, "tau.sq"=0.2,"nu"=0.5),
211
#           priors=list("phi.Unif"=c(0.1, 10), "sigma.sq.IG"=c(2, 1),
212
#             "tau.sq.IG"=c(2, 1),"nu.unif"=c(0.01,3)),
213
#           cov.model="matern",
214
#           knots=coordinates(knots),
215
#           n.samples=niter, verbose=TRUE, n.report=100,sub.samples=c(500,niter,1))
216
#         t2=Sys.time()
217
#         y_mod<- spPredict(m1, tdata[,c("x","y")],pred.covars=model.matrix(lm(f1,data=tdata))) #Using the coeff to predict new values.
218
#         y_mod<- spPredict(m1, pred[,c("x_OR83M","y_OR83M")],pred.covars=model.matrix(lm(f1,data=data.frame(value=0,pred)))) #Using the coeff to predict new values.
219
#         y_mod$fit=apply(y_mod$y.pred,1,mean)
220
#         tdata$pred=y_mod$fit
156 221
         
157 222
         ##VALIDATION: Prediction checking the results using the testing data   ########
158
         y_mod<- predict(mod, newdata=data_v, se.fit = TRUE) #Using the coeff to predict new values.
159
         res_mod<- itrans(data_v$value) - itrans(y_mod$fit) #Residuals 
160
         plot(itrans(y_mod$fit)~itrans(data_v$value),cex=data_v$weights);abline(0,1)
223
         tpred=predict(mod$gam, newdata=tdata, se.fit = TRUE) #Using the coeff to predict new values.
224
         tdata$pred=as.vector(tpred$fit)
225
         tdata$pred.se=as.vector(tpred$se)
226

  
227
#         y_mod$fit[y_mod$fit<0]=NA
228
         tdata$resid<- itrans(tdata$value) - itrans(tdata$pred) #Residuals 
161 229

  
162 230
         ##Regression part 3: Calculating and storing diagnostic measures
163 231
         tresults=data.frame(            # build 1-row dataframe for this model-date
......
166 234
           model=mods$model[m],          # model number
167 235
           form=mods$formula[m],          # model number
168 236
           ns=ns,                        # number of stations used in the training stage
169
           AIC=AIC(mod),                # AIC
170
           GCV=mod$gcv.ubre,             # GCV
171
           DEV=mod$deviance,             # Deviance
172
           RMSE=sqrt(sum(res_mod^2,na.rm=T)/nv),  # RMSE
173
           R2=summary(lm(itrans(y_mod$fit)~itrans(data_v$value)))$r.squared  # R^2
174
           )
175

  
237
#           AIC=AIC(mod$gam),                # AIC
238
#           GCV=mod$gam$gcv.ubre,             # GCV
239
#           DEV=mod$gam$deviance,             # Deviance
240
           ME=mean(tdata$resid[!tdata$fit],na.rm=T),  # Mean error
241
           MAE=mean(abs(tdata$resid[!tdata$fit]),na.rm=T),  # Mean absolute error
242
           pME=mean((itrans(tdata$pred[!tdata$fit])/itrans(tdata$value[!tdata$fit]))[itrans(tdata$pred[!tdata$fit])/itrans(tdata$value[!tdata$fit])<Inf],na.rm=T),  # Mean % of total
243
           RMSE=sqrt(sum(tdata$resid[!tdata$fit]^2,na.rm=T)/nv),  # RMSE
244
           R2=summary(lm(itrans(tdata$pred[!tdata$fit])~itrans(tdata$value[!tdata$fit])))$r.squared,  # R^2
245
           R2w=summary(lm(itrans(tdata$pred[!tdata$fit])~itrans(tdata$value[!tdata$fit]),weights=tdata$weights[!tdata$fit]))$r.squared,
246
           R2lw=summary(lm(tdata$pred[!tdata$fit]~tdata$value[!tdata$fit],weights=tdata$weights[!tdata$fit]))$r.squared)
247
              if(verbose) print(tresults)
248
         write.csv(tresults,file=paste(path,"/",out_prefix,"_",gsub("-","",date),"_",mods$model[m],".csv",sep=""))
176 249
      
177 250
         ## Save the model object if desired
178
         if(savemodel)  save(mod,file= paste(path,"/","results_GAM_Model","_",out_prefix,"_",
179
                                   dates[i],"_",mods$model[m],".Rdata",sep=""))
251
         if(savemodel)  save(mod,file= paste(path,"/",out_prefix,"_",gsub("-","",date),"_",mods$model[m],".Rdata",sep=""))
180 252

  
181 253
         ## do the full prediction and save it if desired
182 254
         if(saveFullPrediction){
183
           p1=predict(covarm,mod,progress="text",fun="predict")
184
           p1=predict(mod,sdata@data,type="response",progress="text",fun="predict")
185

  
186
           predict(covarm,mod,type="response",
187
                   filename=paste(outpath,"/",gsub("-","",date),"_",mods$model[m],"_prediction.tif",sep=""),
188
                   format="GTiff",progress="text",fun="predict",overwrite=T)
189
           predict(covarm,mod,filename=paste(outpath,"/",gsub("-","",date),"_",mods$model[m],"_prediction.se.tif",sep=""),
190
                   format="GTiff",progress="text",fun="predict.se",overwrite=T)
255
           if(verbose) print(paste("Doing predictions for model ",mods$model[m]," for this date (",dates[i],")"))
256
           ## do the prediction
257
           p1=predict(mod$gam,as.data.frame(pred),type="response",se.fit=T,block.size=10000)
258
           ## convert to raster stack (mean and se)
259
           p2=stack(SpatialPixelsDataFrame(coordinates(covarm),
260
             data=data.frame(
261
               pred=itrans(as.numeric(p1$fit)),
262
               se=itrans(as.numeric(p1$se.fit)))))
263
          
264
            writeRaster(p2,filename=paste(path,"/",out_prefix,"_",gsub("-","",date),"_",
265
                            mods$model[m],"_prediction.tif",sep=""),
266
                     format="GTiff",overwrite=T)
267
           ## draw map of full prediction
268
            ncols=100
269
           c.at=unique(quantile(ghcn$value,seq(0,1,len=ncols)))  #get quantile bins of raw data for all months
270
           c.ramp=colorRampPalette(c("brown","grey","lightgreen","darkgreen"))  #assign color ramp
271
           c.cols=c.ramp(length(c.at)-1)  #get ncol-1 colors to corresponding to c.at
272
           ## define se colors
273
           s.at=unique(quantile(subset(p2,"se"),seq(0,1,len=ncols),na.rm=T))  #get quantile bins of raw data for all months
274
           s.ramp=colorRampPalette(c("blue","red"))  #assign color ramp
275
           s.cols=s.ramp(length(s.at)-1)  #get ncol-1 colors to corresponding to c.at
276
           ## define residual color maps
277
           r.at=c(-Inf,-20,-5,-1,1,5,20,Inf)
278
           r.ramp=colorRampPalette(c("blue","darkblue","grey","darkred","red"))
279
           r.cols=r.ramp(length(r.at)-1)
280
           r.cut=cut(tdata$resid,breaks=r.at)
281
           ## plot the estimated values
282
           pdf(paste(path,"/",gsub("-","",date),"_",mods$model[m],"_prediction.pdf",sep=""),width=11,height=8.5)
283

  
284
           print(levelplot(subset(p2,"pred"),col.regions=c.ramp(length(c.at)),at=c.at,main=paste("Mean Predictions  (Month",m,")"),
285
                           sub=paste("Model:",mods$formula[m],"\n  Circles indicate training stations, triangles indicate validation stations"),margin=F)+
286
                 layer(panel.points(tdata$x_OR83M,tdata$y_OR83M,
287
                                    fill=as.character(cut(itrans(tdata$value),breaks=c.at,labels=c.cols)),col="black",
288
                                    pch=21,cex=tdata$weights),data=list(tdata=tdata[tdata$fit,],c.at=c.at,c.cols=c.cols))+
289
                 layer(panel.points(tdata$x_OR83M,tdata$y_OR83M,
290
                                    fill=as.character(cut(itrans(tdata$value),breaks=c.at,labels=c.cols)),col="black",pch=24,cex=tdata$weights),
291
                       data=list(tdata=tdata[!tdata$fit,],c.cols=c.cols,c.at=c.at))+
292
                 layer(sp.lines(as(roi,"SpatialLinesDataFrame"),col="black")))
293
           ## plot of prediction errors
294
           print(levelplot(subset(p2,"se"),col.regions=s.ramp(length(s.at)),at=s.at,main=paste("Prediction Standard Errors (Month",m,")"),
295
                     sub=paste("Model:",mods$formula[m],"\n  Circles indicate training stations, triangles indicate validation stations"),margin=F)+
296
                       layer(panel.points(tdata[,c("x_OR83M","y_OR83M")],fill=as.character(cut(itrans(tdata$pred.se),breaks=s.at,labels=s.cols)),col="black",pch=21),
297
                             data=list(tdata=tdata[tdata$fit,],s.at=s.at,s.cols=s.cols))+
298
               layer(panel.points(tdata[,c("x_OR83M","y_OR83M")],fill=as.character(cut(itrans(tdata$pred.se),breaks=s.at,labels=s.cols)),col="black",pch=24),
299
                     data=list(tdata=tdata[!tdata$fit,],s.cols=s.cols,s.at=s.at))+
300
                 layer(sp.lines(as(roi,"SpatialLinesDataFrame"),col="black")))
301
           ## plot of residuals
302
           print(levelplot(subset(p2,"pred"),col.regions=c.ramp(length(c.at)),at=c.at,main=paste("Mean Predictions with station residuals (month",m,")"),
303
                     sub=paste("Model:",mods$formula[m],"\n  Circles indicate training stations, triangles indicate validation stations"),margin=F,
304
                     key=list(text=list(rev(levels(r.cut)),col=rev(r.cols)),points=list(pch=21,fill=rev(r.cols)),space="left",title="Station \n Residuals"))+
305
             layer(panel.points(tdata[,c("x_OR83M","y_OR83M")],
306
                                fill=as.character(cut(itrans(tdata$resid),breaks=r.at,labels=r.cols)),col="black",pch=21,cex=tdata$weights),
307
                                        data=list(tdata=tdata[tdata$fit,],r.cols=r.cols,r.at=r.at))+
308
               layer(panel.points(tdata[,c("x_OR83M","y_OR83M")],
309
                                  fill=as.character(cut(itrans(tdata$resid),breaks=r.at,labels=r.cols)),col="black",pch=24,cex=tdata$weights),
310
                     data=list(tdata=tdata[!tdata$fit,],r.cols=r.cols,r.at=r.at))+
311
                 layer(sp.lines(as(roi,"SpatialLinesDataFrame"),col="black")))
312

  
313
          #         pdata=data.frame(Predicted=apply(itrans(as.matrix(y_mod$fit)),1,function(x) max(x,0)),Observed=itrans(data_v$value))
314
           print(xyplot(itrans(pred)~itrans(value),groups=class,ylab="Predicted",xlab="Observed",main=mods$formula[m],data=tdata,asp=.5,auto.key=T)+layer(panel.abline(0,1,col="red")))
315
 #          xyplot(pred~value,groups=class,ylab="Predicted",xlab="Observed",main=mods$formula[m],data=tdata,asp=.5,auto.key=T)+layer(panel.abline(0,1,col="red"))
316
           plot(mod$gam,pages=1,residuals=T,scale=-1,pers=T,shade=T,seWithMean=T); mtext(mods$formula[m],3,padj=-2)
317
           dev.off()
318
           
191 319
         }
192 320
         
193 321
         ## print progress
......
195 323
         ## return the results table
196 324
         return(tresults)
197 325
                                        # end of the for loop #2 (nested in loop #1)
198
       }))
199
     ## identify model with minium AIC
326
       }))  #end of loop for this time step
327
     
328
     ## identify model with minimum AIC
200 329
     results$lowAIC=F
201 330
     results$lowAIC[which.min(results$AIC)]=T
202 331

  
......
208 337

  
209 338

  
210 339

  
211
  write.table(results_RMSE_all2, file= paste(path,"/","results_GAM_Assessment",out_prefix,"all.txt",sep=""), sep=",", col.names=TRUE)
340
  write.csv(results, file= paste(path,"/",out_prefix,"_results.csv",sep=""))
341

  
342

  
343

  
344
## some plots
345
myTheme <- standard.theme(col = FALSE)
346
myTheme$superpose.symbol$pch <-1:20
347
myTheme$superpose.symbol$col=1:12
348

  
349
pdf(paste(path,"/",out_prefix,"_ModelSummary.pdf",sep=""),width=11,height=8.5)
350

  
351
for(i in c("CER_mean","CER_P20um","CLD","COT")) {
352
  maps=subset(covar,grep(i,layerNames(covar)))
353
  ncols=100
354
  c.at=unique(quantile(as.matrix(maps),seq(0,1,len=ncols)))  #get quantile bins of raw data for all months
355
  c.ramp=colorRampPalette(c("brown","grey","lightgreen","darkgreen"))  #assign color ramp
356
  print(levelplot(maps,col.regions=c.ramp(length(c.at)-1),at=c.at,main=i)+layer(sp.lines(as(roi,"SpatialLinesDataFrame"),col="black")))
357
}
358

  
359
## histogram of observed values by month
360
histogram(~value|format(as.Date(date),"%m"),data=ghcn@data,points=F,auto.key=list(space="right"),
361
          breaks=seq(0,50,len=50),fill="grey",col="grey",border="transparent",scales=list(x=list(log=F),y=list(relation="free",crt=90)),
362
          main="Histograms of observed mean daily precipitation, by month",xlab="Mean Daily Precipitation (mm)")
363

  
364
## plot scatterplots by month
365
for(i in sprintf("%02d",1:12))   print(splom(subset(covar,grep(paste("[CER|CLD|COT].*",i,sep=""),layerNames(covar),value=T)),main=paste("Month",i),
366
                                       sub="MOD06 cloud product climatologies for a single month"))
212 367

  
213 368
resl=melt(results,id=c("date","month","model","form","ns","lowAIC"))
214 369

  
215
xyplot(value~date|variable,groups=form,data=resl,scales=list(y=list(relation="free")),auto.key=T)
370
xyplot(value~date|variable,groups=form,data=resl,scales=list(y=list(relation="free")),auto.key=T,par.settings = myTheme)
216 371

  
217
xyplot(form~value|variable,groups=month,data=resl,scales=list(x=list(relation="free")),auto.key=T)
372
resl2=cast(resl,date+month+variable~model)
373

  
374
myTheme$superpose.symbol$col=c("blue",rep("green",3),rep("red",3),rep("purple",3),rep("blue",2))
375

  
376
xyplot(mod6~mod8|variable,groups=month,data=resl2,scales=list(relation="free"),auto.key=list(space="right"),par.settings = myTheme)+layer(panel.abline(0,1))
377

  
378

  
379
dev.off()
380

  
381
print(xtable(mods),include.rownames=F,file=paste(path,"/",out_prefix,"_Models.tex",sep=""))
382

  
383
xyplot(form~value|variable,groups=month,data=resl,scales=list(x=list(relation="free"),y=list(draw=T, alternating=1)),auto.key=list(space="right"),par.settings = myTheme)
218 384

  
219 385
bwplot(form~value|variable,data=resl,scales=list(x=list(relation="free")))
220 386

  
221
resl2=cast(resl,date+month+variable~model)
222
xyplot(mod5~mod6|variable,groups=month,data=resl2,scales=list(relation="free"))+layer(panel.abline(0,1))
223 387

  
224 388
  
225 389

  

Also available in: Unified diff