### Figures and tables for MOD09 Cloud Manuscript

setwd("~/acrobates/adamw/projects/cloud/")


## libraries
library(rasterVis)
library(latticeExtra)
library(xtable)
library(texreg)
library(reshape)
library(caTools)


## read in data
#cld=read.csv("data/NDP026D/cld.csv")
cldm=read.csv("data/NDP026D/cldm.csv")
#cldy=read.csv("data/NDP026D/cldy.csv")
#clda=read.csv("data/NDP026D/clda.csv")
st=read.csv("data/NDP026D/stations.csv")


## add land use/land cover (lulc) factor information
require(plotKML); data(worldgrids_pal)  #load IGBP palette
IGBP=data.frame(ID=0:16,col=worldgrids_pal$IGBP[-c(18,19)],stringsAsFactors=F)
IGBP$class=rownames(IGBP);rownames(IGBP)=1:nrow(IGBP)
## attach IGBP class labels to the station climatology table
## (defined here so the land-cover figures and tables below can use it)
cldm$lulcc=as.factor(IGBP$class[match(cldm$lulc,IGBP$ID)])

## month factors
#cld$month2=factor(cld$month,labels=month.name)
cldm$month2=factor(cldm$month,labels=month.name)

## make the station table a spatial object
coordinates(st)=c("lon","lat")
projection(st)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")

## make spatial object (annual station data; currently unused)
#cldys=cldy
#coordinates(cldys)=c("lon","lat")
#projection(cldys)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")

#### Evaluate MOD09 cloud frequency data
mod09=brick("data/cloud_monthly.nc")
mod09s=brick("data/cloud_yseasmean.nc",varname="CF");names(mod09s)=c("DJF","MAM","JJA","SON")
mod09c=brick("data/cloud_ymonmean.nc",varname="CF");names(mod09c)=month.name
mod09a=brick("data/cloud_mean.nc",varname="CF");names(mod09a)="Mean Annual Cloud Frequency"

mod09min=brick("data/cloud_min.nc",varname="CFmin")
mod09max=brick("data/cloud_max.nc",varname="CFmax")
mod09sd=brick("data/cloud_std.nc",varname="CFsd")
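## Optional sanity check (added; not part of the original workflow): confirm that
## the seasonal, monthly, and annual climatology bricks share the same grid and
## that cloud frequency values fall in the expected 0-100% range.
stopifnot(compareRaster(mod09s,mod09c,mod09a,extent=TRUE,rowcol=TRUE,crs=TRUE))
print(c(min=cellStats(mod09a,"min"),max=cellStats(mod09a,"max")))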
#mod09mean=raster("data/mod09_clim_mac.nc")
#names(mod09d)=c("Mean","Minimum","Maximum","Standard Deviation")

#plot(mod09a,layers=1,margin=F,maxpixels=100)

## calculate differences between MOD09 and station cloud climatologies
cldm$difm=cldm$mod09-cldm$cld_all        # difference of the means
cldm$difs=cldm$mod09sd+cldm$cldsd_all    # sum of the two standard deviations

#clda$dif=clda$mod09-clda$cld

## read in global coasts for nice plotting
library(maptools)
library(rgdal)
library(rgeos)   # needed for gIntersection() below

#coast=getRgshhsMap("/mnt/data/jetzlab/Data/environ/global/gshhg/gshhs_h.b", xlim = NULL, ylim = NULL, level = 4)
land=readShapePoly("/mnt/data/jetzlab/Data/environ/global/gshhg/GSHHS_shp/c/GSHHS_c_L1.shp",force_ring=TRUE)
projection(land)="+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
CP <- as(extent(-180, 180, -60, 84), "SpatialPolygons")
proj4string(CP) <- CRS(proj4string(land))
coast=as(land[land$area>50,],"SpatialLines")
## Clip the land and coast layers to the analysis extent
land <- gIntersection(land, CP, byid=F)
coast <- gIntersection(coast, CP, byid=F)


#### get biome data
## make spatial object
cldms=cldm
coordinates(cldms)=c("lon","lat")
projection(cldms)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")

if(!file.exists("data/teow/biomes.shp")){
    teow=readOGR("/mnt/data/jetzlab/Data/environ/global/teow/official/","wwf_terr_ecos")
    teow=teow[teow$BIOME<90,]
    biome=unionSpatialPolygons(teow,teow$BIOME, threshold=5)
    biomeid=read.csv("/mnt/data/jetzlab/Data/environ/global/teow/official/biome.csv",stringsAsFactors=F)
    biome=SpatialPolygonsDataFrame(biome,data=biomeid)
    writeOGR(biome,"data/teow","biomes",driver="ESRI Shapefile")
}
biome=readOGR("data/teow/","biomes")
projection(biome)=projection(st)
st$biome=over(st,biome,returnList=F)$BiomeID

cldm$biome=st$biome[match(cldm$StaID,st$id)]

## get stratified sample of points from biomes for illustration
if(!file.exists("output/biomesamplepoints.csv")){
    n_biomesamples=1000
    library(multicore)
    biomesample=do.call(rbind.data.frame,mclapply(1:length(biome),function(i)
        data.frame(biome=biome$BiomeID[i],coordinates(spsample(biome[i,],n=n_biomesamples,type="stratified",nsig=2)))))
    write.csv(biomesample,"output/biomesamplepoints.csv",row.names=F)
}
biomesample=read.csv("output/biomesamplepoints.csv")
coordinates(biomesample)=c("x1","x2")

#biomesample=extract(biomesample,mod09c)

## Figures
n=100
at=seq(0,100,length=n)
colr=colorRampPalette(c("black","green","red"))
cols=colr(n)


#pdf("output/Figures.pdf",width=11,height=8.5)
png("output/Figures_%03d.png",width=5000,height=4000,res=600,pointsize=36,bg="transparent")

res=1e5
greg=list(ylim=c(-60,84),xlim=c(-180,180))

## Figure 1: 4-panel summaries
#- Annual average
levelplot(mod09a,col.regions=colr(n),cuts=100,at=seq(0,100,len=100),colorkey=list(space="bottom",adj=1),
  margin=F,maxpixels=res,ylab="",xlab="",useRaster=T,ylim=greg$ylim)+
  layer(sp.lines(coast,col="black"),under=F)
## Mean annual with validation stations
levelplot(mod09a,col.regions=colr(n),cuts=100,at=seq(0,100,len=100),colorkey=list(space="bottom",adj=1),
  margin=F,maxpixels=res,ylab="",xlab="",useRaster=T,ylim=greg$ylim)+
  layer(panel.xyplot(lon,lat,pch=16,cex=.3,col="black"),data=data.frame(coordinates(st)))+
  layer(sp.lines(coast,col="black"),under=F)

## Seasonal Means
levelplot(mod09s,col.regions=colr(n),cuts=100,at=seq(0,100,len=100),colorkey=list(space="bottom",adj=2),
  margin=F,maxpixels=res,ylab="",xlab="",useRaster=T,ylim=greg$ylim)+
  layer(sp.lines(coast,col="black"),under=F)

## Monthly Means
levelplot(mod09c,col.regions=colr(n),cuts=100,at=seq(0,100,len=100),colorkey=list(space="bottom",adj=1),
  margin=F,maxpixels=res,ylab="Latitude",xlab="Longitude",useRaster=T,ylim=greg$ylim)+
  layer(sp.lines(coast,col="black"),under=F)

#- Monthly minimum
#- Monthly maximum
#- STDEV or Min-Max
#p_mac=levelplot(mod09a,col.regions=colr(n),cuts=99,at=seq(0,100,length=100),margin=F,maxpixels=1e5,colorkey=list(space="bottom",height=.75),xlab="",ylab="",useRaster=T)+
#      layer(sp.lines(coast,col="black"),under=F)
#p_min=levelplot(mod09min,col.regions=colr(n),cuts=99,margin=F,maxpixels=1e5,colorkey=list(space="bottom",height=.75),useRaster=T)
#p_max=levelplot(mod09max,col.regions=colr(n),cuts=99,margin=F,maxpixels=1e5,colorkey=list(space="bottom",height=.75),useRaster=T)
#p_sd=levelplot(mod09sd,col.regions=colr(n),cuts=99,at=seq(0,100,length=100),margin=F,maxpixels=1e5,colorkey=list(space="bottom",height=.75),useRaster=T)
#p3=c("Mean Cloud Frequency (%)"=p_mac,"Max Cloud Frequency (%)"=p_max,"Min Cloud Frequency (%)"=p_min,"Cloud Frequency Variability (SD)"=p_sd,x.same=T,y.same=T,merge.legends=T,layout=c(2,2))
#print(p3)

## bgr(): build a diverging colour ramp (c1 below the break `br`, c2 above it)
## with breakpoints spanning the range of x
bgr=function(x,n=100,br=0,c1=c("darkblue","blue","grey"),c2=c("grey","red","purple")){
    at=unique(c(seq(min(x,na.rm=T),max(x,na.rm=T),len=n)))
    bg=colorRampPalette(c1)
    gr=colorRampPalette(c2)
    return(list(at=at,col=c(bg(sum(at<br)),gr(sum(at>=br)))))
}
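## Example (added, illustrative only): bgr() returns the breakpoints ($at) and a
## diverging palette ($col) centred on `br`; str() shows the structure used by the
## histogram and map panels below.
str(bgr(seq(-20,20,len=50),n=20))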

colat=bgr(cldm$difm)
phist=histogram(cldm$difm,breaks=colat$at,border=NA,col=colat$col,xlim=c(-50,40),type="count",xlab="Difference (MOD09-NDP026D)")#,seq(0,1,len=6),na.rm=T)
pmap=xyplot(lat~lon|month2,data=cldm,groups=cut(cldm$difm,rev(colat$at)),
       par.settings=list(superpose.symbol=list(col=colat$col)),pch=16,cex=.25,
       auto.key=F,#list(space="right",title="Difference\n(MOD09-NDP026D)",cex.title=1),asp=1,
       ylab="Latitude",xlab="Longitude")+
  layer(sp.lines(coast,col="black",lwd=.1),under=F)
print(phist,position=c(0,.75,1,1),more=T)
print(pmap,position=c(0,0,1,.78))

### heatmap of mod09 vs. NDP for all months
hmcols=colorRampPalette(c("grey","blue","red","purple"))
#hmcols=colorRampPalette(c(grey(.8),grey(.3),grey(.2)))
tr=c(0,80)
colkey <- draw.colorkey(list(col = hmcols(tr[2]), at = tr[1]:tr[2],height=.25))

xyplot(cld_all~mod09|month2,data=cldm,panel=function(x,y,subscripts){
  n=50
  bins=seq(0,100,len=n)
  tb=melt(as.matrix(table(
    x=cut(x,bins,labels=bins[-1]),
    y=cut(y,bins,labels=bins[-1]))))
  qat=unique(tb$value)
  print(max(qat))
  qat=tr[1]:tr[2]#unique(tb$value)
  panel.levelplot(tb$x,tb$y,tb$value,at=qat,col.regions=c("transparent",hmcols(length(qat))),subscripts=1:nrow(tb))
#  panel.abline(0,1,col="black",lwd=2)
  panel.abline(lm(y ~ x),col="black",lwd=2)
#  panel.ablineq(lm(y ~ x), r.sq = TRUE,at = 0.6,pos=1, offset=0,digits=2,col="blue")
  panel.text(70,10,bquote(paste(R^2,"=",.(round(summary(lm(y ~ x))$r.squared,2)))),cex=1.2)
},asp=1,scales=list(at=seq(0,100,len=6),useRaster=T,colorkey=list(width=.5,title="Number of Stations")),
          ylab="NDP Mean Cloud Amount (%)",xlab="MOD09 Cloud Frequency (%)",
              legend= list(right = list(fun = colkey)))#+ layer(panel.abline(0,1,col="black",lwd=2))


## Monthly Climatologies
## for(i in 1:2){
##  p1=xyplot(cld~mod09|month2,data=cldm,cex=.2,pch=16,subscripts=T,ylab="NDP Mean Cloud Amount",xlab="MOD09 Cloud Frequency (%)")+
##   layer(panel.lines(1:100,predict(lm(y~x),newdata=data.frame(x=1:100)),col="green"))+
##   layer(panel.lines(1:100,predict(lm(y~x+I(x^2)),newdata=data.frame(x=1:100)),col="blue"))+
##   layer(panel.abline(0,1,col="red"))
##     if(i==2){
##      p1=p1+layer(panel.segments(mod09[subscripts],cld[subscripts]-cldsd[subscripts],mod09[subscripts],cld[subscripts]+cldsd[subscripts],subscripts=subscripts,col="grey"),data=cldm,under=T,magicdots=T)
##      p1=p1+layer(panel.segments(mod09[subscripts]-mod09sd[subscripts],cld[subscripts],mod09[subscripts]+mod09sd[subscripts],cld[subscripts],subscripts=subscripts,col="grey"),data=cldm,under=T,magicdots=T)
##        }
## print(p1)
## }

## boxplots of the MOD09-station difference by land cover class
bwplot(lulcc~difm,data=cldm,horiz=T,xlab="Difference (MOD09-Observed)",varwidth=T,notch=T)+layer(panel.abline(v=0))

#library(BayesFactor)
#coplot(difm~month2|lulcc,data=cldm[!is.na(cldm$dif)&!is.na(cldm$lulcc),],panel = panel.smooth)


dev.off()



## Validation table construction

summary(lm(cld_all~mod09+lat,data=cldm))

## assess latitude bias
cldm$abslat=abs(cldm$lat)
cldm$absdif=abs(cldm$difm)

###################################################################
### validation by biome
bs=biome$BiomeID
mod_bs=lapply(bs,function(bs) lm(cld_all~mod09,data=cldm[cldm$biome==bs,]))
names(mod_bs)=biome$Biome

screenreg(mod_bs,digits=2,single.row=F,custom.model.names=names(mod_bs))


#lm_all=lm(cld_all~mod09,data=cldm[!is.na(cldm$cld),])
lm_mod=lm(cld~mod09+biome,data=cldm)

screenreg(lm_mod,digits=2,single.row=F)

####################################################################
## assess temporal stability
library(spBayes)   # needed for iDist() and spDynLM() below

## spatially subset data to stations at least 10 km apart
st2=remove.duplicates(st,zero=10)

## Subset data
## drop missing observations
cldm.t=cldm[!is.na(cldm$cld_all)&!is.na(cldm$mod09)&!is.na(cldm$biome),]
cldm.t=cldm.t[cldm.t$lat>=-60,]
#  make sure all stations have all 12 months of mod09 data
stdrop=names(which(tapply(cldm.t$month,cldm.t$StaID,length)!=12))
cldm.t=cldm.t[!cldm.t$StaID%in%stdrop,]
# Keep only stations at least 10 km apart
cldm.t=cldm.t[cldm.t$StaID%in%st2$id,]
## Subset to only some months, if desired
#cldm.t=cldm.t[cldm.t$month%in%1:3,]


## Select Knots
knots=spsample(land,500,type="regular")

## reshape data to wide format (one row per station, one column per month)
m.cld=cast(cldm.t,StaID+lat+lon+biome~month,value="cld_all");colnames(m.cld)[-(1:4)]=paste("cld.",colnames(m.cld)[-(1:4)],sep="")
m.mod09=cast(cldm.t,StaID~month,value="mod09");colnames(m.mod09)[-1]=paste("mod09.",colnames(m.mod09)[-1],sep="")
mdata=cbind(m.cld,m.mod09)
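## Quick check (added): the wide tables should have one row per station and one
## cld.*/mod09.* column per month before they are passed to spDynLM below.
stopifnot(nrow(m.cld)==nrow(m.mod09))
print(dim(mdata))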

## coordinate matrix and maximum inter-station distance
coords <- as.matrix(m.cld[,c("lon","lat")])#as.matrix(ne.temp[,c("UTMX", "UTMY")]/1000)
max.d <- max(iDist(coords))

## number of months and observations per month
N.t <- ncol(m.mod09)-1   ## number of months (defined before the formulas that use it)
n <- nrow(m.cld)         ## number of observations per month

## make symbolic model formula statement for each month
mods <- lapply(paste(paste("cld.",1:N.t,sep=''),paste("mod09.",1:N.t,sep=''),sep='~'), as.formula)

tlm=model.matrix(lm(mods[[1]],data=mdata))
p <- ncol(tlm)           ## number of regression parameters in each month

starting <- list("beta"=rep(0,N.t*p), "phi"=rep(3/(0.5*max.d), N.t),
                 "sigma.sq"=rep(2,N.t), "tau.sq"=rep(1, N.t),
                 "sigma.eta"=diag(rep(0.01, p)))
tuning <- list("phi"=rep(5, N.t))

priors <- list("beta.0.Norm"=list(rep(0,p), diag(1000,p)),
               "phi.Unif"=list(rep(3/(0.9*max.d), N.t), rep(3/(0.05*max.d), N.t)),
               "sigma.sq.IG"=list(rep(2,N.t), rep(10,N.t)),
               "tau.sq.IG"=list(rep(2,N.t), rep(5,N.t)),
               "sigma.eta.IW"=list(2, diag(0.001,p)))
cov.model <- "exponential"
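## Note (added explanation): with an exponential covariance the effective range is
## roughly 3/phi, so the phi.Unif prior above spans effective ranges between ~5%
## and ~90% of the maximum inter-station distance (max.d), and the starting value
## corresponds to ~50% of max.d.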

## Run the model
n.samples <- 100
m.1=spDynLM(mods,data=mdata,coords=coords,knots=coordinates(knots),n.samples=n.samples,starting=starting,tuning=tuning,priors=priors,cov.model=cov.model,get.fitted=T,n.report=25)
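## Note (added): n.samples=100 with a 75% burn-in keeps this run fast; substantially
## more MCMC samples would typically be drawn before summarizing the posteriors.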

## summarize
burn.in <- floor(0.75*n.samples)
quant <- function(x){quantile(x, prob=c(0.5, 0.025, 0.975))}
beta <- apply(m.1$p.beta.samples[burn.in:n.samples,], 2, quant)
beta.0 <- beta[,grep("Intercept", colnames(beta))]
beta.1 <- beta[,grep("mod09", colnames(beta))]
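## Minimal sketch (added; assumes the slope columns are ordered by month as in
## `mods`): tabulate the posterior median and 95% CI of the MOD09 slope by month.
beta1_tab=data.frame(month=month.name,t(beta.1))
names(beta1_tab)[-1]=c("median","lwr","upr")
print(beta1_tab,digits=2)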


## custom texreg extract() method for lm that reports R-Squared, RMSE and n
library(texreg)
extract.lm <- function(model) {
    s <- summary(model)
    names <- rownames(s$coef)
    co <- s$coef[, 1]
    se <- s$coef[, 2]
    pval <- s$coef[, 4]
    rs <- s$r.squared
    n <- nobs(model)
    rmse <- sqrt(mean(residuals(s)^2))
    gof <- c(rs, rmse, n)
    gof.names <- c("R-Squared","RMSE","n")
    tr <- createTexreg(coef.names = names, coef = co, se = se,
                       pvalues = pval, gof.names = gof.names, gof = gof)
    return(tr)
}
setMethod("extract", signature = className("lm", "stats"), definition = extract.lm)
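## Quick illustration (added; uses the built-in `cars` data rather than manuscript
## data): with the custom extract() method, texreg tables report R-Squared, RMSE and n.
screenreg(lm(dist~speed,data=cars),digits=2)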

forms=c("cld~mod09+month2+lat")
lm_all=lm(cld_all~mod09+lat,data=cldm[!is.na(cldm$cld),])


### Compare two time periods
lm_all1=lm(cld_all~mod09,data=cldm[!is.na(cldm$cld),])
lm_mod=lm(cld~mod09,data=cldm)
mods=list("1970-2000 complete"=lm_all1,"2000-2009"=lm_mod)

screenreg(mods,digits=2,single.row=T,custom.model.names=names(mods))
htmlreg(mods,file = "output/tempstab.doc",
        custom.model.names = names(mods),
        single.row = T, inline.css = FALSE,doctype = TRUE, html.tag = TRUE, head.tag = TRUE, body.tag = TRUE)



abslm=lm(absdif~abslat:I(abslat^2),data=cldm)

plot(absdif~abslat,data=cldm,cex=.25,pch=16)
lines(0:90,predict(abslm,newdata=data.frame(abslat=0:90),type="response"),col="red")

library(BayesFactor)
bf=anovaBF(difm~lulcc+month2,data=cldm[!is.na(cldm$difm)&!is.na(cldm$lulcc),])
ch=posterior(bf, iterations = 1000)
summary(bf)
plot(bf)

## explore validation error by land cover class
## (cldm$lulcc was defined above from the IGBP classes)

## Table of RMSEs by land cover class and month
library(plyr)   # for ddply()
lulcrmsel=ddply(cldm,c("month","lulc"),function(x) c(count=nrow(x),rmse=sqrt(mean((x$mod09-x$cld)^2,na.rm=T))))
lulcrmsel=lulcrmsel[!is.na(lulcrmsel$lulc),]
lulcrmsel$lulcc=as.factor(IGBP$class[match(lulcrmsel$lulc,IGBP$ID)])

## Summary table (mean difference and RMSE) by land cover class
lulctl= ddply(cldm,c("lulc"),function(x) c(count=nrow(x),mean=paste(mean(x$difm,na.rm=T)," (",sd(x$difm,na.rm=T),")",sep=""),rmse=sqrt(mean((x$difm)^2,na.rm=T))))
lulctl$lulcc=as.factor(IGBP$class[match(lulctl$lulc,IGBP$ID)])
print(xtable(lulctl[,c("lulcc","count","mean","rmse")],digits=1),type="html")


lulcrmse=cast(lulcrmsel,lulcc~month,value="rmse")
lulcrmse

lulcrmse.q=round(do.call(rbind,apply(lulcrmse,1,function(x) data.frame(Min=min(x,na.rm=T),Mean=mean(x,na.rm=T),Max=max(x,na.rm=T),SD=sd(x,na.rm=T)))),1)#quantile,c(0.025,0.5,.975),na.rm=T)),1)
lulcrmse.q=lulcrmse.q[order(lulcrmse.q$Mean,decreasing=T),]
lulcrmse.q

print(xtable(lulcrmse,digits=1),type="html")

bgyr=colorRampPalette(c("blue","green","yellow","red"))
levelplot(rmse~month*lulcc,data=lulcrmsel,col.regions=bgyr(1000),at=quantile(lulcrmsel$rmse,seq(0,1,len=100),na.rm=T))


### Linear models
summary(lm(difm~as.factor(lulc)+lat+month2,data=cldm))