Project

General

Profile

Download (13.5 KB) Statistics
| Branch: | Revision:
1
#! /bin/R
2
### Script to download and process the NDP-026D station cloud dataset
3
setwd("~/acrobates/adamw/projects/interp/data/NDP026D")
4

    
5
library(multicore)
6
library(latticeExtra)
7
library(doMC)
8
library(rasterVis)
9
library(rgdal)
10
library(reshape)
11
library(hexbin)
12
## register parallel processing
13
#registerDoMC(10)
14
#beginCluster(10)
15

    
16
## available here http://cdiac.ornl.gov/epubs/ndp/ndp026d/ndp026d.html
17

    
18
## Get station locations
19
system("wget -N -nd http://cdiac.ornl.gov/ftp/ndp026d/cat01/01_STID -P data/")
20
st=read.table("data/01_STID",skip=1)
21
colnames(st)=c("StaID","LAT","LON","ELEV","ny1","fy1","ly1","ny7","fy7","ly7","SDC","b5c")
22
st$lat=st$LAT/100
23
st$lon=st$LON/100
24
st$lon[st$lon>180]=st$lon[st$lon>180]-360
25
st=st[,c("StaID","ELEV","lat","lon")]
26
colnames(st)=c("id","elev","lat","lon")
27
write.csv(st,"stations.csv",row.names=F)
28
coordinates(st)=c("lon","lat")
29
## download data
30
system("wget -N -nd ftp://cdiac.ornl.gov/pub/ndp026d/cat67_78/* -A '.tc.Z' -P data/")
31

    
32
system("gunzip data/*.Z")
33

    
34
## define FWF widths
35
f162=c(5,5,4,7,7,7,4) #format 162
36
c162=c("StaID","YR","Nobs","Amt","Fq","AWP","NC")
37

    
38
## use monthly timeseries
39
cld=do.call(rbind.data.frame,mclapply(sprintf("%02d",1:12),function(m) {
40
  d=read.fwf(list.files("data",pattern=paste("MNYDC.",m,".tc",sep=""),full=T),skip=1,widths=f162)
41
  colnames(d)=c162
42
  d$month=as.numeric(m)
43
  print(m)
44
  return(d)}
45
  ))
46

    
47
## add lat/lon
48
cld[,c("lat","lon")]=st[match(cld$StaID,st$id),c("lat","lon")]
49

    
50
## drop missing values
51
cld=cld[,!grepl("Fq|AWP|NC",colnames(cld))]
52
cld$Amt[cld$Amt<0]=NA
53
#cld$Fq[cld$Fq<0]=NA
54
#cld$AWP[cld$AWP<0]=NA
55
#cld$NC[cld$NC<0]=NA
56
#cld=cld[cld$Nobs>0,]
57

    
58
## add the MOD09 data to cld
59
#### Evaluate MOD35 Cloud data
60
mod09=brick("~/acrobates/adamw/projects/cloud/data/mod09.nc")
61

    
62
## overlay the data with 32km diameter (16km radius) buffer
63
## buffer size from Dybbroe, et al. (2005) doi:10.1175/JAM-2189.1.
64
buf=16000
65
mod09sta=lapply(1:nlayers(mod09),function(l) {print(l); extract(mod09[[l]],st,buffer=buf,fun=mean,na.rm=T,df=T)[,2]})
66
mod09st=do.call(cbind.data.frame,mod09sta)
67
#mod09st=mod09st[,!is.na(colnames(mod09st))]
68
colnames(mod09st)=names(mod09)
69
mod09st$id=st$id
70
mod09stl=melt(mod09st,id.vars="id")
71
mod09stl[,c("year","month")]=do.call(rbind,strsplit(sub("X","",mod09stl$variable),"[.]"))[,1:2]
72
## add it to cld
73
cld$mod09=mod09stl$value[match(paste(cld$StaID,cld$YR,cld$month),paste(mod09stl$id,mod09stl$year,as.numeric(mod09stl$month)))]
74

    
75

    
76
## LULC
77
#system(paste("gdalwarp -r near -co \"COMPRESS=LZW\" -tr ",paste(res(mod09),collapse=" ",sep=""),
78
#             "-tap -multi -t_srs \"",   projection(mod09),"\" /mnt/data/jetzlab/Data/environ/global/landcover/MODIS/MCD12Q1_IGBP_2005_v51.tif ../modis/mod12/MCD12Q1_IGBP_2005_v51.tif"))
79
lulc=raster("../modis/mod12/MCD12Q1_IGBP_2005_v51.tif")
80
#lulc=ratify(lulc)
81
require(plotKML); data(worldgrids_pal)  #load IGBP palette
82
IGBP=data.frame(ID=0:16,col=worldgrids_pal$IGBP[-c(18,19)],stringsAsFactors=F)
83
IGBP$class=rownames(IGBP);rownames(IGBP)=1:nrow(IGBP)
84
levels(lulc)=list(IGBP)
85
#lulc=crop(lulc,mod09)
86
  Mode <- function(x) {
87
      ux <- na.omit(unique(x))
88
        ux[which.max(tabulate(match(x, ux)),na.rm=T)]
89
      }
90
lulcst=extract(lulc,st,fun=Mode,na.rm=T,buffer=buf,df=T)
91
colnames(lulcst)=c("id","lulc")
92
## add it to cld
93
cld$lulc=lulcst$lulc[match(cld$StaID,lulcst$id)]
94
cld$lulc=factor(as.integer(cld$lulc),labels=IGBP$class)
95

    
96
## update cld column names
97
colnames(cld)[grep("Amt",colnames(cld))]="cld"
98
cld$cld=cld$cld/100
99

    
100
## calculate means and sds
101
cldm=do.call(rbind.data.frame,by(cld,list(month=as.factor(cld$month),StaID=as.factor(cld$StaID)),function(x){
102
  data.frame(
103
             month=x$month[1],
104
             lulc=x$lulc[1],
105
             StaID=x$StaID[1],
106
             mod09=mean(x$mod09,na.rm=T),
107
             mod09sd=sd(x$mod09,na.rm=T),
108
             cld=mean(x$cld[x$Nobs>10],na.rm=T),
109
             cldsd=sd(x$cld[x$Nobs>10],na.rm=T))}))
110
cldm[,c("lat","lon")]=coordinates(st)[match(cldm$StaID,st$id),c("lat","lon")]
111

    
112
## means by year
113
cldy=do.call(rbind.data.frame,by(cld,list(year=as.factor(cld$YR),StaID=as.factor(cld$StaID)),function(x){
114
  data.frame(
115
             year=x$YR[1],
116
             StaID=x$StaID[1],
117
             lulc=x$lulc[1],
118
             mod09=mean(x$mod09,na.rm=T),
119
             mod09sd=sd(x$mod09,na.rm=T),
120
             cld=mean(x$Amt[x$Nobs>10]/100,na.rm=T),
121
             cldsd=sd(x$Amt[x$Nobs>10]/100,na.rm=T))}))
122
cldy[,c("lat","lon")]=coordinates(st)[match(cldy$StaID,st$id),c("lat","lon")]
123

    
124
## overall mean
125
clda=do.call(rbind.data.frame,by(cld,list(StaID=as.factor(cld$StaID)),function(x){
126
  data.frame(
127
             StaID=x$StaID[1],
128
             lulc=x$lulc[1],
129
             mod09=mean(x$mod09,na.rm=T),
130
             mod09sd=sd(x$mod09,na.rm=T),
131
             cld=mean(x$cld[x$Nobs>10],na.rm=T),
132
             cldsd=sd(x$cld[x$Nobs>10],na.rm=T))}))
133
clda[,c("lat","lon")]=coordinates(st)[match(clda$StaID,st$id),c("lat","lon")]
134

    
135

    
136
## write out the tables
137
write.csv(cld,file="cld.csv",row.names=F)
138
write.csv(cldy,file="cldy.csv",row.names=F)
139
write.csv(cldm,file="cldm.csv",row.names=F)
140
write.csv(clda,file="clda.csv",row.names=F
141
)
142
#########################################################################
143
##################
144
###
145
cld=read.csv("cld.csv")
146
cldm=read.csv("cldm.csv")
147
cldy=read.csv("cldy.csv")
148
clda=read.csv("clda.csv")
149
st=read.csv("stations.csv")
150

    
151
### remove mod09==0 due to mosaic problem (remove when fixed)
152
cld=cld[!is.na(cld$lat)&cld$mod09!=0,]
153
cldm=cldm[!is.na(cldm$lat)&cldm$mod09!=0,]
154
cldy=cldy[!is.na(cldy$lat)&cldy$mod09!=0,]
155

    
156
## month factors
157
cld$month2=factor(cld$month,labels=month.name)
158
cldm$month2=factor(cldm$month,labels=month.name)
159

    
160
coordinates(st)=c("lon","lat")
161
projection(st)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
162

    
163
##make spatial object
164
cldms=cldm
165
coordinates(cldms)=c("lon","lat")
166
projection(cldms)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
167

    
168
##make spatial object
169
cldys=cldy
170
coordinates(cldys)=c("lon","lat")
171
projection(cldys)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
172

    
173
#### Evaluate MOD35 Cloud data
174
mod09=brick("~/acrobates/adamw/projects/cloud/data/mod09.nc")
175
mod09c=brick("~/acrobates/adamw/projects/cloud/data/mod09_clim.nc",varname="CF");names(mod09c)=month.name
176
mod09c2=raster("~/acrobates/adamw/projects/cloud/data/mod09_clim.nc",varname="CF",nl=1)
177

    
178
### get monthly climatologies for each station 
179
#cldc=do.call(rbind.data.frame,by(cld,list(id=cld$StaID,month=cld$month),function(x){
180
#  x$mod09[x$mod09==0]=NA
181
#  data.frame(id=x$StaID[1],month=x$month[1],Nobs=sum(x$Nobs,na.rm=T),Amt=mean(x$Amt,na.rm=T),mod09=mean(x$mod09,na.rm=T))
182
#  }))
183

    
184
## read in global coasts for nice plotting
185
library(maptools)
186

    
187
data(wrld_simpl)
188
coast <- unionSpatialPolygons(wrld_simpl, rep("land",nrow(wrld_simpl)), threshold=5)
189
coast=as(coast,"SpatialLines")
190
#coast=spTransform(coast,CRS(projection(mod35)))
191

    
192

    
193
n=100
194
  at=seq(0,100,length=n)
195
colr=colorRampPalette(c("black","green","red"))
196
cols=colr(n)
197

    
198

    
199
pdf("/home/adamw/acrobates/adamw/projects/cloud/output/validation.pdf",width=11,height=8.5)
200

    
201
### maps of mod09 and NDP
202
## map of stations
203
xyplot(lat~lon,data=data.frame(coordinates(st)),pch=16,cex=.5, main="NDP-026D Cloud Climatology Stations",ylab="Latitude",xlab="Longitude")+
204
  layer(sp.lines(coast,col="grey"),under=T)
205

    
206
levelplot(mod09c,col.regions=colr(100),at=seq(0,100,len=100),margin=F,maxpixels=1e5,main="MOD09 Cloud Frequency",ylab="Latitude",xlab="Longitude")
207

    
208
#p2=xyplot(lat~lon|month2,data=cldm,col=as.character(cut(cldm$cld,seq(0,100,len=100),labels=colr(99))),pch=16,cex=.1,auto.key=T,asp=1,
209
#       main="NDP-026D Cloud Climatology Stations",ylab="Latitude",xlab="Longitude",layout=c(12,1))+
210
#  layer(sp.lines(coast,col="black",lwd=.1),under=F)
211
#v_month=c(p1,p2,layout=c(12,2),x.same=T,y.same=T,merge.legends=T)
212
#print(v_month)
213

    
214

    
215
#xyplot(lat~lon|month2,groups=cut(cldm$cld,seq(0,100,len=5)),data=cldm,pch=".",cex=.2,auto.key=T,
216
#       main="Mean Monthly Cloud Coverage",ylab="Latitude",xlab="Longitude",
217
#        par.settings = list(superpose.symbol= list(pch=16,col=c("blue","green","yellow","red"))))+
218
#  layer(sp.lines(coast,col="grey"),under=T)
219

    
220
### heatmap of mod09 vs. NDP for all months
221
hmcols=colorRampPalette(c("grey","blue","red"))
222
tr=c(0,27)
223
colkey <- draw.colorkey(list(col = hmcols(tr[2]), at = tr[1]:tr[2],height=.25))
224

    
225
xyplot(cld~mod09,data=cld[cld$Nobs>10,],panel=function(x,y,subscripts){
226
  n=150
227
  bins=seq(0,100,len=n)
228
  tb=melt(as.matrix(table(
229
    x=cut(x,bins,labels=bins[-1]),
230
    y=cut(y,bins,labels=bins[-1]))))
231
  qat=tr[1]:tr[2]#unique(tb$value)
232
  print(qat)
233
  panel.levelplot(tb$x,tb$y,tb$value,at=qat,col.regions=c("transparent",hmcols(length(qat))),subscripts=subscripts)
234
  },asp=1,scales=list(at=seq(0,100,len=6)),ylab="NDP Mean Cloud Amount (%)",xlab="MOD09 Cloud Frequency (%)",
235
       legend= list(right = list(fun = colkey,title="Station Count")))+
236
  layer(panel.abline(0,1,col="black",lwd=2))+
237
  layer(panel.ablineq(lm(y ~ x), r.sq = TRUE,at = 0.6,pos=1, offset=22,digits=2,col="blue"), style = 1)
238

    
239

    
240

    
241
xyplot(cld~mod09|month2,data=cld[cld$Nobs>10,],panel=function(x,y,subscripts){
242
  n=50
243
  bins=seq(0,100,len=n)
244
  tb=melt(as.matrix(table(
245
    x=cut(x,bins,labels=bins[-1]),
246
    y=cut(y,bins,labels=bins[-1]))))
247
  qat=unique(tb$value)
248
  print(qat)
249
  qat=0:26
250
  qat=tr[1]:tr[2]#unique(tb$value)
251
  panel.levelplot(tb$x,tb$y,tb$value,at=qat,col.regions=c("transparent",hmcols(length(qat))),subscripts=1:nrow(tb))
252
  layer(panel.abline(0,1,col="black",lwd=2))+
253
  layer(panel.ablineq(lm(y ~ x), r.sq = TRUE,at = 0.6,pos=1, offset=0,digits=2,col="blue"), style = 1)
254
},asp=1,scales=list(at=seq(0,100,len=6),useRaster=T,colorkey=list(width=.5,title="Number of Stations")),
255
          ylab="NDP Mean Cloud Amount (%)",xlab="MOD09 Cloud Frequency (%)",
256
              legend= list(right = list(fun = colkey)))+ layer(panel.abline(0,1,col="black",lwd=2))
257

    
258

    
259
xyplot(cld~mod09,data=clda,cex=0.5,pch=16)+
260
  layer(panel.abline(lm(y~x),col="blue"))+
261
#  layer(panel.lines(x,predict(lm(y~x),type="prediction")))+
262
  layer(panel.loess(x,y,col="blue",span=.2))+
263
  layer(panel.abline(0,1,col="red"))+
264
  layer(panel.segments(mod09,cld-cldsd,mod09,cld+cldsd,col="grey"),data=clda,under=T,magicdots=T)
265

    
266
## all monthly values
267
#xyplot(cld~mod09|as.factor(month),data=cld[cld$Nobs>75,],cex=.2,pch=16,subscripts=T)+
268
#  layer(panel.abline(lm(y~x),col="blue"))+
269
#  layer(panel.abline(0,1,col="red"))
270

    
271
## Monthly Climatologies
272
for(i in 1:2){
273
 p1=xyplot(cld~mod09|month2,data=cldm,cex=.2,pch=16,subscripts=T,ylab="NDP Mean Cloud Amount",xlab="MOD09 Cloud Frequency (%)")+
274
  layer(panel.lines(1:100,predict(lm(y~x),newdata=data.frame(x=1:100)),col="green"))+
275
  layer(panel.lines(1:100,predict(lm(y~x+I(x^3)),newdata=data.frame(x=1:100)),col="blue"))+
276
  layer(panel.abline(0,1,col="red"))
277
    if(i==2){
278
     p1=p1+layer(panel.segments(mod09[subscripts],cld[subscripts]-cldsd[subscripts],mod09[subscripts],cld[subscripts]+cldsd[subscripts],subscripts=subscripts,col="grey"),data=cldm,under=T,magicdots=T)
279
     p1=p1+layer(panel.segments(mod09[subscripts]-mod09sd[subscripts],cld[subscripts],mod09[subscripts]+mod09sd[subscripts],cld[subscripts],subscripts=subscripts,col="grey"),data=cldm,under=T,magicdots=T)
280
        }
281
print(p1)
282
}
283

    
284
dev.off()
285

    
286

    
287
summary(lm(Amt~mod09,data=cld))
288
summary(lm(cld~mod09_10+as.factor(lulc),data=d))
289
summary(lm(cld~mod09_10+as.factor(lulc),data=d))
290

    
291
### exploratory plots
292
xyplot(cld~mod09_10,groups=lulc,data=d@data,pch=16,cex=.5)+layer(panel.abline(0,1,col="red"))
293
xyplot(cld~mod09_10+mod35c5_10|as.factor(lulc),data=d@data,type=c("p","r"),pch=16,cex=.25,auto.key=T)+layer(panel.abline(0,1,col="green"))
294
xyplot(cld~mod35_10|as.factor(lulc),data=d@data,pch=16,cex=.5)+layer(panel.abline(0,1,col="red"))
295
xyplot(mod35_10~mod09_10|as.factor(lulc),data=d@data,pch=16,cex=.5)+layer(panel.abline(0,1,col="red"))
296

    
297
densityplot(stack(mod35,mod09))
298
boxplot(mod35,lulc)
299

    
300
bwplot(mod09~mod35|cut(y,5),data=stack(mod09,mod35))
301

    
302
## add a color key
303
breaks=seq(0,100,by=25)
304
cldm$cut=cut(cldm$cld,breaks)
305
cp=colorRampPalette(c("blue","orange","red"))
306
cols=cp(length(at))
307

    
308

    
309

    
310
## write a pdf
311
#dir.create("output")
312
pdf("output/NDP026d.pdf",width=11,height=8.5)
313

    
314

    
315

    
316
## Validation
317
m=10
318
zlim=c(40,100)
319
dr=subset(mod35,subset=m);projection(dr)=projection(mod35)
320
ds=cldms[cldms$month==m,]
321
plot(dr,col=cp(100),zlim=zlim,main="Comparison of MOD35 Cloud Frequency and NDP-026D Station Cloud Climatologies",
322
     ylab="Northing (m)",xlab="Easting (m)",sub="MOD35 is proportion of cloudy days, while NDP-026D is Mean Cloud Coverage")
323
plot(ds,add=T,pch=21,cex=3,lwd=2,fg="black",bg=as.character(cut(ds$cld,breaks=seq(zlim[1],zlim[2],len=5),labels=cp(4))))
324
#legend("topright",legend=seq(zlim[1],zlim[2],len=5),pch=16,col=cp(length(breaks)))
325

    
326

    
327
xyplot(mod35~cld,data=mod35v,subscripts=T,auto.key=T,panel=function(x,y,subscripts){
328
   td=mod35v[subscripts,]
329
#   panel.segments(x-td$cldsd[subscripts],y,x+td$cldsd[subscripts],y,subscripts=subscripts)
330
   panel.xyplot(x,y,subscripts=subscripts,type=c("p","smooth"),pch=16,col="black")
331
#   panel.segments(x-td$cldsd[subscripts],y,x+td$cldsd[subscripts],y,subscripts=subscripts)
332
 },ylab="MOD35 Proportion Cloudy Days",xlab="NDP-026D Mean Monthly Cloud Amount",
333
        main="Comparison of MOD35 Cloud Mask and Station Cloud Climatologies")
334

    
335
#xyplot(mod35~cld|month,data=mod35v,subscripts=T,auto.key=T,panel=function(x,y,subscripts){
336
#   td=mod35v[subscripts,]
337
#   panel.segments(x-td$cldsd[subscripts],y,x+td$cldsd[subscripts],y,subscripts=subscripts)
338
#   panel.xyplot(x,y,subscripts=subscripts,type=c("p","smooth"),pch=16,col="black")
339
#   panel.segments(x-td$cldsd[subscripts],y,x+td$cldsd[subscripts],y,subscripts=subscripts)
340
# },ylab="MOD35 Proportion Cloudy Days",xlab="NDP-026D Mean Monthly Cloud Amount",
341
#        main="Comparison of MOD35 Cloud Mask and Station Cloud Climatologies")
342

    
343

    
344
dev.off()
345

    
346
graphics.off()
(34-34/47)