Project

General

Profile

Download (13.9 KB) Statistics
| Branch: | Revision:
1
#! /bin/R
2
### Script to download and process the NDP-026D station cloud dataset
3
setwd("~/acrobates/adamw/projects/interp/data/NDP026D")
4

    
5
library(multicore)
6
library(latticeExtra)
7
library(doMC)
8
library(rasterVis)
9
library(rgdal)
10
library(reshape)
11
library(hexbin)
12
## register parallel processing
13
#registerDoMC(10)
14
#beginCluster(10)
15

    
16
## available here http://cdiac.ornl.gov/epubs/ndp/ndp026d/ndp026d.html
17

    
18
## Get station locations
19
system("wget -N -nd http://cdiac.ornl.gov/ftp/ndp026d/cat01/01_STID -P data/")
20
st=read.table("data/01_STID",skip=1)
21
colnames(st)=c("StaID","LAT","LON","ELEV","ny1","fy1","ly1","ny7","fy7","ly7","SDC","b5c")
22
st$lat=st$LAT/100
23
st$lon=st$LON/100
24
st$lon[st$lon>180]=st$lon[st$lon>180]-360
25
st=st[,c("StaID","ELEV","lat","lon")]
26
colnames(st)=c("id","elev","lat","lon")
27
write.csv(st,"stations.csv",row.names=F)
28
coordinates(st)=c("lon","lat")
29
## download data
30
system("wget -N -nd ftp://cdiac.ornl.gov/pub/ndp026d/cat67_78/* -A '.tc.Z' -P data/")
31

    
32
system("gunzip data/*.Z")
33

    
34
## define FWF widths
35
f162=c(5,5,4,7,7,7,4) #format 162
36
c162=c("StaID","YR","Nobs","Amt","Fq","AWP","NC")
37

    
38
## use monthly timeseries
39
cld=do.call(rbind.data.frame,mclapply(sprintf("%02d",1:12),function(m) {
40
  d=read.fwf(list.files("data",pattern=paste("MNYDC.",m,".tc",sep=""),full=T),skip=1,widths=f162)
41
  colnames(d)=c162
42
  d$month=as.numeric(m)
43
  print(m)
44
  return(d)}
45
  ))
46

    
47
## add lat/lon
48
cld[,c("lat","lon")]=st[match(cld$StaID,st$id),c("lat","lon")]
49

    
50
## drop missing values
51
cld=cld[,!grepl("Fq|AWP|NC",colnames(cld))]
52
cld$Amt[cld$Amt<0]=NA
53
#cld$Fq[cld$Fq<0]=NA
54
#cld$AWP[cld$AWP<0]=NA
55
#cld$NC[cld$NC<0]=NA
56
#cld=cld[cld$Nobs>0,]
57

    
58
## add the MOD09 data to cld
59
#### Evaluate MOD35 Cloud data
60
mod09=brick("~/acrobates/adamw/projects/cloud/data/mod09.nc")
61

    
62
## overlay the data with 32km diameter (16km radius) buffer
63
## buffer size from Dybbroe, et al. (2005) doi:10.1175/JAM-2189.1.
64
buf=16000
65
#mod09sta=lapply(1:nlayers(mod09),function(l) {print(l); extract(mod09[[l]],st,buffer=buf,fun=mean,na.rm=T,df=T)[,2]})
66
bins=cut(1:nrow(st),100)
67
mod09sta=lapply(levels(bins),function(lb) {
68
  l=which(bins==lb)
69
  td=extract(mod09,st[l,],buffer=buf,fun=mean,na.rm=T,df=T)
70
  td$id=st$id[l]
71
  print(lb)#as.vector(c(l,td[,1:4])))
72
  write.table(td,"valid.csv",append=T,col.names=F,quote=F,sep=",",row.names=F)
73
  td
74
})#,mc.cores=3)
75

    
76
#mod09sta=extract(mod09,st,buffer=buf,fun=mean,na.rm=T,df=T)
77
mod09st=read.csv("valid.csv",header=F)[,-c(1,2)]
78

    
79
#mod09st=do.call(rbind.data.frame,mod09sta)
80
#mod09st=mod09st[,!is.na(colnames(mod09st))]
81
colnames(mod09st)=c(names(mod09),"id")
82
#mod09st$id=st$id
83
mod09stl=melt(mod09st,id.vars="id")
84
mod09stl[,c("year","month")]=do.call(rbind,strsplit(sub("X","",mod09stl$variable),"[.]"))[,1:2]
85
## add it to cld
86
cld$mod09=mod09stl$value[match(paste(cld$StaID,cld$YR,cld$month),paste(mod09stl$id,mod09stl$year,as.numeric(mod09stl$month)))]
87

    
88

    
89
## LULC
90
#system(paste("gdalwarp -r near -co \"COMPRESS=LZW\" -tr ",paste(res(mod09),collapse=" ",sep=""),
91
#             "-tap -multi -t_srs \"",   projection(mod09),"\" /mnt/data/jetzlab/Data/environ/global/landcover/MODIS/MCD12Q1_IGBP_2005_v51.tif ../modis/mod12/MCD12Q1_IGBP_2005_v51.tif"))
92
lulc=raster("../modis/mod12/MCD12Q1_IGBP_2005_v51.tif")
93
#lulc=ratify(lulc)
94
require(plotKML); data(worldgrids_pal)  #load IGBP palette
95
IGBP=data.frame(ID=0:16,col=worldgrids_pal$IGBP[-c(18,19)],stringsAsFactors=F)
96
IGBP$class=rownames(IGBP);rownames(IGBP)=1:nrow(IGBP)
97
levels(lulc)=list(IGBP)
98
#lulc=crop(lulc,mod09)
99
  Mode <- function(x) {
100
      ux <- na.omit(unique(x))
101
        ux[which.max(tabulate(match(x, ux)))]
102
      }
103
lulcst=extract(lulc,st,fun=Mode,buffer=buf,df=T)
104
colnames(lulcst)=c("id","lulc")
105
## add it to cld
106
cld$lulc=lulcst$lulc[match(cld$StaID,lulcst$id)]
107
#cld$lulc=factor(as.integer(cld$lulc),labels=IGBP$class[sort(unique(cld$lulc))])
108

    
109
## update cld column names
110
colnames(cld)[grep("Amt",colnames(cld))]="cld"
111
cld$cld=cld$cld/100
112

    
113
## calculate means and sds
114
cldm=do.call(rbind.data.frame,by(cld,list(month=as.factor(cld$month),StaID=as.factor(cld$StaID)),function(x){
115
  data.frame(
116
             month=x$month[1],
117
             lulc=x$lulc[1],
118
             StaID=x$StaID[1],
119
             mod09=mean(x$mod09,na.rm=T),
120
             mod09sd=sd(x$mod09,na.rm=T),
121
             cld=mean(x$cld[x$Nobs>10],na.rm=T),
122
             cldsd=sd(x$cld[x$Nobs>10],na.rm=T))}))
123
cldm[,c("lat","lon")]=coordinates(st)[match(cldm$StaID,st$id),c("lat","lon")]
124

    
125
## means by year
126
cldy=do.call(rbind.data.frame,by(cld,list(year=as.factor(cld$YR),StaID=as.factor(cld$StaID)),function(x){
127
  data.frame(
128
             year=x$YR[1],
129
             StaID=x$StaID[1],
130
             lulc=x$lulc[1],
131
             mod09=mean(x$mod09,na.rm=T),
132
             mod09sd=sd(x$mod09,na.rm=T),
133
             cld=mean(x$Amt[x$Nobs>10]/100,na.rm=T),
134
             cldsd=sd(x$Amt[x$Nobs>10]/100,na.rm=T))}))
135
cldy[,c("lat","lon")]=coordinates(st)[match(cldy$StaID,st$id),c("lat","lon")]
136

    
137
## overall mean
138
clda=do.call(rbind.data.frame,by(cld,list(StaID=as.factor(cld$StaID)),function(x){
139
  data.frame(
140
             StaID=x$StaID[1],
141
             lulc=x$lulc[1],
142
             mod09=mean(x$mod09,na.rm=T),
143
             mod09sd=sd(x$mod09,na.rm=T),
144
             cld=mean(x$cld[x$Nobs>10],na.rm=T),
145
             cldsd=sd(x$cld[x$Nobs>10],na.rm=T))}))
146
clda[,c("lat","lon")]=coordinates(st)[match(clda$StaID,st$id),c("lat","lon")]
147

    
148

    
149
## write out the tables
150
write.csv(cld,file="cld.csv",row.names=F)
151
write.csv(cldy,file="cldy.csv",row.names=F)
152
write.csv(cldm,file="cldm.csv",row.names=F)
153
write.csv(clda,file="clda.csv",row.names=F
154
)
155
#########################################################################
156
##################
157
###
158
cld=read.csv("cld.csv")
159
cldm=read.csv("cldm.csv")
160
cldy=read.csv("cldy.csv")
161
clda=read.csv("clda.csv")
162
st=read.csv("stations.csv")
163

    
164
### remove mod09==0 due to mosaic problem (remove when fixed)
165
cld=cld[!is.na(cld$lat)&cld$mod09!=0,]
166
cldm=cldm[!is.na(cldm$lat)&cldm$mod09!=0,]
167
cldy=cldy[!is.na(cldy$lat)&cldy$mod09!=0,]
168

    
169
## month factors
170
cld$month2=factor(cld$month,labels=month.name)
171
cldm$month2=factor(cldm$month,labels=month.name)
172

    
173
coordinates(st)=c("lon","lat")
174
projection(st)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
175

    
176
##make spatial object
177
cldms=cldm
178
coordinates(cldms)=c("lon","lat")
179
projection(cldms)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
180

    
181
##make spatial object
182
cldys=cldy
183
coordinates(cldys)=c("lon","lat")
184
projection(cldys)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
185

    
186
#### Evaluate MOD35 Cloud data
187
mod09=brick("~/acrobates/adamw/projects/cloud/data/mod09.nc")
188
mod09c=brick("~/acrobates/adamw/projects/cloud/data/mod09_clim.nc",varname="CF");names(mod09c)=month.name
189
mod09c2=raster("~/acrobates/adamw/projects/cloud/data/mod09_clim.nc",varname="CF",nl=1)
190

    
191
### get monthly climatologies for each station 
192
#cldc=do.call(rbind.data.frame,by(cld,list(id=cld$StaID,month=cld$month),function(x){
193
#  x$mod09[x$mod09==0]=NA
194
#  data.frame(id=x$StaID[1],month=x$month[1],Nobs=sum(x$Nobs,na.rm=T),Amt=mean(x$Amt,na.rm=T),mod09=mean(x$mod09,na.rm=T))
195
#  }))
196

    
197
## read in global coasts for nice plotting
198
library(maptools)
199

    
200
data(wrld_simpl)
201
coast <- unionSpatialPolygons(wrld_simpl, rep("land",nrow(wrld_simpl)), threshold=5)
202
coast=as(coast,"SpatialLines")
203
#coast=spTransform(coast,CRS(projection(mod35)))
204

    
205

    
206
n=100
207
  at=seq(0,100,length=n)
208
colr=colorRampPalette(c("black","green","red"))
209
cols=colr(n)
210

    
211

    
212
pdf("/home/adamw/acrobates/adamw/projects/cloud/output/validation.pdf",width=11,height=8.5)
213

    
214
### maps of mod09 and NDP
215
## map of stations
216
xyplot(lat~lon,data=data.frame(coordinates(st)),pch=16,cex=.5, main="NDP-026D Cloud Climatology Stations",ylab="Latitude",xlab="Longitude")+
217
  layer(sp.lines(coast,col="grey"),under=T)
218

    
219
levelplot(mod09c,col.regions=colr(100),at=seq(0,100,len=100),margin=F,maxpixels=1e5,main="MOD09 Cloud Frequency",ylab="Latitude",xlab="Longitude")
220

    
221
#p2=xyplot(lat~lon|month2,data=cldm,col=as.character(cut(cldm$cld,seq(0,100,len=100),labels=colr(99))),pch=16,cex=.1,auto.key=T,asp=1,
222
#       main="NDP-026D Cloud Climatology Stations",ylab="Latitude",xlab="Longitude",layout=c(12,1))+
223
#  layer(sp.lines(coast,col="black",lwd=.1),under=F)
224
#v_month=c(p1,p2,layout=c(12,2),x.same=T,y.same=T,merge.legends=T)
225
#print(v_month)
226

    
227

    
228
#xyplot(lat~lon|month2,groups=cut(cldm$cld,seq(0,100,len=5)),data=cldm,pch=".",cex=.2,auto.key=T,
229
#       main="Mean Monthly Cloud Coverage",ylab="Latitude",xlab="Longitude",
230
#        par.settings = list(superpose.symbol= list(pch=16,col=c("blue","green","yellow","red"))))+
231
#  layer(sp.lines(coast,col="grey"),under=T)
232

    
233
### heatmap of mod09 vs. NDP for all months
234
hmcols=colorRampPalette(c("grey","blue","red"))
235
tr=c(0,27)
236
colkey <- draw.colorkey(list(col = hmcols(tr[2]), at = tr[1]:tr[2],height=.25))
237

    
238
xyplot(cld~mod09,data=cld[cld$Nobs>10,],panel=function(x,y,subscripts){
239
  n=150
240
  bins=seq(0,100,len=n)
241
  tb=melt(as.matrix(table(
242
    x=cut(x,bins,labels=bins[-1]),
243
    y=cut(y,bins,labels=bins[-1]))))
244
  qat=tr[1]:tr[2]#unique(tb$value)
245
  print(qat)
246
  panel.levelplot(tb$x,tb$y,tb$value,at=qat,col.regions=c("transparent",hmcols(length(qat))),subscripts=subscripts)
247
  },asp=1,scales=list(at=seq(0,100,len=6)),ylab="NDP Mean Cloud Amount (%)",xlab="MOD09 Cloud Frequency (%)",
248
       legend= list(right = list(fun = colkey,title="Station Count")))+
249
  layer(panel.abline(0,1,col="black",lwd=2))+
250
  layer(panel.ablineq(lm(y ~ x), r.sq = TRUE,at = 0.6,pos=1, offset=22,digits=2,col="blue"), style = 1)
251

    
252

    
253

    
254
xyplot(cld~mod09|month2,data=cld[cld$Nobs>10,],panel=function(x,y,subscripts){
255
  n=50
256
  bins=seq(0,100,len=n)
257
  tb=melt(as.matrix(table(
258
    x=cut(x,bins,labels=bins[-1]),
259
    y=cut(y,bins,labels=bins[-1]))))
260
  qat=unique(tb$value)
261
  print(qat)
262
  qat=0:26
263
  qat=tr[1]:tr[2]#unique(tb$value)
264
  panel.levelplot(tb$x,tb$y,tb$value,at=qat,col.regions=c("transparent",hmcols(length(qat))),subscripts=1:nrow(tb))
265
  layer(panel.abline(0,1,col="black",lwd=2))+
266
  layer(panel.ablineq(lm(y ~ x), r.sq = TRUE,at = 0.6,pos=1, offset=0,digits=2,col="blue"), style = 1)
267
},asp=1,scales=list(at=seq(0,100,len=6),useRaster=T,colorkey=list(width=.5,title="Number of Stations")),
268
          ylab="NDP Mean Cloud Amount (%)",xlab="MOD09 Cloud Frequency (%)",
269
              legend= list(right = list(fun = colkey)))+ layer(panel.abline(0,1,col="black",lwd=2))
270

    
271

    
272
xyplot(cld~mod09,data=clda,cex=0.5,pch=16)+
273
  layer(panel.abline(lm(y~x),col="blue"))+
274
#  layer(panel.lines(x,predict(lm(y~x),type="prediction")))+
275
  layer(panel.loess(x,y,col="blue",span=.2))+
276
  layer(panel.abline(0,1,col="red"))+
277
  layer(panel.segments(mod09,cld-cldsd,mod09,cld+cldsd,col="grey"),data=clda,under=T,magicdots=T)
278

    
279
## all monthly values
280
#xyplot(cld~mod09|as.factor(month),data=cld[cld$Nobs>75,],cex=.2,pch=16,subscripts=T)+
281
#  layer(panel.abline(lm(y~x),col="blue"))+
282
#  layer(panel.abline(0,1,col="red"))
283

    
284
## Monthly Climatologies
285
for(i in 1:2){
286
 p1=xyplot(cld~mod09|month2,data=cldm,cex=.2,pch=16,subscripts=T,ylab="NDP Mean Cloud Amount",xlab="MOD09 Cloud Frequency (%)")+
287
  layer(panel.lines(1:100,predict(lm(y~x),newdata=data.frame(x=1:100)),col="green"))+
288
  layer(panel.lines(1:100,predict(lm(y~x+I(x^3)),newdata=data.frame(x=1:100)),col="blue"))+
289
  layer(panel.abline(0,1,col="red"))
290
    if(i==2){
291
     p1=p1+layer(panel.segments(mod09[subscripts],cld[subscripts]-cldsd[subscripts],mod09[subscripts],cld[subscripts]+cldsd[subscripts],subscripts=subscripts,col="grey"),data=cldm,under=T,magicdots=T)
292
     p1=p1+layer(panel.segments(mod09[subscripts]-mod09sd[subscripts],cld[subscripts],mod09[subscripts]+mod09sd[subscripts],cld[subscripts],subscripts=subscripts,col="grey"),data=cldm,under=T,magicdots=T)
293
        }
294
print(p1)
295
}
296

    
297
dev.off()
298

    
299

    
300
summary(lm(Amt~mod09,data=cld))
301
summary(lm(cld~mod09_10+as.factor(lulc),data=d))
302
summary(lm(cld~mod09_10+as.factor(lulc),data=d))
303

    
304
### exploratory plots
305
xyplot(cld~mod09_10,groups=lulc,data=d@data,pch=16,cex=.5)+layer(panel.abline(0,1,col="red"))
306
xyplot(cld~mod09_10+mod35c5_10|as.factor(lulc),data=d@data,type=c("p","r"),pch=16,cex=.25,auto.key=T)+layer(panel.abline(0,1,col="green"))
307
xyplot(cld~mod35_10|as.factor(lulc),data=d@data,pch=16,cex=.5)+layer(panel.abline(0,1,col="red"))
308
xyplot(mod35_10~mod09_10|as.factor(lulc),data=d@data,pch=16,cex=.5)+layer(panel.abline(0,1,col="red"))
309

    
310
densityplot(stack(mod35,mod09))
311
boxplot(mod35,lulc)
312

    
313
bwplot(mod09~mod35|cut(y,5),data=stack(mod09,mod35))
314

    
315
## add a color key
316
breaks=seq(0,100,by=25)
317
cldm$cut=cut(cldm$cld,breaks)
318
cp=colorRampPalette(c("blue","orange","red"))
319
cols=cp(length(at))
320

    
321

    
322

    
323
## write a pdf
324
#dir.create("output")
325
pdf("output/NDP026d.pdf",width=11,height=8.5)
326

    
327

    
328

    
329
## Validation
330
m=10
331
zlim=c(40,100)
332
dr=subset(mod35,subset=m);projection(dr)=projection(mod35)
333
ds=cldms[cldms$month==m,]
334
plot(dr,col=cp(100),zlim=zlim,main="Comparison of MOD35 Cloud Frequency and NDP-026D Station Cloud Climatologies",
335
     ylab="Northing (m)",xlab="Easting (m)",sub="MOD35 is proportion of cloudy days, while NDP-026D is Mean Cloud Coverage")
336
plot(ds,add=T,pch=21,cex=3,lwd=2,fg="black",bg=as.character(cut(ds$cld,breaks=seq(zlim[1],zlim[2],len=5),labels=cp(4))))
337
#legend("topright",legend=seq(zlim[1],zlim[2],len=5),pch=16,col=cp(length(breaks)))
338

    
339

    
340
xyplot(mod35~cld,data=mod35v,subscripts=T,auto.key=T,panel=function(x,y,subscripts){
341
   td=mod35v[subscripts,]
342
#   panel.segments(x-td$cldsd[subscripts],y,x+td$cldsd[subscripts],y,subscripts=subscripts)
343
   panel.xyplot(x,y,subscripts=subscripts,type=c("p","smooth"),pch=16,col="black")
344
#   panel.segments(x-td$cldsd[subscripts],y,x+td$cldsd[subscripts],y,subscripts=subscripts)
345
 },ylab="MOD35 Proportion Cloudy Days",xlab="NDP-026D Mean Monthly Cloud Amount",
346
        main="Comparison of MOD35 Cloud Mask and Station Cloud Climatologies")
347

    
348
#xyplot(mod35~cld|month,data=mod35v,subscripts=T,auto.key=T,panel=function(x,y,subscripts){
349
#   td=mod35v[subscripts,]
350
#   panel.segments(x-td$cldsd[subscripts],y,x+td$cldsd[subscripts],y,subscripts=subscripts)
351
#   panel.xyplot(x,y,subscripts=subscripts,type=c("p","smooth"),pch=16,col="black")
352
#   panel.segments(x-td$cldsd[subscripts],y,x+td$cldsd[subscripts],y,subscripts=subscripts)
353
# },ylab="MOD35 Proportion Cloudy Days",xlab="NDP-026D Mean Monthly Cloud Amount",
354
#        main="Comparison of MOD35 Cloud Mask and Station Cloud Climatologies")
355

    
356

    
357
dev.off()
358

    
359
graphics.off()
(35-35/48)