Project

General

Profile

« Previous | Next » 

Revision f5ef0596

Added by Adam Wilson almost 11 years ago

Updated EE script to run in parallel and minor edits to post-processing

View differences:

climate/procedures/NDP-026D.R
1
#! /bin/R
2 1
### Script to download and process the NDP-026D station cloud dataset
3
setwd("~/acrobates/adamw/projects/interp/data/NDP026D")
2

  
3
setwd("~/acrobates/adamw/projects/cloud/data/NDP026D")
4 4

  
5 5
library(multicore)
6
library(latticeExtra)
7 6
library(doMC)
8 7
library(rasterVis)
9 8
library(rgdal)
10
library(reshape)
11
library(hexbin)
12
## register parallel processing
13
#registerDoMC(10)
14
#beginCluster(10)
15 9

  
16
## available here http://cdiac.ornl.gov/epubs/ndp/ndp026d/ndp026d.html
10

  
11

  
12
## Data available here http://cdiac.ornl.gov/epubs/ndp/ndp026d/ndp026d.html
17 13

  
18 14
## Get station locations
19 15
system("wget -N -nd http://cdiac.ornl.gov/ftp/ndp026d/cat01/01_STID -P data/")
......
45 41
  ))
46 42

  
47 43
## add lat/lon
48
cld[,c("lat","lon")]=st[match(cld$StaID,st$id),c("lat","lon")]
44
cld[,c("lat","lon")]=coordinates(st)[match(cld$StaID,st$id),]
49 45

  
50 46
## drop missing values
51 47
cld=cld[,!grepl("Fq|AWP|NC",colnames(cld))]
......
62 58
## overlay the data with 32km diameter (16km radius) buffer
63 59
## buffer size from Dybbroe, et al. (2005) doi:10.1175/JAM-2189.1.
64 60
buf=16000
65
#mod09sta=lapply(1:nlayers(mod09),function(l) {print(l); extract(mod09[[l]],st,buffer=buf,fun=mean,na.rm=T,df=T)[,2]})
66 61
bins=cut(1:nrow(st),100)
67 62
mod09sta=lapply(levels(bins),function(lb) {
68 63
  l=which(bins==lb)
......
73 68
  td
74 69
})#,mc.cores=3)
75 70

  
76
#mod09sta=extract(mod09,st,buffer=buf,fun=mean,na.rm=T,df=T)
71
## read it back in
77 72
mod09st=read.csv("valid.csv",header=F)[,-c(1,2)]
78 73

  
79
#mod09st=do.call(rbind.data.frame,mod09sta)
80
#mod09st=mod09st[,!is.na(colnames(mod09st))]
81
colnames(mod09st)=c(names(mod09),"id")
82
#mod09st$id=st$id
74
colnames(mod09st)=c(names(mod09)[-1],"id")
83 75
mod09stl=melt(mod09st,id.vars="id")
84 76
mod09stl[,c("year","month")]=do.call(rbind,strsplit(sub("X","",mod09stl$variable),"[.]"))[,1:2]
77

  
85 78
## add it to cld
86 79
cld$mod09=mod09stl$value[match(paste(cld$StaID,cld$YR,cld$month),paste(mod09stl$id,mod09stl$year,as.numeric(mod09stl$month)))]
87 80

  
......
89 82
## LULC
90 83
#system(paste("gdalwarp -r near -co \"COMPRESS=LZW\" -tr ",paste(res(mod09),collapse=" ",sep=""),
91 84
#             "-tap -multi -t_srs \"",   projection(mod09),"\" /mnt/data/jetzlab/Data/environ/global/landcover/MODIS/MCD12Q1_IGBP_2005_v51.tif ../modis/mod12/MCD12Q1_IGBP_2005_v51.tif"))
92
lulc=raster("../modis/mod12/MCD12Q1_IGBP_2005_v51.tif")
93
#lulc=ratify(lulc)
85
lulc=raster("~/acrobates/adamw/projects/interp/data/modis/mod12/MCD12Q1_IGBP_2005_v51.tif")
94 86
require(plotKML); data(worldgrids_pal)  #load IGBP palette
95 87
IGBP=data.frame(ID=0:16,col=worldgrids_pal$IGBP[-c(18,19)],stringsAsFactors=F)
96 88
IGBP$class=rownames(IGBP);rownames(IGBP)=1:nrow(IGBP)
97 89
levels(lulc)=list(IGBP)
98
#lulc=crop(lulc,mod09)
99
  Mode <- function(x) {
90
## function to get modal lulc value
91
Mode <- function(x) {
100 92
      ux <- na.omit(unique(x))
101 93
        ux[which.max(tabulate(match(x, ux)))]
102 94
      }
......
104 96
colnames(lulcst)=c("id","lulc")
105 97
## add it to cld
106 98
cld$lulc=lulcst$lulc[match(cld$StaID,lulcst$id)]
107
#cld$lulc=factor(as.integer(cld$lulc),labels=IGBP$class[sort(unique(cld$lulc))])
99
cld$lulcc=IGBP$class[match(cld$lulc,IGBP$ID)]
108 100

  
109 101
## update cld column names
110 102
colnames(cld)[grep("Amt",colnames(cld))]="cld"
111 103
cld$cld=cld$cld/100
104
cld[,c("lat","lon")]=coordinates(st)[match(cld$StaID,st$id),c("lat","lon")]
112 105

  
113 106
## calculate means and sds
114 107
cldm=do.call(rbind.data.frame,by(cld,list(month=as.factor(cld$month),StaID=as.factor(cld$StaID)),function(x){
......
118 111
             StaID=x$StaID[1],
119 112
             mod09=mean(x$mod09,na.rm=T),
120 113
             mod09sd=sd(x$mod09,na.rm=T),
121
             cld=mean(x$cld[x$Nobs>10],na.rm=T),
122
             cldsd=sd(x$cld[x$Nobs>10],na.rm=T))}))
114
             cld=mean(x$cld[x$Nobs>50],na.rm=T),
115
             cldsd=sd(x$cld[x$Nobs>50],na.rm=T))}))
123 116
cldm[,c("lat","lon")]=coordinates(st)[match(cldm$StaID,st$id),c("lat","lon")]
124 117

  
125 118
## means by year
......
130 123
             lulc=x$lulc[1],
131 124
             mod09=mean(x$mod09,na.rm=T),
132 125
             mod09sd=sd(x$mod09,na.rm=T),
133
             cld=mean(x$Amt[x$Nobs>10]/100,na.rm=T),
134
             cldsd=sd(x$Amt[x$Nobs>10]/100,na.rm=T))}))
126
             cld=mean(x$cld[x$Nobs>50]/100,na.rm=T),
127
             cldsd=sd(x$cld[x$Nobs>50]/100,na.rm=T))}))
135 128
cldy[,c("lat","lon")]=coordinates(st)[match(cldy$StaID,st$id),c("lat","lon")]
136 129

  
137 130
## overall mean
......
150 143
write.csv(cld,file="cld.csv",row.names=F)
151 144
write.csv(cldy,file="cldy.csv",row.names=F)
152 145
write.csv(cldm,file="cldm.csv",row.names=F)
153
write.csv(clda,file="clda.csv",row.names=F
154
)
155
#########################################################################
156
##################
157
###
158
cld=read.csv("cld.csv")
159
cldm=read.csv("cldm.csv")
160
cldy=read.csv("cldy.csv")
161
clda=read.csv("clda.csv")
162
st=read.csv("stations.csv")
163

  
164
### remove mod09==0 due to mosaic problem (remove when fixed)
165
cld=cld[!is.na(cld$lat)&cld$mod09!=0,]
166
cldm=cldm[!is.na(cldm$lat)&cldm$mod09!=0,]
167
cldy=cldy[!is.na(cldy$lat)&cldy$mod09!=0,]
168

  
169
## month factors
170
cld$month2=factor(cld$month,labels=month.name)
171
cldm$month2=factor(cldm$month,labels=month.name)
172

  
173
coordinates(st)=c("lon","lat")
174
projection(st)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
175

  
176
##make spatial object
177
cldms=cldm
178
coordinates(cldms)=c("lon","lat")
179
projection(cldms)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
180

  
181
##make spatial object
182
cldys=cldy
183
coordinates(cldys)=c("lon","lat")
184
projection(cldys)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
185

  
186
#### Evaluate MOD35 Cloud data
187
mod09=brick("~/acrobates/adamw/projects/cloud/data/mod09.nc")
188
mod09c=brick("~/acrobates/adamw/projects/cloud/data/mod09_clim.nc",varname="CF");names(mod09c)=month.name
189
mod09c2=raster("~/acrobates/adamw/projects/cloud/data/mod09_clim.nc",varname="CF",nl=1)
190

  
191
### get monthly climatologies for each station 
192
#cldc=do.call(rbind.data.frame,by(cld,list(id=cld$StaID,month=cld$month),function(x){
193
#  x$mod09[x$mod09==0]=NA
194
#  data.frame(id=x$StaID[1],month=x$month[1],Nobs=sum(x$Nobs,na.rm=T),Amt=mean(x$Amt,na.rm=T),mod09=mean(x$mod09,na.rm=T))
195
#  }))
196

  
197
## read in global coasts for nice plotting
198
library(maptools)
199

  
200
data(wrld_simpl)
201
coast <- unionSpatialPolygons(wrld_simpl, rep("land",nrow(wrld_simpl)), threshold=5)
202
coast=as(coast,"SpatialLines")
203
#coast=spTransform(coast,CRS(projection(mod35)))
204

  
205

  
206
n=100
207
  at=seq(0,100,length=n)
208
colr=colorRampPalette(c("black","green","red"))
209
cols=colr(n)
210

  
211

  
212
pdf("/home/adamw/acrobates/adamw/projects/cloud/output/validation.pdf",width=11,height=8.5)
213

  
214
### maps of mod09 and NDP
215
## map of stations
216
xyplot(lat~lon,data=data.frame(coordinates(st)),pch=16,cex=.5, main="NDP-026D Cloud Climatology Stations",ylab="Latitude",xlab="Longitude")+
217
  layer(sp.lines(coast,col="grey"),under=T)
218

  
219
levelplot(mod09c,col.regions=colr(100),at=seq(0,100,len=100),margin=F,maxpixels=1e5,main="MOD09 Cloud Frequency",ylab="Latitude",xlab="Longitude")
220

  
221
#p2=xyplot(lat~lon|month2,data=cldm,col=as.character(cut(cldm$cld,seq(0,100,len=100),labels=colr(99))),pch=16,cex=.1,auto.key=T,asp=1,
222
#       main="NDP-026D Cloud Climatology Stations",ylab="Latitude",xlab="Longitude",layout=c(12,1))+
223
#  layer(sp.lines(coast,col="black",lwd=.1),under=F)
224
#v_month=c(p1,p2,layout=c(12,2),x.same=T,y.same=T,merge.legends=T)
225
#print(v_month)
226

  
227

  
228
#xyplot(lat~lon|month2,groups=cut(cldm$cld,seq(0,100,len=5)),data=cldm,pch=".",cex=.2,auto.key=T,
229
#       main="Mean Monthly Cloud Coverage",ylab="Latitude",xlab="Longitude",
230
#        par.settings = list(superpose.symbol= list(pch=16,col=c("blue","green","yellow","red"))))+
231
#  layer(sp.lines(coast,col="grey"),under=T)
146
write.csv(clda,file="clda.csv",row.names=F)
232 147

  
233
### heatmap of mod09 vs. NDP for all months
234
hmcols=colorRampPalette(c("grey","blue","red"))
235
tr=c(0,27)
236
colkey <- draw.colorkey(list(col = hmcols(tr[2]), at = tr[1]:tr[2],height=.25))
237

  
238
xyplot(cld~mod09,data=cld[cld$Nobs>10,],panel=function(x,y,subscripts){
239
  n=150
240
  bins=seq(0,100,len=n)
241
  tb=melt(as.matrix(table(
242
    x=cut(x,bins,labels=bins[-1]),
243
    y=cut(y,bins,labels=bins[-1]))))
244
  qat=tr[1]:tr[2]#unique(tb$value)
245
  print(qat)
246
  panel.levelplot(tb$x,tb$y,tb$value,at=qat,col.regions=c("transparent",hmcols(length(qat))),subscripts=subscripts)
247
  },asp=1,scales=list(at=seq(0,100,len=6)),ylab="NDP Mean Cloud Amount (%)",xlab="MOD09 Cloud Frequency (%)",
248
       legend= list(right = list(fun = colkey,title="Station Count")))+
249
  layer(panel.abline(0,1,col="black",lwd=2))+
250
  layer(panel.ablineq(lm(y ~ x), r.sq = TRUE,at = 0.6,pos=1, offset=22,digits=2,col="blue"), style = 1)
251

  
252

  
253

  
254
xyplot(cld~mod09|month2,data=cld[cld$Nobs>10,],panel=function(x,y,subscripts){
255
  n=50
256
  bins=seq(0,100,len=n)
257
  tb=melt(as.matrix(table(
258
    x=cut(x,bins,labels=bins[-1]),
259
    y=cut(y,bins,labels=bins[-1]))))
260
  qat=unique(tb$value)
261
  print(qat)
262
  qat=0:26
263
  qat=tr[1]:tr[2]#unique(tb$value)
264
  panel.levelplot(tb$x,tb$y,tb$value,at=qat,col.regions=c("transparent",hmcols(length(qat))),subscripts=1:nrow(tb))
265
  layer(panel.abline(0,1,col="black",lwd=2))+
266
  layer(panel.ablineq(lm(y ~ x), r.sq = TRUE,at = 0.6,pos=1, offset=0,digits=2,col="blue"), style = 1)
267
},asp=1,scales=list(at=seq(0,100,len=6),useRaster=T,colorkey=list(width=.5,title="Number of Stations")),
268
          ylab="NDP Mean Cloud Amount (%)",xlab="MOD09 Cloud Frequency (%)",
269
              legend= list(right = list(fun = colkey)))+ layer(panel.abline(0,1,col="black",lwd=2))
270

  
271

  
272
xyplot(cld~mod09,data=clda,cex=0.5,pch=16)+
273
  layer(panel.abline(lm(y~x),col="blue"))+
274
#  layer(panel.lines(x,predict(lm(y~x),type="prediction")))+
275
  layer(panel.loess(x,y,col="blue",span=.2))+
276
  layer(panel.abline(0,1,col="red"))+
277
  layer(panel.segments(mod09,cld-cldsd,mod09,cld+cldsd,col="grey"),data=clda,under=T,magicdots=T)
278

  
279
## all monthly values
280
#xyplot(cld~mod09|as.factor(month),data=cld[cld$Nobs>75,],cex=.2,pch=16,subscripts=T)+
281
#  layer(panel.abline(lm(y~x),col="blue"))+
282
#  layer(panel.abline(0,1,col="red"))
283

  
284
## Monthly Climatologies
285
for(i in 1:2){
286
 p1=xyplot(cld~mod09|month2,data=cldm,cex=.2,pch=16,subscripts=T,ylab="NDP Mean Cloud Amount",xlab="MOD09 Cloud Frequency (%)")+
287
  layer(panel.lines(1:100,predict(lm(y~x),newdata=data.frame(x=1:100)),col="green"))+
288
  layer(panel.lines(1:100,predict(lm(y~x+I(x^3)),newdata=data.frame(x=1:100)),col="blue"))+
289
  layer(panel.abline(0,1,col="red"))
290
    if(i==2){
291
     p1=p1+layer(panel.segments(mod09[subscripts],cld[subscripts]-cldsd[subscripts],mod09[subscripts],cld[subscripts]+cldsd[subscripts],subscripts=subscripts,col="grey"),data=cldm,under=T,magicdots=T)
292
     p1=p1+layer(panel.segments(mod09[subscripts]-mod09sd[subscripts],cld[subscripts],mod09[subscripts]+mod09sd[subscripts],cld[subscripts],subscripts=subscripts,col="grey"),data=cldm,under=T,magicdots=T)
293
        }
294
print(p1)
295
}
296

  
297
dev.off()
298

  
299

  
300
summary(lm(Amt~mod09,data=cld))
301
summary(lm(cld~mod09_10+as.factor(lulc),data=d))
302
summary(lm(cld~mod09_10+as.factor(lulc),data=d))
303

  
304
### exploratory plots
305
xyplot(cld~mod09_10,groups=lulc,data=d@data,pch=16,cex=.5)+layer(panel.abline(0,1,col="red"))
306
xyplot(cld~mod09_10+mod35c5_10|as.factor(lulc),data=d@data,type=c("p","r"),pch=16,cex=.25,auto.key=T)+layer(panel.abline(0,1,col="green"))
307
xyplot(cld~mod35_10|as.factor(lulc),data=d@data,pch=16,cex=.5)+layer(panel.abline(0,1,col="red"))
308
xyplot(mod35_10~mod09_10|as.factor(lulc),data=d@data,pch=16,cex=.5)+layer(panel.abline(0,1,col="red"))
309

  
310
densityplot(stack(mod35,mod09))
311
boxplot(mod35,lulc)
312

  
313
bwplot(mod09~mod35|cut(y,5),data=stack(mod09,mod35))
314

  
315
## add a color key
316
breaks=seq(0,100,by=25)
317
cldm$cut=cut(cldm$cld,breaks)
318
cp=colorRampPalette(c("blue","orange","red"))
319
cols=cp(length(at))
320

  
321

  
322

  
323
## write a pdf
324
#dir.create("output")
325
pdf("output/NDP026d.pdf",width=11,height=8.5)
326

  
327

  
328

  
329
## Validation
330
m=10
331
zlim=c(40,100)
332
dr=subset(mod35,subset=m);projection(dr)=projection(mod35)
333
ds=cldms[cldms$month==m,]
334
plot(dr,col=cp(100),zlim=zlim,main="Comparison of MOD35 Cloud Frequency and NDP-026D Station Cloud Climatologies",
335
     ylab="Northing (m)",xlab="Easting (m)",sub="MOD35 is proportion of cloudy days, while NDP-026D is Mean Cloud Coverage")
336
plot(ds,add=T,pch=21,cex=3,lwd=2,fg="black",bg=as.character(cut(ds$cld,breaks=seq(zlim[1],zlim[2],len=5),labels=cp(4))))
337
#legend("topright",legend=seq(zlim[1],zlim[2],len=5),pch=16,col=cp(length(breaks)))
338

  
339

  
340
xyplot(mod35~cld,data=mod35v,subscripts=T,auto.key=T,panel=function(x,y,subscripts){
341
   td=mod35v[subscripts,]
342
#   panel.segments(x-td$cldsd[subscripts],y,x+td$cldsd[subscripts],y,subscripts=subscripts)
343
   panel.xyplot(x,y,subscripts=subscripts,type=c("p","smooth"),pch=16,col="black")
344
#   panel.segments(x-td$cldsd[subscripts],y,x+td$cldsd[subscripts],y,subscripts=subscripts)
345
 },ylab="MOD35 Proportion Cloudy Days",xlab="NDP-026D Mean Monthly Cloud Amount",
346
        main="Comparison of MOD35 Cloud Mask and Station Cloud Climatologies")
347

  
348
#xyplot(mod35~cld|month,data=mod35v,subscripts=T,auto.key=T,panel=function(x,y,subscripts){
349
#   td=mod35v[subscripts,]
350
#   panel.segments(x-td$cldsd[subscripts],y,x+td$cldsd[subscripts],y,subscripts=subscripts)
351
#   panel.xyplot(x,y,subscripts=subscripts,type=c("p","smooth"),pch=16,col="black")
352
#   panel.segments(x-td$cldsd[subscripts],y,x+td$cldsd[subscripts],y,subscripts=subscripts)
353
# },ylab="MOD35 Proportion Cloudy Days",xlab="NDP-026D Mean Monthly Cloud Amount",
354
#        main="Comparison of MOD35 Cloud Mask and Station Cloud Climatologies")
355

  
356

  
357
dev.off()
148
#########################################################################
358 149

  
359
graphics.off()

Also available in: Unified diff