Revision f5ef0596
Added by Adam Wilson almost 11 years ago
climate/procedures/NDP-026D.R | ||
---|---|---|
1 |
#! /bin/R |
|
2 | 1 |
### Script to download and process the NDP-026D station cloud dataset |
3 |
setwd("~/acrobates/adamw/projects/interp/data/NDP026D") |
|
2 |
|
|
3 |
setwd("~/acrobates/adamw/projects/cloud/data/NDP026D") |
|
4 | 4 |
|
5 | 5 |
library(multicore) |
6 |
library(latticeExtra) |
|
7 | 6 |
library(doMC) |
8 | 7 |
library(rasterVis) |
9 | 8 |
library(rgdal) |
10 |
library(reshape) |
|
11 |
library(hexbin) |
|
12 |
## register parallel processing |
|
13 |
#registerDoMC(10) |
|
14 |
#beginCluster(10) |
|
15 | 9 |
|
16 |
## available here http://cdiac.ornl.gov/epubs/ndp/ndp026d/ndp026d.html |
|
10 |
|
|
11 |
|
|
12 |
## Data available here http://cdiac.ornl.gov/epubs/ndp/ndp026d/ndp026d.html |
|
17 | 13 |
|
18 | 14 |
## Get station locations |
19 | 15 |
system("wget -N -nd http://cdiac.ornl.gov/ftp/ndp026d/cat01/01_STID -P data/") |
... | ... | |
45 | 41 |
)) |
46 | 42 |
|
47 | 43 |
## add lat/lon |
48 |
cld[,c("lat","lon")]=st[match(cld$StaID,st$id),c("lat","lon")]
|
|
44 |
cld[,c("lat","lon")]=coordinates(st)[match(cld$StaID,st$id),]
|
|
49 | 45 |
|
50 | 46 |
## drop missing values |
51 | 47 |
cld=cld[,!grepl("Fq|AWP|NC",colnames(cld))] |
... | ... | |
62 | 58 |
## overlay the data with 32km diameter (16km radius) buffer |
63 | 59 |
## buffer size from Dybbroe, et al. (2005) doi:10.1175/JAM-2189.1. |
64 | 60 |
buf=16000 |
65 |
#mod09sta=lapply(1:nlayers(mod09),function(l) {print(l); extract(mod09[[l]],st,buffer=buf,fun=mean,na.rm=T,df=T)[,2]}) |
|
66 | 61 |
bins=cut(1:nrow(st),100) |
67 | 62 |
mod09sta=lapply(levels(bins),function(lb) { |
68 | 63 |
l=which(bins==lb) |
... | ... | |
73 | 68 |
td |
74 | 69 |
})#,mc.cores=3) |
75 | 70 |
|
76 |
#mod09sta=extract(mod09,st,buffer=buf,fun=mean,na.rm=T,df=T)
|
|
71 |
## read it back in
|
|
77 | 72 |
mod09st=read.csv("valid.csv",header=F)[,-c(1,2)] |
78 | 73 |
|
79 |
#mod09st=do.call(rbind.data.frame,mod09sta) |
|
80 |
#mod09st=mod09st[,!is.na(colnames(mod09st))] |
|
81 |
colnames(mod09st)=c(names(mod09),"id") |
|
82 |
#mod09st$id=st$id |
|
74 |
colnames(mod09st)=c(names(mod09)[-1],"id") |
|
83 | 75 |
mod09stl=melt(mod09st,id.vars="id") |
84 | 76 |
mod09stl[,c("year","month")]=do.call(rbind,strsplit(sub("X","",mod09stl$variable),"[.]"))[,1:2] |
77 |
|
|
85 | 78 |
## add it to cld |
86 | 79 |
cld$mod09=mod09stl$value[match(paste(cld$StaID,cld$YR,cld$month),paste(mod09stl$id,mod09stl$year,as.numeric(mod09stl$month)))] |
87 | 80 |
|
... | ... | |
89 | 82 |
## LULC |
90 | 83 |
#system(paste("gdalwarp -r near -co \"COMPRESS=LZW\" -tr ",paste(res(mod09),collapse=" ",sep=""), |
91 | 84 |
# "-tap -multi -t_srs \"", projection(mod09),"\" /mnt/data/jetzlab/Data/environ/global/landcover/MODIS/MCD12Q1_IGBP_2005_v51.tif ../modis/mod12/MCD12Q1_IGBP_2005_v51.tif")) |
92 |
lulc=raster("../modis/mod12/MCD12Q1_IGBP_2005_v51.tif") |
|
93 |
#lulc=ratify(lulc) |
|
85 |
lulc=raster("~/acrobates/adamw/projects/interp/data/modis/mod12/MCD12Q1_IGBP_2005_v51.tif") |
|
94 | 86 |
require(plotKML); data(worldgrids_pal) #load IGBP palette |
95 | 87 |
IGBP=data.frame(ID=0:16,col=worldgrids_pal$IGBP[-c(18,19)],stringsAsFactors=F) |
96 | 88 |
IGBP$class=rownames(IGBP);rownames(IGBP)=1:nrow(IGBP) |
97 | 89 |
levels(lulc)=list(IGBP) |
98 |
#lulc=crop(lulc,mod09)
|
|
99 |
Mode <- function(x) {
|
|
90 |
## function to get modal lulc value
|
|
91 |
Mode <- function(x) { |
|
100 | 92 |
ux <- na.omit(unique(x)) |
101 | 93 |
ux[which.max(tabulate(match(x, ux)))] |
102 | 94 |
} |
... | ... | |
104 | 96 |
colnames(lulcst)=c("id","lulc") |
105 | 97 |
## add it to cld |
106 | 98 |
cld$lulc=lulcst$lulc[match(cld$StaID,lulcst$id)] |
107 |
#cld$lulc=factor(as.integer(cld$lulc),labels=IGBP$class[sort(unique(cld$lulc))])
|
|
99 |
cld$lulcc=IGBP$class[match(cld$lulc,IGBP$ID)]
|
|
108 | 100 |
|
109 | 101 |
## update cld column names |
110 | 102 |
colnames(cld)[grep("Amt",colnames(cld))]="cld" |
111 | 103 |
cld$cld=cld$cld/100 |
104 |
cld[,c("lat","lon")]=coordinates(st)[match(cld$StaID,st$id),c("lat","lon")] |
|
112 | 105 |
|
113 | 106 |
## calculate means and sds |
114 | 107 |
cldm=do.call(rbind.data.frame,by(cld,list(month=as.factor(cld$month),StaID=as.factor(cld$StaID)),function(x){ |
... | ... | |
118 | 111 |
StaID=x$StaID[1], |
119 | 112 |
mod09=mean(x$mod09,na.rm=T), |
120 | 113 |
mod09sd=sd(x$mod09,na.rm=T), |
121 |
cld=mean(x$cld[x$Nobs>10],na.rm=T),
|
|
122 |
cldsd=sd(x$cld[x$Nobs>10],na.rm=T))}))
|
|
114 |
cld=mean(x$cld[x$Nobs>50],na.rm=T),
|
|
115 |
cldsd=sd(x$cld[x$Nobs>50],na.rm=T))}))
|
|
123 | 116 |
cldm[,c("lat","lon")]=coordinates(st)[match(cldm$StaID,st$id),c("lat","lon")] |
124 | 117 |
|
125 | 118 |
## means by year |
... | ... | |
130 | 123 |
lulc=x$lulc[1], |
131 | 124 |
mod09=mean(x$mod09,na.rm=T), |
132 | 125 |
mod09sd=sd(x$mod09,na.rm=T), |
133 |
cld=mean(x$Amt[x$Nobs>10]/100,na.rm=T),
|
|
134 |
cldsd=sd(x$Amt[x$Nobs>10]/100,na.rm=T))}))
|
|
126 |
cld=mean(x$cld[x$Nobs>50]/100,na.rm=T),
|
|
127 |
cldsd=sd(x$cld[x$Nobs>50]/100,na.rm=T))}))
|
|
135 | 128 |
cldy[,c("lat","lon")]=coordinates(st)[match(cldy$StaID,st$id),c("lat","lon")] |
136 | 129 |
|
137 | 130 |
## overall mean |
... | ... | |
150 | 143 |
write.csv(cld,file="cld.csv",row.names=F) |
151 | 144 |
write.csv(cldy,file="cldy.csv",row.names=F) |
152 | 145 |
write.csv(cldm,file="cldm.csv",row.names=F) |
153 |
write.csv(clda,file="clda.csv",row.names=F |
|
154 |
) |
|
155 |
######################################################################### |
|
156 |
################## |
|
157 |
### |
|
158 |
cld=read.csv("cld.csv") |
|
159 |
cldm=read.csv("cldm.csv") |
|
160 |
cldy=read.csv("cldy.csv") |
|
161 |
clda=read.csv("clda.csv") |
|
162 |
st=read.csv("stations.csv") |
|
163 |
|
|
164 |
### remove mod09==0 due to mosaic problem (remove when fixed) |
|
165 |
cld=cld[!is.na(cld$lat)&cld$mod09!=0,] |
|
166 |
cldm=cldm[!is.na(cldm$lat)&cldm$mod09!=0,] |
|
167 |
cldy=cldy[!is.na(cldy$lat)&cldy$mod09!=0,] |
|
168 |
|
|
169 |
## month factors |
|
170 |
cld$month2=factor(cld$month,labels=month.name) |
|
171 |
cldm$month2=factor(cldm$month,labels=month.name) |
|
172 |
|
|
173 |
coordinates(st)=c("lon","lat") |
|
174 |
projection(st)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs") |
|
175 |
|
|
176 |
##make spatial object |
|
177 |
cldms=cldm |
|
178 |
coordinates(cldms)=c("lon","lat") |
|
179 |
projection(cldms)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs") |
|
180 |
|
|
181 |
##make spatial object |
|
182 |
cldys=cldy |
|
183 |
coordinates(cldys)=c("lon","lat") |
|
184 |
projection(cldys)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs") |
|
185 |
|
|
186 |
#### Evaluate MOD35 Cloud data |
|
187 |
mod09=brick("~/acrobates/adamw/projects/cloud/data/mod09.nc") |
|
188 |
mod09c=brick("~/acrobates/adamw/projects/cloud/data/mod09_clim.nc",varname="CF");names(mod09c)=month.name |
|
189 |
mod09c2=raster("~/acrobates/adamw/projects/cloud/data/mod09_clim.nc",varname="CF",nl=1) |
|
190 |
|
|
191 |
### get monthly climatologies for each station |
|
192 |
#cldc=do.call(rbind.data.frame,by(cld,list(id=cld$StaID,month=cld$month),function(x){ |
|
193 |
# x$mod09[x$mod09==0]=NA |
|
194 |
# data.frame(id=x$StaID[1],month=x$month[1],Nobs=sum(x$Nobs,na.rm=T),Amt=mean(x$Amt,na.rm=T),mod09=mean(x$mod09,na.rm=T)) |
|
195 |
# })) |
|
196 |
|
|
197 |
## read in global coasts for nice plotting |
|
198 |
library(maptools) |
|
199 |
|
|
200 |
data(wrld_simpl) |
|
201 |
coast <- unionSpatialPolygons(wrld_simpl, rep("land",nrow(wrld_simpl)), threshold=5) |
|
202 |
coast=as(coast,"SpatialLines") |
|
203 |
#coast=spTransform(coast,CRS(projection(mod35))) |
|
204 |
|
|
205 |
|
|
206 |
n=100 |
|
207 |
at=seq(0,100,length=n) |
|
208 |
colr=colorRampPalette(c("black","green","red")) |
|
209 |
cols=colr(n) |
|
210 |
|
|
211 |
|
|
212 |
pdf("/home/adamw/acrobates/adamw/projects/cloud/output/validation.pdf",width=11,height=8.5) |
|
213 |
|
|
214 |
### maps of mod09 and NDP |
|
215 |
## map of stations |
|
216 |
xyplot(lat~lon,data=data.frame(coordinates(st)),pch=16,cex=.5, main="NDP-026D Cloud Climatology Stations",ylab="Latitude",xlab="Longitude")+ |
|
217 |
layer(sp.lines(coast,col="grey"),under=T) |
|
218 |
|
|
219 |
levelplot(mod09c,col.regions=colr(100),at=seq(0,100,len=100),margin=F,maxpixels=1e5,main="MOD09 Cloud Frequency",ylab="Latitude",xlab="Longitude") |
|
220 |
|
|
221 |
#p2=xyplot(lat~lon|month2,data=cldm,col=as.character(cut(cldm$cld,seq(0,100,len=100),labels=colr(99))),pch=16,cex=.1,auto.key=T,asp=1, |
|
222 |
# main="NDP-026D Cloud Climatology Stations",ylab="Latitude",xlab="Longitude",layout=c(12,1))+ |
|
223 |
# layer(sp.lines(coast,col="black",lwd=.1),under=F) |
|
224 |
#v_month=c(p1,p2,layout=c(12,2),x.same=T,y.same=T,merge.legends=T) |
|
225 |
#print(v_month) |
|
226 |
|
|
227 |
|
|
228 |
#xyplot(lat~lon|month2,groups=cut(cldm$cld,seq(0,100,len=5)),data=cldm,pch=".",cex=.2,auto.key=T, |
|
229 |
# main="Mean Monthly Cloud Coverage",ylab="Latitude",xlab="Longitude", |
|
230 |
# par.settings = list(superpose.symbol= list(pch=16,col=c("blue","green","yellow","red"))))+ |
|
231 |
# layer(sp.lines(coast,col="grey"),under=T) |
|
146 |
write.csv(clda,file="clda.csv",row.names=F) |
|
232 | 147 |
|
233 |
### heatmap of mod09 vs. NDP for all months |
|
234 |
hmcols=colorRampPalette(c("grey","blue","red")) |
|
235 |
tr=c(0,27) |
|
236 |
colkey <- draw.colorkey(list(col = hmcols(tr[2]), at = tr[1]:tr[2],height=.25)) |
|
237 |
|
|
238 |
xyplot(cld~mod09,data=cld[cld$Nobs>10,],panel=function(x,y,subscripts){ |
|
239 |
n=150 |
|
240 |
bins=seq(0,100,len=n) |
|
241 |
tb=melt(as.matrix(table( |
|
242 |
x=cut(x,bins,labels=bins[-1]), |
|
243 |
y=cut(y,bins,labels=bins[-1])))) |
|
244 |
qat=tr[1]:tr[2]#unique(tb$value) |
|
245 |
print(qat) |
|
246 |
panel.levelplot(tb$x,tb$y,tb$value,at=qat,col.regions=c("transparent",hmcols(length(qat))),subscripts=subscripts) |
|
247 |
},asp=1,scales=list(at=seq(0,100,len=6)),ylab="NDP Mean Cloud Amount (%)",xlab="MOD09 Cloud Frequency (%)", |
|
248 |
legend= list(right = list(fun = colkey,title="Station Count")))+ |
|
249 |
layer(panel.abline(0,1,col="black",lwd=2))+ |
|
250 |
layer(panel.ablineq(lm(y ~ x), r.sq = TRUE,at = 0.6,pos=1, offset=22,digits=2,col="blue"), style = 1) |
|
251 |
|
|
252 |
|
|
253 |
|
|
254 |
xyplot(cld~mod09|month2,data=cld[cld$Nobs>10,],panel=function(x,y,subscripts){ |
|
255 |
n=50 |
|
256 |
bins=seq(0,100,len=n) |
|
257 |
tb=melt(as.matrix(table( |
|
258 |
x=cut(x,bins,labels=bins[-1]), |
|
259 |
y=cut(y,bins,labels=bins[-1])))) |
|
260 |
qat=unique(tb$value) |
|
261 |
print(qat) |
|
262 |
qat=0:26 |
|
263 |
qat=tr[1]:tr[2]#unique(tb$value) |
|
264 |
panel.levelplot(tb$x,tb$y,tb$value,at=qat,col.regions=c("transparent",hmcols(length(qat))),subscripts=1:nrow(tb)) |
|
265 |
layer(panel.abline(0,1,col="black",lwd=2))+ |
|
266 |
layer(panel.ablineq(lm(y ~ x), r.sq = TRUE,at = 0.6,pos=1, offset=0,digits=2,col="blue"), style = 1) |
|
267 |
},asp=1,scales=list(at=seq(0,100,len=6),useRaster=T,colorkey=list(width=.5,title="Number of Stations")), |
|
268 |
ylab="NDP Mean Cloud Amount (%)",xlab="MOD09 Cloud Frequency (%)", |
|
269 |
legend= list(right = list(fun = colkey)))+ layer(panel.abline(0,1,col="black",lwd=2)) |
|
270 |
|
|
271 |
|
|
272 |
xyplot(cld~mod09,data=clda,cex=0.5,pch=16)+ |
|
273 |
layer(panel.abline(lm(y~x),col="blue"))+ |
|
274 |
# layer(panel.lines(x,predict(lm(y~x),type="prediction")))+ |
|
275 |
layer(panel.loess(x,y,col="blue",span=.2))+ |
|
276 |
layer(panel.abline(0,1,col="red"))+ |
|
277 |
layer(panel.segments(mod09,cld-cldsd,mod09,cld+cldsd,col="grey"),data=clda,under=T,magicdots=T) |
|
278 |
|
|
279 |
## all monthly values |
|
280 |
#xyplot(cld~mod09|as.factor(month),data=cld[cld$Nobs>75,],cex=.2,pch=16,subscripts=T)+ |
|
281 |
# layer(panel.abline(lm(y~x),col="blue"))+ |
|
282 |
# layer(panel.abline(0,1,col="red")) |
|
283 |
|
|
284 |
## Monthly Climatologies |
|
285 |
for(i in 1:2){ |
|
286 |
p1=xyplot(cld~mod09|month2,data=cldm,cex=.2,pch=16,subscripts=T,ylab="NDP Mean Cloud Amount",xlab="MOD09 Cloud Frequency (%)")+ |
|
287 |
layer(panel.lines(1:100,predict(lm(y~x),newdata=data.frame(x=1:100)),col="green"))+ |
|
288 |
layer(panel.lines(1:100,predict(lm(y~x+I(x^3)),newdata=data.frame(x=1:100)),col="blue"))+ |
|
289 |
layer(panel.abline(0,1,col="red")) |
|
290 |
if(i==2){ |
|
291 |
p1=p1+layer(panel.segments(mod09[subscripts],cld[subscripts]-cldsd[subscripts],mod09[subscripts],cld[subscripts]+cldsd[subscripts],subscripts=subscripts,col="grey"),data=cldm,under=T,magicdots=T) |
|
292 |
p1=p1+layer(panel.segments(mod09[subscripts]-mod09sd[subscripts],cld[subscripts],mod09[subscripts]+mod09sd[subscripts],cld[subscripts],subscripts=subscripts,col="grey"),data=cldm,under=T,magicdots=T) |
|
293 |
} |
|
294 |
print(p1) |
|
295 |
} |
|
296 |
|
|
297 |
dev.off() |
|
298 |
|
|
299 |
|
|
300 |
summary(lm(Amt~mod09,data=cld)) |
|
301 |
summary(lm(cld~mod09_10+as.factor(lulc),data=d)) |
|
302 |
summary(lm(cld~mod09_10+as.factor(lulc),data=d)) |
|
303 |
|
|
304 |
### exploratory plots |
|
305 |
xyplot(cld~mod09_10,groups=lulc,data=d@data,pch=16,cex=.5)+layer(panel.abline(0,1,col="red")) |
|
306 |
xyplot(cld~mod09_10+mod35c5_10|as.factor(lulc),data=d@data,type=c("p","r"),pch=16,cex=.25,auto.key=T)+layer(panel.abline(0,1,col="green")) |
|
307 |
xyplot(cld~mod35_10|as.factor(lulc),data=d@data,pch=16,cex=.5)+layer(panel.abline(0,1,col="red")) |
|
308 |
xyplot(mod35_10~mod09_10|as.factor(lulc),data=d@data,pch=16,cex=.5)+layer(panel.abline(0,1,col="red")) |
|
309 |
|
|
310 |
densityplot(stack(mod35,mod09)) |
|
311 |
boxplot(mod35,lulc) |
|
312 |
|
|
313 |
bwplot(mod09~mod35|cut(y,5),data=stack(mod09,mod35)) |
|
314 |
|
|
315 |
## add a color key |
|
316 |
breaks=seq(0,100,by=25) |
|
317 |
cldm$cut=cut(cldm$cld,breaks) |
|
318 |
cp=colorRampPalette(c("blue","orange","red")) |
|
319 |
cols=cp(length(at)) |
|
320 |
|
|
321 |
|
|
322 |
|
|
323 |
## write a pdf |
|
324 |
#dir.create("output") |
|
325 |
pdf("output/NDP026d.pdf",width=11,height=8.5) |
|
326 |
|
|
327 |
|
|
328 |
|
|
329 |
## Validation |
|
330 |
m=10 |
|
331 |
zlim=c(40,100) |
|
332 |
dr=subset(mod35,subset=m);projection(dr)=projection(mod35) |
|
333 |
ds=cldms[cldms$month==m,] |
|
334 |
plot(dr,col=cp(100),zlim=zlim,main="Comparison of MOD35 Cloud Frequency and NDP-026D Station Cloud Climatologies", |
|
335 |
ylab="Northing (m)",xlab="Easting (m)",sub="MOD35 is proportion of cloudy days, while NDP-026D is Mean Cloud Coverage") |
|
336 |
plot(ds,add=T,pch=21,cex=3,lwd=2,fg="black",bg=as.character(cut(ds$cld,breaks=seq(zlim[1],zlim[2],len=5),labels=cp(4)))) |
|
337 |
#legend("topright",legend=seq(zlim[1],zlim[2],len=5),pch=16,col=cp(length(breaks))) |
|
338 |
|
|
339 |
|
|
340 |
xyplot(mod35~cld,data=mod35v,subscripts=T,auto.key=T,panel=function(x,y,subscripts){ |
|
341 |
td=mod35v[subscripts,] |
|
342 |
# panel.segments(x-td$cldsd[subscripts],y,x+td$cldsd[subscripts],y,subscripts=subscripts) |
|
343 |
panel.xyplot(x,y,subscripts=subscripts,type=c("p","smooth"),pch=16,col="black") |
|
344 |
# panel.segments(x-td$cldsd[subscripts],y,x+td$cldsd[subscripts],y,subscripts=subscripts) |
|
345 |
},ylab="MOD35 Proportion Cloudy Days",xlab="NDP-026D Mean Monthly Cloud Amount", |
|
346 |
main="Comparison of MOD35 Cloud Mask and Station Cloud Climatologies") |
|
347 |
|
|
348 |
#xyplot(mod35~cld|month,data=mod35v,subscripts=T,auto.key=T,panel=function(x,y,subscripts){ |
|
349 |
# td=mod35v[subscripts,] |
|
350 |
# panel.segments(x-td$cldsd[subscripts],y,x+td$cldsd[subscripts],y,subscripts=subscripts) |
|
351 |
# panel.xyplot(x,y,subscripts=subscripts,type=c("p","smooth"),pch=16,col="black") |
|
352 |
# panel.segments(x-td$cldsd[subscripts],y,x+td$cldsd[subscripts],y,subscripts=subscripts) |
|
353 |
# },ylab="MOD35 Proportion Cloudy Days",xlab="NDP-026D Mean Monthly Cloud Amount", |
|
354 |
# main="Comparison of MOD35 Cloud Mask and Station Cloud Climatologies") |
|
355 |
|
|
356 |
|
|
357 |
dev.off() |
|
148 |
######################################################################### |
|
358 | 149 |
|
359 |
graphics.off() |
Also available in: Unified diff
Updated EE script to run in parallel and minor edits to post-processing