1
|
### Script to download and process the NDP-026D station cloud dataset
|
2
|
|
3
|
## Every relative path used below (data/, stations.csv, valid.csv, cld*.csv)
## is resolved against this working directory.
setwd("~/acrobates/adamw/projects/cloud/data/NDP026D")
|
4
|
|
5
|
library(multicore)
|
6
|
library(doMC)
|
7
|
library(rasterVis)
|
8
|
library(rgdal)
|
9
|
library(reshape)
|
10
|
|
11
|
|
12
|
## Data available here http://cdiac.ornl.gov/epubs/ndp/ndp026d/ndp026d.html
|
13
|
|
14
|
## Get station locations
## Fetch the station catalogue into data/ and read it, skipping its first line.
system("wget -N -nd http://cdiac.ornl.gov/ftp/ndp026d/cat01/01_STID -P data/")
st <- read.table("data/01_STID", skip = 1)
colnames(st) <- c(
  "StaID", "LAT", "LON", "ELEV",
  "ny1", "fy1", "ly1", "ny7", "fy7", "ly7", "SDC", "b5c"
)

## Keep only identifier, elevation and position. LAT/LON are divided by 100
## (presumably stored as hundredths of a degree -- confirm against the
## dataset documentation).
st <- data.frame(
  id = st$StaID,
  elev = st$ELEV,
  lat = st$LAT / 100,
  lon = st$LON / 100
)

## Longitudes above 180 are shifted down by 360.
st$lon[st$lon > 180] <- st$lon[st$lon > 180] - 360

write.csv(st, "stations.csv", row.names = FALSE)

## Turn the table into a spatial points object in geographic coordinates,
## then copy the coordinates back into the attribute table so that
## st$lon / st$lat remain available.
coordinates(st) <- c("lon", "lat")
projection(st) <- "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
st@data[, c("lon", "lat")] <- coordinates(st)
|
27
|
|
28
|
## download data
## Both shell commands are run exactly as before, but their exit status is now
## inspected so that a failed download or decompression is reported instead of
## passing silently (later steps would otherwise fail with a less obvious
## "file not found" style error).
if (system("wget -N -nd ftp://cdiac.ornl.gov/pub/ndp026d/cat67_78/* -A '.tc.Z' -P data/") != 0) {
  warning("wget returned a non-zero exit status while fetching the .tc.Z files",
          call. = FALSE)
}

## Decompress everything that was downloaded into data/.
if (system("gunzip data/*.Z") != 0) {
  warning("gunzip returned a non-zero exit status for data/*.Z", call. = FALSE)
}
|
32
|
|
33
|
## define FWF widths
f162 <- c(5, 5, 4, 7, 7, 7, 4) # format 162
c162 <- c("StaID", "YR", "Nobs", "Amt", "Fq", "AWP", "NC")

## use monthly timeseries
## One fixed-width file is read per calendar month (01..12); the per-month
## tables are tagged with their month number and stacked into one data frame.
cld <- do.call(
  rbind.data.frame,
  mclapply(sprintf("%02d", 1:12), function(m) {
    month_file <- list.files(
      "data",
      pattern = paste0("MNYDC.", m, ".tc$"),
      full.names = TRUE
    )
    d <- read.fwf(month_file, skip = 1, widths = f162)
    colnames(d) <- c162
    d$month <- as.numeric(m)
    print(m) # progress indicator
    d
  })
)
|
45
|
|
46
|
## add lat/lon
## The coordinate matrix of st has its columns in (lon, lat) order because the
## coordinates were declared as c("lon", "lat"). Assignment into a data frame
## is positional, so the columns must be picked by name here; otherwise lat
## and lon end up swapped in cld.
cld[, c("lat", "lon")] <-
  coordinates(st)[match(cld$StaID, st$id), c("lat", "lon")]

## drop missing values
## Remove the Fq, AWP and NC columns and treat negative cloud amounts as
## missing.
cld <- cld[, !grepl("Fq|AWP|NC", colnames(cld))]
cld$Amt[cld$Amt < 0] <- NA
#cld$Fq[cld$Fq<0]=NA
#cld$AWP[cld$AWP<0]=NA
#cld$NC[cld$NC<0]=NA
#cld=cld[cld$Nobs>0,]
|
56
|
|
57
|
## calculate means and sds
## One row per (month, station): mean and standard deviation of the cloud
## amount over all records of that station/month with more than 60
## observations (Nobs > 60).
## The cloud amount column of cld is named "Amt" at this point; reading a
## non-existent "cld" column would make every mean and sd NA.
cldm <- do.call(
  rbind.data.frame,
  by(
    cld,
    list(month = as.factor(cld$month), StaID = as.factor(cld$StaID)),
    function(x) {
      amt <- x$Amt[x$Nobs > 60]
      data.frame(
        month = x$month[1],
        StaID = x$StaID[1],
        cld = mean(amt, na.rm = TRUE),
        cldsd = sd(amt, na.rm = TRUE)
      )
    }
  )
)

## Attach the station position, selecting the coordinate columns by name.
cldm[, c("lat", "lon")] <-
  coordinates(st)[match(cldm$StaID, st$id), c("lat", "lon")]
|
65
|
|
66
|
|
67
|
## add the MOD09 data to cld
#### Evaluate MOD35 Cloud data
mod09 <- brick("~/acrobates/adamw/projects/cloud/data/cloud_ymonmean.nc")
mod09std <- brick("~/acrobates/adamw/projects/cloud/data/cloud_ymonstd.nc")

## overlay the data with 32km diameter (16km radius) buffer
## buffer size from Dybbroe, et al. (2005) doi:10.1175/JAM-2189.1.
buf <- 16000

## Stations are processed in 10 latitude bands; each band's results are
## appended to valid.csv as soon as they are available.
bins <- cut(st$lat, 10)

## Set rerun to TRUE to discard an existing valid.csv before extracting.
## NOTE(review): the extraction below runs and appends regardless of rerun, so
## with rerun = FALSE an existing valid.csv is extended -- confirm intended.
rerun <- FALSE
if (rerun && file.exists("valid.csv")) file.remove("valid.csv")

mod09sta <- lapply(levels(bins), function(lb) {
  idx <- which(bins == lb)

  ## buffered mean of the monthly-mean brick
  mean_df <- extract(mod09, st[idx, ], buffer = buf, fun = mean,
                     na.rm = TRUE, df = TRUE)
  mean_df$id <- st$id[idx]
  mean_df$type <- "mean"

  ## buffered mean of the monthly-std brick
  sd_df <- extract(mod09std, st[idx, ], buffer = buf, fun = mean,
                   na.rm = TRUE, df = TRUE)
  sd_df$id <- st$id[idx]
  sd_df$type <- "sd"

  print(lb) # progress indicator

  ## rows are written without a header: mean rows first, then sd rows
  write.table(rbind(mean_df, sd_df), "valid.csv", append = TRUE,
              col.names = FALSE, quote = FALSE, sep = ",", row.names = FALSE)

  ## only the mean table is kept in the returned list
  mean_df
})
|
92
|
|
93
|
## read it back in
## The first two columns of the header-less file are dropped before naming.
## NOTE(review): the rows written to valid.csv end with a `type` column
## ("mean"/"sd") that receives no name here, and "sd" is used as an id
## variable below although no column is given that name in this script --
## confirm the intended column layout.
mod09st <- read.csv("valid.csv", header = FALSE)[, -c(1, 2)]
colnames(mod09st) <- c(names(mod09)[-1], "id")

## Long format: one row per station and layer.
mod09stl <- melt(mod09st, id.vars = c("id", "sd"))

## Layer names are split on "." after removing "X"; the first two pieces are
## stored as year and month.
mod09stl[, c("year", "month")] <- do.call(
  rbind,
  strsplit(sub("X", "", mod09stl$variable), "[.]")
)[, 1:2]

## Negative values are treated as missing.
mod09stl$value[mod09stl$value < 0] <- NA

## add it to cld
## Join on "<station> <month>"; match() takes the first matching row.
cldm$mod09 <- mod09stl$value[
  match(
    paste(cldm$StaID, cldm$month),
    paste(mod09stl$id, as.numeric(mod09stl$month))
  )
]
|
103
|
|
104
|
|
105
|
## LULC
#system(paste("gdalwarp -r near -co \"COMPRESS=LZW\" -tr ",paste(res(mod09),collapse=" ",sep=""),
#             "-tap -multi -t_srs \"", projection(mod09),"\" /mnt/data/jetzlab/Data/environ/global/landcover/MODIS/MCD12Q1_IGBP_2005_v51.tif ../modis/mod12/MCD12Q1_IGBP_2005_v51.tif"))
lulc <- raster("~/acrobates/adamw/projects/interp/data/modis/mod12/MCD12Q1_IGBP_2005_v51.tif")

## library() stops with an error when plotKML is missing; require() would only
## return FALSE and let the data() call below fail less clearly.
library(plotKML)
data(worldgrids_pal) # load IGBP palette

## Lookup table for class codes 0..16: colour and class label. Palette
## entries 18 and 19 are left out. The class label is taken from the row
## names of the new data frame (presumably the names of the palette vector --
## confirm), after which the row names are reset to 1..n.
IGBP <- data.frame(
  ID = 0:16,
  col = worldgrids_pal$IGBP[-c(18, 19)],
  stringsAsFactors = FALSE
)
IGBP$class <- rownames(IGBP)
rownames(IGBP) <- seq_len(nrow(IGBP))

## Attach the lookup table to the raster as its attribute table.
levels(lulc) <- list(IGBP)
|
113
|
## function to get modal lulc value
## Returns the most frequent non-NA value of x. Ties go to the value that
## appears first in x; if x holds no non-NA value the result is NA.
Mode <- function(x) {
  candidates <- unique(x)
  candidates <- candidates[!is.na(candidates)]
  counts <- tabulate(match(x, candidates))
  candidates[which.max(counts)]
}
|
118
|
## Modal land-cover class within the buffer around every station.
lulcst <- extract(lulc, st, fun = Mode, buffer = buf, df = TRUE)
colnames(lulcst) <- c("id", "lulc")

## With df = TRUE the first column returned by extract() is a sequential row
## number, not the station identifier. Rows come back in the order of st, so
## the real station ids are assigned here (as is done for the MOD09
## extraction); matching StaID against row numbers would pair stations with
## the wrong land-cover value.
lulcst$id <- st$id

## add it to cld
cldm$lulc <- lulcst$lulc[match(cldm$StaID, lulcst$id)]
## class label for the numeric land-cover code
cldm$lulcc <- IGBP$class[match(cldm$lulc, IGBP$ID)]
|
123
|
|
124
|
## update cld column names
## Any column of cldm whose name contains "Amt" is renamed to "cld" (nothing
## is renamed when no such column exists), then cld is divided by 100.
names(cldm)[grepl("Amt", names(cldm))] <- "cld"
cldm$cld <- cldm$cld / 100

## Refresh the station position, selecting the coordinate columns by name.
cldm[, c("lat", "lon")] <-
  coordinates(st)[match(cldm$StaID, st$id), c("lat", "lon")]
|
128
|
|
129
|
## calculate means and sds
|
130
|
#cldm=do.call(rbind.data.frame,by(cld,list(month=as.factor(cld$month),StaID=as.factor(cld$StaID)),function(x){
|
131
|
# data.frame(
|
132
|
# month=x$month[1],
|
133
|
# lulc=x$lulc[1],
|
134
|
# StaID=x$StaID[1],
|
135
|
# mod09=mean(x$mod09,na.rm=T),
|
136
|
# mod09sd=sd(x$mod09,na.rm=T),
|
137
|
# cld=mean(x$cld[x$Nobs>50],na.rm=T),
|
138
|
# cldsd=sd(x$cld[x$Nobs>50],na.rm=T))}))
|
139
|
#cldm[,c("lat","lon")]=coordinates(st)[match(cldm$StaID,st$id),c("lat","lon")]
|
140
|
|
141
|
## means by year
|
142
|
#cldy=do.call(rbind.data.frame,by(cld,list(year=as.factor(cld$YR),StaID=as.factor(cld$StaID)),function(x){
|
143
|
# data.frame(
|
144
|
# year=x$YR[1],
|
145
|
# StaID=x$StaID[1],
|
146
|
# lulc=x$lulc[1],
|
147
|
# mod09=mean(x$mod09,na.rm=T),
|
148
|
# mod09sd=sd(x$mod09,na.rm=T),
|
149
|
# cld=mean(x$cld[x$Nobs>50]/100,na.rm=T),
|
150
|
# cldsd=sd(x$cld[x$Nobs>50]/100,na.rm=T))}))
|
151
|
#cldy[,c("lat","lon")]=coordinates(st)[match(cldy$StaID,st$id),c("lat","lon")]
|
152
|
|
153
|
## overall mean
## One row per station: mean and standard deviation across that station's
## rows in cldm, for both the MOD09 value and the station cloud amount. The
## land-cover code is taken from the station's first row.
clda <- do.call(
  rbind.data.frame,
  by(cldm, list(StaID = as.factor(cldm$StaID)), function(sta_rows) {
    data.frame(
      StaID = sta_rows$StaID[1],
      lulc = sta_rows$lulc[1],
      mod09 = mean(sta_rows$mod09, na.rm = TRUE),
      mod09sd = sd(sta_rows$mod09, na.rm = TRUE),
      cld = mean(sta_rows$cld, na.rm = TRUE),
      cldsd = sd(sta_rows$cld, na.rm = TRUE)
    )
  })
)

## Attach the station position, selecting the coordinate columns by name.
clda[, c("lat", "lon")] <-
  coordinates(st)[match(clda$StaID, st$id), c("lat", "lon")]
|
163
|
|
164
|
|
165
|
## write out the tables
## Raw records, monthly station summaries and overall station summaries are
## written to the working directory without row names.
write.csv(cld, file = "cld.csv", row.names = FALSE)
#write.csv(cldy,file="cldy.csv",row.names=F)
write.csv(cldm, file = "cldm.csv", row.names = FALSE)
write.csv(clda, file = "clda.csv", row.names = FALSE)
|
170
|
|
171
|
#########################################################################
|
172
|
|