### Script to download and process the NDP-026D station cloud dataset
## Data available here http://cdiac.ornl.gov/epubs/ndp/ndp026d/ndp026d.html

## Relative paths used further down ("data/", "stations.csv", "valid.csv",
## "cld*.csv") are resolved against this project directory.
setwd("~/acrobates/adamw/projects/cloud/data/NDP026D")

## Packages attached for the rest of the script
library(multicore)
library(doMC)
library(rasterVis)
library(rgdal)
## Get station locations
## Download the station catalogue into data/ and read it, skipping its first line.
system("wget -N -nd http://cdiac.ornl.gov/ftp/ndp026d/cat01/01_STID -P data/")
st <- read.table("data/01_STID", skip = 1)
names(st) <- c(
  "StaID", "LAT", "LON", "ELEV",
  "ny1", "fy1", "ly1",
  "ny7", "fy7", "ly7",
  "SDC", "b5c"
)

## Keep only id, elevation and position. LAT/LON are divided by 100, and
## longitudes greater than 180 have 360 subtracted.
st <- with(st, data.frame(id = StaID, elev = ELEV, lat = LAT / 100, lon = LON / 100))
st$lon <- st$lon - 360 * (st$lon > 180)

## Save the plain table first, then promote st to a spatial object whose
## coordinate columns are (lon, lat), in that order.
write.csv(st, "stations.csv", row.names = FALSE)
coordinates(st) <- c("lon", "lat")
## download data
## Fetch the compressed .tc.Z files into data/ and unpack them in place.
system("wget -N -nd ftp://cdiac.ornl.gov/pub/ndp026d/cat67_78/* -A '.tc.Z' -P data/")
system("gunzip data/*.Z")

## define FWF widths
f162 <- c(5, 5, 4, 7, 7, 7, 4) # format 162
c162 <- c("StaID", "YR", "Nobs", "Amt", "Fq", "AWP", "NC")

## use monthly timeseries
## One fixed-width file per calendar month ("01" .. "12"); each is read with
## the format-162 layout, tagged with its month number, and all twelve are
## stacked into a single data frame.
cld <- do.call(
  rbind.data.frame,
  mclapply(sprintf("%02d", 1:12), function(m) {
    month_file <- list.files(
      "data",
      pattern = paste0("MNYDC.", m, ".tc"),
      full.names = TRUE
    )
    d <- read.fwf(month_file, skip = 1, widths = f162)
    names(d) <- c162
    d$month <- as.numeric(m)
    print(m) # progress indicator
    d
  })
)
## add lat/lon
## coordinates(st) returns its columns in the order they were declared
## (lon, lat), so pick them by name; a positional assignment into
## c("lat","lon") would write longitude into lat and latitude into lon.
cld[, c("lat", "lon")] <- coordinates(st)[match(cld$StaID, st$id), c("lat", "lon")]

## drop the Fq, AWP and NC columns, and mark negative cloud amounts as missing
cld <- cld[, !grepl("Fq|AWP|NC", colnames(cld))]
cld$Amt[cld$Amt < 0] <- NA
#cld$Fq[cld$Fq<0]=NA
#cld$AWP[cld$AWP<0]=NA
#cld$NC[cld$NC<0]=NA
#cld=cld[cld$Nobs>0,]
## add the MOD09 data to cld
#### Evaluate MOD35 Cloud data
mod09 <- brick("~/acrobates/adamw/projects/cloud/data/mod09.nc")

## overlay the data with 32km diameter (16km radius) buffer
## buffer size from Dybbroe, et al. (2005) doi:10.1175/JAM-2189.1.
buf <- 16000

## Split the stations into 100 consecutive chunks so each extract() call
## handles a manageable number of points.
bins <- cut(seq_len(nrow(st)), 100)

## Every chunk is appended to valid.csv below. Start from an empty file so
## that rows left over from a previous run do not accumulate and get matched
## ahead of the fresh values when the file is read back.
unlink("valid.csv")

mod09sta <- lapply(levels(bins), function(lb) {
  l <- which(bins == lb)
  ## buffered mean of every mod09 layer around each station in this chunk
  td <- extract(mod09, st[l, ], buffer = buf, fun = mean, na.rm = TRUE, df = TRUE)
  ## attach the real station id (the ID column made by df=TRUE is only a row counter)
  td$id <- st$id[l]
  print(lb) # progress indicator
  write.table(
    td, "valid.csv",
    append = TRUE, col.names = FALSE, quote = FALSE, sep = ",", row.names = FALSE
  )
  td
}) # ,mc.cores=3)
## read it back in
## The first two columns of valid.csv are discarded; what remains is labelled
## with the mod09 layer names after the first one, followed by the station id.
mod09st <- read.csv("valid.csv", header = FALSE)[, -c(1, 2)]
names(mod09st) <- c(names(mod09)[-1], "id")

## Reshape to long form (one row per station and layer). Year and month are
## the first two "."-separated pieces of the layer name once "X" is removed.
mod09stl <- melt(mod09st, id.vars = "id")
mod09stl[, c("year", "month")] <-
  do.call(rbind, strsplit(sub("X", "", mod09stl$variable), "[.]"))[, 1:2]

## add it to cld
## Join on a "station year month" text key built on both sides.
cld$mod09 <- with(
  mod09stl,
  value[match(
    paste(cld$StaID, cld$YR, cld$month),
    paste(id, year, as.numeric(month))
  )]
)
## LULC
#system(paste("gdalwarp -r near -co \"COMPRESS=LZW\" -tr ",paste(res(mod09),collapse=" ",sep=""),
#             "-tap -multi -t_srs \"", projection(mod09),"\" /mnt/data/jetzlab/Data/environ/global/landcover/MODIS/MCD12Q1_IGBP_2005_v51.tif ../modis/mod12/MCD12Q1_IGBP_2005_v51.tif"))
lulc <- raster("~/acrobates/adamw/projects/interp/data/modis/mod12/MCD12Q1_IGBP_2005_v51.tif")

## library() stops immediately if plotKML is missing; require() would only
## warn and let the script fail later when worldgrids_pal is not found.
library(plotKML)
data(worldgrids_pal) # load IGBP palette

## Lookup table for classes 0..16: colour from the palette (entries 18 and 19
## dropped) and a class label copied from the data frame's row names
## (presumably the names of the palette vector -- TODO confirm).
IGBP <- data.frame(ID = 0:16, col = worldgrids_pal$IGBP[-c(18, 19)], stringsAsFactors = FALSE)
IGBP$class <- rownames(IGBP)
rownames(IGBP) <- seq_len(nrow(IGBP))

## attach the lookup table to the raster as its attribute table
levels(lulc) <- list(IGBP)
## function to get modal lulc value
## Returns the most frequent non-NA value of x. Ties go to the value that
## appears first in x; if x has no non-NA values the result is NA.
Mode <- function(x) {
  candidates <- unique(x)
  candidates <- candidates[!is.na(candidates)]
  ## count occurrences of each candidate (NA entries of x match nothing)
  counts <- tabulate(match(x, candidates))
  candidates[which.max(counts)]
}
## modal land-cover class within the buffer around each station
lulcst <- extract(lulc, st, fun = Mode, buffer = buf, df = TRUE)
colnames(lulcst) <- c("id", "lulc")
## With df=TRUE the first column returned by extract() is a sequential row
## counter, not the station identifier. Replace it with the real station ids
## (one row per station, in st order) so the match against cld$StaID below
## pairs each record with its own station.
lulcst$id <- st$id
## add it to cld
cld$lulc <- lulcst$lulc[match(cld$StaID, lulcst$id)]
## class label for each land-cover code
cld$lulcc <- IGBP$class[match(cld$lulc, IGBP$ID)]
## update cld column names
## The station cloud amount column "Amt" becomes "cld" and is divided by 100.
names(cld)[grepl("Amt", names(cld))] <- "cld"
cld[["cld"]] <- cld[["cld"]] / 100
## (re)attach station coordinates, selecting the columns by name
cld[, c("lat", "lon")] <-
  coordinates(st)[match(cld$StaID, st$id), c("lat", "lon")]
## calculate means and sds
## One row per (month, station): mean and sd of mod09 over all records, and
## mean and sd of the station cloud amount over records with Nobs > 50.
cldm <- do.call(
  rbind.data.frame,
  by(
    cld,
    list(month = as.factor(cld$month), StaID = as.factor(cld$StaID)),
    function(grp) {
      well_observed <- grp$cld[grp$Nobs > 50]
      data.frame(
        month = grp$month[1],
        lulc = grp$lulc[1],
        StaID = grp$StaID[1],
        mod09 = mean(grp$mod09, na.rm = TRUE),
        mod09sd = sd(grp$mod09, na.rm = TRUE),
        cld = mean(well_observed, na.rm = TRUE),
        cldsd = sd(well_observed, na.rm = TRUE)
      )
    }
  )
)
cldm[, c("lat", "lon")] <-
  coordinates(st)[match(cldm$StaID, st$id), c("lat", "lon")]
## means by year
## One row per (year, station): mean and sd of mod09 over all records, and
## mean and sd of the station cloud amount over records with Nobs > 50.
## cld$cld has already been divided by 100 when the column was renamed, so it
## is used as-is here; dividing again would put these yearly values on a
## scale 100 times smaller than the monthly and overall summaries.
cldy <- do.call(
  rbind.data.frame,
  by(
    cld,
    list(year = as.factor(cld$YR), StaID = as.factor(cld$StaID)),
    function(x) {
      well_observed <- x$cld[x$Nobs > 50]
      data.frame(
        year = x$YR[1],
        StaID = x$StaID[1],
        lulc = x$lulc[1],
        mod09 = mean(x$mod09, na.rm = TRUE),
        mod09sd = sd(x$mod09, na.rm = TRUE),
        cld = mean(well_observed, na.rm = TRUE),
        cldsd = sd(well_observed, na.rm = TRUE)
      )
    }
  )
)
cldy[, c("lat", "lon")] <-
  coordinates(st)[match(cldy$StaID, st$id), c("lat", "lon")]
## overall mean
## One row per station: mean and sd of mod09 over all records, and mean and
## sd of the station cloud amount over records with Nobs > 10.
clda <- do.call(
  rbind.data.frame,
  by(
    cld,
    list(StaID = as.factor(cld$StaID)),
    function(grp) {
      observed <- grp$cld[grp$Nobs > 10]
      data.frame(
        StaID = grp$StaID[1],
        lulc = grp$lulc[1],
        mod09 = mean(grp$mod09, na.rm = TRUE),
        mod09sd = sd(grp$mod09, na.rm = TRUE),
        cld = mean(observed, na.rm = TRUE),
        cldsd = sd(observed, na.rm = TRUE)
      )
    }
  )
)
clda[, c("lat", "lon")] <-
  coordinates(st)[match(clda$StaID, st$id), c("lat", "lon")]
## write out the tables
## Full record table first, then the yearly, monthly and overall summaries;
## each goes to its own CSV without row names.
invisible(Map(
  function(tbl, path) write.csv(tbl, file = path, row.names = FALSE),
  list(cld, cldy, cldm, clda),
  c("cld.csv", "cldy.csv", "cldm.csv", "clda.csv")
))

#########################################################################