1
|
### Script to download and process the NDP-026D station cloud dataset
|
2
|
|
3
|
setwd("~/acrobates/adamw/projects/cloud/data/NDP026D")
|
4
|
|
5
|
library(multicore)
|
6
|
library(doMC)
|
7
|
library(rasterVis)
|
8
|
library(rgdal)
|
9
|
library(reshape)
|
10
|
|
11
|
|
12
|
## Data available here http://cdiac.ornl.gov/epubs/ndp/ndp026d/ndp026d.html
|
13
|
|
14
|
## Get station locations
## Fetch the station catalogue into data/ and read it, skipping the first line.
system("wget -N -nd http://cdiac.ornl.gov/ftp/ndp026d/cat01/01_STID -P data/")
st <- read.table("data/01_STID", skip = 1)
colnames(st) <- c(
  "StaID", "LAT", "LON", "ELEV",
  "ny1", "fy1", "ly1", "ny7", "fy7", "ly7", "SDC", "b5c"
)

## LAT/LON are divided by 100; longitudes above 180 are shifted down by 360
st$lat <- st$LAT / 100
st$lon <- st$LON / 100
east_of_180 <- st$lon > 180
st$lon[east_of_180] <- st$lon[east_of_180] - 360

## keep only the identifying columns, under short lower-case names
st <- setNames(
  st[, c("StaID", "ELEV", "lat", "lon")],
  c("id", "elev", "lat", "lon")
)
write.csv(st, "stations.csv", row.names = FALSE)

## promote to a spatial points object (coordinate order: lon, lat)
coordinates(st) <- c("lon", "lat")
projection(st) <- "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
## copy the coordinates back into the attribute table as lon/lat columns
st@data[, c("lon", "lat")] <- coordinates(st)
|
27
|
|
28
|
## download data
## Fetch the remote files accepted by the '.tc.Z' filter into data/ ...
fetch_cmd <- "wget -N -nd ftp://cdiac.ornl.gov/pub/ndp026d/cat67_78/* -A '.tc.Z' -P data/"
system(fetch_cmd)

## ... and uncompress them in place
unpack_cmd <- "gunzip data/*.Z"
system(unpack_cmd)
|
32
|
|
33
|
## define FWF widths
f162 <- c(5, 5, 4, 7, 7, 7, 4) # format 162
c162 <- c("StaID", "YR", "Nobs", "Amt", "Fq", "AWP", "NC")

## use monthly timeseries
## One fixed-width file is read per calendar month ("01".."12"); the month is
## added as a numeric column and the twelve tables are stacked into `cld`.
cld <- do.call(
  rbind.data.frame,
  mclapply(sprintf("%02d", 1:12), function(m) {
    month_file <- list.files(
      "data",
      pattern = paste("MNYDC.", m, ".tc$", sep = ""),
      full.names = TRUE
    )
    d <- read.fwf(month_file, skip = 1, widths = f162)
    colnames(d) <- c162
    d$month <- as.numeric(m)
    print(m) # progress indicator
    d
  })
)
|
45
|
|
46
|
## add lat/lon
## The coordinate matrix of `st` has its columns in the order they were
## declared (lon, lat), so the columns are selected by name here; a purely
## positional assignment would store longitude in `lat` and vice versa.
cld[, c("lat", "lon")] <-
  coordinates(st)[match(cld$StaID, st$id), c("lat", "lon")]

## drop missing values
## remove the Fq, AWP and NC columns
cld <- cld[, !grepl("Fq|AWP|NC", colnames(cld))]
## negative amounts are treated as missing
cld$Amt[cld$Amt < 0] <- NA
## rescale the amount by 1/100
cld$Amt <- cld$Amt / 100
|
53
|
|
54
|
## calculate means and sds for full record (1970-2009)
Nobsthresh <- 20 # minimum number of observations to include

## One summary row per month x station combination: mean and sd of Amt over
## all records meeting the Nobs threshold, and over those with YR >= 2000.
cldm <- do.call(
  rbind.data.frame,
  by(
    cld,
    list(month = as.factor(cld$month), StaID = as.factor(cld$StaID)),
    function(x) {
      enough_obs <- x$Nobs >= Nobsthresh
      amt_full <- x$Amt[enough_obs] # full record
      amt_modis <- x$Amt[x$YR >= 2000 & enough_obs] # only MODIS epoch
      data.frame(
        month = x$month[1],
        StaID = x$StaID[1],
        cld_all = mean(amt_full, na.rm = TRUE),
        cldsd_all = sd(amt_full, na.rm = TRUE),
        cld = mean(amt_modis, na.rm = TRUE),
        cldsd = sd(amt_modis, na.rm = TRUE)
      )
    }
  )
)

## attach station coordinates, selecting the columns by name
cldm[, c("lat", "lon")] <-
  coordinates(st)[match(cldm$StaID, st$id), c("lat", "lon")]
|
66
|
|
67
|
|
68
|
|
69
|
## add the MOD09 data to cld
#### Evaluate MOD35 Cloud data
mod09 <- brick("~/acrobates/adamw/projects/cloud/data/cloud_ymonmean.nc")
mod09std <- brick("~/acrobates/adamw/projects/cloud/data/cloud_ymonstd.nc")

## overlay the data with 32km diameter (16km radius) buffer
## buffer size from Dybbroe, et al. (2005) doi:10.1175/JAM-2189.1.
buf <- 16000
## stations are processed in 10 latitude bins; each bin's result is appended
## to valid.csv as soon as it is finished
bins <- cut(st$lat, 10)
## set to TRUE to discard an existing valid.csv and redo the extraction
rerun <- FALSE

## Buffered mean of every layer of `r` at the stations indexed by `idx`,
## tagged with the station id and a `type` label ("mean" or "sd").
extract_bin <- function(r, idx, type) {
  out <- extract(r, st[idx, ], buffer = buf, fun = mean, na.rm = TRUE, df = TRUE)
  out$id <- st$id[idx]
  out$type <- type
  out
}

## Rows are appended to valid.csv, so extracting on top of an existing file
## would duplicate every station. Extract only when a rerun is requested or
## no cached file exists, and always start from a fresh file.
mod09sta <- NULL
if (rerun || !file.exists("valid.csv")) {
  if (file.exists("valid.csv")) file.remove("valid.csv")
  mod09sta <- lapply(levels(bins), function(lb) {
    l <- which(bins == lb)
    ## a latitude bin may contain no stations; nothing to extract then
    if (length(l) == 0) {
      return(NULL)
    }
    td <- extract_bin(mod09, l, "mean") # mean
    td2 <- extract_bin(mod09std, l, "sd") # std
    print(lb) # progress indicator
    write.table(rbind(td, td2), "valid.csv",
      append = TRUE, col.names = FALSE, quote = FALSE, sep = ",",
      row.names = FALSE
    )
    td
  }) # ,mc.cores=3)
}
|
94
|
|
95
|
## read it back in
## The file has no header; its first column is dropped (presumably the
## sequential ID column written by the extraction step - confirm).
mod09st <- read.csv("valid.csv", header = FALSE)[, -1]
colnames(mod09st) <- c(names(mod09), "id", "type")

## long format: one row per station x type x layer
mod09stl <- melt(mod09st, id.vars = c("id", "type"))
## layer names are split on "." after removing the "X"; the first two pieces
## are stored as year and month
layer_parts <- strsplit(sub("X", "", mod09stl$variable), "[.]")
mod09stl[, c("year", "month")] <- do.call(rbind, layer_parts)[, 1:2]
mod09stl$value[mod09stl$value < 0] <- NA
## back to wide: one column per type
mod09stl <- cast(mod09stl, id + year + month ~ type, value = "value")

## add it to cld
## match on "station month" keys; the same row index serves both columns
mod09_row <- match(
  paste(cldm$StaID, cldm$month),
  paste(mod09stl$id, as.numeric(mod09stl$month))
)
cldm$mod09 <- mod09stl$mean[mod09_row]
cldm$mod09sd <- mod09stl$sd[mod09_row]
|
106
|
|
107
|
|
108
|
## LULC
#system(paste("gdalwarp -r near -co \"COMPRESS=LZW\" -tr ",paste(res(mod09),collapse=" ",sep=""),
#             "-tap -multi -t_srs \"", projection(mod09),"\" /mnt/data/jetzlab/Data/environ/global/landcover/MODIS/MCD12Q1_IGBP_2005_v51.tif ../modis/mod12/MCD12Q1_IGBP_2005_v51.tif"))
lulc <- raster("~/acrobates/adamw/projects/interp/data/modis/mod12/MCD12Q1_IGBP_2005_v51.tif")

## library() stops with an error when plotKML is not installed; require()
## would only return FALSE and let data() fail later with a less clear message
library(plotKML)
data(worldgrids_pal) # load IGBP palette

## legend table for class IDs 0..16 (palette entries 18 and 19 are dropped)
IGBP <- data.frame(
  ID = 0:16,
  col = worldgrids_pal$IGBP[-c(18, 19)],
  stringsAsFactors = FALSE
)
## keep the row names as a `class` column (presumably the class names carried
## by the palette vector - confirm), then reset row names to 1..n
IGBP$class <- rownames(IGBP)
rownames(IGBP) <- seq_len(nrow(IGBP))
levels(lulc) <- list(IGBP)
|
116
|
## function to get modal lulc value
## Returns the most frequent non-missing value of `x`; ties go to the value
## that appears first. Returns NA when `x` is empty or entirely NA.
## `...` absorbs extra arguments (e.g. na.rm) that a caller such as
## extract() may pass to its summary function; missing values are always
## ignored regardless.
Mode <- function(x, ...) {
  ux <- unique(x[!is.na(x)])
  ux[which.max(tabulate(match(x, ux)))]
}
|
121
|
## modal land-cover class within the buffer around each station
lulcst <- extract(lulc, st, fun = Mode, buffer = buf, df = TRUE)
colnames(lulcst) <- c("id", "lulc")
## With df=TRUE the first column is a sequential point index, not a station
## identifier. Rows follow the order of `st`, so overwrite it with the real
## station ids before matching against StaID.
lulcst$id <- st$id

## add it to cld
cldm$lulc <- lulcst$lulc[match(cldm$StaID, lulcst$id)]
## translate the class ID into its class label
cldm$lulcc <- IGBP$class[match(cldm$lulc, IGBP$ID)]
|
126
|
|
127
|
|
128
|
## write out the tables
## cld holds the monthly station records, cldm the per-station/month summary;
## both are written without row names.
write.csv(cld, file = "cld.csv", row.names = FALSE)
write.csv(cldm, file = "cldm.csv", row.names = FALSE)
|
131
|
|
132
|
#########################################################################
|
133
|
|