Revision a29da850

Added by Adam Wilson over 10 years ago

Update bounding box to remove high-latitude garbage

View differences:

climate/research/cloud/MOD09/2_bias.R
###  Script to compile the monthly cloud data from earth engine into a netcdf file for further processing

library(rasterVis)
+library(multicore)
library(doMC)
library(foreach)
-library(RcppOctave)
library(rgdal)
registerDoMC(12)

......


## Specify path to VSNR source code and add it to RcppOctave path
+library(RcppOctave)
mpath="/mnt/data/personal/adamw/projects/environmental-layers/climate/research/cloud/MOD09/vsnr/"
.O$addpath(mpath)

......
#########################################
####  Bias correction functions

-fgabor=function(d,theta=-15,x=200,y=5){
+fgabor=function(d,theta,x=200,y=5){
    thetaR=(theta*pi)/180
    cds=expand.grid(x=(1:nrow(d))-round(nrow(d)/2),y=(1:ncol(d))-round(ncol(d)/2))
    sigma_x=x
......
allexts=rbind.data.frame(modexts,mydexts)

### assemble list of files to process
-df2=data.frame(path=list.files(paste(datadir,"/mcd09tif",sep=""),full=T,pattern="[0-9][.]tif$"),stringsAsFactors=F)
-df2[,c("sensor","month")]=do.call(rbind.data.frame,strsplit(basename(df2$path),"_|[.]"))[,c(1,2)]
+df2=data.frame(path=list.files(paste(datadir,"/mcd09tif",sep=""),full=T,pattern="[0-9]*[mean|sd].*tif$"),stringsAsFactors=F)
+df2[,c("sensor","month","type")]=do.call(rbind.data.frame,strsplit(basename(df2$path),"_|[.]"))[,c(1,2,3)]

## create a list of jobs to process
jobs=data.frame(allexts,month=rep(sprintf("%02d",1:12),each=nrow(allexts)))
-jobs$path=df2$path[match(paste(jobs$sensor,jobs$month),paste(df2$sensor,df2$month))]
+jobs=rbind.data.frame(cbind.data.frame(type="mean",jobs),cbind.data.frame(type="sd",jobs))
+jobs$path=df2$path[match(paste(jobs$sensor,jobs$month,jobs$type),paste(df2$sensor,df2$month,df2$type))]
+## drop any jobs with no associated files
+jobs=jobs[!is.na(jobs$path),]


## loop over sensor-months to create full grid of corrected values
-foreach( i=1:nrow(jobs)) %dopar% {
+foreach( i=1:nrow(jobs), .options.multicore=list(preschedule=FALSE)) %dopar% {
    file=jobs$path[i]
    toutfile=paste(datadir,"mcd09bias/", sub(".tif","",basename(file)),"_",jobs$tile[i],".tif",sep="")
-#    if(file.exists(toutfile)) {writeLines(paste(toutfile,"Exists, moving on"));next}
+    if(file.exists(toutfile)) {writeLines(paste(toutfile,"Exists, moving on"));return(NULL)}
    writeLines(paste("Starting: ",toutfile," tile:",jobs$tile[i]," ( ",i," out of ",nrow(jobs),")"))
-    ## set sensor-specific parameters
-    ## add extra region for correction depending on which sensor is being processed
    ## set angle of orbital artefacts to be corrected
    sensor=jobs$sensor[i]
    if(sensor=="MOD09") scanangle=-15
......
    res=vsnr(d2,gabor=psi,alpha=2,p=2,epsilon=1,prec=5e-6,maxit=50,C=1,full=F)
    res2=res*100
    ## write the file
-    writeRaster(res2,file=toutfile,overwrite=T,datatype='INT2S',options=c("COMPRESS=LZW", "PREDICTOR=2"),NAvalue=-32768)
+    writeRaster(res2,file=toutfile,overwrite=T,datatype='INT2S',options=c("COMPRESS=LZW", "PREDICTOR=2"),NAvalue=32767)
    ## remove temporary files
    rmr(d);rmr(d2);rmr(psi);rmr(res);rmr(res2)
    ## remove old temporary files older than x hours
......
}


-
############################################
## now mosaic the tiles with the original image to keep only the corrected data (when available) and the uncorrected data where there is no tile.
## this relies on df2 created above
+#tfs=list.files(paste(datadir,"/mcd09bias",sep=""),pattern="M.*_[0-9]._[0-9][.]tif",full=T)
+#file.rename(tfs,paste(substr(tfs,1,46),"mean_",substr(tfs,47,52),sep=""))
+
+## define color scale for mean and sd
+mkct=function(palette,vals,bit)
+    data.frame(id=0:(2^bit-1),t(col2rgb(c(palette(length(vals)+1),rep("#00000000",(2^bit)-length(vals)-1)),alpha=T)))
+
+colR=colorRampPalette(c("#08306b","#0d57a1","#2878b8","#4997c9","#72b2d7","#a2cbe2","#c7dcef","#deebf7","#f7fbff"))
+meancols=mkct(colR,vals=0:10000,bit=16)
+write.table(meancols,file="data/colors16_mean.txt",quote=F,row.names=F,col.names=F)
+
+colR2=colorRampPalette(c("#0000FF","#00FF80","#FF0080"))
+sdcols=mkct(colR2,vals=0:10000,bit=16)
+write.table(sdcols,file="data/colors16_sd.txt",quote=F,row.names=F,col.names=F)
+

foreach( i=1:nrow(df2)) %dopar% {
    ifile=df2$path[i]
-    outfile=paste(datadir,"/mcd09ctif/",df2$sensor[i],"_",df2$month[i],"_uncompressed.tif",sep="")
-    outfile2=paste(datadir,"/mcd09ctif/",df2$sensor[i],"_",df2$month[i],".tif",sep="")
-    ##    if(file.exists(outfile)) {print(paste(outfile," exists, moving on...")); return(NULL)}
-    ## create VRT of first band of the full image
-    fvrt=sub("[.]tif",".vrt",ifile)
-    system(paste("gdalbuildvrt -b 1 ",fvrt," ",ifile))
+    itype=df2$type[i]
+    outfile=paste(datadir,"/mcd09ctif/",sub(".tif",".vrt",basename(ifile)),sep="")
+    outfile2=paste(datadir,"/mcd09ctif/unmasked_",basename(ifile),sep="")
+    outfile3=paste(datadir,"/mcd09ctif/",basename(ifile),sep="")
+#    if(file.exists(outfile3)) {print(paste(outfile," exists, moving on...")); return(NULL)}
    ## mosaic the tiles with the original data (keeping the new data when available)
-    tfiles=paste(c(fvrt,list.files(paste(datadir,"/mcd09bias",sep=""),pattern=paste(sub("[.]tif","",basename(outfile2)),"_[0-9]*[.]tif",sep=""),full=T)),collapse=" ")
-    if(file.exists(outfile)) file.remove(outfile2,outfile)
-    system(paste("gdal_merge.py --config GDAL_CACHEMAX 10000 -init -32768 -n -32768 -co BIGTIFF=yes  -o ",outfile," ",tfiles,sep=""))
-    system(paste("gdal_translate -co COMPRESS=LZW -co ZLEVEL=9 -co PREDICTOR=2 ",outfile," ",outfile2,sep=""))
-    file.remove(fvrt,outfile)
-    writeLines(paste("Finished ",outfile))
+    tfiles=paste(c(ifile,list.files(paste(datadir,"/mcd09bias",sep=""),pattern=paste(sub("[.]tif","",basename(outfile3)),"_[0-9]*[.]tif",sep=""),full=T)),collapse=" ")
+    system(paste("gdalbuildvrt -srcnodata 32767 -vrtnodata 32767 ",outfile," ",tfiles,sep=""))
+    system(paste("gdal_translate -a_ullr -180 90 180 -90 -a_nodata 32767 -co COMPRESS=LZW -co ZLEVEL=9 -co PREDICTOR=2 ",outfile," ",outfile2,sep=""))
+    ## use pksetmask to set any values >100 (except the missing values) to 100
+    ## these exist due to the reprojection to wgs84 from sinusoidal; there are a few pixels with values slightly
+    ## greater than 100
+    ## also convert to an unsigned 16-bit integer to allow adding a color table
+    ict=ifelse(itype=="mean","data/colors16_mean.txt","data/colors16_sd.txt")
+    system(paste("pksetmask -i ",outfile2," -m ",outfile2," -ot UInt16 ",
+                 "--operator='>' --msknodata 20000 --nodata 65535  --operator='>' --msknodata 10000 --nodata 10000 --operator='<' --msknodata 0 --nodata 65535 ",
+                 " -ct ",ict,"  -co COMPRESS=LZW -co PREDICTOR=2 -o ",outfile3))
+    ## clean up temporary files
+    file.remove(outfile,outfile2)
+    writeLines(paste("#######################################             Finished ",outfile))
}
-


+## check output
+for(i in 1:nrow(df2)) {
+    ifile=df2$path[i]
+    outfile3=paste(datadir,"/mcd09ctif/",basename(ifile),sep="")
+    print(ifile)
+    system(paste("gdalinfo ",outfile3,"| grep 'Size is'"))
+}
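
Note on the loop change above: inside a `%dopar%` body the code is evaluated as a function, so an iteration is skipped with return(NULL) rather than next, and preschedule=FALSE makes the multicore backend hand out jobs one at a time, which suits tiles of very uneven size. A minimal sketch of the pattern (toy loop, assuming only that the doMC backend is installed):

library(doMC)
library(foreach)
registerDoMC(2)  # example: two workers
res=foreach(i=1:4, .options.multicore=list(preschedule=FALSE)) %dopar% {
    if(i==2) return(NULL)  # skip this iteration; 'next' would fail inside %dopar%
    i^2
}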
climate/research/cloud/MOD09/3_biome.R
### Produce summary of cloud frequency by biome

setwd("~/acrobates/adamw/projects/cloud/")


## libraries
library(rasterVis)
library(latticeExtra)
library(xtable)
library(texreg)
library(reshape)
library(caTools)
library(rgeos)
library(raster)


#### Evaluate MOD35 Cloud data
mod09c=brick("data/cloud_ymonmean.nc",varname="CF");names(mod09c)=month.name
mod09sd=brick("data/cloud_std.nc",varname="CFsd")

## rasterize the biome dataset
bpath="data/teow/biomes.shp"
## create a copy with a numeric biome code
biome=readOGR("data/teow/","biomes")
bcode=unique(data.frame(code=biome$code,realm=biome$realm,biome=biome$biome))

system(paste("gdal_rasterize -a code -burn 0 -l mask ",bpath,"  work.tif"))

 if(!file.exists("output/biomesamplepoints.csv")){
     biome=readOGR("data/teow/","biomes")
     n_biomesamples=1000
     library(multicore)
     biomesample=do.call(rbind.data.frame,mclapply(1:length(biome),function(i)
         data.frame(code=biome$code[i],coordinates(spsample(biome[i,],n=n_biomesamples,type="stratified",nsig=2)))))
     colnames(biomesample)[2:3]=c("lon","lat")
     biomesample[,c("biome","realm")]=biome@data[match(biomesample$code,biome$code),c("biome","realm")]
     write.csv(biomesample,"output/biomesamplepoints.csv",row.names=F)
 }

     ## Extract data for points
 if(!file.exists("output/biomesamplepoints_cloud.csv")){
     biomesample=read.csv("output/biomesamplepoints.csv")
     biomep=raster::extract(mod09c,biomesample,sp=T)
     biomep$lon=biomesample$lon
     biomep@data[,c("lon","lat")]=coordinates(biomep)
     write.csv(biomep@data,"output/biomesamplepoints_cloud.csv",row.names=F)
}
biomep=read.csv("output/biomesamplepoints_cloud.csv")
coordinates(biomep)=c("lon","lat")
projection(biomep)="+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"


## get stratified sample of points from biomes for illustration
 if(!file.exists("output/biomesamplepoints.csv")){
     biome=readOGR("data/teow/","biomes")
     n_biomesamples=1000
     library(multicore)
     biomesample=do.call(rbind.data.frame,mclapply(1:length(biome),function(i)
         data.frame(code=biome$code[i],coordinates(spsample(biome[i,],n=n_biomesamples,type="stratified",nsig=2)))))
     colnames(biomesample)[2:3]=c("lon","lat")
     biomesample[,c("biome","realm")]=biome@data[match(biomesample$code,biome$code),c("biome","realm")]
     write.csv(biomesample,"output/biomesamplepoints.csv",row.names=F)
 }

     ## Extract data for points
 if(!file.exists("output/biomesamplepoints_cloud.csv")){
     biomesample=read.csv("output/biomesamplepoints.csv")
     biomep=raster::extract(mod09c,biomesample,sp=T)
     biomep$lon=biomesample$lon
     biomep@data[,c("lon","lat")]=coordinates(biomep)
     write.csv(biomep@data,"output/biomesamplepoints_cloud.csv",row.names=F)
}
biomep=read.csv("output/biomesamplepoints_cloud.csv")
coordinates(biomep)=c("lon","lat")
projection(biomep)="+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"


#plot(mod09a,layers=1,margin=F,maxpixels=100)

## calculated differences
cldm$difm=cldm$mod09-cldm$cld_all
cldm$difs=cldm$mod09sd+cldm$cldsd_all

#clda$dif=clda$mod09-clda$cld

## read in global coasts for nice plotting
library(maptools)
library(rgdal)

#coast=getRgshhsMap("/mnt/data/jetzlab/Data/environ/global/gshhg/gshhs_h.b", xlim = NULL, ylim = NULL, level = 4)
land=readShapePoly("/mnt/data/jetzlab/Data/environ/global/gshhg/GSHHS_shp/c/GSHHS_c_L1.shp",force_ring=TRUE)
projection(land)="+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
CP <- as(extent(-180, 180, -60, 84), "SpatialPolygons")
proj4string(CP) <- CRS(proj4string(land))
coast=as(land[land$area>50,],"SpatialLines")
## Clip the map
land <- gIntersection(land, CP, byid=F)
coast <- gIntersection(coast, CP, byid=F)

## get stratified sample of points from biomes for illustration
 if(!file.exists("output/biomesamplepoints.csv")){
     biome=readOGR("data/teow/","biomes")
     n_biomesamples=1000
     library(multicore)
     biomesample=do.call(rbind.data.frame,mclapply(1:length(biome),function(i)
         data.frame(code=biome$code[i],coordinates(spsample(biome[i,],n=n_biomesamples,type="stratified",nsig=2)))))
     colnames(biomesample)[2:3]=c("lon","lat")
     biomesample[,c("biome","realm")]=biome@data[match(biomesample$code,biome$code),c("biome","realm")]
     write.csv(biomesample,"output/biomesamplepoints.csv",row.names=F)
 }

     ## Extract data for points
 if(!file.exists("output/biomesamplepoints_cloud.csv")){
     biomesample=read.csv("output/biomesamplepoints.csv")
     biomep=raster::extract(mod09c,biomesample,sp=T)
     biomep$lon=biomesample$lon
     biomep@data[,c("lon","lat")]=coordinates(biomep)
     write.csv(biomep@data,"output/biomesamplepoints_cloud.csv",row.names=F)
}
biomep=read.csv("output/biomesamplepoints_cloud.csv")
coordinates(biomep)=c("lon","lat")
projection(biomep)="+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"


## Figures
n=100
at=seq(0,100,length=n)
colr=colorRampPalette(c("black","green","red"))
cols=colr(n)

## set plotting parameters
my.theme = trellis.par.get()
my.theme$strip.background=list(col="transparent")
trellis.par.set(my.theme)

#pdf("output/Figures.pdf",width=11,height=8.5)
png("output/CF_Figures_%03d.png",width=5000,height=4000,res=600,pointsize=42,bg="white")

## set plotting parameters
my.theme = trellis.par.get()
my.theme$strip.background=list(col="transparent")
trellis.par.set(my.theme)

res=1e6
greg=list(ylim=c(-60,84),xlim=c(-180,180))

## Figure 1: 4-panel summaries
#- Annual average
levelplot(mod09a,col.regions=colr(n),cuts=100,at=seq(0,100,len=100),colorkey=list(space="bottom",adj=1),
  margin=F,maxpixels=res,ylab="",xlab="",useRaster=T,ylim=greg$ylim)+
    layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+
    layer(sp.lines(coast,col="black"),under=F)
## Mean annual with validation stations
levelplot(mod09a,col.regions=colr(n),cuts=100,at=seq(0,100,len=100),colorkey=list(title="Cloud Frequency (%)",space="bottom",adj=1),
  margin=F,maxpixels=res,ylab="",xlab="",useRaster=T,ylim=greg$ylim)+
    layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+
    layer(panel.xyplot(lon,lat,pch=16,cex=.3,col="black"),data=data.frame(coordinates(st)))+
  layer(sp.lines(coast,col="black"),under=F)

## Seasonal Means
levelplot(mod09s,col.regions=colr(n),cuts=100,at=seq(0,100,len=100),colorkey=list(title="Cloud Frequency (%)", space="bottom",adj=2),
  margin=F,maxpixels=res,ylab="",xlab="",useRaster=T,ylim=greg$ylim)+
    layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+
    layer(sp.lines(coast,col="black"),under=F)


## Monthly Means
levelplot(mod09c,col.regions=colr(n),cuts=100,at=seq(0,100,len=100),colorkey=list(title="Cloud Frequency (%)", space="bottom",adj=1),
  margin=F,maxpixels=res,ylab="Latitude",xlab="Longitude",useRaster=T,ylim=greg$ylim)+
    layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+
    layer(sp.lines(coast,col="black"),under=F)

#- Monthly minimum
#- Monthly maximum
#- STDEV or Min-Max
p_mac=levelplot(mod09a,col.regions=colr(n),cuts=99,at=seq(0,100,length=100),margin=F,
    ylim=greg$ylim,maxpixels=res/10,colorkey=list(title="Cloud Frequency (%)", space="top",height=.75),xlab="",ylab="",useRaster=T)+
    layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+
    layer(sp.lines(coast,col="black"),under=F)
p_max=levelplot(mod09max,col.regions=colr(n),cuts=99,at=seq(0,100,length=100),margin=F,maxpixels=res/10,
    ylim=greg$ylim,colorkey=list(space="bottom",height=.75),useRaster=T)+
    layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+
    layer(sp.lines(coast,col="black"),under=F)
p_intra=levelplot(mod09intra,col.regions=colr(n),cuts=99,at=seq(0,40,length=100),margin=F,maxpixels=res/100,
    ylim=greg$ylim,colorkey=list(space="bottom",height=.75),useRaster=T)+
    layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+
    layer(sp.lines(coast,col="black"),under=F)
p_inter=levelplot(mod09inter,col.regions=colr(n),cuts=99,at=seq(0,15,length=100),margin=F,maxpixels=res/100,
    ylim=greg$ylim,colorkey=list(title="Cloud Frequency (%)", space="top",height=.75),useRaster=T)+
    layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+
    layer(sp.lines(coast,col="black"),under=F)

p3=c("Mean Cloud Frequency (%)"=p_mac,"Max Cloud Frequency (%)"=p_max,"Interannual Variability (sd)"=p_inter,"Intraannual Variability (sd)"=p_intra,x.same=T,y.same=F,merge.legends=T,layout=c(2,2))
print(p3)

bgr=function(x,n=100,br=0,c1=c("darkblue","blue","grey"),c2=c("grey","red","purple")){
    at=unique(c(seq(min(x,na.rm=T),max(x,na.rm=T),len=n)))
    bg=colorRampPalette(c1)
    gr=colorRampPalette(c2)
    return(list(at=at,col=c(bg(sum(at<br)),gr(sum(at>=br)))))
}

cldm$resid=NA
# get residuals of simple linear model
cldm$resid[!is.na(cldm$cld_all)&!is.na(cldm$mod09)]=residuals(lm(mod09~cld_all,data=cldm))
colat=bgr(cldm$resid)
phist=histogram(cldm$resid,breaks=colat$at,border=NA,col=colat$col,xlim=c(-30,30),type="count",xlab="MODCF Residuals")#,seq(0,1,len=6),na.rm=T)
pmap=xyplot(lat~lon|month2,data=cldm,groups=cut(cldm$resid,rev(colat$at)),
       par.settings=list(superpose.symbol=list(col=colat$col)),pch=16,cex=.25,
       auto.key=F,#list(space="right",title="Difference\n(MOD09-NDP026D)",cex.title=1),asp=1,
       ylab="Latitude",xlab="Longitude")+
  layer(sp.lines(coast,col="black",lwd=.1),under=F)
print(phist,position=c(0,.75,1,1),more=T)
print(pmap,position=c(0,0,1,.78))

### heatmap of mod09 vs. NDP for all months
hmcols=colorRampPalette(c("grey","blue","red","purple"))
#hmcols=colorRampPalette(c(grey(.8),grey(.3),grey(.2)))
tr=c(0,66)
colkey <- draw.colorkey(list(col = hmcols(tr[2]), at = tr[1]:tr[2],height=.25))

xyplot(cld_all~mod09|month2,data=cldm,panel=function(x,y,subscripts){
  n=50
  bins=seq(0,100,len=n)
  tb=melt(as.matrix(table(
    x=cut(x,bins,labels=bins[-1]),
    y=cut(y,bins,labels=bins[-1]))))
  qat=unique(tb$value)
  print(max(qat))
  qat=tr[1]:tr[2]#unique(tb$value)
  panel.levelplot(tb$x,tb$y,tb$value,at=qat,col.regions=c("transparent",hmcols(length(qat))),subscripts=1:nrow(tb))
#  panel.abline(0,1,col="black",lwd=2)
  panel.abline(lm(y ~ x),col="black",lwd=2)
#  panel.ablineq(lm(y ~ x), r.sq = TRUE,at = 0.6,pos=1, offset=0,digits=2,col="blue")
  panel.text(70,10,bquote(paste(R^2,"=",.(round(summary(lm(y ~ x))$r.squared,2)))),cex=1)
},asp=1,scales=list(at=seq(0,100,len=6),useRaster=T,colorkey=list(width=.5,title="Number of Stations")),
          ylab="NDP Mean Cloud Amount (%)",xlab="MODCF Cloud Frequency (%)",
              legend= list(right = list(fun = colkey)))#+ layer(panel.abline(0,1,col="black",lwd=2))


bwplot(lulcc~difm,data=cldm,horiz=T,xlab="Difference (MOD09-Observed)",varwidth=T,notch=T)+layer(panel.abline(v=0))

dev.off()

####################################################################
### Regional Comparisons
## Compare with worldclim and NPP
#wc=stack(as.list(paste("/mnt/data/jetzlab/Data/environ/global/worldclim/prec_",1:12,".bil",sep="")))
wc_map=stack(as.list(paste("/mnt/data/jetzlab/Data/environ/global/worldclim/bio_12.bil",sep="")))
wc_dem=stack(as.list(paste("/mnt/data/jetzlab/Data/environ/global/worldclim/alt.bil",sep="")))

regs=list(
  Cascades=extent(c(-122.8,-118,44.9,47)),
  Hawaii=extent(c(-156.5,-154,18.75,20.5)),
  Boliva=extent(c(-71,-63,-20,-15)),
  Venezuela=extent(c(-69,-59,0,7)),
  CFR=extent(c(17.75,22.5,-34.8,-32.6)),
  Madagascar=extent(c(46,52,-17,-12))
  #reg2=extent(c(-81,-70,-4,10))
  )


## read in GEWEX 1-degree data
gewex=mean(brick("data/gewex/CA_PATMOSX_NOAA.nc",varname="a_CA"))
names(gewex)="PATMOS-x GEWEX AVHRR"

## calculate 1-degree means of MODCF data
#MOD_gewex=gewex
#MOD_gewex@data@values=1:length(MOD_gewex@data@values)
#MOD_gewex2=zonal(mod09a,MOD_gewex,fun='mean')
png("output/Resolution_Figures_%03d.png",width=5500,height=4000,res=600,pointsize=36,bg="white")
trellis.par.set(my.theme)
#pdf("output/mod09_resolution.pdf",width=11,height=8.5)

r="Venezuela"
# ylab.right = "Cloud Frequency (%)",par.settings = list(layout.widths = list(axis.key.padding = 0.1,axis.left=0.6,ylab.right = 3,right.padding=2)),
pars=list(layout.heights=list(key.bottom=2,key.top=1),layout.widths = list(axis.key.padding = 3,axis.left=0.6))
p1=levelplot(crop(mod09a,regs[[r]]),col.regions=grey(seq(0,1,len=100)),at=seq(45,100,len=99),
    colorkey=list(space="top",width=1,height=.75,labels=list(labels=c(50,75,100),at=c(50,75,100))),
    cuts=99,margin=F,max.pixels=1e6,par.settings = pars)
p2=levelplot(crop(gewex,regs[[r]]),col.regions=grey(seq(0,1,len=100)),at=seq(.45,1,len=99),cuts=99,margin=F,max.pixels=1e6,
    colorkey=list(space="top",width=1,height=.75,labels=list(labels=c(50,75,100),at=c(.50,.75,1))),
    par.settings = pars)
tmap=crop(wc_map,regs[[r]])
p3=levelplot(tmap,col.regions=grey(seq(0,1,len=100)),cuts=100,at=seq(tmap@data@min,tmap@data@max,len=100),margin=F,maxpixels=1e6,
    colorkey=list(space="bottom",height=.75,width=1),xlab="",ylab="",main=names(regs)[r],useRaster=T,
    par.settings = pars)
p4=levelplot(crop(wc_dem,regs[[r]]),col.regions=grey(seq(0,1,len=100)),cuts=99,margin=F,max.pixels=1e6,
    colorkey=list(space="bottom",height=.75,width=1),
    par.settings = pars)#,labels=list(labels=c(1000,4000),at=c(1000,4000))))
print(c("MODCF (%)"=p1,"PATMOS-x GEWEX (%)"=p2,"WorldClim Precip (mm)"=p3,"Elevation (m)"=p4,x.same=T,y.same=T,merge.legends=T,layout=c(2,2)))


dev.off()


#########################################
### Some stats for paper

## number of stations retained
length(unique(cldm$StaID[!is.na(cldm$cld_all)]))
length(unique(cldm$StaID[!is.na(cldm$cld)]))

# approximate size of mod09ga archive - get total size for one day from the USGS website
size=scan("http://e4ftl01.cr.usgs.gov/MOLT/MOD09GA.005/2000.04.30/",what="char")
## extract all filesizes in MB (all the HDFs), sum them, and convert to TB for the length of the full record
sum(as.numeric(sub("M","",grep("[0-9]*M$",size,value=T))))/1024/1024*as.numeric(as.Date("2013-12-31")-as.Date("2000-02-24"))

## seasonal variability
cellStats(mod09sd,"mean")

## Validation table construction
quantile(cldm$difm,na.rm=T)

summary(lm(cld_all~mod09+lat,data=cldm))


###################################################################
### summary by biome
biomep$id=1:nrow(biomep)
biomepl=melt(biomep@data,id.vars=c("id","code","biome","realm"))
colnames(biomepl)[grep("variable",colnames(biomepl))]="month"
biomepl$month=factor(biomepl$month,ordered=T,levels=month.name)
biomepl$realm=factor(biomepl$realm,ordered=T,levels=c("Antarctic","Australasia","Oceania", "IndoMalay", "Neotropics","Palearctic","Nearctic" ))
biomepl$value[biomepl$value<0]=NA


png("output/Biome_Figures_%03d.png",width=5500,height=4000,res=600,pointsize=36,bg="white")
trellis.par.set(my.theme)

#[biomepl$id%in%sample(biomep$id,10000),]
p1=useOuterStrips(xyplot(value~month|realm+biome,groups=id,data=biomepl,panel=function(x,y,groups = groups, subscripts = subscripts){
    panel.xyplot(x,y,col=grey(0.6,0.2),type="l",lwd=.5,groups=groups,subscripts=subscripts)
    panel.smoother(y ~ s(x), method = "gam",lwd=2,subscripts=subscripts,n=24)
},scales=list(y=list(at=c(0,100),lim=c(-20,120),cex=.75,alternating=2,tck=c(0,1)),x=list(at=c(1,7),rot=45,alternating=1)),ylab="Biome",xlab.top="Geographic Realm",ylab.right="MODCF (%)", xlab="Month"),
    strip=strip.custom(par.strip.text = list(cex = .7)),strip.left=strip.custom(horizontal=TRUE,par.strip.text = list(cex = .75)))
p1$par.settings$layout.widths$strip.left[1]=13
p1$par.strip.text$lines=.65
print(p1)

dev.off()


####################################################################
## assess temporal stability

## spatially subset data to stations at least 10km apart
st2=remove.duplicates(st,zero=10)

## Subset data
## drop missing observations
cldm.t=cldm[!is.na(cldm$cld_all)&!is.na(cldm$mod09)&!is.na(cldm$biome),]
cldm.t=cldm.t[cldm.t$lat>=-60,]
#  make sure all stations have all mod09 data
stdrop=names(which(tapply(cldm.t$month,cldm.t$StaID,length)!=12))
cldm.t=cldm.t[!cldm.t$StaID%in%stdrop,]
# Keep only stations at least 10km apart
cldm.t=cldm.t[cldm.t$StaID%in%st2$id,]
## Subset to only some months, if desired
#cldm.t=cldm.t[cldm.t$month%in%1:3,]


## Select Knots
knots=spsample(land,500,type="regular")

#  reshape data
m.cld=cast(cldm.t,StaID+lat+lon+biome~month,value="cld_all");colnames(m.cld)[-(1:4)]=paste("cld.",colnames(m.cld)[-(1:4)],sep="")
m.mod09=cast(cldm.t,StaID~month,value="mod09");colnames(m.mod09)[-1]=paste("mod09.",colnames(m.mod09)[-1],sep="")
mdata=cbind(m.cld,m.mod09)

## cast to
coords <- as.matrix(m.cld[,c("lon","lat")])#as.matrix(ne.temp[,c("UTMX", "UTMY")]/1000)
max.d <- max(iDist(coords))

##make symbolic model formula statement for each month
mods <- lapply(paste(paste(paste("cld.",1:N.t,sep=''),paste("mod09.",1:N.t,sep=''),sep='~'),"",sep=""), as.formula)

tlm=model.matrix(lm(mods[[1]],data=mdata))

N.t <- ncol(m.mod09)-1 ##number of months
n <- nrow(m.cld) ##number of observation per months
p <- ncol(tlm) #number of regression parameters in each month

starting <- list("beta"=rep(0,N.t*p), "phi"=rep(3/(0.5*max.d), N.t),
                 "sigma.sq"=rep(2,N.t), "tau.sq"=rep(1, N.t),
                 "sigma.eta"=diag(rep(0.01, p)))
tuning <- list("phi"=rep(5, N.t))

priors <- list("beta.0.Norm"=list(rep(0,p), diag(1000,p)),
               "phi.Unif"=list(rep(3/(0.9*max.d), N.t), rep(3/(0.05*max.d), N.t)),
               "sigma.sq.IG"=list(rep(2,N.t), rep(10,N.t)),
               "tau.sq.IG"=list(rep(2,N.t), rep(5,N.t)),
               "sigma.eta.IW"=list(2, diag(0.001,p)))
cov.model <- "exponential"

## Run the model
n.samples <- 500
m.1=spDynLM(mods,data=mdata,coords=coords,knots=coordinates(knots),n.samples=n.samples,starting=starting,tuning=tuning,priors=priors,cov.model=cov.model,get.fitted=T,n.report=25)

save(m.1,file="output/m.1.Rdata")
## summarize
burn.in <- floor(0.75*n.samples)
quant <- function(x){quantile(x, prob=c(0.5, 0.025, 0.975))}
beta <- apply(m.1$p.beta.samples[burn.in:n.samples,], 2, quant)
beta.0 <- beta[,grep("Intercept", colnames(beta))]
beta.1 <- beta[,grep("mod09", colnames(beta))]




### Compare time periods
library(texreg)
 extract.lm <- function(model) {
     s <- summary(model)
     names <- rownames(s$coef)
     co <- s$coef[, 1]
     se <- s$coef[, 2]
     pval <- s$coef[, 4]
     rs <- s$r.squared
     n <- as.integer(nobs(model))
     rmse=sqrt(mean((residuals(s)^2)))
     gof <- c(rs, rmse, n)
     gof.names <- c("R-Squared","RMSE","n")
     tr <- createTexreg(coef.names = names, coef = co, se = se,
                        pvalues = pval, gof.names = gof.names, gof = gof)
     return(tr)
 }
setMethod("extract", signature = className("lm", "stats"),definition = extract.lm)

forms=c("cld~mod09+month2+lat")
lm_all=lm(cld_all~mod09+lat,data=cldm[!is.na(cldm$cld),])


### Compare two time periods
lm_all1=lm(cld_all~mod09,data=cldm[!is.na(cldm$cld)&cldm$cldn_all>=10,])

lm_mod=lm(cld~mod09,data=cldm[cldm$cldn==10,])
mods=list("1970-2009"=lm_all1,"2000-2009"=lm_mod)

screenreg(mods,digits=2,single.row=T,custom.model.names=names(mods),custom.coef.names = c("Intercept", "MODCF"))

htmlreg(mods,file = "output/tempstab.doc",
        custom.model.names = names(mods),
        single.row = T, inline.css = FALSE,doctype = TRUE, html.tag = TRUE, head.tag = TRUE, body.tag = TRUE)


## assess latitude bias
cldm$abslat=abs(cldm$lat)
cldm$absdif=abs(cldm$difm)
abslm=lm(absdif~abslat*I(abslat^2),data=cldm[cldm$cldn_all>30,])

xyplot(absdif~abslat|month2,type=c("p","smooth"),data=cldm,cex=.25,pch=16)

plot(absdif~abslat,data=cldm[cldm$cldn_all>30,],cex=.25,pch=16)
lines(0:90,predict(abslm,newdata=data.frame(abslat=0:90),type="response"),col="red")

bf=anovaBF(dif~lulcc+month2,data=cldm[!is.na(cldm$dif)&!is.na(cldm$lulcc),])
ch=posterior(bf, iterations = 1000)
summary(bf)
plot(bf)

## explore validation error
cldm$lulcc=as.factor(IGBP$class[match(cldm$lulc,IGBP$ID)])

## Table of RMSE's by lulc by month
lulctl=ddply(cldm,c("month","lulc"),function(x) c(count=nrow(x),rmse=sqrt(mean((x$mod09-x$cld)^2,na.rm=T))))
lulctl=lulctl[!is.na(lulctl$lulc),]
lulctl$lulcc=as.factor(IGBP$class[match(lulctl$lulc,IGBP$ID)])

lulctl=ddply(cldm,c("lulc"),function(x) c(count=nrow(x),mean=paste(round(mean(x$difm,na.rm=T),2)," (",round(sd(x$difm,na.rm=T),2),")",sep=""),rmse=round(sqrt(mean((x$difm)^2,na.rm=T)),2)))
lulctl$lulcc=as.factor(IGBP$class[match(lulctl$lulc,IGBP$ID)])
    print(xtable(lulctl[order(lulctl$rmse),c("lulcc","count","mean","rmse")],digits=1),type="html",include.rownames=F,file="output/lulcc.doc",row.names=F)


lulcrmse=cast(lulcrmsel,lulcc~month,value="rmse")
lulcrmse

lulcrmse.q=round(do.call(rbind,apply(lulcrmse,1,function(x) data.frame(Min=min(x,na.rm=T),Mean=mean(x,na.rm=T),Max=max(x,na.rm=T),SD=sd(x,na.rm=T)))),1)#quantile,c(0.025,0.5,.975),na.rm=T)),1)
lulcrmse.q=lulcrmse.q[order(lulcrmse.q$Mean,decreasing=T),]
lulcrmse.q

print(xtable(lulcrmse,digits=1),"html")

bgyr=colorRampPalette(c("blue","green","yellow","red"))
levelplot(rmse~month*lulcc,data=lulcrmsel,col.regions=bgyr(1000),at=quantile(lulcrmsel$rmse,seq(0,1,len=100),na.rm=T))


### Linear models
summary(lm(dif~as.factor(lulc)+lat+month2,data=cldm))
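Note on the sampling block above: spsample(type="stratified") draws one random location within each cell of a regular grid laid over the polygon, giving roughly even spatial coverage per biome. A self-contained sketch on a toy polygon (the script itself samples the TEOW biome polygons; assumes only the sp package):

library(sp)
## toy rectangle standing in for one biome polygon
p=SpatialPolygons(list(Polygons(list(Polygon(cbind(c(0,2,2,0),c(0,0,1,1)))),"b1")))
set.seed(1)
pts=spsample(p,n=100,type="stratified",nsig=2)  # ~100 spatially stratified points
plot(p); points(pts,pch=16,cex=.5)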
climate/research/cloud/MOD09/3_combine.R
################################################################################
###  calculate monthly means of terra and aqua
-
+setwd("/mnt/data/personal/adamw/projects/cloud")
datadir="/mnt/data2/projects/cloud/"

+library(raster)
+library(foreach)
+library(multicore)
+library(doMC)
+registerDoMC(12)
+
+
+### assemble list of files to process
+df=data.frame(path=list.files(paste(datadir,"/mcd09ctif",sep=""),full=T,pattern="M[Y|O].*[0-9]*[mean|sd].*tif$"),stringsAsFactors=F)
+df[,c("sensor","month","type")]=do.call(rbind.data.frame,strsplit(basename(df$path),"_|[.]"))[,c(1,2,3)]
+
+df2=unique(df[,c("month","type")])
+
+overwrite=T
+
# Create combined (MOD+MYD) corrected mean CF
-    foreach(i=1:12) %dopar% {
-        f=list.files(paste(datadir,"/mcd09ctif",sep=""),pattern=paste(".*[O|Y].*_",sprintf("%02d",i),"[.]tif$",sep=""),full=T)
+    foreach(i=1:nrow(df2), .options.multicore=list(preschedule=FALSE)) %dopar% {
+        f=df$path[df$month==df2$month[i]&df$type==df2$type[i]]
        ## Define output and check if it already exists
-        tmcd=paste(datadir,"/mcd09ctif/MCD09_",sprintf("%02d", i),"_uncompressed.tif",sep="")
-        tmcd2=paste(datadir,"/mcd09ctif/MCD09_",sprintf("%02d", i),".tif",sep="")
+        tmcd=paste(datadir,"/mcd09ctif/MCD09_",df2$type[i],"_",df2$month[i],"_uncompressed.tif",sep="")
+        tmcd2=paste(datadir,"/mcd09ctif/MCD09_",df2$type[i],"_",df2$month[i],".tif",sep="")
        ## check if output already exists
-#        if(file.exists(tmcd2)){print(paste(tmcd2,"Exists, moving on..."));return(NULL)}
-        if(file.exists(tmcd2)){print(paste(tmcd2,"Exists, deleting it..."));file.remove(tmcd,tmcd2)}
+        if(!overwrite&file.exists(tmcd2)){print(paste(tmcd2,"Exists, moving on..."));return(NULL)}
+        if(overwrite&file.exists(tmcd2)){print(paste(tmcd2,"Exists, deleting it..."));file.remove(tmcd,tmcd2)}
        ## Take average between images
        ## switch NA values to 32768 to facilitate recasting to 8-bit below, otherwise they are confounded with 0 cloud values
-        ops=paste("-t_srs 'EPSG:4326' -multi -srcnodata -32768 -dstnodata 32767 -r bilinear -te -180 -90 180 90 -tr 0.008333333333333 -0.008333333333333",
+        ops=paste(" -t_srs 'EPSG:4326' -multi -srcnodata 65535 -dstnodata 65535 -r bilinear -te -180 -90 180 90 -tr 0.008333333333333 -0.008333333333333",
            "-co BIGTIFF=YES  --config GDAL_CACHEMAX 20000 -wm 2000 -wo NUM_THREADS:10 -wo SOURCE_EXTRA=5")
        system(paste("gdalwarp -overwrite -r average -co COMPRESS=LZW -co ZLEVEL=9  ",ops," ",paste(f,collapse=" ")," ",tmcd))
        ## update metadata
-        tags=c(paste("TIFFTAG_IMAGEDESCRIPTION='Monthly Cloud Frequency for 2000-2013 extracted from C5 MODIS MOD09GA and MYD09GA PGE11 internal cloud mask algorithm (embedded in state_1km bit 10).",
+        if(df2$type[i]=="mean")
+            tags=c(paste("TIFFTAG_IMAGEDESCRIPTION='Monthly Cloud Frequency for 2000-2013 extracted from C5 MODIS MOD09GA and MYD09GA PGE11 internal cloud mask algorithm (embedded in state_1km bit 10).",
            "The daily cloud mask time series were summarized to mean cloud frequency (CF) by calculating the proportion of cloudy days.'"),
            "TIFFTAG_DOCUMENTNAME='Collection 5 MCD09 Cloud Frequency'",
            paste("TIFFTAG_DATETIME='2013",sprintf("%02d", i),"15'",sep=""),
              "TIFFTAG_ARTIST='Adam M. Wilson (adam.wilson@yale.edu)'")
-        system(paste("/usr/local/src/gdal-1.10.0/swig/python/scripts/gdal_edit.py ",tmcd," ",paste("-mo ",tags,sep="",collapse=" "),sep=""))
-        # create final fixed image
-        system(paste("gdal_translate -co COMPRESS=LZW -co ZLEVEL=9 -co PREDICTOR=2 ",tmcd," ",tmcd2,sep=""))
-        writeLines(paste("Finished month",i))
-    }
-
-################################################
-# Create combined (MOD+MYD) corrected mean CF SD
-    foreach(i=1:12) %dopar% {
-        f=list.files(paste(datadir,"/mcd09tif",sep=""),pattern=paste(".*[O|Y].*_",sprintf("%02d",i),"[.]tif$",sep=""),full=T)
-        ## Define output and check if it already exists
-        tmcd=paste(datadir,"/mcd09ctif/MCD09sd_",sprintf("%02d", i),"_uncompressed.tif",sep="")
-        tmcd=paste(datadir,"/mcd09ctif/MCD09sd_",sprintf("%02d", i),"_uncompressed.tif",sep="")
-        tmcd2=paste(datadir,"/mcd09ctif/MCD09sd_",sprintf("%02d", i),".tif",sep="")
-        ## check if output already exists
-        if(file.exists(tmcd2)){print(paste(tmcd2,"Exists, moving on..."));return(NULL)}
-        ## Take average between images
-        ops=paste("-t_srs 'EPSG:4326' -multi -srcnodata -32768 -dstnodata -32768 -r bilinear -te -180 -90 180 90 -tr 0.008333333333333 -0.008333333333333",
-            "-co BIGTIFF=YES  --config GDAL_CACHEMAX 20000 -wm 2000 -wo NUM_THREADS:10 -wo SOURCE_EXTRA=5")
-        system(paste("gdalwarp -overwrite -r average -co COMPRESS=LZW -co ZLEVEL=9  ",ops," ",paste(f,collapse=" ")," ",tmcd))
-        ## update metadata
+        if(df2$type[i]=="sd")
        tags=c(paste("TIFFTAG_IMAGEDESCRIPTION='Standard Deviation of the Monthly Cloud Frequency for 2000-2013 extracted from C5 MODIS",
            " MOD09GA and MYD09GA PGE11 internal cloud mask algorithm (embedded in state_1km bit 10).",
-            "The daily cloud mask time series were summarized to mean cloud frequency (CF) by calculating the proportion of cloudy days"),
+            "The daily cloud mask time series were summarized to mean cloud frequency (CF) by calculating the proportion of cloudy days.'"),
            "TIFFTAG_DOCUMENTNAME='Collection 5 MCD09 SD of Cloud Frequency'",
            paste("TIFFTAG_DATETIME='2013",sprintf("%02d", i),"15'",sep=""),
              "TIFFTAG_ARTIST='Adam M. Wilson (adam.wilson@yale.edu)'")
        system(paste("/usr/local/src/gdal-1.10.0/swig/python/scripts/gdal_edit.py ",tmcd," ",paste("-mo ",tags,sep="",collapse=" "),sep=""))
        # create final fixed image
-        system(paste("gdal_translate -b 2 -a_nodata -32768 -co COMPRESS=LZW -co ZLEVEL=9 -co PREDICTOR=2 ",tmcd," ",tmcd2,sep=""))
-        writeLines(paste("Finished month",i))
+        system(paste("gdal_translate -co COMPRESS=LZW -co ZLEVEL=9 -co PREDICTOR=2 ",tmcd," ",tmcd2,sep=""))
+        file.remove(tmcd)
+        writeLines(paste("##########################################    Finished ",tmcd2))
    }



#################################################################################
###### convert to 8-bit compressed file, add colors and other details
-f2=list.files(paste(datadir,"/mcd09ctif",sep=""),pattern=paste(".*MCD09_[0-9].[.]tif$",sep=""),full=T)

-for( i in 1:length(f2)){
+
+f2=list.files(paste(datadir,"/mcd09ctif",sep=""),pattern=paste(".*MCD09_.*_[0-9].[.]tif$",sep=""),full=T)
+
+
+foreach(i=1:length(f2), .options.multicore=list(preschedule=FALSE)) %dopar% {
    file=f2[i]
-    outfilevrt=paste(datadir,"/mcd09ctif/",sub(".tif",".vrt",basename(file)),sep="")
-    outfile=paste("data/mcd09tif/MCD09_",sprintf("%02d",i),".tif",sep="")
-    outfilesd=paste("data/mcd09tif/MCD09_",sprintf("%02d",i),"_sd.tif",sep="")
-    ## create VRT and edit the color table
-    ## create the vrt to add a color table following https://trac.osgeo.org/gdal/wiki/FAQRaster#Howtocreateormodifyanimagecolortable
-    system(paste("gdal_translate  -scale 0 10000 0 100 -of VRT ",file," ",outfilevrt))
+    outfilevrt=sub("[.]tif",".vrt",file)
+    outfile=paste("data/mcd09tif/",basename(file),sep="")
+    ## rescale to 0-100 using a VRT
+    system(paste("gdal_translate  -scale 0 10000 0 100 -of VRT ",file," ",outfilevrt))
+    ## add color table for 8-bit data
    vrt=scan(outfilevrt,what="char")
    hd=c("<ColorInterp>Palette</ColorInterp>","<ColorTable>")
    ft="</ColorTable>"
......
    ## update missing data flag following http://lists.osgeo.org/pipermail/gdal-dev/2010-February/023541.html
    csi=grep("<ComplexSource>",vrt2)  # get index of current color table
    vrt2=c(vrt2[1:csi],"<NODATA>327</NODATA>",vrt2[(csi+1):length(vrt2)])
-    write.table(vrt2,file=outfilevrt,col.names=F,row.names=F,quote=F)
-
-    #system(paste("gdal_translate  -of VRT -scale 0 10000 0 100  ",outfilevrt," ",outfilevrt2))
-
-
-                                        #    system(paste("pkreclass  -i ",outfilevrt," ",paste("-mo ",tags,sep="",collapse=" ")," ",outfilevrt," ",outfile2))
+    write.table(vrt2,file=outfilevrt,col.names=F,row.names=F,quote=F)
    tags=c(paste("TIFFTAG_IMAGEDESCRIPTION='Monthly Cloud Frequency for 2000-2013 extracted from C5 MODIS M*D09GA PGE11 internal cloud mask algorithm (embedded in state_1km bit 10).",
        "The daily cloud mask time series were summarized to mean cloud frequency (CF) by calculating the proportion of cloudy days. ",
        "Band Descriptions: 1) Mean Monthly Cloud Frequency'"),
......

################
### calculate inter vs. intra annual variability
-f3=list.files(paste(datadir,"/mcd09ctif",sep=""),pattern=paste(".*MCD09_[0-9].[.]tif$",sep=""),full=T)
-f3sd=list.files(paste(datadir,"/mcd09ctif",sep=""),pattern=paste(".*MCD09sd_[0-9].[.]tif$",sep=""),full=T)
+f3=list.files(paste(datadir,"/mcd09ctif",sep=""),pattern=paste(".*MCD09_mean_[0-9].[.]tif$",sep=""),full=T)
+f3sd=list.files(paste(datadir,"/mcd09ctif",sep=""),pattern=paste(".*MCD09_sd_[0-9].[.]tif$",sep=""),full=T)

dmean=stack(as.list(f3))
dsd=stack(as.list(f3sd))

-dinter=calc(dmean,sd,file=paste(datadir,"/mcd09ctif/inter.tif",sep=""),options=c("COMPRESS=LZW","ZLEVEL=9"))
-dintra=calc(dsd,mean,file=paste(datadir,"/mcd09ctif/intra.tif",sep=""),options=c("COMPRESS=LZW","ZLEVEL=9"))
+beginCluster(12)
+
+## Function to calculate standard deviation and round it to nearest integer
+Rsd=function(x) calc(x,function(x) round(sd(x,na.rm=T)))
+
+dinter=clusterR(dmean,Rsd,file=paste(datadir,"/mcd09ctif/inter.tif",sep=""),options=c("COMPRESS=LZW","PREDICTOR=2"),overwrite=T,dataType='INT1U',NAflag=255)
+dintra=clusterR(dsd,mean,file=paste(datadir,"/mcd09ctif/intra.tif",sep=""),options=c("COMPRESS=LZW","PREDICTOR=2"),overwrite=T,dataType='INT1U',NAflag=255)
+
+endCluster()

tplot=F
if(tplot){
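
Note on the inter/intra-annual step above: the single-threaded calc() calls were replaced by raster's snow-based cluster interface, which applies a wrapped calc() to chunks of the stack in parallel. A minimal sketch of the same pattern on a toy stack (assumes the raster and snow packages are installed):

library(raster)
r=raster(nrows=10,ncols=10)
s=stack(lapply(1:12,function(i) setValues(r,runif(ncell(r))*100)))  # toy monthly stack
beginCluster(2)  # example: two workers
Rsd=function(x) calc(x,function(v) round(sd(v,na.rm=TRUE)))  # rounded per-pixel SD, as above
dinter=clusterR(s,Rsd)
endCluster()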
climate/research/cloud/MOD09/4_validate.R
### Script to download and process the NDP-026D station cloud dataset
### to validate MODIS cloud frequencies

setwd("/mnt/data/personal/adamw/projects/cloud/")

library(multicore)
library(doMC)
library(rasterVis)
library(rgdal)
library(reshape)
library(maptools)
library(rgeos)

## Data available here http://cdiac.ornl.gov/epubs/ndp/ndp026d/ndp026d.html

download=F  #download data?
## Get station locations
if(download)   system("wget -N -nd http://cdiac.ornl.gov/ftp/ndp026d/cat01/01_STID -P data/NDP026D/data/")

st=read.table("data/NDP026D/data/01_STID",skip=1)
colnames(st)=c("StaID","LAT","LON","ELEV","ny1","fy1","ly1","ny7","fy7","ly7","SDC","b5c")
st$lat=st$LAT/100
st$lon=st$LON/100
st$lon[st$lon>180]=st$lon[st$lon>180]-360
st=st[,c("StaID","ELEV","lat","lon")]
colnames(st)=c("id","elev","lat","lon")
write.csv(st,"stations.csv",row.names=F)
coordinates(st)=c("lon","lat")
projection(st)="+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
st@data[,c("lon","lat")]=coordinates(st)

## download data
if(download){
    system("wget -N -nd ftp://cdiac.ornl.gov/pub/ndp026d/cat67_78/* -A '.tc.Z' -P data/NDP026D/data/")
    system("gunzip data/*.Z")
}

## define FWF widths
f162=c(5,5,4,7,7,7,4) #format 162
c162=c("StaID","YR","Nobs","Amt","Fq","AWP","NC")

## use monthly timeseries
cld=do.call(rbind.data.frame,mclapply(sprintf("%02d",1:12),function(m) {
  d=read.fwf(list.files("data/NDP026D/data",pattern=paste("MNYDC.",m,".tc$",sep=""),full=T),skip=1,widths=f162)
  colnames(d)=c162
  d$month=as.numeric(m)
  print(m)
  return(d)}
  ))

## add lat/lon
cld[,c("lat","lon")]=coordinates(st)[match(cld$StaID,st$id),]

## drop missing values
cld=cld[,!grepl("Fq|AWP|NC",colnames(cld))]
cld$Amt[cld$Amt<0]=NA
cld$Amt=cld$Amt/100
cld=cld[!is.na(cld$Amt),]

## table of stations with > 20 observations per month
cast(cld,StaID~YR,value="Nobs")
mtab=ddply(cld,c('StaID','month'),function(df){ data.frame(count=sum(df$Nobs>20,na.rm=T))})
#mtab2=mtab[table(mtab$count>10)]
stem(mtab$count)

## calculate means and sds for full record (1970-2009)
Nobsthresh=20 #minimum number of observations to include

cldm=do.call(rbind.data.frame,by(cld,list(month=as.factor(cld$month),StaID=as.factor(cld$StaID)),function(x){
  data.frame(
      month=x$month[1],
      StaID=x$StaID[1],
      cld_all=mean(x$Amt[x$Nobs>=Nobsthresh],na.rm=T),  # full record
      cldsd_all=sd(x$Amt[x$Nobs>=Nobsthresh],na.rm=T),
      cldn_all=length(x$Amt[x$Nobs>=Nobsthresh]),
      cld=mean(x$Amt[x$YR>=2000&x$Nobs>=Nobsthresh],na.rm=T), #only MODIS epoch
      cldsd=sd(x$Amt[x$YR>=2000&x$Nobs>=Nobsthresh],na.rm=T),
      cldn=length(x$Amt[x$YR>=2000&x$Nobs>=Nobsthresh]))}))

    cldm[,c("lat","lon")]=coordinates(st)[match(cldm$StaID,st$id),c("lat","lon")]



## add the EarthEnvCloud data to cld
mod09=stack(list.files("data/mcd09tif/",pattern="MCD09_[0-9]*[.]tif",full=T))
NAvalue(mod09)=255
#mod09std=brick("~/acrobates/adamw/projects/cloud/data/cloud_ymonstd.nc")

## overlay the data with 32km diameter (16km radius) buffer
## buffer size from Dybbroe, et al. (2005) doi:10.1175/JAM-2189.1.
buf=16000
bins=cut(st$lat,10)
rerun=F
if(rerun&file.exists("valid.csv")) file.remove("valid.csv")
mod09sta=lapply(levels(bins),function(lb) {
  l=which(bins==lb)
  ## mean
  td=extract(mod09,st[l,],buffer=buf,fun=mean,na.rm=T,df=T)
  td$id=st$id[l]
  td$type="mean"
  ## std
  td2=extract(mod09std,st[l,],buffer=buf,fun=mean,na.rm=T,df=T)
  td2$id=st$id[l]
  td2$type="sd"
  print(lb)#as.vector(c(l,td[,1:4])))
  write.table(rbind(td,td2),"valid.csv",append=T,col.names=F,quote=F,sep=",",row.names=F)
  td
})#,mc.cores=3)

## read it back in
mod09st=read.csv("valid.csv",header=F)[,-c(1)]
colnames(mod09st)=c(names(mod09),"id","type")
mod09stl=melt(mod09st,id.vars=c("id","type"))
mod09stl[,c("year","month")]=do.call(rbind,strsplit(sub("X","",mod09stl$variable),"[.]"))[,1:2]
mod09stl$value[mod09stl$value<0]=NA
mod09stl=cast(mod09stl,id+year+month~type,value="value")

## add it to cld
cldm$mod09=mod09stl$mean[match(paste(cldm$StaID,cldm$month),paste(mod09stl$id,as.numeric(mod09stl$month)))]
cldm$mod09sd=mod09stl$sd[match(paste(cldm$StaID,cldm$month),paste(mod09stl$id,as.numeric(mod09stl$month)))]


## LULC
#system(paste("gdalwarp -r near -co \"COMPRESS=LZW\" -tr ",paste(res(mod09),collapse=" ",sep=""),
#             "-tap -multi -t_srs \"",   projection(mod09),"\" /mnt/data/jetzlab/Data/environ/global/landcover/MODIS/MCD12Q1_IGBP_2005_v51.tif ../modis/mod12/MCD12Q1_IGBP_2005_v51.tif"))
lulc=raster("~/acrobates/adamw/projects/interp/data/modis/mod12/MCD12Q1_IGBP_2005_v51.tif")
require(plotKML); data(worldgrids_pal)  #load IGBP palette
IGBP=data.frame(ID=0:16,col=worldgrids_pal$IGBP[-c(18,19)],stringsAsFactors=F)
IGBP$class=rownames(IGBP);rownames(IGBP)=1:nrow(IGBP)
levels(lulc)=list(IGBP)
## function to get modal lulc value
Mode <- function(x) {
      ux <- na.omit(unique(x))
      ux[which.max(tabulate(match(x, ux)))]
      }
lulcst=extract(lulc,st,fun=Mode,buffer=buf,df=T)
colnames(lulcst)=c("id","lulc")
## add it to cld
cldm$lulc=lulcst$lulc[match(cldm$StaID,lulcst$id)]
cldm$lulcc=IGBP$class[match(cldm$lulc,IGBP$ID)]


### Add biome data
biome=readOGR("../teow/","biomes")
projection(biome)=projection(st)
#st$biome=over(st,biome,returnList=F)$BiomeID
dists=apply(gDistance(st,biome,byid=T),2,which.min)
st$biomec=biome$code[dists]
st$realm=biome$realm[dists]
st$biome=biome$biome[dists]

cldm$biomec=st$biomec[match(cldm$StaID,st$id)]
cldm$realm=st$realm[match(cldm$StaID,st$id)]
cldm$biome=st$biome[match(cldm$StaID,st$id)]


## write out the tables
write.csv(cld,file="cld.csv",row.names=F)
write.csv(cldm,file="cldm.csv",row.names=F)
writeOGR(st,dsn=".",layer="stations",driver="ESRI Shapefile",overwrite_layer=T)
#########################################################################
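Note on the LULC step above: extract() reduces all raster cells inside each 16 km buffer with the supplied fun, and because land cover is categorical the script passes the modal value rather than a mean. The Mode() helper checked on its own with a toy vector:

Mode <- function(x) {
  ux <- na.omit(unique(x))       # candidate non-NA values
  ux[which.max(tabulate(match(x, ux)))]  # value with the highest count
}
Mode(c(3, 3, 5, NA, 3, 5))  # returns 3, the most frequent non-NA value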
climate/research/cloud/MOD09/NDP-026D.R
### Script to download and process the NDP-026D station cloud dataset
### to validate MODIS cloud frequencies

setwd("~/acrobates/adamw/projects/cloud/data/NDP026D")

library(multicore)
library(doMC)
library(rasterVis)
library(rgdal)
library(reshape)
library(maptools)
library(rgeos)

## Data available here http://cdiac.ornl.gov/epubs/ndp/ndp026d/ndp026d.html

## Get station locations
system("wget -N -nd http://cdiac.ornl.gov/ftp/ndp026d/cat01/01_STID -P data/")
st=read.table("data/01_STID",skip=1)
colnames(st)=c("StaID","LAT","LON","ELEV","ny1","fy1","ly1","ny7","fy7","ly7","SDC","b5c")
st$lat=st$LAT/100
st$lon=st$LON/100
st$lon[st$lon>180]=st$lon[st$lon>180]-360
st=st[,c("StaID","ELEV","lat","lon")]
colnames(st)=c("id","elev","lat","lon")
write.csv(st,"stations.csv",row.names=F)
coordinates(st)=c("lon","lat")
projection(st)="+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
st@data[,c("lon","lat")]=coordinates(st)

## download data
system("wget -N -nd ftp://cdiac.ornl.gov/pub/ndp026d/cat67_78/* -A '.tc.Z' -P data/")

system("gunzip data/*.Z")

## define FWF widths
f162=c(5,5,4,7,7,7,4) #format 162
c162=c("StaID","YR","Nobs","Amt","Fq","AWP","NC")

## use monthly timeseries
cld=do.call(rbind.data.frame,mclapply(sprintf("%02d",1:12),function(m) {
  d=read.fwf(list.files("data",pattern=paste("MNYDC.",m,".tc$",sep=""),full=T),skip=1,widths=f162)
  colnames(d)=c162
  d$month=as.numeric(m)
  print(m)
  return(d)}
  ))

## add lat/lon
cld[,c("lat","lon")]=coordinates(st)[match(cld$StaID,st$id),]

## drop missing values
cld=cld[,!grepl("Fq|AWP|NC",colnames(cld))]
cld$Amt[cld$Amt<0]=NA
cld$Amt=cld$Amt/100
cld=cld[!is.na(cld$Amt),]

## table of stations with > 20 observations per month
cast(cld,StaID~YR,value="Nobs")
mtab=ddply(cld,c('StaID','month'),function(df){ data.frame(count=sum(df$Nobs>20,na.rm=T))})
#mtab2=mtab[table(mtab$count>10)]
stem(mtab$count)

## calculate means and sds for full record (1970-2009)
Nobsthresh=20 #minimum number of observations to include

cldm=do.call(rbind.data.frame,by(cld,list(month=as.factor(cld$month),StaID=as.factor(cld$StaID)),function(x){
  data.frame(
      month=x$month[1],
      StaID=x$StaID[1],
      cld_all=mean(x$Amt[x$Nobs>=Nobsthresh],na.rm=T),  # full record
      cldsd_all=sd(x$Amt[x$Nobs>=Nobsthresh],na.rm=T),
      cldn_all=length(x$Amt[x$Nobs>=Nobsthresh]),
      cld=mean(x$Amt[x$YR>=2000&x$Nobs>=Nobsthresh],na.rm=T), #only MODIS epoch
      cldsd=sd(x$Amt[x$YR>=2000&x$Nobs>=Nobsthresh],na.rm=T),
      cldn=length(x$Amt[x$YR>=2000&x$Nobs>=Nobsthresh]))}))

    cldm[,c("lat","lon")]=coordinates(st)[match(cldm$StaID,st$id),c("lat","lon")]




## add the MOD09 data to cld
#### Evaluate MOD35 Cloud data
mod09=brick("~/acrobates/adamw/projects/cloud/data/cloud_ymonmean.nc")
mod09std=brick("~/acrobates/adamw/projects/cloud/data/cloud_ymonstd.nc")

## overlay the data with 32km diameter (16km radius) buffer
## buffer size from Dybbroe, et al. (2005) doi:10.1175/JAM-2189.1.
buf=16000
bins=cut(st$lat,10)
rerun=F
if(rerun&file.exists("valid.csv")) file.remove("valid.csv")
mod09sta=lapply(levels(bins),function(lb) {
  l=which(bins==lb)
  ## mean
  td=extract(mod09,st[l,],buffer=buf,fun=mean,na.rm=T,df=T)
  td$id=st$id[l]
  td$type="mean"
  ## std
  td2=extract(mod09std,st[l,],buffer=buf,fun=mean,na.rm=T,df=T)
  td2$id=st$id[l]
  td2$type="sd"
  print(lb)#as.vector(c(l,td[,1:4])))
  write.table(rbind(td,td2),"valid.csv",append=T,col.names=F,quote=F,sep=",",row.names=F)
  td
})#,mc.cores=3)

## read it back in
mod09st=read.csv("valid.csv",header=F)[,-c(1)]
colnames(mod09st)=c(names(mod09),"id","type")
mod09stl=melt(mod09st,id.vars=c("id","type"))
mod09stl[,c("year","month")]=do.call(rbind,strsplit(sub("X","",mod09stl$variable),"[.]"))[,1:2]
mod09stl$value[mod09stl$value<0]=NA
mod09stl=cast(mod09stl,id+year+month~type,value="value")

## add it to cld
cldm$mod09=mod09stl$mean[match(paste(cldm$StaID,cldm$month),paste(mod09stl$id,as.numeric(mod09stl$month)))]
cldm$mod09sd=mod09stl$sd[match(paste(cldm$StaID,cldm$month),paste(mod09stl$id,as.numeric(mod09stl$month)))]


## LULC
#system(paste("gdalwarp -r near -co \"COMPRESS=LZW\" -tr ",paste(res(mod09),collapse=" ",sep=""),
#             "-tap -multi -t_srs \"",   projection(mod09),"\" /mnt/data/jetzlab/Data/environ/global/landcover/MODIS/MCD12Q1_IGBP_2005_v51.tif ../modis/mod12/MCD12Q1_IGBP_2005_v51.tif"))
lulc=raster("~/acrobates/adamw/projects/interp/data/modis/mod12/MCD12Q1_IGBP_2005_v51.tif")
require(plotKML); data(worldgrids_pal)  #load IGBP palette
IGBP=data.frame(ID=0:16,col=worldgrids_pal$IGBP[-c(18,19)],stringsAsFactors=F)
IGBP$class=rownames(IGBP);rownames(IGBP)=1:nrow(IGBP)
levels(lulc)=list(IGBP)
## function to get modal lulc value
Mode <- function(x) {
      ux <- na.omit(unique(x))
      ux[which.max(tabulate(match(x, ux)))]
      }
lulcst=extract(lulc,st,fun=Mode,buffer=buf,df=T)
colnames(lulcst)=c("id","lulc")
## add it to cld
cldm$lulc=lulcst$lulc[match(cldm$StaID,lulcst$id)]
cldm$lulcc=IGBP$class[match(cldm$lulc,IGBP$ID)]


### Add biome data
biome=readOGR("../teow/","biomes")
projection(biome)=projection(st)
#st$biome=over(st,biome,returnList=F)$BiomeID
dists=apply(gDistance(st,biome,byid=T),2,which.min)
st$biomec=biome$code[dists]
st$realm=biome$realm[dists]
st$biome=biome$biome[dists]

cldm$biomec=st$biomec[match(cldm$StaID,st$id)]
cldm$realm=st$realm[match(cldm$StaID,st$id)]
cldm$biome=st$biome[match(cldm$StaID,st$id)]


## write out the tables
write.csv(cld,file="cld.csv",row.names=F)
write.csv(cldm,file="cldm.csv",row.names=F)
writeOGR(st,dsn=".",layer="stations",driver="ESRI Shapefile",overwrite_layer=T)
#########################################################################
climate/research/cloud/MOD09/ee/ee_compile.R
###  Script to compile the monthly cloud data from earth engine into a netcdf file for further processing

-library(rasterVis)
+library(raster)
library(doMC)
-library(multicore)
-library(foreach)
-library(mgcv)
-library(RcppOctave)
registerDoMC(12)


......
}

rasterOptions(tmpdir=tmpfs,overwrite=T, format="GTiff",maxmemory=1e9)
-
-
rerun=T  # set to true to recalculate all dates even if file already exists

## define month-sensors to process
......
#jobs=jobs[jobs$sensor=="MYD09",]


+
+### add boundaries to file list to remove problematic pixels at high latitudes
+## these boundaries were later added to the earth engine script, so if it is re-run this is not necessary
+#xmin,xmax,ymin,ymax
+mextent=list(
+    "01"=c(-180,-90,180,73.5),#
+    "02"=c(-180,-90,180,84),  #
+    "03"=c(-180,-90,180,90),
+    "04"=c(-180,-90,180,90),
+    "05"=c(-180,-69,180,90),
+    "06"=c(-180,-62.5,180,90),  #
+    "07"=c(-180,-67,180,90),    #
+    "08"=c(-180,-77,180,90),
+    "09"=c(-180,-77,180,90),
+    "10"=c(-180,-90,180,89), #
+    "11"=c(-180,-90,180,77),
+    "12"=c(-180,-90,180,69)
+)
+
+## project to sinusoidal
+proj="'+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs'"
+
+mextentsin=lapply(mextent,function(x) c(project(t(x[1:2]),sub("'","",proj)),project(t(x[3:4]),sub("'","",proj))))
+
## Loop over data to mosaic tifs, compress, and add metadata
    foreach(i=1:nrow(jobs)) %dopar% {
        ## get month
        m=jobs$month[i]
+        cm=sprintf("%02d",m)
+
        date=df$date[df$month==m][1]
        print(date)
        ## get sensor
......
        s2=sub("GA","",s)

        ## Define output and check if it already exists
-        tvrt=paste(datadir,"/mcd09tif/",s2,"_",sprintf("%02d", m),".vrt",sep="")
-        ttif1=paste(datadir,"/mcd09tif/",s2,"_",sprintf("%02d", m),"_uncompressed.tif",sep="")
-        ttif2=paste(datadir,"/mcd09tif/",s2,"_",sprintf("%02d", m),".tif",sep="")
+        tvrt=paste(datadir,"/mcd09tif/",s2,"_",cm,"_globalsin.vrt",sep="")
+        tvrt2=paste(datadir,"/mcd09tif/",s2,"_",cm,"_globalwgs84.vrt",sep="")
+        ttif=paste(datadir,"/mcd09tif/",s2,"_",cm,"_mean.tif",sep="")
+        ttif2=paste(datadir,"/mcd09tif/",s2,"_",cm,"_sd.tif",sep="")

        ## check if output already exists
-        if(!rerun&file.exists(ttif1)) return(NA)
+        if(!rerun&file.exists(ttif)) return(NA)
        ## build VRT to merge tiles
-        proj="'+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs'"
-        system(paste("gdalbuildvrt -b 1 -b 2 -srcnodata -32768 ",tvrt," ",paste(df$path[df$month==m&df$sensor==s],collapse=" ")))
+        ## include subsetting using the mextentsin object created above to ensure cropping of problematic values
+        system(paste("gdalbuildvrt -b 1 -b 2 -te ",paste(round(mextentsin[[cm]]),collapse=" ")," -srcnodata -32768 -vrtnodata 32767 ",tvrt," ",paste(df$path[df$month==m&df$sensor==s],collapse=" ")))
        ## Merge to geotif in temporary directory
-        ## specify source projection because it gets it slightly wrong by default #-ot Int16 -dstnodata -32768
-        ops=paste("-s_srs ",proj,"  -t_srs 'EPSG:4326' -multi -srcnodata -32768  -ot Int16 -dstnodata -32768 -r bilinear -te -180 -90 180 90 -tr 0.008333333333333 -0.008333333333333",
-            "-co BIGTIFF=YES --config GDAL_CACHEMAX 2000 -wm 2000 -wo NUM_THREADS:10 -co COMPRESS=LZW -co PREDICTOR=2")
-        ## if file exists, remove it to avoid warping into existing file
-        if(file.exists(ttif1)) file.remove(ttif1)
-        ## run the warp
-        system(paste("gdalwarp -overwrite ",ops," ",tvrt," ",ttif1))
+        ## specify source projection because it gets it slightly wrong by default
+        ops=paste("-multi -of vrt --config GDAL_CACHEMAX 500 -wm 500 -wo NUM_THREADS:10 -srcnodata 32767 -dstnodata 32767 -s_srs ",proj,"  -t_srs 'EPSG:4326' ",
+            " -ot Int16 -r bilinear -te -180 -90 180 90 -tr 0.008333333333333 -0.008333333333333")
+        ## create the warped VRT
+        system(paste("gdalwarp -overwrite ",ops," ",tvrt," ",tvrt2))

        ## Compress file and add metadata tags
-        ops2=paste("-ot Int16 -co COMPRESS=LZW -co PREDICTOR=2 -stats")
-        tags=c(paste("TIFFTAG_IMAGEDESCRIPTION='Monthly Cloud Frequency for 2000-2013 extracted from C5 MODIS ",s,"GA PGE11 internal cloud mask algorithm (embedded in state_1km bit 10).",
-            "The daily cloud mask time series were summarized to mean cloud frequency (CF) by calculating the proportion of cloudy days. ",
-            "Band Descriptions: 1) Mean Monthly Cloud Frequency x 10000 2) Standard Deviation of Mean Monthly Cloud x 10000'"),
-              "TIFFTAG_DOCUMENTNAME='Collection 5 ",s," Cloud Frequency'",
+        ops2=paste("-ot Int16 -co COMPRESS=LZW -co PREDICTOR=2 -stats -a_nodata 32767 ")
+        meantags=c(paste("TIFFTAG_IMAGEDESCRIPTION='Monthly Cloud Frequency (x10000) for 2000-2013 extracted from C5 MODIS ",s,"GA PGE11 internal cloud mask algorithm (embedded in state_1km bit 10).",
+            "The daily cloud mask time series were summarized to mean cloud frequency (CF) by calculating the proportion of cloudy days. '"),
+              paste("TIFFTAG_DOCUMENTNAME='Collection 5 ",s," Mean Cloud Frequency'",sep=""),
+              paste("TIFFTAG_DATETIME='2013",sprintf("%02d", m),"15'",sep=""),
+              "TIFFTAG_ARTIST='Adam M. Wilson (adam.wilson@yale.edu)'")
+        sdtags=c(paste("TIFFTAG_IMAGEDESCRIPTION='Standard Deviation (x10000) of Monthly Cloud Frequency for 2000-2013 extracted from C5 MODIS ",s,"GA PGE11 internal cloud mask algorithm (embedded in state_1km bit 10).",
+            "The daily cloud mask time series were summarized to mean cloud frequency (CF) by calculating the proportion of cloudy days. '"),
+              paste("TIFFTAG_DOCUMENTNAME='Collection 5 ",s," SD Cloud Frequency'",sep=""),
              paste("TIFFTAG_DATETIME='2013",sprintf("%02d", m),"15'",sep=""),
              "TIFFTAG_ARTIST='Adam M. Wilson (adam.wilson@yale.edu)'")
-        system(paste("gdal_translate  ",ops2," ",paste("-mo ",tags,sep="",collapse=" ")," ",ttif1," ",ttif2))

-        ## delete temporary files
-        file.remove(tvrt,ttif1)
+        ## run the merge, warp, and compress all in one step...
+        system(paste("gdal_translate -b 1  ",ops2," ",paste("-mo ",meantags,sep="",collapse=" ")," ",tvrt2," ",ttif))
+        system(paste("gdal_translate -b 2  ",ops2," ",paste("-mo ",sdtags,sep="",collapse=" ")," ",tvrt2," ",ttif2))
        writeLines(paste("Month:",m," Sensor:",s," Finished"))
    }
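Note on the added extents above: the -te bounds handed to gdalbuildvrt must be in the sinusoidal coordinates of the source tiles, so each lon/lat bound is first pushed through rgdal's project(). A standalone sketch for one corner (assumes the rgdal package; the sphere radius matches the MODIS sinusoidal definition used above):

library(rgdal)
sinu="+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs"
## lower-left corner of the January extent: lon/lat degrees -> sinusoidal metres
project(matrix(c(-180,-90),ncol=2),sinu)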