Revision a29da850
Added by Adam Wilson over 10 years ago
climate/research/cloud/MOD09/2_bias.R
1 | 1 |
### Script to correct orbital-path bias/artefacts in the monthly cloud data using VSNR filtering |
2 | 2 |
|
3 | 3 |
library(rasterVis) |
4 |
library(multicore) |
|
4 | 5 |
library(doMC) |
5 | 6 |
library(foreach) |
6 |
library(RcppOctave) |
|
7 | 7 |
library(rgdal) |
8 | 8 |
registerDoMC(12) |
9 | 9 |
|
... | ... | |
16 | 16 |
|
17 | 17 |
|
18 | 18 |
## Specify path to VSNR source code and add it to RcppOctave path |
19 |
library(RcppOctave) |
|
19 | 20 |
mpath="/mnt/data/personal/adamw/projects/environmental-layers/climate/research/cloud/MOD09/vsnr/" |
20 | 21 |
.O$addpath(mpath) |
21 | 22 |
|
... | ... | |
23 | 24 |
######################################### |
24 | 25 |
#### Bias correction functions |
25 | 26 |
|
26 |
fgabor=function(d,theta=-15,x=200,y=5){
|
|
27 |
fgabor=function(d,theta,x=200,y=5){ |
|
27 | 28 |
thetaR=(theta*pi)/180 |
28 | 29 |
cds=expand.grid(x=(1:nrow(d))-round(nrow(d)/2),y=(1:ncol(d))-round(ncol(d)/2)) |
29 | 30 |
sigma_x=x |
... | ... | |
102 | 103 |
allexts=rbind.data.frame(modexts,mydexts) |
103 | 104 |
|
104 | 105 |
### assemble list of files to process |
105 |
df2=data.frame(path=list.files(paste(datadir,"/mcd09tif",sep=""),full=T,pattern="[0-9][.]tif$"),stringsAsFactors=F)
|
|
106 |
df2[,c("sensor","month")]=do.call(rbind.data.frame,strsplit(basename(df2$path),"_|[.]"))[,c(1,2)]
|
|
106 |
df2=data.frame(path=list.files(paste(datadir,"/mcd09tif",sep=""),full=T,pattern="[0-9]*(mean|sd).*tif$"),stringsAsFactors=F)
|
|
107 |
df2[,c("sensor","month","type")]=do.call(rbind.data.frame,strsplit(basename(df2$path),"_|[.]"))[,c(1,2,3)]
|
|
107 | 108 |
|
108 | 109 |
## create a list of jobs to process |
109 | 110 |
jobs=data.frame(allexts,month=rep(sprintf("%02d",1:12),each=nrow(allexts))) |
110 |
jobs$path=df2$path[match(paste(jobs$sensor,jobs$month),paste(df2$sensor,df2$month))] |
|
111 |
jobs=rbind.data.frame(cbind.data.frame(type="mean",jobs),cbind.data.frame(type="sd",jobs)) |
|
112 |
jobs$path=df2$path[match(paste(jobs$sensor,jobs$month,jobs$type),paste(df2$sensor,df2$month,df2$type))] |
|
113 |
## drop any jobs with no associated files |
|
114 |
jobs=jobs[!is.na(jobs$path),] |
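The block above crosses the tile extents with the twelve months, duplicates the grid for the mean and sd layers, and attaches file paths with a paste()/match() key join. A minimal sketch of that join pattern on toy data (the column values here are hypothetical):

df2x=data.frame(sensor=c("MOD09","MYD09"),month="01",type="mean",
                path=c("MOD09_01_mean.tif","MYD09_01_mean.tif"),stringsAsFactors=F)
jobsx=data.frame(sensor="MOD09",month="01",type="mean")
## match() returns the row of df2x whose composite key equals the job's key
jobsx$path=df2x$path[match(paste(jobsx$sensor,jobsx$month,jobsx$type),
                           paste(df2x$sensor,df2x$month,df2x$type))]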
|
111 | 115 |
|
112 | 116 |
|
113 | 117 |
## loop over sensor-months to create full grid of corrected values |
114 |
foreach( i=1:nrow(jobs)) %dopar% { |
|
118 |
foreach( i=1:nrow(jobs), .options.multicore=list(preschedule=FALSE)) %dopar% {
|
|
115 | 119 |
file=jobs$path[i] |
116 | 120 |
toutfile=paste(datadir,"mcd09bias/", sub(".tif","",basename(file)),"_",jobs$tile[i],".tif",sep="") |
117 |
# if(file.exists(toutfile)) {writeLines(paste(toutfile,"Exists, moving on"));next}
|
|
121 |
if(file.exists(toutfile)) {writeLines(paste(toutfile,"Exists, moving on"));return(NULL)}
|
|
118 | 122 |
writeLines(paste("Starting: ",toutfile," tile:",jobs$tile[i]," ( ",i," out of ",nrow(jobs),")")) |
119 |
## set sensor-specific parameters |
|
120 |
## add extra region for correction depending on which sensor is being processed |
|
121 | 123 |
## set angle of orbital artefacts to be corrected |
122 | 124 |
sensor=jobs$sensor[i] |
123 | 125 |
if(sensor=="MOD09") scanangle=-15 |
... | ... | |
138 | 140 |
res=vsnr(d2,gabor=psi,alpha=2,p=2,epsilon=1,prec=5e-6,maxit=50,C=1,full=F) |
139 | 141 |
res2=res*100 |
140 | 142 |
## write the file |
141 |
writeRaster(res2,file=toutfile,overwrite=T,datatype='INT2S',options=c("COMPRESS=LZW", "PREDICTOR=2"),NAvalue=-32768)
|
|
143 |
writeRaster(res2,file=toutfile,overwrite=T,datatype='INT2S',options=c("COMPRESS=LZW", "PREDICTOR=2"),NAvalue=32767)
|
|
142 | 144 |
## remove temporary files |
143 | 145 |
rmr(d);rmr(d2);rmr(psi);rmr(res);rmr(res2) |
144 | 146 |
## remove old temporary files older than x hours |
... | ... | |
147 | 149 |
} |
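For reference, the core of the loop above reduces to building a Gabor kernel at the sensor-specific orbit angle and handing it to the Octave VSNR routine. A sketch of a single-tile correction using only calls shown in this script (d2 is assumed to be a cropped tile already loaded):

scanangle=-15                    # orbital artefact angle for MOD09 (set above)
psi=fgabor(d2,theta=scanangle)   # Gabor kernel aligned with the striping
res=vsnr(d2,gabor=psi,alpha=2,p=2,epsilon=1,prec=5e-6,maxit=50,C=1,full=F)
res2=res*100                     # rescale before writing as INT2S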
148 | 150 |
|
149 | 151 |
|
150 |
|
|
151 | 152 |
############################################ |
152 | 153 |
## now mosaic the tiles with the original image to keep only the corrected data (when available) and the uncorrected data where there is no tile. |
153 | 154 |
## this relies on df2 created above |
155 |
#tfs=list.files(paste(datadir,"/mcd09bias",sep=""),pattern="M.*_[0-9]._[0-9][.]tif",full=T) |
|
156 |
#file.rename(tfs,paste(substr(tfs,1,46),"mean_",substr(tfs,47,52),sep="")) |
|
157 |
|
|
158 |
## define color scale for mean and sd |
|
159 |
mkct=function(palette,vals,bit) |
|
160 |
data.frame(id=0:(2^bit-1),t(col2rgb(c(palette(length(vals)+1),rep("#00000000",(2^bit)-length(vals)-1)),alpha=T))) |
|
161 |
|
|
162 |
colR=colorRampPalette(c("#08306b","#0d57a1","#2878b8","#4997c9","#72b2d7","#a2cbe2","#c7dcef","#deebf7","#f7fbff")) |
|
163 |
meancols=mkct(colR,vals=0:10000,bit=16) |
|
164 |
write.table(meancols,file="data/colors16_mean.txt",quote=F,row.names=F,col.names=F) |
|
165 |
|
|
166 |
colR2=colorRampPalette(c("#0000FF","#00FF80","#FF0080")) |
|
167 |
sdcols=mkct(colR2,vals=0:10000,bit=16) |
|
168 |
write.table(sdcols,file="data/colors16_sd.txt",quote=F,row.names=F,col.names=F) |
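mkct stretches a palette over the 0-10000 data range and pads the remaining 16-bit entries with transparent black, since GDAL expects a full 2^16-entry color table. A quick sanity check of the output shape:

ct=mkct(colR,vals=0:10000,bit=16)
stopifnot(nrow(ct)==2^16)   # one RGBA row per possible 16-bit value
head(ct)                    # columns: id, red, green, blue, alpha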
|
169 |
|
|
154 | 170 |
|
155 | 171 |
foreach( i=1:nrow(df2)) %dopar% { |
156 | 172 |
ifile=df2$path[i] |
157 |
outfile=paste(datadir,"/mcd09ctif/",df2$sensor[i],"_",df2$month[i],"_uncompressed.tif",sep="") |
|
158 |
outfile2=paste(datadir,"/mcd09ctif/",df2$sensor[i],"_",df2$month[i],".tif",sep="") |
|
159 |
## if(file.exists(outfile)) {print(paste(outfile," exists, moving on...")); return(NULL)} |
|
160 |
## create VRT of first band of the full image |
|
161 |
fvrt=sub("[.]tif",".vrt",ifile) |
|
162 |
system(paste("gdalbuildvrt -b 1 ",fvrt," ",ifile)) |
|
173 |
itype=df2$type[i] |
|
174 |
outfile=paste(datadir,"/mcd09ctif/",sub(".tif",".vrt",basename(ifile)),sep="") |
|
175 |
outfile2=paste(datadir,"/mcd09ctif/unmasked_",basename(ifile),sep="") |
|
176 |
outfile3=paste(datadir,"/mcd09ctif/",basename(ifile),sep="") |
|
177 |
# if(file.exists(outfile3)) {print(paste(outfile," exists, moving on...")); return(NULL)} |
|
163 | 178 |
## mosaic the tiles with the original data (keeping the new data when available) |
164 |
tfiles=paste(c(fvrt,list.files(paste(datadir,"/mcd09bias",sep=""),pattern=paste(sub("[.]tif","",basename(outfile2)),"_[0-9]*[.]tif",sep=""),full=T)),collapse=" ") |
|
165 |
if(file.exists(outfile)) file.remove(outfile2,outfile) |
|
166 |
system(paste("gdal_merge.py --config GDAL_CACHEMAX 10000 -init -32768 -n -32768 -co BIGTIFF=yes -o ",outfile," ",tfiles,sep="")) |
|
167 |
system(paste("gdal_translate -co COMPRESS=LZW -co ZLEVEL=9 -co PREDICTOR=2 ",outfile," ",outfile2,sep="")) |
|
168 |
file.remove(fvrt,outfile) |
|
169 |
writeLines(paste("Finished ",outfile)) |
|
179 |
tfiles=paste(c(ifile,list.files(paste(datadir,"/mcd09bias",sep=""),pattern=paste(sub("[.]tif","",basename(outfile3)),"_[0-9]*[.]tif",sep=""),full=T)),collapse=" ") |
|
180 |
system(paste("gdalbuildvrt -srcnodata 32767 -vrtnodata 32767 ",outfile," ",tfiles,sep="")) |
|
181 |
system(paste("gdal_translate -a_ullr -180 90 180 -90 -a_nodata 32767 -co COMPRESS=LZW -co ZLEVEL=9 -co PREDICTOR=2 ",outfile," ",outfile2,sep="")) |
|
182 |
## use pksetmask to clamp any values above 100% (except the missing values) back to 100% |

183 |
## such values arise from the reprojection from sinusoidal to wgs84, which leaves a few pixels slightly greater than 100 |
|
185 |
## also convert to an unsigned 16-bit integer to allow adding a color table |
|
186 |
ict=ifelse(itype=="mean","data/colors16_mean.txt","data/colors16_sd.txt") |
|
187 |
system(paste("pksetmask -i ",outfile2," -m ",outfile2," -ot UInt16 ", |
|
188 |
"--operator='>' --msknodata 20000 --nodata 65535 --operator='>' --msknodata 10000 --nodata 10000 --operator='<' --msknodata 0 --nodata 65535 ", |
|
189 |
" -ct ",ict," -co COMPRESS=LZW -co PREDICTOR=2 -o ",outfile3)) |
|
190 |
## clean up temporary files |
|
191 |
file.remove(outfile,outfile2) |
|
192 |
writeLines(paste("####################################### Finished ",outfile)) |
|
170 | 193 |
} |
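The pksetmask call applies three rules on the 0-10000 scaled data: values above 20000 or below 0 become nodata (65535), and values between 10000 and 20000 are clamped to 10000 (i.e. 100% cloud frequency). An equivalent, much slower raster-algebra sketch of the same rules (r is a hypothetical raster layer holding the scaled band):

clampcf=function(r){
  r[r>20000 | r<0]=65535        # implausible values -> nodata
  r[r>10000 & r<20000]=10000    # reprojection overshoot -> 100% CF
  r
}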
171 |
|
|
172 | 194 |
|
173 | 195 |
|
196 |
## check output |
|
197 |
for(i in 1:nrow(df2)) { |
|
198 |
ifile=df2$path[i] |
|
199 |
outfile3=paste(datadir,"/mcd09ctif/",basename(ifile),sep="") |
|
200 |
print(ifile) |
|
201 |
system(paste("gdalinfo ",outfile3,"| grep 'Size is'")) |
|
202 |
} |
climate/research/cloud/MOD09/3_biome.R
1 |
### Produce summary of cloud frequency by biome |
|
2 |
|
|
3 |
setwd("~/acrobates/adamw/projects/cloud/") |
|
4 |
|
|
5 |
|
|
6 |
## libraries |
|
7 |
library(rasterVis) |
|
8 |
library(latticeExtra) |
|
9 |
library(xtable) |
|
10 |
library(texreg) |
|
11 |
library(reshape) |
|
12 |
library(caTools) |
|
13 |
library(rgeos) |
|
14 |
library(raster) |
|
15 |
|
|
16 |
|
|
17 |
#### Evaluate MOD35 Cloud data |
|
18 |
mod09c=brick("data/cloud_ymonmean.nc",varname="CF");names(mod09c)=month.name |
|
19 |
mod09sd=brick("data/cloud_std.nc",varname="CFsd") |
|
20 |
|
|
21 |
## rasterize the biome dataset |
|
22 |
bpath="data/teow/biomes.shp" |
|
23 |
## create a copy with a numeric biome code |
|
24 |
biome=readOGR("data/teow/","biomes") |
|
25 |
bcode=unique(data.frame(code=biome$code,realm=biome$realm,biome=biome$biome)) |
|
26 |
|
|
27 |
system(paste("gdal_rasterize -a code -l biomes ",bpath," work.tif")) |
|
28 |
|
|
29 |
## only create the sample points if they don't already exist |
if(!file.exists("output/biomesamplepoints.csv")){ |
biome=readOGR("data/teow/","biomes") |
|
30 |
n_biomesamples=1000 |
|
31 |
library(multicore) |
|
32 |
biomesample=do.call(rbind.data.frame,mclapply(1:length(biome),function(i) |
|
33 |
data.frame(code=biome$code[i],coordinates(spsample(biome[i,],n=n_biomesamples,type="stratified",nsig=2))))) |
|
34 |
colnames(biomesample)[2:3]=c("lon","lat") |
|
35 |
biomesample[,c("biome","realm")]=biome@data[match(biomesample$code,biome$code),c("biome","realm")] |
|
36 |
write.csv(biomesample,"output/biomesamplepoints.csv",row.names=F) |
|
37 |
} |
|
38 |
|
|
39 |
## Extract data for points |
|
40 |
if(!file.exists("output/biomesamplepoints_cloud.csv")){ |
|
41 |
biomesample=read.csv("output/biomesamplepoints.csv") |
|
42 |
biomep=raster::extract(mod09c,biomesample,sp=T) |
|
43 |
biomep$lon=biomesample$lon |
|
44 |
biomep@data[,c("lon","lat")]=coordinates(biomep) |
|
45 |
write.csv(biomep@data,"output/biomesamplepoints_cloud.csv",row.names=F) |
|
46 |
} |
|
47 |
biomep=read.csv("output/biomesamplepoints_cloud.csv") |
|
48 |
coordinates(biomep)=c("lon","lat") |
|
49 |
projection(biomep)="+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs" |
|
50 |
|
|
51 |
|
|
52 |
## get stratified sample of points from biomes for illustration |
|
53 |
if(!file.exists("output/biomesamplepoints.csv")){ |
|
54 |
biome=readOGR("data/teow/","biomes") |
|
55 |
n_biomesamples=1000 |
|
56 |
library(multicore) |
|
57 |
biomesample=do.call(rbind.data.frame,mclapply(1:length(biome),function(i) |
|
58 |
data.frame(code=biome$code[i],coordinates(spsample(biome[i,],n=n_biomesamples,type="stratified",nsig=2))))) |
|
59 |
colnames(biomesample)[2:3]=c("lon","lat") |
|
60 |
biomesample[,c("biome","realm")]=biome@data[match(biomesample$code,biome$code),c("biome","realm")] |
|
61 |
write.csv(biomesample,"output/biomesamplepoints.csv",row.names=F) |
|
62 |
} |
|
63 |
|
|
64 |
## Extract data for points |
|
65 |
if(!file.exists("output/biomesamplepoints_cloud.csv")){ |
|
66 |
biomesample=read.csv("output/biomesamplepoints.csv") |
|
67 |
biomep=raster::extract(mod09c,biomesample,sp=T) |
|
68 |
biomep$lon=biomesample$lon |
|
69 |
biomep@data[,c("lon","lat")]=coordinates(biomep) |
|
70 |
write.csv(biomep@data,"output/biomesamplepoints_cloud.csv",row.names=F) |
|
71 |
} |
|
72 |
biomep=read.csv("output/biomesamplepoints_cloud.csv") |
|
73 |
coordinates(biomep)=c("lon","lat") |
|
74 |
projection(biomep)="+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs" |
|
75 |
|
|
76 |
|
|
77 |
#plot(mod09a,layers=1,margin=F,maxpixels=100) |
|
78 |
|
|
79 |
## calculated differences |
|
80 |
cldm$difm=cldm$mod09-cldm$cld_all |
|
81 |
cldm$difs=cldm$mod09sd+cldm$cldsd_all |
|
82 |
|
|
83 |
#clda$dif=clda$mod09-clda$cld |
|
84 |
|
|
85 |
## read in global coasts for nice plotting |
|
86 |
library(maptools) |
|
87 |
library(rgdal) |
|
88 |
|
|
89 |
#coast=getRgshhsMap("/mnt/data/jetzlab/Data/environ/global/gshhg/gshhs_h.b", xlim = NULL, ylim = NULL, level = 4) |
|
90 |
land=readShapePoly("/mnt/data/jetzlab/Data/environ/global/gshhg/GSHHS_shp/c/GSHHS_c_L1.shp",force_ring=TRUE) |
|
91 |
projection(land)="+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs" |
|
92 |
CP <- as(extent(-180, 180, -60, 84), "SpatialPolygons") |
|
93 |
proj4string(CP) <- CRS(proj4string(land)) |
|
94 |
coast=as(land[land$area>50,],"SpatialLines") |
|
95 |
## Clip the map |
|
96 |
land <- gIntersection(land, CP, byid=F) |
|
97 |
coast <- gIntersection(coast, CP, byid=F) |
|
98 |
|
|
99 |
## get stratified sample of points from biomes for illustration |
|
100 |
if(!file.exists("output/biomesamplepoints.csv")){ |
|
101 |
biome=readOGR("data/teow/","biomes") |
|
102 |
n_biomesamples=1000 |
|
103 |
library(multicore) |
|
104 |
biomesample=do.call(rbind.data.frame,mclapply(1:length(biome),function(i) |
|
105 |
data.frame(code=biome$code[i],coordinates(spsample(biome[i,],n=n_biomesamples,type="stratified",nsig=2))))) |
|
106 |
colnames(biomesample)[2:3]=c("lon","lat") |
|
107 |
biomesample[,c("biome","realm")]=biome@data[match(biomesample$code,biome$code),c("biome","realm")] |
|
108 |
write.csv(biomesample,"output/biomesamplepoints.csv",row.names=F) |
|
109 |
} |
|
110 |
|
|
111 |
## Extract data for points |
|
112 |
if(!file.exists("output/biomesamplepoints_cloud.csv")){ |
|
113 |
biomesample=read.csv("output/biomesamplepoints.csv") |
|
114 |
biomep=raster::extract(mod09c,biomesample,sp=T) |
|
115 |
biomep$lon=biomesample$lon |
|
116 |
biomep@data[,c("lon","lat")]=coordinates(biomep) |
|
117 |
write.csv(biomep@data,"output/biomesamplepoints_cloud.csv",row.names=F) |
|
118 |
} |
|
119 |
biomep=read.csv("output/biomesamplepoints_cloud.csv") |
|
120 |
coordinates(biomep)=c("lon","lat") |
|
121 |
projection(biomep)="+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs" |
|
122 |
|
|
123 |
|
|
124 |
## Figures |
|
125 |
n=100 |
|
126 |
at=seq(0,100,length=n) |
|
127 |
colr=colorRampPalette(c("black","green","red")) |
|
128 |
cols=colr(n) |
|
129 |
|
|
130 |
## set plotting parameters |
|
131 |
my.theme = trellis.par.get() |
|
132 |
my.theme$strip.background=list(col="transparent") |
|
133 |
trellis.par.set(my.theme) |
|
134 |
|
|
135 |
#pdf("output/Figures.pdf",width=11,height=8.5) |
|
136 |
png("output/CF_Figures_%03d.png",width=5000,height=4000,res=600,pointsize=42,bg="white") |
|
137 |
|
|
138 |
## set plotting parameters |
|
139 |
my.theme = trellis.par.get() |
|
140 |
my.theme$strip.background=list(col="transparent") |
|
141 |
trellis.par.set(my.theme) |
|
142 |
|
|
143 |
res=1e6 |
|
144 |
greg=list(ylim=c(-60,84),xlim=c(-180,180)) |
|
145 |
|
|
146 |
## Figure 1: 4-panel summaries |
|
147 |
#- Annual average |
|
148 |
levelplot(mod09a,col.regions=colr(n),cuts=100,at=seq(0,100,len=100),colorkey=list(space="bottom",adj=1), |
|
149 |
margin=F,maxpixels=res,ylab="",xlab="",useRaster=T,ylim=greg$ylim)+ |
|
150 |
layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+ |
|
151 |
layer(sp.lines(coast,col="black"),under=F) |
|
152 |
## Mean annual with validation stations |
|
153 |
levelplot(mod09a,col.regions=colr(n),cuts=100,at=seq(0,100,len=100),colorkey=list(title="Cloud Frequency (%)",space="bottom",adj=1), |
|
154 |
margin=F,maxpixels=res,ylab="",xlab="",useRaster=T,ylim=greg$ylim)+ |
|
155 |
layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+ |
|
156 |
layer(panel.xyplot(lon,lat,pch=16,cex=.3,col="black"),data=data.frame(coordinates(st)))+ |
|
157 |
layer(sp.lines(coast,col="black"),under=F) |
|
158 |
|
|
159 |
## Seasonal Means |
|
160 |
levelplot(mod09s,col.regions=colr(n),cuts=100,at=seq(0,100,len=100),colorkey=list(title="Cloud Frequency (%)", space="bottom",adj=2), |
|
161 |
margin=F,maxpixels=res,ylab="",xlab="",useRaster=T,ylim=greg$ylim)+ |
|
162 |
layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+ |
|
163 |
layer(sp.lines(coast,col="black"),under=F) |
|
164 |
|
|
165 |
|
|
166 |
## Monthly Means |
|
167 |
levelplot(mod09c,col.regions=colr(n),cuts=100,at=seq(0,100,len=100),colorkey=list(title="Cloud Frequency (%)", space="bottom",adj=1), |
|
168 |
margin=F,maxpixels=res,ylab="Latitude",xlab="Longitude",useRaster=T,ylim=greg$ylim)+ |
|
169 |
layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+ |
|
170 |
layer(sp.lines(coast,col="black"),under=F) |
|
171 |
|
|
172 |
#- Monthly minimum |
|
173 |
#- Monthly maximum |
|
174 |
#- STDEV or Min-Max |
|
175 |
p_mac=levelplot(mod09a,col.regions=colr(n),cuts=99,at=seq(0,100,length=100),margin=F, |
|
176 |
ylim=greg$ylim,maxpixels=res/10,colorkey=list(title="Cloud Frequency (%)", space="top",height=.75),xlab="",ylab="",useRaster=T)+ |
|
177 |
layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+ |
|
178 |
layer(sp.lines(coast,col="black"),under=F) |
|
179 |
p_max=levelplot(mod09max,col.regions=colr(n),cuts=99,at=seq(0,100,length=100),margin=F,maxpixels=res/10, |
|
180 |
ylim=greg$ylim,colorkey=list(space="bottom",height=.75),useRaster=T)+ |
|
181 |
layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+ |
|
182 |
layer(sp.lines(coast,col="black"),under=F) |
|
183 |
p_intra=levelplot(mod09intra,col.regions=colr(n),cuts=99,at=seq(0,40,length=100),margin=F,maxpixels=res/100, |
|
184 |
ylim=greg$ylim,colorkey=list(space="bottom",height=.75),useRaster=T)+ |
|
185 |
layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+ |
|
186 |
layer(sp.lines(coast,col="black"),under=F) |
|
187 |
p_inter=levelplot(mod09inter,col.regions=colr(n),cuts=99,at=seq(0,15,length=100),margin=F,maxpixels=res/100, |
|
188 |
ylim=greg$ylim,colorkey=list(title="Cloud Frequency (%)", space="top",height=.75),useRaster=T)+ |
|
189 |
layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+ |
|
190 |
layer(sp.lines(coast,col="black"),under=F) |
|
191 |
|
|
192 |
p3=c("Mean Cloud Frequency (%)"=p_mac,"Max Cloud Frequency (%)"=p_max,"Interannual Variability (sd)"=p_inter,"Intraannual Variability (sd)"=p_intra,x.same=T,y.same=F,merge.legends=T,layout=c(2,2)) |
|
193 |
print(p3) |
|
194 |
|
|
195 |
bgr=function(x,n=100,br=0,c1=c("darkblue","blue","grey"),c2=c("grey","red","purple")){ |
|
196 |
at=unique(c(seq(min(x,na.rm=T),max(x,na.rm=T),len=n))) |
|
197 |
bg=colorRampPalette(c1) |
|
198 |
gr=colorRampPalette(c2) |
|
199 |
return(list(at=at,col=c(bg(sum(at<br)),gr(sum(at>=br))))) |
|
200 |
} |
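bgr splits a diverging palette at br (zero by default): the cool ramp gets as many colors as there are breakpoints below the split and the warm ramp the rest, so the grey midpoint lands on zero residual. For example:

p=bgr(c(-2,-1,0,1,5),n=10)
length(p$col)   # 10 colors spanning the data range
sum(p$at<0)     # number of cool-colored bins below zero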
|
201 |
|
|
202 |
cldm$resid=NA |
|
203 |
# get residuals of simple linear model |
|
204 |
cldm$resid[!is.na(cldm$cld_all)&!is.na(cldm$mod09)]=residuals(lm(mod09~cld_all,data=cldm)) |
|
205 |
colat=bgr(cldm$resid) |
|
206 |
phist=histogram(cldm$resid,breaks=colat$at,border=NA,col=colat$col,xlim=c(-30,30),type="count",xlab="MODCF Residuals")#,seq(0,1,len=6),na.rm=T) |
|
207 |
pmap=xyplot(lat~lon|month2,data=cldm,groups=cut(cldm$resid,rev(colat$at)), |
|
208 |
par.settings=list(superpose.symbol=list(col=colat$col)),pch=16,cex=.25, |
|
209 |
auto.key=F,#list(space="right",title="Difference\n(MOD09-NDP026D)",cex.title=1),asp=1, |
|
210 |
ylab="Latitude",xlab="Longitude")+ |
|
211 |
layer(sp.lines(coast,col="black",lwd=.1),under=F) |
|
212 |
print(phist,position=c(0,.75,1,1),more=T) |
|
213 |
print(pmap,position=c(0,0,1,.78)) |
|
214 |
|
|
215 |
### heatmap of mod09 vs. NDP for all months |
|
216 |
hmcols=colorRampPalette(c("grey","blue","red","purple")) |
|
217 |
#hmcols=colorRampPalette(c(grey(.8),grey(.3),grey(.2))) |
|
218 |
tr=c(0,66) |
|
219 |
colkey <- draw.colorkey(list(col = hmcols(tr[2]), at = tr[1]:tr[2],height=.25)) |
|
220 |
|
|
221 |
xyplot(cld_all~mod09|month2,data=cldm,panel=function(x,y,subscripts){ |
|
222 |
n=50 |
|
223 |
bins=seq(0,100,len=n) |
|
224 |
tb=melt(as.matrix(table( |
|
225 |
x=cut(x,bins,labels=bins[-1]), |
|
226 |
y=cut(y,bins,labels=bins[-1])))) |
|
227 |
qat=unique(tb$value) |
|
228 |
print(max(qat)) |
|
229 |
qat=tr[1]:tr[2]#unique(tb$value) |
|
230 |
panel.levelplot(tb$x,tb$y,tb$value,at=qat,col.regions=c("transparent",hmcols(length(qat))),subscripts=1:nrow(tb)) |
|
231 |
# panel.abline(0,1,col="black",lwd=2) |
|
232 |
panel.abline(lm(y ~ x),col="black",lwd=2) |
|
233 |
# panel.ablineq(lm(y ~ x), r.sq = TRUE,at = 0.6,pos=1, offset=0,digits=2,col="blue") |
|
234 |
panel.text(70,10,bquote(paste(R^2,"=",.(round(summary(lm(y ~ x))$r.squared,2)))),cex=1) |
|
235 |
},asp=1,scales=list(at=seq(0,100,len=6),useRaster=T,colorkey=list(width=.5,title="Number of Stations")), |
|
236 |
ylab="NDP Mean Cloud Amount (%)",xlab="MODCF Cloud Frequency (%)", |
|
237 |
legend= list(right = list(fun = colkey)))#+ layer(panel.abline(0,1,col="black",lwd=2)) |
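The panel function above bins the station/MODCF value pairs onto a 50-break grid with cut()/table() and melts the count matrix for panel.levelplot. The binning step in isolation (synthetic data; melt comes from the reshape package loaded above):

bins=seq(0,100,len=50)
x=runif(500,0,100); y=runif(500,0,100)
tb=melt(as.matrix(table(x=cut(x,bins,labels=bins[-1]),
                        y=cut(y,bins,labels=bins[-1]))))
head(tb)   # one row per grid cell: x bin, y bin, pair count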
|
238 |
|
|
239 |
|
|
240 |
bwplot(lulcc~difm,data=cldm,horiz=T,xlab="Difference (MOD09-Observed)",varwidth=T,notch=T)+layer(panel.abline(v=0)) |
|
241 |
|
|
242 |
dev.off() |
|
243 |
|
|
244 |
#################################################################### |
|
245 |
### Regional Comparisons |
|
246 |
## Compare with worldclim and NPP |
|
247 |
#wc=stack(as.list(paste("/mnt/data/jetzlab/Data/environ/global/worldclim/prec_",1:12,".bil",sep=""))) |
|
248 |
wc_map=stack(as.list(paste("/mnt/data/jetzlab/Data/environ/global/worldclim/bio_12.bil",sep=""))) |
|
249 |
wc_dem=stack(as.list(paste("/mnt/data/jetzlab/Data/environ/global/worldclim/alt.bil",sep=""))) |
|
250 |
|
|
251 |
regs=list( |
|
252 |
Cascades=extent(c(-122.8,-118,44.9,47)), |
|
253 |
Hawaii=extent(c(-156.5,-154,18.75,20.5)), |
|
254 |
Bolivia=extent(c(-71,-63,-20,-15)), |
|
255 |
Venezuela=extent(c(-69,-59,0,7)), |
|
256 |
CFR=extent(c(17.75,22.5,-34.8,-32.6)), |
|
257 |
Madagascar=extent(c(46,52,-17,-12)) |
|
258 |
#reg2=extent(c(-81,-70,-4,10)) |
|
259 |
) |
|
260 |
|
|
261 |
|
|
262 |
## read in GEWEX 1-degree data |
|
263 |
gewex=mean(brick("data/gewex/CA_PATMOSX_NOAA.nc",varname="a_CA")) |
|
264 |
names(gewex)="PATMOS-x GEWEX AVHRR" |
|
265 |
|
|
266 |
## calculate 1-degree means of MODCF data |
|
267 |
#MOD_gewex=gewex |
|
268 |
#MOD_gewex@data@values=1:length(MOD_gewex@data@values) |
|
269 |
#MOD_gewex2=zonal(mod09a,MOD_gewex,fun='mean') |
|
270 |
png("output/Resolution_Figures_%03d.png",width=5500,height=4000,res=600,pointsize=36,bg="white") |
|
271 |
trellis.par.set(my.theme) |
|
272 |
#pdf("output/mod09_resolution.pdf",width=11,height=8.5) |
|
273 |
|
|
274 |
r="Venezuela" |
|
275 |
# ylab.right = "Cloud Frequency (%)",par.settings = list(layout.widths = list(axis.key.padding = 0.1,axis.left=0.6,ylab.right = 3,right.padding=2)), |
|
276 |
pars=list(layout.heights=list(key.bottom=2,key.top=1),layout.widths = list(axis.key.padding = 3,axis.left=0.6)) |
|
277 |
p1=levelplot(crop(mod09a,regs[[r]]),col.regions=grey(seq(0,1,len=100)),at=seq(45,100,len=99), |
|
278 |
colorkey=list(space="top",width=1,height=.75,labels=list(labels=c(50,75,100),at=c(50,75,100))), |
|
279 |
cuts=99,margin=F,max.pixels=1e6,par.settings = pars) |
|
280 |
p2=levelplot(crop(gewex,regs[[r]]),col.regions=grey(seq(0,1,len=100)),at=seq(.45,1,len=99),cuts=99,margin=F,max.pixels=1e6, |
|
281 |
colorkey=list(space="top",width=1,height=.75,labels=list(labels=c(50,75,100),at=c(.50,.75,1))), |
|
282 |
par.settings = pars) |
|
283 |
tmap=crop(wc_map,regs[[r]]) |
|
284 |
p3=levelplot(tmap,col.regions=grey(seq(0,1,len=100)),cuts=100,at=seq(tmap@data@min,tmap@data@max,len=100),margin=F,maxpixels=1e6, |
|
285 |
colorkey=list(space="bottom",height=.75,width=1),xlab="",ylab="",main=r,useRaster=T, |
|
286 |
par.settings = pars) |
|
287 |
p4=levelplot(crop(wc_dem,regs[[r]]),col.regions=grey(seq(0,1,len=100)),cuts=99,margin=F,max.pixels=1e6, |
|
288 |
colorkey=list(space="bottom",height=.75,width=1), |
|
289 |
par.settings = pars)#,labels=list(labels=c(1000,4000),at=c(1000,4000)))) |
|
290 |
print(c("MODCF (%)"=p1,"PATMOS-x GEWEX (%)"=p2,"WorldClim Precip (mm)"=p3,"Elevation (m)"=p4,x.same=T,y.same=T,merge.legends=T,layout=c(2,2))) |
|
291 |
|
|
292 |
|
|
293 |
dev.off() |
|
294 |
|
|
295 |
|
|
296 |
######################################### |
|
297 |
### Some stats for paper |
|
298 |
|
|
299 |
## number of stations retained |
|
300 |
length(unique(cldm$StaID[!is.na(cldm$cld_all)])) |
|
301 |
length(unique(cldm$StaID[!is.na(cldm$cld)])) |
|
302 |
|
|
303 |
# approximate size of mod09ga archive - get total size for one day from the USGS website |
|
304 |
size=scan("http://e4ftl01.cr.usgs.gov/MOLT/MOD09GA.005/2000.04.30/",what="char") |
|
305 |
## extract all filesizes in MB (all the HDFs), sum them, and convert to TB over the length of the full record |
|
306 |
sum(as.numeric(sub("M","",grep("[0-9]*M$",size,value=T))))/1024/1024*as.numeric(as.Date("2013-12-31")-as.Date("2000-02-24")) |
|
307 |
|
|
308 |
## seasonal variability |
|
309 |
cellStats(mod09sd,"mean") |
|
310 |
|
|
311 |
## Validation table construction |
|
312 |
quantile(cldm$difm,na.rm=T) |
|
313 |
|
|
314 |
summary(lm(cld_all~mod09+lat,data=cldm)) |
|
315 |
|
|
316 |
|
|
317 |
|
|
318 |
################################################################### |
|
319 |
### summary by biome |
|
320 |
biomep$id=1:nrow(biomep) |
|
321 |
biomepl=melt(biomep@data,id.vars=c("id","code","biome","realm")) |
|
322 |
colnames(biomepl)[grep("variable",colnames(biomepl))]="month" |
|
323 |
biomepl$month=factor(biomepl$month,ordered=T,levels=month.name) |
|
324 |
biomepl$realm=factor(biomepl$realm,ordered=T,levels=c("Antarctic","Australasia","Oceania", "IndoMalay", "Neotropics","Palearctic","Nearctic" )) |
|
325 |
biomepl$value[biomepl$value<0]=NA |
|
326 |
|
|
327 |
|
|
328 |
png("output/Biome_Figures_%03d.png",width=5500,height=4000,res=600,pointsize=36,bg="white") |
|
329 |
trellis.par.set(my.theme) |
|
330 |
|
|
331 |
#[biomepl$id%in%sample(biomep$id,10000),] |
|
332 |
p1=useOuterStrips(xyplot(value~month|realm+biome,groups=id,data=biomepl,panel=function(x,y,groups = groups, subscripts = subscripts){ |
|
333 |
panel.xyplot(x,y,col=grey(0.6,0.2),type="l",lwd=.5,groups=groups,subscripts=subscripts) |
|
334 |
panel.smoother(y ~ s(x), method = "gam",lwd=2,subscripts=subscripts,n=24) |
|
335 |
},scales=list(y=list(at=c(0,100),lim=c(-20,120),cex=.75,alternating=2,tck=c(0,1)),x=list(at=c(1,7),rot=45,alternating=1)),ylab="Biome",xlab.top="Geographic Realm",ylab.right="MODCF (%)", xlab="Month"), |
|
336 |
strip=strip.custom(par.strip.text = list(cex = .7)),strip.left=strip.custom(horizontal=TRUE,par.strip.text = list(cex = .75))) |
|
337 |
p1$par.settings$layout.widths$strip.left[1]=13 |
|
338 |
p1$par.strip.text$lines=.65 |
|
339 |
print(p1) |
|
340 |
|
|
341 |
dev.off() |
|
342 |
|
|
343 |
|
|
344 |
#################################################################### |
|
345 |
## assess temporal stability |
|
346 |
|
|
347 |
## spatially subset data to stations at least 10km apart |
|
348 |
st2=remove.duplicates(st,zero=10) |
|
349 |
|
|
350 |
## Subset data |
|
351 |
## drop missing observations |
|
352 |
cldm.t=cldm[!is.na(cldm$cld_all)&!is.na(cldm$mod09)&!is.na(cldm$biome),] |
|
353 |
cldm.t=cldm.t[cldm.t$lat>=-60,] |
|
354 |
# make sure all stations have all mod09 data |
|
355 |
stdrop=names(which(tapply(cldm.t$month,cldm.t$StaID,length)!=12)) |
|
356 |
cldm.t=cldm.t[!cldm.t$StaID%in%stdrop,] |
|
357 |
# Keep only stations at least 10km apart |
|
358 |
cldm.t=cldm.t[cldm.t$StaID%in%st2$id,] |
|
359 |
## Subset to only some months, if desired |
|
360 |
#cldm.t=cldm.t[cldm.t$month%in%1:3,] |
|
361 |
|
|
362 |
|
|
363 |
## Select Knots |
|
364 |
knots=spsample(land,500,type="regular") |
|
365 |
|
|
366 |
# reshape data |
|
367 |
m.cld=cast(cldm.t,StaID+lat+lon+biome~month,value="cld_all");colnames(m.cld)[-(1:4)]=paste("cld.",colnames(m.cld)[-(1:4)],sep="") |
|
368 |
m.mod09=cast(cldm.t,StaID~month,value="mod09");colnames(m.mod09)[-1]=paste("mod09.",colnames(m.mod09)[-1],sep="") |
|
369 |
mdata=cbind(m.cld,m.mod09) |
|
370 |
|
|
371 |
## build the station coordinate matrix |
library(spBayes) ## provides iDist() and spDynLM() used below |

coords <- as.matrix(m.cld[,c("lon","lat")]) |

max.d <- max(iDist(coords)) |
|
374 |
|
|
375 |
N.t <- ncol(m.mod09)-1 ## number of months (must be defined before the formulas that use it) |

n <- nrow(m.cld) ## number of observations per month |

## make symbolic model formula statement for each month |
mods <- lapply(paste(paste("cld.",1:N.t,sep=''),paste("mod09.",1:N.t,sep=''),sep='~'), as.formula) |

tlm=model.matrix(lm(mods[[1]],data=mdata)) |

p <- ncol(tlm) # number of regression parameters in each month |
|
383 |
|
|
384 |
starting <- list("beta"=rep(0,N.t*p), "phi"=rep(3/(0.5*max.d), N.t), |
|
385 |
"sigma.sq"=rep(2,N.t), "tau.sq"=rep(1, N.t), |
|
386 |
"sigma.eta"=diag(rep(0.01, p))) |
|
387 |
tuning <- list("phi"=rep(5, N.t)) |
|
388 |
|
|
389 |
priors <- list("beta.0.Norm"=list(rep(0,p), diag(1000,p)), |
|
390 |
"phi.Unif"=list(rep(3/(0.9*max.d), N.t), rep(3/(0.05*max.d), N.t)), |
|
391 |
"sigma.sq.IG"=list(rep(2,N.t), rep(10,N.t)), |
|
392 |
"tau.sq.IG"=list(rep(2,N.t), rep(5,N.t)), |
|
393 |
"sigma.eta.IW"=list(2, diag(0.001,p))) |
|
394 |
cov.model <- "exponential" |
|
395 |
|
|
396 |
## Run the model |
|
397 |
n.samples <- 500 |
|
398 |
m.1=spDynLM(mods,data=mdata,coords=coords,knots=coordinates(knots),n.samples=n.samples,starting=starting,tuning=tuning,priors=priors,cov.model=cov.model,get.fitted=T,n.report=25) |
|
399 |
|
|
400 |
save(m.1,file="output/m.1.Rdata") |
|
401 |
## summarize |
|
402 |
burn.in <- floor(0.75*n.samples) |
|
403 |
quant <- function(x){quantile(x, prob=c(0.5, 0.025, 0.975))} |
|
404 |
beta <- apply(m.1$p.beta.samples[burn.in:n.samples,], 2, quant) |
|
405 |
beta.0 <- beta[,grep("Intercept", colnames(beta))] |
|
406 |
beta.1 <- beta[,grep("mod09", colnames(beta))] |
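With beta.1 holding the posterior median and 95% interval of each month's MODCF slope, temporal stability can be inspected by plotting the coefficients in month order; a minimal sketch (assumes the columns are the 12 monthly slopes):

plot(1:ncol(beta.1), beta.1[1,], ylim=range(beta.1),
     xlab="Month", ylab="MODCF slope", pch=16)
arrows(1:ncol(beta.1), beta.1[2,], 1:ncol(beta.1), beta.1[3,],
       angle=90, code=3, length=0.02)   # 95% credible intervals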
|
407 |
|
|
408 |
|
|
409 |
|
|
410 |
|
|
411 |
### Compare time periods |
|
412 |
library(texreg) |
|
413 |
extract.lm <- function(model) { |
|
414 |
s <- summary(model) |
|
415 |
names <- rownames(s$coef) |
|
416 |
co <- s$coef[, 1] |
|
417 |
se <- s$coef[, 2] |
|
418 |
pval <- s$coef[, 4] |
|
419 |
rs <- s$r.squared |
|
420 |
n <- as.integer(nobs(model)) |
|
421 |
rmse=sqrt(mean((residuals(s)^2))) |
|
422 |
gof <- c(rs, rmse, n) |
|
423 |
gof.names <- c("R-Squared","RMSE","n") |
|
424 |
tr <- createTexreg(coef.names = names, coef = co, se = se, |
|
425 |
pvalues = pval, gof.names = gof.names, gof = gof) |
|
426 |
return(tr) |
|
427 |
} |
|
428 |
setMethod("extract", signature = className("lm", "stats"),definition = extract.lm) |
|
429 |
|
|
430 |
forms=c("cld~mod09+month2+lat") |
|
431 |
lm_all=lm(cld_all~mod09+lat,data=cldm[!is.na(cldm$cld),]) |
|
432 |
|
|
433 |
|
|
434 |
### Compare two time periods |
|
435 |
lm_all1=lm(cld_all~mod09,data=cldm[!is.na(cldm$cld)&cldm$cldn_all>=10,]) |
|
436 |
|
|
437 |
lm_mod=lm(cld~mod09,data=cldm[cldm$cldn==10,]) |
|
438 |
mods=list("1970-2009"=lm_all1,"2000-2009"=lm_mod) |
|
439 |
|
|
440 |
screenreg(mods,digits=2,single.row=T,custom.model.names=names(mods),custom.coef.names = c("Intercept", "MODCF")) |
|
441 |
|
|
442 |
htmlreg(mods,file = "output/tempstab.doc", |
|
443 |
custom.model.names = names(mods), |
|
444 |
single.row = T, inline.css = FALSE,doctype = TRUE, html.tag = TRUE, head.tag = TRUE, body.tag = TRUE) |
|
445 |
|
|
446 |
|
|
447 |
## assess latitude bias |
|
448 |
cldm$abslat=abs(cldm$lat) |
|
449 |
cldm$absdif=abs(cldm$difm) |
|
450 |
abslm=lm(absdif~abslat*I(abslat^2),data=cldm[cldm$cldn_all>30,]) |
|
451 |
|
|
452 |
xyplot(absdif~abslat|month2,type=c("p","smooth"),data=cldm,cex=.25,pch=16) |
|
453 |
|
|
454 |
plot(absdif~abslat,data=cldm[cldm$cldn_all>30,],cex=.25,pch=16) |
|
455 |
lines(0:90,predict(abslm,newdata=data.frame(abslat=0:90),type="response"),col="red") |
|
456 |
|
|
457 |
library(BayesFactor) ## provides anovaBF() and posterior() |
bf=anovaBF(difm~lulcc+month2,data=cldm[!is.na(cldm$difm)&!is.na(cldm$lulcc),]) |
|
458 |
ch=posterior(bf, iterations = 1000) |
|
459 |
summary(bf) |
|
460 |
plot(bf) |
|
461 |
|
|
462 |
## explore validation error |
|
463 |
cldm$lulcc=as.factor(IGBP$class[match(cldm$lulc,IGBP$ID)]) |
|
464 |
|
|
465 |
## Table of RMSE's by lulc by month |
|
466 |
lulcrmsel=ddply(cldm,c("month","lulc"),function(x) c(count=nrow(x),rmse=sqrt(mean((x$mod09-x$cld)^2,na.rm=T)))) |

lulcrmsel=lulcrmsel[!is.na(lulcrmsel$lulc),] |

lulcrmsel$lulcc=as.factor(IGBP$class[match(lulcrmsel$lulc,IGBP$ID)]) |
|
469 |
|
|
470 |
lulctl=ddply(cldm,c("lulc"),function(x) c(count=nrow(x),mean=paste(round(mean(x$difm,na.rm=T),2)," (",round(sd(x$difm,na.rm=T),2),")",sep=""),rmse=round(sqrt(mean((x$difm)^2,na.rm=T)),2))) |
|
471 |
lulctl$lulcc=as.factor(IGBP$class[match(lulctl$lulc,IGBP$ID)]) |
|
472 |
print(xtable(lulctl[order(lulctl$rmse),c("lulcc","count","mean","rmse")],digits=1),type="html",include.rownames=F,file="output/lulcc.doc",row.names=F) |
|
473 |
|
|
474 |
|
|
475 |
lulcrmse=cast(lulcrmsel,lulcc~month,value="rmse") |
|
476 |
lulcrmse |
|
477 |
|
|
478 |
lulcrmse.q=round(do.call(rbind,apply(lulcrmse,1,function(x) data.frame(Min=min(x,na.rm=T),Mean=mean(x,na.rm=T),Max=max(x,na.rm=T),SD=sd(x,na.rm=T)))),1)#quantile,c(0.025,0.5,.975),na.rm=T)),1) |
|
479 |
lulcrmse.q=lulcrmse.q[order(lulcrmse.q$Mean,decreasing=T),] |
|
480 |
lulcrmse.q |
|
481 |
|
|
482 |
print(xtable(lulcrmse,digits=1),"html") |
|
483 |
|
|
484 |
bgyr=colorRampPalette(c("blue","green","yellow","red")) |
|
485 |
levelplot(rmse~month*lulcc,data=lulcrmsel,col.regions=bgyr(1000),at=quantile(lulcrmsel$rmse,seq(0,1,len=100),na.rm=T)) |
|
486 |
|
|
487 |
|
|
488 |
### Linear models |
|
489 |
summary(lm(difm~as.factor(lulc)+lat+month2,data=cldm)) |
|
490 |
|
climate/research/cloud/MOD09/3_combine.R
1 | 1 |
################################################################################ |
2 | 2 |
### calculate monthly means of terra and aqua |
3 |
|
|
3 |
setwd("/mnt/data/personal/adamw/projects/cloud") |
|
4 | 4 |
datadir="/mnt/data2/projects/cloud/" |
5 | 5 |
|
6 |
library(raster) |
|
7 |
library(foreach) |
|
8 |
library(multicore) |
|
9 |
library(doMC) |
|
10 |
registerDoMC(12) |
|
11 |
|
|
12 |
|
|
13 |
### assemble list of files to process |
|
14 |
df=data.frame(path=list.files(paste(datadir,"/mcd09ctif",sep=""),full=T,pattern="M[YO].*[0-9]*(mean|sd).*tif$"),stringsAsFactors=F) |
|
15 |
df[,c("sensor","month","type")]=do.call(rbind.data.frame,strsplit(basename(df$path),"_|[.]"))[,c(1,2,3)] |
|
16 |
|
|
17 |
df2=unique(df[,c("month","type")]) |
|
18 |
|
|
19 |
overwrite=T |
|
20 |
|
|
6 | 21 |
# Create combined (MOD+MYD) corrected mean CF |
7 |
foreach(i=1:12) %dopar% {
|
|
8 |
f=list.files(paste(datadir,"/mcd09ctif",sep=""),pattern=paste(".*[O|Y].*_",sprintf("%02d",i),"[.]tif$",sep=""),full=T)
|
|
22 |
foreach(i=1:nrow(df2), .options.multicore=list(preschedule=FALSE)) %dopar% {
|
|
23 |
f=df$path[df$month==df2$month[i]&df$type==df2$type[i]]
|
|
9 | 24 |
## Define output and check if it already exists |
10 |
tmcd=paste(datadir,"/mcd09ctif/MCD09_",sprintf("%02d", i),"_uncompressed.tif",sep="")
|
|
11 |
tmcd2=paste(datadir,"/mcd09ctif/MCD09_",sprintf("%02d", i),".tif",sep="")
|
|
25 |
tmcd=paste(datadir,"/mcd09ctif/MCD09_",df2$type[i],"_",df2$month[i],"_uncompressed.tif",sep="")
|
|
26 |
tmcd2=paste(datadir,"/mcd09ctif/MCD09_",df2$type[i],"_",df2$month[i],".tif",sep="")
|
|
12 | 27 |
## check if output already exists |
13 |
# if(file.exists(tmcd2)){print(paste(tmcd2,"Exists, moving on..."));return(NULL)}
|
|
14 |
if(file.exists(tmcd2)){print(paste(tmcd2,"Exists, deleting it..."));file.remove(tmcd,tmcd2)} |
|
28 |
if(!overwrite&file.exists(tmcd2)){print(paste(tmcd2,"Exists, moving on..."));return(NULL)}
|
|
29 |
if(overwrite&file.exists(tmcd2)){print(paste(tmcd2,"Exists, deleting it..."));file.remove(tmcd,tmcd2)}
|
|
15 | 30 |
## Take average between images |
16 | 31 |
## switch NA values to 65535 to facilitate recasting to 8-bit below, otherwise they are confounded with 0 cloud values |
17 |
ops=paste("-t_srs 'EPSG:4326' -multi -srcnodata -32768 -dstnodata 32767 -r bilinear -te -180 -90 180 90 -tr 0.008333333333333 -0.008333333333333",
|
|
32 |
ops=paste(" -t_srs 'EPSG:4326' -multi -srcnodata 65535 -dstnodata 65535 -r bilinear -te -180 -90 180 90 -tr 0.008333333333333 -0.008333333333333",
|
|
18 | 33 |
"-co BIGTIFF=YES --config GDAL_CACHEMAX 20000 -wm 2000 -wo NUM_THREADS:10 -wo SOURCE_EXTRA=5") |
19 | 34 |
system(paste("gdalwarp -overwrite -r average -co COMPRESS=LZW -co ZLEVEL=9 ",ops," ",paste(f,collapse=" ")," ",tmcd)) |
20 | 35 |
## update metadata |
21 |
tags=c(paste("TIFFTAG_IMAGEDESCRIPTION='Monthly Cloud Frequency for 2000-2013 extracted from C5 MODIS MOD09GA and MYD09GA PGE11 internal cloud mask algorithm (embedded in state_1km bit 10).", |
|
36 |
if(df2$type[i]=="mean") |
|
37 |
tags=c(paste("TIFFTAG_IMAGEDESCRIPTION='Monthly Cloud Frequency for 2000-2013 extracted from C5 MODIS MOD09GA and MYD09GA PGE11 internal cloud mask algorithm (embedded in state_1km bit 10).", |
|
22 | 38 |
"The daily cloud mask time series were summarized to mean cloud frequency (CF) by calculating the proportion of cloudy days.'"), |
23 | 39 |
"TIFFTAG_DOCUMENTNAME='Collection 5 MCD09 Cloud Frequency'", |
24 | 40 |
paste("TIFFTAG_DATETIME='2013",sprintf("%02d", i),"15'",sep=""), |
25 | 41 |
"TIFFTAG_ARTIST='Adam M. Wilson (adam.wilson@yale.edu)'") |
26 |
system(paste("/usr/local/src/gdal-1.10.0/swig/python/scripts/gdal_edit.py ",tmcd," ",paste("-mo ",tags,sep="",collapse=" "),sep="")) |
|
27 |
# create final fixed image |
|
28 |
system(paste("gdal_translate -co COMPRESS=LZW -co ZLEVEL=9 -co PREDICTOR=2 ",tmcd," ",tmcd2,sep="")) |
|
29 |
writeLines(paste("Finished month",i)) |
|
30 |
} |
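The gdalwarp call above does the sensor averaging on disk; for comparison, an explicit per-pixel mean of a MOD09/MYD09 pair in R would be (a slower sketch, assuming f holds the two matching file paths):

s=stack(f)                                          # MOD09 + MYD09 layers
cf=calc(s, function(x) round(mean(x, na.rm=TRUE)))  # cell-wise mean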
|
31 |
|
|
32 |
################################################ |
|
33 |
# Create combined (MOD+MYD) corrected mean CF SD |
|
34 |
foreach(i=1:12) %dopar% { |
|
35 |
f=list.files(paste(datadir,"/mcd09tif",sep=""),pattern=paste(".*[O|Y].*_",sprintf("%02d",i),"[.]tif$",sep=""),full=T) |
|
36 |
## Define output and check if it already exists |
|
37 |
tmcd=paste(datadir,"/mcd09ctif/MCD09sd_",sprintf("%02d", i),"_uncompressed.tif",sep="") |
|
38 |
tmcd=paste(datadir,"/mcd09ctif/MCD09sd_",sprintf("%02d", i),"_uncompressed.tif",sep="") |
|
39 |
tmcd2=paste(datadir,"/mcd09ctif/MCD09sd_",sprintf("%02d", i),".tif",sep="") |
|
40 |
## check if output already exists |
|
41 |
if(file.exists(tmcd2)){print(paste(tmcd2,"Exists, moving on..."));return(NULL)} |
|
42 |
## Take average between images |
|
43 |
ops=paste("-t_srs 'EPSG:4326' -multi -srcnodata -32768 -dstnodata -32768 -r bilinear -te -180 -90 180 90 -tr 0.008333333333333 -0.008333333333333", |
|
44 |
"-co BIGTIFF=YES --config GDAL_CACHEMAX 20000 -wm 2000 -wo NUM_THREADS:10 -wo SOURCE_EXTRA=5") |
|
45 |
system(paste("gdalwarp -overwrite -r average -co COMPRESS=LZW -co ZLEVEL=9 ",ops," ",paste(f,collapse=" ")," ",tmcd)) |
|
46 |
## update metadata |
|
42 |
if(df2$type[i]=="sd") |
|
47 | 43 |
tags=c(paste("TIFFTAG_IMAGEDESCRIPTION='Standard Deviation of the Monthly Cloud Frequency for 2000-2013 extracted from C5 MODIS", |
48 | 44 |
" MOD09GA and MYD09GA PGE11 internal cloud mask algorithm (embedded in state_1km bit 10).", |
49 |
"The daily cloud mask time series were summarized to mean cloud frequency (CF) by calculating the proportion of cloudy days"), |
|
45 |
"The daily cloud mask time series were summarized to mean cloud frequency (CF) by calculating the proportion of cloudy days.'"),
|
|
50 | 46 |
"TIFFTAG_DOCUMENTNAME='Collection 5 MCD09 SD of Cloud Frequency'", |
51 | 47 |
paste("TIFFTAG_DATETIME='2013",sprintf("%02d", i),"15'",sep=""), |
52 | 48 |
"TIFFTAG_ARTIST='Adam M. Wilson (adam.wilson@yale.edu)'") |
53 | 49 |
system(paste("/usr/local/src/gdal-1.10.0/swig/python/scripts/gdal_edit.py ",tmcd," ",paste("-mo ",tags,sep="",collapse=" "),sep="")) |
54 | 50 |
# create final fixed image |
55 |
system(paste("gdal_translate -b 2 -a_nodata -32768 -co COMPRESS=LZW -co ZLEVEL=9 -co PREDICTOR=2 ",tmcd," ",tmcd2,sep="")) |
|
56 |
writeLines(paste("Finished month",i)) |
|
51 |
system(paste("gdal_translate -co COMPRESS=LZW -co ZLEVEL=9 -co PREDICTOR=2 ",tmcd," ",tmcd2,sep="")) |
|
52 |
file.remove(tmcd) |
|
53 |
writeLines(paste("########################################## Finished ",tmcd2)) |
|
57 | 54 |
} |
58 | 55 |
|
59 | 56 |
|
60 | 57 |
|
61 | 58 |
################################################################################# |
62 | 59 |
###### convert to 8-bit compressed file, add colors and other details |
63 |
f2=list.files(paste(datadir,"/mcd09ctif",sep=""),pattern=paste(".*MCD09_[0-9].[.]tif$",sep=""),full=T) |
|
64 | 60 |
|
65 |
for( i in 1:length(f2)){ |
|
61 |
|
|
62 |
f2=list.files(paste(datadir,"/mcd09ctif",sep=""),pattern=paste(".*MCD09_.*_[0-9].[.]tif$",sep=""),full=T) |
|
63 |
|
|
64 |
|
|
65 |
foreach(i=1:length(f2), .options.multicore=list(preschedule=FALSE)) %dopar% { |
|
66 | 66 |
file=f2[i] |
67 |
outfilevrt=paste(datadir,"/mcd09ctif/",sub(".tif",".vrt",basename(file)),sep="") |
|
68 |
outfile=paste("data/mcd09tif/MCD09_",sprintf("%02d",i),".tif",sep="") |
|
69 |
outfilesd=paste("data/mcd09tif/MCD09_",sprintf("%02d",i),"_sd.tif",sep="") |
|
70 |
## create VRT and edit the color table |
|
71 |
## create the vrt to add a color table following https://trac.osgeo.org/gdal/wiki/FAQRaster#Howtocreateormodifyanimagecolortable |
|
72 |
system(paste("gdal_translate -scale 0 10000 0 100 -of VRT ",file," ",outfilevrt)) |
|
67 |
outfilevrt=sub("[.]tif",".vrt",file) |
|
68 |
outfile=paste("data/mcd09tif/",basename(file),sep="") |
|
69 |
## rescale to 0-100 using a VRT |
|
70 |
system(paste("gdal_translate -scale 0 10000 0 100 -of VRT ",file," ",outfilevrt)) |
|
71 |
## add color table for 8-bit data |
|
73 | 72 |
vrt=scan(outfilevrt,what="char") |
74 | 73 |
hd=c("<ColorInterp>Palette</ColorInterp>","<ColorTable>") |
75 | 74 |
ft="</ColorTable>" |
... | ... | |
81 | 80 |
## update missing data flag following http://lists.osgeo.org/pipermail/gdal-dev/2010-February/023541.html |
82 | 81 |
csi=grep("<ComplexSource>",vrt2) # get index of current color table |
83 | 82 |
vrt2=c(vrt2[1:csi],"<NODATA>327</NODATA>",vrt2[(csi+1):length(vrt2)]) |
84 |
write.table(vrt2,file=outfilevrt,col.names=F,row.names=F,quote=F) |
|
85 |
|
|
86 |
#system(paste("gdal_translate -of VRT -scale 0 10000 0 100 ",outfilevrt," ",outfilevrt2)) |
|
87 |
|
|
88 |
|
|
89 |
# system(paste("pkreclass -i ",outfilevrt," ",paste("-mo ",tags,sep="",collapse=" ")," ",outfilevrt," ",outfile2)) |
|
83 |
write.table(vrt2,file=outfilevrt,col.names=F,row.names=F,quote=F) |
|
90 | 84 |
tags=c(paste("TIFFTAG_IMAGEDESCRIPTION='Monthly Cloud Frequency for 2000-2013 extracted from C5 MODIS M*D09GA PGE11 internal cloud mask algorithm (embedded in state_1km bit 10).", |
91 | 85 |
"The daily cloud mask time series were summarized to mean cloud frequency (CF) by calculating the proportion of cloudy days. ", |
92 | 86 |
"Band Descriptions: 1) Mean Monthly Cloud Frequency'"), |
... | ... | |
100 | 94 |
|
101 | 95 |
################ |
102 | 96 |
### calculate inter vs. intra annual variability |
103 |
f3=list.files(paste(datadir,"/mcd09ctif",sep=""),pattern=paste(".*MCD09_[0-9].[.]tif$",sep=""),full=T) |
|
104 |
f3sd=list.files(paste(datadir,"/mcd09ctif",sep=""),pattern=paste(".*MCD09sd_[0-9].[.]tif$",sep=""),full=T) |
|
97 |
f3=list.files(paste(datadir,"/mcd09ctif",sep=""),pattern=paste(".*MCD09_mean_[0-9].[.]tif$",sep=""),full=T)
|
|
98 |
f3sd=list.files(paste(datadir,"/mcd09ctif",sep=""),pattern=paste(".*MCD09_sd_[0-9].[.]tif$",sep=""),full=T)
|
|
105 | 99 |
|
106 | 100 |
dmean=stack(as.list(f3)) |
107 | 101 |
dsd=stack(as.list(f3sd)) |
108 | 102 |
|
109 |
dinter=calc(dmean,sd,file=paste(datadir,"/mcd09ctif/inter.tif",sep=""),options=c("COMPRESS=LZW","ZLEVEL=9")) |
|
110 |
dintra=calc(dsd,mean,file=paste(datadir,"/mcd09ctif/intra.tif",sep=""),options=c("COMPRESS=LZW","ZLEVEL=9")) |
|
103 |
beginCluster(12) |
|
104 |
|
|
105 |
## Functions to calculate cell-wise SD and mean, rounded to the nearest integer |

Rsd=function(x) calc(x,function(x) round(sd(x,na.rm=T))) |
Rmean=function(x) calc(x,function(x) round(mean(x,na.rm=T))) |

## SD across the 12 monthly mean layers |
dinter=clusterR(dmean,Rsd,file=paste(datadir,"/mcd09ctif/inter.tif",sep=""),options=c("COMPRESS=LZW","PREDICTOR=2"),overwrite=T,dataType='INT1U',NAflag=255) |
## mean of the 12 monthly SD layers (clusterR needs a raster function, hence the calc wrapper) |
dintra=clusterR(dsd,Rmean,file=paste(datadir,"/mcd09ctif/intra.tif",sep=""),options=c("COMPRESS=LZW","PREDICTOR=2"),overwrite=T,dataType='INT1U',NAflag=255) |
|
110 |
|
|
111 |
endCluster() |
|
111 | 112 |
|
112 | 113 |
tplot=F |
113 | 114 |
if(tplot){ |
climate/research/cloud/MOD09/4_validate.R
1 |
### Script to download and process the NDP-026D station cloud dataset |
|
2 |
### to validate MODIS cloud frequencies |
|
3 |
|
|
4 |
setwd("/mnt/data/personal/adamw/projects/cloud/") |
|
5 |
|
|
6 |
library(multicore) |
|
7 |
library(doMC) |
|
8 |
library(rasterVis) |
|
9 |
library(rgdal) |
|
10 |
library(reshape) |
|
11 |
library(maptools) |
|
12 |
library(rgeos) |
library(plyr) ## provides ddply() used below |
|
13 |
|
|
14 |
## Data available here http://cdiac.ornl.gov/epubs/ndp/ndp026d/ndp026d.html |
|
15 |
|
|
16 |
download=F #download data? |
|
17 |
## Get station locations |
|
18 |
if(download) system("wget -N -nd http://cdiac.ornl.gov/ftp/ndp026d/cat01/01_STID -P data/NDP026D/data/") |
|
19 |
|
|
20 |
st=read.table("data/NDP026D/data/01_STID",skip=1) |
|
21 |
colnames(st)=c("StaID","LAT","LON","ELEV","ny1","fy1","ly1","ny7","fy7","ly7","SDC","b5c") |
|
22 |
st$lat=st$LAT/100 |
|
23 |
st$lon=st$LON/100 |
|
24 |
st$lon[st$lon>180]=st$lon[st$lon>180]-360 |
|
25 |
st=st[,c("StaID","ELEV","lat","lon")] |
|
26 |
colnames(st)=c("id","elev","lat","lon") |
|
27 |
write.csv(st,"stations.csv",row.names=F) |
|
28 |
coordinates(st)=c("lon","lat") |
|
29 |
projection(st)="+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs" |
|
30 |
st@data[,c("lon","lat")]=coordinates(st) |
|
31 |
|
|
32 |
## download data |
|
33 |
if(download){ |
|
34 |
system("wget -N -nd ftp://cdiac.ornl.gov/pub/ndp026d/cat67_78/* -A '.tc.Z' -P data/NDP026D/data/") |
|
35 |
system("gunzip data/*.Z") |
|
36 |
} |
|
37 |
|
|
38 |
## define FWF widths |
|
39 |
f162=c(5,5,4,7,7,7,4) #format 162 |
|
40 |
c162=c("StaID","YR","Nobs","Amt","Fq","AWP","NC") |
|
41 |
|
|
42 |
## use monthly timeseries |
|
43 |
cld=do.call(rbind.data.frame,mclapply(sprintf("%02d",1:12),function(m) { |
|
44 |
d=read.fwf(list.files("data/NDP026D/data",pattern=paste("MNYDC.",m,".tc$",sep=""),full=T),skip=1,widths=f162) |
|
45 |
colnames(d)=c162 |
|
46 |
d$month=as.numeric(m) |
|
47 |
print(m) |
|
48 |
return(d)} |
|
49 |
)) |
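Each NDP-026D monthly file is fixed-width "format 162" with seven fields mapped by the widths in f162; for a single month the call reduces to (hypothetical January file name):

d1=read.fwf("data/NDP026D/data/MNYDC.01.tc", skip=1, widths=c(5,5,4,7,7,7,4))
colnames(d1)=c("StaID","YR","Nobs","Amt","Fq","AWP","NC")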
|
50 |
|
|
51 |
## add lat/lon |
|
52 |
cld[,c("lat","lon")]=coordinates(st)[match(cld$StaID,st$id),] |
|
53 |
|
|
54 |
## drop missing values |
|
55 |
cld=cld[,!grepl("Fq|AWP|NC",colnames(cld))] |
|
56 |
cld$Amt[cld$Amt<0]=NA |
|
57 |
cld$Amt=cld$Amt/100 |
|
58 |
cld=cld[!is.na(cld$Amt),] |
|
59 |
|
|
60 |
## table of stations with > 20 observations per month |
|
61 |
cast(cld,StaID~YR,value="Nobs") |
|
62 |
mtab=ddply(cld,c('StaID','month'),function(df){ data.frame(count=sum(df$Nobs>20,na.rm=T))}) |
|
63 |
#mtab2=mtab[table(mtab$count>10)] |
|
64 |
stem(mtab$count) |
|
65 |
|
|
66 |
## calculate means and sds for full record (1970-2009) |
|
67 |
Nobsthresh=20 #minimum number of observations to include |
|
68 |
|
|
69 |
cldm=do.call(rbind.data.frame,by(cld,list(month=as.factor(cld$month),StaID=as.factor(cld$StaID)),function(x){ |
|
70 |
data.frame( |
|
71 |
month=x$month[1], |
|
72 |
StaID=x$StaID[1], |
|
73 |
cld_all=mean(x$Amt[x$Nobs>=Nobsthresh],na.rm=T), # full record |
|
74 |
cldsd_all=sd(x$Amt[x$Nobs>=Nobsthresh],na.rm=T), |
|
75 |
cldn_all=length(x$Amt[x$Nobs>=Nobsthresh]), |
|
76 |
cld=mean(x$Amt[x$YR>=2000&x$Nobs>=Nobsthresh],na.rm=T), #only MODIS epoch |
|
77 |
cldsd=sd(x$Amt[x$YR>=2000&x$Nobs>=Nobsthresh],na.rm=T), |
|
78 |
cldn=length(x$Amt[x$YR>=2000&x$Nobs>=Nobsthresh]))})) |
|
79 |
|
|
80 |
cldm[,c("lat","lon")]=coordinates(st)[match(cldm$StaID,st$id),c("lat","lon")] |
|
81 |
|
|
82 |
|
|
83 |
|
|
84 |
## add the EarthEnvCloud data to cld |
|
85 |
mod09=stack(list.files("data/mcd09tif/",pattern="MCD09_[0-9]*[.]tif",full=T)) |
|
86 |
NAvalue(mod09)=255 |
|
87 |
mod09std=brick("~/acrobates/adamw/projects/cloud/data/cloud_ymonstd.nc") ## needed for the sd extraction below |
|
88 |
|
|
89 |
## overlay the data with 32km diameter (16km radius) buffer |
|
90 |
## buffer size from Dybbroe, et al. (2005) doi:10.1175/JAM-2189.1. |
|
91 |
buf=16000 |
|
92 |
bins=cut(st$lat,10) |
|
93 |
rerun=F |
|
94 |
if(rerun&file.exists("valid.csv")) file.remove("valid.csv") |
|
95 |
mod09sta=lapply(levels(bins),function(lb) { |
|
96 |
l=which(bins==lb) |
|
97 |
## mean |
|
98 |
td=extract(mod09,st[l,],buffer=buf,fun=mean,na.rm=T,df=T) |
|
99 |
td$id=st$id[l] |
|
100 |
td$type="mean" |
|
101 |
## std |
|
102 |
td2=extract(mod09std,st[l,],buffer=buf,fun=mean,na.rm=T,df=T) |
|
103 |
td2$id=st$id[l] |
|
104 |
td2$type="sd" |
|
105 |
print(lb)#as.vector(c(l,td[,1:4]))) |
|
106 |
write.table(rbind(td,td2),"valid.csv",append=T,col.names=F,quote=F,sep=",",row.names=F) |
|
107 |
td |
|
108 |
})#,mc.cores=3) |
|
109 |
|
|
110 |
## read it back in |
|
111 |
mod09st=read.csv("valid.csv",header=F)[,-c(1)] |
|
112 |
colnames(mod09st)=c(names(mod09),"id","type") |
|
113 |
mod09stl=melt(mod09st,id.vars=c("id","type")) |
|
114 |
mod09stl[,c("year","month")]=do.call(rbind,strsplit(sub("X","",mod09stl$variable),"[.]"))[,1:2] |
|
115 |
mod09stl$value[mod09stl$value<0]=NA |
|
116 |
mod09stl=cast(mod09stl,id+year+month~type,value="value") |
|
117 |
|
|
118 |
## add it to cld |
|
119 |
cldm$mod09=mod09stl$mean[match(paste(cldm$StaID,cldm$month),paste(mod09stl$id,as.numeric(mod09stl$month)))] |
|
120 |
cldm$mod09sd=mod09stl$sd[match(paste(cldm$StaID,cldm$month),paste(mod09stl$id,as.numeric(mod09stl$month)))] |
|
121 |
|
|
122 |
|
|
123 |
## LULC |
|
124 |
#system(paste("gdalwarp -r near -co \"COMPRESS=LZW\" -tr ",paste(res(mod09),collapse=" ",sep=""), |
|
125 |
# "-tap -multi -t_srs \"", projection(mod09),"\" /mnt/data/jetzlab/Data/environ/global/landcover/MODIS/MCD12Q1_IGBP_2005_v51.tif ../modis/mod12/MCD12Q1_IGBP_2005_v51.tif")) |
|
126 |
lulc=raster("~/acrobates/adamw/projects/interp/data/modis/mod12/MCD12Q1_IGBP_2005_v51.tif") |
|
127 |
require(plotKML); data(worldgrids_pal) #load IGBP palette |
|
128 |
IGBP=data.frame(ID=0:16,col=worldgrids_pal$IGBP[-c(18,19)],stringsAsFactors=F) |
|
129 |
IGBP$class=rownames(IGBP);rownames(IGBP)=1:nrow(IGBP) |
|
130 |
levels(lulc)=list(IGBP) |
|
131 |
## function to get modal lulc value |
|
132 |
Mode <- function(x) { |
|
133 |
ux <- na.omit(unique(x)) |
|
134 |
ux[which.max(tabulate(match(x, ux)))] |
|
135 |
} |
|
136 |
lulcst=extract(lulc,st,fun=Mode,buffer=buf,df=T) |
|
137 |
colnames(lulcst)=c("id","lulc") |
|
138 |
## add it to cld |
|
139 |
cldm$lulc=lulcst$lulc[match(cldm$StaID,lulcst$id)] |
|
140 |
cldm$lulcc=IGBP$class[match(cldm$lulc,IGBP$ID)] |
|
141 |
|
|
142 |
|
|
143 |
### Add biome data |
|
144 |
biome=readOGR("../teow/","biomes") |
|
145 |
projection(biome)=projection(st) |
|
146 |
#st$biome=over(st,biome,returnList=F)$BiomeID |
|
147 |
dists=apply(gDistance(st,biome,byid=T),2,which.min) |
|
148 |
st$biomec=biome$code[dists] |
|
149 |
st$realm=biome$realm[dists] |
|
150 |
st$biome=biome$biome[dists] |
|
151 |
|
|
152 |
cldm$biomec=st$biomec[match(cldm$StaID,st$id)] |
|
153 |
cldm$realm=st$realm[match(cldm$StaID,st$id)] |
|
154 |
cldm$biome=st$biome[match(cldm$StaID,st$id)] |
|
155 |
|
|
156 |
|
|
157 |
## write out the tables |
|
158 |
write.csv(cld,file="cld.csv",row.names=F) |
|
159 |
write.csv(cldm,file="cldm.csv",row.names=F) |
|
160 |
writeOGR(st,dsn=".",layer="stations",driver="ESRI Shapefile",overwrite_layer=T) |
|
161 |
######################################################################### |
|
162 |
|
climate/research/cloud/MOD09/NDP-026D.R
1 |
### Script to download and process the NDP-026D station cloud dataset |
|
2 |
### to validate MODIS cloud frequencies |
|
3 |
|
|
4 |
setwd("~/acrobates/adamw/projects/cloud/data/NDP026D") |
|
5 |
|
|
6 |
library(multicore) |
|
7 |
library(doMC) |
|
8 |
library(rasterVis) |
|
9 |
library(rgdal) |
|
10 |
library(reshape) |
|
11 |
library(maptools) |
|
12 |
library(rgeos) |
|
13 |
|
|
14 |
## Data available here http://cdiac.ornl.gov/epubs/ndp/ndp026d/ndp026d.html |
|
15 |
|
|
16 |
## Get station locations |
|
17 |
system("wget -N -nd http://cdiac.ornl.gov/ftp/ndp026d/cat01/01_STID -P data/") |
|
18 |
st=read.table("data/01_STID",skip=1) |
|
19 |
colnames(st)=c("StaID","LAT","LON","ELEV","ny1","fy1","ly1","ny7","fy7","ly7","SDC","b5c") |
|
20 |
st$lat=st$LAT/100 |
|
21 |
st$lon=st$LON/100 |
|
22 |
st$lon[st$lon>180]=st$lon[st$lon>180]-360 |
|
23 |
st=st[,c("StaID","ELEV","lat","lon")] |
|
24 |
colnames(st)=c("id","elev","lat","lon") |
|
25 |
write.csv(st,"stations.csv",row.names=F) |
|
26 |
coordinates(st)=c("lon","lat") |
|
27 |
projection(st)="+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs" |
|
28 |
st@data[,c("lon","lat")]=coordinates(st) |
|
29 |
|
|
30 |
## download data |
|
31 |
system("wget -N -nd ftp://cdiac.ornl.gov/pub/ndp026d/cat67_78/* -A '.tc.Z' -P data/") |
|
32 |
|
|
33 |
system("gunzip data/*.Z") |
|
34 |
|
|
35 |
## define FWF widths |
|
36 |
f162=c(5,5,4,7,7,7,4) #format 162 |
|
37 |
c162=c("StaID","YR","Nobs","Amt","Fq","AWP","NC") |
|
38 |
|
|
39 |
## use monthly timeseries |
|
40 |
cld=do.call(rbind.data.frame,mclapply(sprintf("%02d",1:12),function(m) { |
|
41 |
d=read.fwf(list.files("data",pattern=paste("MNYDC.",m,".tc$",sep=""),full=T),skip=1,widths=f162) |
|
42 |
colnames(d)=c162 |
|
43 |
d$month=as.numeric(m) |
|
44 |
print(m) |
|
45 |
return(d)} |
|
46 |
)) |
|
47 |
|
|
48 |
## add lat/lon |
|
49 |
cld[,c("lat","lon")]=coordinates(st)[match(cld$StaID,st$id),] |
|
50 |
|
|
51 |
## drop missing values |
|
52 |
cld=cld[,!grepl("Fq|AWP|NC",colnames(cld))] |
|
53 |
cld$Amt[cld$Amt<0]=NA |
|
54 |
cld$Amt=cld$Amt/100 |
|
55 |
cld=cld[!is.na(cld$Amt),] |
|
56 |
|
|
57 |
## table of stations with > 20 observations per month |
|
58 |
cast(cld,StaID~YR,value="Nobs") |
|
59 |
mtab=ddply(cld,c('StaID','month'),function(df){ data.frame(count=sum(df$Nobs>20,na.rm=T))}) |
|
60 |
#mtab2=mtab[table(mtab$count>10)] # incomplete expression, left commented out (as in 4_validate.R) |
|
62 |
stem(mtab$count) |
|
63 |
|
|
64 |
## calculate means and sds for full record (1970-2009) |
|
65 |
Nobsthresh=20 #minimum number of observations to include |
|
66 |
|
|
67 |
cldm=do.call(rbind.data.frame,by(cld,list(month=as.factor(cld$month),StaID=as.factor(cld$StaID)),function(x){ |
|
68 |
data.frame( |
|
69 |
month=x$month[1], |
|
70 |
StaID=x$StaID[1], |
|
71 |
cld_all=mean(x$Amt[x$Nobs>=Nobsthresh],na.rm=T), # full record |
|
72 |
cldsd_all=sd(x$Amt[x$Nobs>=Nobsthresh],na.rm=T), |
|
73 |
cldn_all=length(x$Amt[x$Nobs>=Nobsthresh]), |
|
74 |
cld=mean(x$Amt[x$YR>=2000&x$Nobs>=Nobsthresh],na.rm=T), #only MODIS epoch |
|
75 |
cldsd=sd(x$Amt[x$YR>=2000&x$Nobs>=Nobsthresh],na.rm=T), |
|
76 |
cldn=length(x$Amt[x$YR>=2000&x$Nobs>=Nobsthresh]))})) |
|
77 |
|
|
78 |
cldm[,c("lat","lon")]=coordinates(st)[match(cldm$StaID,st$id),c("lat","lon")] |
|
79 |
|
|
80 |
|
|
81 |
|
|
82 |
## add the MOD09 data to cld |
|
83 |
#### Evaluate MOD35 Cloud data |
|
84 |
mod09=brick("~/acrobates/adamw/projects/cloud/data/cloud_ymonmean.nc") |
|
85 |
mod09std=brick("~/acrobates/adamw/projects/cloud/data/cloud_ymonstd.nc") |
|
86 |
|
|
87 |
## overlay the data with 32km diameter (16km radius) buffer |
|
88 |
## buffer size from Dybbroe, et al. (2005) doi:10.1175/JAM-2189.1. |
|
89 |
buf=16000 |
|
90 |
bins=cut(st$lat,10) |
|
91 |
rerun=F |
|
92 |
if(rerun&file.exists("valid.csv")) file.remove("valid.csv") |
|
93 |
mod09sta=lapply(levels(bins),function(lb) { |
|
94 |
l=which(bins==lb) |
|
95 |
## mean |
|
96 |
td=extract(mod09,st[l,],buffer=buf,fun=mean,na.rm=T,df=T) |
|
97 |
td$id=st$id[l] |
|
98 |
td$type="mean" |
|
99 |
## std |
|
100 |
td2=extract(mod09std,st[l,],buffer=buf,fun=mean,na.rm=T,df=T) |
|
101 |
td2$id=st$id[l] |
|
102 |
td2$type="sd" |
|
103 |
print(lb)#as.vector(c(l,td[,1:4]))) |
|
104 |
write.table(rbind(td,td2),"valid.csv",append=T,col.names=F,quote=F,sep=",",row.names=F) |
|
105 |
td |
|
106 |
})#,mc.cores=3) |
|
107 |
|
|
108 |
## read it back in |
|
109 |
mod09st=read.csv("valid.csv",header=F)[,-c(1)] |
|
110 |
colnames(mod09st)=c(names(mod09),"id","type") |
|
111 |
mod09stl=melt(mod09st,id.vars=c("id","type")) |
|
112 |
mod09stl[,c("year","month")]=do.call(rbind,strsplit(sub("X","",mod09stl$variable),"[.]"))[,1:2] |
|
113 |
mod09stl$value[mod09stl$value<0]=NA |
|
114 |
mod09stl=cast(mod09stl,id+year+month~type,value="value") |
|
115 |
|
|
116 |
## add it to cld |
|
117 |
cldm$mod09=mod09stl$mean[match(paste(cldm$StaID,cldm$month),paste(mod09stl$id,as.numeric(mod09stl$month)))] |
|
118 |
cldm$mod09sd=mod09stl$sd[match(paste(cldm$StaID,cldm$month),paste(mod09stl$id,as.numeric(mod09stl$month)))] |
|
119 |
|
|
120 |
|
|
121 |
## LULC |
|
122 |
#system(paste("gdalwarp -r near -co \"COMPRESS=LZW\" -tr ",paste(res(mod09),collapse=" ",sep=""), |
|
123 |
# "-tap -multi -t_srs \"", projection(mod09),"\" /mnt/data/jetzlab/Data/environ/global/landcover/MODIS/MCD12Q1_IGBP_2005_v51.tif ../modis/mod12/MCD12Q1_IGBP_2005_v51.tif")) |
|
124 |
lulc=raster("~/acrobates/adamw/projects/interp/data/modis/mod12/MCD12Q1_IGBP_2005_v51.tif") |
|
125 |
require(plotKML); data(worldgrids_pal) #load IGBP palette |
|
126 |
IGBP=data.frame(ID=0:16,col=worldgrids_pal$IGBP[-c(18,19)],stringsAsFactors=F) |
|
127 |
IGBP$class=rownames(IGBP);rownames(IGBP)=1:nrow(IGBP) |
|
128 |
levels(lulc)=list(IGBP) |
|
129 |
## function to get modal lulc value |
|
130 |
Mode <- function(x) { |
|
131 |
ux <- na.omit(unique(x)) |
|
132 |
ux[which.max(tabulate(match(x, ux)))] |
|
133 |
} |
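## e.g. Mode(c(1,2,2,3,NA)) returns 2; ties go to the first-appearing value |
## because which.max() returns the first maximum |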
|
134 |
lulcst=extract(lulc,st,fun=Mode,buffer=buf,df=T) |
|
135 |
colnames(lulcst)=c("id","lulc") |
|
136 |
## add it to cld |
|
137 |
cldm$lulc=lulcst$lulc[match(cldm$StaID,lulcst$id)] |
|
138 |
cldm$lulcc=IGBP$class[match(cldm$lulc,IGBP$ID)] |
|
139 |
|
|
140 |
|
|
141 |
### Add biome data |
|
142 |
biome=readOGR("../teow/","biomes") |
|
143 |
projection(biome)=projection(st) |
|
144 |
#st$biome=over(st,biome,returnList=F)$BiomeID |
|
145 |
dists=apply(gDistance(st,biome,byid=T),2,which.min) |
|
146 |
st$biomec=biome$code[dists] |
|
147 |
st$realm=biome$realm[dists] |
|
148 |
st$biome=biome$biome[dists] |
|
149 |
|
|
150 |
cldm$biomec=st$biomec[match(cldm$StaID,st$id)] |
|
151 |
cldm$realm=st$realm[match(cldm$StaID,st$id)] |
|
152 |
cldm$biome=st$biome[match(cldm$StaID,st$id)] |
|
153 |
|
|
154 |
|
|
155 |
## write out the tables |
|
156 |
write.csv(cld,file="cld.csv",row.names=F) |
|
157 |
write.csv(cldm,file="cldm.csv",row.names=F) |
|
158 |
writeOGR(st,dsn=".",layer="stations",driver="ESRI Shapefile",overwrite_layer=T) |
|
159 |
######################################################################### |
|
160 |
|
climate/research/cloud/MOD09/ee/ee_compile.R | ||
---|---|---|
1 | 1 |
### Script to compile the monthly cloud data from earth engine into a netcdf file for further processing |
2 | 2 |
|
3 |
library(rasterVis)
|
|
3 |
library(raster) |
|
4 | 4 |
library(doMC) |
5 |
library(multicore) |
|
6 |
library(foreach) |
|
7 |
library(mgcv) |
|
8 |
library(RcppOctave) |
|
9 | 5 |
registerDoMC(12) |
10 | 6 |
|
11 | 7 |
|
... | ... | |
42 | 38 |
} |
43 | 39 |
|
44 | 40 |
rasterOptions(tmpdir=tmpfs,overwrite=T, format="GTiff",maxmemory=1e9) |
45 |
|
|
46 |
|
|
47 | 41 |
rerun=T # set to true to recalculate all dates even if file already exists |
48 | 42 |
|
49 | 43 |
## define month-sensors to process |
... | ... | |
53 | 47 |
#jobs=jobs[jobs$sensor=="MYD09",] |
54 | 48 |
|
55 | 49 |
|
50 |
|
|
51 |
### add boundaries to file list to remove problematic pixels at high latitudes |
|
52 |
## these boundaries were later added to the earth engine script, so if it is re-run this is not necessary |
|
53 |
#xmin,ymin,xmax,ymax (the order expected by gdalbuildvrt -te) |
|
54 |
mextent=list( |
|
55 |
"01"=c(-180,-90,180,73.5),# |
|
56 |
"02"=c(-180,-90,180,84), # |
|
57 |
"03"=c(-180,-90,180,90), |
|
58 |
"04"=c(-180,-90,180,90), |
|
59 |
"05"=c(-180,-69,180,90), |
|
60 |
"06"=c(-180,-62.5,180,90), # |
|
61 |
"07"=c(-180,-67,180,90), # |
|
62 |
"08"=c(-180,-77,180,90), |
|
63 |
"09"=c(-180,-77,180,90), |
|
64 |
"10"=c(-180,-90,180,89), # |
|
65 |
"11"=c(-180,-90,180,77), |
|
66 |
"12"=c(-180,-90,180,69) |
|
67 |
) |
|
68 |
|
|
69 |
## project to sinusoidal |
|
70 |
proj="'+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs'" |
|
71 |
|
|
72 |
mextentsin=lapply(mextent,function(x) c(project(t(x[1:2]),gsub("'","",proj)),project(t(x[3:4]),gsub("'","",proj))))  ## gsub (not sub) strips both shell quotes from proj |
|
73 |
|
|
56 | 74 |
## Loop over data to mosaic tifs, compress, and add metadata |
57 | 75 |
foreach(i=1:nrow(jobs)) %dopar% { |
58 | 76 |
## get month |
59 | 77 |
m=jobs$month[i] |
78 |
cm=sprintf("%02d",m) |
|
79 |
|
|
60 | 80 |
date=df$date[df$month==m][1] |
61 | 81 |
print(date) |
62 | 82 |
## get sensor |
... | ... | |
64 | 84 |
s2=sub("GA","",s) |
65 | 85 |
|
66 | 86 |
## Define output and check if it already exists |
67 |
tvrt=paste(datadir,"/mcd09tif/",s2,"_",sprintf("%02d", m),".vrt",sep="") |
|
68 |
ttif1=paste(datadir,"/mcd09tif/",s2,"_",sprintf("%02d", m),"_uncompressed.tif",sep="") |
|
69 |
ttif2=paste(datadir,"/mcd09tif/",s2,"_",sprintf("%02d", m),".tif",sep="") |
|
87 |
tvrt=paste(datadir,"/mcd09tif/",s2,"_",cm,"_globalsin.vrt",sep="") |
|
88 |
tvrt2=paste(datadir,"/mcd09tif/",s2,"_",cm,"_globalwgs84.vrt",sep="") |
|
89 |
ttif=paste(datadir,"/mcd09tif/",s2,"_",cm,"_mean.tif",sep="") |
|
90 |
ttif2=paste(datadir,"/mcd09tif/",s2,"_",cm,"_sd.tif",sep="") |
|
70 | 91 |
|
71 | 92 |
## check if output already exists |
72 |
if(!rerun&file.exists(ttif1)) return(NA)
|
|
93 |
if(!rerun&file.exists(ttif)) return(NA) |
|
73 | 94 |
## build VRT to merge tiles |
74 |
proj="'+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs'"
|
|
75 |
system(paste("gdalbuildvrt -b 1 -b 2 -srcnodata -32768 ",tvrt," ",paste(df$path[df$month==m&df$sensor==s],collapse=" ")))
|
|
95 |
## include subsetting using the mextentsin object created above to crop out problematic values
|
|
96 |
system(paste("gdalbuildvrt -b 1 -b 2 -te ",paste(round(mextentsin[[cm]]),collapse=" ")," -srcnodata -32768 -vrtnodata 32767 ",tvrt," ",paste(df$path[df$month==m&df$sensor==s],collapse=" ")))
|
|
76 | 97 |
## Merge to geotif in temporary directory |
77 |
## specify source projection because it gets it slightly wrong by default #-ot Int16 -dstnodata -32768
|
78 |
ops=paste("-s_srs ",proj," -t_srs 'EPSG:4326' -multi -srcnodata -32768 -ot Int16 -dstnodata -32768 -r bilinear -te -180 -90 180 90 -tr 0.008333333333333 -0.008333333333333", |
|
79 |
"-co BIGTIFF=YES --config GDAL_CACHEMAX 2000 -wm 2000 -wo NUM_THREADS:10 -co COMPRESS=LZW -co PREDICTOR=2") |
|
80 |
## if the file exists, remove it to avoid warping into an existing file
|
81 |
if(file.exists(ttif1)) file.remove(ttif1) |
|
82 |
## run the warp |
|
83 |
system(paste("gdalwarp -overwrite ",ops," ",tvrt," ",ttif1)) |
|
98 |
## specify source projection because it gets it slightly wrong by default
|
99 |
ops=paste("-multi -of vrt --config GDAL_CACHEMAX 500 -wm 500 -wo NUM_THREADS:10 -srcnodata 32767 -dstnodata 32767 -s_srs ",proj," -t_srs 'EPSG:4326' ", |
|
100 |
" -ot Int16 -r bilinear -te -180 -90 180 90 -tr 0.008333333333333 -0.008333333333333") |
|
101 |
## create the warped VRT
|
102 |
system(paste("gdalwarp -overwrite ",ops," ",tvrt," ",tvrt2)) |
|
84 | 103 |
|
85 | 104 |
## Compress file and add metadata tags |
86 |
ops2=paste("-ot Int16 -co COMPRESS=LZW -co PREDICTOR=2 -stats") |
|
87 |
tags=c(paste("TIFFTAG_IMAGEDESCRIPTION='Monthly Cloud Frequency for 2000-2013 extracted from C5 MODIS ",s,"GA PGE11 internal cloud mask algorithm (embedded in state_1km bit 10).", |
|
88 |
"The daily cloud mask time series were summarized to mean cloud frequency (CF) by calculating the proportion of cloudy days. ", |
|
89 |
"Band Descriptions: 1) Mean Monthly Cloud Frequency x 10000 2) Standard Deviation of Mean Monthly Cloud x 10000'"), |
|
90 |
"TIFFTAG_DOCUMENTNAME='Collection 5 ",s," Cloud Frequency'", |
|
105 |
ops2=paste("-ot Int16 -co COMPRESS=LZW -co PREDICTOR=2 -stats -a_nodata 32767 ") |
|
106 |
meantags=c(paste("TIFFTAG_IMAGEDESCRIPTION='Monthly Cloud Frequency (x10000) for 2000-2013 extracted from C5 MODIS ",s,"GA PGE11 internal cloud mask algorithm (embedded in state_1km bit 10).", |
|
107 |
"The daily cloud mask time series were summarized to mean cloud frequency (CF) by calculating the proportion of cloudy days. '"), |
|
108 |
paste("TIFFTAG_DOCUMENTNAME='Collection 5 ",s," Mean Cloud Frequency'",sep=""), |
|
109 |
paste("TIFFTAG_DATETIME='2013",sprintf("%02d", m),"15'",sep=""), |
|
110 |
"TIFFTAG_ARTIST='Adam M. Wilson (adam.wilson@yale.edu)'") |
|
111 |
sdtags=c(paste("TIFFTAG_IMAGEDESCRIPTION='Standard Deviation (x10000) of Monthly Cloud Frequency for 2000-2013 extracted from C5 MODIS ",s,"GA PGE11 internal cloud mask algorithm (embedded in state_1km bit 10).", |
|
112 |
"The daily cloud mask time series were summarized to mean cloud frequency (CF) by calculating the proportion of cloudy days. '"), |
|
113 |
paste("TIFFTAG_DOCUMENTNAME='Collection 5 ",s," SD Cloud Frequency'",sep=""), |
|
91 | 114 |
paste("TIFFTAG_DATETIME='2013",sprintf("%02d", m),"15'",sep=""), |
92 | 115 |
"TIFFTAG_ARTIST='Adam M. Wilson (adam.wilson@yale.edu)'") |
93 |
system(paste("gdal_translate ",ops2," ",paste("-mo ",tags,sep="",collapse=" ")," ",ttif1," ",ttif2)) |
|
94 | 116 |
|
95 |
## delete temporary files |
|
96 |
file.remove(tvrt,ttif1) |
|
117 |
## run the merge, warp, and compress all in one step... |
|
118 |
system(paste("gdal_translate -b 1 ",ops2," ",paste("-mo ",meantags,sep="",collapse=" ")," ",tvrt2," ",ttif)) |
|
119 |
system(paste("gdal_translate -b 2 ",ops2," ",paste("-mo ",sdtags,sep="",collapse=" ")," ",tvrt2," ",ttif2)) |
|
97 | 120 |
writeLines(paste("Month:",m," Sensor:",s," Finished")) |
98 | 121 |
} |
99 | 122 |
|
Update bounding box to remove high-latitude garbage