Revision a29da850
Added by Adam Wilson over 10 years ago
climate/research/cloud/MOD09/2_bias.R
1 | 1 |
### Script to correct orbital-path bias/artefacts in the monthly cloud data using VSNR filtering |
2 | 2 |
|
3 | 3 |
library(rasterVis) |
4 |
library(multicore) |
|
4 | 5 |
library(doMC) |
5 | 6 |
library(foreach) |
6 |
library(RcppOctave) |
|
7 | 7 |
library(rgdal) |
8 | 8 |
registerDoMC(12) |
9 | 9 |
|
... | ... | |
16 | 16 |
|
17 | 17 |
|
18 | 18 |
## Specify path to VSNR source code and add it to RcppOctave path |
19 |
library(RcppOctave) |
|
19 | 20 |
mpath="/mnt/data/personal/adamw/projects/environmental-layers/climate/research/cloud/MOD09/vsnr/" |
20 | 21 |
.O$addpath(mpath) |
21 | 22 |
|
... | ... | |
23 | 24 |
######################################### |
24 | 25 |
#### Bias correction functions |
25 | 26 |
|
26 |
fgabor=function(d,theta=-15,x=200,y=5){
|
|
27 |
fgabor=function(d,theta,x=200,y=5){ |
|
27 | 28 |
thetaR=(theta*pi)/180 |
28 | 29 |
cds=expand.grid(x=(1:nrow(d))-round(nrow(d)/2),y=(1:ncol(d))-round(ncol(d)/2)) |
29 | 30 |
sigma_x=x |
... | ... | |
102 | 103 |
allexts=rbind.data.frame(modexts,mydexts) |
103 | 104 |
|
104 | 105 |
### assemble list of files to process |
105 |
df2=data.frame(path=list.files(paste(datadir,"/mcd09tif",sep=""),full=T,pattern="[0-9][.]tif$"),stringsAsFactors=F)
|
|
106 |
df2[,c("sensor","month")]=do.call(rbind.data.frame,strsplit(basename(df2$path),"_|[.]"))[,c(1,2)]
|
|
106 |
df2=data.frame(path=list.files(paste(datadir,"/mcd09tif",sep=""),full=T,pattern="[0-9]*(mean|sd).*tif$"),stringsAsFactors=F)
|
|
107 |
df2[,c("sensor","month","type")]=do.call(rbind.data.frame,strsplit(basename(df2$path),"_|[.]"))[,c(1,2,3)]
|
|
107 | 108 |
|
108 | 109 |
## create a list of jobs to process |
109 | 110 |
jobs=data.frame(allexts,month=rep(sprintf("%02d",1:12),each=nrow(allexts))) |
110 |
jobs$path=df2$path[match(paste(jobs$sensor,jobs$month),paste(df2$sensor,df2$month))] |
|
111 |
jobs=rbind.data.frame(cbind.data.frame(type="mean",jobs),cbind.data.frame(type="sd",jobs)) |
|
112 |
jobs$path=df2$path[match(paste(jobs$sensor,jobs$month,jobs$type),paste(df2$sensor,df2$month,df2$type))] |
|
113 |
## drop any jobs with no associated files |
|
114 |
jobs=jobs[!is.na(jobs$path),] |
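The block above crosses the tile extents with the twelve months, duplicates the grid for the mean and sd layers, and attaches file paths with a paste()/match() key join. A minimal sketch of that join pattern on toy data (the column values here are hypothetical):

df2x=data.frame(sensor=c("MOD09","MYD09"),month="01",type="mean",
                path=c("MOD09_01_mean.tif","MYD09_01_mean.tif"),stringsAsFactors=F)
jobsx=data.frame(sensor="MOD09",month="01",type="mean")
## match() returns the row of df2x whose composite key equals the job's key
jobsx$path=df2x$path[match(paste(jobsx$sensor,jobsx$month,jobsx$type),
                           paste(df2x$sensor,df2x$month,df2x$type))]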
|
111 | 115 |
|
112 | 116 |
|
113 | 117 |
## loop over sensor-months to create full grid of corrected values |
114 |
foreach( i=1:nrow(jobs)) %dopar% { |
|
118 |
foreach( i=1:nrow(jobs), .options.multicore=list(preschedule=FALSE)) %dopar% {
|
|
115 | 119 |
file=jobs$path[i] |
116 | 120 |
toutfile=paste(datadir,"mcd09bias/", sub(".tif","",basename(file)),"_",jobs$tile[i],".tif",sep="") |
117 |
# if(file.exists(toutfile)) {writeLines(paste(toutfile,"Exists, moving on"));next}
|
|
121 |
if(file.exists(toutfile)) {writeLines(paste(toutfile,"Exists, moving on"));return(NULL)}
|
|
118 | 122 |
writeLines(paste("Starting: ",toutfile," tile:",jobs$tile[i]," ( ",i," out of ",nrow(jobs),")")) |
119 |
## set sensor-specific parameters |
|
120 |
## add extra region for correction depending on which sensor is being processed |
|
121 | 123 |
## set angle of orbital artefacts to be corrected |
122 | 124 |
sensor=jobs$sensor[i] |
123 | 125 |
if(sensor=="MOD09") scanangle=-15 |
... | ... | |
138 | 140 |
res=vsnr(d2,gabor=psi,alpha=2,p=2,epsilon=1,prec=5e-6,maxit=50,C=1,full=F) |
139 | 141 |
res2=res*100 |
140 | 142 |
## write the file |
141 |
writeRaster(res2,file=toutfile,overwrite=T,datatype='INT2S',options=c("COMPRESS=LZW", "PREDICTOR=2"),NAvalue=-32768)
|
|
143 |
writeRaster(res2,file=toutfile,overwrite=T,datatype='INT2S',options=c("COMPRESS=LZW", "PREDICTOR=2"),NAvalue=32767)
|
|
142 | 144 |
## remove temporary files |
143 | 145 |
rmr(d);rmr(d2);rmr(psi);rmr(res);rmr(res2) |
144 | 146 |
## remove old temporary files older than x hours |
... | ... | |
147 | 149 |
} |
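For reference, the core of the loop above reduces to building a Gabor kernel at the sensor-specific orbit angle and handing it to the Octave VSNR routine. A sketch of a single-tile correction using only calls shown in this script (d2 is assumed to be a cropped tile already loaded):

scanangle=-15                    # orbital artefact angle for MOD09 (set above)
psi=fgabor(d2,theta=scanangle)   # Gabor kernel aligned with the striping
res=vsnr(d2,gabor=psi,alpha=2,p=2,epsilon=1,prec=5e-6,maxit=50,C=1,full=F)
res2=res*100                     # rescale before writing as INT2S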
148 | 150 |
|
149 | 151 |
|
150 |
|
|
151 | 152 |
############################################ |
152 | 153 |
## now mosaic the tiles with the original image to keep only the corrected data (when available) and the uncorrected data where there is no tile. |
153 | 154 |
## this relies on df2 created above |
155 |
#tfs=list.files(paste(datadir,"/mcd09bias",sep=""),pattern="M.*_[0-9]._[0-9][.]tif",full=T) |
|
156 |
#file.rename(tfs,paste(substr(tfs,1,46),"mean_",substr(tfs,47,52),sep="")) |
|
157 |
|
|
158 |
## define color scale for mean and sd |
|
159 |
mkct=function(palette,vals,bit) |
|
160 |
data.frame(id=0:(2^bit-1),t(col2rgb(c(palette(length(vals)+1),rep("#00000000",(2^bit)-length(vals)-1)),alpha=T))) |
|
161 |
|
|
162 |
colR=colorRampPalette(c("#08306b","#0d57a1","#2878b8","#4997c9","#72b2d7","#a2cbe2","#c7dcef","#deebf7","#f7fbff")) |
|
163 |
meancols=mkct(colR,vals=0:10000,bit=16) |
|
164 |
write.table(meancols,file="data/colors16_mean.txt",quote=F,row.names=F,col.names=F) |
|
165 |
|
|
166 |
colR2=colorRampPalette(c("#0000FF","#00FF80","#FF0080")) |
|
167 |
sdcols=mkct(colR2,vals=0:10000,bit=16) |
|
168 |
write.table(sdcols,file="data/colors16_sd.txt",quote=F,row.names=F,col.names=F) |
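mkct stretches a palette over the 0-10000 data range and pads the remaining 16-bit entries with transparent black, since GDAL expects a full 2^16-entry color table. A quick sanity check of the output shape:

ct=mkct(colR,vals=0:10000,bit=16)
stopifnot(nrow(ct)==2^16)   # one RGBA row per possible 16-bit value
head(ct)                    # columns: id, red, green, blue, alpha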
|
169 |
|
|
154 | 170 |
|
155 | 171 |
foreach( i=1:nrow(df2)) %dopar% { |
156 | 172 |
ifile=df2$path[i] |
157 |
outfile=paste(datadir,"/mcd09ctif/",df2$sensor[i],"_",df2$month[i],"_uncompressed.tif",sep="") |
|
158 |
outfile2=paste(datadir,"/mcd09ctif/",df2$sensor[i],"_",df2$month[i],".tif",sep="") |
|
159 |
## if(file.exists(outfile)) {print(paste(outfile," exists, moving on...")); return(NULL)} |
|
160 |
## create VRT of first band of the full image |
|
161 |
fvrt=sub("[.]tif",".vrt",ifile) |
|
162 |
system(paste("gdalbuildvrt -b 1 ",fvrt," ",ifile)) |
|
173 |
itype=df2$type[i] |
|
174 |
outfile=paste(datadir,"/mcd09ctif/",sub(".tif",".vrt",basename(ifile)),sep="") |
|
175 |
outfile2=paste(datadir,"/mcd09ctif/unmasked_",basename(ifile),sep="") |
|
176 |
outfile3=paste(datadir,"/mcd09ctif/",basename(ifile),sep="") |
|
177 |
# if(file.exists(outfile3)) {print(paste(outfile," exists, moving on...")); return(NULL)} |
|
163 | 178 |
## mosaic the tiles with the original data (keeping the new data when available) |
164 |
tfiles=paste(c(fvrt,list.files(paste(datadir,"/mcd09bias",sep=""),pattern=paste(sub("[.]tif","",basename(outfile2)),"_[0-9]*[.]tif",sep=""),full=T)),collapse=" ") |
|
165 |
if(file.exists(outfile)) file.remove(outfile2,outfile) |
|
166 |
system(paste("gdal_merge.py --config GDAL_CACHEMAX 10000 -init -32768 -n -32768 -co BIGTIFF=yes -o ",outfile," ",tfiles,sep="")) |
|
167 |
system(paste("gdal_translate -co COMPRESS=LZW -co ZLEVEL=9 -co PREDICTOR=2 ",outfile," ",outfile2,sep="")) |
|
168 |
file.remove(fvrt,outfile) |
|
169 |
writeLines(paste("Finished ",outfile)) |
|
179 |
tfiles=paste(c(ifile,list.files(paste(datadir,"/mcd09bias",sep=""),pattern=paste(sub("[.]tif","",basename(outfile3)),"_[0-9]*[.]tif",sep=""),full=T)),collapse=" ") |
|
180 |
system(paste("gdalbuildvrt -srcnodata 32767 -vrtnodata 32767 ",outfile," ",tfiles,sep="")) |
|
181 |
system(paste("gdal_translate -a_ullr -180 90 180 -90 -a_nodata 32767 -co COMPRESS=LZW -co ZLEVEL=9 -co PREDICTOR=2 ",outfile," ",outfile2,sep="")) |
|
182 |
## use pksetmask to clamp any values above 100% (except the missing values) back to 100% |

183 |
## such values arise from the reprojection from sinusoidal to wgs84, which leaves a few pixels slightly greater than 100 |
|
185 |
## also convert to an unsigned 16-bit integer to allow adding a color table |
|
186 |
ict=ifelse(itype=="mean","data/colors16_mean.txt","data/colors16_sd.txt") |
|
187 |
system(paste("pksetmask -i ",outfile2," -m ",outfile2," -ot UInt16 ", |
|
188 |
"--operator='>' --msknodata 20000 --nodata 65535 --operator='>' --msknodata 10000 --nodata 10000 --operator='<' --msknodata 0 --nodata 65535 ", |
|
189 |
" -ct ",ict," -co COMPRESS=LZW -co PREDICTOR=2 -o ",outfile3)) |
|
190 |
## clean up temporary files |
|
191 |
file.remove(outfile,outfile2) |
|
192 |
writeLines(paste("####################################### Finished ",outfile)) |
|
170 | 193 |
} |
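The pksetmask call applies three rules on the 0-10000 scaled data: values above 20000 or below 0 become nodata (65535), and values between 10000 and 20000 are clamped to 10000 (i.e. 100% cloud frequency). An equivalent, much slower raster-algebra sketch of the same rules (r is a hypothetical raster layer holding the scaled band):

clampcf=function(r){
  r[r>20000 | r<0]=65535        # implausible values -> nodata
  r[r>10000 & r<20000]=10000    # reprojection overshoot -> 100% CF
  r
}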
171 |
|
|
172 | 194 |
|
173 | 195 |
|
196 |
## check output |
|
197 |
for(i in 1:nrow(df2)) { |
|
198 |
ifile=df2$path[i] |
|
199 |
outfile3=paste(datadir,"/mcd09ctif/",basename(ifile),sep="") |
|
200 |
print(ifile) |
|
201 |
system(paste("gdalinfo ",outfile3,"| grep 'Size is'")) |
|
202 |
} |
climate/research/cloud/MOD09/3_biome.R
1 |
### Produce summary of cloud frequency by biome |
|
2 |
|
|
3 |
setwd("~/acrobates/adamw/projects/cloud/") |
|
4 |
|
|
5 |
|
|
6 |
## libraries |
|
7 |
library(rasterVis) |
|
8 |
library(latticeExtra) |
|
9 |
library(xtable) |
|
10 |
library(texreg) |
|
11 |
library(reshape) |
|
12 |
library(caTools) |
|
13 |
library(rgeos) |
|
14 |
library(raster) |
|
15 |
|
|
16 |
|
|
17 |
#### Evaluate MOD35 Cloud data |
|
18 |
mod09c=brick("data/cloud_ymonmean.nc",varname="CF");names(mod09c)=month.name |
|
19 |
mod09sd=brick("data/cloud_std.nc",varname="CFsd") |
|
20 |
|
|
21 |
## rasterize the biome dataset |
|
22 |
bpath="data/teow/biomes.shp" |
|
23 |
## create a copy with a numeric biome code |
|
24 |
biome=readOGR("data/teow/","biomes") |
|
25 |
bcode=unique(data.frame(code=biome$code,realm=biome$realm,biome=biome$biome)) |
|
26 |
|
|
27 |
system(paste("gdal_rasterize -a code -l biomes ",bpath," work.tif")) |
|
28 |
|
|
29 |
## only create the sample points if they don't already exist |
if(!file.exists("output/biomesamplepoints.csv")){ |
biome=readOGR("data/teow/","biomes") |
|
30 |
n_biomesamples=1000 |
|
31 |
library(multicore) |
|
32 |
biomesample=do.call(rbind.data.frame,mclapply(1:length(biome),function(i) |
|
33 |
data.frame(code=biome$code[i],coordinates(spsample(biome[i,],n=n_biomesamples,type="stratified",nsig=2))))) |
|
34 |
colnames(biomesample)[2:3]=c("lon","lat") |
|
35 |
biomesample[,c("biome","realm")]=biome@data[match(biomesample$code,biome$code),c("biome","realm")] |
|
36 |
write.csv(biomesample,"output/biomesamplepoints.csv",row.names=F) |
|
37 |
} |
|
38 |
|
|
39 |
## Extract data for points |
|
40 |
if(!file.exists("output/biomesamplepoints_cloud.csv")){ |
|
41 |
biomesample=read.csv("output/biomesamplepoints.csv") |
|
42 |
biomep=raster::extract(mod09c,biomesample,sp=T) |
|
43 |
biomep$lon=biomesample$lon |
|
44 |
biomep@data[,c("lon","lat")]=coordinates(biomep) |
|
45 |
write.csv(biomep@data,"output/biomesamplepoints_cloud.csv",row.names=F) |
|
46 |
} |
|
47 |
biomep=read.csv("output/biomesamplepoints_cloud.csv") |
|
48 |
coordinates(biomep)=c("lon","lat") |
|
49 |
projection(biomep)="+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs" |
|
50 |
|
|
51 |
|
|
52 |
## get stratified sample of points from biomes for illustration |
|
53 |
if(!file.exists("output/biomesamplepoints.csv")){ |
|
54 |
biome=readOGR("data/teow/","biomes") |
|
55 |
n_biomesamples=1000 |
|
56 |
library(multicore) |
|
57 |
biomesample=do.call(rbind.data.frame,mclapply(1:length(biome),function(i) |
|
58 |
data.frame(code=biome$code[i],coordinates(spsample(biome[i,],n=n_biomesamples,type="stratified",nsig=2))))) |
|
59 |
colnames(biomesample)[2:3]=c("lon","lat") |
|
60 |
biomesample[,c("biome","realm")]=biome@data[match(biomesample$code,biome$code),c("biome","realm")] |
|
61 |
write.csv(biomesample,"output/biomesamplepoints.csv",row.names=F) |
|
62 |
} |
|
63 |
|
|
64 |
## Extract data for points |
|
65 |
if(!file.exists("output/biomesamplepoints_cloud.csv")){ |
|
66 |
biomesample=read.csv("output/biomesamplepoints.csv") |
|
67 |
biomep=raster::extract(mod09c,biomesample,sp=T) |
|
68 |
biomep$lon=biomesample$lon |
|
69 |
biomep@data[,c("lon","lat")]=coordinates(biomep) |
|
70 |
write.csv(biomep@data,"output/biomesamplepoints_cloud.csv",row.names=F) |
|
71 |
} |
|
72 |
biomep=read.csv("output/biomesamplepoints_cloud.csv") |
|
73 |
coordinates(biomep)=c("lon","lat") |
|
74 |
projection(biomep)="+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs" |
|
75 |
|
|
76 |
|
|
77 |
#plot(mod09a,layers=1,margin=F,maxpixels=100) |
|
78 |
|
|
79 |
## calculated differences |
|
80 |
cldm$difm=cldm$mod09-cldm$cld_all |
|
81 |
cldm$difs=cldm$mod09sd+cldm$cldsd_all |
|
82 |
|
|
83 |
#clda$dif=clda$mod09-clda$cld |
|
84 |
|
|
85 |
## read in global coasts for nice plotting |
|
86 |
library(maptools) |
|
87 |
library(rgdal) |
|
88 |
|
|
89 |
#coast=getRgshhsMap("/mnt/data/jetzlab/Data/environ/global/gshhg/gshhs_h.b", xlim = NULL, ylim = NULL, level = 4) |
|
90 |
land=readShapePoly("/mnt/data/jetzlab/Data/environ/global/gshhg/GSHHS_shp/c/GSHHS_c_L1.shp",force_ring=TRUE) |
|
91 |
projection(land)="+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs" |
|
92 |
CP <- as(extent(-180, 180, -60, 84), "SpatialPolygons") |
|
93 |
proj4string(CP) <- CRS(proj4string(land)) |
|
94 |
coast=as(land[land$area>50,],"SpatialLines") |
|
95 |
## Clip the map |
|
96 |
land <- gIntersection(land, CP, byid=F) |
|
97 |
coast <- gIntersection(coast, CP, byid=F) |
|
98 |
|
|
99 |
## get stratified sample of points from biomes for illustration |
|
100 |
if(!file.exists("output/biomesamplepoints.csv")){ |
|
101 |
biome=readOGR("data/teow/","biomes") |
|
102 |
n_biomesamples=1000 |
|
103 |
library(multicore) |
|
104 |
biomesample=do.call(rbind.data.frame,mclapply(1:length(biome),function(i) |
|
105 |
data.frame(code=biome$code[i],coordinates(spsample(biome[i,],n=n_biomesamples,type="stratified",nsig=2))))) |
|
106 |
colnames(biomesample)[2:3]=c("lon","lat") |
|
107 |
biomesample[,c("biome","realm")]=biome@data[match(biomesample$code,biome$code),c("biome","realm")] |
|
108 |
write.csv(biomesample,"output/biomesamplepoints.csv",row.names=F) |
|
109 |
} |
|
110 |
|
|
111 |
## Extract data for points |
|
112 |
if(!file.exists("output/biomesamplepoints_cloud.csv")){ |
|
113 |
biomesample=read.csv("output/biomesamplepoints.csv") |
|
114 |
biomep=raster::extract(mod09c,biomesample,sp=T) |
|
115 |
biomep$lon=biomesample$lon |
|
116 |
biomep@data[,c("lon","lat")]=coordinates(biomep) |
|
117 |
write.csv(biomep@data,"output/biomesamplepoints_cloud.csv",row.names=F) |
|
118 |
} |
|
119 |
biomep=read.csv("output/biomesamplepoints_cloud.csv") |
|
120 |
coordinates(biomep)=c("lon","lat") |
|
121 |
projection(biomep)="+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs" |
|
122 |
|
|
123 |
|
|
124 |
## Figures |
|
125 |
n=100 |
|
126 |
at=seq(0,100,length=n) |
|
127 |
colr=colorRampPalette(c("black","green","red")) |
|
128 |
cols=colr(n) |
|
129 |
|
|
130 |
## set plotting parameters |
|
131 |
my.theme = trellis.par.get() |
|
132 |
my.theme$strip.background=list(col="transparent") |
|
133 |
trellis.par.set(my.theme) |
|
134 |
|
|
135 |
#pdf("output/Figures.pdf",width=11,height=8.5) |
|
136 |
png("output/CF_Figures_%03d.png",width=5000,height=4000,res=600,pointsize=42,bg="white") |
|
137 |
|
|
138 |
## set plotting parameters |
|
139 |
my.theme = trellis.par.get() |
|
140 |
my.theme$strip.background=list(col="transparent") |
|
141 |
trellis.par.set(my.theme) |
|
142 |
|
|
143 |
res=1e6 |
|
144 |
greg=list(ylim=c(-60,84),xlim=c(-180,180)) |
|
145 |
|
|
146 |
## Figure 1: 4-panel summaries |
|
147 |
#- Annual average |
|
148 |
levelplot(mod09a,col.regions=colr(n),cuts=100,at=seq(0,100,len=100),colorkey=list(space="bottom",adj=1), |
|
149 |
margin=F,maxpixels=res,ylab="",xlab="",useRaster=T,ylim=greg$ylim)+ |
|
150 |
layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+ |
|
151 |
layer(sp.lines(coast,col="black"),under=F) |
|
152 |
## Mean annual with validation stations |
|
153 |
levelplot(mod09a,col.regions=colr(n),cuts=100,at=seq(0,100,len=100),colorkey=list(title="Cloud Frequency (%)",space="bottom",adj=1), |
|
154 |
margin=F,maxpixels=res,ylab="",xlab="",useRaster=T,ylim=greg$ylim)+ |
|
155 |
layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+ |
|
156 |
layer(panel.xyplot(lon,lat,pch=16,cex=.3,col="black"),data=data.frame(coordinates(st)))+ |
|
157 |
layer(sp.lines(coast,col="black"),under=F) |
|
158 |
|
|
159 |
## Seasonal Means |
|
160 |
levelplot(mod09s,col.regions=colr(n),cuts=100,at=seq(0,100,len=100),colorkey=list(title="Cloud Frequency (%)", space="bottom",adj=2), |
|
161 |
margin=F,maxpixels=res,ylab="",xlab="",useRaster=T,ylim=greg$ylim)+ |
|
162 |
layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+ |
|
163 |
layer(sp.lines(coast,col="black"),under=F) |
|
164 |
|
|
165 |
|
|
166 |
## Monthly Means |
|
167 |
levelplot(mod09c,col.regions=colr(n),cuts=100,at=seq(0,100,len=100),colorkey=list(title="Cloud Frequency (%)", space="bottom",adj=1), |
|
168 |
margin=F,maxpixels=res,ylab="Latitude",xlab="Longitude",useRaster=T,ylim=greg$ylim)+ |
|
169 |
layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+ |
|
170 |
layer(sp.lines(coast,col="black"),under=F) |
|
171 |
|
|
172 |
#- Monthly minimum |
|
173 |
#- Monthly maximum |
|
174 |
#- STDEV or Min-Max |
|
175 |
p_mac=levelplot(mod09a,col.regions=colr(n),cuts=99,at=seq(0,100,length=100),margin=F, |
|
176 |
ylim=greg$ylim,maxpixels=res/10,colorkey=list(title="Cloud Frequency (%)", space="top",height=.75),xlab="",ylab="",useRaster=T)+ |
|
177 |
layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+ |
|
178 |
layer(sp.lines(coast,col="black"),under=F) |
|
179 |
p_max=levelplot(mod09max,col.regions=colr(n),cuts=99,at=seq(0,100,length=100),margin=F,maxpixels=res/10, |
|
180 |
ylim=greg$ylim,colorkey=list(space="bottom",height=.75),useRaster=T)+ |
|
181 |
layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+ |
|
182 |
layer(sp.lines(coast,col="black"),under=F) |
|
183 |
p_intra=levelplot(mod09intra,col.regions=colr(n),cuts=99,at=seq(0,40,length=100),margin=F,maxpixels=res/100, |
|
184 |
ylim=greg$ylim,colorkey=list(space="bottom",height=.75),useRaster=T)+ |
|
185 |
layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+ |
|
186 |
layer(sp.lines(coast,col="black"),under=F) |
|
187 |
p_inter=levelplot(mod09inter,col.regions=colr(n),cuts=99,at=seq(0,15,length=100),margin=F,maxpixels=res/100, |
|
188 |
ylim=greg$ylim,colorkey=list(title="Cloud Frequency (%)", space="top",height=.75),useRaster=T)+ |
|
189 |
layer(panel.polygon(x=c(-180,-180,180,180),y=c(-90,90,90,-90),col="black"),under=T)+ |
|
190 |
layer(sp.lines(coast,col="black"),under=F) |
|
191 |
|
|
192 |
p3=c("Mean Cloud Frequency (%)"=p_mac,"Max Cloud Frequency (%)"=p_max,"Interannual Variability (sd)"=p_inter,"Intraannual Variability (sd)"=p_intra,x.same=T,y.same=F,merge.legends=T,layout=c(2,2)) |
|
193 |
print(p3) |
|
194 |
|
|
195 |
bgr=function(x,n=100,br=0,c1=c("darkblue","blue","grey"),c2=c("grey","red","purple")){ |
|
196 |
at=unique(c(seq(min(x,na.rm=T),max(x,na.rm=T),len=n))) |
|
197 |
bg=colorRampPalette(c1) |
|
198 |
gr=colorRampPalette(c2) |
|
199 |
return(list(at=at,col=c(bg(sum(at<br)),gr(sum(at>=br))))) |
|
200 |
} |
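bgr splits a diverging palette at br (zero by default): the cool ramp gets as many colors as there are breakpoints below the split and the warm ramp the rest, so the grey midpoint lands on zero residual. For example:

p=bgr(c(-2,-1,0,1,5),n=10)
length(p$col)   # 10 colors spanning the data range
sum(p$at<0)     # number of cool-colored bins below zero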
|
201 |
|
|
202 |
cldm$resid=NA |
|
203 |
# get residuals of simple linear model |
|
204 |
cldm$resid[!is.na(cldm$cld_all)&!is.na(cldm$mod09)]=residuals(lm(mod09~cld_all,data=cldm)) |
|
205 |
colat=bgr(cldm$resid) |
|
206 |
phist=histogram(cldm$resid,breaks=colat$at,border=NA,col=colat$col,xlim=c(-30,30),type="count",xlab="MODCF Residuals")#,seq(0,1,len=6),na.rm=T) |
|
207 |
pmap=xyplot(lat~lon|month2,data=cldm,groups=cut(cldm$resid,rev(colat$at)), |
|
208 |
par.settings=list(superpose.symbol=list(col=colat$col)),pch=16,cex=.25, |
|
209 |
auto.key=F,#list(space="right",title="Difference\n(MOD09-NDP026D)",cex.title=1),asp=1, |
|
210 |
ylab="Latitude",xlab="Longitude")+ |
|
211 |
layer(sp.lines(coast,col="black",lwd=.1),under=F) |
|
212 |
print(phist,position=c(0,.75,1,1),more=T) |
|
213 |
print(pmap,position=c(0,0,1,.78)) |
|
214 |
|
|
215 |
### heatmap of mod09 vs. NDP for all months |
|
216 |
hmcols=colorRampPalette(c("grey","blue","red","purple")) |
|
217 |
#hmcols=colorRampPalette(c(grey(.8),grey(.3),grey(.2))) |
|
218 |
tr=c(0,66) |
|
219 |
colkey <- draw.colorkey(list(col = hmcols(tr[2]), at = tr[1]:tr[2],height=.25)) |
|
220 |
|
|
221 |
xyplot(cld_all~mod09|month2,data=cldm,panel=function(x,y,subscripts){ |
|
222 |
n=50 |
|
223 |
bins=seq(0,100,len=n) |
|
224 |
tb=melt(as.matrix(table( |
|
225 |
x=cut(x,bins,labels=bins[-1]), |
|
226 |
y=cut(y,bins,labels=bins[-1])))) |
|
227 |
qat=unique(tb$value) |
|
228 |
print(max(qat)) |
|
229 |
qat=tr[1]:tr[2]#unique(tb$value) |
|
230 |
panel.levelplot(tb$x,tb$y,tb$value,at=qat,col.regions=c("transparent",hmcols(length(qat))),subscripts=1:nrow(tb)) |
|
231 |
# panel.abline(0,1,col="black",lwd=2) |
|
232 |
panel.abline(lm(y ~ x),col="black",lwd=2) |
|
233 |
# panel.ablineq(lm(y ~ x), r.sq = TRUE,at = 0.6,pos=1, offset=0,digits=2,col="blue") |
|
234 |
panel.text(70,10,bquote(paste(R^2,"=",.(round(summary(lm(y ~ x))$r.squared,2)))),cex=1) |
|
235 |
},asp=1,scales=list(at=seq(0,100,len=6),useRaster=T,colorkey=list(width=.5,title="Number of Stations")), |
|
236 |
ylab="NDP Mean Cloud Amount (%)",xlab="MODCF Cloud Frequency (%)", |
|
237 |
legend= list(right = list(fun = colkey)))#+ layer(panel.abline(0,1,col="black",lwd=2)) |
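The panel function above bins the station/MODCF value pairs onto a 50-break grid with cut()/table() and melts the count matrix for panel.levelplot. The binning step in isolation (synthetic data; melt comes from the reshape package loaded above):

bins=seq(0,100,len=50)
x=runif(500,0,100); y=runif(500,0,100)
tb=melt(as.matrix(table(x=cut(x,bins,labels=bins[-1]),
                        y=cut(y,bins,labels=bins[-1]))))
head(tb)   # one row per grid cell: x bin, y bin, pair count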
|
238 |
|
|
239 |
|
|
240 |
bwplot(lulcc~difm,data=cldm,horiz=T,xlab="Difference (MOD09-Observed)",varwidth=T,notch=T)+layer(panel.abline(v=0)) |
|
241 |
|
|
242 |
dev.off() |
|
243 |
|
|
244 |
#################################################################### |
|
245 |
### Regional Comparisons |
|
246 |
## Compare with worldclim and NPP |
|
247 |
#wc=stack(as.list(paste("/mnt/data/jetzlab/Data/environ/global/worldclim/prec_",1:12,".bil",sep=""))) |
|
248 |
wc_map=stack(as.list(paste("/mnt/data/jetzlab/Data/environ/global/worldclim/bio_12.bil",sep=""))) |
|
249 |
wc_dem=stack(as.list(paste("/mnt/data/jetzlab/Data/environ/global/worldclim/alt.bil",sep=""))) |
|
250 |
|
|
251 |
regs=list( |
|
252 |
Cascades=extent(c(-122.8,-118,44.9,47)), |
|
253 |
Hawaii=extent(c(-156.5,-154,18.75,20.5)), |
|
254 |
Bolivia=extent(c(-71,-63,-20,-15)), |
|
255 |
Venezuela=extent(c(-69,-59,0,7)), |
|
256 |
CFR=extent(c(17.75,22.5,-34.8,-32.6)), |
|
257 |
Madagascar=extent(c(46,52,-17,-12)) |
|
258 |
#reg2=extent(c(-81,-70,-4,10)) |
|
259 |
) |
|
260 |
|
|
261 |
|
|
262 |
## read in GEWEX 1-degree data |
|
263 |
gewex=mean(brick("data/gewex/CA_PATMOSX_NOAA.nc",varname="a_CA")) |
|
264 |
names(gewex)="PATMOS-x GEWEX AVHRR" |
|
265 |
|
|
266 |
## calculate 1-degree means of MODCF data |
|
267 |
#MOD_gewex=gewex |
|
268 |
#MOD_gewex@data@values=1:length(MOD_gewex@data@values) |
|
269 |
#MOD_gewex2=zonal(mod09a,MOD_gewex,fun='mean') |
|
270 |
png("output/Resolution_Figures_%03d.png",width=5500,height=4000,res=600,pointsize=36,bg="white") |
|
271 |
trellis.par.set(my.theme) |
|
272 |
#pdf("output/mod09_resolution.pdf",width=11,height=8.5) |
|
273 |
|
|
274 |
r="Venezuela" |
|
275 |
# ylab.right = "Cloud Frequency (%)",par.settings = list(layout.widths = list(axis.key.padding = 0.1,axis.left=0.6,ylab.right = 3,right.padding=2)), |
|
276 |
pars=list(layout.heights=list(key.bottom=2,key.top=1),layout.widths = list(axis.key.padding = 3,axis.left=0.6)) |
|
277 |
p1=levelplot(crop(mod09a,regs[[r]]),col.regions=grey(seq(0,1,len=100)),at=seq(45,100,len=99), |
|
278 |
colorkey=list(space="top",width=1,height=.75,labels=list(labels=c(50,75,100),at=c(50,75,100))), |
|
279 |
cuts=99,margin=F,max.pixels=1e6,par.settings = pars) |
|
280 |
p2=levelplot(crop(gewex,regs[[r]]),col.regions=grey(seq(0,1,len=100)),at=seq(.45,1,len=99),cuts=99,margin=F,max.pixels=1e6, |
|
281 |
colorkey=list(space="top",width=1,height=.75,labels=list(labels=c(50,75,100),at=c(.50,.75,1))), |
|
282 |
par.settings = pars) |
|
283 |
tmap=crop(wc_map,regs[[r]]) |
|
284 |
p3=levelplot(tmap,col.regions=grey(seq(0,1,len=100)),cuts=100,at=seq(tmap@data@min,tmap@data@max,len=100),margin=F,maxpixels=1e6, |
|
285 |
colorkey=list(space="bottom",height=.75,width=1),xlab="",ylab="",main=r,useRaster=T, |
|
286 |
par.settings = pars) |
|
287 |
p4=levelplot(crop(wc_dem,regs[[r]]),col.regions=grey(seq(0,1,len=100)),cuts=99,margin=F,max.pixels=1e6, |
|
288 |
colorkey=list(space="bottom",height=.75,width=1), |
|
289 |
par.settings = pars)#,labels=list(labels=c(1000,4000),at=c(1000,4000)))) |
|
290 |
print(c("MODCF (%)"=p1,"PATMOS-x GEWEX (%)"=p2,"WorldClim Precip (mm)"=p3,"Elevation (m)"=p4,x.same=T,y.same=T,merge.legends=T,layout=c(2,2))) |
|
291 |
|
|
292 |
|
|
293 |
dev.off() |
|
294 |
|
|
295 |
|
|
296 |
######################################### |
|
297 |
### Some stats for paper |
|
298 |
|
|
299 |
## number of stations retained |
|
300 |
length(unique(cldm$StaID[!is.na(cldm$cld_all)])) |
|
301 |
length(unique(cldm$StaID[!is.na(cldm$cld)])) |
|
302 |
|
|
303 |
# approximate size of mod09ga archive - get total size for one day from the USGS website |
|
304 |
size=scan("http://e4ftl01.cr.usgs.gov/MOLT/MOD09GA.005/2000.04.30/",what="char") |
|
305 |
## extract all filesizes in MB (all the HDFs), sum them, and convert to TB over the length of the full record |
|
306 |
sum(as.numeric(sub("M","",grep("[0-9]*M$",size,value=T))))/1024/1024*as.numeric(as.Date("2013-12-31")-as.Date("2000-02-24")) |
|
307 |
|
|
308 |
## seasonal variability |
|
309 |
cellStats(mod09sd,"mean") |
|
310 |
|
|
311 |
## Validation table construction |
|
312 |
quantile(cldm$difm,na.rm=T) |
|
313 |
|
|
314 |
summary(lm(cld_all~mod09+lat,data=cldm)) |
|
315 |
|
|
316 |
|
|
317 |
|
|
318 |
################################################################### |
|
319 |
### summary by biome |
|
320 |
biomep$id=1:nrow(biomep) |
|
321 |
biomepl=melt(biomep@data,id.vars=c("id","code","biome","realm")) |
|
322 |
colnames(biomepl)[grep("variable",colnames(biomepl))]="month" |
|
323 |
biomepl$month=factor(biomepl$month,ordered=T,levels=month.name) |
|
324 |
biomepl$realm=factor(biomepl$realm,ordered=T,levels=c("Antarctic","Australasia","Oceania", "IndoMalay", "Neotropics","Palearctic","Nearctic" )) |
|
325 |
biomepl$value[biomepl$value<0]=NA |
|
326 |
|
|
327 |
|
|
328 |
png("output/Biome_Figures_%03d.png",width=5500,height=4000,res=600,pointsize=36,bg="white") |
|
329 |
trellis.par.set(my.theme) |
|
330 |
|
|
331 |
#[biomepl$id%in%sample(biomep$id,10000),] |
|
332 |
p1=useOuterStrips(xyplot(value~month|realm+biome,groups=id,data=biomepl,panel=function(x,y,groups = groups, subscripts = subscripts){ |
|
333 |
panel.xyplot(x,y,col=grey(0.6,0.2),type="l",lwd=.5,groups=groups,subscripts=subscripts) |
|
334 |
panel.smoother(y ~ s(x), method = "gam",lwd=2,subscripts=subscripts,n=24) |
|
335 |
},scales=list(y=list(at=c(0,100),lim=c(-20,120),cex=.75,alternating=2,tck=c(0,1)),x=list(at=c(1,7),rot=45,alternating=1)),ylab="Biome",xlab.top="Geographic Realm",ylab.right="MODCF (%)", xlab="Month"), |
|
336 |
strip=strip.custom(par.strip.text = list(cex = .7)),strip.left=strip.custom(horizontal=TRUE,par.strip.text = list(cex = .75))) |
|
337 |
p1$par.settings$layout.widths$strip.left[1]=13 |
|
338 |
p1$par.strip.text$lines=.65 |
|
339 |
print(p1) |
|
340 |
|
|
341 |
dev.off() |
|
342 |
|
|
343 |
|
|
344 |
#################################################################### |
|
345 |
## assess temporal stability |
|
346 |
|
|
347 |
## spatially subset data to stations at least 10km apart |
|
348 |
st2=remove.duplicates(st,zero=10) |
|
349 |
|
|
350 |
## Subset data |
|
351 |
## drop missing observations |
|
352 |
cldm.t=cldm[!is.na(cldm$cld_all)&!is.na(cldm$mod09)&!is.na(cldm$biome),] |
|
353 |
cldm.t=cldm.t[cldm.t$lat>=-60,] |
|
354 |
# make sure all stations have all mod09 data |
|
355 |
stdrop=names(which(tapply(cldm.t$month,cldm.t$StaID,length)!=12)) |
|
356 |
cldm.t=cldm.t[!cldm.t$StaID%in%stdrop,] |
|
357 |
# Keep only stations at least 10km apart |
|
358 |
cldm.t=cldm.t[cldm.t$StaID%in%st2$id,] |
|
359 |
## Subset to only some months, if desired |
|
360 |
#cldm.t=cldm.t[cldm.t$month%in%1:3,] |
|
361 |
|
|
362 |
|
|
363 |
## Select Knots |
|
364 |
knots=spsample(land,500,type="regular") |
|
365 |
|
|
366 |
# reshape data |
|
367 |
m.cld=cast(cldm.t,StaID+lat+lon+biome~month,value="cld_all");colnames(m.cld)[-(1:4)]=paste("cld.",colnames(m.cld)[-(1:4)],sep="") |
|
368 |
m.mod09=cast(cldm.t,StaID~month,value="mod09");colnames(m.mod09)[-1]=paste("mod09.",colnames(m.mod09)[-1],sep="") |
|
369 |
mdata=cbind(m.cld,m.mod09) |
|
370 |
|
|
371 |
## build the station coordinate matrix |
library(spBayes) ## provides iDist() and spDynLM() used below |

coords <- as.matrix(m.cld[,c("lon","lat")]) |

max.d <- max(iDist(coords)) |
|
374 |
|
|
375 |
N.t <- ncol(m.mod09)-1 ## number of months (must be defined before the formulas that use it) |

n <- nrow(m.cld) ## number of observations per month |

## make symbolic model formula statement for each month |
mods <- lapply(paste(paste("cld.",1:N.t,sep=''),paste("mod09.",1:N.t,sep=''),sep='~'), as.formula) |

tlm=model.matrix(lm(mods[[1]],data=mdata)) |

p <- ncol(tlm) # number of regression parameters in each month |
|
383 |
|
|
384 |
starting <- list("beta"=rep(0,N.t*p), "phi"=rep(3/(0.5*max.d), N.t), |
|
385 |
"sigma.sq"=rep(2,N.t), "tau.sq"=rep(1, N.t), |
|
386 |
"sigma.eta"=diag(rep(0.01, p))) |
|
387 |
tuning <- list("phi"=rep(5, N.t)) |
|
388 |
|
|
389 |
priors <- list("beta.0.Norm"=list(rep(0,p), diag(1000,p)), |
|
390 |
"phi.Unif"=list(rep(3/(0.9*max.d), N.t), rep(3/(0.05*max.d), N.t)), |
|
391 |
"sigma.sq.IG"=list(rep(2,N.t), rep(10,N.t)), |
|
392 |
"tau.sq.IG"=list(rep(2,N.t), rep(5,N.t)), |
|
393 |
"sigma.eta.IW"=list(2, diag(0.001,p))) |
|
394 |
cov.model <- "exponential" |
|
395 |
|
|
396 |
## Run the model |
|
397 |
n.samples <- 500 |
|
398 |
m.1=spDynLM(mods,data=mdata,coords=coords,knots=coordinates(knots),n.samples=n.samples,starting=starting,tuning=tuning,priors=priors,cov.model=cov.model,get.fitted=T,n.report=25) |
|
399 |
|
|
400 |
save(m.1,file="output/m.1.Rdata") |
|
401 |
## summarize |
|
402 |
burn.in <- floor(0.75*n.samples) |
|
403 |
quant <- function(x){quantile(x, prob=c(0.5, 0.025, 0.975))} |
|
404 |
beta <- apply(m.1$p.beta.samples[burn.in:n.samples,], 2, quant) |
|
405 |
beta.0 <- beta[,grep("Intercept", colnames(beta))] |
|
406 |
beta.1 <- beta[,grep("mod09", colnames(beta))] |
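With beta.1 holding the posterior median and 95% interval of each month's MODCF slope, temporal stability can be inspected by plotting the coefficients in month order; a minimal sketch (assumes the columns are the 12 monthly slopes):

plot(1:ncol(beta.1), beta.1[1,], ylim=range(beta.1),
     xlab="Month", ylab="MODCF slope", pch=16)
arrows(1:ncol(beta.1), beta.1[2,], 1:ncol(beta.1), beta.1[3,],
       angle=90, code=3, length=0.02)   # 95% credible intervals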
|
407 |
|
|
408 |
|
|
409 |
|
|
410 |
|
|
411 |
### Compare time periods |
|
412 |
library(texreg) |
|
413 |
extract.lm <- function(model) { |
|
414 |
s <- summary(model) |
|
415 |
names <- rownames(s$coef) |
|
416 |
co <- s$coef[, 1] |
|
417 |
se <- s$coef[, 2] |
|
418 |
pval <- s$coef[, 4] |
|
419 |
rs <- s$r.squared |
|
420 |
n <- as.integer(nobs(model)) |
|
421 |
rmse=sqrt(mean((residuals(s)^2))) |
|
422 |
gof <- c(rs, rmse, n) |
|
423 |
gof.names <- c("R-Squared","RMSE","n") |
|
424 |
tr <- createTexreg(coef.names = names, coef = co, se = se, |
|
425 |
pvalues = pval, gof.names = gof.names, gof = gof) |
|
426 |
return(tr) |
|
427 |
} |
|
428 |
setMethod("extract", signature = className("lm", "stats"),definition = extract.lm) |
|
429 |
|
|
430 |
forms=c("cld~mod09+month2+lat") |
|
431 |
lm_all=lm(cld_all~mod09+lat,data=cldm[!is.na(cldm$cld),]) |
|
432 |
|
|
433 |
|
|
434 |
### Compare two time periods |
|
435 |
lm_all1=lm(cld_all~mod09,data=cldm[!is.na(cldm$cld)&cldm$cldn_all>=10,]) |
|
436 |
|
|
437 |
lm_mod=lm(cld~mod09,data=cldm[cldm$cldn==10,]) |
|
438 |
mods=list("1970-2009"=lm_all1,"2000-2009"=lm_mod) |
|
439 |
|
|
440 |
screenreg(mods,digits=2,single.row=T,custom.model.names=names(mods),custom.coef.names = c("Intercept", "MODCF")) |
|
441 |
|
|
442 |
htmlreg(mods,file = "output/tempstab.doc", |
|
443 |
custom.model.names = names(mods), |
|
444 |
single.row = T, inline.css = FALSE,doctype = TRUE, html.tag = TRUE, head.tag = TRUE, body.tag = TRUE) |
|
445 |
|
|
446 |
|
|
447 |
## assess latitude bias |
|
448 |
cldm$abslat=abs(cldm$lat) |
|
449 |
cldm$absdif=abs(cldm$difm) |
|
450 |
abslm=lm(absdif~abslat*I(abslat^2),data=cldm[cldm$cldn_all>30,]) |
|
451 |
|
|
452 |
xyplot(absdif~abslat|month2,type=c("p","smooth"),data=cldm,cex=.25,pch=16) |
|
453 |
|
|
454 |
plot(absdif~abslat,data=cldm[cldm$cldn_all>30,],cex=.25,pch=16) |
|
455 |
lines(0:90,predict(abslm,newdata=data.frame(abslat=0:90),type="response"),col="red") |
|
456 |
|
|
457 |
library(BayesFactor) ## provides anovaBF() and posterior() |
bf=anovaBF(difm~lulcc+month2,data=cldm[!is.na(cldm$difm)&!is.na(cldm$lulcc),]) |
|
458 |
ch=posterior(bf, iterations = 1000) |
|
459 |
summary(bf) |
|
460 |
plot(bf) |
|
461 |
|
|
462 |
## explore validation error |
|
463 |
cldm$lulcc=as.factor(IGBP$class[match(cldm$lulc,IGBP$ID)]) |
|
464 |
|
|
465 |
## Table of RMSE's by lulc by month |
|
466 |
lulcrmsel=ddply(cldm,c("month","lulc"),function(x) c(count=nrow(x),rmse=sqrt(mean((x$mod09-x$cld)^2,na.rm=T)))) |

lulcrmsel=lulcrmsel[!is.na(lulcrmsel$lulc),] |

lulcrmsel$lulcc=as.factor(IGBP$class[match(lulcrmsel$lulc,IGBP$ID)]) |
|
469 |
|
|
470 |
lulctl=ddply(cldm,c("lulc"),function(x) c(count=nrow(x),mean=paste(round(mean(x$difm,na.rm=T),2)," (",round(sd(x$difm,na.rm=T),2),")",sep=""),rmse=round(sqrt(mean((x$difm)^2,na.rm=T)),2))) |
|
471 |
lulctl$lulcc=as.factor(IGBP$class[match(lulctl$lulc,IGBP$ID)]) |
|
472 |
print(xtable(lulctl[order(lulctl$rmse),c("lulcc","count","mean","rmse")],digits=1),type="html",include.rownames=F,file="output/lulcc.doc",row.names=F) |
|
473 |
|
|
474 |
|
|
475 |
lulcrmse=cast(lulcrmsel,lulcc~month,value="rmse") |
|
476 |
lulcrmse |
|
477 |
|
|
478 |
lulcrmse.q=round(do.call(rbind,apply(lulcrmse,1,function(x) data.frame(Min=min(x,na.rm=T),Mean=mean(x,na.rm=T),Max=max(x,na.rm=T),SD=sd(x,na.rm=T)))),1)#quantile,c(0.025,0.5,.975),na.rm=T)),1) |
|
479 |
lulcrmse.q=lulcrmse.q[order(lulcrmse.q$Mean,decreasing=T),] |
|
480 |
lulcrmse.q |
|
481 |
|
|
482 |
print(xtable(lulcrmse,digits=1),"html") |
|
483 |
|
|
484 |
bgyr=colorRampPalette(c("blue","green","yellow","red")) |
|
485 |
levelplot(rmse~month*lulcc,data=lulcrmsel,col.regions=bgyr(1000),at=quantile(lulcrmsel$rmse,seq(0,1,len=100),na.rm=T)) |
|
486 |
|
|
487 |
|
|
488 |
### Linear models |
|
489 |
summary(lm(difm~as.factor(lulc)+lat+month2,data=cldm)) |
|
490 |
|
climate/research/cloud/MOD09/3_combine.R
1 | 1 |
################################################################################ |
2 | 2 |
### calculate monthly means of terra and aqua |
3 |
|
|
3 |
setwd("/mnt/data/personal/adamw/projects/cloud") |
|
4 | 4 |
datadir="/mnt/data2/projects/cloud/" |
5 | 5 |
|
6 |
library(raster) |
|
7 |
library(foreach) |
|
8 |
library(multicore) |
|
9 |
library(doMC) |
|
10 |
registerDoMC(12) |
|
11 |
|
|
12 |
|
|
13 |
### assemble list of files to process |
|
14 |
df=data.frame(path=list.files(paste(datadir,"/mcd09ctif",sep=""),full=T,pattern="M[YO].*[0-9]*(mean|sd).*tif$"),stringsAsFactors=F) |
|
15 |
df[,c("sensor","month","type")]=do.call(rbind.data.frame,strsplit(basename(df$path),"_|[.]"))[,c(1,2,3)] |
|
16 |
|
|
17 |
df2=unique(df[,c("month","type")]) |
|
18 |
|
|
19 |
overwrite=T |
|
20 |
|
|
6 | 21 |
# Create combined (MOD+MYD) corrected mean CF |
7 |
foreach(i=1:12) %dopar% {
|
|
8 |
f=list.files(paste(datadir,"/mcd09ctif",sep=""),pattern=paste(".*[O|Y].*_",sprintf("%02d",i),"[.]tif$",sep=""),full=T)
|
|
22 |
foreach(i=1:nrow(df2), .options.multicore=list(preschedule=FALSE)) %dopar% {
|
|
23 |
f=df$path[df$month==df2$month[i]&df$type==df2$type[i]]
|
|
9 | 24 |
## Define output and check if it already exists |
10 |
tmcd=paste(datadir,"/mcd09ctif/MCD09_",sprintf("%02d", i),"_uncompressed.tif",sep="")
|
|
11 |
tmcd2=paste(datadir,"/mcd09ctif/MCD09_",sprintf("%02d", i),".tif",sep="")
|
|
25 |
tmcd=paste(datadir,"/mcd09ctif/MCD09_",df2$type[i],"_",df2$month[i],"_uncompressed.tif",sep="")
|
|
26 |
tmcd2=paste(datadir,"/mcd09ctif/MCD09_",df2$type[i],"_",df2$month[i],".tif",sep="")
|
|
12 | 27 |
## check if output already exists |
13 |
# if(file.exists(tmcd2)){print(paste(tmcd2,"Exists, moving on..."));return(NULL)}
|
|
14 |
if(file.exists(tmcd2)){print(paste(tmcd2,"Exists, deleting it..."));file.remove(tmcd,tmcd2)} |
|
28 |
if(!overwrite&file.exists(tmcd2)){print(paste(tmcd2,"Exists, moving on..."));return(NULL)}
|
|
29 |
if(overwrite&file.exists(tmcd2)){print(paste(tmcd2,"Exists, deleting it..."));file.remove(tmcd,tmcd2)}
|
|
15 | 30 |
## Take average between images |
16 | 31 |
## switch NA values to 65535 to facilitate recasting to 8-bit below, otherwise they are confounded with 0 cloud values |
17 |
ops=paste("-t_srs 'EPSG:4326' -multi -srcnodata -32768 -dstnodata 32767 -r bilinear -te -180 -90 180 90 -tr 0.008333333333333 -0.008333333333333",
|
|
32 |
ops=paste(" -t_srs 'EPSG:4326' -multi -srcnodata 65535 -dstnodata 65535 -r bilinear -te -180 -90 180 90 -tr 0.008333333333333 -0.008333333333333",
|
|
18 | 33 |
"-co BIGTIFF=YES --config GDAL_CACHEMAX 20000 -wm 2000 -wo NUM_THREADS:10 -wo SOURCE_EXTRA=5") |
19 | 34 |
system(paste("gdalwarp -overwrite -r average -co COMPRESS=LZW -co ZLEVEL=9 ",ops," ",paste(f,collapse=" ")," ",tmcd)) |
20 | 35 |
## update metadata |
21 |
tags=c(paste("TIFFTAG_IMAGEDESCRIPTION='Monthly Cloud Frequency for 2000-2013 extracted from C5 MODIS MOD09GA and MYD09GA PGE11 internal cloud mask algorithm (embedded in state_1km bit 10).", |
|
36 |
if(df2$type[i]=="mean") |
|
37 |
tags=c(paste("TIFFTAG_IMAGEDESCRIPTION='Monthly Cloud Frequency for 2000-2013 extracted from C5 MODIS MOD09GA and MYD09GA PGE11 internal cloud mask algorithm (embedded in state_1km bit 10).", |
|
22 | 38 |
"The daily cloud mask time series were summarized to mean cloud frequency (CF) by calculating the proportion of cloudy days.'"), |
23 | 39 |
"TIFFTAG_DOCUMENTNAME='Collection 5 MCD09 Cloud Frequency'", |
24 | 40 |
paste("TIFFTAG_DATETIME='2013",sprintf("%02d", i),"15'",sep=""), |
25 | 41 |
"TIFFTAG_ARTIST='Adam M. Wilson (adam.wilson@yale.edu)'") |
26 |
system(paste("/usr/local/src/gdal-1.10.0/swig/python/scripts/gdal_edit.py ",tmcd," ",paste("-mo ",tags,sep="",collapse=" "),sep="")) |
|
27 |
# create final fixed image |
|
28 |
system(paste("gdal_translate -co COMPRESS=LZW -co ZLEVEL=9 -co PREDICTOR=2 ",tmcd," ",tmcd2,sep="")) |
|
29 |
writeLines(paste("Finished month",i)) |
|
30 |
} |
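The gdalwarp call above does the sensor averaging on disk; for comparison, an explicit per-pixel mean of a MOD09/MYD09 pair in R would be (a slower sketch, assuming f holds the two matching file paths):

s=stack(f)                                          # MOD09 + MYD09 layers
cf=calc(s, function(x) round(mean(x, na.rm=TRUE)))  # cell-wise mean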
|
31 |
|
|
32 |
################################################ |
|
33 |
# Create combined (MOD+MYD) corrected mean CF SD |
|
34 |
foreach(i=1:12) %dopar% { |
|
35 |
f=list.files(paste(datadir,"/mcd09tif",sep=""),pattern=paste(".*[O|Y].*_",sprintf("%02d",i),"[.]tif$",sep=""),full=T) |
|
36 |
## Define output and check if it already exists |
|
37 |
tmcd=paste(datadir,"/mcd09ctif/MCD09sd_",sprintf("%02d", i),"_uncompressed.tif",sep="") |
|
38 |
tmcd=paste(datadir,"/mcd09ctif/MCD09sd_",sprintf("%02d", i),"_uncompressed.tif",sep="") |
|
39 |
tmcd2=paste(datadir,"/mcd09ctif/MCD09sd_",sprintf("%02d", i),".tif",sep="") |
|
40 |
## check if output already exists |
|
41 |
if(file.exists(tmcd2)){print(paste(tmcd2,"Exists, moving on..."));return(NULL)} |
|
42 |
## Take average between images |
|
43 |
ops=paste("-t_srs 'EPSG:4326' -multi -srcnodata -32768 -dstnodata -32768 -r bilinear -te -180 -90 180 90 -tr 0.008333333333333 -0.008333333333333", |
|
44 |
"-co BIGTIFF=YES --config GDAL_CACHEMAX 20000 -wm 2000 -wo NUM_THREADS:10 -wo SOURCE_EXTRA=5") |
|
45 |
system(paste("gdalwarp -overwrite -r average -co COMPRESS=LZW -co ZLEVEL=9 ",ops," ",paste(f,collapse=" ")," ",tmcd)) |
|
46 |
## update metadata |
|
42 |
if(df2$type[i]=="sd") |
|
47 | 43 |
tags=c(paste("TIFFTAG_IMAGEDESCRIPTION='Standard Deviation of the Monthly Cloud Frequency for 2000-2013 extracted from C5 MODIS", |
48 | 44 |
" MOD09GA and MYD09GA PGE11 internal cloud mask algorithm (embedded in state_1km bit 10).", |
49 |
"The daily cloud mask time series were summarized to mean cloud frequency (CF) by calculating the proportion of cloudy days"), |
|
45 |
"The daily cloud mask time series were summarized to mean cloud frequency (CF) by calculating the proportion of cloudy days.'"),
|
|
50 | 46 |
"TIFFTAG_DOCUMENTNAME='Collection 5 MCD09 SD of Cloud Frequency'", |
51 | 47 |
paste("TIFFTAG_DATETIME='2013",sprintf("%02d", i),"15'",sep=""), |
52 | 48 |
"TIFFTAG_ARTIST='Adam M. Wilson (adam.wilson@yale.edu)'") |
53 | 49 |
system(paste("/usr/local/src/gdal-1.10.0/swig/python/scripts/gdal_edit.py ",tmcd," ",paste("-mo ",tags,sep="",collapse=" "),sep="")) |
54 | 50 |
# create final fixed image |
55 |
system(paste("gdal_translate -b 2 -a_nodata -32768 -co COMPRESS=LZW -co ZLEVEL=9 -co PREDICTOR=2 ",tmcd," ",tmcd2,sep="")) |
|
56 |
writeLines(paste("Finished month",i)) |
|
51 |
system(paste("gdal_translate -co COMPRESS=LZW -co ZLEVEL=9 -co PREDICTOR=2 ",tmcd," ",tmcd2,sep="")) |
|
52 |
file.remove(tmcd) |
|
53 |
writeLines(paste("########################################## Finished ",tmcd2)) |
|
57 | 54 |
} |
58 | 55 |
|
59 | 56 |
|
60 | 57 |
|
61 | 58 |
################################################################################# |
62 | 59 |
###### convert to 8-bit compressed file, add colors and other details |
63 |
f2=list.files(paste(datadir,"/mcd09ctif",sep=""),pattern=paste(".*MCD09_[0-9].[.]tif$",sep=""),full=T) |
|
64 | 60 |
|
65 |
for( i in 1:length(f2)){ |
|
61 |
|
|
62 |
f2=list.files(paste(datadir,"/mcd09ctif",sep=""),pattern=paste(".*MCD09_.*_[0-9].[.]tif$",sep=""),full=T) |
|
63 |
|
|
64 |
|
|
65 |
foreach(i=1:length(f2), .options.multicore=list(preschedule=FALSE)) %dopar% { |
|
66 | 66 |
file=f2[i] |
67 |
outfilevrt=paste(datadir,"/mcd09ctif/",sub(".tif",".vrt",basename(file)),sep="") |
|
68 |
outfile=paste("data/mcd09tif/MCD09_",sprintf("%02d",i),".tif",sep="") |
|
69 |
outfilesd=paste("data/mcd09tif/MCD09_",sprintf("%02d",i),"_sd.tif",sep="") |
|
70 |
## create VRT and edit the color table |
|
71 |
## create the vrt to add a color table following https://trac.osgeo.org/gdal/wiki/FAQRaster#Howtocreateormodifyanimagecolortable |
|
72 |
system(paste("gdal_translate -scale 0 10000 0 100 -of VRT ",file," ",outfilevrt)) |
|
67 |
outfilevrt=sub("[.]tif",".vrt",file) |
|
68 |
outfile=paste("data/mcd09tif/",basename(file),sep="") |
|
69 |
## rescale to 0-100 using a VRT |
|
70 |
system(paste("gdal_translate -scale 0 10000 0 100 -of VRT ",file," ",outfilevrt)) |
|
71 |
## add color table for 8-bit data |
|
73 | 72 |
vrt=scan(outfilevrt,what="char") |
74 | 73 |
hd=c("<ColorInterp>Palette</ColorInterp>","<ColorTable>") |
75 | 74 |
ft="</ColorTable>" |
... | ... | |
81 | 80 |
## update missing data flag following http://lists.osgeo.org/pipermail/gdal-dev/2010-February/023541.html |
82 | 81 |
csi=grep("<ComplexSource>",vrt2) # get index of current color table |
83 | 82 |
vrt2=c(vrt2[1:csi],"<NODATA>327</NODATA>",vrt2[(csi+1):length(vrt2)]) |
84 |
write.table(vrt2,file=outfilevrt,col.names=F,row.names=F,quote=F) |
|
85 |
|
|
86 |
#system(paste("gdal_translate -of VRT -scale 0 10000 0 100 ",outfilevrt," ",outfilevrt2)) |
|
87 |
|
|
88 |
|
|
89 |
# system(paste("pkreclass -i ",outfilevrt," ",paste("-mo ",tags,sep="",collapse=" ")," ",outfilevrt," ",outfile2)) |
|
83 |
write.table(vrt2,file=outfilevrt,col.names=F,row.names=F,quote=F) |
|
90 | 84 |
tags=c(paste("TIFFTAG_IMAGEDESCRIPTION='Monthly Cloud Frequency for 2000-2013 extracted from C5 MODIS M*D09GA PGE11 internal cloud mask algorithm (embedded in state_1km bit 10).", |
91 | 85 |
"The daily cloud mask time series were summarized to mean cloud frequency (CF) by calculating the proportion of cloudy days. ", |
92 | 86 |
"Band Descriptions: 1) Mean Monthly Cloud Frequency'"), |
... | ... | |
100 | 94 |
|
101 | 95 |
################ |
102 | 96 |
### calculate inter vs. intra annual variability |
103 |
f3=list.files(paste(datadir,"/mcd09ctif",sep=""),pattern=paste(".*MCD09_[0-9].[.]tif$",sep=""),full=T) |
|
104 |
f3sd=list.files(paste(datadir,"/mcd09ctif",sep=""),pattern=paste(".*MCD09sd_[0-9].[.]tif$",sep=""),full=T) |
|
97 |
f3=list.files(paste(datadir,"/mcd09ctif",sep=""),pattern=paste(".*MCD09_mean_[0-9].[.]tif$",sep=""),full=T)
|
|
98 |
f3sd=list.files(paste(datadir,"/mcd09ctif",sep=""),pattern=paste(".*MCD09_sd_[0-9].[.]tif$",sep=""),full=T)
|
|
105 | 99 |
|
106 | 100 |
dmean=stack(as.list(f3)) |
107 | 101 |
dsd=stack(as.list(f3sd)) |
108 | 102 |
|
109 |
dinter=calc(dmean,sd,file=paste(datadir,"/mcd09ctif/inter.tif",sep=""),options=c("COMPRESS=LZW","ZLEVEL=9")) |
|
110 |
dintra=calc(dsd,mean,file=paste(datadir,"/mcd09ctif/intra.tif",sep=""),options=c("COMPRESS=LZW","ZLEVEL=9")) |
|
103 |
beginCluster(12) |
|
104 |
|
|
105 |
## Functions to calculate cell-wise SD and mean, rounded to the nearest integer |

Rsd=function(x) calc(x,function(x) round(sd(x,na.rm=T))) |
Rmean=function(x) calc(x,function(x) round(mean(x,na.rm=T))) |

## SD across the 12 monthly mean layers |
dinter=clusterR(dmean,Rsd,file=paste(datadir,"/mcd09ctif/inter.tif",sep=""),options=c("COMPRESS=LZW","PREDICTOR=2"),overwrite=T,dataType='INT1U',NAflag=255) |
## mean of the 12 monthly SD layers (clusterR needs a raster function, hence the calc wrapper) |
dintra=clusterR(dsd,Rmean,file=paste(datadir,"/mcd09ctif/intra.tif",sep=""),options=c("COMPRESS=LZW","PREDICTOR=2"),overwrite=T,dataType='INT1U',NAflag=255) |
|
110 |
|
|
111 |
endCluster() |
|
111 | 112 |
|
112 | 113 |
tplot=F |
113 | 114 |
if(tplot){ |
climate/research/cloud/MOD09/4_validate.R
1 |
### Script to download and process the NDP-026D station cloud dataset |
|
2 |
### to validate MODIS cloud frequencies |
|
3 |
|
|
4 |
setwd("/mnt/data/personal/adamw/projects/cloud/") |
|
5 |
|
|
6 |
library(multicore) |
|
7 |
library(doMC) |
|
8 |
library(rasterVis) |
|
9 |
library(rgdal) |
|
10 |
library(reshape) |
|
11 |
library(maptools) |
|
12 |
library(rgeos) |
library(plyr) ## provides ddply() used below |
|
13 |
|
|
14 |
## Data available here http://cdiac.ornl.gov/epubs/ndp/ndp026d/ndp026d.html |
|
15 |
|
|
16 |
download=F #download data? |
|
17 |
## Get station locations |
|
18 |
if(download) system("wget -N -nd http://cdiac.ornl.gov/ftp/ndp026d/cat01/01_STID -P data/NDP026D/data/") |
|
19 |
|
|
20 |
st=read.table("data/NDP026D/data/01_STID",skip=1) |
|
21 |
colnames(st)=c("StaID","LAT","LON","ELEV","ny1","fy1","ly1","ny7","fy7","ly7","SDC","b5c") |
|
22 |
st$lat=st$LAT/100 |
|
23 |
st$lon=st$LON/100 |
|
24 |
st$lon[st$lon>180]=st$lon[st$lon>180]-360 |
|
25 |
st=st[,c("StaID","ELEV","lat","lon")] |
|
26 |
colnames(st)=c("id","elev","lat","lon") |
|
27 |
write.csv(st,"stations.csv",row.names=F) |
|
28 |
coordinates(st)=c("lon","lat") |
|
29 |
projection(st)="+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs" |
|
30 |
st@data[,c("lon","lat")]=coordinates(st) |
|
31 |
|
|
32 |
## download data |
|
33 |
if(download){ |
|
34 |
system("wget -N -nd ftp://cdiac.ornl.gov/pub/ndp026d/cat67_78/* -A '.tc.Z' -P data/NDP026D/data/") |
|
35 |
system("gunzip data/*.Z") |
|
36 |
} |
|
37 |
|
|
38 |
## define FWF widths |
|
39 |
f162=c(5,5,4,7,7,7,4) #format 162 |
|
40 |
c162=c("StaID","YR","Nobs","Amt","Fq","AWP","NC") |
|
41 |
|
|
42 |
## use monthly timeseries |
|
43 |
cld=do.call(rbind.data.frame,mclapply(sprintf("%02d",1:12),function(m) { |
|
44 |
d=read.fwf(list.files("data/NDP026D/data",pattern=paste("MNYDC.",m,".tc$",sep=""),full=T),skip=1,widths=f162) |
|
45 |
colnames(d)=c162 |
|
46 |
d$month=as.numeric(m) |
|
47 |
print(m) |
|
48 |
return(d)} |
|
49 |
)) |
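Each NDP-026D monthly file is fixed-width "format 162" with seven fields mapped by the widths in f162; for a single month the call reduces to (hypothetical January file name):

d1=read.fwf("data/NDP026D/data/MNYDC.01.tc", skip=1, widths=c(5,5,4,7,7,7,4))
colnames(d1)=c("StaID","YR","Nobs","Amt","Fq","AWP","NC")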
|
50 |
|
|
51 |
## add lat/lon |
|
52 |
cld[,c("lat","lon")]=coordinates(st)[match(cld$StaID,st$id),] |
|
53 |
|
|
54 |
## drop missing values |
|
55 |
cld=cld[,!grepl("Fq|AWP|NC",colnames(cld))] |
|
56 |
cld$Amt[cld$Amt<0]=NA |
|
57 |
cld$Amt=cld$Amt/100 |
|
58 |
cld=cld[!is.na(cld$Amt),] |
|
59 |
|
|
60 |
## table of stations with > 20 observations per month |
|
61 |
cast(cld,StaID~YR,value="Nobs") |
|
62 |
mtab=ddply(cld,c('StaID','month'),function(df){ data.frame(count=sum(df$Nobs>20,na.rm=T))}) |
|
63 |
#mtab2=mtab[table(mtab$count>10)] |
|
64 |
stem(mtab$count) |
|
65 |
|
|
66 |
## calculate means and sds for full record (1970-2009) |
|
67 |
Nobsthresh=20 #minimum number of observations to include |
|
68 |
|
|
69 |
cldm=do.call(rbind.data.frame,by(cld,list(month=as.factor(cld$month),StaID=as.factor(cld$StaID)),function(x){ |
|
70 |
data.frame( |
|
71 |
month=x$month[1], |
|
72 |
StaID=x$StaID[1], |
|
73 |
cld_all=mean(x$Amt[x$Nobs>=Nobsthresh],na.rm=T), # full record |
|
74 |
cldsd_all=sd(x$Amt[x$Nobs>=Nobsthresh],na.rm=T), |
|
75 |
cldn_all=length(x$Amt[x$Nobs>=Nobsthresh]), |
|
76 |
cld=mean(x$Amt[x$YR>=2000&x$Nobs>=Nobsthresh],na.rm=T), #only MODIS epoch |
|
77 |
cldsd=sd(x$Amt[x$YR>=2000&x$Nobs>=Nobsthresh],na.rm=T), |
|
78 |
cldn=length(x$Amt[x$YR>=2000&x$Nobs>=Nobsthresh]))})) |
|
79 |
|
|
80 |
cldm[,c("lat","lon")]=coordinates(st)[match(cldm$StaID,st$id),c("lat","lon")] |
|
81 |
|
|
82 |
|
|
83 |
|
|
84 |
## add the EarthEnvCloud data to cld |
|
85 |
mod09=stack(list.files("data/mcd09tif/",pattern="MCD09_[0-9]*[.]tif",full=T)) |
|
86 |
NAvalue(mod09)=255 |
|
87 |
mod09std=brick("~/acrobates/adamw/projects/cloud/data/cloud_ymonstd.nc") ## needed for the sd extraction below |
|
88 |
|
|
89 |
## overlay the data with 32km diameter (16km radius) buffer |
|
90 |
## buffer size from Dybbroe, et al. (2005) doi:10.1175/JAM-2189.1. |
|
91 |
buf=16000 |
|
92 |
bins=cut(st$lat,10) |
|
93 |
rerun=F |
|
94 |
if(rerun&file.exists("valid.csv")) file.remove("valid.csv") |
|
95 |
mod09sta=lapply(levels(bins),function(lb) { |
|
96 |
l=which(bins==lb) |
|
97 |
## mean |
|
98 |
td=extract(mod09,st[l,],buffer=buf,fun=mean,na.rm=T,df=T) |
|
99 |
td$id=st$id[l] |
|
100 |
td$type="mean" |
|
101 |
## std |
|
102 |
td2=extract(mod09std,st[l,],buffer=buf,fun=mean,na.rm=T,df=T) |
|
103 |
td2$id=st$id[l] |
|
104 |
td2$type="sd" |
|
105 |
print(lb)#as.vector(c(l,td[,1:4]))) |
|
106 |
write.table(rbind(td,td2),"valid.csv",append=T,col.names=F,quote=F,sep=",",row.names=F) |
|
107 |
td |
|
108 |
})#,mc.cores=3) |
|
109 |
|
|
110 |
## read it back in |
|
111 |
mod09st=read.csv("valid.csv",header=F)[,-c(1)] |
|
112 |
colnames(mod09st)=c(names(mod09),"id","type") |
|
113 |
mod09stl=melt(mod09st,id.vars=c("id","type")) |
|
114 |
mod09stl[,c("year","month")]=do.call(rbind,strsplit(sub("X","",mod09stl$variable),"[.]"))[,1:2] |
|
115 |
mod09stl$value[mod09stl$value<0]=NA |
|
116 |
mod09stl=cast(mod09stl,id+year+month~type,value="value") |
|
117 |
|
|
118 |
## add it to cld |
|
119 |
cldm$mod09=mod09stl$mean[match(paste(cldm$StaID,cldm$month),paste(mod09stl$id,as.numeric(mod09stl$month)))] |
|
120 |
cldm$mod09sd=mod09stl$sd[match(paste(cldm$StaID,cldm$month),paste(mod09stl$id,as.numeric(mod09stl$month)))] |
|
121 |
|
|
122 |
|
|
123 |
## LULC |
|
124 |
#system(paste("gdalwarp -r near -co \"COMPRESS=LZW\" -tr ",paste(res(mod09),collapse=" ",sep=""), |
|
125 |
# "-tap -multi -t_srs \"", projection(mod09),"\" /mnt/data/jetzlab/Data/environ/global/landcover/MODIS/MCD12Q1_IGBP_2005_v51.tif ../modis/mod12/MCD12Q1_IGBP_2005_v51.tif")) |
|
126 |
lulc=raster("~/acrobates/adamw/projects/interp/data/modis/mod12/MCD12Q1_IGBP_2005_v51.tif") |
|
127 |
require(plotKML); data(worldgrids_pal) #load IGBP palette |
|
128 |
IGBP=data.frame(ID=0:16,col=worldgrids_pal$IGBP[-c(18,19)],stringsAsFactors=F) |
|
129 |
IGBP$class=rownames(IGBP);rownames(IGBP)=1:nrow(IGBP) |
|
130 |
levels(lulc)=list(IGBP) |
|
131 |
## function to get modal lulc value |
|
132 |
Mode <- function(x) { |
|
133 |
ux <- na.omit(unique(x)) |
|
134 |
ux[which.max(tabulate(match(x, ux)))] |
|
135 |
} |
|
136 |
lulcst=extract(lulc,st,fun=Mode,buffer=buf,df=T) |
|
137 |
colnames(lulcst)=c("id","lulc") |
|
138 |
## add it to cld |
|
139 |
cldm$lulc=lulcst$lulc[match(cldm$StaID,lulcst$id)] |
|
140 |
cldm$lulcc=IGBP$class[match(cldm$lulc,IGBP$ID)] |
|
141 |
|
|
142 |
|
|
143 |
### Add biome data |
|
144 |
biome=readOGR("../teow/","biomes") |
|
145 |
projection(biome)=projection(st) |
|
146 |
#st$biome=over(st,biome,returnList=F)$BiomeID |
|
147 |
dists=apply(gDistance(st,biome,byid=T),2,which.min) |
|
148 |
st$biomec=biome$code[dists] |
|
149 |
st$realm=biome$realm[dists] |
|
150 |
st$biome=biome$biome[dists] |
|
151 |
|
|
152 |
cldm$biomec=st$biomec[match(cldm$StaID,st$id)] |
|
153 |
cldm$realm=st$realm[match(cldm$StaID,st$id)] |
|
154 |
cldm$biome=st$biome[match(cldm$StaID,st$id)] |
|
155 |
|
|
156 |
|
|
157 |
## write out the tables |
|
158 |
write.csv(cld,file="cld.csv",row.names=F) |
|
159 |
write.csv(cldm,file="cldm.csv",row.names=F) |
|
160 |
writeOGR(st,dsn=".",layer="stations",driver="ESRI Shapefile",overwrite_layer=T) |
|
161 |
######################################################################### |
|
162 |
|
climate/research/cloud/MOD09/NDP-026D.R
1 |
### Script to download and process the NDP-026D station cloud dataset |
|
2 |
### to validate MODIS cloud frequencies |
|
3 |
|
|
4 |
setwd("~/acrobates/adamw/projects/cloud/data/NDP026D") |
|
5 |
|
|
6 |
library(multicore) |
|
7 |
library(doMC) |
|
8 |
library(rasterVis) |
|
9 |
library(rgdal) |
|
10 |
library(reshape) |
|
11 |
library(maptools) |
|
12 |
library(rgeos) |
|
13 |
|
|
14 |
## Data available here http://cdiac.ornl.gov/epubs/ndp/ndp026d/ndp026d.html |
|
15 |
|
|
16 |
## Get station locations |
|
17 |
system("wget -N -nd http://cdiac.ornl.gov/ftp/ndp026d/cat01/01_STID -P data/") |
|
18 |
st=read.table("data/01_STID",skip=1) |
|
19 |
colnames(st)=c("StaID","LAT","LON","ELEV","ny1","fy1","ly1","ny7","fy7","ly7","SDC","b5c") |
|
20 |
st$lat=st$LAT/100 |
|
21 |
st$lon=st$LON/100 |
|
22 |
st$lon[st$lon>180]=st$lon[st$lon>180]-360 |
|
23 |
st=st[,c("StaID","ELEV","lat","lon")] |
|
24 |
colnames(st)=c("id","elev","lat","lon") |
|
25 |
write.csv(st,"stations.csv",row.names=F) |
|
26 |
coordinates(st)=c("lon","lat") |
|
27 |
projection(st)="+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs" |
|
28 |
st@data[,c("lon","lat")]=coordinates(st) |
|
29 |
|
|
30 |
## download data |
|
31 |
system("wget -N -nd ftp://cdiac.ornl.gov/pub/ndp026d/cat67_78/* -A '.tc.Z' -P data/") |
|
32 |
|
|
33 |
system("gunzip data/*.Z") |
|
34 |
|
|
35 |
## define FWF widths |
|
36 |
f162=c(5,5,4,7,7,7,4) #format 162 |
|
37 |
c162=c("StaID","YR","Nobs","Amt","Fq","AWP","NC") |
|
38 |
|
|
39 |
## use monthly timeseries |
|
40 |
cld=do.call(rbind.data.frame,mclapply(sprintf("%02d",1:12),function(m) { |
|
41 |
d=read.fwf(list.files("data",pattern=paste("MNYDC.",m,".tc$",sep=""),full=T),skip=1,widths=f162) |
|
42 |
colnames(d)=c162 |
|
43 |
d$month=as.numeric(m) |
|
44 |
print(m) |
|
45 |
return(d)} |
|
46 |
)) |
|
47 |
|
|
48 |
## add lat/lon |
|
49 |
cld[,c("lat","lon")]=coordinates(st)[match(cld$StaID,st$id),] |
|
50 |
|
|
51 |
## drop missing values |
|
52 |
cld=cld[,!grepl("Fq|AWP|NC",colnames(cld))] |
|
53 |
cld$Amt[cld$Amt<0]=NA |
|
54 |
cld$Amt=cld$Amt/100 |
|
55 |
cld=cld[!is.na(cld$Amt),] |
|
56 |
|
|
57 |
## table of stations with > 20 observations per month |
|
58 |
cast(cld,StaID~YR,value="Nobs") |
|
59 |
mtab=ddply(cld,c('StaID','month'),function(df){ data.frame(count=sum(df$Nobs>20,na.rm=T))}) |
|
60 |
#mtab2=mtab[table(mtab$count>10)] # incomplete expression, left commented out (as in 4_validate.R) |
|
62 |
stem(mtab$count) |
|
63 |
|
|
64 |
## calculate means and sds for full record (1970-2009) |
|
65 |
Nobsthresh=20 #minimum number of observations to include |
|
66 |
|
|
67 |
cldm=do.call(rbind.data.frame,by(cld,list(month=as.factor(cld$month),StaID=as.factor(cld$StaID)),function(x){ |
|
68 |
data.frame( |
|
69 |
month=x$month[1], |
|
70 |
StaID=x$StaID[1], |
|
71 |
cld_all=mean(x$Amt[x$Nobs>=Nobsthresh],na.rm=T), # full record |
|
72 |
cldsd_all=sd(x$Amt[x$Nobs>=Nobsthresh],na.rm=T), |
|
73 |
cldn_all=length(x$Amt[x$Nobs>=Nobsthresh]), |
|
74 |
cld=mean(x$Amt[x$YR>=2000&x$Nobs>=Nobsthresh],na.rm=T), #only MODIS epoch |
|
75 |
cldsd=sd(x$Amt[x$YR>=2000&x$Nobs>=Nobsthresh],na.rm=T), |
|
76 |
cldn=length(x$Amt[x$YR>=2000&x$Nobs>=Nobsthresh]))})) |
|
77 |
|
|
78 |
cldm[,c("lat","lon")]=coordinates(st)[match(cldm$StaID,st$id),c("lat","lon")] |
|
79 |
|
|
80 |
|
|
81 |
|
|
82 |
## add the MOD09 data to cld |
|
83 |
#### Evaluate MOD35 Cloud data |
|
84 |
mod09=brick("~/acrobates/adamw/projects/cloud/data/cloud_ymonmean.nc") |
|
85 |
mod09std=brick("~/acrobates/adamw/projects/cloud/data/cloud_ymonstd.nc") |
|
86 |
|
|
87 |
## overlay the data with 32km diameter (16km radius) buffer |
|
88 |
## buffer size from Dybbroe, et al. (2005) doi:10.1175/JAM-2189.1. |
|
89 |
buf=16000 |
|
90 |
bins=cut(st$lat,10) |
|
91 |
rerun=F |
|
92 |
if(rerun&file.exists("valid.csv")) file.remove("valid.csv") |
|
93 |
mod09sta=lapply(levels(bins),function(lb) { |
|
94 |
l=which(bins==lb) |
|
95 |
## mean |
|
96 |
td=extract(mod09,st[l,],buffer=buf,fun=mean,na.rm=T,df=T) |
|
97 |
td$id=st$id[l] |
|
98 |
td$type="mean" |
|
99 |
## std |
|
100 |
td2=extract(mod09std,st[l,],buffer=buf,fun=mean,na.rm=T,df=T) |
|
101 |
td2$id=st$id[l] |
|
102 |
td2$type="sd" |
|
103 |
print(lb)#as.vector(c(l,td[,1:4]))) |
|
104 |
write.table(rbind(td,td2),"valid.csv",append=T,col.names=F,quote=F,sep=",",row.names=F) |
|
105 |
td |
|
106 |
})#,mc.cores=3) |
|
107 |
|
|
108 |
## read it back in |
|
109 |
mod09st=read.csv("valid.csv",header=F)[,-c(1)] |
|
110 |
colnames(mod09st)=c(names(mod09),"id","type") |
|
111 |
mod09stl=melt(mod09st,id.vars=c("id","type")) |
|
112 |
mod09stl[,c("year","month")]=do.call(rbind,strsplit(sub("X","",mod09stl$variable),"[.]"))[,1:2] |
|
113 |
mod09stl$value[mod09stl$value<0]=NA |
|
114 |
mod09stl=cast(mod09stl,id+year+month~type,value="value") |
|
115 |
|
|
116 |
## add it to cld |
|
117 |
cldm$mod09=mod09stl$mean[match(paste(cldm$StaID,cldm$month),paste(mod09stl$id,as.numeric(mod09stl$month)))] |
|
118 |
cldm$mod09sd=mod09stl$sd[match(paste(cldm$StaID,cldm$month),paste(mod09stl$id,as.numeric(mod09stl$month)))] |
|
119 |
|
|
120 |
|
|
121 |
## LULC |
|
122 |
#system(paste("gdalwarp -r near -co \"COMPRESS=LZW\" -tr ",paste(res(mod09),collapse=" ",sep=""), |
|
123 |
# "-tap -multi -t_srs \"", projection(mod09),"\" /mnt/data/jetzlab/Data/environ/global/landcover/MODIS/MCD12Q1_IGBP_2005_v51.tif ../modis/mod12/MCD12Q1_IGBP_2005_v51.tif")) |
|
124 |
lulc=raster("~/acrobates/adamw/projects/interp/data/modis/mod12/MCD12Q1_IGBP_2005_v51.tif") |
|
125 |
require(plotKML); data(worldgrids_pal) #load IGBP palette |
|
126 |
IGBP=data.frame(ID=0:16,col=worldgrids_pal$IGBP[-c(18,19)],stringsAsFactors=F) |
|
127 |
IGBP$class=rownames(IGBP);rownames(IGBP)=1:nrow(IGBP) |
|
128 |
levels(lulc)=list(IGBP) |
|
129 |
## function to get modal lulc value |
|
130 |
Mode <- function(x) { |
|
131 |
ux <- na.omit(unique(x)) |
|
132 |
ux[which.max(tabulate(match(x, ux)))] |
|
133 |
} |
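## e.g. Mode(c(1,2,2,3,NA)) returns 2; ties go to the first-appearing value |
## because which.max() returns the first maximum |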
|
134 |
lulcst=extract(lulc,st,fun=Mode,buffer=buf,df=T) |
|
135 |
colnames(lulcst)=c("id","lulc") |
|
136 |
## add it to cld |
|
137 |
cldm$lulc=lulcst$lulc[match(cldm$StaID,lulcst$id)] |
|
138 |
cldm$lulcc=IGBP$class[match(cldm$lulc,IGBP$ID)] |
|
139 |
|
|
140 |
|
|
141 |
### Add biome data |
|
142 |
biome=readOGR("../teow/","biomes") |
|
143 |
projection(biome)=projection(st) |
|
144 |
#st$biome=over(st,biome,returnList=F)$BiomeID |
|
145 |
dists=apply(gDistance(st,biome,byid=T),2,which.min) |
|
146 |
st$biomec=biome$code[dists] |
|
147 |
st$realm=biome$realm[dists] |
|
148 |
st$biome=biome$biome[dists] |
|
149 |
|
|
150 |
cldm$biomec=st$biomec[match(cldm$StaID,st$id)] |
|
151 |
cldm$realm=st$realm[match(cldm$StaID,st$id)] |
|
152 |
cldm$biome=st$biome[match(cldm$StaID,st$id)] |
|
153 |
|
|
154 |
|
|
155 |
## write out the tables |
|
156 |
write.csv(cld,file="cld.csv",row.names=F) |
|
157 |
write.csv(cldm,file="cldm.csv",row.names=F) |
|
158 |
writeOGR(st,dsn=".",layer="stations",driver="ESRI Shapefile",overwrite_layer=T) |
|
159 |
######################################################################### |
|
160 |
|
climate/research/cloud/MOD09/ee/ee_compile.R | ||
---|---|---|
1 | 1 |
### Script to compile the monthly cloud data from earth engine into a netcdf file for further processing |
2 | 2 |
|
3 |
library(rasterVis)
|
|
3 |
library(raster) |
|
4 | 4 |
library(doMC) |
5 |
library(multicore) |
|
6 |
library(foreach) |
|
7 |
library(mgcv) |
|
8 |
library(RcppOctave) |
|
9 | 5 |
registerDoMC(12) |
10 | 6 |
|
11 | 7 |
|
... | ... | |
42 | 38 |
} |
43 | 39 |
|
44 | 40 |
rasterOptions(tmpdir=tmpfs,overwrite=T, format="GTiff",maxmemory=1e9) |
45 |
|
|
46 |
|
|
47 | 41 |
rerun=T # set to true to recalculate all dates even if file already exists |
48 | 42 |
|
49 | 43 |
## define month-sensors to process |
... | ... | |
53 | 47 |
#jobs=jobs[jobs$sensor=="MYD09",] |
54 | 48 |
|
55 | 49 |
|
50 |
|
|
51 |
### add boundaries to file list to remove problematic pixels at high latitudes |
|
52 |
## these boundaries were later added to the earth engine script, so if it is re-run this is not necessary |
|
53 |
#xmin,ymin,xmax,ymax (the order expected by gdalbuildvrt -te) |
|
54 |
mextent=list( |
|
55 |
"01"=c(-180,-90,180,73.5),# |
|
56 |
"02"=c(-180,-90,180,84), # |
|
57 |
"03"=c(-180,-90,180,90), |
|
58 |
"04"=c(-180,-90,180,90), |
|
59 |
"05"=c(-180,-69,180,90), |
|
60 |
"06"=c(-180,-62.5,180,90), # |
|
61 |
"07"=c(-180,-67,180,90), # |
|
62 |
"08"=c(-180,-77,180,90), |
|
63 |
"09"=c(-180,-77,180,90), |
|
64 |
"10"=c(-180,-90,180,89), # |
|
65 |
"11"=c(-180,-90,180,77), |
|
66 |
"12"=c(-180,-90,180,69) |
|
67 |
) |
|
68 |
|
|
69 |
## project to sinusoidal |
|
70 |
proj="'+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs'" |
|
71 |
|
|
72 |
mextentsin=lapply(mextent,function(x) c(project(t(x[1:2]),gsub("'","",proj)),project(t(x[3:4]),gsub("'","",proj))))  ## gsub (not sub) strips both shell quotes from proj |
|
73 |
|
|
56 | 74 |
## Loop over data to mosaic tifs, compress, and add metadata |
57 | 75 |
foreach(i=1:nrow(jobs)) %dopar% { |
58 | 76 |
## get month |
59 | 77 |
m=jobs$month[i] |
78 |
cm=sprintf("%02d",m) |
|
79 |
|
|
60 | 80 |
date=df$date[df$month==m][1] |
61 | 81 |
print(date) |
62 | 82 |
## get sensor |
... | ... | |
64 | 84 |
s2=sub("GA","",s) |
65 | 85 |
|
66 | 86 |
## Define output and check if it already exists |
67 |
tvrt=paste(datadir,"/mcd09tif/",s2,"_",sprintf("%02d", m),".vrt",sep="") |
|
68 |
ttif1=paste(datadir,"/mcd09tif/",s2,"_",sprintf("%02d", m),"_uncompressed.tif",sep="") |
|
69 |
ttif2=paste(datadir,"/mcd09tif/",s2,"_",sprintf("%02d", m),".tif",sep="") |
|
87 |
tvrt=paste(datadir,"/mcd09tif/",s2,"_",cm,"_globalsin.vrt",sep="") |
|
88 |
tvrt2=paste(datadir,"/mcd09tif/",s2,"_",cm,"_globalwgs84.vrt",sep="") |
|
89 |
ttif=paste(datadir,"/mcd09tif/",s2,"_",cm,"_mean.tif",sep="") |
|
90 |
ttif2=paste(datadir,"/mcd09tif/",s2,"_",cm,"_sd.tif",sep="") |
|
70 | 91 |
|
71 | 92 |
## check if output already exists |
72 |
if(!rerun&file.exists(ttif1)) return(NA)
|
|
93 |
if(!rerun&file.exists(ttif)) return(NA) |
|
73 | 94 |
## build VRT to merge tiles |
74 |
proj="'+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs'"
|
|
75 |
system(paste("gdalbuildvrt -b 1 -b 2 -srcnodata -32768 ",tvrt," ",paste(df$path[df$month==m&df$sensor==s],collapse=" ")))
|
|
95 |
## include subsetting using the mextentsin object created above to crop out problematic values
|
|
96 |
system(paste("gdalbuildvrt -b 1 -b 2 -te ",paste(round(mextentsin[[cm]]),collapse=" ")," -srcnodata -32768 -vrtnodata 32767 ",tvrt," ",paste(df$path[df$month==m&df$sensor==s],collapse=" ")))
|
|
76 | 97 |
## Merge to geotif in temporary directory |
77 |
## specify source projection because it gets it slightly wrong by default #-ot Int16 -dstnodata -32768
|
78 |
ops=paste("-s_srs ",proj," -t_srs 'EPSG:4326' -multi -srcnodata -32768 -ot Int16 -dstnodata -32768 -r bilinear -te -180 -90 180 90 -tr 0.008333333333333 -0.008333333333333", |
|
79 |
"-co BIGTIFF=YES --config GDAL_CACHEMAX 2000 -wm 2000 -wo NUM_THREADS:10 -co COMPRESS=LZW -co PREDICTOR=2") |
|
80 |
## if the file exists, remove it to avoid warping into an existing file
|
81 |
if(file.exists(ttif1)) file.remove(ttif1) |
|
82 |
## run the warp |
|
83 |
system(paste("gdalwarp -overwrite ",ops," ",tvrt," ",ttif1)) |
|
98 |
## specify source projection because it gets it slightly wrong by default
|
99 |
ops=paste("-multi -of vrt --config GDAL_CACHEMAX 500 -wm 500 -wo NUM_THREADS:10 -srcnodata 32767 -dstnodata 32767 -s_srs ",proj," -t_srs 'EPSG:4326' ", |
|
100 |
" -ot Int16 -r bilinear -te -180 -90 180 90 -tr 0.008333333333333 -0.008333333333333") |
|
101 |
## create the warped VRT
|
102 |
system(paste("gdalwarp -overwrite ",ops," ",tvrt," ",tvrt2)) |
|
84 | 103 |
|
85 | 104 |
## Compress file and add metadata tags |
86 |
ops2=paste("-ot Int16 -co COMPRESS=LZW -co PREDICTOR=2 -stats") |
|
87 |
tags=c(paste("TIFFTAG_IMAGEDESCRIPTION='Monthly Cloud Frequency for 2000-2013 extracted from C5 MODIS ",s,"GA PGE11 internal cloud mask algorithm (embedded in state_1km bit 10).", |
|
88 |
"The daily cloud mask time series were summarized to mean cloud frequency (CF) by calculating the proportion of cloudy days. ", |
|
89 |
"Band Descriptions: 1) Mean Monthly Cloud Frequency x 10000 2) Standard Deviation of Mean Monthly Cloud x 10000'"), |
|
90 |
"TIFFTAG_DOCUMENTNAME='Collection 5 ",s," Cloud Frequency'", |
|
105 |
ops2=paste("-ot Int16 -co COMPRESS=LZW -co PREDICTOR=2 -stats -a_nodata 32767 ") |
|
106 |
meantags=c(paste("TIFFTAG_IMAGEDESCRIPTION='Monthly Cloud Frequency (x10000) for 2000-2013 extracted from C5 MODIS ",s,"GA PGE11 internal cloud mask algorithm (embedded in state_1km bit 10).", |
|
107 |
"The daily cloud mask time series were summarized to mean cloud frequency (CF) by calculating the proportion of cloudy days. '"), |
|
108 |
paste("TIFFTAG_DOCUMENTNAME='Collection 5 ",s," Mean Cloud Frequency'",sep=""), |
|
109 |
paste("TIFFTAG_DATETIME='2013",sprintf("%02d", m),"15'",sep=""), |
|
110 |
"TIFFTAG_ARTIST='Adam M. Wilson (adam.wilson@yale.edu)'") |
|
111 |
sdtags=c(paste("TIFFTAG_IMAGEDESCRIPTION='Standard Deviation (x10000) of Monthly Cloud Frequency for 2000-2013 extracted from C5 MODIS ",s,"GA PGE11 internal cloud mask algorithm (embedded in state_1km bit 10).", |
|
112 |
"The daily cloud mask time series were summarized to mean cloud frequency (CF) by calculating the proportion of cloudy days. '"), |
|
113 |
paste("TIFFTAG_DOCUMENTNAME='Collection 5 ",s," SD Cloud Frequency'",sep=""), |
|
91 | 114 |
paste("TIFFTAG_DATETIME='2013",sprintf("%02d", m),"15'",sep=""), |
92 | 115 |
"TIFFTAG_ARTIST='Adam M. Wilson (adam.wilson@yale.edu)'") |
93 |
system(paste("gdal_translate ",ops2," ",paste("-mo ",tags,sep="",collapse=" ")," ",ttif1," ",ttif2)) |
|
94 | 116 |
|
95 |
## delete temporary files |
|
96 |
file.remove(tvrt,ttif1) |
|
117 |
## run the merge, warp, and compress all in one step... |
|
118 |
system(paste("gdal_translate -b 1 ",ops2," ",paste("-mo ",meantags,sep="",collapse=" ")," ",tvrt2," ",ttif)) |
|
119 |
system(paste("gdal_translate -b 2 ",ops2," ",paste("-mo ",sdtags,sep="",collapse=" ")," ",tvrt2," ",ttif2)) |
|
97 | 120 |
writeLines(paste("Month:",m," Sensor:",s," Finished")) |
98 | 121 |
} |
99 | 122 |
|
Update bounding box to remove high-latitude garbage