Project

General

Profile

« Previous | Next » 

Revision c33d3b68

Added by Adam M. Wilson over 12 years ago

Updated MOD06 compile script to process the tiles in parallel using GRASS. This brings processing time for 10 year archive for oregon from 48 hours to ~2 hours on 24 cores. Also added several exploratory analysis to the data_summary script.

View differences:

climate/procedures/MOD06_L2_data_compile.r
162 162
Sys.setenv(GRASS_OVERWRITE=1)
163 163
Sys.setenv(DEBUG=0)
164 164

  
165
initGRASS(gisBase="/usr/lib/grass64",SG=td,gisDbase=gisDbase,location=gisLocation,mapset="PERMANENT",override=T,pid=Sys.getpid())
166
getLocationProj()
167
system(paste("g.proj -c proj4=\"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +datum=WGS84 +units=m +no_defs\"",sep=""))
168

  
169
#system("g.mapset PERMANENT")
170
execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",outdir,"/",fs$file[1],"\":mod06:Cloud_Mask_1km_0",sep=""),
171
          output="modisgrid",flags=c("quiet","overwrite","o"))
172
system("g.region rast=modisgrid save=roi --overwrite")
173
system("g.region roi")
174
system("g.region -p")
175
getLocationProj()
176

  
177 165
## temporary objects to test function below
178 166
 i=1
179 167
file=paste(outdir,"/",fs$file[1],sep="")
......
182 170

  
183 171
### Function to extract various SDSs from a single gridded HDF file and use QA data to throw out 'bad' observations
184 172
loadcloud<-function(date,fs){
185
    ## Identify which files to process
173
### set up grass session
174
  tf=paste(tempdir(),"/grass", Sys.getpid(),"/", sep="")
175
 
176
  ## set up tempfile for this PID
177
  initGRASS(gisBase="/usr/lib/grass64",gisDbase=tf,SG=td,override=T,location="mod06",mapset="PERMANENT",home=tf,pid=Sys.getpid())
178
  system(paste("g.proj -c proj4=\"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +datum=WGS84 +units=m +no_defs\"",sep=""))
179

  
180
#system("g.mapset PERMANENT")
181
  execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",outdir,"/",fs$file[1],"\":mod06:Cloud_Mask_1km_0",sep=""),
182
            output="modisgrid",flags=c("quiet","overwrite","o"))
183
  system("g.region rast=modisgrid save=roi --overwrite")
184
  system("g.region roi")
185
  system("g.region -p")
186
#  getLocationProj()
187

  
188

  
189
  ## Identify which files to process
186 190
  tfs=fs$file[fs$date==date]
187 191
  nfs=length(tfs)
188
  unlink_.gislock()
189
    ## set new PID for this grass process (running within a spawned R session if using multicore)
190
  set.GIS_LOCK(Sys.getpid())
191
    ## create new mapset to hold all data for this day
192
  system(paste("g.mapset -c mapset=",gisMapset,"_",format(date,"%Y%m%d"),sep=""))
193
  #  file.copy(paste(gisDbase,"/",gisLocation,"/PERMANENT/DEFAULT_WIND",sep=""),paste(gisDbase,"/",gisLocation,"/",gisMapset,"_",format(date,"%Y%m%d"),"/WIND",sep=""))
194
  system("g.region roi@PERMANENT")
192

  
193
  ### print some summary info
195 194
  print(date)
196
  print(gmeta6())
197
  print(Sys.getpid())
198 195
  ## loop through scenes and process QA flags
199 196
  for(i in 1:nfs){
200 197
     file=paste(outdir,"/",tfs[i],sep="")
......
217 214
                 QA_COT3_",i,"=  ((QA_",i," / 2^3) % 2^2 )==0
218 215
                 QA_CER_",i,"=   ((QA_",i," / 2^5) % 2^1 )==1
219 216
                 QA_CER2_",i,"=  ((QA_",i," / 2^6) % 2^2 )==3
220
                 QA_CWP_",i,"=   ((QA_",i," / 2^8) % 2^1 )==1
221
                 QA_CWP2_",i,"=  ((QA_",i," / 2^9) % 2^2 )==3
222 217
EOF",sep="")) 
218
#                 QA_CWP_",i,"=   ((QA_",i," / 2^8) % 2^1 )==1
219
#                 QA_CWP2_",i,"=  ((QA_",i," / 2^9) % 2^2 )==3
223 220

  
224 221
   ## Optical Thickness
225 222
   execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",file,"\":mod06:Cloud_Optical_Thickness",sep=""),
......
244 241
   system(paste("r.mapcalc \"CER2_",i,"=if(CM_clear_",i,"==0,CER_",i,",0)\"",sep=""))   
245 242

  
246 243
   ## Cloud Water Path
247
   execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",file,"\":mod06:Cloud_Water_Path",sep=""),
248
            output=paste("CWP_",i,sep=""),title="cloud_water_path",
249
            flags=c("overwrite","o")) ; print("")
250
   execGRASS("r.null",map=paste("CWP_",i,sep=""),setnull="-9999")
251
   ## keep only positive CWP values where quality is 'useful' and 'very good' & scale to real units
252
#   system(paste("r.mapcalc \"CWP=if(QA_CWP&&QA_CWP2,CWP,null())\""))   
244
#   execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",file,"\":mod06:Cloud_Water_Path",sep=""),
245
#            output=paste("CWP_",i,sep=""),title="cloud_water_path",
246
#            flags=c("overwrite","o")) ; print("")
247
#   execGRASS("r.null",map=paste("CWP_",i,sep=""),setnull="-9999")
253 248
   ## keep only positive CER values where quality is 'useful' and 'very good' & scale to real units
254
   system(paste("r.mapcalc \"CWP_",i,"=if(QA_CWP_",i,"&&QA_CWP2_",i,"&&CWP_",i,">=0,CWP_",i,"*0.009999999776482582,null())\"",sep=""))   
249
#   system(paste("r.mapcalc \"CWP_",i,"=if(QA_CWP_",i,"&&QA_CWP2_",i,"&&CWP_",i,">=0,CWP_",i,"*0.009999999776482582,null())\"",sep=""))   
255 250
   ## set CER to 0 in clear-sky pixels
256
   system(paste("r.mapcalc \"CWP2_",i,"=if(CM_clear_",i,"==0,CWP_",i,",0)\"",sep=""))   
251
#   system(paste("r.mapcalc \"CWP2_",i,"=if(CM_clear_",i,"==0,CWP_",i,",0)\"",sep=""))   
257 252

  
258 253
     
259 254
 } #end loop through sub daily files
......
271 266
EOF",sep=""))
272 267

  
273 268
  #### Write the file to a geotiff
274
  execGRASS("r.out.gdal",input="CER_daily",output=paste(tifdir,"/CER_",format(date,"%Y%m%d"),".tif",sep=""),nodata=-999)
275
  execGRASS("r.out.gdal",input="COT_daily",output=paste(tifdir,"/COT_",format(date,"%Y%m%d"),".tif",sep=""),nodata=-999)
276
  execGRASS("r.out.gdal",input="CLD_daily",output=paste(tifdir,"/CLD_",format(date,"%Y%m%d"),".tif",sep=""),nodata=-999)
269
  execGRASS("r.out.gdal",input="CER_daily",output=paste(tifdir,"/CER_",format(date,"%Y%m%d"),".tif",sep=""),nodata=-999,flags=c("quiet"))
270
  execGRASS("r.out.gdal",input="COT_daily",output=paste(tifdir,"/COT_",format(date,"%Y%m%d"),".tif",sep=""),nodata=-999,flags=c("quiet"))
271
  execGRASS("r.out.gdal",input="CLD_daily",output=paste(tifdir,"/CLD_",format(date,"%Y%m%d"),".tif",sep=""),nodata=99,flags=c("quiet"))
277 272

  
278 273
### delete the temporary files 
279 274
  unlink_.gislock()
280 275
  system("/usr/lib/grass64/etc/clean_temp")
281
# system(paste("rm -R ",gmeta6()$GISDBASE,"/",gmeta6()$LOCATION_NAME,"/",gmeta6()$MAPSET,sep=""))
282

  
276
 system(paste("rm -R ",tf,sep=""))
277
### print update
278
  print(paste(" ###################################################################               Finished ",date,"
279
################################################################"))
283 280
}
284 281

  
285 282

  
......
287 284
### Now run it
288 285

  
289 286
tdates=sort(unique(fs$date))
290
done=tdates%in%as.Date(substr(list.files("data/modis/MOD06_L2_tif"),5,12),"%Y%m%d")
287
done=tdates%in%as.Date(substr(list.files(tifdir),5,12),"%Y%m%d")
291 288
table(done)
292 289
tdates=tdates[!done]
293 290

  
294
lapply(tdates,function(date) loadcloud(date,fs=fs))
291
mclapply(tdates,function(date) loadcloud(date,fs=fs))
295 292

  
296 293
 
297
## unlock the grass database
298
unlink_.gislock()
299

  
300

  
301 294

  
302 295
#######################################################################################33
303 296
###  Produce the monthly averages
......
328 321
mclapply(1:nrow(vs),function(i){
329 322
  print(paste("Starting ",vs$type[i]," for month ",vs$month[i]))
330 323
  td=stack(fs2$path[which(fs2$month==vs$month[i]&fs2$type==vs$type[i])])
324
  print(paste("Processing Metric ",vs$type[i]," for month ",vs$month[i]))
331 325
  calc(td,mean,na.rm=T,
332 326
       filename=paste(summarydatadir,"/",vs$type[i],"_mean_",vs$month[i],".tif",sep=""),
333 327
       format="GTiff")
climate/procedures/MOD06_L2_data_summary.r
9 9
library(sp)
10 10
library(spgrass6)
11 11
library(rgdal)
12
library(reshape)
12
library(reshape2)
13 13
library(ncdf4)
14 14
library(geosphere)
15 15
library(rgeos)
......
19 19
library(rgl)
20 20
library(hdf5)
21 21
library(rasterVis)
22
library(heR.Misc)
22 23

  
23 24
X11.options(type="Xlib")
24 25
ncores=20  #number of threads to use
......
27 28
setwd("/home/adamw/acrobates/projects/interp")
28 29

  
29 30
roi=readOGR("data/regions/Test_sites/Oregon.shp","Oregon")
31
roi_geo=as(roi,"SpatialLines")
30 32
roi=spTransform(roi,CRS(" +proj=sinu +lon_0=0 +x_0=0 +y_0=0"))
31 33
roil=as(roi,"SpatialLines")
32 34

  
......
58 60
cotm=mean(cot,na.rm=T)
59 61
### TODO: change to bilinear!
60 62

  
61
### get station data
63
cldfiles=list.files(summarydatadir,pattern="CLD_mean_.*tif$",full=T); cldfiles
64
cld=brick(stack(cldfiles))
65
cld[cld==0]=NA
66
setZ(cld,months,name="time")
67
cld@z=list(months)
68
cld@zname="time"
69
layerNames(cld) <- as.character(format(months,"%b"))
70
#cot=projectRaster(from=cot,crs="+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0",method="ngb")
71
cldm=mean(cld,na.rm=T)
72
### TODO: change to bilinear!
73

  
74

  
75
### get station data, subset to stations in region, and transform to sinusoidal
62 76
load("data/ghcn/roi_ghcn.Rdata")
63 77
load("data/allstations.Rdata")
64 78

  
65
d2=d[d$variable=="ppt",]
79
d2=d[d$variable=="ppt"&d$date>=as.Date("2000-01-01"),]
66 80
d2=d2[,-grep("variable",colnames(d2)),]
67 81
st2=st[st$id%in%d$id,]
68
st2=spTransform(st2,CRS(" +proj=sinu +lon_0=0 +x_0=0 +y_0=0"))
82
#st2=spTransform(st2,CRS(" +proj=sinu +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +towgs84=0,0,0"))
69 83
d2[,c("lon","lat")]=coordinates(st2)[match(d2$id,st2$id),]
70 84
d2$month=format(d2$date,"%m")
85
d2$value=d2$value/10 #convert to mm
71 86

  
72 87
### extract MOD06 data for each station
73 88
stcer=extract(cer,st2)#;colnames(stcer)=paste("cer_mean_",1:12,sep="")
74 89
stcot=extract(cot,st2)#;colnames(stcot)=paste("cot_mean_",1:12,sep="")
75
mod06=cbind.data.frame(id=st2$id,lat=st2$lat,lon=st2$lon,stcer,stcot)
90
stcld=extract(cld,st2)#;colnames(stcld)=paste("cld_mean_",1:12,sep="")
91
mod06=cbind.data.frame(id=st2$id,lat=st2$lat,lon=st2$lon,stcer,stcot,stcld)
76 92
mod06l=melt(mod06,id.vars=c("id","lon","lat"))
77 93
mod06l[,c("variable","moment","month")]=do.call(rbind,strsplit(as.character(mod06l$variable),"_"))
78
mod06l=cast(mod06l,id+lon+lat+month~variable+moment,value="value")
94
mod06l=as.data.frame(cast(mod06l,id+lon+lat+month~variable+moment,value="value"))
95

  
96
### Identify stations that have < 10 years of data
97
cnts=cast(d2,id~.,fun=function(x) length(x[!is.na(x)]),value="count");colnames(cnts)[colnames(cnts)=="(all)"]="count"
98
summary(cnts)
99
## drop them
100
d2=d2[d2$id%in%cnts$id[cnts$count>=365*10],]
101

  
79 102

  
80 103
### generate monthly means of station data
81 104
dc=cast(d2,id+lon+lat~month,value="value",fun=function(x) mean(x,na.rm=T)*30)
82 105
dcl=melt(dc,id.vars=c("id","lon","lat"),value="ppt")
83 106

  
107

  
84 108
## merge station data with mod06
85 109
mod06s=merge(dcl,mod06l)
86 110
mod06s$lvalue=log(mod06s$value+1)
111
colnames(mod06s)[colnames(mod06s)=="value"]="ppt"
87 112

  
88 113

  
89 114
###################################################################
90 115
###################################################################
91 116
### draw some plots
117
gq=function(x,n=10,cut=F) {
118
  if(!cut) unique(quantile(x,seq(0,1,len=n+1)))
119
  if(cut)  cut(x,unique(quantile(x,seq(0,1,len=n+1))))
120
}
92 121

  
93 122
bgyr=colorRampPalette(c("blue","green","yellow","red"))
94 123

  
95 124
pdf("output/MOD06_summary.pdf",width=11,height=8.5)
96 125

  
126
# % cloudy maps
127
title="Cloudiness (% cloudy days) "
128
at=unique(quantile(as.matrix(cld),seq(0,1,len=100),na.rm=T))
129
p=levelplot(cld,xlab.top=title,at=at,col.regions=bgyr(length(at)))+layer(sp.lines(roil, lwd=1.2, col='black'))
130
print(p)
131
bwplot(cer,main=title,ylab="Cloud Effective Radius (microns)")
132

  
97 133
# CER maps
98 134
title="Cloud Effective Radius (microns)"
99 135
at=quantile(as.matrix(cer),seq(0,1,len=100),na.rm=T)
......
118 154
### monthly comparisons of variables
119 155
mod06sl=melt(mod06s,measure.vars=c("value","COT_mean","CER_mean"))
120 156
bwplot(value~month|variable,data=mod06sl,cex=.5,pch=16,col="black",scales=list(y=list(relation="free")),layout=c(1,3))
121
splom(mod06s[grep("CER|COT",colnames(mod06s))],cex=.2,pch=16)
157
splom(mod06s[grep("CER|COT|CLD",colnames(mod06s))],cex=.2,pch=16)
158

  
159
### run some regressions
160
summary(lm(log(ppt)~CER_mean*month,data=mod06s))
161

  
162
xyplot(ppt~CLD_mean,data=mod06s,cex=.5,pch=16,col="black",scales=list(y=list(log=F)),main="Comparison of monthly mean CLD and precipitation",ylab="Precipitation (log axis)",xlab="% Days Cloudy")
163
xyplot(ppt~CER_mean,data=mod06s,cex=.5,pch=16,col="black",scales=list(y=list(log=T)),main="Comparison of monthly mean CER and precipitation",ylab="Precipitation (log axis)")
164
xyplot(ppt~COT_mean,data=mod06s,cex=.5,pch=16,col="black",scales=list(y=list(log=T)),main="Comparison of monthly mean COT and precipitation",ylab="Precipitation (log axis)")
165

  
166
xyplot(ppt~CER_mean|month,data=mod06s,cex=.5,pch=16,col="black",scales=list(log=T,relation="free"))
167
xyplot(ppt~COT_mean|month,data=mod06s,cex=.5,pch=16,col="black",scales=list(log=T,relation="free"))
168

  
169
 xyplot(ppt~COT_mean|id,data=mod06s,panel=function(x,y,group){
170
  panel.xyplot(x,y,type=c("r"),cex=.5,pch=16,col="red")
171
  panel.xyplot(x,y,type=c("p"),cex=.5,pch=16,col="black")
172
} ,scales=list(y=list(log=T)),strip=F,main="Monthly Mean Precipitation and Cloud Optical Thickness by station",sub="Each panel is a station, each point is a monthly mean",ylab="Precipitation (mm, log axis)",xlab="Mean Monthly Cloud Optical Thickness")
173

  
174
### Calculate the slope of each line and plot it on a map
175
mod06s.sl=dapply(mod06s,list(id=mod06s$id),function(x){
176
  lm1=lm(log(x$ppt)~x$CER_mean,)
177
  data.frame(lat=x$lat[1],lon=x$lon[1],intcpt=coefficients(lm1)[1],cer=coefficients(lm1)[2],r2=summary(lm1)$r.squared)
178
})
179
mod06s.sl$cex=gq(mod06s.sl$r2,n=5,cut=T)
180
mod06s.sl$cer.s=gq(mod06s.sl$cer,n=5,cut=T)
181

  
182
xyplot(lat~lon,group=cer.s,data=mod06s.sl,par.settings = list(superpose.symbol = list(pch =16, col=bgyr(5),cex=1)),auto.key=list(space="right",title="Slope Coefficient"),asp=1,
183
       main="Slopes of linear regressions {log(ppt)~CloudEffectiveRadius}")+
184
  layer(sp.lines(roi_geo, lwd=1.2, col='black'))
185

  
186

  
187
############################################################
188
### simple regression to get spatial residuals
189
m="01"
190
mod06s2=mod06s#[mod06s$month==m,]
191

  
192
lm1=lm(ppt~CER_mean*month,data=mod06s2)
193
summary(lm1)
194
mod06s2$pred=predict(lm1,mod06s2)
195
mod06s2$resid=mod06s2$pred-mod06s2$ppt
122 196

  
123
xyplot(value~CER_mean,data=mod06s,cex=.5,pch=16,col="black",scales=list(y=list(log=T)),main="Comparison of monthly mean CER and precipitation",ylab="Precipitation (log axis)")
124
xyplot(value~COT_mean,data=mod06s,cex=.5,pch=16,col="black",scales=list(y=list(log=T)),main="Comparison of monthly mean COT and precipitation",ylab="Precipitation (log axis)")
197
mod06sr=cast(mod06s2,id+lon+lat~month,value="resid",fun=function(x) mean(x,na.rm=T))
198
mod06sr=melt(mod06sr,id.vars=c("id","lon","lat"),value="resid")
199
mod06sr$residg=cut(mod06sr$value,quantile(mod06sr$value,seq(0,1,len=11),na.rm=T))
125 200

  
126
xyplot(value~CER_mean|month,data=mod06s,cex=.5,pch=16,col="black",scales=list(log=T,relation="free"))
127
xyplot(value~COT_mean|month,data=mod06s,cex=.5,pch=16,col="black",scales=list(log=T,relation="free"))
201
  xyplot(lat~lon|month,group=residg,data=mod06sr,
202
         par.settings = list(superpose.symbol = list(pch =16, col=terrain.colors(10),cex=.5)),
203
           auto.key=T)
204
         
128 205

  
129
#xyplot(value~CER_mean|month+cut(COT_mean,breaks=c(0,10,20)),data=mod06s,cex=.5,pch=16,col="black")#,scales=list(relation="fixed"))
130 206

  
207
plot(pred~value,data=mod06s,log="xy")
131 208

  
132
summary(lm(lvalue~COT_mean*month,data=mod06s))
133 209

  
134 210

  
135 211
dev.off()

Also available in: Unified diff