Revision 2c238837
Added by Adam Wilson about 12 years ago
climate/procedures/MOD06_L2_process.r | ||
---|---|---|
1 |
#!/bin/r |
|
2 |
|
|
1 | 3 |
################################################################################### |
2 | 4 |
### R code to aquire and process MOD06_L2 cloud data from the MODIS platform |
3 | 5 |
|
6 |
## load command line arguments (mname) |
|
7 |
args=(commandArgs(TRUE)) ##args is now a list of character vectors |
|
8 |
## Then cycle through each element of the list and evaluate the expressions. |
|
9 |
eval(parse(text=args)) |
|
10 |
|
|
11 |
system("source ~/moduleload") |
|
12 |
|
|
13 |
print(args) |
|
14 |
|
|
15 |
tile="h11v08" |
|
16 |
outdir="2_daily" #directory for separate daily files |
|
17 |
outdir2="3_summary" #directory for combined daily files and summarized files |
|
18 |
|
|
19 |
print(paste("Processing tile",tile," for date",date)) |
|
4 | 20 |
|
5 | 21 |
## load libraries |
6 |
require(rgdal) |
|
7 | 22 |
require(reshape) |
8 | 23 |
#require(ncdf4) |
9 | 24 |
require(geosphere) |
10 | 25 |
require(raster) |
26 |
#require(rgdal) |
|
11 | 27 |
require(spgrass6) |
12 |
## packages for parallelization |
|
13 |
#library(foreach) |
|
14 |
#library(doMPI) |
|
15 |
library(multicore) |
|
16 | 28 |
|
17 |
## register cluster and number of cores to use |
|
18 |
ncores=as.numeric(system("echo $NCORES",intern=T)) |
|
19 |
#cl=startMPIcluster(20,verbose=F) |
|
20 |
#registerDoMPI(cl) |
|
21 | 29 |
|
22 | 30 |
## specify some working directories |
23 | 31 |
setwd("/nobackupp1/awilso10/mod06") |
24 | 32 |
|
25 |
outdir="2_daily" #directory for separate daily files |
|
26 |
outdir2="3_summary" #directory for combined daily files and summarized files |
|
27 |
|
|
28 | 33 |
print(paste("tempdir()=",tempdir())) |
29 | 34 |
print(paste("TMPDIR=",Sys.getenv("TMPDIR"))) |
30 | 35 |
|
31 |
### get list of files to process |
|
32 |
datadir="/nobackupp4/datapool/modis/MOD06_L2.005/" |
|
33 |
|
|
34 |
fs=data.frame(path=list.files(datadir,full=T,recursive=T,pattern="hdf"),stringsAsFactors=F) |
|
35 |
fs$file=basename(fs$path) |
|
36 |
fs$date=as.Date(substr(fs$file,11,17),"%Y%j") |
|
37 |
fs$month=format(fs$date,"%m") |
|
38 |
fs$year=format(fs$date,"%Y") |
|
39 |
fs$time=substr(fs$file,19,22) |
|
40 |
fs$datetime=as.POSIXct(strptime(paste(substr(fs$file,11,17),substr(fs$file,19,22)), '%Y%j %H%M')) |
|
41 |
fs$dateid=format(fs$date,"%Y%m%d") |
|
42 |
fs$path=as.character(fs$path) |
|
43 |
fs$file=as.character(fs$file) |
|
44 |
|
|
45 |
## get all unique dates |
|
46 |
alldates=unique(fs$dateid) |
|
36 |
## load ancillary data |
|
37 |
load(file="allfiles.Rdata") |
|
47 | 38 |
|
48 | 39 |
## load tile information |
49 | 40 |
load(file="modlandTiles.Rdata") |
... | ... | |
51 | 42 |
#modt=readOGR("modgrid","modis_sinusoidal_grid_world",) |
52 | 43 |
#modt@data[,colnames(tb)[3:6]]=tb[match(paste(modt$h,modt$v),paste(tb$ih,tb$iv)),3:6] |
53 | 44 |
#write.csv(modt@data,file="modistile.csv") |
54 |
tile="h11v08" #can move this to submit script if needed |
|
55 |
|
|
56 |
|
|
57 |
## identify which have been completed |
|
58 |
done=alldates%in%substr(list.files(outdir),7,14) |
|
59 |
table(done) |
|
60 |
notdone=alldates[!done] #these are the dates that still need to be processed |
|
61 | 45 |
|
62 | 46 |
## vars to process |
63 | 47 |
vars=as.data.frame(matrix(c( |
... | ... | |
120 | 104 |
print(log) |
121 | 105 |
## confirm file is present |
122 | 106 |
print(paste("Confirming output file (",outfile,") is present and readable by GDAL")) |
123 |
gi=GDALinfo(outfile); print(gi)
|
|
107 |
system(paste("gdalinfo ",outfile))
|
|
124 | 108 |
print(paste("Finished ", file)) |
125 | 109 |
} |
126 | 110 |
|
... | ... | |
144 | 128 |
tf=paste(tempdir(),"/grass", Sys.getpid(),"/", sep="") |
145 | 129 |
print(paste("Set up temporary grass session in",tf)) |
146 | 130 |
|
147 |
## load a MOD11A1 file to define grid |
|
148 |
gridfile=list.files("/nobackupp4/datapool/modis/MOD11A1.005/2006.01.27/",pattern=paste(tile,".*hdf$",sep=""),full=T)[1] |
|
149 |
td=readGDAL(paste("HDF4_EOS:EOS_GRID:\"",gridfile,"\":MODIS_Grid_Daily_1km_LST:Night_view_angl",sep="")) |
|
150 |
projection(td)="+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs +datum=WGS84 +ellps=WGS84 " |
|
151 |
|
|
152 | 131 |
## set up temporary grass instance for this PID |
153 | 132 |
initGRASS(gisBase="/nobackupp1/awilso10/software/grass-6.4.3svn",gisDbase=tf,SG=td,override=T,location="mod06",mapset="PERMANENT",home=tf,pid=Sys.getpid()) |
154 | 133 |
system(paste("g.proj -c proj4=\"",projection(td),"\"",sep="")) |
... | ... | |
161 | 140 |
|
162 | 141 |
## Identify which files to process |
163 | 142 |
tfs=fs$file[fs$dateid==date] |
143 |
tfs=tfs[tfs%in%list.files(tempdir())] |
|
164 | 144 |
nfs=length(tfs) |
165 | 145 |
|
166 | 146 |
## loop through scenes and process QA flags |
... | ... | |
170 | 150 |
execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",file,"\":mod06:Cloud_Mask_1km_0",sep=""), |
171 | 151 |
output=paste("CM1_",i,sep=""),flags=c("overwrite","o")) ; print("") |
172 | 152 |
## extract cloudy and 'confidently clear' pixels |
153 |
|
|
173 | 154 |
system(paste("r.mapcalc <<EOF |
174 | 155 |
CM_cloud_",i," = ((CM1_",i," / 2^0) % 2) == 1 && ((CM1_",i," / 2^1) % 2^2) == 0 |
175 | 156 |
CM_clear_",i," = ((CM1_",i," / 2^0) % 2) == 1 && ((CM1_",i," / 2^1) % 2^2) == 3 |
... | ... | |
289 | 270 |
|
290 | 271 |
## test it |
291 | 272 |
##date=notdone[1] |
292 |
##mod06(date,tile)
|
|
273 |
mod06(date,tile) |
|
293 | 274 |
|
294 | 275 |
## run it for all dates |
295 |
mclapply(notdone,mod06,tile,mc.cores=ncores) # use ncores/2 because system() commands can add second process for each spawned R |
|
296 |
|
|
276 |
#mclapply(notdone,mod06,tile,mc.cores=ncores) # use ncores/2 because system() commands can add second process for each spawned R |
|
297 | 277 |
#foreach(i=notdone[1:3],.packages=(.packages())) %dopar% mod06(i,tile) |
298 |
|
|
299 | 278 |
#foreach(i=1:20) %dopar% print(i) |
300 | 279 |
|
301 | 280 |
|
302 |
################################################################################ |
|
303 |
## now generate the climatologies |
|
304 |
fdly=data.frame( |
|
305 |
path=list.files(outdir,pattern="nc$",full=T), |
|
306 |
file=list.files(outdir,pattern="nc$")) |
|
307 |
fdly$date=as.Date(substr(fdly$file,7,14),"%Y%m%d") |
|
308 |
fdly$month=format(fdly$date,"%m") |
|
309 |
fdly$year=format(fdly$date,"%Y") |
|
310 |
|
|
311 |
## check validity (via npar and ntime) of nc files |
|
312 |
for(i in 1:nrow(fdly)){ |
|
313 |
fdly$ntime[i]=as.numeric(system(paste("cdo sinfo ",fdly$path[i]),intern=T)) |
|
314 |
fdly$npar[i]=as.numeric(system(paste("cdo -s npar ",fdly$path[i]),intern=T)) |
|
315 |
print(i) |
|
316 |
} |
|
317 |
|
|
318 |
## Combine all days within years into a single file (can't mergetime all days at once because this opens too many files) |
|
319 |
tsdir=paste(tempdir(),"/summary",sep="") |
|
320 |
dir.create(tsdir) |
|
321 |
lapply(unique(fdly$year),function(y){ |
|
322 |
system(paste("cdo -O mergetime ",paste(fdly$path[fdly$year==y],collapse=" ")," ",tsdir,"/MOD09_",tile,"_",y,"_daily.nc",sep="")) |
|
323 |
print(paste("Finished merging daily files for year",y)) |
|
324 |
}) |
|
325 |
## Combine the year-by-year files into a single daily file |
|
326 |
system(paste("cdo -O mergetime ",paste(list.files(tsdir,full=T,pattern="daily[.]nc$"),collapse=" ")," ",outdir2,"/MOD09_",tile,"_daily.nc",sep="")) |
|
327 |
|
|
328 |
system(paste("cdo -O monmean ",outdir2,"/MOD09_",tile,"_daily.nc ",outdir2,"/",tile,"_monmean.nc",sep="")) |
|
329 |
system(paste("cdo -O ymonmean ",outdir2,"/MOD09_",tile,"_daily.nc ",outdir2,"/",tile,"_ymonmean.nc",sep="")) |
|
330 |
system(paste("cdo -O ymonstd ",outdir2,"/MOD09_",tile,"_daily.nc ",outdir2,"/",tile,"_ymonstd.nc",sep="")) |
|
331 |
|
|
332 |
print("Finished! %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") |
|
333 |
## quit R |
|
334 | 281 |
q("no") |
335 |
|
|
336 |
|
climate/procedures/Pleiades.R | ||
---|---|---|
7 | 7 |
tb$tile=paste("h",sprintf("%02d",tb$ih),"v",sprintf("%02d",tb$iv),sep="") |
8 | 8 |
save(tb,file="modlandTiles.Rdata") |
9 | 9 |
|
10 |
### Submission script |
|
10 |
outdir="2_daily" #directory for separate daily files |
|
11 |
outdir2="3_summary" #directory for combined daily files and summarized files |
|
12 |
|
|
13 |
## load a MOD11A1 file to define grid |
|
14 |
gridfile=list.files("/nobackupp4/datapool/modis/MOD11A1.005/2006.01.27/",pattern=paste(tile,".*hdf$",sep=""),full=T)[1] |
|
15 |
td=readGDAL(paste("HDF4_EOS:EOS_GRID:\"",gridfile,"\":MODIS_Grid_Daily_1km_LST:Night_view_angl",sep="")) |
|
16 |
projection(td)="+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs +datum=WGS84 +ellps=WGS84 " |
|
17 |
|
|
18 |
|
|
19 |
### get list of files to process |
|
20 |
datadir="/nobackupp4/datapool/modis/MOD06_L2.005/" |
|
21 |
|
|
22 |
fs=data.frame(path=list.files(datadir,full=T,recursive=T,pattern="hdf"),stringsAsFactors=F) |
|
23 |
fs$file=basename(fs$path) |
|
24 |
fs$date=as.Date(substr(fs$file,11,17),"%Y%j") |
|
25 |
fs$month=format(fs$date,"%m") |
|
26 |
fs$year=format(fs$date,"%Y") |
|
27 |
fs$time=substr(fs$file,19,22) |
|
28 |
fs$datetime=as.POSIXct(strptime(paste(substr(fs$file,11,17),substr(fs$file,19,22)), '%Y%j %H%M')) |
|
29 |
fs$dateid=format(fs$date,"%Y%m%d") |
|
30 |
fs$path=as.character(fs$path) |
|
31 |
fs$file=as.character(fs$file) |
|
32 |
|
|
33 |
## get all unique dates |
|
34 |
alldates=unique(fs$dateid) |
|
35 |
|
|
36 |
|
|
37 |
#### Generate submission file |
|
38 |
## identify which have been completed |
|
39 |
done=alldates%in%substr(list.files(outdir),7,14) |
|
40 |
table(done) |
|
41 |
notdone=alldates[!done] #these are the dates that still need to be processed |
|
42 |
|
|
43 |
tile="h11v08" #can move this to submit script if needed |
|
44 |
script="/u/awilso10/environmental-layers/climate/procedures/MOD06_L2_process.r" |
|
45 |
#write.table(paste("--verbose ",script," date=",notdone," tile=\"",tile,"\"",sep=""),file="notdone.txt",row.names=F,col.names=F,quote=F) |
|
46 |
write.table(paste("--verbose ",script," date=",notdone[1:30],sep=""),file="notdone.txt",row.names=F,col.names=F,quote=F) |
|
47 |
|
|
48 |
save(fs,alldates,gridfile,td,file="allfiles.Rdata") |
|
49 |
|
|
50 |
## Submission script |
|
11 | 51 |
|
12 | 52 |
cat(paste(" |
13 | 53 |
#PBS -S /bin/bash |
14 |
#PBS -l select=1:ncpus=16:model=san
|
|
54 |
#PBS -l select=2:ncpus=16:model=san
|
|
15 | 55 |
###PBS -l select=4:ncpus=8:model=neh |
16 | 56 |
##PBS -l select=1:ncpus=12:model=wes |
17 | 57 |
####### old: select=48:ncpus=8:mpiprocs=8:model=neh |
... | ... | |
19 | 59 |
#PBS -j oe |
20 | 60 |
#PBS -m e |
21 | 61 |
#PBS -V |
22 |
####PBS -W group_list=s1007 |
|
23 | 62 |
#PBS -q devel |
24 | 63 |
#PBS -o log/log_^array_index^ |
25 |
#PBS -o log/log_DataCompile |
|
64 |
#PBS -o log/log_DataCompile.log
|
|
26 | 65 |
#PBS -M adam.wilson@yale.edu |
27 | 66 |
#PBS -N MOD06 |
28 | 67 |
|
29 |
#source /usr/share/modules/init/bash |
|
30 |
|
|
31 | 68 |
## cd to working directory |
32 | 69 |
cd /nobackupp1/awilso10/mod06 |
33 | 70 |
|
34 | 71 |
## set some memory limits |
35 | 72 |
# ulimit -d 1500000 -m 1500000 -v 1500000 #limit memory usage |
73 |
source /usr/local/lib/global.profile |
|
36 | 74 |
source /u/awilso10/.bashrc |
37 | 75 |
source /u/awilso10/moduleload |
38 |
source /usr/local/lib/global.profile |
|
39 | 76 |
## export a few important variables |
40 |
export NCORES=16 # use to limit mclapply() to set nubmer of cores, should be select*ncpus above
|
|
77 |
export NNODES=32
|
|
41 | 78 |
export R_LIBS=\"/u/awilso10/R/x86_64-unknown-linux-gnu-library/2.15/\" |
42 | 79 |
## load modules |
43 |
module load gcc hdf4 udunits R nco mpi-intel #mpi-sgi/mpt.2.06r6
|
|
80 |
# module load gcc comp-intel/2012.0.032 netcdf mpi-sgi/mpt.2.06r6 hdf4 udunits R nco
|
|
44 | 81 |
## Run the script! |
45 | 82 |
## current version not parallelizing across nodes! |
46 |
TMPDIR=$TMPDIR Rscript --verbose --vanilla /u/awilso10/environmental-layers/climate/procedures/MOD06_L2_process.r |
|
47 |
exit 0 |
|
48 |
exit 0 |
|
83 |
TMPDIR=$TMPDIR Rscript --verbose --vanilla /u/awilso10/environmental-layers/climate/procedures/MOD06_L2_process.r date=20000403 |
|
84 |
|
|
85 |
WORKLIST=notdone.txt |
|
86 |
EXE="Rscript" |
|
87 |
LOG=log/log_DataCompile.log |
|
49 | 88 |
|
89 |
TMPDIR=$TMPDIR mpiexec -np $NNODES /nobackupp4/pvotava/software/share/mqueue-eg/mqueue/mqueue -l $WORKLIST -p $EXE -v -v -v --random-starts 2-4 --work-analyze #> $LOG |
|
90 |
#mpiexec -np 2 /nobackupp4/pvotava/software/share/mqueue-eg/mqueue/mqueue -l testrun.txt -p $EXE -v -v -v #> $LOG |
|
91 |
#TMPDIR=$TMPDIR mpiexec -np $NNODES /nobackupp4/pvotava/software/share/mqueue-eg/mqueue/mqueue -l $WORKLIST -p $EXE -v -v -v #> $LOG |
|
92 |
exit 0 |
|
50 | 93 |
",sep=""),file="MOD06_process") |
51 | 94 |
|
52 | 95 |
### Check the file |
... | ... | |
55 | 98 |
|
56 | 99 |
## check queue status |
57 | 100 |
system("/u/scicon/tools/bin/node_stats.sh") |
101 |
system("/u/scicon/tools/bin/qtop.pl 479343") |
|
58 | 102 |
|
59 | 103 |
## Submit it (and keep the pid)! |
60 | 104 |
system("qsub MOD06_process") |
... | ... | |
71 | 115 |
system("qstat devel ") |
72 | 116 |
#system("qstat | grep awilso10") |
73 | 117 |
|
118 |
#################################### |
|
119 |
|
|
120 |
|
|
121 |
################################################################################ |
|
122 |
## now generate the climatologies |
|
123 |
fdly=data.frame( |
|
124 |
path=list.files(outdir,pattern="nc$",full=T), |
|
125 |
file=list.files(outdir,pattern="nc$")) |
|
126 |
fdly$date=as.Date(substr(fdly$file,7,14),"%Y%m%d") |
|
127 |
fdly$month=format(fdly$date,"%m") |
|
128 |
fdly$year=format(fdly$date,"%Y") |
|
129 |
|
|
130 |
## check validity (via npar and ntime) of nc files |
|
131 |
for(i in 1:nrow(fdly)){ |
|
132 |
fdly$ntime[i]=as.numeric(system(paste("cdo sinfo ",fdly$path[i]),intern=T)) |
|
133 |
fdly$npar[i]=as.numeric(system(paste("cdo -s npar ",fdly$path[i]),intern=T)) |
|
134 |
print(i) |
|
135 |
} |
|
136 |
|
|
137 |
## Combine all days within years into a single file (can't mergetime all days at once because this opens too many files) |
|
138 |
tsdir=paste(tempdir(),"/summary",sep="") |
|
139 |
dir.create(tsdir) |
|
140 |
lapply(unique(fdly$year),function(y){ |
|
141 |
system(paste("cdo -O mergetime ",paste(fdly$path[fdly$year==y],collapse=" ")," ",tsdir,"/MOD09_",tile,"_",y,"_daily.nc",sep="")) |
|
142 |
print(paste("Finished merging daily files for year",y)) |
|
143 |
}) |
|
144 |
## Combine the year-by-year files into a single daily file |
|
145 |
system(paste("cdo -O mergetime ",paste(list.files(tsdir,full=T,pattern="daily[.]nc$"),collapse=" ")," ",outdir2,"/MOD09_",tile,"_daily.nc",sep="")) |
|
146 |
|
|
147 |
system(paste("cdo -O monmean ",outdir2,"/MOD09_",tile,"_daily.nc ",outdir2,"/",tile,"_monmean.nc",sep="")) |
|
148 |
system(paste("cdo -O ymonmean ",outdir2,"/MOD09_",tile,"_daily.nc ",outdir2,"/",tile,"_ymonmean.nc",sep="")) |
|
149 |
system(paste("cdo -O ymonstd ",outdir2,"/MOD09_",tile,"_daily.nc ",outdir2,"/",tile,"_ymonstd.nc",sep="")) |
|
150 |
|
|
151 |
print("Finished! %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") |
|
152 |
## quit R |
|
153 |
q("no") |
|
154 |
|
|
155 |
|
|
156 |
################################################################# |
|
74 | 157 |
|
75 | 158 |
### copy the files back to Yale |
76 | 159 |
list.files("2_daily") |
... | ... | |
81 | 164 |
|
82 | 165 |
|
83 | 166 |
list.files(" /tmp/Rtmp6I6tFn") |
167 |
|
|
168 |
|
|
169 |
|
|
170 |
|
Also available in: Unified diff
Currently submission hangs using mqueue when loading rgdal library. Will attempt to add a separate submission script that first loads modules before running Rscript on the slaves