1 |
35d59dc1
|
Adam M. Wilson @ pfe
|
#### Script to facilitate processing of MOD35 data
|
2 |
95354b03
|
Adam M. Wilson @ pfe
|
### This script is meant to be run interactively, rather than unsupervised. There are several steps that require manual checking (such as choosing the number of cores, etc.)
|
3 |
|
|
|
4 |
|
|
## working directory
|
5 |
c1352601
|
Adam M. Wilson @ pfe
|
## NOTE: relative paths used further down (e.g. "MODTILES/", "daily/", "summary/", "notdone.txt") resolve against this directory
setwd("/nobackupp1/awilso10/mod35")
|
6 |
35d59dc1
|
Adam M. Wilson @ pfe
|
|
7 |
95354b03
|
Adam M. Wilson @ pfe
|
## load libraries
|
8 |
35d59dc1
|
Adam M. Wilson @ pfe
|
library(rgdal)
|
9 |
|
|
library(raster)
|
10 |
|
|
library(RSQLite)
|
11 |
|
|
|
12 |
95354b03
|
Adam M. Wilson @ pfe
|
## Verbosity switch: when TRUE, extra progress messages are printed further down
verbose <- TRUE

## Fetch the MODLAND 10-degree tile bounds table (one row per tile;
## columns used below: ih, iv, lon_min, lon_max, lat_min, lat_max)
tb <- read.table(
  "http://landweb.nascom.nasa.gov/developers/sn_tiles/sn_bound_10deg.txt",
  skip = 6,
  nrows = 648,
  header = TRUE
)
## Build the tile label from the horizontal/vertical indices, e.g. "h08v05"
tb$tile <- sprintf("h%02dv%02d", tb$ih, tb$iv)
## Discard rows whose lon_min carries the -999 placeholder
## (presumably tiles with no valid bounds -- confirm against the table header)
has_bounds <- tb$lon_min != -999
tb <- tb[has_bounds, ]
## Cache the table on disk, then read the cached copy back in
save(tb, file = "modlandTiles.Rdata")
load("modlandTiles.Rdata")
|
21 |
|
|
|
22 |
e4e30b86
|
Adam M. Wilson @ pfe
|
## Pick the tiles to work on. Two alternatives follow; when the script is
## run straight through, the second assignment wins.

### Alternative 1: an explicit list of tiles
tiles <- c("h10v08", "h11v08", "h12v08", "h10v07", "h11v07", "h12v07") # South America

### Alternative 2: a northern block, every combination of h11..h17 with v00..v04
### (h varies fastest: h11v00, h12v00, ..., h17v00, h11v01, ...)
tiles <- as.vector(outer(paste0("h", 11:17), sprintf("v%02d", 0:4), paste0))
|
27 |
|
|
## List the tiles that have a tif in MODTILES/ (first six characters of each
## file name, de-duplicated by the shell pipeline)
alltiles <- system("ls -r MODTILES/ | grep tif$ | cut -c1-6 | sort | uniq - ", intern = TRUE)

## Keep only requested tiles that exist in MODTILES/
## (i.e. not outside the global boundary in sinusoidal projection)
tiles <- tiles[match(tiles, alltiles, nomatch = 0L) > 0L]

## Rows of the tile bounds table for the tiles selected above
tile_bb <- tb[is.element(tb$tile, tiles), ]
|
35 |
|
|
|
36 |
|
|
### Locations of the input swaths and of the daily output files
datadir <- "/nobackupp4/datapool/modis/MOD35_L2.006/"

outdir <- "daily/" # paste("daily/",tile,sep="")

## Find the swaths intersecting each selected tile using the sqlite swath
## database, and record which of them are present in the datapool.
## This takes a while, about 30 minutes, so only rebuild if you need to
## update what's available...
rebuildswathtable <- FALSE
if (rebuildswathtable) {
  ## at least one tile is needed, otherwise there is nothing to query
  stopifnot(nrow(tile_bb) > 0)

  ## path to swath database
  db <- "/nobackupp4/pvotava/DB/export/swath_geo.sql.sqlite3.db"
  ## pass a driver object: character driver names are not accepted by
  ## current DBI/RSQLite releases
  con <- dbConnect(SQLite(), dbname = db)

  ## one bounding-box query per tile; the connection is closed even when a
  ## query fails
  fs <- tryCatch(
    do.call(rbind.data.frame, lapply(seq_len(nrow(tile_bb)), function(i) {
      ## the interpolated values are numeric bounds taken from the tile table
      sql <- paste(
        "SELECT * from swath_geo6 WHERE east>=", tile_bb$lon_min[i],
        " AND west<=", tile_bb$lon_max[i],
        " AND north>=", tile_bb$lat_min[i],
        " AND south<=", tile_bb$lat_max[i]
      )
      d <- dbGetQuery(con, sql)
      ## rep() keeps this valid when the query returns zero rows
      d$tile <- rep(tile_bb$tile[i], nrow(d))
      print(paste("Finished tile", tile_bb$tile[i]))
      d
    })),
    finally = dbDisconnect(con)
  )
  ## keep characters 7-19 of the database id so it can be matched against
  ## the file-name derived id below
  fs$id <- substr(fs$id, 7, 19)

  ## Identify which swaths are available in the datapool
  swaths <- data.frame(
    path = list.files(datadir, pattern = "hdf$", recursive = TRUE, full.names = TRUE),
    stringsAsFactors = FALSE
  ) # all swaths in data pool
  swaths$id <- substr(basename(swaths$path), 10, 22)
  fs$exists <- fs$id %in% swaths$id
  fs$path <- swaths$path[match(fs$id, swaths$id)]

  ## write tile-swath list to disk
  save(fs, swaths, file = "swathtile.Rdata")
}

## Load the (possibly just rebuilt) tile-swath table: objects `fs` and `swaths`
load("swathtile.Rdata")

if (verbose) print(paste("###############", nrow(fs), " swath IDs recieved from database"))
|
75 |
|
|
|
76 |
|
|
## Convert each swath's year + day-of-year into a YYYYMMDD string
fs$dateid <- format(
  as.Date(paste0(fs$year, fs$day), format = "%Y%j"),
  format = "%Y%m%d"
)
# alldates=unique(fs$dateid[fs$exists])
|
79 |
35d59dc1
|
Adam M. Wilson @ pfe
|
|
80 |
|
|
#### Generate submission file
## Full period
startdate <- "2000-03-01"
stopdate <- "2011-12-31"
## ... or restrict to 2009 only (this assignment wins when run straight through)
startdate <- "2009-01-01"
stopdate <- "2009-12-31"

## Every calendar day in [startdate, stopdate], formatted as YYYYMMDD
alldates <- format(
  seq(from = as.Date(startdate), to = as.Date(stopdate), by = 1),
  "%Y%m%d"
)
|
88 |
|
|
|
89 |
35d59dc1
|
Adam M. Wilson @ pfe
|
## One row per date x tile combination that could be processed
## (expand.grid stores date and tile as factors; later table() calls use them)
proclist <- expand.grid(date = alldates, tile = tiles)
proclist$year <- substr(as.character(proclist$date), 1, 4)

## Flag the tile-dates that have at least one swath present in the datapool;
## tile-dates without any available swath get avail == FALSE
avail <- unique(
  data.frame(tile = fs$tile, date = fs$dateid, check.names = FALSE)[fs$exists, ]
)
proclist$avail <- is.element(
  paste(proclist$tile, proclist$date, sep = "_"),
  paste(avail$tile, avail$date, sep = "_")
)
|
95 |
|
|
|
96 |
35d59dc1
|
Adam M. Wilson @ pfe
|
## Work out which tile-dates already have an output file in outdir
# fdone=data.frame(path=system("ssh lou 'find MOD35/daily -name \"*.nc\"' ",intern=T))
fdone <- data.frame(path = list.files(outdir, pattern = "nc$", recursive = TRUE))
## characters 7-12 of the file name hold the tile, 14-21 the date
fdone[["date"]] <- substr(basename(as.character(fdone$path)), 14, 21)
fdone[["tile"]] <- substr(basename(as.character(fdone$path)), 7, 12)
## characters 7-21 together form "<tile>_<date>", the same key built from proclist
proclist$done <- is.element(
  paste(proclist$tile, proclist$date, sep = "_"),
  substr(basename(as.character(fdone$path)), 7, 21)
)

### Summarise progress
## share of tile-dates that still have no output file
print(paste(sum(!proclist$done), " out of ", nrow(proclist), " (", round(100 * sum(!proclist$done) / nrow(proclist), 2), "%) remain"))
## distribution and table of completed tile-dates per tile and year
stem(with(proclist, table(tile = tile[done], year = year[done])))
with(proclist, table(tile = tile[done], year = year[done]))
## how many tile/year cells share each count of outstanding tile-dates
table(with(proclist, table(tile = tile[!done], year = year[!done])))

### explore tile counts
# x=table(tile=proclist$tile[proclist$done],year=proclist$year[proclist$done])
# x=x[order(rownames(x)),]
|
112 |
35d59dc1
|
Adam M. Wilson @ pfe
|
|
113 |
|
|
## R script run once per tile-date by the cluster job
script <- "/u/awilso10/environmental-layers/climate/procedures/MOD35_L2_process.r"

## Select the tile-dates to process and write one argument line per job to
## notdone.txt (the work list read by the qsub script)
tp <- TRUE # rerun everything
tp <- ((!proclist$done) & proclist$avail) # date-tiles to process
table(Available = proclist$avail, Completed = proclist$done)
table(tp)

## Build the job lines only when something is selected: paste() recycles
## zero-length vectors to "", which would otherwise produce a single bogus
## line with an empty date and tile.
worklist <- if (any(tp)) {
  paste0(
    "--verbose ", script,
    " --date ", as.character(proclist$date[tp]),
    " --verbose T --tile ", as.character(proclist$tile[tp])
  )
} else {
  character(0)
}
## one job per line; an empty work list yields an empty file
writeLines(worklist, "notdone.txt")
|
123 |
|
|
|
124 |
95354b03
|
Adam M. Wilson @ pfe
|
## Optional: process a single tile-date locally and extrapolate the runtime
## to the whole work list. Switch `test` to TRUE to run it.
test <- FALSE
if (test) {
  ## row of proclist used for the trial run
  i <- 2
  time1 <- system.time(
    system(paste0(
      "Rscript --verbose ", script,
      " --date ", proclist$date[i],
      " --verbose T --tile ", proclist$tile[i]
    ))
  )
  ## estimate from a fixed 142 seconds per tile-date ...
  hours <- round(length(proclist$date[tp]) * 142 / 60 / 60)
  ## ... replaced by an estimate from the elapsed time just measured
  hours <- round(length(proclist$date[tp]) * time1[3] / 60 / 60, 1)
  ## estimate divided by 240 (shown when stepping through interactively)
  hours / 240
  print(paste("Based on runtime of previous command, it will take", hours, " hours to process the full set"))
}
|
134 |
|
|
|
135 |
|
|
|
136 |
35d59dc1
|
Adam M. Wilson @ pfe
|
### Write the PBS submission script for the daily processing job.
### Each element below is one line of the file "mod35_qsub"; lines starting
### with "##PBS" are disabled alternatives kept for reference.
writeLines(
  c(
    "",
    "#PBS -S /bin/bash",
    ## 28 chunks x 8 MPI processes = 224, which matches CORES below
    "#PBS -l select=28:ncpus=8:mpiprocs=8",
    "##PBS -l select=100:ncpus=8:mpiprocs=8",
    "##PBS -l walltime=8:00:00",
    "#PBS -l walltime=2:00:00",
    "#PBS -j n",
    "#PBS -m be",
    "#PBS -N mod35",
    "##PBS -q normal",
    "#PBS -q devel",
    "#PBS -V",
    "",
    "#CORES=800",
    "CORES=224",
    "",
    "HDIR=/u/armichae/pr/",
    "source $HDIR/etc/environ.sh",
    "source /u/awilso10/environ.sh",
    "source /u/awilso10/.bashrc",
    "IDIR=/nobackupp1/awilso10/mod35/",
    "##WORKLIST=$HDIR/var/run/pxrRgrs/work.txt",
    ## the work list is the notdone.txt file written earlier in this script
    "WORKLIST=$IDIR/notdone.txt",
    "EXE=Rscript",
    "LOGSTDOUT=$IDIR/log/mod35_stdout",
    "LOGSTDERR=$IDIR/log/mod35_stderr",
    "### use mpiexec to parallelize across days",
    "mpiexec -np $CORES pxargs -a $WORKLIST -p $EXE -v -v -v --work-analyze 1> $LOGSTDOUT 2> $LOGSTDERR"
  ),
  "mod35_qsub"
)

### Inspect what was written before submitting
system("cat mod35_qsub")          # the submission script
system("cat notdone.txt | head")  # first few jobs
system("cat notdone.txt | wc -l ") # number of jobs

## Submit the job
system("qsub mod35_qsub")

## List this user's queued/running jobs
system("qstat -u awilso10")
|
177 |
35d59dc1
|
Adam M. Wilson @ pfe
|
|
178 |
|
|
#######################################################
|
179 |
|
|
### Now submit the script to generate the climatologies
|
180 |
|
|
|
181 |
e4e30b86
|
Adam M. Wilson @ pfe
|
## report 'mostly' finished tiles
## this relies on proclist above so be sure to update above before running
## md: number of tile-dates still to be processed, per tile (rows) and year (columns)
md <- table(tile = proclist$tile[!proclist$done], year = proclist$year[!proclist$done])
## Tiles with fewer than 10 outstanding tile-dates in every year.
## Selecting on row names (instead of names(md[md<10,])) also works when
## exactly one tile qualifies or when more than one year is present.
mdt <- rownames(md)[rowSums(md >= 10) == 0]
tiles <- mdt

tiles
ctiles <- c("h10v08", "h11v08", "h12v08", "h10v07", "h11v07", "h12v07") # South America

ctiles <- tiles # [c(1:3)] #subset to only some tiles (for example if some aren't finished yet)?
climatescript <- "/pleiades/u/awilso10/environmental-layers/climate/procedures/MOD35_Climatology.r"

## check which tiles have been processed and are on lou with a filename "MOD35_[tile].nc"
## Three alternatives follow; when run straight through the last one wins.
cdone <- data.frame(path = "", tile = "") # use this if you want to re-run everything
## summaries found on lou (second "_"-separated part of each file name)
cdone <- data.frame(path = vapply(
  strsplit(
    basename(system("ssh lou 'find MOD35/summary -name \"MOD35_h[0-9][0-9]v[0-9][0-9].nc\"' ", intern = TRUE)),
    split = "_"
  ),
  function(x) x[2],
  character(1)
))
## summaries found in the local summary directory
cdone <- data.frame(path = vapply(
  strsplit(
    basename(system("find summary -name \"MOD35_h[0-9][0-9]v[0-9][0-9].nc\"", intern = TRUE)),
    split = "_"
  ),
  function(x) x[2],
  character(1)
))
cdone$tile <- substr(basename(as.character(cdone$path)), 1, 6)

## tiles that still lack a summary file
climate_todo <- ctiles[!ctiles %in% cdone$tile]
print(paste(length(climate_todo), "Tiles still need to be processed"))

## write the table processed by mpiexec
## (guarded: paste() on a zero-length tile vector would emit one bogus line)
climate_worklist <- if (length(climate_todo) > 0) {
  paste0("--verbose ", climatescript, " --verbose T --tile ", climate_todo)
} else {
  character(0)
}
writeLines(climate_worklist, "notdone_climate.txt")

## delay start until previous jobs have finished?
delay <- FALSE
## check running jobs to get JobID of job you want to wait for
system("qstat -u awilso10")
## enter JobID here:
job <- "881394.pbspl1.nas.nasa.gov"
|
212 |
|
|
|
213 |
|
|
### Write the PBS submission script for the climatology job.
### Each element below is one line of the file "mod35_climatology_qsub";
### lines starting with "##PBS" are disabled alternatives kept for reference.
writeLines(
  c(
    "",
    "#PBS -S /bin/bash",
    ## 10 chunks x 8 cpus = 80, which matches CORES below
    ## NOTE(review): mem=94 carries no unit suffix -- confirm the intended size
    "#PBS -l select=10:ncpus=8:mem=94",
    "#PBS -l walltime=2:00:00",
    "#PBS -j n",
    "#PBS -m be",
    "#PBS -N mod35_climate",
    "#PBS -q devel",
    "##PBS -q normal",
    "##PBS -q ldan",
    "#PBS -V",
    ## optional dependency on the job id stored in `job`; an empty line otherwise
    if (delay) paste0("#PBS -W depend=afterany:", job) else "",
    "",
    "CORES=80",
    "HDIR=/u/armichae/pr/",
    "source $HDIR/etc/environ.sh",
    "source /pleiades/u/awilso10/environ.sh",
    "source /pleiades/u/awilso10/.bashrc",
    "IDIR=/nobackupp1/awilso10/mod35/",
    "##WORKLIST=$HDIR/var/run/pxrRgrs/work.txt",
    ## the work list is the notdone_climate.txt file written earlier in this script
    "WORKLIST=$IDIR/notdone_climate.txt",
    "EXE=Rscript",
    "LOGSTDOUT=$IDIR/log/climatology_stdout",
    "LOGSTDERR=$IDIR/log/climatology_stderr",
    "### use mpiexec to parallelize across tiles",
    "mpiexec -np $CORES pxargs -a $WORKLIST -p $EXE -v -v -v --work-analyze 1> $LOGSTDOUT 2> $LOGSTDERR"
  ),
  "mod35_climatology_qsub"
)

## Inspect what was written before submitting
system("cat mod35_climatology_qsub")        # qsub submission script
system("cat notdone_climate.txt | head")    # top of job file
system("cat notdone_climate.txt | wc -l ")  # number of jobs to be run

## Submit the job
system("qsub mod35_climatology_qsub")

## Check progress: list this user's queued/running jobs
system("qstat -u awilso10")
|
252 |
|
|
|
253 |
|
|
## start interactive job on compute node for debugging
|
254 |
|
|
# system("qsub -I -l walltime=2:00:00 -lselect=2:ncpus=16:model=san -q devel")
|
255 |
|
|
|
256 |
|
|
|
257 |
|
|
#################################################################
|
258 |
|
|
### copy the files back to Yale
|
259 |
|
|
|
260 |
5af36cdd
|
Adam M. Wilson @ pfe
|
|
261 |
be64daa8
|
Adam M. Wilson @ pfe
|
## Open a shell on lou (blocks until that shell is closed)
system("ssh lou")
# scp `find MOD35/summary -name "MOD35_h[0-9][0-9]v[0-9][0-9].nc"` adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod35/summary/
## Copy the per-tile summary files (the *_mean.nc files first, then the plain
## MOD35_<tile>.nc files) to the Yale server
system("rsync -cavv `find summary -name \"MOD35_h[0-9][0-9]v[0-9][0-9]_mean.nc\"` adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod35/summary/")
system("rsync -cavv `find summary -name \"MOD35_h[0-9][0-9]v[0-9][0-9].nc\"` adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod35/summary/")

## Mosaic the 2009 mean files into a VRT, convert it to an LZW-compressed
## GeoTIFF and copy the result off the cluster
system("gdalbuildvrt MOD35C6_2009.vrt summary/*2009mean.nc ")
system("gdal_translate -stats -co \"COMPRESS=LZW\" -of GTiff MOD35C6_2009.vrt MOD35C6_2009.tif ")
## NOTE(review): the destination host "acrobates.eeb.24.177.10.190" mixes a
## host name with an IP address -- confirm before relying on this command
system("scp MOD35C6_2009.tif adamw@acrobates.eeb.24.177.10.190:/Users/adamw/Downloads/")
## `exit` is a shell command (presumably meant for leaving the ssh session
## opened above); as a bare R expression it fails with "object 'exit' not
## found", so it is kept as a comment only.
# exit
|
271 |
35d59dc1
|
Adam M. Wilson @ pfe
|
|