1
|
#### Script to facilitate processing of MOD35 (cloud mask) data
#### NOTE(review): header previously said MOD06, but everything below
#### operates on MOD35_L2 swaths -- corrected.

## Working directory on the Pleiades nobackup filesystem
setwd("/nobackupp1/awilso10/mod35")

library(rgdal)    # GDAL/OGR bindings for spatial data I/O
library(raster)   # raster data handling
library(RSQLite)  # swath-geometry SQLite database access

## Print progress messages?  TRUE rather than T (T is reassignable).
verbose <- TRUE
|
11
|
|
12
|
## Get MODLAND tile information (bounding boxes for every MODIS tile).
## The table has 648 data rows after a 6-line header; rows with
## lon_min == -999 are fill entries and are dropped.
tb <- read.table(
  "http://landweb.nascom.nasa.gov/developers/sn_tiles/sn_bound_10deg.txt",
  skip = 6, nrows = 648, header = TRUE)
## Build the standard tile id, e.g. "h11v08"
tb$tile <- sprintf("h%02dv%02d", tb$ih, tb$iv)
tb <- tb[tb$lon_min != -999, ]
save(tb, file = "modlandTiles.Rdata")
## Reload from disk (same object; allows skipping the download later)
load("modlandTiles.Rdata")
|
18
|
|
19
|
## Delete temporary HEG toolkit log file that can grow to GB.
## unlink() is portable and silent if the file is already absent,
## unlike spawning a shell with system("rm ...").
unlink("/nobackupp1/awilso10/software/heg/TOOLKIT_MTD/runtime/LogStatus")
|
21
|
|
22
|
|
23
|
## Candidate single tiles (exploratory; the last assignment wins)
tile <- "h11v08"  # Venezuela
#tile <- "h11v07" # Venezuela coast
#tile <- "h09v04" # Oregon
tile <- "h21v09"  # Kenya

### List of tiles to process (each assignment below overrides the previous)
tiles <- c("h11v08", "h21v09", "h08v04", "h09v04", "h08v05", "h09v05",
           "h20v11", "h31v11")
tiles <- c("h10v08", "h11v08", "h12v08", "h10v07", "h11v07", "h12v07") # South America

## Subset to MODLAND tiles: extract the tile id (filename chars 18-23)
## from all 2010 MOD11A1 HDF files present in the datapool.
modlandtiles <- system("ls -r /nobackupp4/datapool/modis/MOD11A1.005/2010* | grep hdf$ | cut -c18-23 | sort | uniq - ", intern = TRUE)
tb$land <- tb$tile %in% modlandtiles
tiles <- tb$tile[tb$land]

## Subset the tile corner table to the tiles selected above
tile_bb <- tb[tb$tile %in% tiles, ]
|
39
|
|
40
|
### Get list of files to process
datadir <- "/nobackupp4/datapool/modis/MOD35_L2.006/"  # input swath datapool
outdir  <- "daily/"  # paste("daily/", tile, sep = "")
|
44
|
|
45
|
## Find swaths intersecting each tile from the sqlite swath-geometry
## database (bounding-box overlap test per tile).
## Path to swath database:
db <- "/nobackupp4/pvotava/DB/export/swath_geo.sql.sqlite3.db"
## Use the SQLite() driver object (the "SQLite" string form is the
## deprecated legacy DBI calling convention).
con <- dbConnect(SQLite(), dbname = db)
## seq_len() instead of 1:nrow() guards against a zero-row tile_bb.
fs <- do.call(rbind.data.frame, lapply(seq_len(nrow(tile_bb)), function(i) {
  d <- dbGetQuery(con, paste("SELECT * from swath_geo6
           WHERE east>=", tile_bb$lon_min[i], " AND
                 west<=", tile_bb$lon_max[i], " AND
                 north>=", tile_bb$lat_min[i], " AND
                 south<=", tile_bb$lat_max[i]))
  d$tile <- tile_bb$tile[i]
  print(paste("Finished tile", tile_bb$tile[i]))
  d
}))
## Do not assign the result back to con: dbDisconnect() returns a logical,
## and overwriting con would hide the (now stale) connection object.
dbDisconnect(con)
## Keep only the date/time portion of the swath id
fs$id <- substr(fs$id, 7, 19)
|
62
|
|
63
|
### Identify which swaths are actually available in the datapool
swaths <- data.frame(
  ## full.names spelled out: the original relied on partial matching (full=T)
  path = list.files(datadir, pattern = "hdf$", recursive = TRUE,
                    full.names = TRUE),
  stringsAsFactors = FALSE)  # all swaths in the data pool
## Swath id = date/time portion of the filename (chars 10-22)
swaths$id <- substr(basename(swaths$path), 10, 22)
fs$exists <- fs$id %in% swaths$id
fs$path <- swaths$path[match(fs$id, swaths$id)]

## "received" typo fixed in the status message
if (verbose) print(paste("###############", nrow(fs), " swath IDs received from database"))

## Get all unique dates that have at least one available swath
fs$dateid <- format(as.Date(paste0(fs$year, fs$day), "%Y%j"), "%Y%m%d")
alldates <- unique(fs$dateid[fs$exists])
|
74
|
|
75
|
#### Generate submission file
startdate <- "2000-03-01"
stopdate  <- "2011-12-31"
## just 2005 (overrides the full range above)
startdate <- "2005-01-01"
stopdate  <- "2005-12-31"

## Daily date sequence formatted to match fs$dateid (YYYYMMDD)
alldates <- format(seq(as.Date(startdate), as.Date(stopdate), 1), "%Y%m%d")

## One row per tile-date combination to (potentially) process
proclist <- expand.grid(date = alldates, tile = tiles)
proclist$year <- substr(proclist$date, 1, 4)

## Flag tile-dates that have at least one available swath
avail <- unique(cbind.data.frame(tile = fs$tile, date = fs$dateid)[fs$exists, ])
proclist$avail <- paste(proclist$tile, proclist$date, sep = "_") %in%
  paste(avail$tile, avail$date, sep = "_")
|
90
|
|
91
|
## Identify which tile-dates have already been completed (daily .nc files
## archived on lou)
fdone <- data.frame(path = system("ssh lou 'find MOD35/daily -name \"*.nc\"' ", intern = TRUE))
#fdone <- data.frame(path = list.files(outdir, pattern = "nc$", recursive = TRUE))
donenames <- basename(as.character(fdone$path))
## Filenames encode tile (chars 7-12) and date (chars 14-21)
fdone$date <- substr(donenames, 14, 21)
fdone$tile <- substr(donenames, 7, 12)
proclist$done <- paste(proclist$tile, proclist$date, sep = "_") %in%
  substr(donenames, 7, 21)

### Report on what has already been processed
print(paste(sum(!proclist$done), " out of ", nrow(proclist), " (",
            round(100 * sum(!proclist$done) / nrow(proclist), 2), "%) remain"))
table(tile = proclist$tile[proclist$done], year = proclist$year[proclist$done])
|
101
|
|
102
|
## Per-day processing script executed by each MPI task
script <- "/u/awilso10/environmental-layers/climate/procedures/MOD35_L2_process.r"

## Write the work list consumed by mpiexec/pxargs: one command line per
## tile-date that is available but not yet done.
tp <- (!proclist$done) & proclist$avail  # date-tiles to process
table(Available = proclist$avail, Completed = proclist$done)

write.table(
  paste0("--verbose ", script, " --date ", proclist$date[tp],
         " --verbose T --tile ", proclist$tile[tp]),
  file = "notdone.txt", row.names = FALSE, col.names = FALSE, quote = FALSE)
|
110
|
|
111
|
### qsub script: written verbatim to the file "mod35_qsub".
### (cat() on a single string needs no paste(..., sep = "") wrapper.)
qsub_script <- "
#PBS -S /bin/bash
#PBS -l select=100:ncpus=8:mpiprocs=8
##PBS -l select=20:ncpus=8:mpiprocs=8
#PBS -l walltime=5:00:00
##PBS -l walltime=2:00:00
#PBS -j n
#PBS -m be
#PBS -N mod35
#PBS -q normal
##PBS -q devel
#PBS -V

CORES=800
#CORES=160

HDIR=/u/armichae/pr/
# source $HDIR/etc/environ.sh
source /u/awilso10/environ.sh
source /u/awilso10/.bashrc
IDIR=/nobackupp1/awilso10/mod35/
##WORKLIST=$HDIR/var/run/pxrRgrs/work.txt
WORKLIST=$IDIR/notdone.txt
EXE=Rscript
LOGSTDOUT=$IDIR/log/mod35_stdout
LOGSTDERR=$IDIR/log/mod35_stderr
### use mpiexec to parallelize across days
mpiexec -np $CORES pxargs -a $WORKLIST -p $EXE -v -v -v --work-analyze 1> $LOGSTDOUT 2> $LOGSTDERR
"
cat(qsub_script, file = "mod35_qsub")
|
141
|
|
142
|
|
143
|
### Check the generated files before submitting
system("cat mod35_qsub")            # inspect the qsub script
system("cat notdone.txt | head")    # first few work-list entries
system("cat notdone.txt | wc -l ")  # number of jobs to be run

## Submit it
system("qsub mod35_qsub")
system("qstat -u awilso10")         # check queue status
|
151
|
|
152
|
#######################################################
### Now submit the script to generate the climatologies

tiles
## Subset to only some tiles (for example if some aren't finished yet)?
ctiles <- tiles  #[c(1:3)]
climatescript <- "/pleiades/u/awilso10/environmental-layers/climate/procedures/MOD35_Climatology.r"

## Check which tiles have been processed and are on lou with a filename
## "MOD35_[tile].nc"
cdone <- data.frame(path = sapply(strsplit(basename(
  system("ssh lou 'find MOD35/summary -name \"MOD35_h[0-9][0-9]v[0-9][0-9].nc\"' ", intern = TRUE)), split = "_"), function(x) x[2]))
cdone$tile <- substr(basename(as.character(cdone$path)), 1, 6)
## Fixed "/n" -> "\n" (the original printed a literal slash-n)
print(paste(length(ctiles[!ctiles %in% cdone$tile]),
            "Tiles still need to be processed: \n ",
            ctiles[!ctiles %in% cdone$tile]))

## Write the work list processed by mpiexec: one command per unfinished tile
write.table(
  paste0("--verbose ", climatescript, " --verbose T --tile ",
         ctiles[!ctiles %in% cdone$tile]),
  file = "notdone_climate.txt", row.names = FALSE, col.names = FALSE,
  quote = FALSE)
|
168
|
|
169
|
## Delay start until previous jobs have finished?
delay <- FALSE
## Check running jobs to get the JobID of the job you want to wait for
system("qstat -u awilso10")
## Enter JobID here:
job <- "881394.pbspl1.nas.nasa.gov"
|
175
|
|
176
|
### qsub script for the climatology run.  If delay is TRUE, insert a PBS
### dependency line so the job waits for `job` to finish; paste() silently
### drops the NULL produced when delay is FALSE.
depend_line <- if (delay) paste("#PBS -W depend=afterany:", job, sep = "")
cat(paste("
#PBS -S /bin/bash
#PBS -l select=1:ncpus=16:mem=94
#PBS -l walltime=24:00:00
#PBS -j n
#PBS -m be
#PBS -N mod35_climate
#PBS -q ldan
#PBS -V
", depend_line, "

CORES=16
HDIR=/u/armichae/pr/
source $HDIR/etc/environ.sh
source /pleiades/u/awilso10/environ.sh
source /pleiades/u/awilso10/.bashrc
IDIR=/nobackupp1/awilso10/mod35/
##WORKLIST=$HDIR/var/run/pxrRgrs/work.txt
WORKLIST=$IDIR/notdone_climate.txt
EXE=Rscript
LOGSTDOUT=$IDIR/log/climatology_stdout
LOGSTDERR=$IDIR/log/climatology_stderr
### use mpiexec to parallelize across tiles
mpiexec -np $CORES pxargs -a $WORKLIST -p $EXE -v -v -v --work-analyze 1> $LOGSTDOUT 2> $LOGSTDERR
", sep = ""), file = "mod35_climatology_qsub")
|
202
|
|
203
|
## Check the generated files
system("cat mod35_climatology_qsub")        # qsub submission script
system("cat notdone_climate.txt | head")    # top of job file
system("cat notdone_climate.txt | wc -l ")  # number of jobs to be run

## Submit it
system("qsub mod35_climatology_qsub")

## Check progress
system("qstat -u awilso10")

## Start interactive job on compute node for debugging
# system("qsub -I -l walltime=2:00:00 -lselect=2:ncpus=16:model=san -q devel")
|
216
|
|
217
|
|
218
|
#################################################################
### Copy the summary files back to Yale.
### These are MANUAL steps run by hand in a shell on lou.  They are kept
### as comments because the raw shell lines (rsync/exit) are not valid R
### and made this file unparseable by source().
# ssh lou
# scp `find MOD35/summary -name "MOD35_h[0-9][0-9]v[0-9][0-9].nc"` adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod35/summary/
# rsync -vv `find MOD35/summary -name "MOD35_h[0-9][0-9]v[0-9][0-9].nc"` adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod35/summary/
# exit
|
225
|
|
226
|
|