Project

General

Profile

Download (10 KB) Statistics
| Branch: | Revision:
1
#### Script to facilitate processing of MOD06 data
2
### This script is meant to be run iteratively, rather than unsupervised. There are several steps that require manual checking (such as choosing the number of cores, etc.)
3

    
4
## All relative paths used below (modlandTiles.Rdata, swathtile.Rdata, daily/,
## notdone.txt, ...) are resolved against this directory.
setwd("/nobackupp1/awilso10/mod35")

## Packages attached for the rest of the script
library(rgdal)
library(raster)
library(RSQLite)

## When TRUE, extra progress messages are printed
verbose <- TRUE
14

    
15
## Download the MODLAND 10-degree tile bounding-box table, label each row
## with its "hXXvYY" tile name, drop rows flagged with lon_min == -999,
## and cache the result on disk.
tb <- read.table(
  "http://landweb.nascom.nasa.gov/developers/sn_tiles/sn_bound_10deg.txt",
  skip = 6, nrows = 648, header = TRUE
)
tb$tile <- sprintf("h%02dv%02d", tb$ih, tb$iv)
tb <- tb[tb$lon_min != -999, ]
save(tb, file = "modlandTiles.Rdata")
## Reload from the cache (lets the download above be skipped on later runs)
load("modlandTiles.Rdata")
21

    
22
## Pick the tiles to process. Two alternatives are listed; when the script is
## run top to bottom the second assignment is the one that takes effect.
### option 1: six tiles in South America
tiles <- c("h10v08", "h11v08", "h12v08", "h10v07", "h11v07", "h12v07")
### option 2: a northern block, h11..h17 by v00..v04 (h varies fastest)
tiles <- as.vector(outer(
  paste0("h", 11:17),
  c("v00", "v01", "v02", "v03", "v04"),
  paste0
))
27
## Tile IDs for which a .tif exists under MODTILES/ (first six characters of
## each matching file name, de-duplicated by the shell pipeline)
alltiles <- system(
  "ls -r MODTILES/ | grep tif$ | cut -c1-6 | sort | uniq - ",
  intern = TRUE
)

## Keep only the requested tiles that appear in that listing
## (i.e. not outside the global boundary in sinusoidal projection)
tiles <- tiles[is.element(tiles, alltiles)]

## Bounding-box rows of the tile table for the tiles that remain
tile_bb <- tb[is.element(tb$tile, tiles), ]
35

    
36
### get list of files to process
## root of the MOD35_L2 swath archive that is searched for *.hdf files below
datadir <- "/nobackupp4/datapool/modis/MOD35_L2.006/"

## directory scanned later for finished daily *.nc output
outdir <- "daily/" # paste("daily/",tile,sep="")

## Find the swaths overlapping each selected tile from the sqlite swath
## database and record which of them exist in the data pool.
## this takes a while, about 30 minutes, so only rebuild if you need to update what's available...
rebuildswathtable <- FALSE
if (rebuildswathtable) {
  ## an empty tile selection would otherwise produce malformed queries
  if (nrow(tile_bb) == 0) {
    stop("no tiles selected: tile_bb is empty", call. = FALSE)
  }

  ## path to swath database
  db <- "/nobackupp4/pvotava/DB/export/swath_geo.sql.sqlite3.db"
  ## use the driver object rather than the driver name as a string
  con <- dbConnect(SQLite(), dbname = db)

  ## one bounding-box query per tile; results are stacked into one table.
  ## tryCatch(finally=) closes the connection even if a query fails.
  fs <- tryCatch(
    do.call(rbind.data.frame, lapply(seq_len(nrow(tile_bb)), function(i) {
      d <- dbGetQuery(con, paste("SELECT * from swath_geo6
            WHERE east>=", tile_bb$lon_min[i], " AND
                  west<=", tile_bb$lon_max[i], " AND
                  north>=", tile_bb$lat_min[i], " AND
                  south<=", tile_bb$lat_max[i])
      )
      ## rep() keeps this valid when the query returned zero rows
      d$tile <- rep(tile_bb$tile[i], nrow(d))
      print(paste("Finished tile", tile_bb$tile[i]))
      d
    })),
    finally = dbDisconnect(con)
  )
  ## reduce the database id to characters 7-19 so that it can be matched
  ## against the same-length key cut from the file names below
  fs$id <- substr(fs$id, 7, 19)

  ## Identify which swaths are available in the datapool
  swaths <- data.frame(
    path = list.files(datadir, pattern = "hdf$", recursive = TRUE, full.names = TRUE),
    stringsAsFactors = FALSE
  ) # all swaths in data pool
  swaths$id <- substr(basename(swaths$path), 10, 22)
  fs$exists <- fs$id %in% swaths$id
  fs$path <- swaths$path[match(fs$id, swaths$id)]

  ## write tile-swath list to disk
  save(fs, swaths, file = "swathtile.Rdata")
}

## restore fs and swaths from the cached table (fresh or from an earlier run)
load("swathtile.Rdata")

if (verbose) print(paste("###############", nrow(fs), " swath IDs recieved from database"))
75

    
76
## Build a YYYYMMDD string for every swath from its year and day-of-year
fs$dateid <- format(
  as.Date(paste0(fs$year, fs$day), format = "%Y%j"),
  "%Y%m%d"
)
# alldates=unique(fs$dateid[fs$exists])

#### Generate submission file
## full period
startdate <- "2000-03-01"
stopdate <- "2011-12-31"
## ...overridden here: these later assignments win, so only 2009 is processed
startdate <- "2009-01-01"
stopdate <- "2009-12-31"

## every calendar day in the period, as YYYYMMDD
alldates <- format(
  seq(from = as.Date(startdate), to = as.Date(stopdate), by = 1),
  "%Y%m%d"
)

## one row per date x tile combination
proclist <- expand.grid(date = alldates, tile = tiles)
proclist$year <- substr(proclist$date, 1, 4)

## flag tile-dates that have at least one swath present in the data pool
avail <- cbind.data.frame(tile = fs$tile, date = fs$dateid)
avail <- unique(avail[fs$exists, ])
proclist$avail <- is.element(
  paste(proclist$tile, proclist$date, sep = "_"),
  paste(avail$tile, avail$date, sep = "_")
)

## flag tile-dates whose output file already exists under outdir;
## the tile is read from characters 7-12 of the file name, the date from
## 14-21, and the combined tile_date key from 7-21
# fdone=data.frame(path=system("ssh lou 'find MOD35/daily -name \"*.nc\"' ",intern=T))
fdone <- data.frame(path = list.files(outdir, pattern = "nc$", recursive = TRUE))
fdone$date <- substr(basename(as.character(fdone$path)), 14, 21)
fdone$tile <- substr(basename(as.character(fdone$path)), 7, 12)
proclist$done <- is.element(
  paste(proclist$tile, proclist$date, sep = "_"),
  substr(basename(as.character(fdone$path)), 7, 21)
)
102

    
103
### Progress summary: how many tile-dates are still outstanding
print(paste(
  sum(!proclist$done), " out of ", nrow(proclist),
  " (", round(100 * sum(!proclist$done) / nrow(proclist), 2), "%) remain"
))
## stem-and-leaf display of the completed counts per tile and year
stem(with(proclist[proclist$done, ], table(tile = tile, year = year)))
# table(tile=proclist$tile[proclist$done],year=proclist$year[proclist$done])
## frequency of the outstanding counts per tile and year
table(with(proclist[!proclist$done, ], table(tile = tile, year = year)))
108

    
109
### explore tile counts
110
#x=table(tile=proclist$tile[proclist$done],year=proclist$year[proclist$done])
111
#x=x[order(rownames(x)),]
112

    
113
## R script that each job line hands to Rscript
script <- "/u/awilso10/environmental-layers/climate/procedures/MOD35_L2_process.r"

## Select the tile-dates to run and write one argument line per job.
## Two alternatives; the second assignment is the one in effect.
tp <- TRUE # rerun everything
tp <- proclist$avail & !proclist$done # date-tiles to process
table(Available = proclist$avail, Completed = proclist$done)
table(tp)

## notdone.txt is the work list read by the qsub script written further below
write.table(
  paste0(
    "--verbose ", script,
    " --date ", proclist$date[tp],
    " --verbose T --tile ", proclist$tile[tp]
  ),
  file = "notdone.txt", row.names = FALSE, col.names = FALSE, quote = FALSE
)
123

    
124
## Optionally run a single tile-date to estimate the time needed per
## tile-day and extrapolate to the whole work list.
test <- FALSE
if (test) {
  i <- 2
  ## time one run of the processing script for row i of proclist
  time1 <- system.time(system(paste0(
    "Rscript --verbose ", script,
    " --date ", proclist$date[i],
    " --verbose T --tile ", proclist$tile[i]
  )))
  ## total serial hours = elapsed seconds of the test run x number of jobs
  hours <- round(length(proclist$date[tp]) * time1[["elapsed"]] / 60 / 60, 1)
  print(paste("Based on runtime of previous command, it will take", hours, " hours to process the full set"))
  ## same estimate spread over the 400 cores requested in the qsub script;
  ## previously this value was computed but never shown
  cores <- 400
  print(paste("Spread over", cores, "cores that is about", round(hours / cores, 2), "hours"))
}
134

    
135

    
136
### Write the PBS submission script for the daily processing to "mod35_qsub".
### The text is emitted verbatim; it runs pxargs under mpiexec over the
### lines of notdone.txt.
cat(
  "
#PBS -S /bin/bash
#PBS -l select=50:ncpus=8:mpiprocs=8
##PBS -l select=100:ncpus=8:mpiprocs=8
##PBS -l walltime=8:00:00
#PBS -l walltime=2:00:00
#PBS -j n
#PBS -m be
#PBS -N mod35
##PBS -q normal
#PBS -q devel
#PBS -V

#CORES=800
CORES=400

HDIR=/u/armichae/pr/
  source $HDIR/etc/environ.sh
  source /u/awilso10/environ.sh
  source /u/awilso10/.bashrc
IDIR=/nobackupp1/awilso10/mod35/
##WORKLIST=$HDIR/var/run/pxrRgrs/work.txt
WORKLIST=$IDIR/notdone.txt
EXE=Rscript
LOGSTDOUT=$IDIR/log/mod35_stdout
LOGSTDERR=$IDIR/log/mod35_stderr
### use mpiexec to parallelize across days
mpiexec -np $CORES pxargs -a $WORKLIST -p $EXE -v -v -v --work-analyze 1> $LOGSTDOUT 2> $LOGSTDERR
",
  file = "mod35_qsub", sep = ""
)
166

    
167
### Inspect what was written before submitting
system("cat mod35_qsub")            # the submission script
system("cat notdone.txt | head")    # first lines of the work list
system("cat notdone.txt | wc -l ")  # number of work-list lines

## Hand the job to the scheduler
system("qsub mod35_qsub")

## List this user's jobs in the queue
system("qstat -u awilso10")
177

    
178
#######################################################
179
### Now submit the script to generate the climatologies
180

    
181
## report 'mostly' finished tiles
## this relies on proclist above so be sure to update above before running
## md: number of tile-dates still to do, tabulated as tile (rows) x year (cols)
md <- table(tile = proclist$tile[!proclist$done], year = proclist$year[!proclist$done])
## Tiles with fewer than 10 outstanding dates in every year. Taking row names
## from a per-row test (instead of names(md[md<10,])) also works when exactly
## one tile qualifies or when more than one year is present.
mdt <- rownames(md)[rowSums(md >= 10) == 0]
tiles <- mdt

tiles
## Tiles for the climatology step. Two alternatives; the second is in effect.
ctiles <- c("h10v08", "h11v08", "h12v08", "h10v07", "h11v07", "h12v07") # South America

ctiles <- tiles # [c(1:3)]  #subset to only some tiles (for example if some aren't finished yet)?
## R script that each climatology job line hands to Rscript
climatescript <- "/pleiades/u/awilso10/environmental-layers/climate/procedures/MOD35_Climatology.r"
192

    
193
## Find tiles whose climatology file "MOD35_[tile].nc" already exists.
## Only the local summary/ directory is searched. The alternatives below are
## kept as comments; previously they ran first and were then overwritten,
## which cost an ssh round trip whose result was discarded.
# cdone=data.frame(path="",tile="")  #use this if you want to re-run everything
# cdone=data.frame(path=sapply(strsplit(basename(
#                    system("ssh lou 'find MOD35/summary -name \"MOD35_h[0-9][0-9]v[0-9][0-9].nc\"' ",intern=T)),split="_"),function(x) x[2]))
cdone <- data.frame(
  ## second "_"-separated piece of each file name, e.g. "h10v08.nc";
  ## vapply yields an empty character vector when nothing is found
  path = vapply(
    strsplit(
      basename(system("find summary -name \"MOD35_h[0-9][0-9]v[0-9][0-9].nc\"", intern = TRUE)),
      split = "_"
    ),
    function(x) x[2],
    character(1)
  )
)
cdone$tile <- substr(basename(as.character(cdone$path)), 1, 6)
print(paste(length(ctiles[!ctiles %in% cdone$tile]), "Tiles still need to be processed"))

## write the table processed by mpiexec: one argument line per unfinished tile
write.table(
  paste("--verbose ", climatescript, " --verbose T --tile ", ctiles[!ctiles %in% cdone$tile], sep = ""),
  file = "notdone_climate.txt", row.names = FALSE, col.names = FALSE, quote = FALSE
)
205

    
206
## Should the climatology job wait for an earlier job to finish?
delay <- TRUE
## List the queue to look up the JobID to wait for
system("qstat -u awilso10", intern = TRUE)
## JobID used in the dependency line when delay is TRUE
job <- "2031668.pbspl1.nas.nasa.gov"

### Write the PBS submission script for the climatology step to
### "mod35_climatology_qsub". The "#PBS -W depend=afterany:<job>" line is
### emitted only when delay is TRUE; everything else is written verbatim.
### NOTE(review): "mem=94" carries no unit suffix - confirm the intended unit.
cat(
  "
#PBS -S /bin/bash
#PBS -l select=4:ncpus=8:mem=94
#PBS -l walltime=2:00:00
#PBS -j n
#PBS -m be
#PBS -N mod35_climate
#PBS -q devel
##PBS -q normal
##PBS -q ldan
#PBS -V
",
  if (delay) paste0("#PBS -W depend=afterany:", job),
  " \n",
  "
CORES=32
HDIR=/u/armichae/pr/
  source $HDIR/etc/environ.sh
  source /pleiades/u/awilso10/environ.sh
  source /pleiades/u/awilso10/.bashrc
IDIR=/nobackupp1/awilso10/mod35/
##WORKLIST=$HDIR/var/run/pxrRgrs/work.txt
WORKLIST=$IDIR/notdone_climate.txt
EXE=Rscript
LOGSTDOUT=$IDIR/log/climatology_stdout
LOGSTDERR=$IDIR/log/climatology_stderr
### use mpiexec to parallelize across tiles
mpiexec -np $CORES pxargs -a $WORKLIST -p $EXE -v -v -v --work-analyze 1> $LOGSTDOUT 2> $LOGSTDERR
",
  file = "mod35_climatology_qsub", sep = ""
)
241

    
242
## Inspect what was written before submitting
system("cat mod35_climatology_qsub")        # qsub submission script
system("cat notdone_climate.txt | head")    # top of job file
system("cat notdone_climate.txt | wc -l ")  # number of jobs to be run

## Hand the job to the scheduler
system("qsub mod35_climatology_qsub")

## List this user's jobs in the queue
system("qstat -u awilso10")
252

    
253
## start interactive job on compute node for debugging
254
# system("qsub -I -l walltime=2:00:00 -lselect=2:ncpus=16:model=san -q devel")
255

    
256

    
257
#################################################################
258
### copy the files back to Yale

## Starts an ssh session to host "lou" with no remote command; R waits here
## until that session ends, and the system() calls below then run locally.
system("ssh lou")
# scp `find MOD35/summary -name "MOD35_h[0-9][0-9]v[0-9][0-9].nc"` adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod35/summary/
## send the per-tile *_mean.nc files, then the per-tile *.nc files
system("rsync -cavv `find summary -name \"MOD35_h[0-9][0-9]v[0-9][0-9]_mean.nc\"` adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod35/summary/")
system("rsync -cavv `find summary -name \"MOD35_h[0-9][0-9]v[0-9][0-9].nc\"` adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod35/summary/")

## Mosaic the files matching summary/*2009mean.nc into a VRT, then convert
## it to an LZW-compressed GeoTIFF.
## NOTE(review): this glob has no "_" before "mean", unlike the rsync pattern
## above - confirm which file naming is correct.
system("gdalbuildvrt MOD35C6_2009.vrt summary/*2009mean.nc ")
system("gdal_translate -stats -co \"COMPRESS=LZW\" -of GTiff MOD35C6_2009.vrt MOD35C6_2009.tif ")
## NOTE(review): the host "acrobates.eeb.24.177.10.190" looks like a hostname
## fused with an IP address - confirm the destination before running.
system("scp MOD35C6_2009.tif adamw@acrobates.eeb.24.177.10.190:/Users/adamw/Downloads/")
## "exit" is a shell command for leaving the remote session, not R code;
## evaluated in R it fails with "object 'exit' not found", so it is kept
## here only as a reminder.
# exit
271

    
272

    
(31-31/37)