Project

General

Profile

Download (7.59 KB) Statistics
| Branch: | Revision:
1
#### Script to facilitate processing of MOD35_L2 data
2
  
3
## Work from the mod35 project directory; the relative file names used
## further down (e.g. modlandTiles.Rdata, notdone.txt) resolve against it.
setwd("/nobackupp1/awilso10/mod35")

## Packages attached for the rest of the script
library(rgdal)
library(raster)
library(RSQLite)

## Switch for the progress message printed after the database query
verbose <- TRUE
11

    
12
## get MODLAND tile information (one row per tile with its lon/lat bounds).
## The table is cached in modlandTiles.Rdata: the remote file is downloaded
## only when no cached copy exists, so the script no longer needs network
## access on every run.  Delete the cache file to force a fresh download.
tilecache <- "modlandTiles.Rdata"
if (!file.exists(tilecache)) {
  tb <- read.table("http://landweb.nascom.nasa.gov/developers/sn_tiles/sn_bound_10deg.txt",
                   skip = 6, nrows = 648, header = TRUE)
  ## tile id of the form hXXvYY built from the ih / iv index columns
  tb$tile <- paste("h", sprintf("%02d", tb$ih), "v", sprintf("%02d", tb$iv), sep = "")
  ## drop rows whose lon_min carries the -999 placeholder
  tb <- tb[tb$lon_min != -999, ]
  save(tb, file = tilecache)
}
## always read `tb` back from the cache so both paths yield the same object
load(tilecache)
18

    
19
## delete temporary log file that can grow to GB.
## unlink() is used instead of a shell `rm` so that no subprocess is spawned
## and a missing file is not reported as an error.
unlink("/nobackupp1/awilso10/software/heg/TOOLKIT_MTD/runtime/LogStatus")
21

    
22

    
23
## Single tile of interest.  Only the last assignment ever took effect, so the
## alternatives are kept as comments instead of as overwritten assignments.
#tile="h11v08"  # Venezuela
#tile="h11v07"  # Venezuela coast
#tile="h09v04"  # Oregon
tile <- "h21v09"  #Kenya

### list of tiles to process
## Hand-picked lists, inactive: `tiles` is derived from the MODLAND archive
## below, which used to overwrite whichever list was assigned here.
#tiles=c("h11v08","h21v09","h08v04","h09v04","h08v05","h09v05","h20v11","h31v11")
#tiles=c("h10v08","h11v08","h12v08","h10v07","h11v07","h12v07")  # South America

## subset to MODLAND tiles: the tile id is cut (characters 18-23) from the
## names of the hdf files listed under the MOD11A1.005/2010* directories
modlandtiles <- system("ls -r /nobackupp4/datapool/modis/MOD11A1.005/2010* | grep hdf$ | cut -c18-23 | sort | uniq - ", intern = TRUE)
tb$land <- tb$tile %in% modlandtiles
tiles <- tb$tile[tb$land]

## Fail here with a clear message rather than later inside the swath query
## when the archive listing came back empty (e.g. data pool not mounted).
if (length(tiles) == 0) {
  stop("No MODLAND tiles found under /nobackupp4/datapool/modis/MOD11A1.005/2010*",
       call. = FALSE)
}
36

    
37
## subset tile corner matrix to tiles selected above
tile_bb <- tb[tb$tile %in% tiles, ]

### get list of files to process
datadir <- "/nobackupp4/datapool/modis/MOD35_L2.006/"

outdir <- "daily/" #paste("daily/",tile,sep="")

## find swaths in region from sqlite database for the specified date/tile
## path to swath database
db <- "/nobackupp4/pvotava/DB/export/swath_geo.sql.sqlite3.db"
## pass the driver object rather than the driver name as a string
con <- dbConnect(SQLite(), dbname = db)
## One query per tile: every swath whose east/west/north/south extent overlaps
## the tile's bounding box.  The per-tile results are row-bound into `fs`.
## tryCatch(..., finally=) closes the connection even when a query fails.
fs <- tryCatch(
  do.call(rbind.data.frame, lapply(seq_len(nrow(tile_bb)), function(i) {
    d <- dbGetQuery(con, paste("SELECT * from swath_geo6
            WHERE east>=", tile_bb$lon_min[i], " AND
                  west<=", tile_bb$lon_max[i], " AND
                  north>=", tile_bb$lat_min[i], " AND
                  south<=", tile_bb$lat_max[i])
    )
    ## rep() keeps this assignment valid when the query returned zero rows
    ## (assigning a length-1 value to a 0-row data frame is an error)
    d$tile <- rep(tile_bb$tile[i], nrow(d))
    print(paste("Finished tile", tile_bb$tile[i]))
    d
  })),
  finally = dbDisconnect(con)
)
## keep characters 7-19 of the database id; this is the key later matched
## against ids cut from the swath file names
fs$id <- substr(fs$id, 7, 19)
62

    
63
### Identify which swaths are available in the datapool
## all hdf files below datadir (full.names spelled out instead of relying on
## partial matching of `full`)
swaths <- data.frame(path = list.files(datadir, pattern = "hdf$", recursive = TRUE, full.names = TRUE),
                     stringsAsFactors = FALSE)  #all swaths in data pool
## swath id = characters 10-22 of the file name
swaths$id <- substr(basename(swaths$path), 10, 22)
## flag database swaths that have a file in the data pool and record its path
## (path is NA where there is no match)
fs$exists <- fs$id %in% swaths$id
fs$path <- swaths$path[match(fs$id, swaths$id)]

if (verbose) print(paste("###############", nrow(fs), " swath IDs received from database"))

## get all unique dates
## year and day columns are pasted together and parsed as year + day-of-year,
## then re-formatted as YYYYMMDD
fs$dateid <- format(as.Date(paste(fs$year, fs$day, sep = ""), "%Y%j"), "%Y%m%d")
alldates <- unique(fs$dateid[fs$exists])
74

    
75
#### Generate submission file
## full period
startdate <- "2000-03-01"
stopdate <- "2011-12-31"
## just 2009 (these later assignments are the ones in effect)
startdate <- "2009-01-01"
stopdate <- "2009-12-31"

## every calendar day from startdate to stopdate, formatted as YYYYMMDD
alldates <- format(seq(as.Date(startdate), as.Date(stopdate), by = 1), "%Y%m%d")

## one row per date x tile combination
proclist <- expand.grid(date = alldates, tile = tiles)
proclist$year <- substr(proclist$date, 1, 4)

## identify tile-dates with no available swaths
## `avail` holds the distinct tile/date pairs that have at least one swath file;
## membership is tested on a "tile_date" key
avail <- unique(cbind.data.frame(tile = fs$tile, date = fs$dateid)[fs$exists, ])
availkey <- paste(avail$tile, avail$date, sep = "_")
prockey <- paste(proclist$tile, proclist$date, sep = "_")
proclist$avail <- prockey %in% availkey
90

    
91
## identify which have been completed
## list the .nc files below MOD35/daily on host `lou` over ssh
fdone <- data.frame(path = system("ssh lou 'find MOD35/daily -name \"*.nc\"' ", intern = TRUE))
#fdone=data.frame(path=list.files(outdir,pattern="nc$",recursive=T))
## file name without directory; tile is characters 7-12, date 14-21, and
## characters 7-21 form the "tile_date" key compared against proclist
donefile <- basename(as.character(fdone$path))
fdone$date <- substr(donefile, 14, 21)
fdone$tile <- substr(donefile, 7, 12)
proclist$done <- paste(proclist$tile, proclist$date, sep = "_") %in% substr(donefile, 7, 21)

### report on what has already been processed
nremain <- sum(!proclist$done)
print(paste(nremain, " out of ", nrow(proclist), " (", round(100 * nremain / nrow(proclist), 2), "%) remain"))
table(tile = proclist$tile[proclist$done], year = proclist$year[proclist$done])
101

    
102
script <- "/u/awilso10/environmental-layers/climate/procedures/MOD35_L2_process.r"

## write the table processed by mpiexec
tp <- (!proclist$done) & proclist$avail  #date-tiles to process
table(Available = proclist$avail, Completed = proclist$done)

## One command line per tile-date still to be processed.  When nothing is left
## the work list is written empty: pasting zero-length date/tile vectors would
## otherwise recycle them to "" and emit a single bogus line with no date and
## no tile.
worklines <- if (any(tp)) {
  paste0("--verbose ", script, " --date ", proclist$date[tp],
         " --verbose T --tile ", proclist$tile[tp])
} else {
  character(0)
}
writeLines(worklines, "notdone.txt")
110

    
111
### qsub script
## Text of the PBS batch script for the daily processing.  It is written
## unchanged to the file mod35_qsub in the working directory; the job runs
## pxargs under mpiexec over the work list notdone.txt.
mod35_qsub_text <- "
#PBS -S /bin/bash
#PBS -l select=50:ncpus=8:mpiprocs=8
##PBS -l select=2:ncpus=8:mpiprocs=8
##PBS -l select=2:ncpus=4:mpiprocs=4
#PBS -l walltime=5:00:00
#PBS -j n
#PBS -m be
#PBS -N mod35
#PBS -q normal
#PBS -V

CORES=400
HDIR=/u/armichae/pr/
#  source $HDIR/etc/environ.sh
  source /u/awilso10/environ.sh
  source /u/awilso10/.bashrc
IDIR=/nobackupp1/awilso10/mod35/
##WORKLIST=$HDIR/var/run/pxrRgrs/work.txt
WORKLIST=$IDIR/notdone.txt
EXE=Rscript
LOGSTDOUT=$IDIR/log/mod35_stdout
LOGSTDERR=$IDIR/log/mod35_stderr
### use mpiexec to parallelize across days
mpiexec -np $CORES pxargs -a $WORKLIST -p $EXE -v -v -v --work-analyze 1> $LOGSTDOUT 2> $LOGSTDERR
"
cat(mod35_qsub_text, file = "mod35_qsub")
138

    
139

    
140
### Check the files
## show the batch script, the head of the work list and its line count
for (cmd in c("cat mod35_qsub",
              "cat notdone.txt | head",
              "cat notdone.txt | wc -l ")) {
  system(cmd)
}

## Submit it
system("qsub mod35_qsub")
## list this user's jobs in the queue
system("qstat -u awilso10")
148

    
149
#######################################################
### Now submit the script to generate the climatologies

tiles
## first three tiles only
ctiles <- tiles[1:3]  #subset to only some tiles (for example if some aren't finished yet)?
climatescript <- "/pleiades/u/awilso10/environmental-layers/climate/procedures/MOD35_Climatology.r"

## write the table processed by mpiexec
## one command line per selected tile
climatelines <- paste0("--verbose ", climatescript, " --verbose T --tile ", ctiles)
write.table(climatelines, file = "notdone_climate.txt",
            row.names = FALSE, col.names = FALSE, quote = FALSE)

## delay start until previous jobs have finished?
delay <- FALSE
## check running jobs to get JobID of job you want to wait for
system("qstat -u awilso10")
## enter JobID here:
job <- "881394.pbspl1.nas.nasa.gov"
166

    
167
### qsub script
## PBS batch script for the climatology step, written to the file
## mod35_climatology_qsub in the working directory.
##
## Optional dependency directive: only present when `delay` is TRUE, in which
## case the job waits for `job` to finish.  The empty-string branch is spelled
## out instead of relying on paste() swallowing the NULL of a failed if().
dependline <- if (delay) paste("#PBS -W depend=afterany:", job, sep = "") else ""

## NOTE(review): the memory request was `mem=94` with no unit, which PBS reads
## as bytes; it is written as 94GB here on the assumption that gigabytes were
## intended -- confirm against the memory of the nodes in the ldan queue.
cat(paste("
#PBS -S /bin/bash
#PBS -l select=1:ncpus=16:mem=94GB
#PBS -l walltime=24:00:00
#PBS -j n
#PBS -m be
#PBS -N mod35_climate
#PBS -q ldan
#PBS -V
", dependline, "

CORES=16
HDIR=/pleiades/u/armichae/pr/
  source $HDIR/etc/environ.sh
  source /pleiades/u/awilso10/environ.sh
  source /pleiades/u/awilso10/.bashrc
  source /pleiades/u/awilso10/moduleload
IDIR=/nobackupp1/awilso10/mod35/
##WORKLIST=$HDIR/var/run/pxrRgrs/work.txt
WORKLIST=$IDIR/notdone_climate.txt
EXE=Rscript
LOGSTDOUT=$IDIR/log/climatology_stdout
LOGSTDERR=$IDIR/log/climatology_stderr
### use mpiexec to parallelize across tiles
mpiexec -np $CORES pxargs -a $WORKLIST -p $EXE -v -v -v --work-analyze 1> $LOGSTDOUT 2> $LOGSTDERR
", sep = ""), file = "mod35_climatology_qsub")
194

    
195
## check files
climatechecks <- c("cat mod35_climatology_qsub",        #qsub submission script
                   "cat notdone_climate.txt | head",    #top of job file
                   "cat notdone_climate.txt | wc -l ")  #number of jobs to be run
for (cmd in climatechecks) {
  system(cmd)
}

## Submit it
system("qsub mod35_climatology_qsub")

## check progress
system("qstat -u awilso10")
205

    
206
## start interactive job on compute node for debugging
207
# system("qsub -I -l walltime=2:00:00 -lselect=2:ncpus=16:model=san -q devel")
208

    
209

    
210
#################################################################
### copy the files back to Yale
summarydir <- "summary"

## summary files are looked up in `summarydir` (the variable is now actually
## used rather than repeating the directory name as a literal); full.names is
## spelled out instead of the partially matched `full`
sumfiles <- list.files(summarydir, pattern = "^MOD06_.*[0-9][.]nc", full.names = TRUE)

## scp needs at least one source file, so the transfer is skipped with a
## message when nothing matched
if (length(sumfiles) > 0) {
  system(paste("scp ", paste(sumfiles, collapse = " "), " adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod06/summary", sep = ""))
} else {
  message("No summary files found in '", summarydir, "'; nothing copied")
}

#system(paste("scp ",tsdir,"/MOD06_",tile,"*.nc adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod06/summary",sep=""))
#system(paste("scp ",paste(fs$path[40421:40422],collapse=" ")," adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod06/swaths",sep=""))
220

    
221

    
222

    
223

    
224

    
(24-24/27)