Project

General

Profile

Download (6.86 KB) Statistics
| Branch: | Revision:
1
#### Script to facilitate processing of MOD06 data
##
## Builds the list of date/tile jobs, writes the PBS submission scripts and
## submits them with qsub.

## Relative paths used further down (modlandTiles.Rdata, daily/, notdone.txt,
## the qsub scripts, summary/) are resolved against this working directory.
setwd("/nobackupp1/awilso10/mod06")

library(rgdal)
library(raster)
library(RSQLite)

## Print progress messages. Written as TRUE rather than T because T is an
## ordinary variable that can be reassigned, whereas TRUE is a reserved word.
verbose <- TRUE
11

    
12
## Read the MODLAND tile bounding-box table and tag each row with its tile
## id in "hXXvYY" form (zero-padded horizontal and vertical indices).
tb <- read.table(
  "http://landweb.nascom.nasa.gov/developers/sn_tiles/sn_bound_10deg.txt",
  skip = 6,
  nrows = 648,
  header = TRUE
)
tb$tile <- paste0("h", sprintf("%02d", tb$ih), "v", sprintf("%02d", tb$iv))

## Store a local copy of the table, then read that copy back in.
save(tb, file = "modlandTiles.Rdata")
load("modlandTiles.Rdata")
17

    
18
## Delete the temporary log file that can grow to GB.
## unlink() is used instead of shelling out to `rm`: it needs no subprocess
## and does not print an error when the log file does not exist yet.
unlink("/nobackupp1/awilso10/software/heg/TOOLKIT_MTD/runtime/LogStatus")
20

    
21

    
22
## Single tile of interest. Assignments run top to bottom, so the last
## uncommented one is the value that stays in effect.
tile <- "h11v08"   # Venezuela
# tile <- "h11v07" # Venezuela coast
# tile <- "h09v04" # Oregon
tile <- "h21v09"   # Kenya

### Tiles to process: the full candidate list, then cut down to its first
### entry only.
tiles <- c(
  "h11v08", "h21v09", "h08v04", "h09v04",
  "h08v05", "h09v05", "h20v11", "h31v11"
)
tiles <- head(tiles, 1)

## Rows of the MODLAND tile table that belong to the selected tile(s)
tile_bb <- tb[is.element(tb$tile, tiles), ]
32

    
33
### Locations of the files to process
## directory that is searched (recursively) for input swath files
datadir <- "/nobackupp4/datapool/modis/MOD06_L2.005/"
# datadir <- "/nobackupp1/awilso10/mod06/data"   # for data downloaded from

## directory that is searched for finished outputs; relative to the working
## directory. A per-tile alternative is kept in the trailing comment.
outdir <- "daily/" # paste("daily/",tile,sep="")
38

    
39
## Find swaths in region from sqlite database for the specified tile(s)
## path to swath database
db <- "/nobackupp4/pvotava/DB/export/swath_geo.sql.sqlite3.db"

## Pass a driver object, SQLite(), rather than the string "SQLite": DBI
## expects `drv` to be a DBIDriver object and current releases reject a
## character driver name.
con <- dbConnect(SQLite(), dbname = db)

## One query per selected tile, keeping every swath whose bounding box
## (west/east/south/north) overlaps the tile's lon/lat bounding box.
##  * seq_len() instead of 1:nrow(): with zero tiles it yields no iterations,
##    whereas 1:0 would iterate over c(1, 0).
##  * rep(..., nrow(d)) so that a tile with no matching swaths gives an empty
##    result instead of a "replacement has 1 row, data has 0" error.
##  * tryCatch(finally = ) closes the connection even when a query fails.
fs <- tryCatch(
  do.call(
    rbind.data.frame,
    lapply(seq_len(nrow(tile_bb)), function(i) {
      query <- paste(
        "SELECT * from swath_geo WHERE",
        "east>=", tile_bb$lon_min[i], "AND",
        "west<=", tile_bb$lon_max[i], "AND",
        "north>=", tile_bb$lat_min[i], "AND",
        "south<=", tile_bb$lat_max[i]
      )
      d <- dbGetQuery(con, query)
      d$tile <- rep(tile_bb$tile[i], nrow(d))
      print(paste("Finished tile", tile_bb$tile[i]))
      d
    })
  ),
  finally = dbDisconnect(con)
)

## Keep characters 7-19 of the database id; this is the key compared with
## the file names found in the data pool.
## NOTE(review): presumably the acquisition date/time part of the id - confirm.
fs$id <- substr(fs$id, 7, 19)
56

    
57
### Work out which of the requested swaths are present in the datapool
## every file below datadir whose name ends in "hdf"
swaths <- data.frame(
  path = list.files(datadir, pattern = "hdf$", recursive = TRUE, full.names = TRUE),
  stringsAsFactors = FALSE
)
## characters 10-22 of the file name form the key matched against fs$id
swaths$id <- substr(basename(swaths$path), 10, 22)

## position of each requested swath in the datapool listing (NA = absent)
hit <- match(fs$id, swaths$id)
fs$exists <- !is.na(hit)
fs$path <- swaths$path[hit]

if (verbose) {
  print(paste("###############", nrow(fs), " swath IDs recieved from database"))
}
64

    
65

    
66
## Calendar date (YYYYMMDD) of every swath, derived from its year and
## day-of-year columns
swath_day <- as.Date(paste0(fs$year, fs$day), format = "%Y%j")
fs$dateid <- format(swath_day, "%Y%m%d")
## dates that have at least one swath in the datapool
## (this value is replaced by the full date range just below)
alldates <- unique(fs$dateid[fs$exists])

#### Generate submission file
## every day from 2000-03-01 to 2011-12-31, crossed with every selected tile
alldates <- format(
  seq(as.Date("2000-03-01"), as.Date("2011-12-31"), by = 1),
  "%Y%m%d"
)
proclist <- expand.grid(date = alldates, tile = tiles)
proclist$year <- substr(proclist$date, 1, 4)
74
  
75
## Outputs that already exist: every file below outdir whose name ends in "nc"
fdone <- data.frame(path = list.files(outdir, pattern = "nc$", recursive = TRUE))
done_names <- basename(as.character(fdone$path))
## the tile sits at characters 7-12 of the file name, the date at 14-21
fdone$date <- substr(done_names, 14, 21)
fdone$tile <- substr(done_names, 7, 12)

## A date-tile is done when "<tile>_<date>" equals characters 7-21 of the
## name of an existing output file
proclist$done <- is.element(
  paste(proclist$tile, proclist$date, sep = "_"),
  substr(done_names, 7, 21)
)
82

    
83
### Report on what has already been processed
n_remaining <- sum(!proclist$done)
## The share is multiplied by 100 so that the number printed in front of the
## "%" sign really is a percentage (previously a 0-1 fraction was printed).
print(paste(n_remaining, " out of ", nrow(proclist), " (",
            round(100 * n_remaining / nrow(proclist), 2), "%) remain"))
## number of completed date-tiles per tile and year
table(tile = proclist$tile[proclist$done], year = proclist$year[proclist$done])
86

    
87
script <- "/u/awilso10/environmental-layers/climate/procedures/MOD06_L2_process.r"

## Write the table processed by mpiexec: one argument line per date-tile that
## still has to be processed.
## The empty case is handled explicitly: paste() recycles zero-length vectors
## to "", so with nothing left to do it would produce a single bogus job line
## with an empty --date and --tile. An empty job file is written instead.
todo <- !proclist$done
joblines <- if (any(todo)) {
  paste0("--verbose ", script,
         " --date ", as.character(proclist$date[todo]),
         " --verbose T --tile ", as.character(proclist$tile[todo]))
} else {
  character(0)
}
writeLines(joblines, "notdone.txt")
92

    
93
### qsub script
## The job file is written line by line; the first element is an empty string
## so the file starts with a blank line. Lines starting with "##PBS" are
## disabled alternatives of the resource request.
writeLines(c(
  "",
  "#PBS -S /bin/bash",
  "#PBS -l select=50:ncpus=8:mpiprocs=8",
  "##PBS -l select=50:ncpus=8:mpiprocs=8",
  "##PBS -l select=2:ncpus=4:mpiprocs=4",
  "#PBS -l walltime=5:00:00",
  "#PBS -j n",
  "#PBS -m be",
  "#PBS -N mod06",
  "#PBS -q normal",
  "#PBS -V",
  "",
  "CORES=400",
  "HDIR=/u/armichae/pr/",
  "#  source $HDIR/etc/environ.sh",
  "  source /u/awilso10/environ.sh",
  "  source /u/awilso10/.bashrc",
  "IDIR=/nobackupp1/awilso10/mod06/",
  "##WORKLIST=$HDIR/var/run/pxrRgrs/work.txt",
  "WORKLIST=$IDIR/notdone.txt",
  "EXE=Rscript",
  "LOGSTDOUT=$IDIR/log/swath_stdout",
  "LOGSTDERR=$IDIR/log/swath_stderr",
  "### use mpiexec to parallelize across days",
  "mpiexec -np $CORES pxargs -a $WORKLIST -p $EXE -v -v -v --work-analyze 1> $LOGSTDOUT 2> $LOGSTDERR"
), "mod06_qsub")
120

    
121

    
122
### Check the files before submitting
invisible(lapply(
  c(
    "cat mod06_qsub",            # the submission script
    "cat notdone.txt | head",    # first lines of the job list
    "cat notdone.txt | wc -l "   # number of job lines
  ),
  system
))

## Submit it
system("qsub mod06_qsub")
129

    
130
#######################################################
### Now submit the script to generate the climatologies

## show the tiles currently selected
tiles
## tiles to build climatologies for; index the vector (e.g. [c(1,3)]) to
## subset to only some tiles, for example if some aren't finished yet
ctiles <- tiles
climatescript <- "/u/awilso10/environmental-layers/climate/procedures/MOD06_Climatology.r"

## write the table processed by mpiexec: one argument line per tile
writeLines(
  paste0("--verbose ", climatescript, " --verbose T --tile ", ctiles),
  "notdone_climate.txt"
)
140

    
141
## Delay start until previous jobs have finished?
delay <- FALSE
## list running jobs to find the JobID of the job to wait for
system("qstat -u awilso10")
## enter JobID here:
job <- "881394.pbspl1.nas.nasa.gov"

### qsub script
## Line that follows "#PBS -V": the dependency directive (plus one trailing
## blank) when delay is set, otherwise just the single blank.
depend_line <- if (delay) {
  paste0("#PBS -W depend=afterany:", job, " ")
} else {
  " "
}

## The job file is written line by line; the first element is an empty string
## so the file starts with a blank line.
writeLines(c(
  "",
  "#PBS -S /bin/bash",
  "#PBS -l select=50:ncpus=8:mpiprocs=8",
  "##PBS -l select=2:ncpus=4:mpiprocs=4",
  "#PBS -l walltime=5:00:00",
  "#PBS -j n",
  "#PBS -m be",
  "#PBS -N mod06_climate",
  "#PBS -q normal",
  "#PBS -V",
  depend_line,
  "",
  "CORES=400",
  "HDIR=/u/armichae/pr/",
  "#  source $HDIR/etc/environ.sh",
  "  source /u/awilso10/environ.sh",
  "  source /u/awilso10/.bashrc",
  "IDIR=/nobackupp1/awilso10/mod06/",
  "##WORKLIST=$HDIR/var/run/pxrRgrs/work.txt",
  "WORKLIST=$IDIR/notdone_climate.txt",
  "EXE=Rscript",
  "LOGSTDOUT=$IDIR/log/climatology_stdout",
  "LOGSTDERR=$IDIR/log/climatology_stderr",
  "### use mpiexec to parallelize across days",
  "mpiexec -np $CORES pxargs -a $WORKLIST -p $EXE -v -v -v --work-analyze 1> $LOGSTDOUT 2> $LOGSTDERR",
  "### Now process the climatologies"
), "mod06_climatology_qsub")
176

    
177
## Check files before submitting
invisible(lapply(
  c(
    "cat mod06_climatology_qsub",        # qsub submission script
    "cat notdone_climate.txt | head",    # top of job file
    "cat notdone_climate.txt | wc -l "   # number of jobs to be run
  ),
  system
))

## Submit it
system("qsub mod06_climatology_qsub")

## Check progress
system("qstat -u awilso10")

## Start interactive job on compute node for debugging
# system("qsub -I -l walltime=2:00:00 -lselect=2:ncpus=16:model=san -q devel")
190

    
191

    
192
#################################################################
### Copy the files back to Yale
summarydir <- "summary"

## Summary files to transfer. The directory now comes from summarydir (it was
## defined but a second hard-coded "summary" was used), and full.names is
## spelled out instead of relying on partial matching of `full`.
sumfiles <- list.files(summarydir, pattern = "^MOD06_.*[0-9][.]nc", full.names = TRUE)

## Only call scp when there is something to send: with an empty file list the
## command would consist of the destination alone and fail.
if (length(sumfiles) > 0) {
  system(paste("scp ", paste(shQuote(sumfiles), collapse = " "),
               " adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod06/summary",
               sep = ""))
} else {
  warning("No summary files found in '", summarydir, "'; nothing was copied",
          call. = FALSE)
}

#system(paste("scp ",tsdir,"/MOD06_",tile,"*.nc adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod06/summary",sep=""))
#system(paste("scp ",paste(fs$path[40421:40422],collapse=" ")," adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod06/swaths",sep=""))
202

    
203

    
204

    
205

    
206

    
(20-20/23)