Project

General

Profile

Download (6.85 KB) Statistics
| Branch: | Revision:
1 35d59dc1 Adam M. Wilson @ pfe
#### Script to facilitate processing of MOD06 data

setwd("/nobackupp1/awilso10/mod35")

library(rgdal)
library(raster)
library(RSQLite)

## Print progress messages throughout the script?
verbose <- TRUE

## Get MODLAND tile information (bounding boxes of each sinusoidal tile)
tb <- read.table("http://landweb.nascom.nasa.gov/developers/sn_tiles/sn_bound_10deg.txt",
                 skip = 6, nrows = 648, header = TRUE)
tb$tile <- paste0("h", sprintf("%02d", tb$ih), "v", sprintf("%02d", tb$iv))
## Cache the table locally, then (re)load the cached copy
save(tb, file = "modlandTiles.Rdata")
load("modlandTiles.Rdata")

## Delete temporary HEG log file that can grow to GBs
unlink("/nobackupp1/awilso10/software/heg/TOOLKIT_MTD/runtime/LogStatus")
20
21
22
## Single-tile selection (for interactive/debug use; the batch run below
## uses the `tiles` vector).  Alternatives kept commented out — the first
## h11v08 assignment was a dead store (immediately overwritten), so it is
## commented out as well:
#tile="h11v08"  # Venezuela
#tile="h11v07"  # Venezuela coast
#tile="h09v04"  # Oregon
tile <- "h21v09"  # Kenya

### List of tiles to process in the batch run
tiles <- c("h11v08", "h21v09", "h08v04", "h09v04",
           "h08v05", "h09v05", "h20v11", "h31v11")
tiles <- tiles[c(1, 4)]  # subset: h11v08 and h09v04 only
## Bounding boxes (from the MODLAND tile table) of the selected tiles
tile_bb <- tb[tb$tile %in% tiles, ]

### Location of the MOD35_L2 swath data pool
datadir <- "/nobackupp4/datapool/modis/MOD35_L2.006/"
#datadir="/nobackupp1/awilso10/mod06/data"   #for data downloaded from 

## Output directory for processed daily files
outdir <- "daily/"  #paste("daily/",tile,sep="")
38
39
## Find swaths intersecting each tile's bounding box in the swath-geometry
## SQLite database (one query per tile, results stacked into one data.frame).
## Path to swath database
db <- "/nobackupp4/pvotava/DB/export/swath_geo.sql.sqlite3.db"
con <- dbConnect(SQLite(), dbname = db)
fs <- do.call(rbind.data.frame, lapply(seq_len(nrow(tile_bb)), function(i) {
  ## Bounding-box overlap test: swath east/west/north/south vs tile limits
  d <- dbGetQuery(con, paste("SELECT * from swath_geo6
            WHERE east>=", tile_bb$lon_min[i], " AND
                  west<=", tile_bb$lon_max[i], " AND
                  north>=", tile_bb$lat_min[i], " AND
                  south<=", tile_bb$lat_max[i]))
  ## Tag each returned swath with the tile whose box it matched
  d$tile <- tile_bb$tile[i]
  print(paste("Finished tile", tile_bb$tile[i]))
  d
}))
## Close the connection.  NOTE: do not assign the result back to `con` —
## dbDisconnect() returns TRUE, and the original `con=dbDisconnect(con)`
## clobbered the connection object with a logical.
dbDisconnect(con)
## Trim swath ids to the date-time portion (characters 7-19)
fs$id <- substr(fs$id, 7, 19)
56
57
### Identify which swaths are actually available in the datapool
swaths <- data.frame(
  path = list.files(datadir, pattern = "hdf$",
                    recursive = TRUE, full.names = TRUE),
  stringsAsFactors = FALSE)  # all swaths in data pool
## Swath id = date-time portion of the file name (characters 10-22)
swaths$id <- substr(basename(swaths$path), 10, 22)
## Flag database swaths that exist on disk and record their full paths
fs$exists <- fs$id %in% swaths$id
fs$path <- swaths$path[match(fs$id, swaths$id)]

if (verbose) print(paste("###############", nrow(fs), " swath IDs received from database"))

## Get all unique dates with at least one available swath
fs$dateid <- format(as.Date(paste0(fs$year, fs$day), "%Y%j"), "%Y%m%d")
alldates <- unique(fs$dateid[fs$exists])
69
70
#### Generate submission file
## NOTE: this deliberately overwrites the available-swath dates computed
## above with the full MODIS record so every date/tile combo is considered.
alldates <- format(seq(as.Date("2000-03-01"), as.Date("2011-12-31"), 1), "%Y%m%d")
proclist <- expand.grid(date = alldates, tile = tiles)
proclist$year <- substr(proclist$date, 1, 4)

## Identify which date/tile combinations have already been completed
fdone <- data.frame(path = list.files(outdir, pattern = "nc$", recursive = TRUE))
fdone$date <- substr(basename(as.character(fdone$path)), 14, 21)
fdone$tile <- substr(basename(as.character(fdone$path)), 7, 12)

## A date-tile is done if "tile_date" matches characters 7-21 of an output file
proclist$done <- paste(proclist$tile, proclist$date, sep = "_") %in%
  substr(basename(as.character(fdone$path)), 7, 21)

### Report on what remains to be processed
print(paste(sum(!proclist$done), " out of ", nrow(proclist), " (",
            round(100 * sum(!proclist$done) / nrow(proclist), 2), "%) remain"))
table(tile = proclist$tile[proclist$done], year = proclist$year[proclist$done])
86
87
## Script run for each date/tile by mpiexec
script <- "/u/awilso10/environmental-layers/climate/procedures/MOD35_L2_process.r"

## Write the work list processed by mpiexec: one command line per
## not-yet-completed date/tile combination
write.table(paste0("--verbose ", script,
                   " --date ", proclist$date[!proclist$done],
                   " --verbose T --tile ", proclist$tile[!proclist$done]),
            file = "notdone.txt",
            row.names = FALSE, col.names = FALSE, quote = FALSE)

### PBS submission script (written verbatim to "mod35_qsub")
cat("
#PBS -S /bin/bash
#PBS -l select=50:ncpus=8:mpiprocs=8
##PBS -l select=2:ncpus=8:mpiprocs=8
##PBS -l select=2:ncpus=4:mpiprocs=4
#PBS -l walltime=5:00:00
#PBS -j n
#PBS -m be
#PBS -N mod35
#PBS -q normal
#PBS -V

CORES=400
HDIR=/u/armichae/pr/
#  source $HDIR/etc/environ.sh
  source /u/awilso10/environ.sh
  source /u/awilso10/.bashrc
IDIR=/nobackupp1/awilso10/mod35/
##WORKLIST=$HDIR/var/run/pxrRgrs/work.txt
WORKLIST=$IDIR/notdone.txt
EXE=Rscript
LOGSTDOUT=$IDIR/log/mod35_stdout
LOGSTDERR=$IDIR/log/mod35_stderr
### use mpiexec to parallelize across days
mpiexec -np $CORES pxargs -a $WORKLIST -p $EXE -v -v -v --work-analyze 1> $LOGSTDOUT 2> $LOGSTDERR
", file = "mod35_qsub")

### Check the files before submitting
system("cat mod35_qsub")
system("cat notdone.txt | head")
system("cat notdone.txt | wc -l ")

## Submit it
system("qsub mod35_qsub")
system("qstat -u awilso10")
130 35d59dc1 Adam M. Wilson @ pfe
131
#######################################################
### Now submit the script to generate the climatologies

tiles  # print tile list for reference
## Subset to only some tiles (for example if some aren't finished yet)?
ctiles <- tiles  #[c(2)]
climatescript <- "/u/awilso10/environmental-layers/climate/procedures/MOD35_Climatology.r"

## Write the work list processed by mpiexec: one command line per tile
write.table(paste0("--verbose ", climatescript, " --verbose T --tile ", ctiles),
            file = "notdone_climate.txt",
            row.names = FALSE, col.names = FALSE, quote = FALSE)

## Delay start until previous jobs have finished?
delay <- FALSE
## Check running jobs to get the JobID of the job to wait for
system("qstat -u awilso10")
## Enter JobID here:
job <- "881394.pbspl1.nas.nasa.gov"
148
149
### PBS submission script for the climatology job.  If `delay` is TRUE a
### dependency line is inserted so the job waits for `job` to finish;
### when delay is FALSE the `if` yields NULL, which paste0() drops.
cat(paste0("
#PBS -S /bin/bash
##PBS -l select=50:ncpus=8:mpiprocs=8
#PBS -l select=4:ncpus=4:mpiprocs=4
#PBS -l walltime=2:00:00
#PBS -j n
#PBS -m be
#PBS -N mod35_climate
#PBS -q devel
#PBS -V
", if (delay) paste0("#PBS -W depend=afterany:", job), " 

CORES=16
HDIR=/u/armichae/pr/
#  source $HDIR/etc/environ.sh
  source /u/awilso10/environ.sh
  source /u/awilso10/.bashrc
IDIR=/nobackupp1/awilso10/mod35/
##WORKLIST=$HDIR/var/run/pxrRgrs/work.txt
WORKLIST=$IDIR/notdone_climate.txt
EXE=Rscript
LOGSTDOUT=$IDIR/log/climatology_stdout
LOGSTDERR=$IDIR/log/climatology_stderr
### use mpiexec to parallelize across days
mpiexec -np $CORES pxargs -a $WORKLIST -p $EXE -v -v -v --work-analyze 1> $LOGSTDOUT 2> $LOGSTDERR
"), file = "mod35_climatology_qsub")

## Check files before submitting
system("cat mod35_climatology_qsub")        #qsub submission script
system("cat notdone_climate.txt | head")    #top of job file
system("cat notdone_climate.txt | wc -l ")  #number of jobs to be run

## Submit it
system("qsub mod35_climatology_qsub")

## Check progress
system("qstat -u awilso10")

## Start an interactive job on a compute node for debugging
# system("qsub -I -l walltime=2:00:00 -lselect=2:ncpus=16:model=san -q devel")
190
191
192
#################################################################
### Copy the summary files back to Yale
summarydir <- "summary"

## Use the summarydir variable (was hard-coded "summary" — inconsistent with
## the variable defined just above) and spell out full.names rather than
## relying on partial argument matching
sumfiles <- list.files(summarydir, pattern = "^MOD06_.*[0-9][.]nc", full.names = TRUE)

system(paste0("scp ", paste(sumfiles, collapse = " "),
              " adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod06/summary"))

#system(paste("scp ",tsdir,"/MOD06_",tile,"*.nc adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod06/summary",sep=""))
#system(paste("scp ",paste(fs$path[40421:40422],collapse=" ")," adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod06/swaths",sep=""))
202
203
204
205