Project

General

Profile

Download (10.4 KB) Statistics
| Branch: | Revision:
1 35d59dc1 Adam M. Wilson @ pfe
#### Script to facilitate processing of MOD06 data
2 95354b03 Adam M. Wilson @ pfe
### This script is meant to be run iteratively, rather than unsupervised. There are several steps that require manual checking (such as choosing the number of cores, etc.)
3
4
## working directory
5 c1352601 Adam M. Wilson @ pfe
setwd("/nobackupp1/awilso10/mod35")
6 35d59dc1 Adam M. Wilson @ pfe
7 95354b03 Adam M. Wilson @ pfe
## load libraries
8 35d59dc1 Adam M. Wilson @ pfe
library(rgdal)
9
library(raster)
10
library(RSQLite)
11
12 95354b03 Adam M. Wilson @ pfe
## flag to increase verbosity of output
13 35d59dc1 Adam M. Wilson @ pfe
verbose=T
14
15
## get MODLAND tile information
16
tb=read.table("http://landweb.nascom.nasa.gov/developers/sn_tiles/sn_bound_10deg.txt",skip=6,nrows=648,header=T)
17
tb$tile=paste("h",sprintf("%02d",tb$ih),"v",sprintf("%02d",tb$iv),sep="")
18 aba23d60 Adam M. Wilson @ pfe
tb=tb[tb$lon_min!=-999,]
19 35d59dc1 Adam M. Wilson @ pfe
save(tb,file="modlandTiles.Rdata")
20
load("modlandTiles.Rdata")
21
22 e4e30b86 Adam M. Wilson @ pfe
## Choose some tiles to process
23 35d59dc1 Adam M. Wilson @ pfe
### list of tiles to process
24 aba23d60 Adam M. Wilson @ pfe
tiles=c("h10v08","h11v08","h12v08","h10v07","h11v07","h12v07")  # South America
25 95354b03 Adam M. Wilson @ pfe
## or a northern block of tiles
26 e4e30b86 Adam M. Wilson @ pfe
tiles=apply(expand.grid(paste("h",11:17,sep=""),v=c("v00","v01","v02","v03","v04")),1,function(x) paste(x,collapse="",sep=""))
27
## subset to MODLAND tiles
28
alltiles=system("ls -r MODTILES/ | grep tif$ | cut -c1-6 | sort | uniq - ",intern=T)
29 aba23d60 Adam M. Wilson @ pfe
30 848e34fd Adam M. Wilson @ pfe
## or run all tiles
31
#tiles=alltiles
32
33 e4e30b86 Adam M. Wilson @ pfe
## subset to tiles in global region (not outside global boundary in sinusoidal projection)
34
tiles=tiles[tiles%in%alltiles]
35 aba23d60 Adam M. Wilson @ pfe
36
## subset tile corner matrix to tiles selected above
37 35d59dc1 Adam M. Wilson @ pfe
tile_bb=tb[tb$tile%in%tiles,]
38
39
### get list of files to process
40
datadir="/nobackupp4/datapool/modis/MOD35_L2.006/"
41
42
outdir="daily/" #paste("daily/",tile,sep="")
43
44 848e34fd Adam M. Wilson @ pfe
##find swaths in region from sqlite database for the specified tile
45 b3344197 Adam M. Wilson @ pfe
## this takes a while, about 30 minutes, so only rebuild if you need to update what's available...
46
rebuildswathtable=F
47
if(rebuildswathtable){
48
  ## path to swath database
49
  db="/nobackupp4/pvotava/DB/export/swath_geo.sql.sqlite3.db"
50
  con=dbConnect("SQLite", dbname = db)
51
  fs=do.call(rbind.data.frame,lapply(1:nrow(tile_bb),function(i){
52
    d=dbGetQuery(con,paste("SELECT * from swath_geo6
53 35d59dc1 Adam M. Wilson @ pfe
            WHERE east>=",tile_bb$lon_min[i]," AND
54
                  west<=",tile_bb$lon_max[i]," AND
55
                  north>=",tile_bb$lat_min[i]," AND
56
                  south<=",tile_bb$lat_max[i])
57 b3344197 Adam M. Wilson @ pfe
      )
58
    d$tile=tile_bb$tile[i]
59
    print(paste("Finished tile",tile_bb$tile[i]))
60
    return(d)
61
  }))
62
  con=dbDisconnect(con)
63
  fs$id=substr(fs$id,7,19)
64
65
  ## Identify which swaths are available in the datapool
66
  swaths=data.frame(path=list.files(datadir,pattern=paste("hdf$"),recursive=T,full=T),stringsAsFactors=F)  #all swaths in data pool
67
  swaths$id=substr(basename(swaths$path),10,22)
68
  fs$exists=fs$id%in%swaths$id 
69
  fs$path=swaths$path[match(fs$id,swaths$id)]
70
71
  ## write tile-swath list to disk
72
  save(fs,swaths,file="swathtile.Rdata")
73
}
74
75 848e34fd Adam M. Wilson @ pfe
if(!exists("fs")) load("swathtile.Rdata")
76 b3344197 Adam M. Wilson @ pfe
77 35d59dc1 Adam M. Wilson @ pfe
if(verbose) print(paste("###############",nrow(fs)," swath IDs recieved from database"))
78
79
## get all unique dates
80
fs$dateid=format(as.Date(paste(fs$year,fs$day,sep=""),"%Y%j"),"%Y%m%d")
81 b3344197 Adam M. Wilson @ pfe
#alldates=unique(fs$dateid[fs$exists])
82 35d59dc1 Adam M. Wilson @ pfe
83
#### Generate submission file
84 aba23d60 Adam M. Wilson @ pfe
startdate="2000-03-01"
85
stopdate="2011-12-31"
86 b3344197 Adam M. Wilson @ pfe
## just 2005-2010
87
startdate="2009-01-01"
88
stopdate="2009-12-31"
89 aba23d60 Adam M. Wilson @ pfe
90
alldates=format(seq(as.Date(startdate),as.Date(stopdate),1),"%Y%m%d")
91
92 35d59dc1 Adam M. Wilson @ pfe
proclist=expand.grid(date=alldates,tile=tiles)
93
proclist$year=substr(proclist$date,1,4)
94 aba23d60 Adam M. Wilson @ pfe
95 52ae84b1 Adam M. Wilson @ pfe
 ## identify tile-dates with no available swaths
96 aba23d60 Adam M. Wilson @ pfe
avail=unique(cbind.data.frame(tile=fs$tile,date=fs$dateid)[fs$exists, ])
97
proclist$avail=paste(proclist$tile,proclist$date,sep="_")%in%paste(avail$tile,avail$date,sep="_")
98
99 35d59dc1 Adam M. Wilson @ pfe
## identify which have been completed
100 b3344197 Adam M. Wilson @ pfe
#fdone=data.frame(path=system("ssh lou 'find MOD35/daily -name \"*.nc\"' ",intern=T))
101
fdone=data.frame(path=list.files(outdir,pattern="nc$",recursive=T))
102 35d59dc1 Adam M. Wilson @ pfe
fdone$date=substr(basename(as.character(fdone$path)),14,21)
103
fdone$tile=substr(basename(as.character(fdone$path)),7,12)
104
proclist$done=paste(proclist$tile,proclist$date,sep="_")%in%substr(basename(as.character(fdone$path)),7,21)
105
106
### report on what has already been processed
107
print(paste(sum(!proclist$done)," out of ",nrow(proclist)," (",round(100*sum(!proclist$done)/nrow(proclist),2),"%) remain"))
108 95354b03 Adam M. Wilson @ pfe
stem(table(tile=proclist$tile[proclist$done],year=proclist$year[proclist$done]))
109 e5c2e69b Adam M. Wilson @ pfe
#table(tile=proclist$tile[proclist$done],year=proclist$year[proclist$done])
110 b3344197 Adam M. Wilson @ pfe
table(table(tile=proclist$tile[!proclist$done],year=proclist$year[!proclist$done]))
111
112
### explore tile counts
113
#x=table(tile=proclist$tile[proclist$done],year=proclist$year[proclist$done])
114
#x=x[order(rownames(x)),]
115 35d59dc1 Adam M. Wilson @ pfe
116
script="/u/awilso10/environmental-layers/climate/procedures/MOD35_L2_process.r"
117 e4e30b86 Adam M. Wilson @ pfe
 
118 35d59dc1 Adam M. Wilson @ pfe
## write the table processed by mpiexec
119 e4e30b86 Adam M. Wilson @ pfe
tp=T  # rerun everything
120 be64daa8 Adam M. Wilson @ pfe
tp=((!proclist$done)&proclist$avail)  #date-tiles to process
121 aba23d60 Adam M. Wilson @ pfe
table(Available=proclist$avail,Completed=proclist$done)
122 95354b03 Adam M. Wilson @ pfe
table(tp)
123 aba23d60 Adam M. Wilson @ pfe
124 848e34fd Adam M. Wilson @ pfe
write.table(paste("--verbose ",script," --date ",proclist$date[tp]," --verbose T --profile F --tile ",proclist$tile[tp],sep=""),
125 35d59dc1 Adam M. Wilson @ pfe
file=paste("notdone.txt",sep=""),row.names=F,col.names=F,quote=F)
126
127 95354b03 Adam M. Wilson @ pfe
## try running it once for a single tile-date to get estimate of time/tile-day
128
test=F
129
if(test){
130
  i=2
131 848e34fd Adam M. Wilson @ pfe
  time1=system.time(system(paste("Rscript --verbose ",script," --date ",proclist$date[i]," --profile T --verbose T --tile ",proclist$tile[i],sep="")))
132
  hours=round(length(proclist$date[tp])*142/60/60); hours
133 e5c2e69b Adam M. Wilson @ pfe
  hours=round(length(proclist$date[tp])*time1[3]/60/60,1); hours
134 848e34fd Adam M. Wilson @ pfe
  nodes=100
135
  threads=nodes*8
136
  writeLines(paste(" ################### \n Hours per date-tile:",round(time1[3]/60/60,2),"\n Date-tiles to process:",sum(tp)," \n Estimated CPU time: ",hours,"hours \n  With ",threads,"threads:",round(hours/threads,2),"hours \n ###################"))
137 d91f0663 Adam M. Wilson @ pfe
  summaryRprof("/nobackupp1/awilso10/mod35/log/profile.out")
138 95354b03 Adam M. Wilson @ pfe
}
139
140 848e34fd Adam M. Wilson @ pfe
### Set up submission script
141
queue="devel"
142
queue="normal" #"devel"
143 d91f0663 Adam M. Wilson @ pfe
queue="long" #"devel"
144
nodes=120
145
walltime=24
146 95354b03 Adam M. Wilson @ pfe
147 848e34fd Adam M. Wilson @ pfe
### write qsub script to disk
148 35d59dc1 Adam M. Wilson @ pfe
cat(paste("
149
#PBS -S /bin/bash
150 848e34fd Adam M. Wilson @ pfe
#PBS -l select=",nodes,":ncpus=8:mpiprocs=8
151
#PBS -l walltime=",walltime,":00:00
152 35d59dc1 Adam M. Wilson @ pfe
#PBS -j n
153
#PBS -m be
154
#PBS -N mod35
155 848e34fd Adam M. Wilson @ pfe
#PBS -q ",queue,"
156 35d59dc1 Adam M. Wilson @ pfe
#PBS -V
157
158 848e34fd Adam M. Wilson @ pfe
CORES=",nodes*8,"
159 35d59dc1 Adam M. Wilson @ pfe
HDIR=/u/armichae/pr/
160 5af36cdd Adam M. Wilson @ pfe
  source $HDIR/etc/environ.sh
161 35d59dc1 Adam M. Wilson @ pfe
  source /u/awilso10/environ.sh
162
  source /u/awilso10/.bashrc
163
IDIR=/nobackupp1/awilso10/mod35/
164
WORKLIST=$IDIR/notdone.txt
165
EXE=Rscript
166
LOGSTDOUT=$IDIR/log/mod35_stdout
167
LOGSTDERR=$IDIR/log/mod35_stderr
168
### use mpiexec to parallelize across days
169
mpiexec -np $CORES pxargs -a $WORKLIST -p $EXE -v -v -v --work-analyze 1> $LOGSTDOUT 2> $LOGSTDERR
170
",sep=""),file=paste("mod35_qsub",sep=""))
171
172
### Check the files
173
system(paste("cat mod35_qsub",sep=""))
174 848e34fd Adam M. Wilson @ pfe
system(paste("cat notdone.txt | head -n 4",sep=""))
175 35d59dc1 Adam M. Wilson @ pfe
system(paste("cat notdone.txt | wc -l ",sep=""))
176
177 848e34fd Adam M. Wilson @ pfe
## start interactive job on compute node for debugging
178
# system("qsub -I -l walltime=2:00:00 -lselect=2:ncpus=16:model=san -q devel")
179
180 95354b03 Adam M. Wilson @ pfe
181 35d59dc1 Adam M. Wilson @ pfe
## Submit it
182
system(paste("qsub mod35_qsub",sep=""))
183 b3344197 Adam M. Wilson @ pfe
184 c24e32a8 Adam M. Wilson @ pfe
system("qstat -u awilso10")
185 35d59dc1 Adam M. Wilson @ pfe
186
#######################################################
187
### Now submit the script to generate the climatologies
188
189 e4e30b86 Adam M. Wilson @ pfe
## report 'mostly' finished tiles
190 e5c2e69b Adam M. Wilson @ pfe
## this relies on proclist above so be sure to update above before running
191 e4e30b86 Adam M. Wilson @ pfe
md=table(tile=proclist$tile[!proclist$done],year=proclist$year[!proclist$done])
192
mdt=names(md[md<10,])
193
tiles=mdt
194 5af36cdd Adam M. Wilson @ pfe
195 35d59dc1 Adam M. Wilson @ pfe
tiles
196 b3344197 Adam M. Wilson @ pfe
ctiles=c("h10v08","h11v08","h12v08","h10v07","h11v07","h12v07")  # South America
197
198 be64daa8 Adam M. Wilson @ pfe
ctiles=tiles#[c(1:3)]  #subset to only some tiles (for example if some aren't finished yet)?
199 aba23d60 Adam M. Wilson @ pfe
climatescript="/pleiades/u/awilso10/environmental-layers/climate/procedures/MOD35_Climatology.r"
200 35d59dc1 Adam M. Wilson @ pfe
201 be64daa8 Adam M. Wilson @ pfe
## check which tiles have been processed and are on lou with a filename "MOD35_[tile].nc"
202 b3344197 Adam M. Wilson @ pfe
cdone=data.frame(path="",tile="")  #use this if you want to re-run everything
203 d91f0663 Adam M. Wilson @ pfe
#cdone=data.frame(path=sapply(strsplit(basename(
204
#                   system("ssh lou 'find MOD35/summary -name \"MOD35_h[0-9][0-9]v[0-9][0-9].nc\"' ",intern=T)),split="_"),function(x) x[2]))
205 5af36cdd Adam M. Wilson @ pfe
cdone=data.frame(path=sapply(strsplit(basename(
206
                   system("find summary -name \"MOD35_h[0-9][0-9]v[0-9][0-9].nc\"",intern=T)),split="_"),function(x) x[2]))
207 be64daa8 Adam M. Wilson @ pfe
cdone$tile=substr(basename(as.character(cdone$path)),1,6)
208 b3344197 Adam M. Wilson @ pfe
print(paste(length(ctiles[!ctiles%in%cdone$tile]),"Tiles still need to be processed"))
209 be64daa8 Adam M. Wilson @ pfe
210 35d59dc1 Adam M. Wilson @ pfe
## write the table processed by mpiexec
211 be64daa8 Adam M. Wilson @ pfe
write.table(paste("--verbose ",climatescript," --verbose T --tile ",ctiles[!ctiles%in%cdone$tile],sep=""),
212 35d59dc1 Adam M. Wilson @ pfe
file=paste("notdone_climate.txt",sep=""),row.names=F,col.names=F,quote=F)
213
214
## delay start until previous jobs have finished?
215 848e34fd Adam M. Wilson @ pfe
delay=F
216 35d59dc1 Adam M. Wilson @ pfe
## check running jobs to get JobID of job you want to wait for
217 e5c2e69b Adam M. Wilson @ pfe
system("qstat -u awilso10",intern=T)
218 35d59dc1 Adam M. Wilson @ pfe
## enter JobID here:
219 e5c2e69b Adam M. Wilson @ pfe
job="2031668.pbspl1.nas.nasa.gov"
220 35d59dc1 Adam M. Wilson @ pfe
221 d91f0663 Adam M. Wilson @ pfe
222
queue="devel"
223
nodes=50
224
walltime=2
225
226 35d59dc1 Adam M. Wilson @ pfe
### qsub script
227
cat(paste("
228
#PBS -S /bin/bash
229 d91f0663 Adam M. Wilson @ pfe
#PBS -l select=",nodes,":ncpus=8:mem=94
230
#PBS -l walltime=",walltime,":00:00
231 35d59dc1 Adam M. Wilson @ pfe
#PBS -j n
232
#PBS -m be
233
#PBS -N mod35_climate
234 d91f0663 Adam M. Wilson @ pfe
#PBS -q ",queue,"
235 35d59dc1 Adam M. Wilson @ pfe
#PBS -V
236
",if(delay) paste("#PBS -W depend=afterany:",job,sep="")," 
237
238 d91f0663 Adam M. Wilson @ pfe
CORES=",nodes*8,"
239 be64daa8 Adam M. Wilson @ pfe
HDIR=/u/armichae/pr/
240 aba23d60 Adam M. Wilson @ pfe
  source $HDIR/etc/environ.sh
241
  source /pleiades/u/awilso10/environ.sh
242
  source /pleiades/u/awilso10/.bashrc
243 35d59dc1 Adam M. Wilson @ pfe
IDIR=/nobackupp1/awilso10/mod35/
244
##WORKLIST=$HDIR/var/run/pxrRgrs/work.txt
245
WORKLIST=$IDIR/notdone_climate.txt
246
EXE=Rscript
247
LOGSTDOUT=$IDIR/log/climatology_stdout
248
LOGSTDERR=$IDIR/log/climatology_stderr
249 aba23d60 Adam M. Wilson @ pfe
### use mpiexec to parallelize across tiles
250 35d59dc1 Adam M. Wilson @ pfe
mpiexec -np $CORES pxargs -a $WORKLIST -p $EXE -v -v -v --work-analyze 1> $LOGSTDOUT 2> $LOGSTDERR
251
",sep=""),file=paste("mod35_climatology_qsub",sep=""))
252
253
## check files
254
system(paste("cat mod35_climatology_qsub",sep=""))        #qsub submission script
255
system(paste("cat notdone_climate.txt | head",sep=""))    #top of job file
256
system(paste("cat notdone_climate.txt | wc -l ",sep=""))  #number of jobs to be run
257
258
## Submit it
259
system(paste("qsub mod35_climatology_qsub",sep=""))
260
261
## check progress
262
system("qstat -u awilso10")
263
264
265
266
#################################################################
267
### copy the files back to Yale
268
269 5af36cdd Adam M. Wilson @ pfe
270 848e34fd Adam M. Wilson @ pfe
#system("ssh lou")
271 be64daa8 Adam M. Wilson @ pfe
#scp `find MOD35/summary -name "MOD35_h[0-9][0-9]v[0-9][0-9].nc"` adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod35/summary/
272 e4e30b86 Adam M. Wilson @ pfe
system("rsync -cavv `find summary -name \"MOD35_h[0-9][0-9]v[0-9][0-9]_mean.nc\"` adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod35/summary/")
273 5af36cdd Adam M. Wilson @ pfe
system("rsync -cavv `find summary -name \"MOD35_h[0-9][0-9]v[0-9][0-9].nc\"` adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod35/summary/")
274
275
276
system("gdalbuildvrt MOD35C6_2009.vrt summary/*2009mean.nc ") 
277
system("gdal_translate -stats -co \"COMPRESS=LZW\" -of GTiff MOD35C6_2009.vrt MOD35C6_2009.tif ")              
278
system("scp MOD35C6_2009.tif adamw@acrobates.eeb.24.177.10.190:/Users/adamw/Downloads/")
279 be64daa8 Adam M. Wilson @ pfe
exit
280 35d59dc1 Adam M. Wilson @ pfe