#### Script to facilitate processing of MOD06 data

setwd("/nobackupp1/awilso10/mod06")

library(rgdal)
library(raster)
library(RSQLite)

verbose=T

## get MODLAND tile information
tb=read.table("http://landweb.nascom.nasa.gov/developers/sn_tiles/sn_bound_10deg.txt",skip=6,nrows=648,header=T)
tb$tile=paste("h",sprintf("%02d",tb$ih),"v",sprintf("%02d",tb$iv),sep="")
save(tb,file="modlandTiles.Rdata")
load("modlandTiles.Rdata")
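
## Optional sanity check (added sketch, not part of the original workflow): inspect the
## bounding box of one tile used below to confirm the tile table loaded correctly.
# print(tb[tb$tile=="h21v09",c("tile","lon_min","lon_max","lat_min","lat_max")])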

## delete temporary log file that can grow to GB
system("rm /nobackupp1/awilso10/software/heg/TOOLKIT_MTD/runtime/LogStatus")

tile="h11v08"  # Venezuela
#tile="h11v07"  # Venezuela coast
#tile="h09v04"  # Oregon
tile="h21v09"  # Kenya

### list of tiles to process
tiles=c("h11v08","h21v09","h08v04","h09v04","h08v05","h09v05","h20v11","h31v11")
tiles=tiles[c(5,7,8)]
tile_bb=tb[tb$tile%in%tiles,]
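
## Optional check (added sketch): confirm that every requested tile was matched in the
## MODLAND tile table before querying the swath database.
# stopifnot(nrow(tile_bb)==length(tiles))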

### get list of files to process
datadir="/nobackupp4/datapool/modis/MOD06_L2.005/"
#datadir="/nobackupp1/awilso10/mod06/data"   #for data downloaded from 

outdir="daily/" #paste("daily/",tile,sep="")

## find swaths in region from sqlite database for the specified date/tile
## path to swath database
db="/nobackupp4/pvotava/DB/export/swath_geo.sql.sqlite3.db"
con=dbConnect("SQLite", dbname = db)
fs=do.call(rbind.data.frame,lapply(1:nrow(tile_bb),function(i){
  d=dbGetQuery(con,paste("SELECT * from swath_geo
            WHERE east>=",tile_bb$lon_min[i]," AND
                  west<=",tile_bb$lon_max[i]," AND
                  north>=",tile_bb$lat_min[i]," AND
                  south<=",tile_bb$lat_max[i])
    )
  d$tile=tile_bb$tile[i]
  print(paste("Finished tile",tile_bb$tile[i]))
  return(d)
}))
dbDisconnect(con)
fs$id=substr(fs$id,7,19)

### Identify which swaths are available in the datapool
swaths=data.frame(path=list.files(datadir,pattern="hdf$",recursive=T,full=T),stringsAsFactors=F)  #all swaths in data pool
swaths$id=substr(basename(swaths$path),10,22)
fs$exists=fs$id%in%swaths$id
fs$path=swaths$path[match(fs$id,swaths$id)]

if(verbose) print(paste("###############",nrow(fs)," swath IDs received from database"))
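
## Optional check (added sketch): tabulate, per tile, how many database swaths are
## actually present in the datapool. The substr() calls above assume the standard MODIS
## swath naming convention (MOD06_L2.AYYYYDDD.HHMM.005.*.hdf), so characters 10-22 of
## the basename give the "AYYYYDDD.HHMM" acquisition id.
# print(table(tile=fs$tile,exists=fs$exists))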

## get all unique dates
fs$dateid=format(as.Date(paste(fs$year,fs$day,sep=""),"%Y%j"),"%Y%m%d")
alldates=unique(fs$dateid[fs$exists])

#### Generate submission file
alldates=format(seq(as.Date("2000-03-01"),as.Date("2011-12-31"),1),"%Y%m%d")  # use the full date range (overwrites the swath-derived dates above)
proclist=expand.grid(date=alldates,tile=tiles)
proclist$year=substr(proclist$date,1,4)

## identify which have been completed
## (daily output files are assumed to be named MOD06_<tile>_<date>.nc, so characters
##  7-12 of the basename give the tile and 14-21 give the date)
fdone=data.frame(path=list.files(outdir,pattern="nc$",recursive=T))
fdone$date=substr(basename(as.character(fdone$path)),14,21)
fdone$tile=substr(basename(as.character(fdone$path)),7,12)

## identify which date-tiles have already been run
proclist$done=paste(proclist$tile,proclist$date,sep="_")%in%substr(basename(as.character(fdone$path)),7,21)

### report on what has already been processed
print(paste("Overview of completed tile-days (",round(100*sum(proclist$done)/nrow(proclist),2),"%)"))
table(tile=proclist$tile[proclist$done],year=proclist$year[proclist$done])
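
## Optional summary (added sketch): count the date-tile jobs remaining for each tile.
# print(table(proclist$tile[!proclist$done]))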

#updatedone=F #update the "done" list using the 
#if(updatedone&exists("fdly")){  #update using table from below
#  done[alldates%in%fdly$dateid[fdly$drop]]=F
#}

## Identify which dates still need to be processed
## This vector will be used to tell mpiexec which days to include
#notdone=alldates[!done]

script="/u/awilso10/environmental-layers/climate/procedures/MOD06_L2_process.r"
climatescript="/u/awilso10/environmental-layers/climate/procedures/MOD06_Climatology.r"

## write the table processed by mpiexec
write.table(paste("--verbose ",script," --date ",proclist$date[!proclist$done]," --verbose T --tile ",proclist$tile[!proclist$done],sep=""),
            file="notdone.txt",row.names=F,col.names=F,quote=F)
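
## Each line of notdone.txt becomes one pxargs task. Given the paste() above, a line
## looks like the following (example values; h08v05 is one of the selected tiles):
##   --verbose /u/awilso10/environmental-layers/climate/procedures/MOD06_L2_process.r --date 20000301 --verbose T --tile h08v05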

### qsub script
cat("
#PBS -S /bin/bash
#PBS -l select=50:ncpus=8:mpiprocs=8
##PBS -l select=50:ncpus=8:mpiprocs=8
##PBS -l select=2:ncpus=4:mpiprocs=4
#PBS -l walltime=5:00:00
#PBS -j n
#PBS -m be
#PBS -N mod06
#PBS -q normal
#PBS -V

CORES=400
HDIR=/u/armichae/pr/
  source $HDIR/etc/environ.sh
  source /u/awilso10/.bashrc
IDIR=/nobackupp1/awilso10/mod06/
##WORKLIST=$HDIR/var/run/pxrRgrs/work.txt
WORKLIST=$IDIR/notdone.txt
EXE=Rscript
LOGSTDOUT=$IDIR/log/swath_stdout
LOGSTDERR=$IDIR/log/swath_stderr
### use mpiexec to parallelize across days
mpiexec -np $CORES pxargs -a $WORKLIST -p $EXE -v -v -v --work-analyze 1> $LOGSTDOUT 2> $LOGSTDERR
",file="mod06_qsub")

### Check the files
system("cat mod06_qsub")
system("cat notdone.txt | head")
system("cat notdone.txt | wc -l")

## Submit it
system("qsub mod06_qsub")

#######################################################
### Now submit the script to generate the climatologies

tiles
ctiles=tiles[c(1,3)]  #subset to only some tiles (for example if some aren't finished yet)?
climatescript="/u/awilso10/environmental-layers/climate/procedures/MOD06_Climatology.r"

## write the table processed by mpiexec
write.table(paste("--verbose ",climatescript," --verbose T --tile ",ctiles,sep=""),
            file="notdone_climate.txt",row.names=F,col.names=F,quote=F)

### qsub script
cat("
#PBS -S /bin/bash
#PBS -l select=1:ncpus=8:mpiprocs=8
##PBS -l select=2:ncpus=4:mpiprocs=4
#PBS -l walltime=5:00:00
#PBS -j n
#PBS -m be
#PBS -N mod06_climate
#PBS -q normal
#PBS -V

CORES=8
HDIR=/u/armichae/pr/
  source $HDIR/etc/environ.sh
  source /u/awilso10/.bashrc
IDIR=/nobackupp1/awilso10/mod06/
##WORKLIST=$HDIR/var/run/pxrRgrs/work.txt
WORKLIST=$IDIR/notdone_climate.txt
EXE=Rscript
LOGSTDOUT=$IDIR/log/climatology_stdout
LOGSTDERR=$IDIR/log/climatology_stderr
### use mpiexec to parallelize across tiles
mpiexec -np $CORES pxargs -a $WORKLIST -p $EXE -v -v -v --work-analyze 1> $LOGSTDOUT 2> $LOGSTDERR
### Now process the climatologies
",file="mod06_climatology_qsub")

## check files
system("cat mod06_climatology_qsub")       #qsub submission script
system("cat notdone_climate.txt | head")   #top of job file
system("cat notdone_climate.txt | wc -l")  #number of jobs to be run

## Submit it
system("qsub mod06_climatology_qsub")

## check progress
system("qstat -u awilso10")

## start interactive job on compute node for debugging
# system("qsub -I -l walltime=2:00:00 -lselect=2:ncpus=16:model=san -q devel")

#################################################################
### copy the files back to Yale
summarydir="summary"

sumfiles=list.files(summarydir,pattern="^MOD06_.*[0-9][.]nc",full=T)

system(paste("scp ",paste(sumfiles,collapse=" ")," adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod06/summary",sep=""))

#system(paste("scp ",tsdir,"/MOD06_",tile,"*.nc adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod06/summary",sep=""))
#system(paste("scp ",paste(fs$path[40421:40422],collapse=" ")," adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod06/swaths",sep=""))