Project

General

Profile

« Previous | Next » 

Revision aba23d60

Added by Adam Wilson over 11 years ago

updated MOD35 processing script to work in separate temporary directories and transfer daily cloud data to lou for archiving in preparation for global processing (which cannot be done in personal directory). Also set up MOD35_Climatology script to be submitted to LDAN queue to run on lou rather than Pleiades. However environment still not set up correctly and script fails when reading netcdf files. Andrew is working on it.

View differences:

climate/procedures/MOD35_Climatology.r
1 1
### Process a folder of daily MOD35 HDF files to produce a climatology
2 2

  
3
.libPaths("/pleiades/u/awilso10/R/x86_64-unknown-linux-gnu-library/2.15/")
4

  
3 5
## import commandline arguments
4
library(getopt)
6
library(getopt,lib="/pleiades/u/awilso10/R/x86_64-unknown-linux-gnu-library/2.15/")
5 7
## get options
6 8
opta <- getopt(matrix(c(
7 9
                        'tile', 't', 1, 'character',
......
11 13
tile=opta$tile #tile="h11v08"
12 14
verbose=opta$verbose  #print out extensive information for debugging?
13 15

  
16
## set working directory
17
setwd("/u/awilso10/MOD35")
18

  
14 19
### directory containing daily files
15 20
outdir=paste("daily/",tile,"/",sep="")  #directory for separate daily files
16 21

  
......
38 43
  fdly$year=format(fdly$date,"%Y")
39 44
nrow(fdly)
40 45

  
41
## check validity (via npar and ntime) of nc files
42
#for(i in 1:nrow(fdly)){
43
#  fdly$ntime[i]<-as.numeric(system(paste("cdo -s ntime ",fdly$path[i]),intern=T))
44
#  fdly$npar[i]<-as.numeric(system(paste("cdo -s npar ",fdly$path[i]),intern=T))
45
#  fdly$fyear[i]<-as.numeric(system(paste("cdo -s showyear ",fdly$path[i]),intern=T))
46
#  fdly$fmonth[i]<-as.numeric(system(paste("cdo -s showmon ",fdly$path[i]),intern=T))
47
#  fdly$fvar[i]<-system(paste("cdo -s showvar ",fdly$path[i]),intern=T)
48
#  print(paste(i," out of ",nrow(fdly)," for year ",  fdly$fyear[i]))
49
#}
50

  
51 46
## print some summaries
52 47
if(verbose) print("Summary of available daily files")
53 48
print(table(fdly$year))
54 49
print(table(fdly$month))
55 50
#print(table(fdly$fvar))
56 51

  
57
## Identify which files failed test
58
#fdly$drop=is.na(fdly$npar)|fdly$fvar!=finalvars
59

  
60
## delete files that fail check?
61
delete=F
62
if(delete) {
63
  print(paste(sum(fdly$drop),"files will be deleted"))
64
  file.remove(as.character(fdly$path[fdly$drop]))
65
}
66
## remove dropped files from list
67
#fdly=fdly[!fdly$drop,]
68

  
69 52
#################################################################################
70 53
## Combine the year-by-year files into a single daily file in the summary directory (for archiving)
71 54
if(verbose) print("Merging daily files into single file output")
......
108 91

  
109 92
myear=as.integer(max(fdly$year))  #this year will be used in all dates of monthly climatologies (and day will = 15)
110 93

  
111
## subset dates
112
## due to bug (?) in CDO tools, only 10 years of data can be processed at a time or strange areas of NAs appear.
113
datesubset="-seldate,2000-01-01,2011-12-31"
114

  
115 94
## Monthly means
116 95
if(verbose) print("Calculating the monthly means")
117 96
system(paste("cdo -O -b I8 -v sorttimestamp -setyear,",myear," -setday,15 -mulc,-1 -subc,100 -ymonmean ",outdir2,"/MOD35_",tile,"_daily.nc ",tsdir,"/MOD35_",tile,"_ymonmean.nc",sep=""),wait=T)
......
127 106
#months=c("01","02","03","04","05","06","07","08","09","10","11","12")
128 107
#  month="02"
129 108

  
130
#ymonmean=function(month){
131
#  tfile=paste(tempdir(),"/",tile,"_",month,"_",Sys.getpid(),".txt",sep="")
132
#  write.table(paste(fdly$path[fdly$month==month],collapse=" "),tfile,col.names=F,row.names=F,quote=F)
133
#  system(paste("cat ",tfile," | ",ncopath,"ncra -O -o ",tsdir,"/MOD35_",tile,"_",month,".nc",sep=""))
134
#  system(paste("cdo -O -setyear,",myear," -setmon,",month," -setday,15 -mulc,-1 -subc,100  ",tsdir,"/MOD35_",tile,"_",month,".nc ",tsdir,"/MOD35_",tile,"_",month,"b.nc",sep=""))
135
#  system(paste(ncopath,"ncrename -v PClear,PCloud ",tsdir,"/MOD35_",tile,"_",month,"b.nc",sep=""))
136
#  system(paste(ncopath,"ncatted ",
137
#" -a long_name,PCloud,o,c,\"Mean Probability of Cloud\" ",
138
#" -a missing_value,PCloud,o,b,255 ",
139
#" -a _FillValue,PCloud,d,b,255 ",
140
#tsdir,"/MOD35_",tile,"_",month,"b.nc",sep=""))
141
#}
142
#mclapply(months,ymonmean)
143

  
144
## merge to a single file
145
#  system(paste("cdo -O -b I8 -v -mergetime ",paste(tsdir,"/MOD35_",tile,"_",months,"b.nc ",sep="",collapse=" ")," ",tsdir,"/MOD35_",tile,"_ymonmean.nc",sep=""))
146

  
147
#system(paste("cdo -v -O selindexbox,1,100,1100,1200 ",outdir2,"/MOD35_",tile,"_daily.nc ",tsdir,"/MOD35_",tile,"_dailysmall.nc",sep=""),wait=T)
148
#system(paste("cdo -v -O sorttimestamp -setyear,",myear," -setday,15 -mulc,-1 -subc,100 -ymonmean  ",outdir2,"/MOD35_",tile,"_dailysmall.nc ",tsdir,"/MOD35_",tile,"_ymonmean.nc",sep=""),wait=T)
149
#system(paste("cdo -O sorttimestamp -setyear,",myear," -setday,15 -mulc,-1 -subc,100 -timmean -selmon,2 ",datesubset," ",outdir2,"/MOD35_",tile,"_daily.nc ",tsdir,"/MOD35_",tile,"_ymonmean.nc",sep=""),wait=T)
150
#system(paste("cdo -O sorttimestamp -setyear,",myear," -setday,15 -mulc,-1 -subc,100 -monmean   ",outdir2,"/MOD35_",tile,"_daily.nc ",tsdir,"/MOD35_",tile,"_monmean.nc",sep=""),wait=T)
151
#system(paste("cdo -O sorttimestamp -setyear,",myear," -mulc,-1 -subc,100 -ydrunmean,30 ",datesubset," ",outdir2,"/MOD35_",tile,"_daily.nc ",tsdir,"/MOD35_",tile,"_ydrunmean30.nc",sep=""),wait=T)
152
#system(paste("scp ",tsdir,"/MOD35_",tile,"_ymonmean.nc adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod35/",sep=""))
109
#system(paste("cdo -O sorttimestamp -setyear,",myear," -mulc,-1 -subc,100 -ydrunmean,30 ",outdir2,"/MOD35_",tile,"_daily.nc ",outdir2,"/MOD35_",tile,"_ydrunmean30.nc &",sep=""),wait=T)
153 110
#system(paste("scp summary/MOD35_",tile,".nc adamw@acrobates.eeb.yale.edu:/data/personal/adamw/projects/interp/data/modis/mod35/",sep=""))
154 111
#system(paste("ncdump -h ",tsdir,"/MOD35_",tile,"_ymonmean.nc ",sep=""))
155 112

  
156 113

  
157 114
## Monthly standard deviation
158 115
if(verbose) print("Calculating the monthly SD")
159
system(paste("cdo -O -b I8 sorttimestamp -setyear,",myear," -setday,15 -ymonstd  ",outdir2,"/MOD35_",tile,"_daily.nc ",tsdir,"/MOD35_",tile,"_ymonstd.nc",sep=""))
116
system(paste("cdo -O -b I8 sorttimestamp -setyear,",myear," -setday,15 -ymonstd -monmean ",
117
    outdir2,"/MOD35_",tile,"_daily.nc ",
118
    tsdir,"/MOD35_",tile,"_ymonstd.nc",sep=""))
160 119
system(paste(ncopath,"ncrename -v PClear,PCloud_sd ",tsdir,"/MOD35_",tile,"_ymonstd.nc",sep=""))
161 120
system(paste(ncopath,"ncatted ",
162 121
" -a long_name,PCloud_sd,o,c,\"Standard Deviation of p(cloud)\" ",
climate/procedures/MOD35_L2_process.r
44 44
date=opta$date  
45 45
tile=opta$tile 
46 46
verbose=opta$verbose  #print out extensive information for debugging?
47
outdir=paste("daily/",tile,"/",sep="")  #directory for separate daily files
48 47
## get year and doy from date
49 48
year=format(as.Date(date,"%Y%m%d"),"%Y")
50 49
doy=format(as.Date(date,"%Y%m%d"),"%j")
51 50

  
52 51
if(platform=="pleiades"){
53
  ## location of MOD06 files
52
  ## location of MOD35 files
54 53
  datadir=paste("/nobackupp4/datapool/modis/MOD35_L2.006/",year,"/",doy,"/",sep="")
55 54
  ## path to some executables
56 55
  ncopath="/nasa/sles11/nco/4.0.8/gcc/mpt/bin/"
......
58 57
  ## path to swath database
59 58
  db="/nobackupp4/pvotava/DB/export/swath_geo.sql.sqlite3.db"
60 59
  ## specify working directory
61
  setwd("/nobackupp1/awilso10/mod35")
60
  outdir=paste("daily/",tile,"/",sep="")  #directory for separate daily files
61
  basedir="/nobackupp1/awilso10/mod35/tmp/" #directory to hold files temporarily before transferring to lou
62
  setwd(tempdir())
63
  ## grass database
62 64
  gisBase="/u/armichae/pr/grass-6.4.2/"
63 65
  ## path to MOD11A1 file for this tile to align grid/extent
64 66
  gridfile=list.files("/nobackupp4/datapool/modis/MOD11A1.005/2006.01.27",pattern=paste(tile,".*[.]hdf$",sep=""),recursive=T,full=T)[1]
......
69 71
if(platform=="litoria"){  #if running on local server, use different paths
70 72
  ## specify working directory
71 73
  setwd("~/acrobates/adamw/projects/interp")
74
  outdir=paste("daily/",tile,"/",sep="")  #directory for separate daily files
75
  basedir=outdir
72 76
  gisBase="/usr/lib/grass64"
73 77
   ## location of MOD06 files
74 78
  datadir="~/acrobates/adamw/projects/interp/data/modis/mod35"
......
87 91
if(verbose) print(paste("Processing tile",tile," for date",date))
88 92

  
89 93
## load tile information and get bounding box
90
load(file="modlandTiles.Rdata")
94
load(file="/nobackupp1/awilso10/mod35/modlandTiles.Rdata")
91 95
tile_bb=tb[tb$tile==tile,] ## identify tile of interest
92 96
upleft=paste(tile_bb$lat_max,tile_bb$lon_min) #northwest corner
93 97
lowright=paste(tile_bb$lat_min,tile_bb$lon_max) #southeast corner
......
181 185
#####################################################
182 186
## Process the gridded files to align exactly with MODLAND tile and produce a daily summary of multiple swaths
183 187
  
184
## Identify output file
185
ncfile=paste(outdir,"/MOD35_",tile,"_",date,".nc",sep="")  #this is the 'final' daily output file
186

  
187 188
## function to convert binary to decimal to assist in identifying correct values
188 189
## this is helpful when defining QA handling below, but isn't used in processing
189 190
## b2d=function(x) sum(x * 2^(rev(seq_along(x)) - 1)) #http://tolstoy.newcastle.edu.au/R/e2/help/07/02/10596.html
......
200 201
  tf=paste(tempdir(),"/grass", Sys.getpid(),"/", sep="")  #temporar
201 202
  if(!file.exists(tf)) dir.create(tf)
202 203
  ## create output directory if needed
203
  if(!file.exists(dirname(ncfile))) dir.create(dirname(ncfile),recursive=T)
204
  
204
  ## Identify output file
205
  ncfile=paste(basedir,"MOD35_",tile,"_",date,".nc",sep="")  #this is the 'final' daily output file
206
  if(!file.exists(dirname(ncfile))) dir.create(dirname(ncfile))
207
 
205 208
  ## set up temporary grass instance for this PID
206 209
  if(verbose) print(paste("Set up temporary grass session in",tf))
207 210
  initGRASS(gisBase=gisBase,gisDbase=tf,SG=as(td,"SpatialGridDataFrame"),override=T,location="mod06",mapset="PERMANENT",home=tf,pid=Sys.getpid())
......
299 302
ncfile,sep=""))
300 303
#system(paste(ncopath,"ncatted -a sourcecode,global,o,c,",script," ",ncfile,sep=""))
301 304
   
302
  
303
### delete the temporary files 
304
  unlink_.gislock()
305
  system(paste("rm -frR ",tf,sep=""))
306

  
307 305

  
308 306
## Confirm that the file has the correct attributes, otherwise delete it
309 307
ntime=as.numeric(system(paste("cdo -s ntime ",ncfile),intern=T))
......
314 312
      print(paste("FILE ERROR:  tile ",tile," and date ",date," was not outputted correctly, deleting... "))
315 313
      file.remove(ncfile)
316 314
    }
317
   
315

  
316
############  copy files to lou
317
if(platform=="pleiades"){
318
  archivedir=paste("MOD35/",outdir,"/",sep="")  #directory to create on lou
319
  system(paste("ssh -q bridge2 \"ssh -q lou mkdir -p ",archivedir,"\"",sep=""))
320
  system(paste("ssh -q bridge2 \"scp -q ",ncfile," lou:",archivedir,"\"",sep=""))
321
  file.remove(ncfile)
322
  file.remove(paste(ncfile,".aux.xml",sep=""))
323
}
324

  
325
  
326
### delete the temporary files 
327
  unlink_.gislock()
328
  system(paste("rm -frR ",tempdir(),sep=""))
329

  
330

  
318 331
  ## print out some info
319 332
print(paste(" ###################################################################               Finished ",date,
320 333
"################################################################"))
climate/procedures/Pleiades_MOD35.R
12 12
## get MODLAND tile information
13 13
tb=read.table("http://landweb.nascom.nasa.gov/developers/sn_tiles/sn_bound_10deg.txt",skip=6,nrows=648,header=T)
14 14
tb$tile=paste("h",sprintf("%02d",tb$ih),"v",sprintf("%02d",tb$iv),sep="")
15
tb=tb[tb$lon_min!=-999,]
15 16
save(tb,file="modlandTiles.Rdata")
16 17
load("modlandTiles.Rdata")
17 18

  
......
24 25
#tile="h09v04"  # Oregon
25 26
tile="h21v09"  #Kenya
26 27

  
27

  
28 28
### list of tiles to process
29 29
tiles=c("h11v08","h21v09","h08v04","h09v04","h08v05","h09v05","h20v11","h31v11")
30
tiles=tiles[c(1,4)]
30
tiles=c("h10v08","h11v08","h12v08","h10v07","h11v07","h12v07")  # South America
31

  
32
## subset to MODLAND tiles
33
  modlandtiles=system("ls -r /nobackupp4/datapool/modis/MOD11A1.005/2010* | grep hdf$ | cut -c18-23 | sort | uniq - ",intern=T)
34
 tb$land=tb$tile%in%modlandtiles
35
tiles=tb$tile[tb$land]
36

  
37
## subset tile corner matrix to tiles selected above
31 38
tile_bb=tb[tb$tile%in%tiles,]
32 39

  
33 40
### get list of files to process
34 41
datadir="/nobackupp4/datapool/modis/MOD35_L2.006/"
35
#datadir="/nobackupp1/awilso10/mod06/data"   #for data downloaded from 
36 42

  
37 43
outdir="daily/" #paste("daily/",tile,sep="")
38 44

  
......
62 68
  
63 69
if(verbose) print(paste("###############",nrow(fs)," swath IDs recieved from database"))
64 70

  
65

  
66 71
## get all unique dates
67 72
fs$dateid=format(as.Date(paste(fs$year,fs$day,sep=""),"%Y%j"),"%Y%m%d")
68 73
alldates=unique(fs$dateid[fs$exists])
69 74

  
70 75
#### Generate submission file
71
alldates=format(seq(as.Date("2000-03-01"),as.Date("2011-12-31"),1),"%Y%m%d")
76
startdate="2000-03-01"
77
stopdate="2011-12-31"
78
## just 2009
79
startdate="2009-01-01"
80
stopdate="2009-12-31"
81

  
82
alldates=format(seq(as.Date(startdate),as.Date(stopdate),1),"%Y%m%d")
83

  
72 84
proclist=expand.grid(date=alldates,tile=tiles)
73 85
proclist$year=substr(proclist$date,1,4)
74
  
86

  
87
## identify tile-dates with no available swaths
88
avail=unique(cbind.data.frame(tile=fs$tile,date=fs$dateid)[fs$exists, ])
89
proclist$avail=paste(proclist$tile,proclist$date,sep="_")%in%paste(avail$tile,avail$date,sep="_")
90

  
75 91
## identify which have been completed
76
fdone=data.frame(path=list.files(outdir,pattern="nc$",recursive=T))
92
fdone=data.frame(path=system("ssh lou 'find MOD35/daily -name \"*.nc\"' ",intern=T))
93
#fdone=data.frame(path=list.files(outdir,pattern="nc$",recursive=T))
77 94
fdone$date=substr(basename(as.character(fdone$path)),14,21)
78 95
fdone$tile=substr(basename(as.character(fdone$path)),7,12)
79

  
80
## identify which date-tiles have already been run
81 96
proclist$done=paste(proclist$tile,proclist$date,sep="_")%in%substr(basename(as.character(fdone$path)),7,21)
82 97

  
83 98
### report on what has already been processed
......
87 102
script="/u/awilso10/environmental-layers/climate/procedures/MOD35_L2_process.r"
88 103

  
89 104
## write the table processed by mpiexec
90
write.table(paste("--verbose ",script," --date ",proclist$date[!proclist$done]," --verbose T --tile ",proclist$tile[!proclist$done],sep=""),
105
tp=(!proclist$done)&proclist$avail  #date-tiles to process
106
table(Available=proclist$avail,Completed=proclist$done)
107

  
108
write.table(paste("--verbose ",script," --date ",proclist$date[tp]," --verbose T --tile ",proclist$tile[tp],sep=""),
91 109
file=paste("notdone.txt",sep=""),row.names=F,col.names=F,quote=F)
92 110

  
93 111
### qsub script
......
132 150
### Now submit the script to generate the climatologies
133 151

  
134 152
tiles
135
ctiles=tiles#[c(2)]  #subset to only some tiles (for example if some aren't finished yet)?
136
climatescript="/u/awilso10/environmental-layers/climate/procedures/MOD35_Climatology.r"
153
ctiles=tiles[c(1:3)]  #subset to only some tiles (for example if some aren't finished yet)?
154
climatescript="/pleiades/u/awilso10/environmental-layers/climate/procedures/MOD35_Climatology.r"
137 155

  
138 156
## write the table processed by mpiexec
139 157
write.table(paste("--verbose ",climatescript," --verbose T --tile ",ctiles,sep=""),
......
149 167
### qsub script
150 168
cat(paste("
151 169
#PBS -S /bin/bash
152
##PBS -l select=50:ncpus=8:mpiprocs=8
153
#PBS -l select=4:ncpus=4:mpiprocs=4
154
#PBS -l walltime=2:00:00
170
#PBS -l select=1:ncpus=16:mem=94
171
#PBS -l walltime=24:00:00
155 172
#PBS -j n
156 173
#PBS -m be
157 174
#PBS -N mod35_climate
158
#PBS -q devel
175
#PBS -q ldan
159 176
#PBS -V
160 177
",if(delay) paste("#PBS -W depend=afterany:",job,sep="")," 
161 178

  
162 179
CORES=16
163
HDIR=/u/armichae/pr/
164
#  source $HDIR/etc/environ.sh
165
  source /u/awilso10/environ.sh
166
  source /u/awilso10/.bashrc
180
HDIR=/pleiades/u/armichae/pr/
181
  source $HDIR/etc/environ.sh
182
  source /pleiades/u/awilso10/environ.sh
183
  source /pleiades/u/awilso10/.bashrc
184
  source /pleiades/u/awilso10/moduleload
167 185
IDIR=/nobackupp1/awilso10/mod35/
168 186
##WORKLIST=$HDIR/var/run/pxrRgrs/work.txt
169 187
WORKLIST=$IDIR/notdone_climate.txt
170 188
EXE=Rscript
171 189
LOGSTDOUT=$IDIR/log/climatology_stdout
172 190
LOGSTDERR=$IDIR/log/climatology_stderr
173
### use mpiexec to parallelize across days
191
### use mpiexec to parallelize across tiles
174 192
mpiexec -np $CORES pxargs -a $WORKLIST -p $EXE -v -v -v --work-analyze 1> $LOGSTDOUT 2> $LOGSTDERR
175 193
",sep=""),file=paste("mod35_climatology_qsub",sep=""))
176 194

  

Also available in: Unified diff