Project

General

Profile

« Previous | Next » 

Revision 92fd8a10

Added by Adam Wilson about 12 years ago

Updated script to accurately set paths for HEG tool to facilitate parallel processing of gridding procedure using swtif. Multicore package is used to parallelize which limits processing to a single node. Next step is to explore use of foreach package to allow multiple nodes

View differences:

climate/procedures/MOD06_L2_process.r
12 12
require(spgrass6)
13 13
library(multicore)
14 14

  
15
## number of cores to use
16
ncores=as.numeric(system("echo $NCORES",intern=T))
17

  
15 18
## specify some working directories
16 19
setwd("/nobackupp1/awilso10/mod06")
17 20

  
18
outdir="2_daily"
21
outdir="2_daily"  #directory for separate daily files
22
outdir2="3_summary" #directory for combined daily files and summarized files
19 23

  
20 24
print(paste("tempdir()=",tempdir()))
21 25
print(paste("TMPDIR=",Sys.getenv("TMPDIR")))
......
23 27
### get list of files to process
24 28
datadir="/nobackupp4/datapool/modis/MOD06_L2.005/"
25 29

  
26
fs=data.frame(
27
  path=list.files(datadir,full=T,recursive=T,pattern="hdf"),
28
  file=basename(list.files(datadir,full=F,recursive=T,pattern="hdf")))
30
fs=data.frame(path=list.files(datadir,full=T,recursive=T,pattern="hdf"),stringsAsFactors=F)
31
fs$file=basename(fs$path)
29 32
fs$date=as.Date(substr(fs$file,11,17),"%Y%j")
30 33
fs$month=format(fs$date,"%m")
31 34
fs$year=format(fs$date,"%Y")
......
48 51

  
49 52

  
50 53
## identify which have been completed
51
done=alldates%in%substr(list.files(outdir),5,12)
54
done=alldates%in%substr(list.files(outdir),7,14)
52 55
table(done)
53 56
notdone=alldates[!done]  #these are the dates that still need to be processed
54 57

  
......
106 109
  cat(c(hdr,grp)    , file=paste(tempdir(),"/",basename(file),"_MODparms.txt",sep=""))
107 110
  ## now run the swath2grid tool
108 111
  ## write the tiff file
109
  log=system(paste("/nobackupp4/pvotava/software/heg/bin/swtif -p ",paste(tempdir(),"/",basename(file),"_MODparms.txt -d",sep=""),sep=""),intern=T)
112
#  log=system(paste("/nobackupp4/pvotava/software/heg/bin/swtif -p ",paste(tempdir(),"/",basename(file),"_MODparms.txt -d",sep=""),sep=""),intern=T)
113
  log=system(paste("/nobackupp1/awilso10/software/heg/bin/swtif -p ",paste(tempdir(),"/",basename(file),"_MODparms.txt -d",sep=""),sep=""),intern=T)
110 114
  ## clean up temporary files in working directory
111 115
#  file.remove(paste("filetable.temp_",pid,sep=""))
112 116
  print(log)
......
242 246
  system(paste("/nasa/nco/3.9.8/bin/ncap -O -s 'time[time]=",as.integer(fs$date[fs$dateid==date]-as.Date("2000-01-01")),"'",ncfile," ",ncfile))
243 247
  system(paste("/nasa/nco/3.9.8/bin/ncatted -a calendar,time,c,c,\"standard\" -a long_name,time,c,c,\"time\" -a units,time,c,c,\"days since 2000-01-01 12:00:00\"",ncfile))
244 248
  system(paste("/nasa/nco/3.9.8/bin/ncrename -v Band1,CER -v Band2,COT -v Band3,CLD",ncfile))
245
  system(paste("/nasa/nco/3.9.8/bin/ncatted -a scale_factor,CER,o,d,0.001 -a missing_value,CER,o,d,-32768 -a long_name,CER,o,c,\"Effective Radius\"",ncfile))
246
  system(paste("/nasa/nco/3.9.8/bin/ncatted -a scale_factor,COT,o,d,0.001 -a missing_value,CER,o,d,-32768 -a long_name,COT,o,c,\"Optical Thickness\"",ncfile))
247
  system(paste("/nasa/nco/3.9.8/bin/ncatted -a scale_factor,CLD,o,d,0.001 -a missing_value,CER,o,d,-32768 -a long_name,CLD,o,c,\"Cloud Mask\"",ncfile))
249
  system(paste("/nasa/nco/3.9.8/bin/ncatted -a scale_factor,CER,o,d,0.01 -a units,CER,o,c,\"micron\" -a missing_value,CER,o,d,-32768 -a long_name,CER,o,c,\"Cloud Particle Effective Radius\"",ncfile))
250
  system(paste("/nasa/nco/3.9.8/bin/ncatted -a scale_factor,COT,o,d,0.01 -a units,COT,o,c,\"none\" -a missing_value,COT,o,d,-32768 -a long_name,COT,o,c,\"Cloud Optical Thickness\"",ncfile))
251
  system(paste("/nasa/nco/3.9.8/bin/ncatted -a scale_factor,CLD,o,d,0.01 -a units,CLD,o,c,\"none\" -a missing_value,CLD,o,d,-32768 -a long_name,CLD,o,c,\"Cloud Mask\"",ncfile))
248 252
   
249 253
  
250 254
### delete the temporary files 
251 255
  unlink_.gislock()
252 256
  system("/nobackupp1/awilso10/software/grass-6.4.3svn/etc/clean_temp")
253
  system(paste("rm -R ",tf,sep=""))
257
  system(paste("rm -rR ",tf,sep=""))
254 258
}
255 259

  
256 260

  
......
284 288
##mod06(date,tile)
285 289

  
286 290
## run it for all dates
287
mclapply(notdone,mod06,tile)
291
mclapply(notdone,mod06,tile,mc.cores=ncores/2) # use ncores/2 because system() commands can add second process for each spawned R
292

  
293

  
288 294

  
295
################################################################################
296
## now generate the climatologies
297
fdly=data.frame(
298
  path=list.files(outdir,pattern="nc$",full=T),
299
  file=list.files(outdir,pattern="nc$"))
300
fdly$date=as.Date(substr(fdly$file,7,14),"%Y%m%d")
301
fdly$month=format(fdly$date,"%m")
302
fdly$year=format(fdly$date,"%Y")
303

  
304
## check validity (via npar and ntime) of nc files
305
for(i in 1:nrow(fdly)){
306
  fdly$ntime[i]=as.numeric(system(paste("cdo  sinfo ",fdly$path[i]),intern=T))
307
  fdly$npar[i]=as.numeric(system(paste("cdo -s npar ",fdly$path[i]),intern=T))
308
  print(i)
309
}
289 310

  
311
## Combine all days within years into a single file (can't mergetime all days at once because this opens too many files)
312
tsdir=paste(tempdir(),"/summary",sep="")
313
dir.create(tsdir)
314
lapply(unique(fdly$year),function(y){
315
  system(paste("cdo -O mergetime ",paste(fdly$path[fdly$year==y],collapse=" ")," ",tsdir,"/MOD09_",tile,"_",y,"_daily.nc",sep=""))
316
  print(paste("Finished merging daily files for year",y))
317
})
318
## Combine the year-by-year files into a single daily file
319
system(paste("cdo -O mergetime ",paste(list.files(tsdir,full=T,pattern="daily[.]nc$"),collapse=" ")," ",outdir2,"/MOD09_",tile,"_daily.nc",sep=""))
320

  
321
system(paste("cdo -O monmean ",outdir2,"/MOD09_",tile,"_daily.nc ",outdir2,"/",tile,"_monmean.nc",sep=""))
322
system(paste("cdo -O ymonmean ",outdir2,"/MOD09_",tile,"_daily.nc ",outdir2,"/",tile,"_ymonmean.nc",sep=""))
323
system(paste("cdo -O ymonstd ",outdir2,"/MOD09_",tile,"_daily.nc ",outdir2,"/",tile,"_ymonstd.nc",sep=""))
324

  
325
print("Finished!   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
290 326
## quit R
291 327
q("no")
292 328
 
climate/procedures/Pleiades.R
11 11

  
12 12
cat(paste("
13 13
#PBS -S /bin/bash
14
#PBS -l select=32:ncpus=4:mpiprocs=4:model=wes
15
####old PBS -l select=64:ncpus=4:mpiprocs=4:model=wes
14
##PBS -l select=1:ncpus=16:model=san
15
###PBS -l select=4:ncpus=8:model=neh
16
#PBS -l select=1:ncpus=12:model=wes
16 17
####### old: select=48:ncpus=8:mpiprocs=8:model=neh
17
#PBS -l walltime=1:00:00
18
#PBS -l walltime=10:00:00
18 19
#PBS -j oe
19 20
#PBS -m e
20 21
#PBS -V
21 22
####PBS -W group_list=s1007
22
#PBS -q devel
23
###PBS -q devel
23 24
#PBS -o log/log_^array_index^
24 25
#PBS -o log/log_DataCompile
25 26
#PBS -M adam.wilson@yale.edu
......
35 36
  source /usr/local/lib/global.profile
36 37
  source /u/awilso10/.bashrc
37 38
## export a few important variables
39
  export NCORES=24  # use to limit mclapply() to set nubmer of cores, should be select*ncpus above
38 40
  export PATH=$PATH:/nobackupp1/awilso10/bin:/nobackupp1/awilso10/software/bin
39 41
  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/nobackupp1/awilso10/software/lib
40
  export MRTDATADIR=/nobackupp1/awilso10/software/heg/data
41
  export PGSHOME=/nobackupp1/awilso10/software/heg
42
  export MRTBINDIR=/nobackupp1/awilso10/software/TOOLKIT_MTD
43 42
  export R_LIBS=\"/u/awilso10/R/x86_64-unknown-linux-gnu-library/2.15/\"
44 43
  export TMPDIR=/nobackupp1/awilso10/mod06/tmp
44
## HEG related variables
45
  export MRTDATADIR=/nobackupp1/awilso10/software/heg/data
46
  export PGSHOME=/nobackupp1/awilso10/software/heg/TOOLKIT_MTD
47
  export HEGUSER=ME
45 48
## load modules
46 49
  module load gcc mpi-sgi/mpt.2.06r6 hdf4 udunits R nco
47 50
## Run the script!
51
## current version not parallelizing across nodes!
48 52
  TMPDIR=$TMPDIR Rscript --verbose --vanilla /u/awilso10/environmental-layers/climate/procedures/MOD06_L2_process.r 
49 53
exit 0
54
exit 0
50 55
",sep=""),file="MOD06_process")
51 56

  
52 57
### Check the file
53 58
system("cat MOD06_process")
54 59
#system("cat ~/environmental-layers/climate/procedures/MOD06_L2_process.r")
55 60

  
61
## check queue status
62
system("/u/scicon/tools/bin/node_stats.sh")
63

  
56 64
## Submit it (and keep the pid)!
57 65
pid=system("qsub MOD06_process",intern=T); pid; pid=strsplit(pid,split="[.]")[[1]][1]
58 66

  
59 67
#system("qsub MOD06_process")
60 68

  
61 69
## work in interactive mode
62
#system("qsub -I -l walltime=1:00:00 -lselect=1:ncpus=2:model=wes -q devel")
70
# system("qsub -I -l walltime=1:00:00 -lselect=2:ncpus=16:model=san -q devel")
63 71

  
64 72
## check progress
65 73
system("qstat -u awilso10")
74
system(paste("/u/scicon/tools/bin/qps ",pid))
66 75
system(paste("qstat -t -x",pid))
67 76

  
68 77
system("qstat devel ") 

Also available in: Unified diff