Project

General

Profile

« Previous | Next » 

Revision 2c238837

Added by Adam Wilson about 12 years ago

Currently submission hangs using mqueue when loading rgdal library. Will attempt to add a separate submission script that first loads modules before running Rscript on the slaves

View differences:

climate/procedures/MOD06_L2_process.r
1
#!/bin/r
2

  
1 3
###################################################################################
2 4
###  R code to aquire and process MOD06_L2 cloud data from the MODIS platform
3 5

  
6
## load command line arguments (mname)
7
args=(commandArgs(TRUE)) ##args is now a list of character vectors
8
## Then cycle through each element of the list and evaluate the expressions.
9
eval(parse(text=args))
10

  
11
system("source ~/moduleload")
12

  
13
print(args)
14

  
15
tile="h11v08"
16
outdir="2_daily"  #directory for separate daily files
17
outdir2="3_summary" #directory for combined daily files and summarized files
18

  
19
print(paste("Processing tile",tile," for date",date))
4 20

  
5 21
## load libraries
6
require(rgdal)
7 22
require(reshape)
8 23
#require(ncdf4)
9 24
require(geosphere)
10 25
require(raster)
26
#require(rgdal)
11 27
require(spgrass6)
12
## packages for parallelization
13
#library(foreach)
14
#library(doMPI)
15
library(multicore)
16 28

  
17
## register cluster and number of cores to use
18
ncores=as.numeric(system("echo $NCORES",intern=T))
19
#cl=startMPIcluster(20,verbose=F)
20
#registerDoMPI(cl)
21 29

  
22 30
## specify some working directories
23 31
setwd("/nobackupp1/awilso10/mod06")
24 32

  
25
outdir="2_daily"  #directory for separate daily files
26
outdir2="3_summary" #directory for combined daily files and summarized files
27

  
28 33
print(paste("tempdir()=",tempdir()))
29 34
print(paste("TMPDIR=",Sys.getenv("TMPDIR")))
30 35

  
31
### get list of files to process
32
datadir="/nobackupp4/datapool/modis/MOD06_L2.005/"
33

  
34
fs=data.frame(path=list.files(datadir,full=T,recursive=T,pattern="hdf"),stringsAsFactors=F)
35
fs$file=basename(fs$path)
36
fs$date=as.Date(substr(fs$file,11,17),"%Y%j")
37
fs$month=format(fs$date,"%m")
38
fs$year=format(fs$date,"%Y")
39
fs$time=substr(fs$file,19,22)
40
fs$datetime=as.POSIXct(strptime(paste(substr(fs$file,11,17),substr(fs$file,19,22)), '%Y%j %H%M'))
41
fs$dateid=format(fs$date,"%Y%m%d")
42
fs$path=as.character(fs$path)
43
fs$file=as.character(fs$file)
44

  
45
## get all unique dates
46
alldates=unique(fs$dateid)
36
## load ancillary data
37
load(file="allfiles.Rdata")
47 38

  
48 39
## load tile information
49 40
load(file="modlandTiles.Rdata")
......
51 42
#modt=readOGR("modgrid","modis_sinusoidal_grid_world",)
52 43
#modt@data[,colnames(tb)[3:6]]=tb[match(paste(modt$h,modt$v),paste(tb$ih,tb$iv)),3:6]
53 44
#write.csv(modt@data,file="modistile.csv")
54
tile="h11v08"   #can move this to submit script if needed
55

  
56

  
57
## identify which have been completed
58
done=alldates%in%substr(list.files(outdir),7,14)
59
table(done)
60
notdone=alldates[!done]  #these are the dates that still need to be processed
61 45

  
62 46
## vars to process
63 47
vars=as.data.frame(matrix(c(
......
120 104
  print(log)
121 105
  ## confirm file is present
122 106
  print(paste("Confirming output file (",outfile,") is present and readable by GDAL"))
123
  gi=GDALinfo(outfile);  print(gi)
107
  system(paste("gdalinfo ",outfile))
124 108
  print(paste("Finished ", file))
125 109
}
126 110

  
......
144 128
  tf=paste(tempdir(),"/grass", Sys.getpid(),"/", sep="")
145 129
  print(paste("Set up temporary grass session in",tf))
146 130

  
147
  ## load a MOD11A1 file to define grid
148
  gridfile=list.files("/nobackupp4/datapool/modis/MOD11A1.005/2006.01.27/",pattern=paste(tile,".*hdf$",sep=""),full=T)[1]
149
  td=readGDAL(paste("HDF4_EOS:EOS_GRID:\"",gridfile,"\":MODIS_Grid_Daily_1km_LST:Night_view_angl",sep=""))
150
  projection(td)="+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs +datum=WGS84 +ellps=WGS84 "
151

  
152 131
  ## set up temporary grass instance for this PID
153 132
  initGRASS(gisBase="/nobackupp1/awilso10/software/grass-6.4.3svn",gisDbase=tf,SG=td,override=T,location="mod06",mapset="PERMANENT",home=tf,pid=Sys.getpid())
154 133
  system(paste("g.proj -c proj4=\"",projection(td),"\"",sep=""))
......
161 140

  
162 141
  ## Identify which files to process
163 142
  tfs=fs$file[fs$dateid==date]
143
  tfs=tfs[tfs%in%list.files(tempdir())]
164 144
  nfs=length(tfs)
165 145

  
166 146
  ## loop through scenes and process QA flags
......
170 150
     execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",file,"\":mod06:Cloud_Mask_1km_0",sep=""),
171 151
              output=paste("CM1_",i,sep=""),flags=c("overwrite","o")) ; print("")
172 152
    ## extract cloudy and 'confidently clear' pixels
153

  
173 154
    system(paste("r.mapcalc <<EOF
174 155
                CM_cloud_",i," =  ((CM1_",i," / 2^0) % 2) == 1  &&  ((CM1_",i," / 2^1) % 2^2) == 0 
175 156
                CM_clear_",i," =  ((CM1_",i," / 2^0) % 2) == 1  &&  ((CM1_",i," / 2^1) % 2^2) == 3 
......
289 270
 
290 271
## test it
291 272
##date=notdone[1]
292
##mod06(date,tile)
273
mod06(date,tile)
293 274

  
294 275
## run it for all dates
295
mclapply(notdone,mod06,tile,mc.cores=ncores) # use ncores/2 because system() commands can add second process for each spawned R
296

  
276
#mclapply(notdone,mod06,tile,mc.cores=ncores) # use ncores/2 because system() commands can add second process for each spawned R
297 277
#foreach(i=notdone[1:3],.packages=(.packages())) %dopar% mod06(i,tile)
298

  
299 278
#foreach(i=1:20) %dopar% print(i)
300 279

  
301 280

  
302
################################################################################
303
## now generate the climatologies
304
fdly=data.frame(
305
  path=list.files(outdir,pattern="nc$",full=T),
306
  file=list.files(outdir,pattern="nc$"))
307
fdly$date=as.Date(substr(fdly$file,7,14),"%Y%m%d")
308
fdly$month=format(fdly$date,"%m")
309
fdly$year=format(fdly$date,"%Y")
310

  
311
## check validity (via npar and ntime) of nc files
312
for(i in 1:nrow(fdly)){
313
  fdly$ntime[i]=as.numeric(system(paste("cdo  sinfo ",fdly$path[i]),intern=T))
314
  fdly$npar[i]=as.numeric(system(paste("cdo -s npar ",fdly$path[i]),intern=T))
315
  print(i)
316
}
317

  
318
## Combine all days within years into a single file (can't mergetime all days at once because this opens too many files)
319
tsdir=paste(tempdir(),"/summary",sep="")
320
dir.create(tsdir)
321
lapply(unique(fdly$year),function(y){
322
  system(paste("cdo -O mergetime ",paste(fdly$path[fdly$year==y],collapse=" ")," ",tsdir,"/MOD09_",tile,"_",y,"_daily.nc",sep=""))
323
  print(paste("Finished merging daily files for year",y))
324
})
325
## Combine the year-by-year files into a single daily file
326
system(paste("cdo -O mergetime ",paste(list.files(tsdir,full=T,pattern="daily[.]nc$"),collapse=" ")," ",outdir2,"/MOD09_",tile,"_daily.nc",sep=""))
327

  
328
system(paste("cdo -O monmean ",outdir2,"/MOD09_",tile,"_daily.nc ",outdir2,"/",tile,"_monmean.nc",sep=""))
329
system(paste("cdo -O ymonmean ",outdir2,"/MOD09_",tile,"_daily.nc ",outdir2,"/",tile,"_ymonmean.nc",sep=""))
330
system(paste("cdo -O ymonstd ",outdir2,"/MOD09_",tile,"_daily.nc ",outdir2,"/",tile,"_ymonstd.nc",sep=""))
331

  
332
print("Finished!   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
333
## quit R
334 281
q("no")
335
 
336

  
climate/procedures/Pleiades.R
7 7
tb$tile=paste("h",sprintf("%02d",tb$ih),"v",sprintf("%02d",tb$iv),sep="")
8 8
save(tb,file="modlandTiles.Rdata")
9 9

  
10
### Submission script
10
outdir="2_daily"  #directory for separate daily files
11
outdir2="3_summary" #directory for combined daily files and summarized files
12

  
13
  ## load a MOD11A1 file to define grid
14
gridfile=list.files("/nobackupp4/datapool/modis/MOD11A1.005/2006.01.27/",pattern=paste(tile,".*hdf$",sep=""),full=T)[1]
15
  td=readGDAL(paste("HDF4_EOS:EOS_GRID:\"",gridfile,"\":MODIS_Grid_Daily_1km_LST:Night_view_angl",sep=""))
16
  projection(td)="+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs +datum=WGS84 +ellps=WGS84 "
17

  
18

  
19
### get list of files to process
20
datadir="/nobackupp4/datapool/modis/MOD06_L2.005/"
21

  
22
fs=data.frame(path=list.files(datadir,full=T,recursive=T,pattern="hdf"),stringsAsFactors=F)
23
fs$file=basename(fs$path)
24
fs$date=as.Date(substr(fs$file,11,17),"%Y%j")
25
fs$month=format(fs$date,"%m")
26
fs$year=format(fs$date,"%Y")
27
fs$time=substr(fs$file,19,22)
28
fs$datetime=as.POSIXct(strptime(paste(substr(fs$file,11,17),substr(fs$file,19,22)), '%Y%j %H%M'))
29
fs$dateid=format(fs$date,"%Y%m%d")
30
fs$path=as.character(fs$path)
31
fs$file=as.character(fs$file)
32

  
33
## get all unique dates
34
alldates=unique(fs$dateid)
35

  
36

  
37
#### Generate submission file
38
## identify which have been completed
39
done=alldates%in%substr(list.files(outdir),7,14)
40
table(done)
41
notdone=alldates[!done]  #these are the dates that still need to be processed
42

  
43
tile="h11v08"   #can move this to submit script if needed
44
script="/u/awilso10/environmental-layers/climate/procedures/MOD06_L2_process.r"
45
#write.table(paste("--verbose ",script," date=",notdone," tile=\"",tile,"\"",sep=""),file="notdone.txt",row.names=F,col.names=F,quote=F)
46
write.table(paste("--verbose ",script," date=",notdone[1:30],sep=""),file="notdone.txt",row.names=F,col.names=F,quote=F)
47

  
48
save(fs,alldates,gridfile,td,file="allfiles.Rdata")
49

  
50
## Submission script
11 51

  
12 52
cat(paste("
13 53
#PBS -S /bin/bash
14
#PBS -l select=1:ncpus=16:model=san
54
#PBS -l select=2:ncpus=16:model=san
15 55
###PBS -l select=4:ncpus=8:model=neh
16 56
##PBS -l select=1:ncpus=12:model=wes
17 57
####### old: select=48:ncpus=8:mpiprocs=8:model=neh
......
19 59
#PBS -j oe
20 60
#PBS -m e
21 61
#PBS -V
22
####PBS -W group_list=s1007
23 62
#PBS -q devel
24 63
#PBS -o log/log_^array_index^
25
#PBS -o log/log_DataCompile
64
#PBS -o log/log_DataCompile.log
26 65
#PBS -M adam.wilson@yale.edu
27 66
#PBS -N MOD06
28 67

  
29
#source /usr/share/modules/init/bash
30

  
31 68
## cd to working directory
32 69
cd /nobackupp1/awilso10/mod06
33 70

  
34 71
## set some memory limits
35 72
#  ulimit -d 1500000 -m 1500000 -v 1500000  #limit memory usage
73
  source /usr/local/lib/global.profile
36 74
  source /u/awilso10/.bashrc
37 75
  source /u/awilso10/moduleload
38
  source /usr/local/lib/global.profile
39 76
## export a few important variables
40
  export NCORES=16  # use to limit mclapply() to set nubmer of cores, should be select*ncpus above
77
  export NNODES=32
41 78
  export R_LIBS=\"/u/awilso10/R/x86_64-unknown-linux-gnu-library/2.15/\"
42 79
## load modules
43
  module load gcc hdf4 udunits R nco mpi-intel #mpi-sgi/mpt.2.06r6
80
#  module load gcc comp-intel/2012.0.032 netcdf mpi-sgi/mpt.2.06r6 hdf4 udunits R nco
44 81
## Run the script!
45 82
## current version not parallelizing across nodes!
46
  TMPDIR=$TMPDIR Rscript --verbose --vanilla /u/awilso10/environmental-layers/climate/procedures/MOD06_L2_process.r 
47
exit 0
48
exit 0
83
  TMPDIR=$TMPDIR Rscript --verbose --vanilla /u/awilso10/environmental-layers/climate/procedures/MOD06_L2_process.r date=20000403
84

  
85
WORKLIST=notdone.txt
86
EXE="Rscript"
87
LOG=log/log_DataCompile.log
49 88

  
89
TMPDIR=$TMPDIR mpiexec -np $NNODES  /nobackupp4/pvotava/software/share/mqueue-eg/mqueue/mqueue -l $WORKLIST -p $EXE -v -v -v --random-starts 2-4 --work-analyze #> $LOG
90
#mpiexec -np 2  /nobackupp4/pvotava/software/share/mqueue-eg/mqueue/mqueue -l testrun.txt -p $EXE -v -v -v  #> $LOG
91
#TMPDIR=$TMPDIR mpiexec -np $NNODES  /nobackupp4/pvotava/software/share/mqueue-eg/mqueue/mqueue -l $WORKLIST -p $EXE -v -v -v #> $LOG
92
exit 0
50 93
",sep=""),file="MOD06_process")
51 94

  
52 95
### Check the file
......
55 98

  
56 99
## check queue status
57 100
system("/u/scicon/tools/bin/node_stats.sh")
101
system("/u/scicon/tools/bin/qtop.pl 479343")
58 102

  
59 103
## Submit it (and keep the pid)!
60 104
system("qsub MOD06_process")
......
71 115
system("qstat devel ") 
72 116
#system("qstat | grep awilso10") 
73 117

  
118
####################################
119

  
120

  
121
################################################################################
122
## now generate the climatologies
123
fdly=data.frame(
124
  path=list.files(outdir,pattern="nc$",full=T),
125
  file=list.files(outdir,pattern="nc$"))
126
fdly$date=as.Date(substr(fdly$file,7,14),"%Y%m%d")
127
fdly$month=format(fdly$date,"%m")
128
fdly$year=format(fdly$date,"%Y")
129

  
130
## check validity (via npar and ntime) of nc files
131
for(i in 1:nrow(fdly)){
132
  fdly$ntime[i]=as.numeric(system(paste("cdo  sinfo ",fdly$path[i]),intern=T))
133
  fdly$npar[i]=as.numeric(system(paste("cdo -s npar ",fdly$path[i]),intern=T))
134
  print(i)
135
}
136

  
137
## Combine all days within years into a single file (can't mergetime all days at once because this opens too many files)
138
tsdir=paste(tempdir(),"/summary",sep="")
139
dir.create(tsdir)
140
lapply(unique(fdly$year),function(y){
141
  system(paste("cdo -O mergetime ",paste(fdly$path[fdly$year==y],collapse=" ")," ",tsdir,"/MOD09_",tile,"_",y,"_daily.nc",sep=""))
142
  print(paste("Finished merging daily files for year",y))
143
})
144
## Combine the year-by-year files into a single daily file
145
system(paste("cdo -O mergetime ",paste(list.files(tsdir,full=T,pattern="daily[.]nc$"),collapse=" ")," ",outdir2,"/MOD09_",tile,"_daily.nc",sep=""))
146

  
147
system(paste("cdo -O monmean ",outdir2,"/MOD09_",tile,"_daily.nc ",outdir2,"/",tile,"_monmean.nc",sep=""))
148
system(paste("cdo -O ymonmean ",outdir2,"/MOD09_",tile,"_daily.nc ",outdir2,"/",tile,"_ymonmean.nc",sep=""))
149
system(paste("cdo -O ymonstd ",outdir2,"/MOD09_",tile,"_daily.nc ",outdir2,"/",tile,"_ymonstd.nc",sep=""))
150

  
151
print("Finished!   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
152
## quit R
153
q("no")
154
 
155

  
156
#################################################################
74 157

  
75 158
### copy the files back to Yale
76 159
list.files("2_daily")
......
81 164

  
82 165

  
83 166
list.files(" /tmp/Rtmp6I6tFn")
167

  
168

  
169

  
170

  

Also available in: Unified diff