Project

General

Profile

« Previous | Next » 

Revision 9d52d7e0

Added by Adam Wilson over 12 years ago

Successfully running MOD06 processing on Pleiades as an array job (though submissions are limited to < 365 jobs so will have to find another way to submit them)

View differences:

climate/procedures/MOD06_L2_data_compile_Pleiades.r
26 26
setwd("/nobackupp1/awilso10/mod06")
27 27
tempdir=tempdir()  # to hold intermediate files
28 28
outdir="2_daily"   # final daily product
29

  
30
### use MODIS tile as ROI instead
31
modt=readOGR("modgrid","modis_sinusoidal_grid_world",)
32
tiles=c("H11V8")
33
roi=modt[modt$HV%in%tiles,]
34

  
35
## Bounding box of region in lat/lon
36
roi_ll=spTransform(roi,CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))
37
roi_bb=bbox(roi_ll)
38

  
29
tile="h11v08"   #can move this to submit script if needed
39 30
## read in list of all files
40 31
load("allfiles.Rdata")
41
load("alldates.Rdata")
42
load("notdonedates.Rdata")
32
load("notdone.Rdata")
43 33
load("vars.Rdata")
44 34

  
45
date=notdonedates[i]
35
date=notdone[i]
36
#file=fs$path[fs$dateid==date][1]
37

  
38
## print out some info
39
print(paste("Processing date ",date," for tile",tile))
46 40

  
41
## identify tile of interest
42
tile_bb=tb[tb$tile==tile,]
47 43

  
48 44
#### Function to generate hegtool parameter file for multi-band HDF-EOS file
49
swath2grid=function(file,vars,outdir,upleft,lowright){
45
swath2grid=function(file,vars,upleft,lowright){
50 46
  print(paste("Starting file",basename(file)))
51
  outfile=paste(outdir,"/",basename(file),sep="")
47
  outfile=paste(tempdir(),"/",basename(file),sep="")
52 48
### First write the parameter file (careful, heg is very finicky!)
53 49
  hdr=paste("NUM_RUNS = ",length(vars$varid),"|MULTI_BAND_HDFEOS:",length(vars$varid),sep="")
54 50
  grp=paste("
......
82 78
  ## now run the swath2grid tool
83 79
  ## write the tiff file
84 80
  log=system(paste("/nobackupp4/pvotava/software/heg/bin/swtif -p ",paste(tempdir(),"/",basename(file),"_MODparms.txt -d",sep=""),sep=""),intern=T)
85
      print(paste("Finished ", file))
81
  ## clean up temporary files in working directory
82
#  file.remove(paste("filetable.temp_",pid,sep=""))
83
  print(log)
84
  print(paste("Finished ", file))
86 85
}
87 86

  
88
## fix out dir to tempdir
89
## then move on to grass processing...
90

  
91 87
#### Run the gridding procedure
92
lapply(fs$path[fs$dateid==date],function(file){
93
  swath2grid(file,vars=vars,outdir=outdir,
94
             upleft=paste(roi_bb[2,2],roi_bb[1,1]),
95
             lowright=paste(roi_bb[2,1],roi_bb[1,2]))})
88
lapply(fs$path[fs$dateid==date],swath2grid,vars=vars,upleft=paste(tile_bb$lat_max,tile_bb$lon_min),lowright=paste(tile_bb$lat_min,tile_bb$lon_max))
89
#upleft=paste(roi_bb[2,2],roi_bb[1,1]),lowright=paste(roi_bb[2,1],roi_bb[1,2]))
96 90

  
97 91

  
98 92
##############################################################
99 93
### Import to GRASS for processing
100 94

  
101 95
#fs$grass=paste(fs$month,fs$year,fs$file,sep="_")
102
td=readGDAL(paste("HDF4_EOS:EOS_GRID:\"",outdir,"/",fs$file[1],"\":mod06:Cloud_Mask_1km_0",sep=""))
103
#projection(td)="+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs +datum=WGS84 +ellps=WGS84 "
104
projection(td)="+proj=utm +zone=10 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
96
#td=readGDAL(paste("HDF4_EOS:EOS_GRID:\"",outdir,"/",fs$file[1],"\":mod06:Cloud_Mask_1km_0",sep=""))
97

  
98
gridfile=list.files("/nobackupp4/datapool/modis/MOD11A1.005/2006.01.27/",pattern=paste(tile,".*hdf$",sep=""),full=T)
99
td=readGDAL(paste("HDF4_EOS:EOS_GRID:\"",gridfile,"\":MODIS_Grid_Daily_1km_LST:Night_view_angl",sep=""))
100
projection(td)="+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs +datum=WGS84 +ellps=WGS84 "
105 101

  
106 102
## fucntion to convert binary to decimal to assist in identifying correct values
107 103
b2d=function(x) sum(x * 2^(rev(seq_along(x)) - 1)) #http://tolstoy.newcastle.edu.au/R/e2/help/07/02/10596.html
......
109 105
b2d(c(T,T))
110 106

  
111 107
### create (or connect to) grass location
112
gisDbase="/media/data/grassdata"
113
gisLocation="oregon"
108
gisLocation="tmp"
114 109
gisMapset="mod06"
115 110
## set Grass to overwrite
116 111
Sys.setenv(GRASS_OVERWRITE=1)
117
Sys.setenv(DEBUG=0)
112
Sys.setenv(DEBUG=1)
113
Sys.setenv(GRASS_GUI="txt")
118 114

  
119 115
## temporary objects to test function below
120
 i=1
121
file=paste(outdir,"/",fs$file[1],sep="")
122
date=as.Date("2000-05-23")
116
# i=1
117
#file=paste(outdir,"/",fs$file[1],sep="")
118
#date=as.Date("2000-05-23")
123 119

  
120
#.GRASS_CACHE=spgrass6:::.GRASS_CACHE
124 121

  
125 122
### Function to extract various SDSs from a single gridded HDF file and use QA data to throw out 'bad' observations
126 123
loadcloud<-function(date,fs){
127 124
### set up unique grass session
128 125
  tf=paste(tempdir(),"/grass", Sys.getpid(),"/", sep="")
129 126
 
130
  ## set up tempfile for this PID
131
  initGRASS(gisBase="/usr/lib/grass64",gisDbase=tf,SG=td,override=T,location="mod06",mapset="PERMANENT",home=tf,pid=Sys.getpid())
132
#  system(paste("g.proj -c proj4=\"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +datum=WGS84 +units=m +no_defs\"",sep=""))
133
    system(paste("g.proj -c proj4=\"+proj=utm +zone=10 +ellps=WGS84 +datum=WGS84 +units=m +no_defs\"",sep=""))
127
  ## set up temporary grass instance for this PID
128
  initGRASS(gisBase="/nobackupp1/awilso10/software/grass-6.4.3svn",gisDbase=tf,SG=td,override=T,location="mod06",mapset="PERMANENT",home=tf,pid=Sys.getpid())
129
  system(paste("g.proj -c proj4=\"",projection(td),"\"",sep=""))
134 130

  
135
  ## Define region by importing one raster.
136
  execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",outdir,"/",fs$file[1],"\":mod06:Cloud_Mask_1km_0",sep=""),
131
  ## Define region by importing one MOD11A1 raster.
132
  execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",gridfile,"\":MODIS_Grid_Daily_1km_LST:Night_view_angl",sep=""),
137 133
            output="modisgrid",flags=c("quiet","overwrite","o"))
138 134
  system("g.region rast=modisgrid save=roi --overwrite")
139
  system("g.region roi")
140
  system("g.region -p")
141 135

  
142 136
  ## Identify which files to process
143
  tfs=fs$file[fs$date==date]
137
  tfs=fs$file[fs$dateid==date]
144 138
  nfs=length(tfs)
145 139

  
146
  ### print some summary info
147
  print(date)
148 140
  ## loop through scenes and process QA flags
149 141
  for(i in 1:nfs){
150
     file=paste(outdir,"/",tfs[i],sep="")
142
     file=paste(tempdir,"/",tfs[i],sep="")
151 143
     ## Cloud Mask
152 144
     execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",file,"\":mod06:Cloud_Mask_1km_0",sep=""),
153 145
              output=paste("CM1_",i,sep=""),flags=c("overwrite","o")) ; print("")
......
158 150
EOF",sep=""))
159 151

  
160 152
    ## QA
161
    execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",file,"\":mod06:Quality_Assurance_1km_0",sep=""),
153
     execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",file,"\":mod06:Quality_Assurance_1km_0",sep=""),
162 154
             output=paste("QA_",i,sep=""),flags=c("overwrite","o")) ; print("")
163 155
   ## QA_CER
164 156
   system(paste("r.mapcalc <<EOF
......
218 210
EOF",sep=""))
219 211

  
220 212
  #### Write the files to a geotiff
221
  execGRASS("r.out.gdal",input="CER_daily",output=paste(tifdir,"/CER_",format(date,"%Y%m%d"),".tif",sep=""),nodata=-999,flags=c("quiet"))
222
  execGRASS("r.out.gdal",input="COT_daily",output=paste(tifdir,"/COT_",format(date,"%Y%m%d"),".tif",sep=""),nodata=-999,flags=c("quiet"))
223
  execGRASS("r.out.gdal",input="CLD_daily",output=paste(tifdir,"/CLD_",format(date,"%Y%m%d"),".tif",sep=""),nodata=99,flags=c("quiet"))
213
  execGRASS("r.out.gdal",input="CER_daily",output=paste(outdir,"/CER_",date,".tif",sep=""),nodata=-999,flags=c("quiet"))
214
  execGRASS("r.out.gdal",input="COT_daily",output=paste(outdir,"/COT_",date,".tif",sep=""),nodata=-999,flags=c("quiet"))
215
  execGRASS("r.out.gdal",input="CLD_daily",output=paste(outdir,"/CLD_",date,".tif",sep=""),nodata=99,flags=c("quiet"))
224 216

  
225 217
### delete the temporary files 
226 218
  unlink_.gislock()
227
  system("/usr/lib/grass64/etc/clean_temp")
228
 system(paste("rm -R ",tf,sep=""))
219
  system("/nobackupp1/awilso10/software/grass-6.4.3svn/etc/clean_temp")
220
  system(paste("rm -R ",tf,sep=""))
229 221
### print update
230
  print(paste(" ###################################################################               Finished ",date,"
231
################################################################"))
232 222
}
233 223

  
234 224

  
235 225
###########################################
236 226
### Now run it
237 227

  
238
tdates=sort(unique(fs$date))
239
done=tdates%in%as.Date(substr(list.files(tifdir),5,12),"%Y%m%d")
240
table(done)
241
tdates=tdates[!done]
228
lapply(date,function(date) loadcloud(date,fs=fs))
229

  
230

  
231

  
232
  print(paste(" ###################################################################               Finished ",date,"
233
################################################################"))
242 234

  
243
mclapply(tdates,function(date) loadcloud(date,fs=fs))
244 235

  
236
## quit R
237
q("no")
245 238
 
246 239

  
climate/procedures/Pleiades.R
21 21
## get all unique dates
22 22
alldates=unique(fs$dateid)
23 23

  
24

  
25
## get MODLAND tile information
26
tb=read.table("http://landweb.nascom.nasa.gov/developers/sn_tiles/sn_bound_10deg.txt",skip=6,nrows=648,header=T)
27
tb$tile=paste("h",sprintf("%02d",tb$ih),"v",sprintf("%02d",tb$iv),sep="")
28
### use MODIS tile as ROI
29
#modt=readOGR("modgrid","modis_sinusoidal_grid_world",)
30
#modt@data[,colnames(tb)[3:6]]=tb[match(paste(modt$h,modt$v),paste(tb$ih,tb$iv)),3:6]
31
#write.csv(modt@data,file="modistile.csv")
32

  
33

  
24 34
## write it out
25
save(fs,file="allfiles.Rdata")
26
save(alldates,file="alldates.Rdata")
35
save(fs,tb,file="allfiles.Rdata")
36
#save(alldates,file="alldates.Rdata")
27 37

  
28
notdonedates=alldates
29
save(notdonedates,file="notdonedates.Rdata")
38
## identify which have been completed
39
outdir="2_daily"
40
done=alldates%in%substr(list.files(outdir),5,12)
41
table(done)
42
notdone=alldates[!done]
30 43

  
44
#notdone=alldates[1:4]
45

  
46
save(notdone,file="notdone.Rdata")
31 47

  
32
## output ROI
33
#get bounding box of region in m
34
#ge=SpatialPoints(data.frame(lon=c(-125,-115),lat=c(40,47)))
35
#projection(ge)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
36
#ge2=spTransform(ge, CRS(" +proj=sinu +lon_0=0 +x_0=0 +y_0=0"))
37 48

  
38 49
## vars
39 50
vars=as.data.frame(matrix(c(
......
52 63

  
53 64

  
54 65
### Submission script
55
cat("
56
#PBS -S /bin/csh
57
#PBS -N cfd
58
# This example uses the Harpertown nodes
59
# User job can access ~7.6 GB of memory per Harpertown node.
60
# A memory intensive job that needs more than ~0.9 GB
61
# per process should use less than 8 cores per node
62
# to allow more memory per MPI process. This example
63
# asks for 64 nodes and 4 MPI processes per node.
64
# This request implies 64x4 = 256 MPI processes for the job.
65
#PBS -l select=64:ncpus=8:mpiprocs=4:model=har
66
#PBS -l walltime=4:00:00
66

  
67
cat(paste("
68
#PBS -S /bin/sh
69
#PBS -J 700-899
70
###PBS -J 1-",length(notdone),"
71
#PBS -l walltime=0:10:00
72
#PBS -l ncpus=100
67 73
#PBS -j oe
68
#PBS -W group_list=a0801
74
#PBS -o log/log_^array_index^
69 75
#PBS -m e
70

  
71
# Load some modules
72
module load gcc
73
module load hdf5
74
module load netcdf/4.1.3/gcc/mpt
75
module load mpi
76
module load tcl-tk/8.5.11
77
module load udunits/2.1.19
78
module load szip/2.1/gcc
79
module load R
80
module load git
81

  
82
# By default, PBS executes your job from your home directory.
83
# However, you can use the environment variable
84
# PBS_O_WORKDIR to change to the directory where
85
# you submitted your job.
86

  
87
cd $PBS_O_WORKDIR
88

  
89
# use of dplace to pin processes to processors may improve performance
90
# Here you request to pin processes to processors 2, 3, 6, 7 of each node.
91
# This helps for using the Harpertown nodes, but not for Nehalem-EP or
92
# Westmere-EP nodes
93

  
94
# The resource request of select=64 and mpiprocs=4 implies
95
# that you want to have 256 MPI processes in total.
96
# If this is correct, you can omit the -np 256 for mpiexec
97
# that you might have used before.
98

  
99
mpiexec dplace -s1 -c2,3,6,7 ./grinder < run_input > output
100

  
101
# It is a good practice to write stderr and stdout to a file (ex: output)
102
# Otherwise, they will be written to the PBS stderr and stdout in /PBS/spool,
103
# which has limited amount  of space. When /PBS/spool is filled up, any job
104
# that tries to write to /PBS/spool will die.
105

  
106
# -end of script-
76
#PBS -M adam.wilson@yale.edu
77
#PBS -N MOD06
78

  
79
## cd to working directory
80
cd /nobackupp1/awilso10/mod06
81

  
82
## set some memory limits
83
#  ulimit -d 1500000 -m 1500000 -v 1500000  #limit memory usage
84
  source /usr/local/lib/global.profile
85
## export a few important variables
86
  export PATH=$PATH:/nobackupp1/awilso10/bin:/nobackupp1/awilso10/software/bin
87
  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/nobackupp1/awilso10/software/lib
88
  export MRTDATADIR=/nobackupp1/awilso10/software/heg/data
89
  export PGSHOME=/nobackupp1/awilso10/software/heg
90
  export MRTBINDIR=/nobackup1/awilso10/software/TOOLKIT_MTD
91
  export R_LIBS=\"/u/awilso10/R/x86_64-unknown-linux-gnu-library/2.15/\"
92
## load modules
93
  module load gcc mpi-sgi/mpt.2.06r6 hdf4 udunits R
94
## Run the script!
95
  Rscript --verbose --vanilla /u/awilso10/environmental-layers/climate/procedures/MOD06_L2_data_compile_Pleiades.r i=${PBS_ARRAY_INDEX}
96
rm -r $TMPDIR
97
exit 0
98
",sep=""),file="MOD06_process")
99

  
100
### Check the file
101
system("cat MOD06_process")
102
#system("chmod +x MOD06_process")
103

  
104
## Submit it!
105
#system("qsub -q devel MOD06_process")
106
system("qsub MOD06_process")
107

  
108
## check progress
109
system("qstat -u awilso10")
110
system("qstat -t 391843[]")
111
system("qstat -f 391843[2]")
112

  
113
#system("qstat devel ") 
114
#system("qstat | grep awilso10") 
115

  
116
                                        #print(paste(max(0,length(system("qstat",intern=T))-2)," processes running"))
117
# system("ssh c0-8.farm.caes.ucdavis.edu")
118
# system("qalter -p +1024 25964")  #decrease priority of job to run extraction below.
119
system("cat log/InterpScript.o55934.2")
120

  
121
## check log
122
system(paste("cat",list.files("log",pattern="InterpScript",full=T)[100]))
123
#system(paste("cat",list.files("log",pattern="InterpScript",full=T)[13]," | grep \"Temporary Directory\""))

Also available in: Unified diff