Revision 9d52d7e0
Added by Adam Wilson over 12 years ago
climate/procedures/MOD06_L2_data_compile_Pleiades.r | ||
---|---|---|
26 | 26 |
setwd("/nobackupp1/awilso10/mod06") |
27 | 27 |
tempdir=tempdir() # to hold intermediate files |
28 | 28 |
outdir="2_daily" # final daily product |
29 |
|
|
30 |
### use MODIS tile as ROI instead |
|
31 |
modt=readOGR("modgrid","modis_sinusoidal_grid_world",) |
|
32 |
tiles=c("H11V8") |
|
33 |
roi=modt[modt$HV%in%tiles,] |
|
34 |
|
|
35 |
## Bounding box of region in lat/lon |
|
36 |
roi_ll=spTransform(roi,CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")) |
|
37 |
roi_bb=bbox(roi_ll) |
|
38 |
|
|
29 |
tile="h11v08" #can move this to submit script if needed |
|
39 | 30 |
## read in list of all files |
40 | 31 |
load("allfiles.Rdata") |
41 |
load("alldates.Rdata") |
|
42 |
load("notdonedates.Rdata") |
|
32 |
load("notdone.Rdata") |
|
43 | 33 |
load("vars.Rdata") |
44 | 34 |
|
45 |
date=notdonedates[i] |
|
35 |
date=notdone[i] |
|
36 |
#file=fs$path[fs$dateid==date][1] |
|
37 |
|
|
38 |
## print out some info |
|
39 |
print(paste("Processing date ",date," for tile",tile)) |
|
46 | 40 |
|
41 |
## identify tile of interest |
|
42 |
tile_bb=tb[tb$tile==tile,] |
|
47 | 43 |
|
48 | 44 |
#### Function to generate hegtool parameter file for multi-band HDF-EOS file |
49 |
swath2grid=function(file,vars,outdir,upleft,lowright){
|
|
45 |
swath2grid=function(file,vars,upleft,lowright){ |
|
50 | 46 |
print(paste("Starting file",basename(file))) |
51 |
outfile=paste(outdir,"/",basename(file),sep="")
|
|
47 |
outfile=paste(tempdir(),"/",basename(file),sep="")
|
|
52 | 48 |
### First write the parameter file (careful, heg is very finicky!) |
53 | 49 |
hdr=paste("NUM_RUNS = ",length(vars$varid),"|MULTI_BAND_HDFEOS:",length(vars$varid),sep="") |
54 | 50 |
grp=paste(" |
... | ... | |
82 | 78 |
## now run the swath2grid tool |
83 | 79 |
## write the tiff file |
84 | 80 |
log=system(paste("/nobackupp4/pvotava/software/heg/bin/swtif -p ",paste(tempdir(),"/",basename(file),"_MODparms.txt -d",sep=""),sep=""),intern=T) |
85 |
print(paste("Finished ", file)) |
|
81 |
## clean up temporary files in working directory |
|
82 |
# file.remove(paste("filetable.temp_",pid,sep="")) |
|
83 |
print(log) |
|
84 |
print(paste("Finished ", file)) |
|
86 | 85 |
} |
87 | 86 |
|
88 |
## fix out dir to tempdir |
|
89 |
## then move on to grass processing... |
|
90 |
|
|
91 | 87 |
#### Run the gridding procedure |
92 |
lapply(fs$path[fs$dateid==date],function(file){ |
|
93 |
swath2grid(file,vars=vars,outdir=outdir, |
|
94 |
upleft=paste(roi_bb[2,2],roi_bb[1,1]), |
|
95 |
lowright=paste(roi_bb[2,1],roi_bb[1,2]))}) |
|
88 |
lapply(fs$path[fs$dateid==date],swath2grid,vars=vars,upleft=paste(tile_bb$lat_max,tile_bb$lon_min),lowright=paste(tile_bb$lat_min,tile_bb$lon_max)) |
|
89 |
#upleft=paste(roi_bb[2,2],roi_bb[1,1]),lowright=paste(roi_bb[2,1],roi_bb[1,2])) |
|
96 | 90 |
|
97 | 91 |
|
98 | 92 |
############################################################## |
99 | 93 |
### Import to GRASS for processing |
100 | 94 |
|
101 | 95 |
#fs$grass=paste(fs$month,fs$year,fs$file,sep="_") |
102 |
td=readGDAL(paste("HDF4_EOS:EOS_GRID:\"",outdir,"/",fs$file[1],"\":mod06:Cloud_Mask_1km_0",sep="")) |
|
103 |
#projection(td)="+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs +datum=WGS84 +ellps=WGS84 " |
|
104 |
projection(td)="+proj=utm +zone=10 +ellps=WGS84 +datum=WGS84 +units=m +no_defs" |
|
96 |
#td=readGDAL(paste("HDF4_EOS:EOS_GRID:\"",outdir,"/",fs$file[1],"\":mod06:Cloud_Mask_1km_0",sep="")) |
|
97 |
|
|
98 |
gridfile=list.files("/nobackupp4/datapool/modis/MOD11A1.005/2006.01.27/",pattern=paste(tile,".*hdf$",sep=""),full=T) |
|
99 |
td=readGDAL(paste("HDF4_EOS:EOS_GRID:\"",gridfile,"\":MODIS_Grid_Daily_1km_LST:Night_view_angl",sep="")) |
|
100 |
projection(td)="+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs +datum=WGS84 +ellps=WGS84 " |
|
105 | 101 |
|
106 | 102 |
## fucntion to convert binary to decimal to assist in identifying correct values |
107 | 103 |
b2d=function(x) sum(x * 2^(rev(seq_along(x)) - 1)) #http://tolstoy.newcastle.edu.au/R/e2/help/07/02/10596.html |
... | ... | |
109 | 105 |
b2d(c(T,T)) |
110 | 106 |
|
111 | 107 |
### create (or connect to) grass location |
112 |
gisDbase="/media/data/grassdata" |
|
113 |
gisLocation="oregon" |
|
108 |
gisLocation="tmp" |
|
114 | 109 |
gisMapset="mod06" |
115 | 110 |
## set Grass to overwrite |
116 | 111 |
Sys.setenv(GRASS_OVERWRITE=1) |
117 |
Sys.setenv(DEBUG=0) |
|
112 |
Sys.setenv(DEBUG=1) |
|
113 |
Sys.setenv(GRASS_GUI="txt") |
|
118 | 114 |
|
119 | 115 |
## temporary objects to test function below |
120 |
i=1 |
|
121 |
file=paste(outdir,"/",fs$file[1],sep="") |
|
122 |
date=as.Date("2000-05-23") |
|
116 |
# i=1
|
|
117 |
#file=paste(outdir,"/",fs$file[1],sep="")
|
|
118 |
#date=as.Date("2000-05-23")
|
|
123 | 119 |
|
120 |
#.GRASS_CACHE=spgrass6:::.GRASS_CACHE |
|
124 | 121 |
|
125 | 122 |
### Function to extract various SDSs from a single gridded HDF file and use QA data to throw out 'bad' observations |
126 | 123 |
loadcloud<-function(date,fs){ |
127 | 124 |
### set up unique grass session |
128 | 125 |
tf=paste(tempdir(),"/grass", Sys.getpid(),"/", sep="") |
129 | 126 |
|
130 |
## set up tempfile for this PID |
|
131 |
initGRASS(gisBase="/usr/lib/grass64",gisDbase=tf,SG=td,override=T,location="mod06",mapset="PERMANENT",home=tf,pid=Sys.getpid()) |
|
132 |
# system(paste("g.proj -c proj4=\"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +datum=WGS84 +units=m +no_defs\"",sep="")) |
|
133 |
system(paste("g.proj -c proj4=\"+proj=utm +zone=10 +ellps=WGS84 +datum=WGS84 +units=m +no_defs\"",sep="")) |
|
127 |
## set up temporary grass instance for this PID |
|
128 |
initGRASS(gisBase="/nobackupp1/awilso10/software/grass-6.4.3svn",gisDbase=tf,SG=td,override=T,location="mod06",mapset="PERMANENT",home=tf,pid=Sys.getpid()) |
|
129 |
system(paste("g.proj -c proj4=\"",projection(td),"\"",sep="")) |
|
134 | 130 |
|
135 |
## Define region by importing one raster. |
|
136 |
execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",outdir,"/",fs$file[1],"\":mod06:Cloud_Mask_1km_0",sep=""),
|
|
131 |
## Define region by importing one MOD11A1 raster.
|
|
132 |
execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",gridfile,"\":MODIS_Grid_Daily_1km_LST:Night_view_angl",sep=""),
|
|
137 | 133 |
output="modisgrid",flags=c("quiet","overwrite","o")) |
138 | 134 |
system("g.region rast=modisgrid save=roi --overwrite") |
139 |
system("g.region roi") |
|
140 |
system("g.region -p") |
|
141 | 135 |
|
142 | 136 |
## Identify which files to process |
143 |
tfs=fs$file[fs$date==date] |
|
137 |
tfs=fs$file[fs$dateid==date]
|
|
144 | 138 |
nfs=length(tfs) |
145 | 139 |
|
146 |
### print some summary info |
|
147 |
print(date) |
|
148 | 140 |
## loop through scenes and process QA flags |
149 | 141 |
for(i in 1:nfs){ |
150 |
file=paste(outdir,"/",tfs[i],sep="")
|
|
142 |
file=paste(tempdir,"/",tfs[i],sep="")
|
|
151 | 143 |
## Cloud Mask |
152 | 144 |
execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",file,"\":mod06:Cloud_Mask_1km_0",sep=""), |
153 | 145 |
output=paste("CM1_",i,sep=""),flags=c("overwrite","o")) ; print("") |
... | ... | |
158 | 150 |
EOF",sep="")) |
159 | 151 |
|
160 | 152 |
## QA |
161 |
execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",file,"\":mod06:Quality_Assurance_1km_0",sep=""), |
|
153 |
execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",file,"\":mod06:Quality_Assurance_1km_0",sep=""),
|
|
162 | 154 |
output=paste("QA_",i,sep=""),flags=c("overwrite","o")) ; print("") |
163 | 155 |
## QA_CER |
164 | 156 |
system(paste("r.mapcalc <<EOF |
... | ... | |
218 | 210 |
EOF",sep="")) |
219 | 211 |
|
220 | 212 |
#### Write the files to a geotiff |
221 |
execGRASS("r.out.gdal",input="CER_daily",output=paste(tifdir,"/CER_",format(date,"%Y%m%d"),".tif",sep=""),nodata=-999,flags=c("quiet"))
|
|
222 |
execGRASS("r.out.gdal",input="COT_daily",output=paste(tifdir,"/COT_",format(date,"%Y%m%d"),".tif",sep=""),nodata=-999,flags=c("quiet"))
|
|
223 |
execGRASS("r.out.gdal",input="CLD_daily",output=paste(tifdir,"/CLD_",format(date,"%Y%m%d"),".tif",sep=""),nodata=99,flags=c("quiet"))
|
|
213 |
execGRASS("r.out.gdal",input="CER_daily",output=paste(outdir,"/CER_",date,".tif",sep=""),nodata=-999,flags=c("quiet"))
|
|
214 |
execGRASS("r.out.gdal",input="COT_daily",output=paste(outdir,"/COT_",date,".tif",sep=""),nodata=-999,flags=c("quiet"))
|
|
215 |
execGRASS("r.out.gdal",input="CLD_daily",output=paste(outdir,"/CLD_",date,".tif",sep=""),nodata=99,flags=c("quiet"))
|
|
224 | 216 |
|
225 | 217 |
### delete the temporary files |
226 | 218 |
unlink_.gislock() |
227 |
system("/usr/lib/grass64/etc/clean_temp")
|
|
228 |
system(paste("rm -R ",tf,sep="")) |
|
219 |
system("/nobackupp1/awilso10/software/grass-6.4.3svn/etc/clean_temp")
|
|
220 |
system(paste("rm -R ",tf,sep=""))
|
|
229 | 221 |
### print update |
230 |
print(paste(" ################################################################### Finished ",date," |
|
231 |
################################################################")) |
|
232 | 222 |
} |
233 | 223 |
|
234 | 224 |
|
235 | 225 |
########################################### |
236 | 226 |
### Now run it |
237 | 227 |
|
238 |
tdates=sort(unique(fs$date)) |
|
239 |
done=tdates%in%as.Date(substr(list.files(tifdir),5,12),"%Y%m%d") |
|
240 |
table(done) |
|
241 |
tdates=tdates[!done] |
|
228 |
lapply(date,function(date) loadcloud(date,fs=fs)) |
|
229 |
|
|
230 |
|
|
231 |
|
|
232 |
print(paste(" ################################################################### Finished ",date," |
|
233 |
################################################################")) |
|
242 | 234 |
|
243 |
mclapply(tdates,function(date) loadcloud(date,fs=fs)) |
|
244 | 235 |
|
236 |
## quit R |
|
237 |
q("no") |
|
245 | 238 |
|
246 | 239 |
|
climate/procedures/Pleiades.R | ||
---|---|---|
21 | 21 |
## get all unique dates |
22 | 22 |
alldates=unique(fs$dateid) |
23 | 23 |
|
24 |
|
|
25 |
## get MODLAND tile information |
|
26 |
tb=read.table("http://landweb.nascom.nasa.gov/developers/sn_tiles/sn_bound_10deg.txt",skip=6,nrows=648,header=T) |
|
27 |
tb$tile=paste("h",sprintf("%02d",tb$ih),"v",sprintf("%02d",tb$iv),sep="") |
|
28 |
### use MODIS tile as ROI |
|
29 |
#modt=readOGR("modgrid","modis_sinusoidal_grid_world",) |
|
30 |
#modt@data[,colnames(tb)[3:6]]=tb[match(paste(modt$h,modt$v),paste(tb$ih,tb$iv)),3:6] |
|
31 |
#write.csv(modt@data,file="modistile.csv") |
|
32 |
|
|
33 |
|
|
24 | 34 |
## write it out |
25 |
save(fs,file="allfiles.Rdata") |
|
26 |
save(alldates,file="alldates.Rdata") |
|
35 |
save(fs,tb,file="allfiles.Rdata")
|
|
36 |
#save(alldates,file="alldates.Rdata")
|
|
27 | 37 |
|
28 |
notdonedates=alldates |
|
29 |
save(notdonedates,file="notdonedates.Rdata") |
|
38 |
## identify which have been completed |
|
39 |
outdir="2_daily" |
|
40 |
done=alldates%in%substr(list.files(outdir),5,12) |
|
41 |
table(done) |
|
42 |
notdone=alldates[!done] |
|
30 | 43 |
|
44 |
#notdone=alldates[1:4] |
|
45 |
|
|
46 |
save(notdone,file="notdone.Rdata") |
|
31 | 47 |
|
32 |
## output ROI |
|
33 |
#get bounding box of region in m |
|
34 |
#ge=SpatialPoints(data.frame(lon=c(-125,-115),lat=c(40,47))) |
|
35 |
#projection(ge)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs") |
|
36 |
#ge2=spTransform(ge, CRS(" +proj=sinu +lon_0=0 +x_0=0 +y_0=0")) |
|
37 | 48 |
|
38 | 49 |
## vars |
39 | 50 |
vars=as.data.frame(matrix(c( |
... | ... | |
52 | 63 |
|
53 | 64 |
|
54 | 65 |
### Submission script |
55 |
cat(" |
|
56 |
#PBS -S /bin/csh |
|
57 |
#PBS -N cfd |
|
58 |
# This example uses the Harpertown nodes |
|
59 |
# User job can access ~7.6 GB of memory per Harpertown node. |
|
60 |
# A memory intensive job that needs more than ~0.9 GB |
|
61 |
# per process should use less than 8 cores per node |
|
62 |
# to allow more memory per MPI process. This example |
|
63 |
# asks for 64 nodes and 4 MPI processes per node. |
|
64 |
# This request implies 64x4 = 256 MPI processes for the job. |
|
65 |
#PBS -l select=64:ncpus=8:mpiprocs=4:model=har |
|
66 |
#PBS -l walltime=4:00:00 |
|
66 |
|
|
67 |
cat(paste(" |
|
68 |
#PBS -S /bin/sh |
|
69 |
#PBS -J 700-899 |
|
70 |
###PBS -J 1-",length(notdone)," |
|
71 |
#PBS -l walltime=0:10:00 |
|
72 |
#PBS -l ncpus=100 |
|
67 | 73 |
#PBS -j oe |
68 |
#PBS -W group_list=a0801
|
|
74 |
#PBS -o log/log_^array_index^
|
|
69 | 75 |
#PBS -m e |
70 |
|
|
71 |
# Load some modules |
|
72 |
module load gcc |
|
73 |
module load hdf5 |
|
74 |
module load netcdf/4.1.3/gcc/mpt |
|
75 |
module load mpi |
|
76 |
module load tcl-tk/8.5.11 |
|
77 |
module load udunits/2.1.19 |
|
78 |
module load szip/2.1/gcc |
|
79 |
module load R |
|
80 |
module load git |
|
81 |
|
|
82 |
# By default, PBS executes your job from your home directory. |
|
83 |
# However, you can use the environment variable |
|
84 |
# PBS_O_WORKDIR to change to the directory where |
|
85 |
# you submitted your job. |
|
86 |
|
|
87 |
cd $PBS_O_WORKDIR |
|
88 |
|
|
89 |
# use of dplace to pin processes to processors may improve performance |
|
90 |
# Here you request to pin processes to processors 2, 3, 6, 7 of each node. |
|
91 |
# This helps for using the Harpertown nodes, but not for Nehalem-EP or |
|
92 |
# Westmere-EP nodes |
|
93 |
|
|
94 |
# The resource request of select=64 and mpiprocs=4 implies |
|
95 |
# that you want to have 256 MPI processes in total. |
|
96 |
# If this is correct, you can omit the -np 256 for mpiexec |
|
97 |
# that you might have used before. |
|
98 |
|
|
99 |
mpiexec dplace -s1 -c2,3,6,7 ./grinder < run_input > output |
|
100 |
|
|
101 |
# It is a good practice to write stderr and stdout to a file (ex: output) |
|
102 |
# Otherwise, they will be written to the PBS stderr and stdout in /PBS/spool, |
|
103 |
# which has limited amount of space. When /PBS/spool is filled up, any job |
|
104 |
# that tries to write to /PBS/spool will die. |
|
105 |
|
|
106 |
# -end of script- |
|
76 |
#PBS -M adam.wilson@yale.edu |
|
77 |
#PBS -N MOD06 |
|
78 |
|
|
79 |
## cd to working directory |
|
80 |
cd /nobackupp1/awilso10/mod06 |
|
81 |
|
|
82 |
## set some memory limits |
|
83 |
# ulimit -d 1500000 -m 1500000 -v 1500000 #limit memory usage |
|
84 |
source /usr/local/lib/global.profile |
|
85 |
## export a few important variables |
|
86 |
export PATH=$PATH:/nobackupp1/awilso10/bin:/nobackupp1/awilso10/software/bin |
|
87 |
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/nobackupp1/awilso10/software/lib |
|
88 |
export MRTDATADIR=/nobackupp1/awilso10/software/heg/data |
|
89 |
export PGSHOME=/nobackupp1/awilso10/software/heg |
|
90 |
export MRTBINDIR=/nobackup1/awilso10/software/TOOLKIT_MTD |
|
91 |
export R_LIBS=\"/u/awilso10/R/x86_64-unknown-linux-gnu-library/2.15/\" |
|
92 |
## load modules |
|
93 |
module load gcc mpi-sgi/mpt.2.06r6 hdf4 udunits R |
|
94 |
## Run the script! |
|
95 |
Rscript --verbose --vanilla /u/awilso10/environmental-layers/climate/procedures/MOD06_L2_data_compile_Pleiades.r i=${PBS_ARRAY_INDEX} |
|
96 |
rm -r $TMPDIR |
|
97 |
exit 0 |
|
98 |
",sep=""),file="MOD06_process") |
|
99 |
|
|
100 |
### Check the file |
|
101 |
system("cat MOD06_process") |
|
102 |
#system("chmod +x MOD06_process") |
|
103 |
|
|
104 |
## Submit it! |
|
105 |
#system("qsub -q devel MOD06_process") |
|
106 |
system("qsub MOD06_process") |
|
107 |
|
|
108 |
## check progress |
|
109 |
system("qstat -u awilso10") |
|
110 |
system("qstat -t 391843[]") |
|
111 |
system("qstat -f 391843[2]") |
|
112 |
|
|
113 |
#system("qstat devel ") |
|
114 |
#system("qstat | grep awilso10") |
|
115 |
|
|
116 |
#print(paste(max(0,length(system("qstat",intern=T))-2)," processes running")) |
|
117 |
# system("ssh c0-8.farm.caes.ucdavis.edu") |
|
118 |
# system("qalter -p +1024 25964") #decrease priority of job to run extraction below. |
|
119 |
system("cat log/InterpScript.o55934.2") |
|
120 |
|
|
121 |
## check log |
|
122 |
system(paste("cat",list.files("log",pattern="InterpScript",full=T)[100])) |
|
123 |
#system(paste("cat",list.files("log",pattern="InterpScript",full=T)[13]," | grep \"Temporary Directory\"")) |
Also available in: Unified diff
Successfully running MOD06 processing on Pleiades as an array job (though submissions are limited to < 365 jobs so will have to find another way to submit them)