#### Script to facilitate processing of MOD06 data

setwd("/nobackupp1/awilso10/mod06")

### get list of files to process
datadir="/nobackupp4/datapool/modis/MOD06_L2.005/"

fs=data.frame(
  path=list.files(datadir,full=T,recursive=T,pattern="hdf"),
  file=basename(list.files(datadir,full=F,recursive=T,pattern="hdf")))
## parse acquisition date/time information out of the filenames
fs$date=as.Date(substr(fs$file,11,17),"%Y%j")
fs$month=format(fs$date,"%m")
fs$year=format(fs$date,"%Y")
fs$time=substr(fs$file,19,22)
fs$datetime=as.POSIXct(strptime(paste(substr(fs$file,11,17),substr(fs$file,19,22)), '%Y%j %H%M'))
fs$dateid=format(fs$date,"%Y%m%d")
fs$path=as.character(fs$path)
fs$file=as.character(fs$file)
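
## Worked example of the substr() offsets above, assuming the standard
## MOD06_L2 naming pattern "MOD06_L2.AYYYYDDD.HHMM.CCC.<production>.hdf";
## the filename below is illustrative only, not taken from the archive.
# f="MOD06_L2.A2006001.0355.005.2006002170030.hdf"
# substr(f,11,17)   # "2006001"  -> year + day-of-year, parsed with "%Y%j"
# substr(f,19,22)   # "0355"     -> UTC overpass time (HHMM)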

## get all unique dates
alldates=unique(fs$dateid)

## write the file and date tables out
save(fs,file="allfiles.Rdata")
save(alldates,file="alldates.Rdata")

## start by flagging every date as not yet processed
notdonedates=alldates
save(notdonedates,file="notdonedates.Rdata")
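
## Sketch of how notdonedates could be trimmed on later runs by checking which
## dates already have output. The "output" directory and the <dateid> prefix in
## the output filenames are assumptions for illustration, not part of this
## script as written.
# done=substr(list.files("output",pattern="nc$"),1,8)
# notdonedates=alldates[!alldates%in%done]
# save(notdonedates,file="notdonedates.Rdata")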

## output ROI
# get bounding box of region in m
#ge=SpatialPoints(data.frame(lon=c(-125,-115),lat=c(40,47)))
#projection(ge)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
#ge2=spTransform(ge, CRS(" +proj=sinu +lon_0=0 +x_0=0 +y_0=0"))
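
## A runnable version of the commented ROI calculation above, kept as a
## sketch: it assumes the sp and rgdal packages are available; bbox() then
## gives the region's bounding box in sinusoidal metres.
# library(sp); library(rgdal)
# ge=SpatialPoints(data.frame(lon=c(-125,-115),lat=c(40,47)),
#                  proj4string=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))
# ge2=spTransform(ge,CRS("+proj=sinu +lon_0=0 +x_0=0 +y_0=0"))
# bbox(ge2)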

## table of MOD06 variables (HDF SDS names) and the short ids used for them
vars=as.data.frame(matrix(c(
  "Cloud_Effective_Radius",              "CER",
  "Cloud_Effective_Radius_Uncertainty",  "CERU",
  "Cloud_Optical_Thickness",             "COT",
  "Cloud_Optical_Thickness_Uncertainty", "COTU",
  "Cloud_Water_Path",                    "CWP",
  "Cloud_Water_Path_Uncertainty",        "CWPU",
  "Cloud_Phase_Optical_Properties",      "CPOP",
  "Cloud_Multi_Layer_Flag",              "CMLF",
  "Cloud_Mask_1km",                      "CM1",
  "Quality_Assurance_1km",               "QA"),
  byrow=T,ncol=2,dimnames=list(1:10,c("variable","varid"))),stringsAsFactors=F)
save(vars,file="vars.Rdata")
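
## Example lookup from a full SDS name to its short id (illustrative only):
# vars$varid[vars$variable=="Cloud_Effective_Radius"]   # "CER"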

### Submission script
cat("
#PBS -S /bin/csh
#PBS -N cfd
# This example uses the Harpertown nodes.
# A user job can access ~7.6 GB of memory per Harpertown node.
# A memory-intensive job that needs more than ~0.9 GB
# per process should use fewer than 8 cores per node
# to allow more memory per MPI process. This example
# asks for 64 nodes and 4 MPI processes per node.
# This request implies 64x4 = 256 MPI processes for the job.
#PBS -l select=64:ncpus=8:mpiprocs=4:model=har
#PBS -l walltime=4:00:00
#PBS -j oe
#PBS -W group_list=a0801
#PBS -m e

# Load some modules
module load gcc
module load hdf5
module load netcdf/4.1.3/gcc/mpt
module load mpi
module load tcl-tk/8.5.11
module load udunits/2.1.19
module load szip/2.1/gcc
module load R
module load git

# By default, PBS executes your job from your home directory.
# However, you can use the environment variable
# PBS_O_WORKDIR to change to the directory where
# you submitted your job.

cd $PBS_O_WORKDIR

# Use of dplace to pin processes to processors may improve performance.
# Here you request to pin processes to processors 2, 3, 6, 7 of each node.
# This helps on the Harpertown nodes, but not on Nehalem-EP or
# Westmere-EP nodes.

# The resource request of select=64 and mpiprocs=4 implies
# that you want 256 MPI processes in total.
# If this is correct, you can omit the -np 256 option to mpiexec
# that you might have used before.

mpiexec dplace -s1 -c2,3,6,7 ./grinder < run_input > output

# It is good practice to write stderr and stdout to a file (e.g. output).
# Otherwise, they are written to the PBS stderr and stdout in /PBS/spool,
# which has a limited amount of space. When /PBS/spool fills up, any job
# that tries to write to /PBS/spool will die.

# -end of script-
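
## A common pattern for a cat()-built submission script like the one above is
## to write it to a file and hand that file to qsub, as in the sketch below.
## The file name "mod06_qsub" and the system() call are assumptions for
## illustration, not taken from this script.
# cat("...PBS directives and commands as above...",file="mod06_qsub")
# system("qsub mod06_qsub")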