Revision 1836d7f0
Added by Benoit Parmentier almost 12 years ago
climate/research/oregon/modis-lst/LST_downloading_and_climatology.py | ||
---|---|---|
1 |
#!/usr/bin/python |
|
2 |
# |
|
3 |
# Early version of assorted Python code to download MODIS 11A1 (daily) |
|
4 |
# data associated with specified dates and tiles, then interface with |
|
5 |
# GRASS to calculate temporally averaged LST in a way that accounts for |
|
6 |
# QC flags. |
|
7 |
# |
|
8 |
# TODO: |
|
9 |
# - functionalize to encapsulate high level procedural steps |
|
10 |
# - create a 'main' function to allow script to be run as a command |
|
11 |
# that can accept arguments? |
|
12 |
# - put downloaded data somewhere other than working directory? |
|
13 |
# - write code to set up and take down a temporary GRASS location/mapset |
|
14 |
# - write code to export climatology rasters to file (geotiff? compressed?) |
|
15 |
# - calculate other aggregate stats? (stddev, ...) |
|
16 |
# - deal with nightly LST too? |
|
17 |
# - deal with more than just first two bits of QC flags? |
|
18 |
# e.g.: r.mapcalc 'qc_level2 = (qc>>2) & 0x03' |
|
19 |
# - record all downloads to a log file? |
|
20 |
# |
|
21 |
# Jim Regetz |
|
22 |
# NCEAS |
|
23 |
# Created on 16-May-2012 |
|
24 |
|
|
25 |
import os, glob |
|
26 |
import datetime, calendar |
|
27 |
import ftplib |
|
28 |
import grass.script as gs |
|
29 |
|
|
30 |
#------------------ |
|
31 |
# helper functions |
|
32 |
#------------------ |
|
33 |
|
|
34 |
def yj_to_ymd(year, doy): |
|
35 |
"""Return date as e.g. '2000.03.05' based on specified year and |
|
36 |
numeric day-of-year (doy) """ |
|
37 |
date = datetime.datetime.strptime('%d%03d' % (year, doy), '%Y%j') |
|
38 |
return date.strftime('%Y.%m.%d') |
|
39 |
|
|
40 |
def get_doy_range(year, month): |
|
41 |
"""Determine starting and ending numeric day-of-year (doy) |
|
42 |
asscociated with the specified month and year. |
|
43 |
|
|
44 |
Arguments: |
|
45 |
year -- four-digit integer year |
|
46 |
month -- integer month (1-12) |
|
47 |
|
|
48 |
Returns tuple of start and end doy for that month/year. |
|
49 |
""" |
|
50 |
last_day_of_month = calendar.monthrange(year, month)[1] |
|
51 |
start_doy = int(datetime.datetime.strptime('%d.%02d.%02d' % (year, |
|
52 |
month, 1), '%Y.%m.%d').strftime('%j')) |
|
53 |
end_doy = int(datetime.datetime.strptime('%d.%02d.%02d' % (year, |
|
54 |
month, last_day_of_month), '%Y.%m.%d').strftime('%j')) |
|
55 |
return (int(start_doy), int(end_doy)) |
|
56 |
|
|
57 |
# quick function to return list of dirs in wd |
|
58 |
def list_contents(ftp): |
|
59 |
"""Parse ftp directory listing into list of names of the files |
|
60 |
and/or directories contained therein. May or may not be robust in |
|
61 |
general, but seems to work fine for LP DAAP ftp server.""" |
|
62 |
listing = [] |
|
63 |
ftp.dir(listing.append) |
|
64 |
contents = [item.split()[-1] for item in listing[1:]] |
|
65 |
return contents |
|
66 |
|
|
67 |
def download_mod11a1(destdir, tile, start_doy, end_doy, year, ver=5): |
|
68 |
"""Download into destination directory the MODIS 11A1 HDF files |
|
69 |
matching a given tile, year, and day range. If for some unexpected |
|
70 |
reason there are multiple matches for a given day, only the first is |
|
71 |
used. If no matches, the day is skipped with a warning message. |
|
72 |
|
|
73 |
Arguments: |
|
74 |
destdir -- path to local destination directory |
|
75 |
tile -- tile identifier (e.g., 'h08v04') |
|
76 |
start_doy -- integer start of day range (0-366) |
|
77 |
end_doy -- integer end of day range (0-366) |
|
78 |
year -- integer year (>=2000) |
|
79 |
ver -- MODIS version (4 or 5) |
|
80 |
|
|
81 |
Returns list of absolute paths to the downloaded HDF files. |
|
82 |
""" |
|
83 |
# connect to data pool and change to the right directory |
|
84 |
ftp = ftplib.FTP('e4ftl01.cr.usgs.gov') |
|
85 |
ftp.login('anonymous', '') |
|
86 |
ftp.cwd('MOLT/MOD11A1.%03d' % ver) |
|
87 |
# make list of daily directories to traverse |
|
88 |
available_days = list_contents(ftp) |
|
89 |
desired_days = [yj_to_ymd(year, x) for x in range(start_doy, end_doy+1)] |
|
90 |
days_to_get = filter(lambda day: day in desired_days, available_days) |
|
91 |
if len(days_to_get) < len(desired_days): |
|
92 |
missing_days = [day for day in desired_days if day not in days_to_get] |
|
93 |
print 'skipping %d day(s) not found on server:' % len(missing_days) |
|
94 |
print '\n'.join(missing_days) |
|
95 |
# get each tile in turn |
|
96 |
hdfs = list() |
|
97 |
for day in days_to_get: |
|
98 |
ftp.cwd(day) |
|
99 |
files_to_get = [file for file in list_contents(ftp) |
|
100 |
if tile in file and file[-3:]=='hdf'] |
|
101 |
if len(files_to_get)==0: |
|
102 |
# no file found -- print message and move on |
|
103 |
print 'no hdf found on server for tile', tile, 'on', day |
|
104 |
continue |
|
105 |
elif 1<len(files_to_get): |
|
106 |
# multiple files found! -- just use the first... |
|
107 |
print 'multiple hdfs found on server for tile', tile, 'on', day |
|
108 |
file = files_to_get[0] |
|
109 |
local_file = os.path.join(destdir, file) |
|
110 |
ftp.retrbinary('RETR %s' % file, open(local_file, 'wb').write) |
|
111 |
hdfs.append(os.path.abspath(local_file)) |
|
112 |
ftp.cwd('..') |
|
113 |
# politely disconnect |
|
114 |
ftp.quit() |
|
115 |
# return list of downloaded paths |
|
116 |
return hdfs |
|
117 |
|
|
118 |
def get_hdf_paths(hdfdir, tile, start_doy, end_doy, year): |
|
119 |
"""Look in supplied directory to find the MODIS 11A1 HDF files |
|
120 |
matching a given tile, year, and day range. If for some unexpected |
|
121 |
reason there are multiple matches for a given day, only the first is |
|
122 |
used. If no matches, the day is skipped with a warning message. |
|
123 |
|
|
124 |
Arguments: |
|
125 |
hdfdir -- path to directory containing the desired HDFs |
|
126 |
tile -- tile identifier (e.g., 'h08v04') |
|
127 |
start_doy -- integer start of day range (0-366) |
|
128 |
end_doy -- integer end of day range (0-366) |
|
129 |
year -- integer year (>=2000) |
|
130 |
|
|
131 |
Returns list of absolute paths to the located HDF files. |
|
132 |
""" |
|
133 |
hdfs = list() |
|
134 |
for doy in range(start_doy, end_doy+1): |
|
135 |
fileglob = 'MOD11A1.A%d%03d.%s*hdf' % (year, doy, tile) |
|
136 |
pathglob = os.path.join(hdfdir, fileglob) |
|
137 |
files = glob.glob(pathglob) |
|
138 |
if len(files)==0: |
|
139 |
# no file found -- print message and move on |
|
140 |
print 'cannot access %s: no such file' % pathglob |
|
141 |
continue |
|
142 |
elif 1<len(files): |
|
143 |
# multiple files found! -- just use the first... |
|
144 |
print 'multiple hdfs found for tile', tile, 'on', day |
|
145 |
hdfs.append(os.path.abspath(files[0])) |
|
146 |
return hdfs |
|
147 |
|
|
148 |
def calc_clim(maplist, name, overwrite=False): |
|
149 |
"""Generate some climatalogies in GRASS based on the input list of |
|
150 |
maps. As usual, current GRASS region settings apply. Produces the |
|
151 |
following output rasters: |
|
152 |
* nobs: count of number of (non-null) values over the input maps |
|
153 |
* mean: arithmetic mean of (non-null) values over the input maps |
|
154 |
|
|
155 |
Arguments: |
|
156 |
maplist -- list of names of GRASS maps to be aggregated |
|
157 |
name -- template (suffix) for GRASS output map names |
|
158 |
|
|
159 |
Returns list of names of the output maps created in GRASS. |
|
160 |
""" |
|
161 |
denominator = '(%s)' % '+'.join(['if(!isnull(%s))' % m |
|
162 |
for m in maplist]) |
|
163 |
gs.mapcalc('nobs_%s = %s' % (name, denominator), overwrite=overwrite) |
|
164 |
numerator = '(%s)' % '+'.join(['if(isnull(%s), 0, %s)' % (m, m) |
|
165 |
for m in maplist]) |
|
166 |
gs.mapcalc('mean_%s = round(float(%s)/nobs_%s)' % (name, numerator, name), |
|
167 |
overwrite=overwrite) |
|
168 |
return ['%s_%s' % (s, name) for s in ['nobs', 'mean']] |
|
169 |
|
|
170 |
def load_qc_adjusted_lst(hdf): |
|
171 |
"""Load LST_Day_1km into GRASS from the specified hdf file, and |
|
172 |
nullify any cells for which QA flags indicate anything other than |
|
173 |
high quality. |
|
174 |
|
|
175 |
Argument: |
|
176 |
hdf -- local path to the 11A1 HDF file |
|
177 |
|
|
178 |
Returns the name of the QC-adjusted LST map created in GRASS. |
|
179 |
""" |
|
180 |
lstname = 'LST_' + '_'.join(os.path.basename(hdf).split('.')[1:3]) |
|
181 |
# read in lst grid |
|
182 |
gs.run_command('r.in.gdal', |
|
183 |
input='HDF4_EOS:EOS_GRID:%s:MODIS_Grid_Daily_1km_LST:LST_Day_1km' % hdf, |
|
184 |
output=lstname) |
|
185 |
# read in qc grid |
|
186 |
gs.run_command('r.in.gdal', |
|
187 |
input = 'HDF4_EOS:EOS_GRID:%s:MODIS_Grid_Daily_1km_LST:QC_Day' % hdf, |
|
188 |
output = 'qc_this_day') |
|
189 |
gs.run_command('g.region', rast=lstname) |
|
190 |
# null out any LST cells for which QC is not high [00] |
|
191 |
gs.mapcalc('${lst} = if((qc_this_day & 0x03)==0, ${lst}, null())', |
|
192 |
lst=lstname, overwrite=True) |
|
193 |
# clean up |
|
194 |
gs.run_command('g.remove', rast='qc_this_day') |
|
195 |
# return name of qc-adjusted LST raster in GRASS |
|
196 |
return lstname |
|
197 |
|
|
198 |
|
|
199 |
#-------------------------------------------- |
|
200 |
# test procedures mostly for timing purposes |
|
201 |
#-------------------------------------------- |
|
202 |
|
|
203 |
# TODO: set up a (temporary?) GRASS database to use for processing? code |
|
204 |
# currently assumes it's being run within an existing GRASS session |
|
205 |
# using an appropriately configured LOCATION... |
|
206 |
# |
|
207 |
# note the following trick to fix datum for modis sinu; |
|
208 |
# TODO: check that this works properly...compare to MRT output? |
|
209 |
# gs.run_command('g.proj', flags='c', |
|
210 |
# proj4='+proj=sinu +a=6371007.181 +b=6371007.181 +ellps=sphere') |
|
211 |
## proj4='+proj=sinu +R=6371007.181 +nadgrids=@null +wktext') |
|
212 |
|
|
213 |
# (1) download then aggregate for one month |
|
214 |
|
|
215 |
tile = 'h09v04' |
|
216 |
year = 2005 |
|
217 |
month = 1 |
|
218 |
hdfdir = '.' |
|
219 |
|
|
220 |
# determine range of doys for the specified month |
|
221 |
start_doy, end_doy = get_doy_range(year, month) |
|
222 |
# download data |
|
223 |
### [atlas 17-May-2012] Wall time: 111.62 s |
|
224 |
hdfs = download_mod11a1(hdfdir, tile, start_doy, end_doy, year) |
|
225 |
# generate monthly pixelwise mean & count of high-quality daytime LST |
|
226 |
### [atlas 17-May-2012] Wall time: 53.79 s |
|
227 |
gs.os.environ['GRASS_OVERWRITE'] = '1' |
|
228 |
LST = [load_qc_adjusted_lst(hdf) for hdf in hdfs] |
|
229 |
clims = calc_clim(LST, 'LST_%s_%d_%02d' % (tile, year, month)) |
|
230 |
# clean up |
|
231 |
gs.run_command('g.remove', rast=','.join(LST)) |
|
232 |
gs.os.environ['GRASS_OVERWRITE'] = '0' |
|
233 |
|
|
234 |
|
|
235 |
# (2) aggregate all 12 months in one year, using local data |
|
236 |
|
|
237 |
tile = 'h09v04' |
|
238 |
year = 2005 |
|
239 |
hdfdir = '/home/layers/data/climate/MOD11A1.004-OR-orig' |
|
240 |
|
|
241 |
### [atlas 17-May-2012] Wall time: 802.86 s |
|
242 |
gs.os.environ['GRASS_OVERWRITE'] = '1' |
|
243 |
for month in range(1, 12+1): |
|
244 |
start_doy, end_doy = get_doy_range(year, month) |
|
245 |
hdfs = get_hdf_paths(hdfdir, tile, start_doy, end_doy, year) |
|
246 |
LST = [load_qc_adjusted_lst(hdf) for hdf in hdfs] |
|
247 |
clims = calc_clim(LST, 'LST_%s_%d_%02d' % (tile, year, month)) |
|
248 |
gs.run_command('g.remove', rast=','.join(LST)) |
|
249 |
gs.os.environ['GRASS_OVERWRITE'] = '0' |
Also available in: Unified diff
LST climatology, initial commit script from Jim Regetz commit 01b3830e, task#375 and task#316