Project

General

Profile

« Previous | Next » 

Revision 1836d7f0

Added by Benoit Parmentier almost 12 years ago

LST climatology, initial commit script from Jim Regetz commit 01b3830e, task#375 and task#316

View differences:

climate/research/oregon/modis-lst/LST_downloading_and_climatology.py
1
#!/usr/bin/python
2
#
3
# Early version of assorted Python code to download MODIS 11A1 (daily)
4
# data associated with specified dates and tiles, then interface with
5
# GRASS to calculate temporally averaged LST in a way that accounts for
6
# QC flags.
7
#
8
# TODO:
9
#  - functionalize to encapsulate high level procedural steps
10
#  - create a 'main' function to allow script to be run as a command
11
#    that can accept arguments?
12
#  - put downloaded data somewhere other than working directory?
13
#  - write code to set up and take down a temporary GRASS location/mapset
14
#  - write code to export climatology rasters to file (geotiff? compressed?)
15
#  - calculate other aggregate stats? (stddev, ...)
16
#  - deal with nightly LST too?
17
#  - deal with more than just first two bits of QC flags?
18
#     e.g.: r.mapcalc 'qc_level2 = (qc>>2) & 0x03'
19
#  - record all downloads to a log file?
20
#
21
# Jim Regetz
22
# NCEAS
23
# Created on 16-May-2012
24

  
25
import os, glob
26
import datetime, calendar
27
import ftplib
28
import grass.script as gs
29

  
30
#------------------
31
# helper functions
32
#------------------
33

  
34
def yj_to_ymd(year, doy):
35
    """Return date as e.g. '2000.03.05' based on specified year and
36
    numeric day-of-year (doy) """
37
    date = datetime.datetime.strptime('%d%03d' % (year, doy), '%Y%j')
38
    return date.strftime('%Y.%m.%d')
39

  
40
def get_doy_range(year, month):
41
    """Determine starting and ending numeric day-of-year (doy)
42
    asscociated with the specified month and year.
43

  
44
    Arguments:
45
    year -- four-digit integer year
46
    month -- integer month (1-12)
47

  
48
    Returns tuple of start and end doy for that month/year.
49
    """
50
    last_day_of_month = calendar.monthrange(year, month)[1]
51
    start_doy = int(datetime.datetime.strptime('%d.%02d.%02d' % (year,
52
        month, 1), '%Y.%m.%d').strftime('%j'))
53
    end_doy = int(datetime.datetime.strptime('%d.%02d.%02d' % (year,
54
        month, last_day_of_month), '%Y.%m.%d').strftime('%j'))
55
    return (int(start_doy), int(end_doy))
56

  
57
# quick function to return list of dirs in wd
58
def list_contents(ftp):
59
    """Parse ftp directory listing into list of names of the files
60
    and/or directories contained therein. May or may not be robust in
61
    general, but seems to work fine for LP DAAP ftp server."""
62
    listing = []
63
    ftp.dir(listing.append)
64
    contents = [item.split()[-1] for item in listing[1:]]
65
    return contents
66

  
67
def download_mod11a1(destdir, tile, start_doy, end_doy, year, ver=5):
68
    """Download into destination directory the MODIS 11A1 HDF files
69
    matching a given tile, year, and day range. If for some unexpected
70
    reason there are multiple matches for a given day, only the first is
71
    used. If no matches, the day is skipped with a warning message.
72

  
73
    Arguments:
74
    destdir -- path to local destination directory
75
    tile -- tile identifier (e.g., 'h08v04')
76
    start_doy -- integer start of day range (0-366)
77
    end_doy -- integer end of day range (0-366)
78
    year -- integer year (>=2000)
79
    ver -- MODIS version (4 or 5)
80

  
81
    Returns list of absolute paths to the downloaded HDF files.
82
    """
83
    # connect to data pool and change to the right directory
84
    ftp = ftplib.FTP('e4ftl01.cr.usgs.gov')
85
    ftp.login('anonymous', '')
86
    ftp.cwd('MOLT/MOD11A1.%03d' % ver)
87
    # make list of daily directories to traverse
88
    available_days = list_contents(ftp)
89
    desired_days = [yj_to_ymd(year, x) for x in range(start_doy, end_doy+1)]
90
    days_to_get = filter(lambda day: day in desired_days, available_days)
91
    if len(days_to_get) < len(desired_days):
92
        missing_days = [day for day in desired_days if day not in days_to_get]
93
        print 'skipping %d day(s) not found on server:' % len(missing_days)
94
        print '\n'.join(missing_days)
95
    # get each tile in turn
96
    hdfs = list()
97
    for day in days_to_get:
98
        ftp.cwd(day)
99
        files_to_get = [file for file in list_contents(ftp)
100
            if tile in file and file[-3:]=='hdf']
101
        if len(files_to_get)==0:
102
            # no file found -- print message and move on
103
            print 'no hdf found on server for tile', tile, 'on', day
104
            continue
105
        elif 1<len(files_to_get):
106
            # multiple files found! -- just use the first...
107
            print 'multiple hdfs found on server for tile', tile, 'on', day
108
        file = files_to_get[0]
109
        local_file = os.path.join(destdir, file)
110
        ftp.retrbinary('RETR %s' % file, open(local_file, 'wb').write)
111
        hdfs.append(os.path.abspath(local_file))
112
        ftp.cwd('..')
113
    # politely disconnect
114
    ftp.quit()
115
    # return list of downloaded paths
116
    return hdfs
117

  
118
def get_hdf_paths(hdfdir, tile, start_doy, end_doy, year):
119
    """Look in supplied directory to find the MODIS 11A1 HDF files
120
    matching a given tile, year, and day range. If for some unexpected
121
    reason there are multiple matches for a given day, only the first is
122
    used. If no matches, the day is skipped with a warning message.
123

  
124
    Arguments:
125
    hdfdir -- path to directory containing the desired HDFs
126
    tile -- tile identifier (e.g., 'h08v04')
127
    start_doy -- integer start of day range (0-366)
128
    end_doy -- integer end of day range (0-366)
129
    year -- integer year (>=2000)
130

  
131
    Returns list of absolute paths to the located HDF files.
132
    """
133
    hdfs = list()
134
    for doy in range(start_doy, end_doy+1):
135
        fileglob = 'MOD11A1.A%d%03d.%s*hdf' % (year, doy, tile)
136
        pathglob = os.path.join(hdfdir, fileglob)
137
        files = glob.glob(pathglob)
138
        if len(files)==0:
139
            # no file found -- print message and move on
140
            print 'cannot access %s: no such file' % pathglob
141
            continue
142
        elif 1<len(files):
143
            # multiple files found! -- just use the first...
144
            print 'multiple hdfs found for tile', tile, 'on', day
145
        hdfs.append(os.path.abspath(files[0]))
146
    return hdfs
147

  
148
def calc_clim(maplist, name, overwrite=False):
149
    """Generate some climatalogies in GRASS based on the input list of
150
    maps. As usual, current GRASS region settings apply. Produces the
151
    following output rasters:
152
      * nobs: count of number of (non-null) values over the input maps
153
      * mean: arithmetic mean of (non-null) values over the input maps
154

  
155
    Arguments:
156
    maplist -- list of names of GRASS maps to be aggregated
157
    name -- template (suffix) for GRASS output map names
158

  
159
    Returns list of names of the output maps created in GRASS.
160
    """
161
    denominator = '(%s)' % '+'.join(['if(!isnull(%s))' % m
162
        for m in maplist])
163
    gs.mapcalc('nobs_%s = %s' % (name, denominator), overwrite=overwrite)
164
    numerator = '(%s)' % '+'.join(['if(isnull(%s), 0, %s)' % (m, m)
165
        for m in maplist])
166
    gs.mapcalc('mean_%s = round(float(%s)/nobs_%s)' % (name, numerator, name),
167
        overwrite=overwrite)
168
    return ['%s_%s' % (s, name) for s in ['nobs', 'mean']]
169

  
170
def load_qc_adjusted_lst(hdf):
171
    """Load LST_Day_1km into GRASS from the specified hdf file, and
172
    nullify any cells for which QA flags indicate anything other than
173
    high quality.
174

  
175
    Argument:
176
    hdf -- local path to the 11A1 HDF file
177

  
178
    Returns the name of the QC-adjusted LST map created in GRASS.
179
    """
180
    lstname =  'LST_' + '_'.join(os.path.basename(hdf).split('.')[1:3])
181
    # read in lst grid
182
    gs.run_command('r.in.gdal',
183
        input='HDF4_EOS:EOS_GRID:%s:MODIS_Grid_Daily_1km_LST:LST_Day_1km' % hdf,
184
        output=lstname)
185
    # read in qc grid
186
    gs.run_command('r.in.gdal',
187
        input = 'HDF4_EOS:EOS_GRID:%s:MODIS_Grid_Daily_1km_LST:QC_Day' % hdf,
188
        output = 'qc_this_day')
189
    gs.run_command('g.region', rast=lstname)
190
    # null out any LST cells for which QC is not high [00]
191
    gs.mapcalc('${lst} = if((qc_this_day & 0x03)==0, ${lst}, null())',
192
        lst=lstname, overwrite=True)
193
    # clean up
194
    gs.run_command('g.remove', rast='qc_this_day')
195
    # return name of qc-adjusted LST raster in GRASS
196
    return lstname
197

  
198

  
199
#--------------------------------------------
200
# test procedures mostly for timing purposes
201
#--------------------------------------------
202

  
203
# TODO: set up a (temporary?) GRASS database to use for processing? code
204
# currently assumes it's being run within an existing GRASS session
205
# using an appropriately configured LOCATION...
206
#
207
# note the following trick to fix datum for modis sinu;
208
# TODO: check that this works properly...compare to MRT output?
209
# gs.run_command('g.proj', flags='c',
210
#     proj4='+proj=sinu +a=6371007.181 +b=6371007.181 +ellps=sphere')
211
##    proj4='+proj=sinu +R=6371007.181 +nadgrids=@null +wktext')
212

  
213
# (1) download then aggregate for one month
214

  
215
tile = 'h09v04'
216
year = 2005
217
month = 1
218
hdfdir = '.'
219

  
220
# determine range of doys for the specified month
221
start_doy, end_doy = get_doy_range(year, month)
222
# download data
223
### [atlas 17-May-2012] Wall time: 111.62 s
224
hdfs = download_mod11a1(hdfdir, tile, start_doy, end_doy, year)
225
# generate monthly pixelwise mean & count of high-quality daytime LST
226
### [atlas 17-May-2012] Wall time: 53.79 s
227
gs.os.environ['GRASS_OVERWRITE'] = '1'
228
LST = [load_qc_adjusted_lst(hdf) for hdf in hdfs]
229
clims = calc_clim(LST, 'LST_%s_%d_%02d' % (tile, year, month))
230
# clean up
231
gs.run_command('g.remove', rast=','.join(LST))
232
gs.os.environ['GRASS_OVERWRITE'] = '0'
233

  
234

  
235
# (2) aggregate all 12 months in one year, using local data
236

  
237
tile = 'h09v04'
238
year = 2005
239
hdfdir = '/home/layers/data/climate/MOD11A1.004-OR-orig'
240

  
241
### [atlas 17-May-2012] Wall time: 802.86 s
242
gs.os.environ['GRASS_OVERWRITE'] = '1'
243
for month in range(1, 12+1):
244
    start_doy, end_doy = get_doy_range(year, month)
245
    hdfs = get_hdf_paths(hdfdir, tile, start_doy, end_doy, year)
246
    LST = [load_qc_adjusted_lst(hdf) for hdf in hdfs]
247
    clims = calc_clim(LST, 'LST_%s_%d_%02d' % (tile, year, month))
248
    gs.run_command('g.remove', rast=','.join(LST))
249
gs.os.environ['GRASS_OVERWRITE'] = '0'

Also available in: Unified diff