Project

General

Profile

Download (16.3 KB) Statistics
| Branch: | Revision:
1 04d1bd09 Benoit Parmentier
# -*- coding: utf-8 -*-
2
"""
3
Created on Fri Oct 26 23:48:57 2012
4
5
@author: benoitparmentier
6
"""
7
8
#!/usr/bin/python
9
#
10
# Early version of assorted Python code to download MODIS 11A1 (daily)
11
# data associated with specified dates and tiles, then interface with
12
# GRASS to calculate temporally averaged LST in a way that accounts for
13
# QC flags.
14
#
15
# TODO:
16
#  - functionalize to encapsulate high level procedural steps
17
#  - create a 'main' function to allow script to be run as a command
18
#    that can accept arguments?
19
#  - put downloaded data somewhere other than working directory?
20
#  - write code to set up and take down a temporary GRASS location/mapset
21
#  - write code to export climatology rasters to file (geotiff? compressed?)
22
#  - calculate other aggregate stats? (stddev, ...)
23
#  - deal with nightly LST too?
24
#  - deal with more than just first two bits of QC flags?
25
#     e.g.: r.mapcalc 'qc_level2 = (qc>>2) & 0x03'   
26
#     0x03?? that means 0000_0011 and >>2 means shit twice on the right for 
27
#  - record all downloads to a log file?
28
#
29
# Jim Regetz
30
# NCEAS
31
# Created on 16-May-2012
32
# Modified by Benoit Parmentier
33
# NCEAS on 18-October-2012
34
# 
35
36
import os, glob
37
import datetime, calendar
38
import ftplib
39
import grass.script as gs
40 b8506b64 Benoit Parmentier
41 04d1bd09 Benoit Parmentier
#from datetime import date
42
#from datetime import datetime
43
44
45
#------------------
46
# helper functions
47
#------------------
48
49
def yj_to_ymd(year, doy):
50
    """Return date as e.g. '2000.03.05' based on specified year and
51
    numeric day-of-year (doy) """
52
    date = datetime.datetime.strptime('%d%03d' % (year, doy), '%Y%j')
53
    return date.strftime('%Y.%m.%d')
54
55
def get_doy_range(year, month):
56
    """Determine starting and ending numeric day-of-year (doy)
57
    asscociated with the specified month and year.
58
59
    Arguments:
60
    year -- four-digit integer year
61
    month -- integer month (1-12)
62
63
    Returns tuple of start and end doy for that month/year.
64
    """
65
    last_day_of_month = calendar.monthrange(year, month)[1]
66
    start_doy = int(datetime.datetime.strptime('%d.%02d.%02d' % (year,
67
        month, 1), '%Y.%m.%d').strftime('%j'))
68
    end_doy = int(datetime.datetime.strptime('%d.%02d.%02d' % (year,
69
        month, last_day_of_month), '%Y.%m.%d').strftime('%j'))
70
    return (int(start_doy), int(end_doy))
71
72
#added by Benoit, to create a list of raster images for particular month (2001-2010)
73
74
def date_sequence(year_start,month_start,day_start,year_end,month_end,day_end):
75
    """Create a date sequence for a start date and end date defined by year, month, date
76
    Arguments:
77
    year_start -- starting year
78
 
79
    Returns list of absolute paths to the downloaded HDF files.Note the format yyyydoy
80
    """
81
    from datetime import date
82
    from dateutil.rrule import rrule, DAILY
83
84
    a = date(year_start, month_start,day_start)
85
    b = date(year_end, month_end, day_end)
86
    date_seq=list()
87
    for dt in rrule(DAILY, dtstart=a, until=b):
88
        print dt.strftime("%Y%j")    
89
        date_seq.append(dt.strftime("%Y%j"))
90
        
91
    return(date_seq)
92
93
#Added on January 16, 2012 by Benoit NCEAS
94
def list_raster_per_month(listmaps):
95
    """Determine starting and ending numeric day-of-year (doy)
96
    asscociated with the specified month and year.
97
98
    Arguments:
99
    maplist -- list of raster grass-digit integer year
100
    month -- integer month (1-12)
101
    year_range -- list of years
102
103
    Returns tuple of start and end doy for that month/year.
104
    """
105
    list_maps_month=list()
106
    nb_month=12
107
    for i in range(1,nb_month+1):
108
        #i=i+1
109
        #filename = listfiles_wd2[i] #list the files of interest
110
        #monthlist[:]=[] #empty the list
111
        monthlist=list()
112
        #monthlist2[:]=[] #empty the list
113
        j=0  #counter
114
        for mapname in listmaps:
115
            #tmp_str=LST[0]
116
            date_map=mapname.split('_')[1][1:8]
117
            #date = filename[filename.rfind('_MOD')-8:filename.rfind('_MOD')]
118
            d=datetime.datetime.strptime(date_map,'%Y%j') #This produces a date object from a string extracted from the file name.
119
            month=d.strftime('%m') #Extract the month of the current map
120
            if int(month)==i:
121
                #monthlist.append(listmaps[j])
122
                monthlist.append(mapname)
123
                #monthlist2.append(listmaps[j])
124
            j=j+1
125
        list_maps_month.append(monthlist)   #This is a list of a list containing a list for each month
126
        #list_files2.append(monthlist2)
127
    return(list_maps_month)    
128 081e96f7 Benoit Parmentier
    
129 04d1bd09 Benoit Parmentier
# quick function to return list of dirs in wd
130
def list_contents(ftp):
131
    """Parse ftp directory listing into list of names of the files
132
    and/or directories contained therein. May or may not be robust in
133
    general, but seems to work fine for LP DAAP ftp server."""
134
    listing = []
135
    ftp.dir(listing.append)
136
    contents = [item.split()[-1] for item in listing[1:]]
137
    return contents
138
139
def download_mod11a1(destdir, tile, start_doy, end_doy, year, ver=5):
140
    """Download into destination directory the MODIS 11A1 HDF files
141
    matching a given tile, year, and day range. If for some unexpected
142
    reason there are multiple matches for a given day, only the first is
143
    used. If no matches, the day is skipped with a warning message.
144
145
    Arguments:
146
    destdir -- path to local destination directory
147
    tile -- tile identifier (e.g., 'h08v04')
148
    start_doy -- integer start of day range (0-366)
149
    end_doy -- integer end of day range (0-366)
150
    year -- integer year (>=2000)
151
    ver -- MODIS version (4 or 5)
152
153
    Returns list of absolute paths to the downloaded HDF files.
154
    """
155
    # connect to data pool and change to the right directory
156
    ftp = ftplib.FTP('e4ftl01.cr.usgs.gov')
157
    ftp.login('anonymous', '')
158
    ftp.cwd('MOLT/MOD11A1.%03d' % ver)
159
    # make list of daily directories to traverse
160
    available_days = list_contents(ftp)
161
    desired_days = [yj_to_ymd(year, x) for x in range(start_doy, end_doy+1)]
162
    days_to_get = filter(lambda day: day in desired_days, available_days)
163
    if len(days_to_get) < len(desired_days):
164
        missing_days = [day for day in desired_days if day not in days_to_get]
165
        print 'skipping %d day(s) not found on server:' % len(missing_days)
166
        print '\n'.join(missing_days)
167
    # get each tile in turn
168
    hdfs = list()
169
    for day in days_to_get:
170
        ftp.cwd(day)
171
        files_to_get = [file for file in list_contents(ftp)
172
            if tile in file and file[-3:]=='hdf']
173
        if len(files_to_get)==0:
174
            # no file found -- print message and move on
175
            print 'no hdf found on server for tile', tile, 'on', day
176
            ftp.cwd('..') #added by Benoit
177
            continue
178
        elif 1<len(files_to_get):
179
            # multiple files found! -- just use the first...
180
            print 'multiple hdfs found on server for tile', tile, 'on', day
181
        file = files_to_get[0]
182
        local_file = os.path.join(destdir, file)
183
        ftp.retrbinary('RETR %s' % file, open(local_file, 'wb').write)
184
        hdfs.append(os.path.abspath(local_file))
185
        ftp.cwd('..')
186
    # politely disconnect
187
    ftp.quit()
188
    # return list of downloaded paths
189
    return hdfs
190
191
def get_hdf_paths(hdfdir, tile, start_doy, end_doy, year):
192
    """Look in supplied directory to find the MODIS 11A1 HDF files
193
    matching a given tile, year, and day range. If for some unexpected
194
    reason there are multiple matches for a given day, only the first is
195
    used. If no matches, the day is skipped with a warning message.
196
197
    Arguments:
198
    hdfdir -- path to directory containing the desired HDFs
199
    tile -- tile identifier (e.g., 'h08v04')
200
    start_doy -- integer start of day range (0-366)
201
    end_doy -- integer end of day range (0-366)
202
    year -- integer year (>=2000)
203
204
    Returns list of absolute paths to the located HDF files.
205
    """
206
    hdfs = list()
207
    for doy in range(start_doy, end_doy+1):
208
        fileglob = 'MOD11A1.A%d%03d.%s*hdf' % (year, doy, tile)
209
        pathglob = os.path.join(hdfdir, fileglob)
210
        files = glob.glob(pathglob)
211
        if len(files)==0:
212
            # no file found -- print message and move on
213
            print 'cannot access %s: no such file' % pathglob
214
            continue
215
        elif 1<len(files):
216
            # multiple files found! -- just use the first...
217
            print 'multiple hdfs found for tile', tile, 'on', day
218
        hdfs.append(os.path.abspath(files[0]))
219
    return hdfs
220
221
def calc_clim(maplist, name, overwrite=False):
222
    """Generate some climatalogies in GRASS based on the input list of
223
    maps. As usual, current GRASS region settings apply. Produces the
224
    following output rasters:
225
      * nobs: count of number of (non-null) values over the input maps
226
      * mean: arithmetic mean of (non-null) values over the input maps
227
228
    Arguments:
229
    maplist -- list of names of GRASS maps to be aggregated
230
    name -- template (suffix) for GRASS output map names
231
232
    Returns list of names of the output maps created in GRASS.
233
    """
234
    denominator = '(%s)' % '+'.join(['if(!isnull(%s))' % m
235
        for m in maplist])
236
    gs.mapcalc('nobs_%s = %s' % (name, denominator), overwrite=overwrite)
237
    numerator = '(%s)' % '+'.join(['if(isnull(%s), 0, %s)' % (m, m)
238
        for m in maplist])
239
    gs.mapcalc('mean_%s = round(float(%s)/nobs_%s)' % (name, numerator, name),
240
        overwrite=overwrite)
241
    return ['%s_%s' % (s, name) for s in ['nobs', 'mean']]
242
243
def load_qc_adjusted_lst(hdf):
244
    """Load LST_Day_1km into GRASS from the specified hdf file, and
245
    nullify any cells for which QA flags indicate anything other than
246
    high quality.
247
248
    Argument:
249
    hdf -- local path to the 11A1 HDF file
250
251
    Returns the name of the QC-adjusted LST map created in GRASS.
252
    """
253
    lstname =  'LST_' + '_'.join(os.path.basename(hdf).split('.')[1:3])               
254
    # read in lst grid
255
    gs.run_command('r.in.gdal',
256
        input='HDF4_EOS:EOS_GRID:%s:MODIS_Grid_Daily_1km_LST:LST_Day_1km' % hdf,
257
        output=lstname)            # No new location created since it has already been done with r.in.gdal before!!!
258
259
    # read in qc grid
260
    gs.run_command('r.in.gdal',
261
        input = 'HDF4_EOS:EOS_GRID:%s:MODIS_Grid_Daily_1km_LST:QC_Day' % hdf,
262
        output = 'qc_this_day')
263
    gs.run_command('g.region', rast=lstname)
264
    # null out any LST cells for which QC is not high [00]  #THIS WHERE WE MIGHT NEED TO CHANGE...
265
    gs.mapcalc('${lst} = if((qc_this_day & 0x03)==0, ${lst}, null())',
266
        lst=lstname, overwrite=True)
267
    # clean up
268
    gs.run_command('g.remove', rast='qc_this_day')
269
    # return name of qc-adjusted LST raster in GRASS
270
    return lstname
271
272
273
def main():
274
    #--------------------------------------------
275
    # Download and Calculate monthly climatology for daily LST time series for a specific area
276
    #--------------------------------------------
277
278
    # TODO: set up a (temporary?) GRASS database to use for processing? code
279
    # currently assumes it's being run within an existing GRASS session
280
    # using an appropriately configured LOCATION...
281
    #
282
    # note the following trick to fix datum for modis sinu;
283
    # TODO: check that this works properly...compare to MRT output?
284
    # gs.run_command('g.proj', flags='c',
285
    #     proj4='+proj=sinu +a=6371007.181 +b=6371007.181 +ellps=sphere')
286
    ##    proj4='+proj=sinu +R=6371007.181 +nadgrids=@null +wktext')
287
288
 
289 b8506b64 Benoit Parmentier
    #### MOdified by Benoit in March 2013 
290 04d1bd09 Benoit Parmentier
    
291
    #Parameters
292 b8506b64 Benoit Parmentier
    #Inputs from R??
293 63f27fec Benoit Parmentier
    tiles = ['h11v08','h11v07','h12v07','h12v08','h10v07','h10v08'] #These tiles correspond to Venezuela.
294 04d1bd09 Benoit Parmentier
    start_year = 2001
295
    end_year = 2010
296
    end_month=12
297
    start_month=1
298
    hdfdir =  '/home/parmentier/Data/benoit_test' #destination file where hdf files are stored locally after download.
299 b8506b64 Benoit Parmentier
    night=1    # if 1 then produce night climatology
300
    out_suffix="_03192013"
301 63f27fec Benoit Parmentier
    download=1  # if 1 then download files
302 bb5e3f36 Benoit Parmentier
    
303 04d1bd09 Benoit Parmentier
    ################## First step: download data ###################
304
    # Added tile loop 
305
    year_list=range(start_year,end_year+1) #list of year to loop through
306 bb5e3f36 Benoit Parmentier
    #for testing...
307
    #year_list=[2001,2002]
308
    #hdfdir = '/home/parmentier/Data/benoit_test2' 
309 63f27fec Benoit Parmentier
    #tiles = ['h10v07','h10v08','h12v07']
310
    if download==1:
311
        for tile in tiles:
312
            for year in year_list:
313
                start_doy = 1
314
                end_doy = 365
315
                if calendar.isleap(year):
316
                    end_doy=366
317
                    
318
                hdfs = download_mod11a1(hdfdir, tile, start_doy, end_doy, year)
319 04d1bd09 Benoit Parmentier
    
320
    # modify loop to take into account "hdfs", list of hdf files needed in the next steps…  
321
    ################# Second step: compute climatology #################
322 bb5e3f36 Benoit Parmentier
    ## Process tile by tile...
323
    ##start loop for tile
324
    #tile= 'h12v08'
325 63f27fec Benoit Parmentier
    #tiles = ['h12v07','h12v08']
326
    var_name = ['LST_Day_1km','LST_Night_1km']
327
    if night==1:
328
        lst_var = var_name[1]
329
    if night==0:
330
        lst_var = var_name[0]
331 04d1bd09 Benoit Parmentier
        
332 63f27fec Benoit Parmentier
    #start = time.clock()    
333
    for tile in tiles:        
334
        # Set up a temporary GRASS data base   
335
        tmp_location = 'tmp' + "_"+ tile + "_"+ str(os.getpid())        # Name of the temporary GRASS data base 
336
        orig_location = gs.gisenv()['LOCATION_NAME']
337
        orig_mapset = gs.gisenv()['MAPSET']
338
        gs.os.environ['GRASS_OVERWRITE'] = '1'         # Allow for overwrite?? GRASS data base  
339
            
340
        path_file = hdfdir
341
        os.chdir(path_file)    #set working directory
342
        os.getcwd()            #get current working directory
343
        listfiles_wd2 = glob.glob('*'+tile+'*'+'.hdf') #list the all the LST files of interest
344
        name_of_file = listfiles_wd2[0]    
345 b8506b64 Benoit Parmentier
        #Create the name of the layer to extract from the hdf file used for definition of the database
346 63f27fec Benoit Parmentier
        lst1day = 'HDF4_EOS:EOS_GRID:%s/%s:%s' % (
347
        path_file,
348
        name_of_file,
349 b8506b64 Benoit Parmentier
        'MODIS_Grid_Daily_1km_LST:'+ lst_var)
350
        #'LST_Night_1km'
351
        #change the above line later to allow night LST for tmin
352 63f27fec Benoit Parmentier
         
353
        #now import one image and set the location, mapset and database !!create name per tile
354
        gs.run_command('r.in.gdal', input=lst1day, output='LST_1day_'+tile,
355
               location=tmp_location)
356 04d1bd09 Benoit Parmentier
    
357 63f27fec Benoit Parmentier
        # Now that the new location has been create, switch to new location
358
        gs.run_command('g.gisenv', set='LOCATION_NAME=%s' % tmp_location)
359
        gs.run_command('g.gisenv', set='MAPSET=PERMANENT')
360 bb5e3f36 Benoit Parmentier
    
361 63f27fec Benoit Parmentier
        # set GRASS the projection system and GRASS region to match the extent of the file
362
        gs.run_command('g.proj', flags='c',
363 b8506b64 Benoit Parmentier
        proj4='+proj=sinu +a=6371007.181 +b=6371007.181 +ellps=sphere') #This should be set in the parameters
364 63f27fec Benoit Parmentier
        gs.run_command('g.region', rast='LST_1day_'+tile)
365 04d1bd09 Benoit Parmentier
    
366 63f27fec Benoit Parmentier
        #generate monthly pixelwise mean & count of high-quality daytime LST
367
        gs.os.environ['GRASS_OVERWRITE'] = '1'
368
    
369
        #Provide a list of file "hdfs" to process…this should be in a loop...
370
        #tile= 'h12v08'
371
        fileglob = 'MOD11A1.A*.%s*hdf' % (tile)
372
        pathglob = os.path.join(path_file, fileglob)
373
        files = glob.glob(pathglob)
374
        hdfs = files
375
        ### LST values in images must be masked out if they have low quality, determined from QC flags stored in hdf files
376
        LST = [load_qc_adjusted_lst(hdf) for hdf in hdfs]
377
        ### LST is a list that contains the name of the new lst files in the GRASS format. The files are stored in the temporary GRASS database.
378
        list_maps_month=list_raster_per_month(LST)    
379
        list_maps_name=['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']
380
        ##Now calculate clim per month, do a test for year1
381
        nb_month = 12
382
        for i in range(1,nb_month+1):
383 b8506b64 Benoit Parmentier
            clims = calc_clim(list_maps_month[i-1],lst_var+'_'+tile+'_'+list_maps_name[i-1]+'_'+str(i-1))
384 63f27fec Benoit Parmentier
            for j in range(1, len(clims)+1):
385
                if j-1 ==0:
386
                    gs.run_command('r.out.gdal', input= clims[j-1], output=clims[j-1]+ out_suffix +'.tif', type='Float32')
387
                if j-1 ==1:
388
                    gs.mapcalc(' clim_rescaled = ('+ clims[j-1 ]+ ' * 0.02) -273.15')  
389
                    gs.run_command('r.out.gdal', input= 'clim_rescaled', output=clims[j-1]+ out_suffix+'.tif', type='Float32')
390
        #clims = calc_clim(LST, 'LST_%s_%d_%02d' % (tile, year, month))
391
        # clean up  if necessary
392
        
393
        #gs.run_command('g.remove', rast=','.join(LST))
394
        #gs.os.environ['GRASS_OVERWRITE'] = '0'
395
        
396
        # clean up
397
        #gs.run_command('g.gisenv', set='LOCATION_NAME=%s' % orig_location)
398
        #gs.run_command('g.gisenv', set='MAPSET=%s' % orig_mapset)
399
        #shutil.rmtree(os.path.join(gs.gisenv()['GISDBASE'], tmp_location))
400 04d1bd09 Benoit Parmentier
401
    return None