Project

General

Profile

Download (16.2 KB) Statistics
| Branch: | Revision:
1
# -*- coding: utf-8 -*-
2
"""
3
Created on Fri Oct 26 23:48:57 2012
4

    
5
@author: benoitparmentier
6
"""
7

    
8
#!/usr/bin/python
9
#
10
# Early version of assorted Python code to download MODIS 11A1 (daily)
11
# data associated with specified dates and tiles, then interface with
12
# GRASS to calculate temporally averaged LST in a way that accounts for
13
# QC flags.
14
#
15
# TODO:
16
#  - functionalize to encapsulate high level procedural steps
17
#  - create a 'main' function to allow script to be run as a command
18
#    that can accept arguments?
19
#  - put downloaded data somewhere other than working directory?
20
#  - write code to set up and take down a temporary GRASS location/mapset
21
#  - write code to export climatology rasters to file (geotiff? compressed?)
22
#  - calculate other aggregate stats? (stddev, ...)
23
#  - deal with nightly LST too?
24
#  - deal with more than just first two bits of QC flags?
25
#     e.g.: r.mapcalc 'qc_level2 = (qc>>2) & 0x03'   
26
#     0x03?? that means 0000_0011 and >>2 means shit twice on the right for 
27
#  - record all downloads to a log file?
28
#
29
# Jim Regetz
30
# NCEAS
31
# Created on 16-May-2012
32
# Modified by Benoit Parmentier
33
# NCEAS on 18-October-2012
34
# 
35

    
36
import os, glob
37
import datetime, calendar
38
import ftplib
39
import grass.script as gs
40
#from datetime import date
41
#from datetime import datetime
42

    
43

    
44
#------------------
45
# helper functions
46
#------------------
47

    
48
def yj_to_ymd(year, doy):
49
    """Return date as e.g. '2000.03.05' based on specified year and
50
    numeric day-of-year (doy) """
51
    date = datetime.datetime.strptime('%d%03d' % (year, doy), '%Y%j')
52
    return date.strftime('%Y.%m.%d')
53

    
54
def get_doy_range(year, month):
55
    """Determine starting and ending numeric day-of-year (doy)
56
    asscociated with the specified month and year.
57

    
58
    Arguments:
59
    year -- four-digit integer year
60
    month -- integer month (1-12)
61

    
62
    Returns tuple of start and end doy for that month/year.
63
    """
64
    last_day_of_month = calendar.monthrange(year, month)[1]
65
    start_doy = int(datetime.datetime.strptime('%d.%02d.%02d' % (year,
66
        month, 1), '%Y.%m.%d').strftime('%j'))
67
    end_doy = int(datetime.datetime.strptime('%d.%02d.%02d' % (year,
68
        month, last_day_of_month), '%Y.%m.%d').strftime('%j'))
69
    return (int(start_doy), int(end_doy))
70

    
71
#added by Benoit, to create a list of raster images for particular month (2001-2010)
72

    
73
def date_sequence(year_start,month_start,day_start,year_end,month_end,day_end):
74
    """Create a date sequence for a start date and end date defined by year, month, date
75
    Arguments:
76
    year_start -- starting year
77
 
78
    Returns list of absolute paths to the downloaded HDF files.Note the format yyyydoy
79
    """
80
    from datetime import date
81
    from dateutil.rrule import rrule, DAILY
82

    
83
    a = date(year_start, month_start,day_start)
84
    b = date(year_end, month_end, day_end)
85
    date_seq=list()
86
    for dt in rrule(DAILY, dtstart=a, until=b):
87
        print dt.strftime("%Y%j")    
88
        date_seq.append(dt.strftime("%Y%j"))
89
        
90
    return(date_seq)
91

    
92
#Added on January 16, 2012 by Benoit NCEAS
93
def list_raster_per_month(listmaps):
94
    """Determine starting and ending numeric day-of-year (doy)
95
    asscociated with the specified month and year.
96

    
97
    Arguments:
98
    maplist -- list of raster grass-digit integer year
99
    month -- integer month (1-12)
100
    year_range -- list of years
101

    
102
    Returns tuple of start and end doy for that month/year.
103
    """
104
    list_maps_month=list()
105
    nb_month=12
106
    for i in range(1,nb_month+1):
107
        #i=i+1
108
        #filename = listfiles_wd2[i] #list the files of interest
109
        #monthlist[:]=[] #empty the list
110
        monthlist=list()
111
        #monthlist2[:]=[] #empty the list
112
        j=0  #counter
113
        for mapname in listmaps:
114
            #tmp_str=LST[0]
115
            date_map=mapname.split('_')[1][1:8]
116
            #date = filename[filename.rfind('_MOD')-8:filename.rfind('_MOD')]
117
            d=datetime.datetime.strptime(date_map,'%Y%j') #This produces a date object from a string extracted from the file name.
118
            month=d.strftime('%m') #Extract the month of the current map
119
            if int(month)==i:
120
                #monthlist.append(listmaps[j])
121
                monthlist.append(mapname)
122
                #monthlist2.append(listmaps[j])
123
            j=j+1
124
        list_maps_month.append(monthlist)   #This is a list of a list containing a list for each month
125
        #list_files2.append(monthlist2)
126
    return(list_maps_month)    
127
    
128
# quick function to return list of dirs in wd
129
def list_contents(ftp):
130
    """Parse ftp directory listing into list of names of the files
131
    and/or directories contained therein. May or may not be robust in
132
    general, but seems to work fine for LP DAAP ftp server."""
133
    listing = []
134
    ftp.dir(listing.append)
135
    contents = [item.split()[-1] for item in listing[1:]]
136
    return contents
137

    
138
def download_mod11a1(destdir, tile, start_doy, end_doy, year, ver=5):
139
    """Download into destination directory the MODIS 11A1 HDF files
140
    matching a given tile, year, and day range. If for some unexpected
141
    reason there are multiple matches for a given day, only the first is
142
    used. If no matches, the day is skipped with a warning message.
143

    
144
    Arguments:
145
    destdir -- path to local destination directory
146
    tile -- tile identifier (e.g., 'h08v04')
147
    start_doy -- integer start of day range (0-366)
148
    end_doy -- integer end of day range (0-366)
149
    year -- integer year (>=2000)
150
    ver -- MODIS version (4 or 5)
151

    
152
    Returns list of absolute paths to the downloaded HDF files.
153
    """
154
    # connect to data pool and change to the right directory
155
    ftp = ftplib.FTP('e4ftl01.cr.usgs.gov')
156
    ftp.login('anonymous', '')
157
    ftp.cwd('MOLT/MOD11A1.%03d' % ver)
158
    # make list of daily directories to traverse
159
    available_days = list_contents(ftp)
160
    desired_days = [yj_to_ymd(year, x) for x in range(start_doy, end_doy+1)]
161
    days_to_get = filter(lambda day: day in desired_days, available_days)
162
    if len(days_to_get) < len(desired_days):
163
        missing_days = [day for day in desired_days if day not in days_to_get]
164
        print 'skipping %d day(s) not found on server:' % len(missing_days)
165
        print '\n'.join(missing_days)
166
    # get each tile in turn
167
    hdfs = list()
168
    for day in days_to_get:
169
        ftp.cwd(day)
170
        files_to_get = [file for file in list_contents(ftp)
171
            if tile in file and file[-3:]=='hdf']
172
        if len(files_to_get)==0:
173
            # no file found -- print message and move on
174
            print 'no hdf found on server for tile', tile, 'on', day
175
            ftp.cwd('..') #added by Benoit
176
            continue
177
        elif 1<len(files_to_get):
178
            # multiple files found! -- just use the first...
179
            print 'multiple hdfs found on server for tile', tile, 'on', day
180
        file = files_to_get[0]
181
        local_file = os.path.join(destdir, file)
182
        ftp.retrbinary('RETR %s' % file, open(local_file, 'wb').write)
183
        hdfs.append(os.path.abspath(local_file))
184
        ftp.cwd('..')
185
    # politely disconnect
186
    ftp.quit()
187
    # return list of downloaded paths
188
    return hdfs
189

    
190
def get_hdf_paths(hdfdir, tile, start_doy, end_doy, year):
191
    """Look in supplied directory to find the MODIS 11A1 HDF files
192
    matching a given tile, year, and day range. If for some unexpected
193
    reason there are multiple matches for a given day, only the first is
194
    used. If no matches, the day is skipped with a warning message.
195

    
196
    Arguments:
197
    hdfdir -- path to directory containing the desired HDFs
198
    tile -- tile identifier (e.g., 'h08v04')
199
    start_doy -- integer start of day range (0-366)
200
    end_doy -- integer end of day range (0-366)
201
    year -- integer year (>=2000)
202

    
203
    Returns list of absolute paths to the located HDF files.
204
    """
205
    hdfs = list()
206
    for doy in range(start_doy, end_doy+1):
207
        fileglob = 'MOD11A1.A%d%03d.%s*hdf' % (year, doy, tile)
208
        pathglob = os.path.join(hdfdir, fileglob)
209
        files = glob.glob(pathglob)
210
        if len(files)==0:
211
            # no file found -- print message and move on
212
            print 'cannot access %s: no such file' % pathglob
213
            continue
214
        elif 1<len(files):
215
            # multiple files found! -- just use the first...
216
            print 'multiple hdfs found for tile', tile, 'on', day
217
        hdfs.append(os.path.abspath(files[0]))
218
    return hdfs
219

    
220
def calc_clim(maplist, name, overwrite=False):
221
    """Generate some climatalogies in GRASS based on the input list of
222
    maps. As usual, current GRASS region settings apply. Produces the
223
    following output rasters:
224
      * nobs: count of number of (non-null) values over the input maps
225
      * mean: arithmetic mean of (non-null) values over the input maps
226

    
227
    Arguments:
228
    maplist -- list of names of GRASS maps to be aggregated
229
    name -- template (suffix) for GRASS output map names
230

    
231
    Returns list of names of the output maps created in GRASS.
232
    """
233
    denominator = '(%s)' % '+'.join(['if(!isnull(%s))' % m
234
        for m in maplist])
235
    gs.mapcalc('nobs_%s = %s' % (name, denominator), overwrite=overwrite)
236
    numerator = '(%s)' % '+'.join(['if(isnull(%s), 0, %s)' % (m, m)
237
        for m in maplist])
238
    gs.mapcalc('mean_%s = round(float(%s)/nobs_%s)' % (name, numerator, name),
239
        overwrite=overwrite)
240
    return ['%s_%s' % (s, name) for s in ['nobs', 'mean']]
241

    
242
def load_qc_adjusted_lst(hdf):
243
    """Load LST_Day_1km into GRASS from the specified hdf file, and
244
    nullify any cells for which QA flags indicate anything other than
245
    high quality.
246

    
247
    Argument:
248
    hdf -- local path to the 11A1 HDF file
249

    
250
    Returns the name of the QC-adjusted LST map created in GRASS.
251
    """
252
    lstname =  'LST_' + '_'.join(os.path.basename(hdf).split('.')[1:3])               
253
    # read in lst grid
254
    gs.run_command('r.in.gdal',
255
        input='HDF4_EOS:EOS_GRID:%s:MODIS_Grid_Daily_1km_LST:LST_Day_1km' % hdf,
256
        output=lstname)            # No new location created since it has already been done with r.in.gdal before!!!
257

    
258
    # read in qc grid
259
    gs.run_command('r.in.gdal',
260
        input = 'HDF4_EOS:EOS_GRID:%s:MODIS_Grid_Daily_1km_LST:QC_Day' % hdf,
261
        output = 'qc_this_day')
262
    gs.run_command('g.region', rast=lstname)
263
    # null out any LST cells for which QC is not high [00]  #THIS WHERE WE MIGHT NEED TO CHANGE...
264
    gs.mapcalc('${lst} = if((qc_this_day & 0x03)==0, ${lst}, null())',
265
        lst=lstname, overwrite=True)
266
    # clean up
267
    gs.run_command('g.remove', rast='qc_this_day')
268
    # return name of qc-adjusted LST raster in GRASS
269
    return lstname
270

    
271

    
272
def main():
273
    #--------------------------------------------
274
    # Download and Calculate monthly climatology for daily LST time series for a specific area
275
    #--------------------------------------------
276

    
277
    # TODO: set up a (temporary?) GRASS database to use for processing? code
278
    # currently assumes it's being run within an existing GRASS session
279
    # using an appropriately configured LOCATION...
280
    #
281
    # note the following trick to fix datum for modis sinu;
282
    # TODO: check that this works properly...compare to MRT output?
283
    # gs.run_command('g.proj', flags='c',
284
    #     proj4='+proj=sinu +a=6371007.181 +b=6371007.181 +ellps=sphere')
285
    ##    proj4='+proj=sinu +R=6371007.181 +nadgrids=@null +wktext')
286

    
287
 
288
    #### MOdified by Benoit in October 2012 and January 2013
289
    
290
    #Parameters
291
    tiles = ['h11v08','h11v07','h12v07','h12v08','h10v07','h10v08'] #These tiles correspond to Venezuela.
292
    start_year = 2001
293
    end_year = 2010
294
    end_month=12
295
    start_month=1
296
    hdfdir =  '/home/parmentier/Data/benoit_test' #destination file where hdf files are stored locally after download.
297
    night=0    # if 1 then produce night climatology
298
    out_suffix="_01252013"
299
    download=1  # if 1 then download files
300
    
301
    ################## First step: download data ###################
302
    # Added tile loop 
303
    year_list=range(start_year,end_year+1) #list of year to loop through
304
    #for testing...
305
    #year_list=[2001,2002]
306
    #hdfdir = '/home/parmentier/Data/benoit_test2' 
307
    #tiles = ['h10v07','h10v08','h12v07']
308
    if download==1:
309
        for tile in tiles:
310
            for year in year_list:
311
                start_doy = 1
312
                end_doy = 365
313
                if calendar.isleap(year):
314
                    end_doy=366
315
                    
316
                hdfs = download_mod11a1(hdfdir, tile, start_doy, end_doy, year)
317
    
318
    # modify loop to take into account "hdfs", list of hdf files needed in the next steps…  
319
    ################# Second step: compute climatology #################
320
    ## Process tile by tile...
321
    ##start loop for tile
322
    #tile= 'h12v08'
323
    #tiles = ['h12v07','h12v08']
324
    var_name = ['LST_Day_1km','LST_Night_1km']
325
    if night==1:
326
        lst_var = var_name[1]
327
    if night==0:
328
        lst_var = var_name[0]
329
        
330
    #start = time.clock()    
331
    for tile in tiles:        
332
        # Set up a temporary GRASS data base   
333
        tmp_location = 'tmp' + "_"+ tile + "_"+ str(os.getpid())        # Name of the temporary GRASS data base 
334
        orig_location = gs.gisenv()['LOCATION_NAME']
335
        orig_mapset = gs.gisenv()['MAPSET']
336
        gs.os.environ['GRASS_OVERWRITE'] = '1'         # Allow for overwrite?? GRASS data base  
337
            
338
        path_file = hdfdir
339
        os.chdir(path_file)    #set working directory
340
        os.getcwd()            #get current working directory
341
        listfiles_wd2 = glob.glob('*'+tile+'*'+'.hdf') #list the all the LST files of interest
342
        name_of_file = listfiles_wd2[0]    
343
       #Create the name of the layer to extract from the hdf file used for definition of the database
344
        lst1day = 'HDF4_EOS:EOS_GRID:%s/%s:%s' % (
345
        path_file,
346
        name_of_file,
347
         'MODIS_Grid_Daily_1km_LST:'+ lst_var)
348
         #'LST_Night_1km'
349
         #change the above line later to allow night LST for tmin
350
         
351
        #now import one image and set the location, mapset and database !!create name per tile
352
        gs.run_command('r.in.gdal', input=lst1day, output='LST_1day_'+tile,
353
               location=tmp_location)
354
    
355
        # Now that the new location has been create, switch to new location
356
        gs.run_command('g.gisenv', set='LOCATION_NAME=%s' % tmp_location)
357
        gs.run_command('g.gisenv', set='MAPSET=PERMANENT')
358
    
359
        # set GRASS the projection system and GRASS region to match the extent of the file
360
        gs.run_command('g.proj', flags='c',
361
        proj4='+proj=sinu +a=6371007.181 +b=6371007.181 +ellps=sphere')
362
        gs.run_command('g.region', rast='LST_1day_'+tile)
363
    
364
        #generate monthly pixelwise mean & count of high-quality daytime LST
365
        gs.os.environ['GRASS_OVERWRITE'] = '1'
366
    
367
        #Provide a list of file "hdfs" to process…this should be in a loop...
368
        #tile= 'h12v08'
369
        fileglob = 'MOD11A1.A*.%s*hdf' % (tile)
370
        pathglob = os.path.join(path_file, fileglob)
371
        files = glob.glob(pathglob)
372
        hdfs = files
373
        ### LST values in images must be masked out if they have low quality, determined from QC flags stored in hdf files
374
        LST = [load_qc_adjusted_lst(hdf) for hdf in hdfs]
375
        ### LST is a list that contains the name of the new lst files in the GRASS format. The files are stored in the temporary GRASS database.
376
        list_maps_month=list_raster_per_month(LST)    
377
        list_maps_name=['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']
378
        ##Now calculate clim per month, do a test for year1
379
        nb_month = 12
380
        for i in range(1,nb_month+1):
381
            clims = calc_clim(list_maps_month[i-1],tile+'_'+list_maps_name[i-1]+'_'+str(i-1))
382
            for j in range(1, len(clims)+1):
383
                if j-1 ==0:
384
                    gs.run_command('r.out.gdal', input= clims[j-1], output=clims[j-1]+ out_suffix +'.tif', type='Float32')
385
                if j-1 ==1:
386
                    gs.mapcalc(' clim_rescaled = ('+ clims[j-1 ]+ ' * 0.02) -273.15')  
387
                    gs.run_command('r.out.gdal', input= 'clim_rescaled', output=clims[j-1]+ out_suffix+'.tif', type='Float32')
388
        #clims = calc_clim(LST, 'LST_%s_%d_%02d' % (tile, year, month))
389
        # clean up  if necessary
390
        
391
        #gs.run_command('g.remove', rast=','.join(LST))
392
        #gs.os.environ['GRASS_OVERWRITE'] = '0'
393
        
394
        # clean up
395
        #gs.run_command('g.gisenv', set='LOCATION_NAME=%s' % orig_location)
396
        #gs.run_command('g.gisenv', set='MAPSET=%s' % orig_mapset)
397
        #shutil.rmtree(os.path.join(gs.gisenv()['GISDBASE'], tmp_location))
398

    
399
    return None
(12-12/19)