Project

General

Profile

Download (17.4 KB) Statistics
| Branch: | Revision:
1
# -*- coding: utf-8 -*-
2
"""
3
Created on Fri Oct 26 23:48:57 2012
4

    
5
@author: benoitparmentier
6
"""
7

    
8
#!/usr/bin/python
9
#
10
# Early version of assorted Python code to download MODIS 11A1 (daily)
11
# data associated with specified dates and tiles, then interface with
12
# GRASS to calculate temporally averaged LST in a way that accounts for
13
# QC flags.
14
#
15
# TODO:
16
#  - functionalize to encapsulate high level procedural steps
17
#  - create a 'main' function to allow script to be run as a command
18
#    that can accept arguments?
19
#  - put downloaded data somewhere other than working directory?
20
#  - write code to set up and take down a temporary GRASS location/mapset
21
#  - write code to export climatology rasters to file (geotiff? compressed?)
22
#  - calculate other aggregate stats? (stddev, ...)
23
#  - deal with nightly LST too?
24
#  - deal with more than just first two bits of QC flags?
25
#     e.g.: r.mapcalc 'qc_level2 = (qc>>2) & 0x03'   
26
#     0x03?? that means 0000_0011 and >>2 means shift twice on the right for 
27
#  - record all downloads to a log file?
28
#
29
# Jim Regetz
30
# NCEAS
31
# Created on 16-May-2012
32
# Modified by Benoit Parmentier
33
# NCEAS on 18-October-2012
34
# 
35

    
36
import os, glob
37
import datetime, calendar
38
import ftplib
39
import grass.script as gs
40
import argparse
41

    
42
#from datetime import date
43
#from datetime import datetime
44

    
45

    
46
#------------------
47
# helper functions
48
#------------------
49

    
50
def yj_to_ymd(year, doy):
51
    """Return date as e.g. '2000.03.05' based on specified year and
52
    numeric day-of-year (doy) """
53
    date = datetime.datetime.strptime('%d%03d' % (year, doy), '%Y%j')
54
    return date.strftime('%Y.%m.%d')
55

    
56
def get_doy_range(year, month):
57
    """Determine starting and ending numeric day-of-year (doy)
58
    asscociated with the specified month and year.
59

    
60
    Arguments:
61
    year -- four-digit integer year
62
    month -- integer month (1-12)
63

    
64
    Returns tuple of start and end doy for that month/year.
65
    """
66
    last_day_of_month = calendar.monthrange(year, month)[1]
67
    start_doy = int(datetime.datetime.strptime('%d.%02d.%02d' % (year,
68
        month, 1), '%Y.%m.%d').strftime('%j'))
69
    end_doy = int(datetime.datetime.strptime('%d.%02d.%02d' % (year,
70
        month, last_day_of_month), '%Y.%m.%d').strftime('%j'))
71
    return (int(start_doy), int(end_doy))
72

    
73
#added by Benoit, to create a list of raster images for particular month (2001-2010)
74

    
75
def date_sequence(year_start,month_start,day_start,year_end,month_end,day_end):
76
    """Create a date sequence for a start date and end date defined by year, month, date
77
    Arguments:
78
    year_start -- starting year
79
 
80
    Returns list of absolute paths to the downloaded HDF files.Note the format yyyydoy
81
    """
82
    from datetime import date
83
    from dateutil.rrule import rrule, DAILY
84

    
85
    a = date(year_start, month_start,day_start)
86
    b = date(year_end, month_end, day_end)
87
    date_seq=list()
88
    for dt in rrule(DAILY, dtstart=a, until=b):
89
        print dt.strftime("%Y%j")    
90
        date_seq.append(dt.strftime("%Y%j"))
91
        
92
    return(date_seq)
93

    
94
   
95
# quick function to return list of dirs in wd
96
def list_contents(ftp):
97
    """Parse ftp directory listing into list of names of the files
98
    and/or directories contained therein. May or may not be robust in
99
    general, but seems to work fine for LP DAAP ftp server."""
100
    listing = []
101
    ftp.dir(listing.append)
102
    contents = [item.split()[-1] for item in listing[1:]]
103
    return contents
104

    
105
def download_mod11a1(destdir, tile, start_doy, end_doy, year, ver=5):
106
    """Download into destination directory the MODIS 11A1 HDF files
107
    matching a given tile, year, and day range. If for some unexpected
108
    reason there are multiple matches for a given day, only the first is
109
    used. If no matches, the day is skipped with a warning message.
110

    
111
    Arguments:
112
    destdir -- path to local destination directory
113
    tile -- tile identifier (e.g., 'h08v04')
114
    start_doy -- integer start of day range (0-366)
115
    end_doy -- integer end of day range (0-366)
116
    year -- integer year (>=2000)
117
    ver -- MODIS version (4 or 5)
118

    
119
    Returns list of absolute paths to the downloaded HDF files.
120
    """
121
    # connect to data pool and change to the right directory
122
    ftp = ftplib.FTP('e4ftl01.cr.usgs.gov')
123
    ftp.login('anonymous', '')
124
    ftp.cwd('MOLT/MOD11A1.%03d' % ver)
125
    # make list of daily directories to traverse
126
    available_days = list_contents(ftp)
127
    desired_days = [yj_to_ymd(year, x) for x in range(start_doy, end_doy+1)]
128
    days_to_get = filter(lambda day: day in desired_days, available_days)
129
    if len(days_to_get) < len(desired_days):
130
        missing_days = [day for day in desired_days if day not in days_to_get]
131
        print 'skipping %d day(s) not found on server:' % len(missing_days)
132
        print '\n'.join(missing_days)
133
    # get each tile in turn
134
    hdfs = list()
135
    for day in days_to_get:
136
        ftp.cwd(day)
137
        files_to_get = [file for file in list_contents(ftp)
138
            if tile in file and file[-3:]=='hdf']
139
        if len(files_to_get)==0:
140
            # no file found -- print message and move on
141
            print 'no hdf found on server for tile', tile, 'on', day
142
            ftp.cwd('..') #added by Benoit
143
            continue
144
        elif 1<len(files_to_get):
145
            # multiple files found! -- just use the first...
146
            print 'multiple hdfs found on server for tile', tile, 'on', day
147
        file = files_to_get[0]
148
        local_file = os.path.join(destdir, file)
149
        ftp.retrbinary('RETR %s' % file, open(local_file, 'wb').write)
150
        hdfs.append(os.path.abspath(local_file))
151
        ftp.cwd('..')
152
    # politely disconnect
153
    ftp.quit()
154
    # return list of downloaded paths
155
    return hdfs
156

    
157
def get_hdf_paths(hdfdir, tile, start_doy, end_doy, year):
158
    """Look in supplied directory to find the MODIS 11A1 HDF files
159
    matching a given tile, year, and day range. If for some unexpected
160
    reason there are multiple matches for a given day, only the first is
161
    used. If no matches, the day is skipped with a warning message.
162

    
163
    Arguments:
164
    hdfdir -- path to directory containing the desired HDFs
165
    tile -- tile identifier (e.g., 'h08v04')
166
    start_doy -- integer start of day range (0-366)
167
    end_doy -- integer end of day range (0-366)
168
    year -- integer year (>=2000)
169

    
170
    Returns list of absolute paths to the located HDF files.
171
    """
172
    hdfs = list()
173
    for doy in range(start_doy, end_doy+1):
174
        fileglob = 'MOD11A1.A%d%03d.%s*hdf' % (year, doy, tile)
175
        pathglob = os.path.join(hdfdir, fileglob)
176
        files = glob.glob(pathglob)
177
        if len(files)==0:
178
            # no file found -- print message and move on
179
            print 'cannot access %s: no such file' % pathglob
180
            continue
181
        elif 1<len(files):
182
            # multiple files found! -- just use the first...
183
            print 'multiple hdfs found for tile', tile, 'on', day
184
        hdfs.append(os.path.abspath(files[0]))
185
    return hdfs
186

    
187
#Added on January 16, 2012 by Benoit NCEAS
188
def list_raster_per_month(listmaps):
189
    """Determine starting and ending numeric day-of-year (doy)
190
    asscociated with the specified month and year.
191

    
192
    Arguments:
193
    maplist -- list of raster grass-digit integer year
194
    month -- integer month (1-12)
195
    year_range -- list of years
196

    
197
    Returns tuple of start and end doy for that month/year.
198
    """
199
    list_maps_month=list()
200
    nb_month=12
201
    for i in range(1,nb_month+1):
202
        #i=i+1
203
        #filename = listfiles_wd2[i] #list the files of interest
204
        #monthlist[:]=[] #empty the list
205
        monthlist=list()
206
        #monthlist2[:]=[] #empty the list
207
        j=0  #counter
208
        for mapname in listmaps:
209
            #tmp_str=LST[0]
210
            date_map=mapname.split('_')[1][1:8]
211
            #date = filename[filename.rfind('_MOD')-8:filename.rfind('_MOD')]
212
            d=datetime.datetime.strptime(date_map,'%Y%j') #This produces a date object from a string extracted from the file name.
213
            month=d.strftime('%m') #Extract the month of the current map
214
            if int(month)==i:
215
                #monthlist.append(listmaps[j])
216
                monthlist.append(mapname)
217
                #monthlist2.append(listmaps[j])
218
            j=j+1
219
        list_maps_month.append(monthlist)   #This is a list of a list containing a list for each month
220
        #list_files2.append(monthlist2)
221
    return(list_maps_month)    
222

    
223

    
224
def calc_clim(maplist, name, overwrite=False):
225
    """Generate some climatalogies in GRASS based on the input list of
226
    maps. As usual, current GRASS region settings apply. Produces the
227
    following output rasters:
228
      * nobs: count of number of (non-null) values over the input maps
229
      * mean: arithmetic mean of (non-null) values over the input maps
230

    
231
    Arguments:
232
    maplist -- list of names of GRASS maps to be aggregated
233
    name -- template (suffix) for GRASS output map names
234

    
235
    Returns list of names of the output maps created in GRASS.
236
    """
237
    denominator = '(%s)' % '+'.join(['if(!isnull(%s))' % m
238
        for m in maplist])
239
    gs.mapcalc('nobs_%s = %s' % (name, denominator), overwrite=overwrite)
240
    numerator = '(%s)' % '+'.join(['if(isnull(%s), 0, %s)' % (m, m)
241
        for m in maplist])
242
    gs.mapcalc('mean_%s = round(float(%s)/nobs_%s)' % (name, numerator, name),
243
        overwrite=overwrite)
244
    return ['%s_%s' % (s, name) for s in ['nobs', 'mean']]
245

    
246
def load_qc_adjusted_lst(hdf):
247
    """Load LST_Day_1km into GRASS from the specified hdf file, and
248
    nullify any cells for which QA flags indicate anything other than
249
    high quality.
250

    
251
    Argument:
252
    hdf -- local path to the 11A1 HDF file
253

    
254
    Returns the name of the QC-adjusted LST map created in GRASS.
255
    """
256
    lstname =  'LST_' + '_'.join(os.path.basename(hdf).split('.')[1:3])               
257
    # read in lst grid
258
    gs.run_command('r.in.gdal',
259
        input='HDF4_EOS:EOS_GRID:%s:MODIS_Grid_Daily_1km_LST:LST_Day_1km' % hdf,
260
        output=lstname)            # No new location created since it has already been done with r.in.gdal before!!!
261

    
262
    # read in qc grid
263
    gs.run_command('r.in.gdal',
264
        input = 'HDF4_EOS:EOS_GRID:%s:MODIS_Grid_Daily_1km_LST:QC_Day' % hdf,
265
        output = 'qc_this_day')
266
    gs.run_command('g.region', rast=lstname)
267
    # null out any LST cells for which QC is not high [00]  #THIS WHERE WE MIGHT NEED TO CHANGE...
268
    gs.mapcalc('${lst} = if((qc_this_day & 0x03)==0, ${lst}, null())',
269
        lst=lstname, overwrite=True)
270
    # clean up
271
    gs.run_command('g.remove', rast='qc_this_day')
272
    # return name of qc-adjusted LST raster in GRASS
273
    return lstname
274

    
275

    
276
def main():
277
    #--------------------------------------------
278
    # Download and Calculate monthly climatology for daily LST time series for a specific area
279
    #--------------------------------------------
280

    
281
    # TODO: set up a (temporary?) GRASS database to use for processing? code
282
    # currently assumes it's being run within an existing GRASS session
283
    # using an appropriately configured LOCATION...
284
    #
285
    # note the following trick to fix datum for modis sinu;
286
    # TODO: check that this works properly...compare to MRT output?
287
    # gs.run_command('g.proj', flags='c',
288
    #     proj4='+proj=sinu +a=6371007.181 +b=6371007.181 +ellps=sphere')
289
    ##    proj4='+proj=sinu +R=6371007.181 +nadgrids=@null +wktext')
290

    
291
    ########## START OF SCRIPT ##############
292
    #### Modified by Benoit on May 13, 2013 
293
    
294
    ### INPUT Parameters
295
    #Inputs from R?? there are 9 parameters
296
   #tiles = ['h11v08','h11v07','h12v07','h12v08','h10v07','h10v08'] #These tiles correspond to Venezuela.
297
    tiles= ['h12v08']    
298
    #start_year = 2001
299
    #end_year = 2010
300
    #start_month=1
301
    #end_month=12
302
    #hdfdir =  '/home/layers/commons/modis/MOD11A1_tiles' #destination file where hdf files are stored locally after download.
303
    #night=1    # if 1 then produce night climatology
304
    #out_suffix="_03192013"
305
    #download=1  # if 1 then download files
306
 
307
    parser = argparse.ArgumentParser()
308
    parser.add_argument("tiles", type=str, help="list of Modis tiles")
309
    parser.add_argument("start_year", type=int, help="start year")
310
    parser.add_argument("end_year", type=int, help="end year")
311
    parser.add_argument("start_month", type=int, help="start month")
312
    parser.add_argument("end_month", type=int, help="end month")
313
    parser.add_argument("hdfdir", type=str, help="destination/source directory for hdf file")
314
    parser.add_argument("night", type=int, help="night")
315
    parser.add_argument("download", type=int, help="out_suffix")
316
    parser.add_argument("out_suffix", type=str, help="out_suffix")
317

    
318
    myargs = parser.parse_args()
319

    
320
    tiles = myargs.tiles #These tiles correspond to Venezuela.
321
    start_year = myargs.start_year
322
    end_year = myargs.start_year 
323
    end_month = myargs.end_month
324
    start_month= myargs.start_month
325
    hdfdir =  myargs.hdfdir
326
    night= myargs.night    # if 1 then produce night climatology
327
    out_suffix= myargs.out_suffix #"_03192013"
328
    download=myargs.download# if 1 then download files
329
    
330
    tiles =tiles.split(",") #Create a list from string
331
    #need to add on the fly creation of folder for each tile!!
332
    
333
    ################## First step: download data ###################
334
    # Added tile loop 
335
    year_list=range(start_year,end_year+1) #list of year to loop through
336

    
337
    if download==1:
338
        
339
        for tile in tiles:
340
            for year in year_list:
341
                start_doy = 1
342
                end_doy = 365
343
                if calendar.isleap(year):
344
                    end_doy=366
345
                    
346
                hdfs = download_mod11a1(hdfdir, tile, start_doy, end_doy, year)
347
    
348
    # modify loop to take into account "hdfs", list of hdf files needed in the next steps…  
349
    ################# Second step: compute climatology #################
350
    ## Process tile by tile...
351
    var_name = ['LST_Day_1km','LST_Night_1km']
352
    if night==1:
353
        lst_var = var_name[1]
354
    if night==0:
355
        lst_var = var_name[0]
356
        
357
    #start = time.clock()    
358
    for tile in tiles:        
359
        # Set up a temporary GRASS data base   
360
        tmp_location = 'tmp' + "_"+ tile + "_"+ str(os.getpid())        # Name of the temporary GRASS data base 
361
        orig_location = gs.gisenv()['LOCATION_NAME']
362
        orig_mapset = gs.gisenv()['MAPSET']
363
        gs.os.environ['GRASS_OVERWRITE'] = '1'         # Allow for overwrite?? GRASS data base  
364
            
365
        path_file = hdfdir
366
        os.chdir(path_file)    #set working directory
367
        os.getcwd()            #get current working directory
368
        listfiles_wd2 = glob.glob('*'+tile+'*'+'.hdf') #list the all the LST files of interest
369
        name_of_file = listfiles_wd2[0]    
370
        #Create the name of the layer to extract from the hdf file used for definition of the database
371
        lst1day = 'HDF4_EOS:EOS_GRID:%s/%s:%s' % (
372
        path_file,
373
        name_of_file,
374
        'MODIS_Grid_Daily_1km_LST:'+ lst_var)
375
        #'LST_Night_1km'
376
        #change the above line later to allow night LST for tmin
377
         
378
        #now import one image and set the location, mapset and database !!create name per tile
379
        gs.run_command('r.in.gdal', input=lst1day, output='LST_1day_'+tile,
380
               location=tmp_location)
381
    
382
        # Now that the new location has been create, switch to new location
383
        gs.run_command('g.gisenv', set='LOCATION_NAME=%s' % tmp_location)
384
        gs.run_command('g.gisenv', set='MAPSET=PERMANENT')
385
    
386
        # set GRASS the projection system and GRASS region to match the extent of the file
387
        gs.run_command('g.proj', flags='c',
388
        proj4='+proj=sinu +a=6371007.181 +b=6371007.181 +ellps=sphere') #This should be set in the parameters
389
        gs.run_command('g.region', rast='LST_1day_'+tile)
390
    
391
        #generate monthly pixelwise mean & count of high-quality daytime LST
392
        gs.os.environ['GRASS_OVERWRITE'] = '1'
393
    
394
        #Provide a list of file "hdfs" to process…this should be in a loop...
395
        #tile= 'h12v08'
396
        fileglob = 'MOD11A1.A*.%s*hdf' % (tile)
397
        pathglob = os.path.join(path_file, fileglob)
398
        files = glob.glob(pathglob)
399
        hdfs = files
400
        ### LST values in images must be masked out if they have low quality, determined from QC flags stored in hdf files
401
        LST = [load_qc_adjusted_lst(hdf) for hdf in hdfs]
402
        ### LST is a list that contains the name of the new lst files in the GRASS format. The files are stored in the temporary GRASS database.
403
        list_maps_month=list_raster_per_month(LST)    
404
        list_maps_name=['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']
405
        ##Now calculate clim per month, do a test for year1
406
        nb_month = 12
407
        for i in range(1,nb_month+1):
408
            clims = calc_clim(list_maps_month[i-1],lst_var+'_'+tile+'_'+list_maps_name[i-1]+'_'+str(i-1))
409
            for j in range(1, len(clims)+1):
410
                if j-1 ==0:
411
                    gs.run_command('r.out.gdal', input= clims[j-1], output=clims[j-1]+ out_suffix +'.tif', type='Float32')
412
                if j-1 ==1:
413
                    gs.mapcalc(' clim_rescaled = ('+ clims[j-1 ]+ ' * 0.02) -273.15')  
414
                    gs.run_command('r.out.gdal', input= 'clim_rescaled', output=clims[j-1]+ out_suffix+'.tif', type='Float32')
415
        #clims = calc_clim(LST, 'LST_%s_%d_%02d' % (tile, year, month))
416
        
417
        # clean up GRASS DATABASE 
418
        
419
        gs.run_command('g.remove', rast=','.join(LST))
420
        gs.os.environ['GRASS_OVERWRITE'] = '0'
421
        
422
        #clean up
423
        gs.run_command('g.gisenv', set='LOCATION_NAME=%s' % orig_location)
424
        gs.run_command('g.gisenv', set='MAPSET=%s' % orig_mapset)
425
        shutil.rmtree(os.path.join(gs.gisenv()['GISDBASE'], tmp_location))
426

    
427
    return None
428

    
429

    
(13-13/22)