Project

General

Profile

Download (16.3 KB) Statistics
| Branch: | Revision:
1
# -*- coding: utf-8 -*-
2
"""
3
Created on Fri Oct 26 23:48:57 2012
4

    
5
@author: benoitparmentier
6
"""
7

    
8
#!/usr/bin/python
9
#
10
# Early version of assorted Python code to download MODIS 11A1 (daily)
11
# data associated with specified dates and tiles, then interface with
12
# GRASS to calculate temporally averaged LST in a way that accounts for
13
# QC flags.
14
#
15
# TODO:
16
#  - functionalize to encapsulate high level procedural steps
17
#  - create a 'main' function to allow script to be run as a command
18
#    that can accept arguments?
19
#  - put downloaded data somewhere other than working directory?
20
#  - write code to set up and take down a temporary GRASS location/mapset
21
#  - write code to export climatology rasters to file (geotiff? compressed?)
22
#  - calculate other aggregate stats? (stddev, ...)
23
#  - deal with nightly LST too?
24
#  - deal with more than just first two bits of QC flags?
25
#     e.g.: r.mapcalc 'qc_level2 = (qc>>2) & 0x03'   
26
#     0x03?? that means 0000_0011 and >>2 means shit twice on the right for 
27
#  - record all downloads to a log file?
28
#
29
# Jim Regetz
30
# NCEAS
31
# Created on 16-May-2012
32
# Modified by Benoit Parmentier
33
# NCEAS on 18-October-2012
34
# 
35

    
36
import os, glob
37
import datetime, calendar
38
import ftplib
39
import grass.script as gs
40

    
41
#from datetime import date
42
#from datetime import datetime
43

    
44

    
45
#------------------
46
# helper functions
47
#------------------
48

    
49
def yj_to_ymd(year, doy):
50
    """Return date as e.g. '2000.03.05' based on specified year and
51
    numeric day-of-year (doy) """
52
    date = datetime.datetime.strptime('%d%03d' % (year, doy), '%Y%j')
53
    return date.strftime('%Y.%m.%d')
54

    
55
def get_doy_range(year, month):
56
    """Determine starting and ending numeric day-of-year (doy)
57
    asscociated with the specified month and year.
58

    
59
    Arguments:
60
    year -- four-digit integer year
61
    month -- integer month (1-12)
62

    
63
    Returns tuple of start and end doy for that month/year.
64
    """
65
    last_day_of_month = calendar.monthrange(year, month)[1]
66
    start_doy = int(datetime.datetime.strptime('%d.%02d.%02d' % (year,
67
        month, 1), '%Y.%m.%d').strftime('%j'))
68
    end_doy = int(datetime.datetime.strptime('%d.%02d.%02d' % (year,
69
        month, last_day_of_month), '%Y.%m.%d').strftime('%j'))
70
    return (int(start_doy), int(end_doy))
71

    
72
#added by Benoit, to create a list of raster images for particular month (2001-2010)
73

    
74
def date_sequence(year_start,month_start,day_start,year_end,month_end,day_end):
75
    """Create a date sequence for a start date and end date defined by year, month, date
76
    Arguments:
77
    year_start -- starting year
78
 
79
    Returns list of absolute paths to the downloaded HDF files.Note the format yyyydoy
80
    """
81
    from datetime import date
82
    from dateutil.rrule import rrule, DAILY
83

    
84
    a = date(year_start, month_start,day_start)
85
    b = date(year_end, month_end, day_end)
86
    date_seq=list()
87
    for dt in rrule(DAILY, dtstart=a, until=b):
88
        print dt.strftime("%Y%j")    
89
        date_seq.append(dt.strftime("%Y%j"))
90
        
91
    return(date_seq)
92

    
93
#Added on January 16, 2012 by Benoit NCEAS
94
def list_raster_per_month(listmaps):
95
    """Determine starting and ending numeric day-of-year (doy)
96
    asscociated with the specified month and year.
97

    
98
    Arguments:
99
    maplist -- list of raster grass-digit integer year
100
    month -- integer month (1-12)
101
    year_range -- list of years
102

    
103
    Returns tuple of start and end doy for that month/year.
104
    """
105
    list_maps_month=list()
106
    nb_month=12
107
    for i in range(1,nb_month+1):
108
        #i=i+1
109
        #filename = listfiles_wd2[i] #list the files of interest
110
        #monthlist[:]=[] #empty the list
111
        monthlist=list()
112
        #monthlist2[:]=[] #empty the list
113
        j=0  #counter
114
        for mapname in listmaps:
115
            #tmp_str=LST[0]
116
            date_map=mapname.split('_')[1][1:8]
117
            #date = filename[filename.rfind('_MOD')-8:filename.rfind('_MOD')]
118
            d=datetime.datetime.strptime(date_map,'%Y%j') #This produces a date object from a string extracted from the file name.
119
            month=d.strftime('%m') #Extract the month of the current map
120
            if int(month)==i:
121
                #monthlist.append(listmaps[j])
122
                monthlist.append(mapname)
123
                #monthlist2.append(listmaps[j])
124
            j=j+1
125
        list_maps_month.append(monthlist)   #This is a list of a list containing a list for each month
126
        #list_files2.append(monthlist2)
127
    return(list_maps_month)    
128
    
129
# quick function to return list of dirs in wd
130
def list_contents(ftp):
131
    """Parse ftp directory listing into list of names of the files
132
    and/or directories contained therein. May or may not be robust in
133
    general, but seems to work fine for LP DAAP ftp server."""
134
    listing = []
135
    ftp.dir(listing.append)
136
    contents = [item.split()[-1] for item in listing[1:]]
137
    return contents
138

    
139
def download_mod11a1(destdir, tile, start_doy, end_doy, year, ver=5):
140
    """Download into destination directory the MODIS 11A1 HDF files
141
    matching a given tile, year, and day range. If for some unexpected
142
    reason there are multiple matches for a given day, only the first is
143
    used. If no matches, the day is skipped with a warning message.
144

    
145
    Arguments:
146
    destdir -- path to local destination directory
147
    tile -- tile identifier (e.g., 'h08v04')
148
    start_doy -- integer start of day range (0-366)
149
    end_doy -- integer end of day range (0-366)
150
    year -- integer year (>=2000)
151
    ver -- MODIS version (4 or 5)
152

    
153
    Returns list of absolute paths to the downloaded HDF files.
154
    """
155
    # connect to data pool and change to the right directory
156
    ftp = ftplib.FTP('e4ftl01.cr.usgs.gov')
157
    ftp.login('anonymous', '')
158
    ftp.cwd('MOLT/MOD11A1.%03d' % ver)
159
    # make list of daily directories to traverse
160
    available_days = list_contents(ftp)
161
    desired_days = [yj_to_ymd(year, x) for x in range(start_doy, end_doy+1)]
162
    days_to_get = filter(lambda day: day in desired_days, available_days)
163
    if len(days_to_get) < len(desired_days):
164
        missing_days = [day for day in desired_days if day not in days_to_get]
165
        print 'skipping %d day(s) not found on server:' % len(missing_days)
166
        print '\n'.join(missing_days)
167
    # get each tile in turn
168
    hdfs = list()
169
    for day in days_to_get:
170
        ftp.cwd(day)
171
        files_to_get = [file for file in list_contents(ftp)
172
            if tile in file and file[-3:]=='hdf']
173
        if len(files_to_get)==0:
174
            # no file found -- print message and move on
175
            print 'no hdf found on server for tile', tile, 'on', day
176
            ftp.cwd('..') #added by Benoit
177
            continue
178
        elif 1<len(files_to_get):
179
            # multiple files found! -- just use the first...
180
            print 'multiple hdfs found on server for tile', tile, 'on', day
181
        file = files_to_get[0]
182
        local_file = os.path.join(destdir, file)
183
        ftp.retrbinary('RETR %s' % file, open(local_file, 'wb').write)
184
        hdfs.append(os.path.abspath(local_file))
185
        ftp.cwd('..')
186
    # politely disconnect
187
    ftp.quit()
188
    # return list of downloaded paths
189
    return hdfs
190

    
191
def get_hdf_paths(hdfdir, tile, start_doy, end_doy, year):
192
    """Look in supplied directory to find the MODIS 11A1 HDF files
193
    matching a given tile, year, and day range. If for some unexpected
194
    reason there are multiple matches for a given day, only the first is
195
    used. If no matches, the day is skipped with a warning message.
196

    
197
    Arguments:
198
    hdfdir -- path to directory containing the desired HDFs
199
    tile -- tile identifier (e.g., 'h08v04')
200
    start_doy -- integer start of day range (0-366)
201
    end_doy -- integer end of day range (0-366)
202
    year -- integer year (>=2000)
203

    
204
    Returns list of absolute paths to the located HDF files.
205
    """
206
    hdfs = list()
207
    for doy in range(start_doy, end_doy+1):
208
        fileglob = 'MOD11A1.A%d%03d.%s*hdf' % (year, doy, tile)
209
        pathglob = os.path.join(hdfdir, fileglob)
210
        files = glob.glob(pathglob)
211
        if len(files)==0:
212
            # no file found -- print message and move on
213
            print 'cannot access %s: no such file' % pathglob
214
            continue
215
        elif 1<len(files):
216
            # multiple files found! -- just use the first...
217
            print 'multiple hdfs found for tile', tile, 'on', day
218
        hdfs.append(os.path.abspath(files[0]))
219
    return hdfs
220

    
221
def calc_clim(maplist, name, overwrite=False):
222
    """Generate some climatalogies in GRASS based on the input list of
223
    maps. As usual, current GRASS region settings apply. Produces the
224
    following output rasters:
225
      * nobs: count of number of (non-null) values over the input maps
226
      * mean: arithmetic mean of (non-null) values over the input maps
227

    
228
    Arguments:
229
    maplist -- list of names of GRASS maps to be aggregated
230
    name -- template (suffix) for GRASS output map names
231

    
232
    Returns list of names of the output maps created in GRASS.
233
    """
234
    denominator = '(%s)' % '+'.join(['if(!isnull(%s))' % m
235
        for m in maplist])
236
    gs.mapcalc('nobs_%s = %s' % (name, denominator), overwrite=overwrite)
237
    numerator = '(%s)' % '+'.join(['if(isnull(%s), 0, %s)' % (m, m)
238
        for m in maplist])
239
    gs.mapcalc('mean_%s = round(float(%s)/nobs_%s)' % (name, numerator, name),
240
        overwrite=overwrite)
241
    return ['%s_%s' % (s, name) for s in ['nobs', 'mean']]
242

    
243
def load_qc_adjusted_lst(hdf):
244
    """Load LST_Day_1km into GRASS from the specified hdf file, and
245
    nullify any cells for which QA flags indicate anything other than
246
    high quality.
247

    
248
    Argument:
249
    hdf -- local path to the 11A1 HDF file
250

    
251
    Returns the name of the QC-adjusted LST map created in GRASS.
252
    """
253
    lstname =  'LST_' + '_'.join(os.path.basename(hdf).split('.')[1:3])               
254
    # read in lst grid
255
    gs.run_command('r.in.gdal',
256
        input='HDF4_EOS:EOS_GRID:%s:MODIS_Grid_Daily_1km_LST:LST_Day_1km' % hdf,
257
        output=lstname)            # No new location created since it has already been done with r.in.gdal before!!!
258

    
259
    # read in qc grid
260
    gs.run_command('r.in.gdal',
261
        input = 'HDF4_EOS:EOS_GRID:%s:MODIS_Grid_Daily_1km_LST:QC_Day' % hdf,
262
        output = 'qc_this_day')
263
    gs.run_command('g.region', rast=lstname)
264
    # null out any LST cells for which QC is not high [00]  #THIS WHERE WE MIGHT NEED TO CHANGE...
265
    gs.mapcalc('${lst} = if((qc_this_day & 0x03)==0, ${lst}, null())',
266
        lst=lstname, overwrite=True)
267
    # clean up
268
    gs.run_command('g.remove', rast='qc_this_day')
269
    # return name of qc-adjusted LST raster in GRASS
270
    return lstname
271

    
272

    
273
def main():
274
    #--------------------------------------------
275
    # Download and Calculate monthly climatology for daily LST time series for a specific area
276
    #--------------------------------------------
277

    
278
    # TODO: set up a (temporary?) GRASS database to use for processing? code
279
    # currently assumes it's being run within an existing GRASS session
280
    # using an appropriately configured LOCATION...
281
    #
282
    # note the following trick to fix datum for modis sinu;
283
    # TODO: check that this works properly...compare to MRT output?
284
    # gs.run_command('g.proj', flags='c',
285
    #     proj4='+proj=sinu +a=6371007.181 +b=6371007.181 +ellps=sphere')
286
    ##    proj4='+proj=sinu +R=6371007.181 +nadgrids=@null +wktext')
287

    
288
 
289
    #### MOdified by Benoit in March 2013 
290
    
291
    #Parameters
292
    #Inputs from R??
293
    tiles = ['h11v08','h11v07','h12v07','h12v08','h10v07','h10v08'] #These tiles correspond to Venezuela.
294
    start_year = 2001
295
    end_year = 2010
296
    end_month=12
297
    start_month=1
298
    hdfdir =  '/home/parmentier/Data/benoit_test' #destination file where hdf files are stored locally after download.
299
    night=1    # if 1 then produce night climatology
300
    out_suffix="_03192013"
301
    download=1  # if 1 then download files
302
    
303
    ################## First step: download data ###################
304
    # Added tile loop 
305
    year_list=range(start_year,end_year+1) #list of year to loop through
306
    #for testing...
307
    #year_list=[2001,2002]
308
    #hdfdir = '/home/parmentier/Data/benoit_test2' 
309
    #tiles = ['h10v07','h10v08','h12v07']
310
    if download==1:
311
        for tile in tiles:
312
            for year in year_list:
313
                start_doy = 1
314
                end_doy = 365
315
                if calendar.isleap(year):
316
                    end_doy=366
317
                    
318
                hdfs = download_mod11a1(hdfdir, tile, start_doy, end_doy, year)
319
    
320
    # modify loop to take into account "hdfs", list of hdf files needed in the next steps…  
321
    ################# Second step: compute climatology #################
322
    ## Process tile by tile...
323
    ##start loop for tile
324
    #tile= 'h12v08'
325
    #tiles = ['h12v07','h12v08']
326
    var_name = ['LST_Day_1km','LST_Night_1km']
327
    if night==1:
328
        lst_var = var_name[1]
329
    if night==0:
330
        lst_var = var_name[0]
331
        
332
    #start = time.clock()    
333
    for tile in tiles:        
334
        # Set up a temporary GRASS data base   
335
        tmp_location = 'tmp' + "_"+ tile + "_"+ str(os.getpid())        # Name of the temporary GRASS data base 
336
        orig_location = gs.gisenv()['LOCATION_NAME']
337
        orig_mapset = gs.gisenv()['MAPSET']
338
        gs.os.environ['GRASS_OVERWRITE'] = '1'         # Allow for overwrite?? GRASS data base  
339
            
340
        path_file = hdfdir
341
        os.chdir(path_file)    #set working directory
342
        os.getcwd()            #get current working directory
343
        listfiles_wd2 = glob.glob('*'+tile+'*'+'.hdf') #list the all the LST files of interest
344
        name_of_file = listfiles_wd2[0]    
345
        #Create the name of the layer to extract from the hdf file used for definition of the database
346
        lst1day = 'HDF4_EOS:EOS_GRID:%s/%s:%s' % (
347
        path_file,
348
        name_of_file,
349
        'MODIS_Grid_Daily_1km_LST:'+ lst_var)
350
        #'LST_Night_1km'
351
        #change the above line later to allow night LST for tmin
352
         
353
        #now import one image and set the location, mapset and database !!create name per tile
354
        gs.run_command('r.in.gdal', input=lst1day, output='LST_1day_'+tile,
355
               location=tmp_location)
356
    
357
        # Now that the new location has been create, switch to new location
358
        gs.run_command('g.gisenv', set='LOCATION_NAME=%s' % tmp_location)
359
        gs.run_command('g.gisenv', set='MAPSET=PERMANENT')
360
    
361
        # set GRASS the projection system and GRASS region to match the extent of the file
362
        gs.run_command('g.proj', flags='c',
363
        proj4='+proj=sinu +a=6371007.181 +b=6371007.181 +ellps=sphere') #This should be set in the parameters
364
        gs.run_command('g.region', rast='LST_1day_'+tile)
365
    
366
        #generate monthly pixelwise mean & count of high-quality daytime LST
367
        gs.os.environ['GRASS_OVERWRITE'] = '1'
368
    
369
        #Provide a list of file "hdfs" to process…this should be in a loop...
370
        #tile= 'h12v08'
371
        fileglob = 'MOD11A1.A*.%s*hdf' % (tile)
372
        pathglob = os.path.join(path_file, fileglob)
373
        files = glob.glob(pathglob)
374
        hdfs = files
375
        ### LST values in images must be masked out if they have low quality, determined from QC flags stored in hdf files
376
        LST = [load_qc_adjusted_lst(hdf) for hdf in hdfs]
377
        ### LST is a list that contains the name of the new lst files in the GRASS format. The files are stored in the temporary GRASS database.
378
        list_maps_month=list_raster_per_month(LST)    
379
        list_maps_name=['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']
380
        ##Now calculate clim per month, do a test for year1
381
        nb_month = 12
382
        for i in range(1,nb_month+1):
383
            clims = calc_clim(list_maps_month[i-1],lst_var+'_'+tile+'_'+list_maps_name[i-1]+'_'+str(i-1))
384
            for j in range(1, len(clims)+1):
385
                if j-1 ==0:
386
                    gs.run_command('r.out.gdal', input= clims[j-1], output=clims[j-1]+ out_suffix +'.tif', type='Float32')
387
                if j-1 ==1:
388
                    gs.mapcalc(' clim_rescaled = ('+ clims[j-1 ]+ ' * 0.02) -273.15')  
389
                    gs.run_command('r.out.gdal', input= 'clim_rescaled', output=clims[j-1]+ out_suffix+'.tif', type='Float32')
390
        #clims = calc_clim(LST, 'LST_%s_%d_%02d' % (tile, year, month))
391
        # clean up  if necessary
392
        
393
        #gs.run_command('g.remove', rast=','.join(LST))
394
        #gs.os.environ['GRASS_OVERWRITE'] = '0'
395
        
396
        # clean up
397
        #gs.run_command('g.gisenv', set='LOCATION_NAME=%s' % orig_location)
398
        #gs.run_command('g.gisenv', set='MAPSET=%s' % orig_mapset)
399
        #shutil.rmtree(os.path.join(gs.gisenv()['GISDBASE'], tmp_location))
400

    
401
    return None
(12-12/19)