Project

General

Profile

Download (16 KB) Statistics
| Branch: | Revision:
1
# -*- coding: utf-8 -*-
2
"""
3
Created on Fri Oct 26 23:48:57 2012
4

    
5
@author: benoitparmentier
6
"""
7

    
8
#!/usr/bin/python
9
#
10
# Early version of assorted Python code to download MODIS 11A1 (daily)
11
# data associated with specified dates and tiles, then interface with
12
# GRASS to calculate temporally averaged LST in a way that accounts for
13
# QC flags.
14
#
15
# TODO:
16
#  - functionalize to encapsulate high level procedural steps
17
#  - create a 'main' function to allow script to be run as a command
18
#    that can accept arguments?
19
#  - put downloaded data somewhere other than working directory?
20
#  - write code to set up and take down a temporary GRASS location/mapset
21
#  - write code to export climatology rasters to file (geotiff? compressed?)
22
#  - calculate other aggregate stats? (stddev, ...)
23
#  - deal with nightly LST too?
24
#  - deal with more than just first two bits of QC flags?
25
#     e.g.: r.mapcalc 'qc_level2 = (qc>>2) & 0x03'   
26
#     0x03?? that means 0000_0011 and >>2 means shit twice on the right for 
27
#  - record all downloads to a log file?
28
#
29
# Jim Regetz
30
# NCEAS
31
# Created on 16-May-2012
32
# Modified by Benoit Parmentier
33
# NCEAS on 18-October-2012
34
# 
35

    
36
import os, glob
37
import datetime, calendar
38
import ftplib
39
import grass.script as gs
40
#from datetime import date
41
#from datetime import datetime
42

    
43

    
44
#------------------
45
# helper functions
46
#------------------
47

    
48
def yj_to_ymd(year, doy):
49
    """Return date as e.g. '2000.03.05' based on specified year and
50
    numeric day-of-year (doy) """
51
    date = datetime.datetime.strptime('%d%03d' % (year, doy), '%Y%j')
52
    return date.strftime('%Y.%m.%d')
53

    
54
def get_doy_range(year, month):
55
    """Determine starting and ending numeric day-of-year (doy)
56
    asscociated with the specified month and year.
57

    
58
    Arguments:
59
    year -- four-digit integer year
60
    month -- integer month (1-12)
61

    
62
    Returns tuple of start and end doy for that month/year.
63
    """
64
    last_day_of_month = calendar.monthrange(year, month)[1]
65
    start_doy = int(datetime.datetime.strptime('%d.%02d.%02d' % (year,
66
        month, 1), '%Y.%m.%d').strftime('%j'))
67
    end_doy = int(datetime.datetime.strptime('%d.%02d.%02d' % (year,
68
        month, last_day_of_month), '%Y.%m.%d').strftime('%j'))
69
    return (int(start_doy), int(end_doy))
70

    
71
#added by Benoit, to create a list of raster images for particular month (2001-2010)
72

    
73
def date_sequence(year_start,month_start,day_start,year_end,month_end,day_end):
74
    """Create a date sequence for a start date and end date defined by year, month, date
75
    Arguments:
76
    year_start -- starting year
77
 
78
    Returns list of absolute paths to the downloaded HDF files.Note the format yyyydoy
79
    """
80
    from datetime import date
81
    from dateutil.rrule import rrule, DAILY
82

    
83
    a = date(year_start, month_start,day_start)
84
    b = date(year_end, month_end, day_end)
85
    date_seq=list()
86
    for dt in rrule(DAILY, dtstart=a, until=b):
87
        print dt.strftime("%Y%j")    
88
        date_seq.append(dt.strftime("%Y%j"))
89
        
90
    return(date_seq)
91

    
92
#Added on January 16, 2012 by Benoit NCEAS
93
def list_raster_per_month(listmaps):
94
    """Determine starting and ending numeric day-of-year (doy)
95
    asscociated with the specified month and year.
96

    
97
    Arguments:
98
    maplist -- list of raster grass-digit integer year
99
    month -- integer month (1-12)
100
    year_range -- list of years
101

    
102
    Returns tuple of start and end doy for that month/year.
103
    """
104
    list_maps_month=list()
105
    nb_month=12
106
    for i in range(1,nb_month+1):
107
        #i=i+1
108
        #filename = listfiles_wd2[i] #list the files of interest
109
        #monthlist[:]=[] #empty the list
110
        monthlist=list()
111
        #monthlist2[:]=[] #empty the list
112
        j=0  #counter
113
        for mapname in listmaps:
114
            #tmp_str=LST[0]
115
            date_map=mapname.split('_')[1][1:8]
116
            #date = filename[filename.rfind('_MOD')-8:filename.rfind('_MOD')]
117
            d=datetime.datetime.strptime(date_map,'%Y%j') #This produces a date object from a string extracted from the file name.
118
            month=d.strftime('%m') #Extract the month of the current map
119
            if int(month)==i:
120
                #monthlist.append(listmaps[j])
121
                monthlist.append(mapname)
122
                #monthlist2.append(listmaps[j])
123
            j=j+1
124
        list_maps_month.append(monthlist)   #This is a list of a list containing a list for each month
125
        #list_files2.append(monthlist2)
126
    return(list_maps_month)    
127
    
128
# quick function to return list of dirs in wd
129
def list_contents(ftp):
130
    """Parse ftp directory listing into list of names of the files
131
    and/or directories contained therein. May or may not be robust in
132
    general, but seems to work fine for LP DAAP ftp server."""
133
    listing = []
134
    ftp.dir(listing.append)
135
    contents = [item.split()[-1] for item in listing[1:]]
136
    return contents
137

    
138
def download_mod11a1(destdir, tile, start_doy, end_doy, year, ver=5):
139
    """Download into destination directory the MODIS 11A1 HDF files
140
    matching a given tile, year, and day range. If for some unexpected
141
    reason there are multiple matches for a given day, only the first is
142
    used. If no matches, the day is skipped with a warning message.
143

    
144
    Arguments:
145
    destdir -- path to local destination directory
146
    tile -- tile identifier (e.g., 'h08v04')
147
    start_doy -- integer start of day range (0-366)
148
    end_doy -- integer end of day range (0-366)
149
    year -- integer year (>=2000)
150
    ver -- MODIS version (4 or 5)
151

    
152
    Returns list of absolute paths to the downloaded HDF files.
153
    """
154
    # connect to data pool and change to the right directory
155
    ftp = ftplib.FTP('e4ftl01.cr.usgs.gov')
156
    ftp.login('anonymous', '')
157
    ftp.cwd('MOLT/MOD11A1.%03d' % ver)
158
    # make list of daily directories to traverse
159
    available_days = list_contents(ftp)
160
    desired_days = [yj_to_ymd(year, x) for x in range(start_doy, end_doy+1)]
161
    days_to_get = filter(lambda day: day in desired_days, available_days)
162
    if len(days_to_get) < len(desired_days):
163
        missing_days = [day for day in desired_days if day not in days_to_get]
164
        print 'skipping %d day(s) not found on server:' % len(missing_days)
165
        print '\n'.join(missing_days)
166
    # get each tile in turn
167
    hdfs = list()
168
    for day in days_to_get:
169
        ftp.cwd(day)
170
        files_to_get = [file for file in list_contents(ftp)
171
            if tile in file and file[-3:]=='hdf']
172
        if len(files_to_get)==0:
173
            # no file found -- print message and move on
174
            print 'no hdf found on server for tile', tile, 'on', day
175
            ftp.cwd('..') #added by Benoit
176
            continue
177
        elif 1<len(files_to_get):
178
            # multiple files found! -- just use the first...
179
            print 'multiple hdfs found on server for tile', tile, 'on', day
180
        file = files_to_get[0]
181
        local_file = os.path.join(destdir, file)
182
        ftp.retrbinary('RETR %s' % file, open(local_file, 'wb').write)
183
        hdfs.append(os.path.abspath(local_file))
184
        ftp.cwd('..')
185
    # politely disconnect
186
    ftp.quit()
187
    # return list of downloaded paths
188
    return hdfs
189

    
190
def get_hdf_paths(hdfdir, tile, start_doy, end_doy, year):
191
    """Look in supplied directory to find the MODIS 11A1 HDF files
192
    matching a given tile, year, and day range. If for some unexpected
193
    reason there are multiple matches for a given day, only the first is
194
    used. If no matches, the day is skipped with a warning message.
195

    
196
    Arguments:
197
    hdfdir -- path to directory containing the desired HDFs
198
    tile -- tile identifier (e.g., 'h08v04')
199
    start_doy -- integer start of day range (0-366)
200
    end_doy -- integer end of day range (0-366)
201
    year -- integer year (>=2000)
202

    
203
    Returns list of absolute paths to the located HDF files.
204
    """
205
    hdfs = list()
206
    for doy in range(start_doy, end_doy+1):
207
        fileglob = 'MOD11A1.A%d%03d.%s*hdf' % (year, doy, tile)
208
        pathglob = os.path.join(hdfdir, fileglob)
209
        files = glob.glob(pathglob)
210
        if len(files)==0:
211
            # no file found -- print message and move on
212
            print 'cannot access %s: no such file' % pathglob
213
            continue
214
        elif 1<len(files):
215
            # multiple files found! -- just use the first...
216
            print 'multiple hdfs found for tile', tile, 'on', day
217
        hdfs.append(os.path.abspath(files[0]))
218
    return hdfs
219

    
220
def calc_clim(maplist, name, overwrite=False):
221
    """Generate some climatalogies in GRASS based on the input list of
222
    maps. As usual, current GRASS region settings apply. Produces the
223
    following output rasters:
224
      * nobs: count of number of (non-null) values over the input maps
225
      * mean: arithmetic mean of (non-null) values over the input maps
226

    
227
    Arguments:
228
    maplist -- list of names of GRASS maps to be aggregated
229
    name -- template (suffix) for GRASS output map names
230

    
231
    Returns list of names of the output maps created in GRASS.
232
    """
233
    denominator = '(%s)' % '+'.join(['if(!isnull(%s))' % m
234
        for m in maplist])
235
    gs.mapcalc('nobs_%s = %s' % (name, denominator), overwrite=overwrite)
236
    numerator = '(%s)' % '+'.join(['if(isnull(%s), 0, %s)' % (m, m)
237
        for m in maplist])
238
    gs.mapcalc('mean_%s = round(float(%s)/nobs_%s)' % (name, numerator, name),
239
        overwrite=overwrite)
240
    return ['%s_%s' % (s, name) for s in ['nobs', 'mean']]
241

    
242
def load_qc_adjusted_lst(hdf):
243
    """Load LST_Day_1km into GRASS from the specified hdf file, and
244
    nullify any cells for which QA flags indicate anything other than
245
    high quality.
246

    
247
    Argument:
248
    hdf -- local path to the 11A1 HDF file
249

    
250
    Returns the name of the QC-adjusted LST map created in GRASS.
251
    """
252
    lstname =  'LST_' + '_'.join(os.path.basename(hdf).split('.')[1:3])               
253
    # read in lst grid
254
    gs.run_command('r.in.gdal',
255
        input='HDF4_EOS:EOS_GRID:%s:MODIS_Grid_Daily_1km_LST:LST_Day_1km' % hdf,
256
        output=lstname)            # No new location created since it has already been done with r.in.gdal before!!!
257

    
258
    # read in qc grid
259
    gs.run_command('r.in.gdal',
260
        input = 'HDF4_EOS:EOS_GRID:%s:MODIS_Grid_Daily_1km_LST:QC_Day' % hdf,
261
        output = 'qc_this_day')
262
    gs.run_command('g.region', rast=lstname)
263
    # null out any LST cells for which QC is not high [00]  #THIS WHERE WE MIGHT NEED TO CHANGE...
264
    gs.mapcalc('${lst} = if((qc_this_day & 0x03)==0, ${lst}, null())',
265
        lst=lstname, overwrite=True)
266
    # clean up
267
    gs.run_command('g.remove', rast='qc_this_day')
268
    # return name of qc-adjusted LST raster in GRASS
269
    return lstname
270

    
271

    
272
def main():
273
    #--------------------------------------------
274
    # Download and Calculate monthly climatology for daily LST time series for a specific area
275
    #--------------------------------------------
276

    
277
    # TODO: set up a (temporary?) GRASS database to use for processing? code
278
    # currently assumes it's being run within an existing GRASS session
279
    # using an appropriately configured LOCATION...
280
    #
281
    # note the following trick to fix datum for modis sinu;
282
    # TODO: check that this works properly...compare to MRT output?
283
    # gs.run_command('g.proj', flags='c',
284
    #     proj4='+proj=sinu +a=6371007.181 +b=6371007.181 +ellps=sphere')
285
    ##    proj4='+proj=sinu +R=6371007.181 +nadgrids=@null +wktext')
286

    
287
 
288
    #### Added by Benoit on 18 October 2012
289
    
290
    #Parameters
291
    tiles = ['h11v08','h11v07','h12v08'] #These tiles correspond to Venezuela.
292
    start_year = 2001
293
    end_year = 2010
294
    end_month=12
295
    start_month=1
296
    hdfdir =  '/home/parmentier/Data/benoit_test' #destination file where hdf files are stored locally after download.
297
    ################## First step: download data ###################
298
    # Added tile loop 
299
    year_list=range(start_year,end_year+1) #list of year to loop through
300
    year_list=[2001,2002]
301
    for tile in tiles:	
302
        for year in year_list:
303
            		if year==end_year:
304
            				start_doy, end_doy = get_doy_range(year, end_month)
305
                			start_doy=1  #Fix problem later
306
                			hdfs = download_mod11a1(hdfdir, tile, start_doy, end_doy, year)
307

    
308
            		elif year==start_year:
309
                			start_doy, end_doy = get_doy_range(year, start_month)
310
                			end_doy=365
311
                	if calendar.isleap(year):
312
                			end_doy=366
313
                			hdfs = download_mod11a1(hdfdir, tile, start_doy, end_doy, year)
314
            
315
            		elif (year!=start_year and year!=end_year):
316
                			start_doy=1
317
                			end_doy=365
318
                			if calendar.isleap(year):
319
                					end_doy=366
320
                			hdfs = download_mod11a1(hdfdir, tile, start_doy, end_doy, year)
321
    
322
    # modify loop to take into account "hdfs", list of hdf files needed in the next steps…  
323

    
324
    ################# Second step: compute climatology #################
325

    
326
    # Set up a temporary GRASS data base 
327
    
328
    tmp_location = 'tmp' + str(os.getpid())        # Name of the temporary GRASS data base 
329
    orig_location = gs.gisenv()['LOCATION_NAME']
330
    orig_mapset = gs.gisenv()['MAPSET']
331
    gs.os.environ['GRASS_OVERWRITE'] = '1'         # Allow for overwrite?? GRASS data base  
332
    
333
    ##NEED TO LOOP OVER DIFFERENT TILES!!!
334
    ##start loop for tile
335
    #load a file that will define the region of interest and location at the same time
336
        
337
    tile= 'h12v08'
338
    tile=tiles[2]
339
    #tile= 'h12v08'
340
    
341
    path_file = '/home/parmentier/Data/benoit_test'
342
    os.chdir(path_file)    #set working directory
343
    os.getcwd()            #get current working directory
344
    listfiles_wd2 = glob.glob('*'+tile+'*'+'.hdf') #list the all the LST files of interest
345
    name_of_file = listfiles_wd2[0]    
346
    #name_of_file = 'MOD11A1.A2003364.h12v08.005.2008163211649.hdf'
347
     # first load daytime LST, by accessing specific layers in the hdd??
348
    #SUBDATASET_1_NAME=HDF4_EOS:EOS_GRID:"MOD11A1.A2003364.h12v08.005.2008163211649.hdf":MODIS_Grid_Daily_1km_LST:LST_Day_1km
349
    #path_file = os.getcwd() 
350
    #Create the name of the layer to extract from the hdf file used for definition of the database
351
    lst1day = 'HDF4_EOS:EOS_GRID:%s/%s:%s' % (
352
    path_file,
353
    name_of_file,
354
     'MODIS_Grid_Daily_1km_LST:LST_Day_1km')
355
     
356
    #now import one image and set the location, mapset and database !!create name per tile
357
    gs.run_command('r.in.gdal', input=lst1day, output='LST_1day_h12v08',
358
           location=tmp_location)
359

    
360
    # Now that the new location has been create, switch to new location
361
    gs.run_command('g.gisenv', set='LOCATION_NAME=%s' % tmp_location)
362
    gs.run_command('g.gisenv', set='MAPSET=PERMANENT')
363

    
364
    # set GRASS the projection system and GRASS region to match the extent of the file
365
    gs.run_command('g.proj', flags='c',
366
    proj4='+proj=sinu +a=6371007.181 +b=6371007.181 +ellps=sphere')
367
    gs.run_command('g.region', rast='LST_1day_h12v08')
368

    
369
    
370
    #generate monthly pixelwise mean & count of high-quality daytime LST
371
   
372
    gs.os.environ['GRASS_OVERWRITE'] = '1'
373

    
374
    #Provide a list of file "hdfs" to process…
375
    #tile= 'h12v08'
376
    fileglob = 'MOD11A1.A*.%s*hdf' % (tile)
377
    pathglob = os.path.join(path_file, fileglob)
378
    files = glob.glob(pathglob)
379
    hdfs = files
380
    ### LST values in images must be masked out if they have low quality: this is done using QC flags from the layer subset in hdf files
381
    LST = [load_qc_adjusted_lst(hdf) for hdf in hdfs]
382
    ### LST is a list that contains the name of the new lst files in the GRASS format. The files are stored in the temporary GRASS database.
383
    list_maps_month=list_raster_per_month(LST)    
384
    list_maps_name=['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']
385
    ##Now calculate clim per month, do a test for year1
386
    nb_month=12
387
    for i in range(1,nb_month+1):
388
        #clims = calc_clim(LST, 'LST_%s_%d_%02d' % (tile, year, month))
389
        clims = calc_clim(list_maps_month[i-1],list_maps_name[i-1]+'_'+str(i-1))
390

    
391
    #clims = calc_clim(LST, 'LST_%s_%d_%02d' % (tile, year, month))
392
    
393
    # clean up  if necessary
394
    gs.run_command('g.remove', rast=','.join(LST))
395
    gs.os.environ['GRASS_OVERWRITE'] = '0'
396
    
397
    # clean up
398
    gs.run_command('g.gisenv', set='LOCATION_NAME=%s' % orig_location)
399
    gs.run_command('g.gisenv', set='MAPSET=%s' % orig_mapset)
400
    shutil.rmtree(os.path.join(gs.gisenv()['GISDBASE'], tmp_location))
401

    
402
    return None
(12-12/19)