/ - Diff - BIEN 3 - NCEAS Projects

« Previous | Next »

Revision 5098

Added by Aaron Marcuse-Kubitza over 12 years ago

tnrs_db: Profile the TNRS requests. The request loop is wrapped in a try/finally block so that the profiling stats are printed even if the program exits with an error.

     import csvs
     import opts
     import profiling
     import sql
     import sql_gen
     import sql_io
-...
         # Connect to DB
         db = sql.connect(db_config, log_debug=log)
         # Iterate over unscrubbed verbatim taxonpaths
         start = 0
         total_pause = 0
         while True:
             # Fetch next set
             cur = sql.select(db, 'taxonpath', ['taxonomicnamewithauthor'],
                 [('canon_taxonpath_id', None)], limit=max_taxons, start=start,
                 cacheable=False)
             this_ct = cur.rowcount
             start += this_ct # advance start to fetch next set
             if this_ct == 0:
                 total_pause += pause
                 if total_pause > max_pause: break
                 log('Waited '+str(total_pause)+' sec. Waiting...')
                 time.sleep(pause) # wait for more rows
                 continue # try again
             # otherwise, rows found
         tnrs_profiler = profiling.ItersProfiler(iter_text='row')
         try:
             # Iterate over unscrubbed verbatim taxonpaths
             start = 0
             total_pause = 0
             # Run TNRS
             log('Making TNRS request')
             try: stream = tnrs.repeated_tnrs_request(list(sql.values(cur)))
             except InvalidResponse: pass # skip this set in case it caused the error
             else:
                 log('Storing TNRS response data')
                 stream_info = csvs.stream_info(stream, parse_header=True)
                 stream = streams.ProgressInputStream(stream, sys.stderr, n=1000)
                 sql_io.append_csv(db, tnrs_data, stream_info, stream)
             while True:
                 # Fetch next set
                 cur = sql.select(db, 'taxonpath', ['taxonomicnamewithauthor'],
                     [('canon_taxonpath_id', None)], limit=max_taxons, start=start,
                     cacheable=False)
                 this_ct = cur.rowcount
                 start += this_ct # advance start to fetch next set
                 if this_ct == 0:
                     total_pause += pause
                     if total_pause > max_pause: break
                     log('Waited '+str(total_pause)+' sec. Waiting...')
                     time.sleep(pause) # wait for more rows
                     continue # try again
                 # otherwise, rows found
                 total_pause = 0
                 taxons = list(sql.values(cur))
                 # Run TNRS
                 log('Making TNRS request')
                 tnrs_profiler.start()
                 try:
                     try: stream = tnrs.repeated_tnrs_request(taxons)
                     finally: tnrs_profiler.stop(iter_ct=this_ct)
                 except InvalidResponse: pass # skip set in case it caused the error
                 else:
                     log('Storing TNRS response data')
                     stream_info = csvs.stream_info(stream, parse_header=True)
                     stream = streams.ProgressInputStream(stream, sys.stderr, n=1000)
                     sql_io.append_csv(db, tnrs_data, stream_info, stream)
         finally:
             log(tnrs_profiler.msg())
     main()

Also available in: Unified diff

Project

General

Profile

Revision 5098

Added by Aaron Marcuse-Kubitza over 12 years ago