/ - Diff - BIEN 3 - NCEAS Projects

« Previous | Next »

Revision 5123

Added by Aaron Marcuse-Kubitza over 12 years ago

tnrs_db: Print cumulative profiling information after every TNRS request, rather than just at the end

         db = sql.connect(db_config, log_debug=log)
         tnrs_profiler = profiling.ItersProfiler(iter_text='row')
         try:
             # Iterate over unscrubbed verbatim taxonpaths
             start = 0
         # Iterate over unscrubbed verbatim taxonpaths
         start = 0
         total_pause = 0
         while True:
             # Fetch next set
             cur = sql.select(db, 'taxonpath', ['taxonomicnamewithauthor'],
                 [('canon_taxonpath_id', None)], limit=tnrs.max_names,
                 start=start, cacheable=False)
             this_ct = cur.rowcount
             start += this_ct # advance start to fetch next set
             if this_ct == 0:
                 total_pause += pause
                 if total_pause > max_pause: break
                 log('Waited '+str(total_pause)+' sec. Waiting...')
                 time.sleep(pause) # wait for more rows
                 continue # try again
             # otherwise, rows found
             total_pause = 0
             while True:
                 # Fetch next set
                 cur = sql.select(db, 'taxonpath', ['taxonomicnamewithauthor'],
                     [('canon_taxonpath_id', None)], limit=tnrs.max_names,
                     start=start, cacheable=False)
                 this_ct = cur.rowcount
                 start += this_ct # advance start to fetch next set
                 if this_ct == 0:
                     total_pause += pause
                     if total_pause > max_pause: break
                     log('Waited '+str(total_pause)+' sec. Waiting...')
                     time.sleep(pause) # wait for more rows
                     continue # try again
                 # otherwise, rows found
                 total_pause = 0
                 names = list(sql.values(cur))
                 # Run TNRS
                 log('Processing '+str(this_ct)+' taxonpaths')
                 log('Making TNRS request')
                 tnrs_profiler.start()
                 try:
                     try: stream = tnrs.repeated_tnrs_request(names)
                     finally: tnrs_profiler.stop(iter_ct=this_ct)
                 except tnrs.InvalidResponse: pass # skip set in case it caused error
                 else:
                     log('Storing TNRS response data')
                     stream_info = csvs.stream_info(stream, parse_header=True)
                     stream = streams.ProgressInputStream(stream, sys.stderr, n=1000)
                     sql_io.append_csv(db, tnrs_data, stream_info, stream)
         finally:
             log(tnrs_profiler.msg())
             names = list(sql.values(cur))
             # Run TNRS
             log('Processing '+str(this_ct)+' taxonpaths')
             log('Making TNRS request')
             tnrs_profiler.start()
             try:
                 try: stream = tnrs.repeated_tnrs_request(names)
                 finally:
                     tnrs_profiler.stop(iter_ct=this_ct)
                     log('Cumulatively: '+tnrs_profiler.msg())
             except tnrs.InvalidResponse: pass # skip set in case it caused error
             else:
                 log('Storing TNRS response data')
                 stream_info = csvs.stream_info(stream, parse_header=True)
                 stream = streams.ProgressInputStream(stream, sys.stderr, n=1000)
                 sql_io.append_csv(db, tnrs_data, stream_info, stream)
     main()

Also available in: Unified diff

Project

General

Profile

Revision 5123

Added by Aaron Marcuse-Kubitza over 12 years ago