Project

General

Profile

« Previous | Next » 

Revision 5098

tnrs_db: Profile the TNRS requests. The main loop is now wrapped in a try/finally block so that the profiling stats are logged even if the program exits with an error.

View differences:

tnrs_db
11 11

  
12 12
import csvs
13 13
import opts
14
import profiling
14 15
import sql
15 16
import sql_gen
16 17
import sql_io
......
42 43
    # Connect to DB
43 44
    db = sql.connect(db_config, log_debug=log)
44 45
    
45
    # Iterate over unscrubbed verbatim taxonpaths
46
    start = 0
47
    total_pause = 0
48
    while True:
49
        # Fetch next set
50
        cur = sql.select(db, 'taxonpath', ['taxonomicnamewithauthor'],
51
            [('canon_taxonpath_id', None)], limit=max_taxons, start=start,
52
            cacheable=False)
53
        this_ct = cur.rowcount
54
        start += this_ct # advance start to fetch next set
55
        if this_ct == 0:
56
            total_pause += pause
57
            if total_pause > max_pause: break
58
            log('Waited '+str(total_pause)+' sec. Waiting...')
59
            time.sleep(pause) # wait for more rows
60
            continue # try again
61
        # otherwise, rows found
46
    tnrs_profiler = profiling.ItersProfiler(iter_text='row')
47
    try:
48
        # Iterate over unscrubbed verbatim taxonpaths
49
        start = 0
62 50
        total_pause = 0
63
        
64
        # Run TNRS
65
        log('Making TNRS request')
66
        try: stream = tnrs.repeated_tnrs_request(list(sql.values(cur)))
67
        except InvalidResponse: pass # skip this set in case it caused the error
68
        else:
69
            log('Storing TNRS response data')
70
            stream_info = csvs.stream_info(stream, parse_header=True)
71
            stream = streams.ProgressInputStream(stream, sys.stderr, n=1000)
72
            sql_io.append_csv(db, tnrs_data, stream_info, stream)
51
        while True:
52
            # Fetch next set
53
            cur = sql.select(db, 'taxonpath', ['taxonomicnamewithauthor'],
54
                [('canon_taxonpath_id', None)], limit=max_taxons, start=start,
55
                cacheable=False)
56
            this_ct = cur.rowcount
57
            start += this_ct # advance start to fetch next set
58
            if this_ct == 0:
59
                total_pause += pause
60
                if total_pause > max_pause: break
61
                log('Waited '+str(total_pause)+' sec. Waiting...')
62
                time.sleep(pause) # wait for more rows
63
                continue # try again
64
            # otherwise, rows found
65
            total_pause = 0
66
            taxons = list(sql.values(cur))
67
            
68
            # Run TNRS
69
            log('Making TNRS request')
70
            tnrs_profiler.start()
71
            try:
72
                try: stream = tnrs.repeated_tnrs_request(taxons)
73
                finally: tnrs_profiler.stop(iter_ct=this_ct)
74
            except InvalidResponse: pass # skip set in case it caused the error
75
            else:
76
                log('Storing TNRS response data')
77
                stream_info = csvs.stream_info(stream, parse_header=True)
78
                stream = streams.ProgressInputStream(stream, sys.stderr, n=1000)
79
                sql_io.append_csv(db, tnrs_data, stream_info, stream)
80
    finally:
81
        log(tnrs_profiler.msg())
73 82

  
74 83
main()

Also available in: Unified diff