Revision 5123
Added by Aaron Marcuse-Kubitza about 12 years ago
bin/tnrs_db | | |
---|---|---|
43 | 43 |
db = sql.connect(db_config, log_debug=log) |
44 | 44 |
|
45 | 45 |
tnrs_profiler = profiling.ItersProfiler(iter_text='row') |
46 |
try: |
|
47 |
# Iterate over unscrubbed verbatim taxonpaths |
|
48 |
start = 0 |
|
46 |
|
|
47 |
# Iterate over unscrubbed verbatim taxonpaths |
|
48 |
start = 0 |
|
49 |
total_pause = 0 |
|
50 |
while True: |
|
51 |
# Fetch next set |
|
52 |
cur = sql.select(db, 'taxonpath', ['taxonomicnamewithauthor'], |
|
53 |
[('canon_taxonpath_id', None)], limit=tnrs.max_names, |
|
54 |
start=start, cacheable=False) |
|
55 |
this_ct = cur.rowcount |
|
56 |
start += this_ct # advance start to fetch next set |
|
57 |
if this_ct == 0: |
|
58 |
total_pause += pause |
|
59 |
if total_pause > max_pause: break |
|
60 |
log('Waited '+str(total_pause)+' sec. Waiting...') |
|
61 |
time.sleep(pause) # wait for more rows |
|
62 |
continue # try again |
|
63 |
# otherwise, rows found |
|
49 | 64 |
total_pause = 0 |
50 |
while True: |
|
51 |
# Fetch next set |
|
52 |
cur = sql.select(db, 'taxonpath', ['taxonomicnamewithauthor'], |
|
53 |
[('canon_taxonpath_id', None)], limit=tnrs.max_names, |
|
54 |
start=start, cacheable=False) |
|
55 |
this_ct = cur.rowcount |
|
56 |
start += this_ct # advance start to fetch next set |
|
57 |
if this_ct == 0: |
|
58 |
total_pause += pause |
|
59 |
if total_pause > max_pause: break |
|
60 |
log('Waited '+str(total_pause)+' sec. Waiting...') |
|
61 |
time.sleep(pause) # wait for more rows |
|
62 |
continue # try again |
|
63 |
# otherwise, rows found |
|
64 |
total_pause = 0 |
|
65 |
names = list(sql.values(cur)) |
|
66 |
|
|
67 |
# Run TNRS |
|
68 |
log('Processing '+str(this_ct)+' taxonpaths') |
|
69 |
log('Making TNRS request') |
|
70 |
tnrs_profiler.start() |
|
71 |
try: |
|
72 |
try: stream = tnrs.repeated_tnrs_request(names) |
|
73 |
finally: tnrs_profiler.stop(iter_ct=this_ct) |
|
74 |
except tnrs.InvalidResponse: pass # skip set in case it caused error |
|
75 |
else: |
|
76 |
log('Storing TNRS response data') |
|
77 |
stream_info = csvs.stream_info(stream, parse_header=True) |
|
78 |
stream = streams.ProgressInputStream(stream, sys.stderr, n=1000) |
|
79 |
sql_io.append_csv(db, tnrs_data, stream_info, stream) |
|
80 |
finally: |
|
81 |
log(tnrs_profiler.msg()) |
|
65 |
names = list(sql.values(cur)) |
|
66 |
|
|
67 |
# Run TNRS |
|
68 |
log('Processing '+str(this_ct)+' taxonpaths') |
|
69 |
log('Making TNRS request') |
|
70 |
tnrs_profiler.start() |
|
71 |
try: |
|
72 |
try: stream = tnrs.repeated_tnrs_request(names) |
|
73 |
finally: |
|
74 |
tnrs_profiler.stop(iter_ct=this_ct) |
|
75 |
log('Cumulatively: '+tnrs_profiler.msg()) |
|
76 |
except tnrs.InvalidResponse: pass # skip set in case it caused error |
|
77 |
else: |
|
78 |
log('Storing TNRS response data') |
|
79 |
stream_info = csvs.stream_info(stream, parse_header=True) |
|
80 |
stream = streams.ProgressInputStream(stream, sys.stderr, n=1000) |
|
81 |
sql_io.append_csv(db, tnrs_data, stream_info, stream) |
|
82 | 82 |
|
83 | 83 |
main() |
Also available in: Unified diff
tnrs_db: Print cumulative profiling information after every TNRS request, rather than just at the end