Revision 5098
Added by Aaron Marcuse-Kubitza about 12 years ago
bin/tnrs_db | ||
---|---|---|
11 | 11 |
|
12 | 12 |
import csvs |
13 | 13 |
import opts |
14 |
import profiling |
|
14 | 15 |
import sql |
15 | 16 |
import sql_gen |
16 | 17 |
import sql_io |
... | ... | |
42 | 43 |
# Connect to DB |
43 | 44 |
db = sql.connect(db_config, log_debug=log) |
44 | 45 |
|
45 |
# Iterate over unscrubbed verbatim taxonpaths |
|
46 |
start = 0 |
|
47 |
total_pause = 0 |
|
48 |
while True: |
|
49 |
# Fetch next set |
|
50 |
cur = sql.select(db, 'taxonpath', ['taxonomicnamewithauthor'], |
|
51 |
[('canon_taxonpath_id', None)], limit=max_taxons, start=start, |
|
52 |
cacheable=False) |
|
53 |
this_ct = cur.rowcount |
|
54 |
start += this_ct # advance start to fetch next set |
|
55 |
if this_ct == 0: |
|
56 |
total_pause += pause |
|
57 |
if total_pause > max_pause: break |
|
58 |
log('Waited '+str(total_pause)+' sec. Waiting...') |
|
59 |
time.sleep(pause) # wait for more rows |
|
60 |
continue # try again |
|
61 |
# otherwise, rows found |
|
46 |
tnrs_profiler = profiling.ItersProfiler(iter_text='row') |
|
47 |
try: |
|
48 |
# Iterate over unscrubbed verbatim taxonpaths |
|
49 |
start = 0 |
|
62 | 50 |
total_pause = 0 |
63 |
|
|
64 |
# Run TNRS |
|
65 |
log('Making TNRS request') |
|
66 |
try: stream = tnrs.repeated_tnrs_request(list(sql.values(cur))) |
|
67 |
except InvalidResponse: pass # skip this set in case it caused the error |
|
68 |
else: |
|
69 |
log('Storing TNRS response data') |
|
70 |
stream_info = csvs.stream_info(stream, parse_header=True) |
|
71 |
stream = streams.ProgressInputStream(stream, sys.stderr, n=1000) |
|
72 |
sql_io.append_csv(db, tnrs_data, stream_info, stream) |
|
51 |
while True: |
|
52 |
# Fetch next set |
|
53 |
cur = sql.select(db, 'taxonpath', ['taxonomicnamewithauthor'], |
|
54 |
[('canon_taxonpath_id', None)], limit=max_taxons, start=start, |
|
55 |
cacheable=False) |
|
56 |
this_ct = cur.rowcount |
|
57 |
start += this_ct # advance start to fetch next set |
|
58 |
if this_ct == 0: |
|
59 |
total_pause += pause |
|
60 |
if total_pause > max_pause: break |
|
61 |
log('Waited '+str(total_pause)+' sec. Waiting...') |
|
62 |
time.sleep(pause) # wait for more rows |
|
63 |
continue # try again |
|
64 |
# otherwise, rows found |
|
65 |
total_pause = 0 |
|
66 |
taxons = list(sql.values(cur)) |
|
67 |
|
|
68 |
# Run TNRS |
|
69 |
log('Making TNRS request') |
|
70 |
tnrs_profiler.start() |
|
71 |
try: |
|
72 |
try: stream = tnrs.repeated_tnrs_request(taxons) |
|
73 |
finally: tnrs_profiler.stop(iter_ct=this_ct) |
|
74 |
except InvalidResponse: pass # skip set in case it caused the error |
|
75 |
else: |
|
76 |
log('Storing TNRS response data') |
|
77 |
stream_info = csvs.stream_info(stream, parse_header=True) |
|
78 |
stream = streams.ProgressInputStream(stream, sys.stderr, n=1000) |
|
79 |
sql_io.append_csv(db, tnrs_data, stream_info, stream) |
|
80 |
finally: |
|
81 |
log(tnrs_profiler.msg()) |
|
73 | 82 |
|
74 | 83 |
main() |
Also available in: Unified diff
tnrs_db: Profile the TNRS requests. The main fetch/request loop is now wrapped in a try/finally block so that the profiling stats are logged even if the program exits with an error.