Revision 9515
Added by Aaron Marcuse-Kubitza over 11 years ago
bin/tnrs_db | ||
---|---|---|
50 | 50 |
|
51 | 51 |
# Iterate over unscrubbed verbatim taxonlabels |
52 | 52 |
total_pause = 0 |
53 |
def process():
|
|
53 |
while True:
|
|
54 | 54 |
# Fetch next set |
55 | 55 |
cur = sql.select(db, tnrs_input, limit=tnrs.max_names, cacheable=False) |
56 | 56 |
this_ct = cur.rowcount |
57 | 57 |
log('Processing '+str(this_ct)+' taxonlabels') |
58 | 58 |
if this_ct == 0: |
59 |
if not wait: return False
|
|
59 |
if not wait: break
|
|
60 | 60 |
log('Waited '+str(total_pause)+' sec total') |
61 | 61 |
total_pause += pause |
62 |
if total_pause > max_pause: return False
|
|
62 |
if total_pause > max_pause: break
|
|
63 | 63 |
log('Waiting '+str(pause)+' sec...') |
64 | 64 |
time.sleep(pause) # wait for more rows |
65 |
return True # try again
|
|
65 |
continue # try again
|
|
66 | 66 |
# otherwise, rows found |
67 | 67 |
total_pause = 0 |
68 | 68 |
names = list(sql.values(cur)) |
69 | 69 |
|
70 |
# Run TNRS |
|
71 |
log('Making TNRS request') |
|
72 |
now_str = str(dates.now()) |
|
73 |
tnrs_profiler.start() |
|
74 |
try: stream = tnrs.repeated_tnrs_request(names) |
|
75 |
finally: |
|
76 |
tnrs_profiler.stop(iter_ct=this_ct) |
|
77 |
log('Cumulatively: '+tnrs_profiler.msg()) |
|
78 |
|
|
79 |
log('Storing TNRS response data') |
|
80 |
reader, header = csvs.reader_and_header(stream) |
|
81 |
header.insert(0, time_col_name) |
|
82 |
reader = csvs.ColInsertFilter(reader, now_str) |
|
83 |
sql_io.append_csv(db, tnrs_data, reader, header) |
|
84 |
|
|
85 |
return True # continue |
|
86 |
while process(): pass |
|
70 |
def process(): |
|
71 |
# Run TNRS |
|
72 |
log('Making TNRS request') |
|
73 |
now_str = str(dates.now()) |
|
74 |
tnrs_profiler.start() |
|
75 |
try: stream = tnrs.repeated_tnrs_request(names) |
|
76 |
finally: |
|
77 |
tnrs_profiler.stop(iter_ct=this_ct) |
|
78 |
log('Cumulatively: '+tnrs_profiler.msg()) |
|
79 |
|
|
80 |
log('Storing TNRS response data') |
|
81 |
reader, header = csvs.reader_and_header(stream) |
|
82 |
header.insert(0, time_col_name) |
|
83 |
reader = csvs.ColInsertFilter(reader, now_str) |
|
84 |
sql_io.append_csv(db, tnrs_data, reader, header) |
|
85 |
process() |
|
87 | 86 |
|
88 | 87 |
main() |
Also available in: Unified diff
bugfix: bin/tnrs_db: wrap just the TNRS request and the storing of the response data in a function (undoing part of r9514), because the transaction start time for Time_submitted should not be set until the TNRS request is actually made (on a full DB, it often takes several minutes to materialize the next set of input names)