Revision 5640
Added by Aaron Marcuse-Kubitza over 12 years ago
bin/tnrs_db | ||
---|---|---|
1 | 1 |
#!/usr/bin/env python |
2 |
# Scrubs the taxonconcepts in VegBIEN using TNRS.
|
|
2 |
# Scrubs the taxonlabels in VegBIEN using TNRS.
|
|
3 | 3 |
# Runs continuously until no new rows are added after max_pause. |
4 | 4 |
|
5 | 5 |
import os.path |
... | ... | |
45 | 45 |
|
46 | 46 |
tnrs_profiler = profiling.ItersProfiler(iter_text='name') |
47 | 47 |
|
48 |
# Iterate over unscrubbed verbatim taxonconcepts
|
|
48 |
# Iterate over unscrubbed verbatim taxonlabels
|
|
49 | 49 |
total_pause = 0 |
50 |
tables = ['taxonconcept', sql_gen.Join('tnrs',
|
|
50 |
tables = ['taxonlabel', sql_gen.Join('tnrs',
|
|
51 | 51 |
{'Name_submitted': 'identifyingtaxonomicname'}, sql_gen.filter_out)] |
52 | 52 |
# Has a concatenated name and not already linked to an accepted name |
53 | 53 |
conds = [('identifyingtaxonomicname', sql_gen.CompareCond(None, '!=')), |
54 |
('matched_concept_id', None)]
|
|
54 |
('matched_label_id', None)]
|
|
55 | 55 |
while True: |
56 | 56 |
# Fetch next set |
57 | 57 |
cur = sql.select(db, tables, ['identifyingtaxonomicname'], conds, |
58 | 58 |
limit=tnrs.max_names, cacheable=False) |
59 | 59 |
this_ct = cur.rowcount |
60 |
log('Processing '+str(this_ct)+' taxonconcepts')
|
|
60 |
log('Processing '+str(this_ct)+' taxonlabels')
|
|
61 | 61 |
if this_ct == 0: |
62 | 62 |
if not wait: break |
63 | 63 |
log('Waited '+str(total_pause)+' sec total') |
Also available in: Unified diff
tnrs_db: Updated with schema changes