Project

General

Profile

« Previous | Next » 

Revision 5640

tnrs_db: Updated for schema changes (taxonconcept -> taxonlabel, matched_concept_id -> matched_label_id)

View differences:

bin/tnrs_db
1 1
#!/usr/bin/env python
2
# Scrubs the taxonconcepts in VegBIEN using TNRS.
2
# Scrubs the taxonlabels in VegBIEN using TNRS.
3 3
# Runs continuously until no new rows are added after max_pause.
4 4

  
5 5
import os.path
......
45 45
    
46 46
    tnrs_profiler = profiling.ItersProfiler(iter_text='name')
47 47
    
48
    # Iterate over unscrubbed verbatim taxonconcepts
48
    # Iterate over unscrubbed verbatim taxonlabels
49 49
    total_pause = 0
50
    tables = ['taxonconcept', sql_gen.Join('tnrs',
50
    tables = ['taxonlabel', sql_gen.Join('tnrs',
51 51
        {'Name_submitted': 'identifyingtaxonomicname'}, sql_gen.filter_out)]
52 52
    # Has a concatenated name and not already linked to an accepted name
53 53
    conds = [('identifyingtaxonomicname', sql_gen.CompareCond(None, '!=')),
54
        ('matched_concept_id', None)]
54
        ('matched_label_id', None)]
55 55
    while True:
56 56
        # Fetch next set
57 57
        cur = sql.select(db, tables, ['identifyingtaxonomicname'], conds,
58 58
            limit=tnrs.max_names, cacheable=False)
59 59
        this_ct = cur.rowcount
60
        log('Processing '+str(this_ct)+' taxonconcepts')
60
        log('Processing '+str(this_ct)+' taxonlabels')
61 61
        if this_ct == 0:
62 62
            if not wait: break
63 63
            log('Waited '+str(total_pause)+' sec total')

Also available in: Unified diff