Project

General

Profile

1
#!/usr/bin/env python
2
# Scrubs the taxonlabels in VegBIEN using TNRS.
3

    
4
import os.path
5
import sys
6

    
7
# Make the project's lib/ directory (a sibling of this script's directory)
# importable. abspath() is applied first so that running the script by bare name
# (where dirname(__file__) is '') doesn't produce "/../lib", i.e. the root /lib.
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..",
    "lib"))
8

    
9
import csvs
10
import opts
11
import profiling
12
import sql
13
import sql_gen
14
import sql_io
15
import strings
16
import tnrs
17

    
18
# Table that main() selects each batch of names to scrub from
tnrs_input = sql_gen.Table('tnrs_input_name')
19
# Table that main() appends the TNRS response CSV rows to
tnrs_data = sql_gen.Table('tnrs')
20

    
21
def main():
    '''Scrubs the unscrubbed taxonlabels in batches using TNRS.

    Each pass selects up to tnrs.max_names rows from tnrs_input, sends them to
    TNRS, and appends the CSV response to tnrs_data. The loop ends when a
    select returns no rows.
    NOTE(review): this presumably relies on tnrs_input no longer listing a name
    once its results are in tnrs_data; otherwise the loop would not terminate
    -- confirm against the DB schema.

    Settings are read via opts: the names in sql.db_config_names, plus
    `verbosity` (default 3). Raises SystemExit with a usage message if the DB
    config has no 'engine'.
    '''
    # Input
    env_names = []  # filled in by opts with the names it looked up
    db_config = opts.get_env_vars(sql.db_config_names, None, env_names)
    verbosity = float(opts.get_env_var('verbosity', 3, env_names))
    if 'engine' not in db_config:
        raise SystemExit('Usage: '+opts.env_usage(env_names)+' '+sys.argv[0]
            +' 2>>log')

    def log(msg, level=1):
        '''Higher level -> more verbose'''
        if level <= verbosity:
            # Normalize to exactly one trailing newline
            sys.stderr.write(strings.to_raw_str(msg.rstrip('\n')+'\n'))

    # Connect to DB
    db = sql.connect(db_config, log_debug=log)

    # Shared across all batches so the TNRS timing stats are cumulative
    cumulative_tnrs_profiler = profiling.ItersProfiler(iter_text='name')

    # Iterate over unscrubbed verbatim taxonlabels
    while True:
        # Fetch next set
        cur = sql.select(db, tnrs_input, limit=tnrs.max_names, cacheable=False)
        this_ct = cur.rowcount
        log('Processing '+str(this_ct)+' taxonlabels')
        if this_ct == 0: break
        # otherwise, rows found
        names = list(sql.values(cur))

        def process():
            '''Submits the current batch to TNRS and stores the response'''
            # Run TNRS
            log('Making TNRS request')
            stream = tnrs.tnrs_request(names,
                cumulative_profiler=cumulative_tnrs_profiler)

            log('Storing TNRS response data')
            sql_io.append_csv(db, tnrs_data, *csvs.reader_and_header(stream))

        # start transaction *before* submitting data, so Time_submitted is
        # correctly set to the submission time rather than the insertion time.
        # these may differ by several minutes if TNRS is slow.
        sql.with_savepoint(db, process)
61

    
62
# Entry point: runs the scrub as soon as this script is executed
main()
(73-73/81)