Project

General

Profile

« Previous | Next » 

Revision 5121

TNRS-related programs: Use "names" instead of "taxons" in variable names, because the values being submitted are verbatim taxonomic names, not official references to specific taxa

View differences:

lib/tnrs.py
14 14
pause_growth_factor = 1.3
15 15
max_pause = 600 # sec; = 10 min
16 16
assert initial_pause <= max_pause
17
max_taxons = 5000# according to http://tnrs.iplantcollaborative.org/TNRSapp.html
18
max_taxons = 500 # the maximum above crashes the TNRS server
17
max_names = 5000 # according to http://tnrs.iplantcollaborative.org/TNRSapp.html
18
max_names = 500 # the maximum above crashes the TNRS server
19 19

  
20 20
# Protocol params
21 21
url_base = 'http://tnrs.iplantcollaborative.org/tnrsdemo/'
......
27 27
}
28 28
submission_request_template = ('7|0|7|'+url_base+
29 29
'||org.iplantc.tnrs.demo.client.SearchService|doSearch|\
30
java.lang.String/2004016611|{"sources":"gcc,tropicos,usda", "names":"[taxons]"\
30
java.lang.String/2004016611|{"sources":"gcc,tropicos,usda", "names":"[names]"\
31 31
, "type":"matching", "taxonomic":"true", "classification":"tropicos", \
32 32
"match_to_rank":"true"}|0.05|1|2|3|4|2|5|5|6|7|')
33 33
submission_response_pattern = r'^//OK\[1,\["(\w+)"\],0,7\]$'
......
64 64
            +response)
65 65
    return match.groups()
66 66

  
67
def tnrs_request(taxons, debug=False):
68
    taxon_ct = len(taxons)
69
    assert taxon_ct <= max_taxons
67
def tnrs_request(names, debug=False):
68
    name_ct = len(names)
69
    assert name_ct <= max_names
70 70
    
71 71
    # Logging
72 72
    def debug_log(label, str_=''):
......
100 100
    profiler = profiling.ItersProfiler(start_now=True, iter_text='name')
101 101
    try:
102 102
        debug_log('Submit')
103
        request = submission_request_template.replace('[taxons]',
104
            r'\\n'.join(map(gwt_encode, taxons))) # double-escape \n
103
        request = submission_request_template.replace('[names]',
104
            r'\\n'.join(map(gwt_encode, names))) # double-escape \n
105 105
        response, response_info = do_request(request)
106 106
        key, = parse_response('submission', submission_response_pattern, response,
107 107
            response, response_info)
......
135 135
        debug_log('response info', str(response.info()))
136 136
        return response
137 137
    finally:
138
        profiler.stop(taxon_ct)
138
        profiler.stop(name_ct)
139 139
        sys.stderr.write(profiler.msg()+'\n')
140 140

  
141
def repeated_tnrs_request(taxons, debug=False, **kw_args):
141
def repeated_tnrs_request(names, debug=False, **kw_args):
142 142
    for try_num in xrange(2):
143
        try: return tnrs_request(taxons, debug, **kw_args)
143
        try: return tnrs_request(names, debug, **kw_args)
144 144
        except InvalidResponse, e:
145 145
            exc.print_ex(e, detail=False)
146 146
            debug = True
bin/tnrs_client
18 18
    # Input
19 19
    env_names = []
20 20
    debug = opts.env_flag('debug', False, env_names)
21
    taxons = sys.argv[1:]
22
    if not taxons: raise SystemExit('Usage: '+opts.env_usage(env_names, True)
23
        +' '+sys.argv[0]+' taxon... >out 2>>log')
21
    names = sys.argv[1:]
22
    if not names: raise SystemExit('Usage: '+opts.env_usage(env_names, True)
23
        +' '+sys.argv[0]+' name... >out 2>>log')
24 24
    
25
    streams.copy(tnrs.repeated_tnrs_request(taxons, debug), sys.stdout)
25
    streams.copy(tnrs.repeated_tnrs_request(names, debug), sys.stdout)
26 26

  
27 27
main()
bin/tnrs_db
50 50
        while True:
51 51
            # Fetch next set
52 52
            cur = sql.select(db, 'taxonpath', ['taxonomicnamewithauthor'],
53
                [('canon_taxonpath_id', None)], limit=tnrs.max_taxons,
53
                [('canon_taxonpath_id', None)], limit=tnrs.max_names,
54 54
                start=start, cacheable=False)
55 55
            this_ct = cur.rowcount
56 56
            start += this_ct # advance start to fetch next set
......
62 62
                continue # try again
63 63
            # otherwise, rows found
64 64
            total_pause = 0
65
            taxons = list(sql.values(cur))
65
            names = list(sql.values(cur))
66 66
            
67 67
            # Run TNRS
68 68
            log('Processing '+str(this_ct)+' taxonpaths')
69 69
            log('Making TNRS request')
70 70
            tnrs_profiler.start()
71 71
            try:
72
                try: stream = tnrs.repeated_tnrs_request(taxons)
72
                try: stream = tnrs.repeated_tnrs_request(names)
73 73
                finally: tnrs_profiler.stop(iter_ct=this_ct)
74 74
            except tnrs.InvalidResponse: pass # skip set in case it caused error
75 75
            else:

Also available in: Unified diff