Revision 5121
Added by Aaron Marcuse-Kubitza about 12 years ago
lib/tnrs.py | ||
---|---|---|
14 | 14 |
pause_growth_factor = 1.3 |
15 | 15 |
max_pause = 600 # sec; = 10 min |
16 | 16 |
assert initial_pause <= max_pause |
17 |
max_taxons = 5000# according to http://tnrs.iplantcollaborative.org/TNRSapp.html
|
|
18 |
max_taxons = 500 # the maximum above crashes the TNRS server
|
|
17 |
max_names = 5000 # according to http://tnrs.iplantcollaborative.org/TNRSapp.html
|
|
18 |
max_names = 500 # the maximum above crashes the TNRS server
|
|
19 | 19 |
|
20 | 20 |
# Protocol params |
21 | 21 |
url_base = 'http://tnrs.iplantcollaborative.org/tnrsdemo/' |
... | ... | |
27 | 27 |
} |
28 | 28 |
submission_request_template = ('7|0|7|'+url_base+ |
29 | 29 |
'||org.iplantc.tnrs.demo.client.SearchService|doSearch|\ |
30 |
java.lang.String/2004016611|{"sources":"gcc,tropicos,usda", "names":"[taxons]"\
|
|
30 |
java.lang.String/2004016611|{"sources":"gcc,tropicos,usda", "names":"[names]"\
|
|
31 | 31 |
, "type":"matching", "taxonomic":"true", "classification":"tropicos", \ |
32 | 32 |
"match_to_rank":"true"}|0.05|1|2|3|4|2|5|5|6|7|') |
33 | 33 |
submission_response_pattern = r'^//OK\[1,\["(\w+)"\],0,7\]$' |
... | ... | |
64 | 64 |
+response) |
65 | 65 |
return match.groups() |
66 | 66 |
|
67 |
def tnrs_request(taxons, debug=False):
|
|
68 |
taxon_ct = len(taxons)
|
|
69 |
assert taxon_ct <= max_taxons
|
|
67 |
def tnrs_request(names, debug=False):
|
|
68 |
name_ct = len(names)
|
|
69 |
assert name_ct <= max_names
|
|
70 | 70 |
|
71 | 71 |
# Logging |
72 | 72 |
def debug_log(label, str_=''): |
... | ... | |
100 | 100 |
profiler = profiling.ItersProfiler(start_now=True, iter_text='name') |
101 | 101 |
try: |
102 | 102 |
debug_log('Submit') |
103 |
request = submission_request_template.replace('[taxons]',
|
|
104 |
r'\\n'.join(map(gwt_encode, taxons))) # double-escape \n
|
|
103 |
request = submission_request_template.replace('[names]',
|
|
104 |
r'\\n'.join(map(gwt_encode, names))) # double-escape \n
|
|
105 | 105 |
response, response_info = do_request(request) |
106 | 106 |
key, = parse_response('submission', submission_response_pattern, response, |
107 | 107 |
response, response_info) |
... | ... | |
135 | 135 |
debug_log('response info', str(response.info())) |
136 | 136 |
return response |
137 | 137 |
finally: |
138 |
profiler.stop(taxon_ct)
|
|
138 |
profiler.stop(name_ct)
|
|
139 | 139 |
sys.stderr.write(profiler.msg()+'\n') |
140 | 140 |
|
141 |
def repeated_tnrs_request(taxons, debug=False, **kw_args):
|
|
141 |
def repeated_tnrs_request(names, debug=False, **kw_args):
|
|
142 | 142 |
for try_num in xrange(2): |
143 |
try: return tnrs_request(taxons, debug, **kw_args)
|
|
143 |
try: return tnrs_request(names, debug, **kw_args)
|
|
144 | 144 |
except InvalidResponse, e: |
145 | 145 |
exc.print_ex(e, detail=False) |
146 | 146 |
debug = True |
bin/tnrs_client | ||
---|---|---|
18 | 18 |
# Input |
19 | 19 |
env_names = [] |
20 | 20 |
debug = opts.env_flag('debug', False, env_names) |
21 |
taxons = sys.argv[1:]
|
|
22 |
if not taxons: raise SystemExit('Usage: '+opts.env_usage(env_names, True)
|
|
23 |
+' '+sys.argv[0]+' taxon... >out 2>>log')
|
|
21 |
names = sys.argv[1:]
|
|
22 |
if not names: raise SystemExit('Usage: '+opts.env_usage(env_names, True)
|
|
23 |
+' '+sys.argv[0]+' name... >out 2>>log')
|
|
24 | 24 |
|
25 |
streams.copy(tnrs.repeated_tnrs_request(taxons, debug), sys.stdout)
|
|
25 |
streams.copy(tnrs.repeated_tnrs_request(names, debug), sys.stdout)
|
|
26 | 26 |
|
27 | 27 |
main() |
bin/tnrs_db | ||
---|---|---|
50 | 50 |
while True: |
51 | 51 |
# Fetch next set |
52 | 52 |
cur = sql.select(db, 'taxonpath', ['taxonomicnamewithauthor'], |
53 |
[('canon_taxonpath_id', None)], limit=tnrs.max_taxons,
|
|
53 |
[('canon_taxonpath_id', None)], limit=tnrs.max_names,
|
|
54 | 54 |
start=start, cacheable=False) |
55 | 55 |
this_ct = cur.rowcount |
56 | 56 |
start += this_ct # advance start to fetch next set |
... | ... | |
62 | 62 |
continue # try again |
63 | 63 |
# otherwise, rows found |
64 | 64 |
total_pause = 0 |
65 |
taxons = list(sql.values(cur))
|
|
65 |
names = list(sql.values(cur))
|
|
66 | 66 |
|
67 | 67 |
# Run TNRS |
68 | 68 |
log('Processing '+str(this_ct)+' taxonpaths') |
69 | 69 |
log('Making TNRS request') |
70 | 70 |
tnrs_profiler.start() |
71 | 71 |
try: |
72 |
try: stream = tnrs.repeated_tnrs_request(taxons)
|
|
72 |
try: stream = tnrs.repeated_tnrs_request(names)
|
|
73 | 73 |
finally: tnrs_profiler.stop(iter_ct=this_ct) |
74 | 74 |
except tnrs.InvalidResponse: pass # skip set in case it caused error |
75 | 75 |
else: |
Also available in: Unified diff
TNRS-related programs: Use "names" instead of "taxons" in variable names, because the values being submitted are actually verbatim taxonomic names, not official references to specific taxa.