Revision 5120
Added by Aaron Marcuse-Kubitza about 12 years ago
tnrs.py

@@ -6,6 +6,7 @@
 import urllib2
 
 import exc
+import profiling
 import streams
 
 # Config
@@ -64,7 +65,8 @@
     return match.groups()
 
 def tnrs_request(taxons, debug=False):
-    assert len(taxons) <= max_taxons
+    taxon_ct = len(taxons)
+    assert taxon_ct <= max_taxons
 
     # Logging
     def debug_log(label, str_=''):
@@ -95,41 +97,46 @@
             except urllib2.HTTPError: pass # try again
             pause *= pause_growth_factor
 
-    debug_log('Submit')
-    request = submission_request_template.replace('[taxons]',
-        r'\\n'.join(map(gwt_encode, taxons))) # double-escape \n
-    response, response_info = do_request(request)
-    key, = parse_response('submission', submission_response_pattern, response,
-        response, response_info)
-    debug_log('key', key)
-    key_enc = gwt_encode(key)
-
-    debug_log('Retrieve')
-    request = retrieval_request_template.replace('[key]', key_enc)
-    response, response_info = do_repeated_request(request)
-    parse_response('retrieval', retrieval_response_pattern, response, response,
-        response_info)
-    session_id, = parse_response('retrieval info',
-        retrieval_response_info_pattern, response_info, response, response_info)
-    debug_log('session_id', session_id)
-    headers['Cookie'] = 'JSESSIONID='+session_id
-
-    # The output of the retrieve step is unusable because the array has
-    # different lengths depending on the taxonomic ranks present in the provided
-    # taxon name. The extra download step is therefore necessary.
-
-    debug_log('Prepare download')
-    request = download_request_template.replace('[key]', key_enc)
-    response, response_info = do_request(request)
-    csv_url, = parse_response('download', download_response_pattern, response,
-        response, response_info)
-    csv_url += download_url_suffix
-    debug_log('csv_url', csv_url)
-
-    debug_log('Download')
-    response = urllib2.urlopen(urllib2.Request(csv_url))
-    debug_log('response info', str(response.info()))
-    return response
+    profiler = profiling.ItersProfiler(start_now=True, iter_text='name')
+    try:
+        debug_log('Submit')
+        request = submission_request_template.replace('[taxons]',
+            r'\\n'.join(map(gwt_encode, taxons))) # double-escape \n
+        response, response_info = do_request(request)
+        key, = parse_response('submission', submission_response_pattern, response,
+            response, response_info)
+        debug_log('key', key)
+        key_enc = gwt_encode(key)
+
+        debug_log('Retrieve')
+        request = retrieval_request_template.replace('[key]', key_enc)
+        response, response_info = do_repeated_request(request)
+        parse_response('retrieval', retrieval_response_pattern, response, response,
+            response_info)
+        session_id, = parse_response('retrieval info',
+            retrieval_response_info_pattern, response_info, response, response_info)
+        debug_log('session_id', session_id)
+        headers['Cookie'] = 'JSESSIONID='+session_id
+
+        # The output of the retrieve step is unusable because the array has
+        # different lengths depending on the taxonomic ranks present in the provided
+        # taxon name. The extra download step is therefore necessary.
+
+        debug_log('Prepare download')
+        request = download_request_template.replace('[key]', key_enc)
+        response, response_info = do_request(request)
+        csv_url, = parse_response('download', download_response_pattern, response,
+            response, response_info)
+        csv_url += download_url_suffix
+        debug_log('csv_url', csv_url)
+
+        debug_log('Download')
+        response = urllib2.urlopen(urllib2.Request(csv_url))
+        debug_log('response info', str(response.info()))
+        return response
+    finally:
+        profiler.stop(taxon_ct)
+        sys.stderr.write(profiler.msg()+'\n')
 
 def repeated_tnrs_request(taxons, debug=False, **kw_args):
     for try_num in xrange(2):
tnrs.py: tnrs_request(): Profile the TNRS request
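
The net effect of the revision is to wrap the body of tnrs_request() in try/finally so that per-name timing is written to stderr even when one of the HTTP steps raises. The sketch below illustrates that pattern in isolation; ItersProfilerSketch and profiled_request are hypothetical names invented for this note, and the stand-in class only mimics the pieces of profiling.ItersProfiler that the diff actually uses (the start_now/iter_text constructor arguments, stop(), and msg()). Its internals are an assumption, not the repository's implementation.

import sys
import time

class ItersProfilerSketch:
    '''Simplified stand-in for profiling.ItersProfiler (assumed API).'''
    def __init__(self, start_now=False, iter_text='iteration'):
        self.iter_text = iter_text
        self.start = time.time() if start_now else None

    def stop(self, iter_ct):
        self.elapsed = time.time() - self.start
        self.iter_ct = iter_ct

    def msg(self):
        each = self.elapsed/self.iter_ct if self.iter_ct else 0
        return ('processed %d %s(s) in %.2f s (%.3f s each)'
            % (self.iter_ct, self.iter_text, self.elapsed, each))

def profiled_request(taxons):
    '''Hypothetical caller, wrapped the same way as tnrs_request().'''
    taxon_ct = len(taxons)
    profiler = ItersProfilerSketch(start_now=True, iter_text='name')
    try:
        # the submit/retrieve/download steps would go here
        time.sleep(0.01) # placeholder for the real work
        return 'csv response'
    finally:
        # runs whether the request returned or raised, so timings are
        # still reported for failed or partial requests
        profiler.stop(taxon_ct)
        sys.stderr.write(profiler.msg()+'\n')

profiled_request(['Pinus ponderosa', 'Quercus alba'])

Keeping profiler.stop() in the finally block means a failed or partial request still reports how long it ran and how many names it covered, which is useful here because do_repeated_request() may pause and retry repeatedly before the retrieve step succeeds.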