Revision 14540
Added by Aaron Marcuse-Kubitza over 10 years ago
trunk/lib/tnrs.py | ||
---|---|---|
1 | 1 |
# TNRS |
2 | 2 |
|
3 |
import json |
|
3 | 4 |
import os.path |
4 | 5 |
import re |
5 | 6 |
import sys |
... | ... | |
131 | 132 |
+response) |
132 | 133 |
return match.groups() |
133 | 134 |
|
134 |
def single_tnrs_request(names, debug=False, cumulative_profiler=None): |
|
135 |
def single_tnrs_request(names, debug=False, cumulative_profiler=None, |
|
136 |
use_tnrs_export=True): |
|
135 | 137 |
''' |
136 | 138 |
Note that names containing only whitespace characters (after gwt_encode()) |
137 | 139 |
are ignored by TNRS and do not receive a response row. Thus, you should |
138 | 140 |
always match up the Name_submitted returned by TNRS with the actual |
139 | 141 |
submitted name to determine the corresponding TNRS response row. |
142 |
|
|
143 |
@param use_tnrs_export whether to use TNRS's TSV export feature, which |
|
144 |
currently returns incorrect selected matches (vegpath.org/issues/943) |
|
140 | 145 |
''' |
141 | 146 |
name_ct = len(names) |
142 | 147 |
assert name_ct <= max_names |
... | ... | |
200 | 205 |
# lengths depending on the taxonomic ranks present in the provided taxon |
201 | 206 |
# name. the extra download step is therefore necessary. |
202 | 207 |
|
203 |
debug_log('Prepare download') |
|
204 |
request = download_request_template.replace('[key]', key_enc) |
|
205 |
response, response_info = do_request(request) |
|
206 |
csv_url, = parse_response('download', download_response_pattern, |
|
207 |
response, response, response_info) |
|
208 |
csv_url += download_url_suffix |
|
209 |
debug_log('csv_url', csv_url) |
|
210 |
|
|
211 |
debug_log('Download') |
|
212 |
request_obj = urllib2.Request(csv_url) |
|
213 |
debug_log('request URL', str(csv_url)) |
|
214 |
debug_log('request info', str(request_obj.header_items())) |
|
215 |
debug_log('request str', str(request_obj.get_data())) |
|
216 |
response = urllib2.urlopen(request_obj) |
|
217 |
response_info = str(response.info()) |
|
218 |
debug_log('response info', response_info) |
|
219 |
return TnrsOutputStream(response) |
|
208 |
if use_tnrs_export: |
|
209 |
debug_log('Prepare download') |
|
210 |
request = download_request_template.replace('[key]', key_enc) |
|
211 |
response, response_info = do_request(request) |
|
212 |
csv_url, = parse_response('download', download_response_pattern, |
|
213 |
response, response, response_info) |
|
214 |
csv_url += download_url_suffix |
|
215 |
debug_log('csv_url', csv_url) |
|
216 |
|
|
217 |
debug_log('Download') |
|
218 |
request_obj = urllib2.Request(csv_url) |
|
219 |
debug_log('request URL', str(csv_url)) |
|
220 |
debug_log('request info', str(request_obj.header_items())) |
|
221 |
debug_log('request str', str(request_obj.get_data())) |
|
222 |
response = urllib2.urlopen(request_obj) |
|
223 |
response_info = str(response.info()) |
|
224 |
debug_log('response info', response_info) |
|
225 |
return TnrsOutputStream(response) |
|
226 |
else: |
|
227 |
debug_log('Expand') |
|
228 |
request = ('7|0|6|http://tnrs.iplantcollaborative.org/tnrsdemo/|\ |
|
229 |
1E87C78041CEFBF0992F46BDF84D7D60|org.iplantc.tnrs.demo.client.SearchService|\ |
|
230 |
requestGroupMembers|java.lang.String/2004016611|{"group":"0", \ |
|
231 |
"email":"tnrs@lka5jjs.orv", "key":"[key]", "source_sorting":"false", \ |
|
232 |
"taxonomic_constraint":"false"}|1|2|3|4|1|5|6|'.replace('[key]', key_enc)) |
|
233 |
response, response_info = do_request(request) |
|
234 |
data_gwt, = parse_response('expand', r'^//OK\[1,\["(.*)"\],0,7\]$', |
|
235 |
response, response, response_info) |
|
236 |
data_json = json.loads(gwt_decode(data_gwt)) |
|
237 |
raise NotImplementedError() |
|
220 | 238 |
finally: |
221 | 239 |
profiler.stop(name_ct) |
222 | 240 |
sys.stderr.write(profiler.msg()+'\n') |
Also available in: Unified diff
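lib/tnrs.py: Added a use_tnrs_export option to single_tnrs_request() to avoid using TNRS's TSV export feature, which currently returns incorrect selected matches (vegpath.org/issues/943). The alternative (non-export) path has so far been implemented only up through the GWT/JSON decoding of the response; after that step it raises NotImplementedError.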