Project

General

Profile

« Previous | Next » 

Revision 5120

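tnrs.py: tnrs_request(): Profile the TNRS request (the request steps now run inside try/finally under a profiling.ItersProfiler, and the profiler's message is written to stderr when the request finishes or fails)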

View differences:

tnrs.py
6 6
import urllib2
7 7

  
8 8
import exc
9
import profiling
9 10
import streams
10 11

  
11 12
# Config
... (unchanged lines omitted) ...
64 65
    return match.groups()
65 66

  
66 67
def tnrs_request(taxons, debug=False):
67
    assert len(taxons) <= max_taxons
68
    taxon_ct = len(taxons)
69
    assert taxon_ct <= max_taxons
68 70
    
69 71
    # Logging
70 72
    def debug_log(label, str_=''):
... (unchanged lines omitted) ...
95 97
            except urllib2.HTTPError: pass # try again
96 98
            pause *= pause_growth_factor
97 99
    
98
    debug_log('Submit')
99
    request = submission_request_template.replace('[taxons]',
100
        r'\\n'.join(map(gwt_encode, taxons))) # double-escape \n
101
    response, response_info = do_request(request)
102
    key, = parse_response('submission', submission_response_pattern, response,
103
        response, response_info)
104
    debug_log('key', key)
105
    key_enc = gwt_encode(key)
106
    
107
    debug_log('Retrieve')
108
    request = retrieval_request_template.replace('[key]', key_enc)
109
    response, response_info = do_repeated_request(request)
110
    parse_response('retrieval', retrieval_response_pattern, response, response,
111
        response_info)
112
    session_id, = parse_response('retrieval info',
113
        retrieval_response_info_pattern, response_info, response, response_info)
114
    debug_log('session_id', session_id)
115
    headers['Cookie'] = 'JSESSIONID='+session_id
116
    
117
    # The output of the retrieve step is unusable because the array has
118
    # different lengths depending on the taxonomic ranks present in the provided
119
    # taxon name. The extra download step is therefore necessary.
120
    
121
    debug_log('Prepare download')
122
    request = download_request_template.replace('[key]', key_enc)
123
    response, response_info = do_request(request)
124
    csv_url, = parse_response('download', download_response_pattern, response,
125
        response, response_info)
126
    csv_url += download_url_suffix
127
    debug_log('csv_url', csv_url)
128
    
129
    debug_log('Download')
130
    response = urllib2.urlopen(urllib2.Request(csv_url))
131
    debug_log('response info', str(response.info()))
132
    return response
100
    profiler = profiling.ItersProfiler(start_now=True, iter_text='name')
101
    try:
102
        debug_log('Submit')
103
        request = submission_request_template.replace('[taxons]',
104
            r'\\n'.join(map(gwt_encode, taxons))) # double-escape \n
105
        response, response_info = do_request(request)
106
        key, = parse_response('submission', submission_response_pattern, response,
107
            response, response_info)
108
        debug_log('key', key)
109
        key_enc = gwt_encode(key)
110
        
111
        debug_log('Retrieve')
112
        request = retrieval_request_template.replace('[key]', key_enc)
113
        response, response_info = do_repeated_request(request)
114
        parse_response('retrieval', retrieval_response_pattern, response, response,
115
            response_info)
116
        session_id, = parse_response('retrieval info',
117
            retrieval_response_info_pattern, response_info, response, response_info)
118
        debug_log('session_id', session_id)
119
        headers['Cookie'] = 'JSESSIONID='+session_id
120
        
121
        # The output of the retrieve step is unusable because the array has
122
        # different lengths depending on the taxonomic ranks present in the provided
123
        # taxon name. The extra download step is therefore necessary.
124
        
125
        debug_log('Prepare download')
126
        request = download_request_template.replace('[key]', key_enc)
127
        response, response_info = do_request(request)
128
        csv_url, = parse_response('download', download_response_pattern, response,
129
            response, response_info)
130
        csv_url += download_url_suffix
131
        debug_log('csv_url', csv_url)
132
        
133
        debug_log('Download')
134
        response = urllib2.urlopen(urllib2.Request(csv_url))
135
        debug_log('response info', str(response.info()))
136
        return response
137
    finally:
138
        profiler.stop(taxon_ct)
139
        sys.stderr.write(profiler.msg()+'\n')
133 140

  
134 141
def repeated_tnrs_request(taxons, debug=False, **kw_args):
135 142
    for try_num in xrange(2):

Also available in: Unified diff