/ - Diff - BIEN 3 - NCEAS Projects

« Previous | Next »

Revision 5121

Added by Aaron Marcuse-Kubitza over 12 years ago

TNRS-related programs: Use "names" instead of "taxons" in variable names, because the values being submitted are actually verbatim taxonomic names, not official references to specific taxa.

     pause_growth_factor = 1.3
     max_pause = 600 # sec; = 10 min
     assert initial_pause <= max_pause
     max_taxons = 5000# according to http://tnrs.iplantcollaborative.org/TNRSapp.html
     max_taxons = 500 # the maximum above crashes the TNRS server
     max_names = 5000 # according to http://tnrs.iplantcollaborative.org/TNRSapp.html
     max_names = 500 # the maximum above crashes the TNRS server
     # Protocol params
     url_base = 'http://tnrs.iplantcollaborative.org/tnrsdemo/'
-...
+    }
     submission_request_template = ('7|0|7|'+url_base+
     '||org.iplantc.tnrs.demo.client.SearchService|doSearch|\
     java.lang.String/2004016611|{"sources":"gcc,tropicos,usda", "names":"[taxons]"\
     java.lang.String/2004016611|{"sources":"gcc,tropicos,usda", "names":"[names]"\
     , "type":"matching", "taxonomic":"true", "classification":"tropicos", \
     "match_to_rank":"true"}|0.05|1|2|3|4|2|5|5|6|7|')
     submission_response_pattern = r'^//OK\[1,\["(\w+)"\],0,7\]$'
-...
                 +response)
         return match.groups()
     def tnrs_request(taxons, debug=False):
         taxon_ct = len(taxons)
         assert taxon_ct <= max_taxons
     def tnrs_request(names, debug=False):
         name_ct = len(names)
         assert name_ct <= max_names
         # Logging
         def debug_log(label, str_=''):
-...
         profiler = profiling.ItersProfiler(start_now=True, iter_text='name')
         try:
             debug_log('Submit')
             request = submission_request_template.replace('[taxons]',
                 r'\\n'.join(map(gwt_encode, taxons))) # double-escape \n
             request = submission_request_template.replace('[names]',
                 r'\\n'.join(map(gwt_encode, names))) # double-escape \n
             response, response_info = do_request(request)
             key, = parse_response('submission', submission_response_pattern, response,
                 response, response_info)
-...
             debug_log('response info', str(response.info()))
             return response
         finally:
             profiler.stop(taxon_ct)
             profiler.stop(name_ct)
             sys.stderr.write(profiler.msg()+'\n')
     def repeated_tnrs_request(taxons, debug=False, **kw_args):
     def repeated_tnrs_request(names, debug=False, **kw_args):
         for try_num in xrange(2):
             try: return tnrs_request(taxons, debug, **kw_args)
             try: return tnrs_request(names, debug, **kw_args)
             except InvalidResponse, e:
                 exc.print_ex(e, detail=False)
                 debug = True

         # Input
         env_names = []
         debug = opts.env_flag('debug', False, env_names)
         taxons = sys.argv[1:]
         if not taxons: raise SystemExit('Usage: '+opts.env_usage(env_names, True)
             +' '+sys.argv[0]+' taxon... >out 2>>log')
         names = sys.argv[1:]
         if not names: raise SystemExit('Usage: '+opts.env_usage(env_names, True)
             +' '+sys.argv[0]+' name... >out 2>>log')
         streams.copy(tnrs.repeated_tnrs_request(taxons, debug), sys.stdout)
         streams.copy(tnrs.repeated_tnrs_request(names, debug), sys.stdout)
     main()

             while True:
                 # Fetch next set
                 cur = sql.select(db, 'taxonpath', ['taxonomicnamewithauthor'],
                     [('canon_taxonpath_id', None)], limit=tnrs.max_taxons,
                     [('canon_taxonpath_id', None)], limit=tnrs.max_names,
                     start=start, cacheable=False)
                 this_ct = cur.rowcount
                 start += this_ct # advance start to fetch next set
-...
                     continue # try again
                 # otherwise, rows found
                 total_pause = 0
                 taxons = list(sql.values(cur))
                 names = list(sql.values(cur))
                 # Run TNRS
                 log('Processing '+str(this_ct)+' taxonpaths')
                 log('Making TNRS request')
                 tnrs_profiler.start()
                 try:
                     try: stream = tnrs.repeated_tnrs_request(taxons)
                     try: stream = tnrs.repeated_tnrs_request(names)
                     finally: tnrs_profiler.stop(iter_ct=this_ct)
                 except tnrs.InvalidResponse: pass # skip set in case it caused error
                 else:

Also available in: Unified diff

Project

General

Profile

Revision 5121

Added by Aaron Marcuse-Kubitza over 12 years ago