Project

General

Profile

« Previous | Next » 

Revision 1643

inputs/REMIB/src/nodes.all.specimens.csv.make: Write each node to a separate output file

View differences:

inputs/REMIB/src/nodes.all.specimens.csv.make
34 34
    def log(msg, line_ending='\n'):
35 35
        sys.stderr.write(('    '*log_indent)+msg+line_ending)
36 36
    
37
    stdout = streams.LineCountOutputStream(sys.stdout)
37
    os.chdir(os.path.dirname(__file__)) # dir of output files
38 38
    
39 39
    # Get by family ('familia') because that is the most general level at which
40 40
    # an identification can be made. This assumes all records have a family.
......
43 43
        'pais=Todos&pais_otro=&estado=100&formato=csv&mapa=no&mapabase=estados'
44 44
        '&coleccion=id%3D[node_id]')
45 45
    done = False
46
    for node_id in itertools.count(1):
46
    for node_id in itertools.count(2):
47 47
        if done: break
48 48
        log('Processing node #'+str(node_id)+'...')
49 49
        log_indent += 1
50
        start_line_num = stdout.line_num
51 50
        profiler = profiling.ItersProfiler(start_now=True, iter_text='row')
52 51
        
52
        filename = 'node.'+str(node_id)+'.specimens.csv'
53
        out = streams.LineCountOutputStream(open(filename, 'w'))
54
        start_line_num = out.line_num
53 55
        node_url_template = url_template.replace('[node_id]', str(node_id))
56
        
54 57
        for prefix_chars in itertools.product(alphabet, repeat=2):
55 58
            if done: break
56 59
            prefix = ''.join(prefix_chars)
......
69 72
                util.skip(stream, is_ignore) # skip header
70 73
                try:
71 74
                    metadata_row = csv.reader(stream).next()
72
                    if metadata_row[1] != 'COLLECTION': raise InputException(
75
                    if metadata_row[0] != 'COLLECTION': raise InputException(
73 76
                        'Invalid metadata row: '+str(metadata_row))
74 77
                except StopIteration:
75 78
                    done = True # empty response means no more nodes
......
81 84
                        break
82 85
                    if row_ct % 100 == 0: print_status('\r')
83 86
                        # CR at end so next print overwrites msg
84
                    stdout.write(line)
87
                    out.write(line)
85 88
                    row_ct += 1
86 89
            except (InputException, timeout.TimeoutException), e:
87 90
                clear_line()
88
                log('! Output line '+str(stdout.line_num)+': '+exc.str_(e))
91
                log('! Output line '+str(out.line_num)+': '+exc.str_(e))
89 92
            stream.close()
90 93
            
91 94
            profiler.add_iters(row_ct)
......
94 97
            
95 98
        profiler.stop()
96 99
        log(profiler.msg())
97
        
98
        if stdout.line_num == start_line_num: line_range = '<none>'
99
        else: line_range = str(start_line_num)+'-'+str(stdout.line_num-1)
100
        log('Used output lines '+line_range)
101
        
102 100
        log_indent -= 1
103 101

  
104 102
main()

Also available in: Unified diff