Project

General

Profile

« Previous | Next » 

Revision 1447

cat_csv: Ignore any duplicated headers instead of requiring each CSV to have a header identical to the first. Rewrote it to pass the CSVs through as raw lines rather than parsing each row. Because the CSVs are no longer parsed (and therefore no longer normalized on output), added a check that all CSVs have the same dialect.

View differences:

bin/cat_csv
1 1
#!/usr/bin/env python
2
# Concatenates two spreadsheets with the same headers
2
# Concatenates spreadsheets, removing any duplicated headers
3
# Usage: self [sheet...] >out_sheet
3 4

  
4
import csv
5
import os.path
5 6
import sys
6 7

  
8
sys.path.append(os.path.dirname(__file__)+"/../lib")
9

  
10
import csvs
11
import util
12

  
7 13
def main():
8
    try: _prog_name, sheet_1_path = sys.argv
9
    except ValueError:
10
        raise SystemExit('Usage: '+sys.argv[0]+' <sheet_0 sheet_1 [| '
11
            +sys.argv[0]+' sheet_2]... >cat_sheet')
14
    paths = sys.argv[1:]
12 15
    
13
    # Get cols
14
    reader_0 = csv.reader(sys.stdin)
15
    sheet_0_cols = reader_0.next()
16
    stream_1 = open(sheet_1_path, 'rb')
17
    reader_1 = csv.reader(stream_1)
18
    sheet_1_cols = reader_1.next()
19
    if not sheet_0_cols == sheet_1_cols: raise SystemExit('Map error: '
20
        'Sheet 1 column names don\'t match sheet 0 column names')
21
    
22
    # Write combined sheet
23
    writer = csv.writer(sys.stdout)
24
    writer.writerow(sheet_0_cols)
25
    def write_sheet(reader):
26
        for row in reader: writer.writerow(row)
27
    write_sheet(reader_0)
28
    write_sheet(reader_1)
29
    
30
    stream_1.close()
16
    first_path = None
17
    first_info = None
18
    for path in paths:
19
        stream = open(path, 'rb')
20
        
21
        # Get dialect and process first line
22
        info = csvs.stream_info(stream)
23
        def write_header(): sys.stdout.write(info.header_line)
24
        if first_info == None:
25
            first_path = path
26
            first_info = info
27
            write_header()
28
        else:
29
            if not util.classes_eq(info.dialect, first_info.dialect):
30
                raise SystemExit('Spreadsheet error: "'+path
31
                    +'" dialect doesn\'t match "'+first_path+'" dialect')
32
            if info.header_line != first_info.header_line: write_header()
33
                # not a duplicated header
34
        
35
        # Copy remaining lines
36
        while True:
37
            line = stream.readline()
38
            if line == '': break
39
            sys.stdout.write(line)
40
        
41
        stream.close()
31 42

  
32 43
main()

Also available in: Unified diff