Project

General

Profile

1 355 aaronmk
#!/usr/bin/env python
2 1447 aaronmk
# Concatenates spreadsheets, removing any duplicated headers
3
# Usage: self [sheet...] >out_sheet
4 3071 aaronmk
#
5
# The existing header can be overwritten by adding a separate header file whose
6
# header line is prefixed with "!". This is useful if the existing header's
7
# column names are too long, not descriptive enough, etc.
8 355 aaronmk
9 1447 aaronmk
import os.path
10 355 aaronmk
import sys
11
12 1447 aaronmk
sys.path.append(os.path.dirname(__file__)+"/../lib")
13
14
import csvs
15 3071 aaronmk
import strings
16 1447 aaronmk
import util
17
18 355 aaronmk
def main():
    """Concatenate the spreadsheets named on the command line to stdout.

    Each command-line argument is a path to a spreadsheet; '-' means stdin.
    The first header line encountered is written once. A later sheet's header
    is written only if it differs from the compare-to header (i.e. it is not
    a duplicate); the remaining lines of every sheet are copied verbatim.

    A sheet whose header line starts with "!" overrides the existing header:
    its header (minus the "!") is written, and the next sheet's header becomes
    the compare-to header instead.

    Raises:
        SystemExit: if a sheet's dialect differs from the compare-to sheet's.
        IOError: for any I/O failure other than a closed output pipe.
    """
    import errno # local import: only needed to recognize a closed output pipe
    
    paths = sys.argv[1:]
    
    header_written = False
    first_path = None
    first_info = None # info of the sheet that later sheets are compared to
    
    for path in paths:
        if path == '-': stream = sys.stdin
        else: stream = open(path, 'rb')
        
        try:
            # Get dialect and process first line. This is inside the try so
            # the stream is closed even if inspecting it fails.
            info = csvs.stream_info(stream)
            if info.dialect is None: continue # dialect of None = empty stream
            
            # whether to overwrite the existing header; presumably set to
            # [True] by remove_prefix() when the "!" prefix was present
            overwrite_ref = [False]
            info.header_line = strings.remove_prefix('!', info.header_line,
                overwrite_ref)
            
            if first_info is None:
                first_path = path
                if not overwrite_ref[0]: first_info = info
                    # otherwise, next header becomes the compare-to header
                if not header_written:
                    sys.stdout.write(info.header_line)
                    header_written = True
            else:
                if not util.classes_eq(info.dialect, first_info.dialect):
                    raise SystemExit('Spreadsheet error: "'+path
                        +'" dialect doesn\'t match "'+first_path+'" dialect')
                if info.header_line != first_info.header_line:
                    # not a duplicated header
                    sys.stdout.write(info.header_line)
            
            # Copy remaining lines; readline() returns '' at EOF
            for line in iter(stream.readline, ''):
                sys.stdout.write(line)
        except IOError as e:
            # Compare the error code rather than the message text, which is
            # platform-dependent
            if e.errno != errno.EPIPE: raise # other IOErrors
            return # abort if output stream closed: nothing more can be written
        finally: stream.close() # still run if continue or return is used
62 355 aaronmk
63
# Script entry point. The call is unguarded, so it runs whenever this file is
# executed (or imported).
main()