Revision 1447
Added by Aaron Marcuse-Kubitza over 12 years ago
bin/cat_csv | ||
---|---|---|
1 | 1 |
#!/usr/bin/env python |
2 |
# Concatenates two spreadsheets with the same headers |
|
2 |
# Concatenates spreadsheets, removing any duplicated headers |
|
3 |
# Usage: self [sheet...] >out_sheet |
|
3 | 4 |
|
4 |
import csv
|
|
5 |
import os.path
|
|
5 | 6 |
import sys |
6 | 7 |
|
8 |
sys.path.append(os.path.dirname(__file__)+"/../lib") |
|
9 |
|
|
10 |
import csvs |
|
11 |
import util |
|
12 |
|
|
7 | 13 |
def main(): |
8 |
try: _prog_name, sheet_1_path = sys.argv |
|
9 |
except ValueError: |
|
10 |
raise SystemExit('Usage: '+sys.argv[0]+' <sheet_0 sheet_1 [| ' |
|
11 |
+sys.argv[0]+' sheet_2]... >cat_sheet') |
|
14 |
paths = sys.argv[1:] |
|
12 | 15 |
|
13 |
# Get cols |
|
14 |
reader_0 = csv.reader(sys.stdin) |
|
15 |
sheet_0_cols = reader_0.next() |
|
16 |
stream_1 = open(sheet_1_path, 'rb') |
|
17 |
reader_1 = csv.reader(stream_1) |
|
18 |
sheet_1_cols = reader_1.next() |
|
19 |
if not sheet_0_cols == sheet_1_cols: raise SystemExit('Map error: ' |
|
20 |
'Sheet 1 column names don\'t match sheet 0 column names') |
|
21 |
|
|
22 |
# Write combined sheet |
|
23 |
writer = csv.writer(sys.stdout) |
|
24 |
writer.writerow(sheet_0_cols) |
|
25 |
def write_sheet(reader): |
|
26 |
for row in reader: writer.writerow(row) |
|
27 |
write_sheet(reader_0) |
|
28 |
write_sheet(reader_1) |
|
29 |
|
|
30 |
stream_1.close() |
|
16 |
first_path = None |
|
17 |
first_info = None |
|
18 |
for path in paths: |
|
19 |
stream = open(path, 'rb') |
|
20 |
|
|
21 |
# Get dialect and process first line |
|
22 |
info = csvs.stream_info(stream) |
|
23 |
def write_header(): sys.stdout.write(info.header_line) |
|
24 |
if first_info == None: |
|
25 |
first_path = path |
|
26 |
first_info = info |
|
27 |
write_header() |
|
28 |
else: |
|
29 |
if not util.classes_eq(info.dialect, first_info.dialect): |
|
30 |
raise SystemExit('Spreadsheet error: "'+path |
|
31 |
+'" dialect doesn\'t match "'+first_path+'" dialect') |
|
32 |
if info.header_line != first_info.header_line: write_header() |
|
33 |
# not a duplicated header |
|
34 |
|
|
35 |
# Copy remaining lines |
|
36 |
while True: |
|
37 |
line = stream.readline() |
|
38 |
if line == '': break |
|
39 |
sys.stdout.write(line) |
|
40 |
|
|
41 |
stream.close() |
|
31 | 42 |
|
32 | 43 |
main() |
Also available in: Unified diff
cat_csv: Ignore any duplicated headers instead of requiring each CSV to have a header identical to the first. Rewrote the script to pass the CSVs through as lines rather than parsing each row. Because the CSVs are no longer parsed, the script now checks that all CSVs have the same dialect and exits with an error if they do not.