Project

General

Profile

1 4591 aaronmk
#!/usr/bin/env python
2 7448 aaronmk
# Translates a spreadsheet column using a thesaurus.
3 4650 aaronmk
# The column header is also translated. CSVs without a header are supported.
4 4591 aaronmk
# Unrecognized names are left untouched, permitting successive runs on different
5 7448 aaronmk
# thesauruses.
6 4591 aaronmk
# Case- and punctuation-sensitive. (Use canon first for case-insensitivity.)
7
8
import csv
9
import sys
10
11
def _open_csv(path):
    """Open `path` for reading in the mode the csv module expects.

    Python 2's csv module wants a binary-mode file; Python 3's wants a
    text-mode file opened with newline='' so that line endings inside quoted
    fields reach the parser unmodified.
    """
    if sys.version_info[0] >= 3:
        return open(path, 'r', newline='')
    return open(path, 'rb')


def main():
    """Translate one column of the CSV on stdin and write the CSV to stdout.

    Command line: `col# thesaurus`
      col#      -- index of the column to translate; it is used directly as a
                   Python list index, so 0 is the first column
      thesaurus -- path of a CSV file whose first column holds the names to
                   look up and whose second column holds their replacements;
                   if a name occurs more than once, the last entry wins

    Every input row, including a header row if present, is handled the same
    way: if the value in the selected column is a thesaurus name it is
    replaced, otherwise the row is written out unchanged. Rows that do not
    have the selected column (e.g. blank lines) are also written out unchanged.

    Raises:
      SystemExit -- with a usage message, if the argument count is wrong
      ValueError -- if col# is not an integer
      IndexError -- if a non-blank thesaurus row has fewer than two columns
    """
    try:
        _prog_name, col_num, dict_path = sys.argv
    except ValueError:
        raise SystemExit('Usage: '+sys.argv[0]
        +' <in col# thesaurus [| '+sys.argv[0]+' col# thesaurus_2]... >out')
    col_num = int(col_num)

    # Get thesaurus
    dict_ = {}
    # `with` guarantees the file is closed even if a malformed row raises
    with _open_csv(dict_path) as stream:
        for row in csv.reader(stream):
            if not row:
                continue  # csv yields [] for blank lines; nothing to map
            dict_[row[0]] = row[1]

    # Translate input
    reader = csv.reader(sys.stdin)
    writer = csv.writer(sys.stdout)
    for row in reader:
        try:
            term = row[col_num]
        except IndexError:
            # Row lacks the column: leave it untouched like any other
            # unrecognized input instead of aborting the whole stream
            writer.writerow(row)
            continue
        # Unrecognized names are deliberately left as-is so that the output can
        # be piped through further runs with other thesauruses
        if term in dict_:
            row[col_num] = dict_[term]
        writer.writerow(row)
32
33
# Run the filter only when executed as a script, so that importing this module
# does not start consuming stdin as a side effect.
if __name__ == '__main__':
    main()