Project

General

Profile

1 205 aaronmk
#!/usr/bin/env python
2
# Modifies a map spreadsheet A->B or any file using a replacements spreadsheet
3
# A->C or B->C
4
5
import csv
6 1705 aaronmk
import os.path
7 205 aaronmk
import re
8
import sys
9
10 1705 aaronmk
sys.path.append(os.path.dirname(__file__)+"/../lib")
11
12
import maps
13 4357 aaronmk
import opts
14 1705 aaronmk
15 205 aaronmk
def main():
    '''Rewrites stdin to stdout using the replacements spreadsheet in argv[1].

    Command line: <prog> repl [col_num]
    * repl: path of a CSV file. The first two cells of its header row are the
      input and output labels; the first two cells of every following row are
      a pattern and its replacement. Rows with an empty pattern are skipped.
    * col_num: optional 0-based index of the map column to modify. When given,
      stdin is parsed as a CSV map: that column's header is relabeled with the
      output label and every cell of the column is rewritten. When omitted,
      stdin is read as a single string and rewritten as a whole.

    If the "text" env flag is set, or a pattern consists only of word
    characters, the pattern is escaped and matched as a whole word. Any other
    pattern is used as a regular expression as-is. Replacements are always
    passed to re.sub() as the replacement template.

    Raises SystemExit on a usage error, on a replacements file without a
    two-column header row, and when the map column's label doesn't match the
    replacements input label.
    '''
    # Passed to both opts.env_flag() and opts.env_usage(); presumably
    # env_flag() records each flag name here for the usage message -- TODO
    # confirm against lib/opts.py
    env_names = []
    def usage_err():
        raise SystemExit('Usage: '+opts.env_usage(env_names, True)+' '
            +sys.argv[0]+' <map repl [col_num] [| '+sys.argv[0]
            +' repl_1 [col_num_1]]... >new_map')

    text = opts.env_flag('text', False, env_names) # all patterns are plain text
    try: _prog_name, repl_path = sys.argv[:2]
    except ValueError: usage_err() # repl path is missing
    col_num = None
    if len(sys.argv) > 2:
        try: col_num = int(sys.argv[2]) # 0-based
        except ValueError: usage_err() # not an integer

    # Get replacements
    repls = [] # [(compiled pattern, replacement template), ...] in file order
    with open(repl_path, 'rb') as stream: # closed even if parsing fails
        reader = csv.reader(stream)
        try: repl_in, repl_out = next(reader)[:2]
        except (StopIteration, ValueError): # no header row, or < 2 columns
            raise SystemExit('Replacements error: "'+repl_path
                +'" must start with a header row containing the input and '
                'output column labels')
        for row in reader:
            if not row: continue # csv yields [] for a blank line
            in_, out = row[:2]
            if in_ == '': continue
            if text or re.match(r'^\w+$', in_):
                # Match only when not adjacent to a letter or digit, so "_"
                # and punctuation both count as word boundaries
                in_ = r'(?<![^\W_])'+re.escape(in_)+r'(?![^\W_])' # match word
            # Compile once here instead of once per cell in repl_all()
            repls.append((re.compile(r'(?m)'+in_), out))

    def repl_all(str_):
        '''Applies every replacement to str_, in file order'''
        for pattern, with_ in repls: str_ = pattern.sub(with_, str_)
        return str_

    # Modify map or file
    if col_num is not None:
        reader = csv.reader(sys.stdin)
        writer = csv.writer(sys.stdout)
        cols = next(reader)
        header = cols[col_num]
        label, root = maps.col_info(header)[:2]
        if label != repl_in: raise SystemExit('Map error: Map column '+
            str(col_num)+' label "'+label+'" doesn\'t match replacements input '
            'column label "'+repl_in+'"')
        # `sep` was previously never assigned, so this branch always raised
        # NameError. Take the separator from the header itself so that a
        # header without a root stays without a separator.
        # NOTE(review): assumes maps.col_info() splits "label:root" on the
        # first ":" -- confirm against lib/maps.py
        sep = header.partition(':')[1]
        cols[col_num] = repl_out+sep+repl_all(root)
        writer.writerow(cols)
        for row in reader:
            row[col_num] = repl_all(row[col_num])
            writer.writerow(row)
    else: sys.stdout.write(repl_all(sys.stdin.read()))
61 205 aaronmk
62
# Run the command-line tool only when executed as a script, so that importing
# this module does not read sys.argv or consume stdin
if __name__ == '__main__': main()