Project

General

Profile

1
#!/usr/bin/env python
2
# Modifies a map spreadsheet A->B or any file using a replacements spreadsheet
3
# A->C or B->C
4

    
5
import csv
6
import HTMLParser
7
import os.path
8
import re
9
import sys
10

    
11
# Add the ../lib directory (relative to this script's location) to the module
# search path. Presumably this is where maps, opts and strings live -- TODO
# confirm.
sys.path.append(os.path.dirname(__file__)+"/../lib")
12

    
13
import maps
14
import opts
15
import strings
16

    
17
def unescape_html(str_):
    '''Returns str_ with HTML character references (e.g. &amp;, &#62;) decoded.
    
    Uses html.unescape() where it is available, because
    HTMLParser.HTMLParser().unescape() only exists on older Pythons. Falls back
    to the HTMLParser implementation otherwise, so behavior on Python 2 is
    unchanged.
    '''
    try: from html import unescape
    except ImportError: # no html.unescape(): use the module-level HTMLParser
        return HTMLParser.HTMLParser().unescape(str_)
    return unescape(str_)
18

    
19
def repl_unescape_html(match):
    '''Replacement callback for re.sub(): HTML-unescapes the whole match.
    
    @param match A regex match object
    @return The result of unescape_html() on the full matched text (group 0)
    '''
    matched_text = match.group(0)
    return unescape_html(matched_text)
20

    
21
def main():
    '''Applies a spreadsheet of regex replacements to stdin, writing to stdout.
    
    Command line: <prog> repl [col_num]
    * repl: path to a CSV file with a header row, whose first two columns are
      the pattern and its replacement. Rows with an empty pattern are skipped.
      A replacement of exactly 'unescape_html()' HTML-unescapes the matched
      text instead of substituting a fixed string.
    * col_num: optional 0-based column index. When given, stdin is read as a
      CSV whose header row is copied through unchanged and only that column of
      each later row is modified. When omitted, stdin is processed as one
      block of text.
    
    Patterns consisting only of word characters (or all patterns, when the
    'text' env flag read via opts.env_flag() is set) are escaped and matched
    as whole words rather than as raw regexes.
    
    Exits with a usage message (SystemExit) if repl is missing or col_num is
    not an integer.
    '''
    env_names = []
    def usage_err():
        raise SystemExit('Usage: '+opts.env_usage(env_names, True)+' '
            +sys.argv[0]+' <map repl [col_num] [| '+sys.argv[0]
            +' repl_1 [col_num_1]]... >new_map')
    
    text = opts.env_flag('text', False, env_names) # all patterns are plain text
    try: _prog_name, repl_path = sys.argv[:2]
    except ValueError: usage_err()
    col_num = None
    if len(sys.argv) > 2:
        try: col_num = int(sys.argv[2]) # 0-based
        except ValueError: usage_err() # not an integer
    
    # Get replacements
    repls = [] # list of (compiled pattern, replacement str or callback)
    with open(repl_path, 'rb') as stream: # closed even if reading fails
        reader = csv.reader(stream)
        next(reader, None) # skip header; tolerate an empty file
        for row in reader:
            in_, out = row[:2]
            if in_ == '': continue
            if text or re.match(r'^\w+$', in_): # match word
                # The lookahead rejects a following alphanumeric ([^\W_] is
                # "word char other than _"). Python's re does not support
                # POSIX classes such as [[:alnum:]].
                in_ = (r'(?:^|(?<=[\s,"])|^:|(?<=[\s,"]):)'+re.escape(in_)
                    +r'(?![^\W_])')
            if out == 'unescape_html()': out = repl_unescape_html
            # Compile once here instead of on every row that gets processed
            repls.append((re.compile(r'(?m)'+in_), out))
    
    def repl_all(str_):
        '''Applies every replacement, in file order, to str_'''
        str_ = strings.ustr(str_)
        for regex, with_ in repls: str_ = regex.sub(with_, str_)
        return str_
    
    # Modify map or file
    if col_num is not None:
        reader = csv.reader(sys.stdin)
        writer = csv.writer(sys.stdout)
        cols = next(reader, None)
        if cols is None: return # empty input: nothing to write
        writer.writerow(cols)
        for row in reader:
            # NOTE(review): unlike the whole-file branch below, this value is
            # not passed through strings.to_raw_str() before being written --
            # confirm csv.writer handles what repl_all() returns
            row[col_num] = repl_all(row[col_num])
            writer.writerow(row)
    else: sys.stdout.write(strings.to_raw_str(repl_all(sys.stdin.read())))
66

    
67
main() # no __name__ guard: runs on import as well as on direct execution
(63-63/84)