Project

General

Profile

1 123 aaronmk
#!/usr/bin/env python
2
# Converts a map spreadsheet to a human-readable (but not machine-usable) form
3 3807 aaronmk
# Usage: self [col_num...] <in_map >out_map
4 123 aaronmk
5
import csv
6
import re
7
import sys
8
9
def sub_nested(regex, repl, str_):
    """Apply a regex substitution repeatedly until the string stops changing.

    A single re.subn() pass cannot rewrite text that only starts matching
    after an inner match has been replaced (e.g. nested brackets), so the
    substitution is re-run on its own output.

    Args:
        regex: Pattern (string or compiled) passed to re.subn().
        repl: Replacement string or function passed to re.subn().
        str_: The string to rewrite.

    Returns:
        The rewritten string, once a pass either makes no replacements or
        leaves the string unchanged.
    """
    while True:
        new_str, n = re.subn(regex, repl, str_)
        # Stop at a fixed point as well as at n == 0: a pass that "replaces"
        # matches with identical text would otherwise loop forever
        if n == 0 or new_str == str_: return new_str
        str_ = new_str
13
14
def cleanup(xpath):
    """Shorten a mapping XPath into a human-readable form.

    Applies a fixed sequence of regex substitutions (the order matters: each
    step works on the output of the previous one) and returns the result.

    Args:
        xpath: The XPath string to simplify.

    Returns:
        The simplified string. A '/' is prepended when a pointer prefix or
        leading path steps were actually removed, to show the truncation.
    """
    truncated = False

    # Remove attrs
    xpath = sub_nested(r':\[[^\[\]]*?\]', r'', xpath)

    # Remove keys except last
    xpath = sub_nested(r':?\[[^\[\]]*?\](?=.*\[[^\[\]]*?\].*?)', r'', xpath)

    # Remove lookahead assertions
    xpath = sub_nested(r'\((/[^\)]*?)\)(?=/)', r'\1', xpath)

    # Remove pointers
    shortened = re.sub(r'^.*->', r'', xpath)
    if shortened != xpath: truncated = True
    xpath = shortened

    # Remove part of path before first key list, XML function, or path end
    # Leave enough to include the table of a user-defined value
    shortened = re.sub(
        r'^(?:/(?!_)[\w*]+)*(?=(?:/(?!_)[\w*]+){2}(?:\[|/_|$))',
        r'', xpath)
    # Compare the text instead of using re.subn()'s count: this pattern can
    # match the empty string at the start (when only two steps precede the
    # key list/function/end), which counts as a replacement although nothing
    # was removed
    if shortened != xpath: truncated = True
    xpath = shortened

    # Remove XML functions other than unit conversions
    xpath = re.sub(r'/_(?!\w+_to_)\w+/\w+', r'', xpath)

    # Remove backward (child-to-parent) pointer's target ID attr
    xpath = re.sub(r'\[[\w*]+\]|(?<=\[)[\w*]+,', r'', xpath)

    # Remove negative keys
    xpath = re.sub(r',?!(?:[\w*]+/)*@?[\w*]+', r'', xpath)

    # Remove path before key
    xpath = re.sub(r'(?:[\w*]+/)*(@?[\w*]+)(?==)', r'\1', xpath)

    # Prepend / to show truncation
    if truncated: xpath = '/'+xpath

    return xpath
52
53
def main():
    """Filter a CSV map from stdin to stdout, cleaning up selected columns.

    The 0-based column numbers to clean are taken from the command-line
    arguments; with no arguments, columns 0 and 1 are cleaned. The first
    (header) row is copied through unchanged. Empty input produces no output.

    Raises:
        ValueError: If a command-line argument is not an integer.
        IndexError: If a row has fewer columns than a requested column number.
    """
    # Build a real list: the column numbers are reused for every row, and a
    # lazy map() object (Python 3) would be exhausted after the first row
    col_nums = [int(arg) for arg in sys.argv[1:]] or [0, 1] # 0-based

    # Convert map
    reader = csv.reader(sys.stdin)
    writer = csv.writer(sys.stdout)

    # next() with a default works on Python 2.6+ and 3, and avoids a
    # StopIteration traceback when the input is empty
    header = next(reader, None)
    if header is None: return
    writer.writerow(header)

    for row in reader:
        for i in col_nums: row[i] = cleanup(row[i])
        writer.writerow(row)
65 123 aaronmk
66
# Run the conversion only when executed as a script, not when imported
if __name__ == '__main__': main()