1 |
929
|
aaronmk
|
#!/usr/bin/env python
|
2 |
1505
|
aaronmk
|
# Subtracts map spreadsheet A1->C from A0->B to produce A->B.
|
3 |
1786
|
aaronmk
|
# Other A0 entries are subtracted as well:
|
4 |
|
|
# - Empty entries without a comment documenting why they are empty, if there is
|
5 |
|
|
# a matching entry in A1.
|
6 |
|
|
# - Entries whose input and output map to the same non-empty value in A1
|
7 |
3767
|
aaronmk
|
# Multi-safe (supports an input appearing multiple times). Note that if there is
|
8 |
|
|
# *any* mapping for an input in subtract_map, all mappings for that input in
|
9 |
|
|
# from_map will be excluded.
|
10 |
4508
|
aaronmk
|
# Case- and punctuation-insensitive.
|
11 |
929
|
aaronmk
|
|
12 |
|
|
import csv
|
13 |
2021
|
aaronmk
|
import operator
|
14 |
1137
|
aaronmk
|
import os.path
|
15 |
929
|
aaronmk
|
import sys
|
16 |
|
|
|
17 |
1137
|
aaronmk
|
# Make the sibling ../lib directory importable. The path is built with
# os.path.join() because plain concatenation yields "/../lib" (relative to the
# filesystem root) whenever __file__ has no directory part.
sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, "lib"))
|
18 |
|
|
|
19 |
1503
|
aaronmk
|
import maps
|
20 |
1137
|
aaronmk
|
import opts
|
21 |
|
|
import util
|
22 |
|
|
|
23 |
929
|
aaronmk
|
def main():
    """Subtract map 1 (a file) from map 0 (stdin), writing the result to stdout.

    Command line: ``<from_map subtract_map [compare_col_num...]``. Map 0
    (from_map) is read as CSV from stdin; map 1 (subtract_map) is read as CSV
    from the path in ``sys.argv[1]``. Any further arguments are 0-based column
    numbers to compare rows on; when none are given, ``None`` is passed to
    ``util.list_subset()``, its value for all columns.

    Map 0's header row is always written. A map 0 row is dropped (subtracted)
    only if the two header rows are combinable per ``maps.combinable()`` and
    at least one of these holds:
      - ``maps.is_nonexplicit_empty_mapping(row)`` is true and the row's
        simplified input (column 0) also appears as an input in map 1
      - the row's compared columns, simplified, equal those of some map 1 row
        that has a non-empty input
      - ``util.have_same_value(map_, row[0], row[1])`` is true
    Every other row is written unchanged.

    Raises:
        SystemExit: with a usage message if no subtract_map path was given, or
            with a map error if the headers are not combinable and the
            ``ignore`` flag (read with ``opts.env_flag('ignore')``) is not set.
    """
    ignore = opts.env_flag('ignore')

    # sys.argv[:2] holds fewer than two items when the path is missing, which
    # makes the unpacking raise ValueError
    try:
        _prog_name, map_1_path = sys.argv[:2]
    except ValueError:
        raise SystemExit('Usage: env [ignore=1] '+sys.argv[0]+' <from_map '
            'subtract_map [compare_col_nums...] [| '+sys.argv[0]
            +' subtract_map_2]... >difference_map')
    # 0-based; None is the list_subset() value for all columns
    compare_col_nums = [int(arg) for arg in sys.argv[2:]] or None

    def compare_on(row):
        """Return the simplified values of row's compared columns as a tuple."""
        selected = util.list_subset(row, compare_col_nums, default=None)
        return tuple(maps.simplify(value) for value in selected)

    headers = [None]*2

    # Get map 1
    input_cols = set()
    compare_cols = set()
    map_ = dict()
    # 'rb' is the mode the Python 2 csv module expects. The with-block closes
    # the file even if reading a row raises.
    with open(map_1_path, 'rb') as stream:
        reader = csv.reader(stream)
        headers[1] = next(reader)
        for row in reader:
            if row[0] == '':
                continue # rows without an input take no part in the subtraction
            input_cols.add(maps.simplify(row[0]))
            compare_cols.add(compare_on(row))
            outputs = row[1:]
            # all() is vacuously true for a row with no output columns, which
            # has no row[1] to record, hence the extra emptiness check.
            # NOTE(review): only rows whose outputs are all empty are recorded
            # here, while the file header speaks of a "same non-empty value" --
            # confirm the intended condition.
            if outputs and all(v == '' for v in outputs): # all empty
                map_[row[0]] = row[1]

    # Open map 0
    reader = csv.reader(sys.stdin)
    headers[0] = next(reader)

    # Check col labels
    combinable = maps.combinable(*headers)
    if not combinable and not ignore:
        raise SystemExit('Map error: '
            'Map 0 column 0 label doesn\'t contain map 1 column 0 label')

    # Subtract map 1 from map 0
    writer = csv.writer(sys.stdout)
    writer.writerow(headers[0])
    for row in reader:
        # Short-circuits exactly like the per-row checks it replaces: nothing
        # is looked up in map 1 when the maps are not combinable
        in_map_1 = combinable and (
            (maps.is_nonexplicit_empty_mapping(row)
                and maps.simplify(row[0]) in input_cols)
            or compare_on(row) in compare_cols
            or util.have_same_value(map_, row[0], row[1]) # map to same place
        )
        if not in_map_1: # not combinable or not in map 1
            writer.writerow(row)
|
77 |
929
|
aaronmk
|
|
78 |
|
|
# Run only when executed as a script, so that importing this file does not
# start reading stdin and writing to stdout
if __name__ == '__main__':
    main()
|