Revision 4695
Added by Aaron Marcuse-Kubitza over 12 years ago
bin/union | ||
---|---|---|
1 |
#!/usr/bin/env python |
|
2 |
# Combines two map spreadsheets A0->B and A1->C to A->B, with B overwriting C |
|
3 |
# Multi-safe (supports an input appearing multiple times). Note that if there is |
|
4 |
# *any* non-empty mapping for an input in map_0, all mappings for that input in |
|
5 |
# map_1 will be excluded. |
|
6 |
# Case- and punctuation-insensitive. |
|
7 |
|
|
8 |
import csv |
|
9 |
import os.path |
|
10 |
import sys |
|
11 |
|
|
12 |
sys.path.append(os.path.dirname(__file__)+"/../lib") |
|
13 |
|
|
14 |
import maps |
|
15 |
import opts |
|
16 |
import util |
|
17 |
|
|
18 |
def non_empty(row):
    """Return whether a map row defines a mapping.

    A row counts as non-empty only when both its first column (the input) and
    its second column (the output) are present and not the empty string. Rows
    with fewer than two columns -- e.g. the empty list csv.reader yields for a
    blank line -- are treated as empty instead of raising IndexError.
    """
    return len(row) >= 2 and row[0] != '' and row[1] != ''
|
19 |
|
|
20 |
def main():
    """Merge the map on stdin (map 0) with the map file given as the sole
    command-line argument (map 1) and write the combined map to stdout as CSV.

    The output starts with a header built by maps.merge_headers() from both
    maps' header rows. Map 0's rows are then passed through, followed (when
    maps.combinable() accepts the two headers) by those non-empty rows of
    map 1 whose simplified input was not recorded from map 0.

    Options, read through the opts module (the usage message shows them being
    passed as environment variables):
      ignore     -- don't abort when the headers are not combinable; map 1's
                    rows are then simply left out
      header_num -- passed to maps.merge_headers() as `prefer`
      all        -- pass through every row of map 0 and record none of its
                    inputs, so no map 1 row is excluded as a duplicate

    Raises SystemExit on a wrong argument count, on a map without a header
    row, or when the headers are not combinable and `ignore` is not set.
    """
    ignore = opts.env_flag('ignore')
    # selects which map's header to use as the output header
    header_num = util.cast(int, opts.get_env_var('header_num'))
    all_ = opts.env_flag('all') # doesn't merge mappings that are in both maps
    try: _prog_name, map_1_path = sys.argv
    except ValueError:
        raise SystemExit('Usage: env [ignore=1] [header_num={0|1}] [all=1] '
            +sys.argv[0]+' <map_0 map_1 [| '+sys.argv[0]+' map_2]... >out_map')

    headers = [None]*2

    # Open map 0
    map_0_reader = csv.reader(sys.stdin)
    try: headers[0] = next(map_0_reader)
    except StopIteration:
        # An empty input has no header row; report it like other map errors
        # instead of letting StopIteration escape as a traceback
        raise SystemExit('Map error: Map 0 is empty (no header row)')

    # Open map 1. The with block guarantees the file is closed even when one
    # of the SystemExit paths below (or any other exception) is taken.
    # NOTE(review): 'rb' is kept from the original, which targets Python 2's
    # csv module; a Python 3 port would need text mode with newline=''.
    with open(map_1_path, 'rb') as stream:
        map_1_reader = csv.reader(stream)
        try: headers[1] = next(map_1_reader)
        except StopIteration:
            raise SystemExit('Map error: Map 1 is empty (no header row)')

        # Check col labels
        combinable = maps.combinable(*headers)
        if not combinable and not ignore:
            raise SystemExit('Map error: '
            'Map 0 column 0 label doesn\'t contain map 1 column 0 label')

        # Pass through map 0, storing which mappings it defines
        writer = csv.writer(sys.stdout)
        writer.writerow(maps.merge_headers(*headers, **dict(prefer=header_num)))
        mappings = set()
        for row in map_0_reader:
            if all_ or non_empty(row):
                # Inputs are recorded in simplified form so the lookup against
                # map 1 below uses the same normalization on both sides
                if not all_: mappings.add(maps.simplify(row[0]))
                writer.writerow(row)

        if combinable:
            # Add mappings in map 1 that weren't already defined or mapped to
            for row in map_1_reader:
                if non_empty(row) and maps.simplify(row[0]) not in mappings:
                    writer.writerow(row)
|
63 |
|
|
64 |
# Run the merge only when this file is executed as a script, so that importing
# it (e.g. for inspection or testing) does not read stdin or parse sys.argv.
if __name__ == '__main__':
    main()
|
65 | 0 |
Also available in: Unified diff
Removed no longer used bin/union