Revision 1525
Added by Aaron Marcuse-Kubitza almost 13 years ago
intersect | ||
---|---|---|
8 | 8 |
sys.path.append(os.path.dirname(__file__)+"/../lib") |
9 | 9 |
|
10 | 10 |
import maps |
11 |
import util |
|
11 | 12 |
|
12 | 13 |
def main(): |
13 |
try: _prog_name, map_1_path = sys.argv |
|
14 |
try: _prog_name, map_1_path = sys.argv[:2]
|
|
14 | 15 |
except ValueError: |
15 |
raise SystemExit('Usage: '+sys.argv[0]+' <map_0 map_1 [| '+sys.argv[0] |
|
16 |
raise SystemExit('Usage: '+sys.argv[0]+' <map_0 map_1 ' |
|
17 |
'[compare_col_num...] [| '+sys.argv[0] |
|
16 | 18 |
+' map_2]... >intersect_map') |
19 |
compare_col_nums = map(int, sys.argv[2:]) # 0-based |
|
20 |
if compare_col_nums == []: |
|
21 |
compare_col_nums = None # list_subset() value for all columns |
|
17 | 22 |
|
23 |
def compare_on(row): return tuple(util.list_subset(row, compare_col_nums)) |
|
24 |
|
|
25 |
headers = [None]*2 |
|
26 |
|
|
18 | 27 |
# Get map 1 |
19 |
map_1 = {}
|
|
28 |
compare_cols = set()
|
|
20 | 29 |
stream = open(map_1_path, 'rb') |
21 | 30 |
reader = csv.reader(stream) |
22 |
map_1_cols = reader.next()
|
|
31 |
headers[1] = reader.next()
|
|
23 | 32 |
for row in reader: |
24 |
if row[0] != '': map_1[row[0]] = row |
|
33 |
if row[0] != '': |
|
34 |
compare_cols.add(compare_on(row)) |
|
25 | 35 |
stream.close() |
26 | 36 |
|
27 | 37 |
# Open map 0 |
28 | 38 |
reader = csv.reader(sys.stdin) |
29 |
map_0_cols = reader.next() |
|
30 |
if map_0_cols[0] != map_1_cols[0]: raise SystemExit('Map error: ' |
|
31 |
'Map 1 column 0 name doesn\'t match map 0 column 0 name') |
|
39 |
headers[0] = reader.next() |
|
32 | 40 |
|
41 |
# Check col labels |
|
42 |
combinable = maps.combinable(*headers) |
|
43 |
if not combinable and not ignore: |
|
44 |
raise SystemExit('Map error: ' |
|
45 |
'Map 0 column 0 label doesn\'t contain map 1 column 0 label') |
|
46 |
|
|
33 | 47 |
# Add map 0 to map 1, overwriting existing entries |
34 | 48 |
writer = csv.writer(sys.stdout) |
35 |
writer.writerow(maps.merge_mappings(map_1_cols, map_0_cols))
|
|
49 |
writer.writerow(headers[0])
|
|
36 | 50 |
for row in reader: |
37 |
if row[0] != '': |
|
38 |
try: prev = map_1[row[0]] |
|
39 |
except KeyError: pass |
|
40 |
else: writer.writerow(maps.merge_mappings(prev, row)) |
|
51 |
if not combinable or compare_on(row) in compare_cols: |
|
52 |
# not combinable, or in map 1 |
|
53 |
writer.writerow(row) |
|
41 | 54 |
|
42 | 55 |
main() |
Also available in: Unified diff
intersect: Compare columns based on specified compare_col_nums, just like subtract