Revision 3766
Added by Aaron Marcuse-Kubitza over 12 years ago
bin/join | ||
---|---|---|
1 | 1 |
#!/usr/bin/env python |
2 | 2 |
# Inner-joins two map spreadsheets A->B and B->C to A->C |
3 |
# Multi-safe (supports an input appearing multiple times). |
|
3 | 4 |
|
4 | 5 |
import csv |
6 |
import operator |
|
5 | 7 |
import os.path |
6 | 8 |
import sys |
7 | 9 |
import warnings |
... | ... | |
23 | 25 |
reader = csv.reader(stream) |
24 | 26 |
map_1_cols = reader.next() |
25 | 27 |
for row in reader: |
26 |
if row[0] != '': map_1[row[0]] = row
|
|
28 |
if row[0] != '': map_1.setdefault(row[0], []).append(row)
|
|
27 | 29 |
stream.close() |
28 | 30 |
|
29 | 31 |
# Join map 1 to map 0 |
... | ... | |
44 | 46 |
out_orig = row[1] # used in "No join mapping" error msg |
45 | 47 |
|
46 | 48 |
# Look for a match |
47 |
out_row = None
|
|
49 |
out_rows = []
|
|
48 | 50 |
suffix = '' |
49 | 51 |
while True: |
50 | 52 |
try: |
51 |
out_row = map_1[row[1]] |
|
53 |
out_rows = map_1[row[1]]
|
|
52 | 54 |
break |
53 | 55 |
except KeyError: |
54 | 56 |
# Heuristically look for a match on a parent path. |
... | ... | |
60 | 62 |
suffix = sep+new_suffix+suffix # prepend new suffix |
61 | 63 |
|
62 | 64 |
# Write new mapping |
63 |
if out_row != None and out_row[1] != '': # found non-empty mapping |
|
64 |
row = maps.merge_mappings(row, out_row) |
|
65 |
if row[1] != '': row[1] += suffix # don't modify out_row! |
|
65 |
is_empty = len(out_rows) == 1 and out_rows[0][1] == '' |
|
66 |
if out_rows and not is_empty: # found non-empty mapping(s) |
|
67 |
for out_row in out_rows: |
|
68 |
row = maps.merge_mappings(row, out_row) |
|
69 |
if row[1] != '': row[1] += suffix # don't modify out_row! |
|
66 | 70 |
else: |
67 | 71 |
msg = 'No' |
68 |
if out_row != None: msg += ' non-empty'
|
|
72 |
if is_empty: msg += ' non-empty'
|
|
69 | 73 |
msg += ' join mapping for '+out_orig |
70 | 74 |
set_error(msg) |
71 | 75 |
elif row[2] == '': # also no comment explaining why no input mapping |
Also available in: Unified diff
join: Made it multi-safe (supports an input value appearing multiple times in the second map, keeping every matching row instead of only the last one)