Project

General

Profile

« Previous | Next » 

Revision 1525

intersect: Compare rows on the columns specified by compare_col_nums (all columns when none are given), just as subtract does

View differences:

intersect
8 8
sys.path.append(os.path.dirname(__file__)+"/../lib")
9 9

  
10 10
import maps
11
import util
11 12

  
12 13
def main():
13
    try: _prog_name, map_1_path = sys.argv
14
    try: _prog_name, map_1_path = sys.argv[:2]
14 15
    except ValueError:
15
        raise SystemExit('Usage: '+sys.argv[0]+' <map_0 map_1 [| '+sys.argv[0]
16
        raise SystemExit('Usage: '+sys.argv[0]+' <map_0 map_1 '
17
            '[compare_col_num...] [| '+sys.argv[0]
16 18
            +' map_2]... >intersect_map')
19
    compare_col_nums = map(int, sys.argv[2:]) # 0-based
20
    if compare_col_nums == []:
21
        compare_col_nums = None # list_subset() value for all columns
17 22
    
23
    def compare_on(row): return tuple(util.list_subset(row, compare_col_nums))
24
    
25
    headers = [None]*2
26
    
18 27
    # Get map 1
19
    map_1 = {}
28
    compare_cols = set()
20 29
    stream = open(map_1_path, 'rb')
21 30
    reader = csv.reader(stream)
22
    map_1_cols = reader.next()
31
    headers[1] = reader.next()
23 32
    for row in reader:
24
        if row[0] != '': map_1[row[0]] = row
33
        if row[0] != '':
34
            compare_cols.add(compare_on(row))
25 35
    stream.close()
26 36
    
27 37
    # Open map 0
28 38
    reader = csv.reader(sys.stdin)
29
    map_0_cols = reader.next()
30
    if map_0_cols[0] != map_1_cols[0]: raise SystemExit('Map error: '
31
        'Map 1 column 0 name doesn\'t match map 0 column 0 name')
39
    headers[0] = reader.next()
32 40
    
41
    # Check col labels
42
    combinable = maps.combinable(*headers)
43
    if not combinable and not ignore:
44
        raise SystemExit('Map error: '
45
        'Map 0 column 0 label doesn\'t contain map 1 column 0 label')
46
    
33 47
    # Add map 0 to map 1, overwriting existing entries
34 48
    writer = csv.writer(sys.stdout)
35
    writer.writerow(maps.merge_mappings(map_1_cols, map_0_cols))
49
    writer.writerow(headers[0])
36 50
    for row in reader:
37
        if row[0] != '':
38
            try: prev = map_1[row[0]]
39
            except KeyError: pass
40
            else: writer.writerow(maps.merge_mappings(prev, row))
51
        if not combinable or compare_on(row) in compare_cols:
52
            # not combinable, or in map 1
53
            writer.writerow(row)
41 54

  
42 55
main()

Also available in: Unified diff