Project

General

Profile

« Previous | Next » 

Revision 4652

Removed the no-longer-used bin/intersect script

View differences:

bin/intersect
1
#!/usr/bin/env python
2
# Intersects two map spreadsheets A0->B and A1->C to A->B, with B overwriting C
3
# Multi-safe (supports an input appearing multiple times).
4
# Case- and punctuation-insensitive.
5

  
6
import csv
7
import os.path
8
import sys
9

  
10
sys.path.append(os.path.dirname(__file__)+"/../lib")
11

  
12
import maps
13
import util
14

  
15
def main():
16
    try: _prog_name, map_1_path = sys.argv[:2]
17
    except ValueError:
18
        raise SystemExit('Usage: '+sys.argv[0]+' <map_0 map_1 '
19
            '[compare_col_num...] [| '+sys.argv[0]
20
            +' map_2]... >intersect_map')
21
    compare_col_nums = map(int, sys.argv[2:]) # 0-based
22
    if compare_col_nums == []:
23
        compare_col_nums = None # list_subset() value for all columns
24
    
25
    def compare_on(row): return tuple(map(maps.simplify,
26
        util.list_subset(row, compare_col_nums)))
27
    
28
    headers = [None]*2
29
    
30
    # Get map 1
31
    compare_cols = set()
32
    stream = open(map_1_path, 'rb')
33
    reader = csv.reader(stream)
34
    headers[1] = reader.next()
35
    for row in reader:
36
        if row[0] != '':
37
            compare_cols.add(compare_on(row))
38
    stream.close()
39
    
40
    # Open map 0
41
    reader = csv.reader(sys.stdin)
42
    headers[0] = reader.next()
43
    
44
    # Check col labels
45
    combinable = maps.combinable(*headers)
46
    if not combinable:
47
        raise SystemExit('Map error: '
48
        'Map 0 column 0 label doesn\'t contain map 1 column 0 label')
49
    
50
    # Add map 0 to map 1, overwriting existing entries
51
    writer = csv.writer(sys.stdout)
52
    writer.writerow(headers[0])
53
    for row in reader:
54
        if not combinable or compare_on(row) in compare_cols:
55
            # not combinable, or in map 1
56
            writer.writerow(row)
57

  
58
main()
59 0

  
bin/map
200 200
            
201 201
            def split_col_name(name):
202 202
                label, sep, root = name.partition(':')
203
                label, sep2, prefixes_str = label.partition('[')
204
                prefixes_str = strings.remove_suffix(']', prefixes_str)
205
                prefixes = strings.split(',', prefixes_str)
206
                return label, sep != '', root, prefixes
207
                    # extract datasrc from "datasrc[data_format]"
203
                return label, sep != '', root, []
208 204
            
209 205
            in_label, in_root, prefixes = maps.col_info(in_label)
210 206
            in_is_xpaths = in_root != None

Also available in: Unified diff