/bin/union - BIEN 3 - NCEAS Projects

root/bin/union @ 1497

       #!/usr/bin/env python
       # Combines two map spreadsheets A0->B and A1->C to A->B, with B overwriting C
       import csv
       import os.path
       import sys
       sys.path.append(os.path.dirname(__file__)+"/../lib")
       import maps
       import opts
       def col_label(col_name):
           """Returns the part of a column name before its first ':'.
           
           A name without any ':' is returned unchanged.
           """
           return col_name.split(':', 1)[0]
       def overlaps(str0, str1):
           """Returns whether either string occurs as a substring of the other."""
           return str1 in str0 or str0 in str1
       def main():
           """Writes the union of two CSV maps to stdout.
           
           Map 0 is read from stdin and map 1 from the file named by the single
           command-line argument. Every row of map 0 is passed through; rows of
           map 1 are appended only when their first column is non-empty and was
           not already used as a first-column value in map 0.
           
           Settings read via opts (the usage message documents them as env vars):
             ignore      if set, mismatched column 0 labels do not abort the run;
                         map 1 is then simply not merged
             header_num  index (0 or 1) of the map whose header row is output
           
           Raises SystemExit on wrong usage, or when the column 0 labels of the
           two maps don't overlap and ignore is not set.
           """
           ignore = opts.env_flag('ignore')
           header_num = int(opts.get_env_var('header_num', 0))
               # selects which map's header to use as the output header
           try: _prog_name, map_1_path = sys.argv
           except ValueError:
               raise SystemExit('Usage: env [ignore=1] [header_num={0|1}] '+sys.argv[0]
                   +' <map_0 map_1 [| '+sys.argv[0]+' map_2]... >union_map')
           
           headers = [None]*2
           
           # Open map 0
           map_0_reader = csv.reader(sys.stdin)
           headers[0] = map_0_reader.next()
           
           # Open map 1. The with block closes the file on every exit path,
           # including the SystemExit raised by the label check below.
           with open(map_1_path, 'rb') as stream:
               map_1_reader = csv.reader(stream)
               headers[1] = map_1_reader.next()
               
               # Check col labels
               combinable = overlaps(*[col_label(header[0]) for header in headers])
               if not combinable and not ignore: raise SystemExit('Map error: '
                   'Map 0 column 0 label doesn\'t contain map 1 column 0 label')
               
               # Pass through map 0, storing which inputs it defines
               writer = csv.writer(sys.stdout)
               writer.writerow(headers[header_num])
               inputs = set()
               for row in map_0_reader:
                   # csv.reader yields [] for a blank line, so guard the index
                   if row and row[0] != '': inputs.add(row[0])
                   writer.writerow(row)
               
               if combinable:
                   # Add entries in map 1 that weren't already defined
                   for row in map_1_reader:
                       # Blank lines have no input and are skipped like rows
                       # whose first column is empty
                       if row and row[0] != '' and row[0] not in inputs:
                           writer.writerow(row)
       # Run only when executed as a script, so that importing this file (e.g.
       # for testing its helpers) does not consume stdin or parse sys.argv
       if __name__ == '__main__': main()

(29-29/32)

Project

General

Profile