Revision 4504
Added by Aaron Marcuse-Kubitza about 12 years ago
union | ||
---|---|---|
3 | 3 |
# Multi-safe (supports an input appearing multiple times). Note that if there is |
4 | 4 |
# *any* non-empty mapping for an input in map_0, all mappings for that input in |
5 | 5 |
# map_1 will be excluded. |
6 |
# Case- and punctuation-insensitive. |
|
6 | 7 |
|
7 | 8 |
import csv |
8 | 9 |
import os.path |
... | ... | |
49 | 50 |
mappings = set() |
50 | 51 |
for row in map_0_reader: |
51 | 52 |
if all_ or non_empty(row): |
52 |
if not all_: mappings.add(row[0])
|
|
53 |
if not all_: mappings.add(maps.simplify(row[0]))
|
|
53 | 54 |
writer.writerow(row) |
54 | 55 |
|
55 | 56 |
if combinable: |
56 | 57 |
# Add mappings in map 1 that weren't already defined or mapped to |
57 | 58 |
for row in map_1_reader: |
58 |
if non_empty(row) and row[0] not in mappings: writer.writerow(row) |
|
59 |
if non_empty(row) and maps.simplify(row[0]) not in mappings: |
|
60 |
writer.writerow(row) |
|
59 | 61 |
|
60 | 62 |
stream.close() |
61 | 63 |
|
Also available in: Unified diff
intersect, union: Made case- and punctuation-insensitive. mappings/Veg+-VegBIEN.csv: Removed the duplicate entries for each first-letter case, which are no longer needed and must now be removed for case- and punctuation-insensitive intersect/union to work. Note that the SpeciesLink `svn diff` hides _alt entry 0, which contains one of the removed duplicate columns that appears in the diff.