Revision 4505
Added by Aaron Marcuse-Kubitza over 12 years ago
bin/map | ||
---|---|---|
6 | 6 |
# http://vegbank.org/vegdocs/xml/vegbank_example_ver1.0.2.xml |
7 | 7 |
# Duplicate-column safe (supports multiple columns of the same name, which will |
8 | 8 |
# be combined) |
9 |
# Case- and punctuation-insensitive. |
|
9 | 10 |
|
10 | 11 |
import copy |
11 | 12 |
import csv |
... | ... | |
300 | 301 |
col_names_ct = len(col_names) |
301 | 302 |
col_idxs = util.list_flip(col_names) |
302 | 303 |
col_names_simp = map(maps.simplify, col_names) |
303 |
col_names_recover = dict(zip(col_names_simp, col_names)) |
|
304 |
col_idxs_simp = util.list_flip(col_names_simp) |
|
304 |
col_names_map = dict(zip(col_names_simp, col_names)) |
|
305 | 305 |
prefixes_simp = map(maps.simplify, prefixes) |
306 | 306 |
|
307 | 307 |
# Resolve prefixes |
... | ... | |
310 | 310 |
for in_, out in mappings_orig: |
311 | 311 |
if metadata_value(in_) == None: |
312 | 312 |
try: |
313 |
cols = get_with_prefix(col_idxs_simp, prefixes_simp,
|
|
313 |
cols = get_with_prefix(col_names_map, prefixes_simp,
|
|
314 | 314 |
maps.simplify(in_)) |
315 | 315 |
except KeyError: pass |
316 | 316 |
else: |
317 |
cols = [(col_names_recover[n], i) for n, i in cols]
|
|
317 |
cols = [(orig, col_idxs[orig]) for simp, orig in cols]
|
|
318 | 318 |
mappings[len(mappings):] = [[db_xml.ColRef(*col), out] |
319 | 319 |
for col in cols] # can't use += because that uses = |
320 | 320 |
|
Also available in: Unified diff
bin/map: map_table(): Refactored to first map each simplified column name to its original column name and then determine the column index for each original name. This avoids trying to recover the original name from a simplified name, where multiple original names might collide onto the same simplified name. Documented that it is case- and punctuation-insensitive.