1 |
733
|
aaronmk
|
# Map spreadsheet manipulation
|
2 |
|
|
|
3 |
1704
|
aaronmk
|
import re
|
4 |
|
|
|
5 |
|
|
import Parser
|
6 |
1501
|
aaronmk
|
import strings
|
7 |
1504
|
aaronmk
|
import util
|
8 |
1501
|
aaronmk
|
|
9 |
1765
|
aaronmk
|
##### Metadata
|
10 |
|
|
|
11 |
1704
|
aaronmk
|
def col_info(col_name, require_root=False):
    '''Splits a column name of the form "datasrc[format,...]:root" into parts.
    @param col_name The column name to parse
    @param require_root Whether a missing ":root" part is a syntax error
    @return tuple (label, root, prefixes). root is None when col_name has no
        ":root" part.
    @raise Parser.SyntaxError If col_name does not have the expected syntax
    '''
    def fail():
        raise Parser.SyntaxError('Column name must have syntax '
            '"datasrc[format,...]:root" (formats optional): '+col_name)

    parsed = re.match(r'^([^\[:]*)(?:\[([^\]]*?)\])?(?::(.*))?$', col_name)
    if parsed is None: fail()

    label = parsed.group(1)
    prefixes = parsed.group(2) # None when there is no "[...]" part
    root = parsed.group(3) # None when there is no ":root" part
    if require_root and root is None: fail()

    # presumably util.coalesce() substitutes '' for a None prefixes, and
    # strings.split() splits it on ',' -- confirm against those helpers
    return label, root, strings.split(',', util.coalesce(prefixes, ''))
|
21 |
1501
|
aaronmk
|
|
22 |
1765
|
aaronmk
|
##### Combinability
|
23 |
|
|
|
24 |
1704
|
aaronmk
|
def col_formats(col_name):
    '''Lists the formats named by a column name.
    @param col_name A column name accepted by col_info()
    @return list The column's label followed by its bracketed prefixes
    '''
    parts = col_info(col_name)
    label = parts[0]
    prefixes = parts[2]
    return [label]+prefixes
|
27 |
|
|
|
28 |
1765
|
aaronmk
|
def cols_combinable(*col_names):
    '''Checks the given columns' formats against each other.
    Each column's formats (see col_formats()) are joined with ',' and the
    resulting strings are passed to strings.overlaps().
    @param col_names Column names accepted by col_info()
    @return The result of strings.overlaps()
    '''
    format_lists = []
    for name in col_names:
        format_lists.append(','.join(col_formats(name)))
    return strings.overlaps(*format_lists)
|
30 |
1501
|
aaronmk
|
|
31 |
1765
|
aaronmk
|
def combinable(*headers):
    '''Applies cols_combinable() to the first column of each header.
    @param headers Header rows; only element 0 of each is used
    '''
    first_cols = [header[0] for header in headers]
    return cols_combinable(*first_cols)
|
32 |
|
|
|
33 |
|
|
def strip_root(root):
    '''Removes every ":[...]" segment from a root string.
    @param root The root string to strip
    @return str root without any ":[...]" segments. A "[...]" that is not
        directly preceded by ":" is left in place.
    '''
    bracketed_segment = re.compile(r':\[[^\]]*?\]')
    return bracketed_segment.sub(r'', root)
|
34 |
|
|
|
35 |
|
|
def stripped_root(col_name):
    '''Gets a column name's root with its ":[...]" segments removed.
    @param col_name A column name accepted by col_info()
    @return The result of applying strip_root() to the column's root via
        util.do_ignore_none() (presumably None when the column name has no
        root -- confirm against util.do_ignore_none())
    '''
    _label, root, _prefixes = col_info(col_name)
    return util.do_ignore_none(strip_root, root)
|
38 |
1765
|
aaronmk
|
|
39 |
1768
|
aaronmk
|
def cols_root_combinable(*col_names):
    '''Checks whether the roots of the first two columns are compatible.
    Two roots are compatible if either is None or their stripped forms (see
    stripped_root()) are equal. Every column name is parsed, but only the
    first two are compared.
    @param col_names At least two column names accepted by col_info()
    @return bool
    '''
    # Build a real list: the roots are indexed below, and map() only returns
    # an indexable list on Python 2
    roots = [stripped_root(name) for name in col_names]
    first, second = roots[0], roots[1]
    return first is None or second is None or first == second
|
42 |
|
|
|
43 |
1768
|
aaronmk
|
def cols_fully_combinable(*col_names):
    '''Checks that columns are combinable by both format and root.
    @param col_names Column names accepted by col_info()
    @return The cols_combinable() result if it is false, otherwise the
        cols_root_combinable() result
    '''
    formats_ok = cols_combinable(*col_names)
    # The root check is only evaluated when the format check passes
    return formats_ok and cols_root_combinable(*col_names)
|
45 |
|
|
|
46 |
1765
|
aaronmk
|
def join_combinable(*headers):
    '''Checks whether two headers can be joined.
    Compares element 1 of the first header with element 0 of the second
    using cols_fully_combinable().
    @param headers At least two header rows; only the first two are used
    '''
    left, right = headers[0], headers[1]
    return cols_fully_combinable(left[1], right[0])
|
48 |
1765
|
aaronmk
|
|
49 |
|
|
##### Merging
|
50 |
|
|
|
51 |
10255
|
aaronmk
|
def simplify(str_):
    '''Normalizes a string for loose comparison.
    Lowercases str_, then removes every match of "#" through the end of the
    string and every run of non-word characters and underscores.
    @param str_ The string to normalize
    @return str The lowercased string with those matches removed
    '''
    lowered = str_.lower()
    # NOTE(review): a "#" directly preceded by non-word characters is consumed
    # by the [\W_]+ alternative instead, so the text after it is kept
    # (e.g. 'a #b' -> 'ab') -- confirm this is intended
    return re.sub(r'#.*$|[\W_]+', r'', lowered)
|
52 |
4499
|
aaronmk
|
|
53 |
1504
|
aaronmk
|
def is_nonexplicit_empty_mapping(row):
    '''Checks whether every value after the first column is the empty string.
    A row with no values after the first column counts as empty.
    @param row A sequence of values
    @return bool
    '''
    # all() handles an empty row[1:] (reduce() without an initial value raises
    # TypeError there) and does not depend on the Python 2-only builtin reduce
    return all(value == '' for value in row[1:])
|
55 |
|
|
|
56 |
736
|
aaronmk
|
def merge_values(*vals):
    '''Joins the distinct non-empty values with "; ".
    Values keep the order of their first occurrence.
    e.g. 'a', '', 'b', 'a' -> 'a; b'
    @param vals The string values to merge
    @return str
    '''
    distinct = []
    for candidate in vals:
        if candidate == '': continue # empty values contribute nothing
        if candidate in distinct: continue # keep only the first occurrence
        distinct.append(candidate)
    return '; '.join(distinct)
|
61 |
|
|
|
62 |
733
|
aaronmk
|
def merge_rows(*rows):
    '''Merges rows column by column using merge_values().
    Rows shorter than the longest row are treated as padded with ''.
    e.g. ['a','b'] + ['','y','z'] = ['a','b; y','z']
    @param rows The rows (sequences of strings) to merge
    @return list One merged value per column of the longest row; [] when no
        rows are given
    '''
    def cell(row, i):
        # Columns past the end of a short row count as empty
        if i < len(row): return row[i]
        return ''

    # max() of an empty sequence raises ValueError, so handle no rows here
    width = max(len(row) for row in rows) if rows else 0
    return [merge_values(*[cell(row, i) for row in rows])
        for i in range(width)]
|
69 |
735
|
aaronmk
|
|
70 |
1768
|
aaronmk
|
def merge_mapping_cols(in_, out, prefer=None):
    '''Chooses the mapping columns of a merged mapping.
    @param in_ The input mapping's columns
    @param out The output mapping's columns
    @param prefer None = [in_[0], out[1]]; 0 = in_; 1 = out
    @return in_, out, or a new list [in_[0], out[1]], depending on prefer
    @raise Parser.SyntaxError If prefer is not None, 0, or 1
    '''
    if prefer is None:
        return [in_[0], out[1]]
    if prefer == 0:
        return in_
    if prefer == 1:
        return out
    raise Parser.SyntaxError('Invalid prefer: '+repr(prefer))
|
76 |
|
|
|
77 |
|
|
def merge_mappings(in_, out, **kw_args):
    '''Merges two mapping rows into one.
    The first two columns of each row are combined by merge_mapping_cols();
    the remaining columns are merged column by column by merge_rows().
    e.g. ['in','join','in_comments'] + ['join','out','out_comments'] =
    ['in','out','in_comments; out_comments']
    @param in_ The input mapping row
    @param out The output mapping row
    @param kw_args Passed through to merge_mapping_cols()
    '''
    mapping_cols = merge_mapping_cols(in_[:2], out[:2], **kw_args)
    extra_cols = merge_rows(in_[2:], out[2:])
    return mapping_cols + extra_cols
|
82 |
|
|
|
83 |
|
|
def merge_headers(in_, out, **kw_args):
    '''Merges two header rows into one.
    When in_[1] and out[1] are fully combinable (see cols_fully_combinable()),
    out[1] is first replaced IN PLACE by util.longest() of the two
    (presumably the longer column name -- confirm against util.longest()).
    The rows are then merged with merge_mappings().
    @param in_ The input header row
    @param out The output header row; element 1 may be modified
    @param kw_args Passed through to merge_mappings()
    '''
    candidates = [in_[1], out[1]]
    if cols_fully_combinable(*candidates):
        out[1] = util.longest(*candidates)
    return merge_mappings(in_, out, **kw_args)
|