1
|
# Map spreadsheet manipulation
|
2
|
|
3
|
import re
|
4
|
|
5
|
import Parser
|
6
|
import strings
|
7
|
import util
|
8
|
|
9
|
##### Metadata
|
10
|
|
11
|
def col_info(col_name, require_root=False):
    '''Split a column name with syntax "datasrc[format,...]:root" into parts.

    @param col_name The column name to parse
    @param require_root Whether a column name without a ":root" part is
        rejected
    @return tuple (label, root, prefixes)
    @raise Parser.SyntaxError If col_name does not match the syntax, or if
        require_root is set and col_name has no root
    '''
    parsed = re.match(r'^([^\[:]*)(?:\[([^\]]*?)\])?(?::(.*))?$', col_name)
    if parsed is not None:
        label, formats, root = parsed.groups()
        if root is not None or not require_root:
            # formats is None when there is no "[...]" section; presumably
            # util.coalesce() substitutes '' in that case -- confirm in util
            return label, root, strings.split(',', util.coalesce(formats, ''))
    raise Parser.SyntaxError('Column name must have '
        'syntax "datasrc[format,...]:root" (formats optional): '+col_name)
|
21
|
|
22
|
##### Combinability
|
23
|
|
24
|
def col_formats(col_name):
    '''@return list The column's label followed by its format prefixes, as
    parsed by col_info()'''
    info = col_info(col_name)
    # info is (label, root, prefixes); the root is not needed here
    return [info[0]] + info[2]
|
27
|
|
28
|
def cols_combinable(*col_names):
    '''@return The result of strings.overlaps() applied to each column's
    formats (see col_formats()), joined with commas'''
    joined_formats = []
    for name in col_names:
        joined_formats.append(','.join(col_formats(name)))
    return strings.overlaps(*joined_formats)
|
30
|
|
31
|
def combinable(*headers):
    '''Like cols_combinable(), but takes header rows and uses the first
    element of each one as its column name'''
    first_cols = [header[0] for header in headers]
    return cols_combinable(*first_cols)
|
32
|
|
33
|
def strip_root(root):
    '''Remove every ":[...]" segment from a root.

    @param root str The root to strip
    @return str root without its ":[...]" segments
    '''
    bracket_segment = re.compile(r':\[[^\]]*?\]')
    return bracket_segment.sub(r'', root)
|
34
|
|
35
|
def stripped_root(col_name):
    '''Get a column's root with its ":[...]" segments removed.

    @param col_name Column name in the syntax accepted by col_info()
    @return The result of util.do_ignore_none(strip_root, root). Presumably
        this is the stripped root string, or None when the column name has
        no root -- TODO confirm against util.do_ignore_none
    '''
    root = col_info(col_name)[1]  # col_info() returns (label, root, prefixes)
    return util.do_ignore_none(strip_root, root)
|
39
|
|
40
|
def roots_combinable(*roots):
    '''Check whether the roots of two column names are compatible.

    Only the first two column names are compared.

    @param roots Column names in the syntax accepted by col_info()
    @return bool True if either stripped root is None, or if both stripped
        roots are equal
    '''
    # Build a real list: on Python 3, map() returns an iterator, which cannot
    # be indexed
    stripped = [stripped_root(col_name) for col_name in roots]
    return (stripped[0] is None or stripped[1] is None
        or stripped[0] == stripped[1])
|
43
|
|
44
|
def join_combinable(*headers):
    '''Check whether two header rows can be joined.

    The columns compared are the second element of the first header and the
    first element of the second header.

    @param headers Two header rows, whose elements are column names
    @return A true value if the two columns are combinable according to both
        cols_combinable() and roots_combinable()
    '''
    cols = [headers[0][1], headers[1][0]]
    # cols already holds column names, so call cols_combinable() directly.
    # combinable() expects header rows and would index into each name, which
    # compares only the first character of each column name.
    return cols_combinable(*cols) and roots_combinable(*cols)
|
47
|
|
48
|
##### Merging
|
49
|
|
50
|
def is_nonexplicit_empty_mapping(row):
    '''Check whether every value after the first element of row is ''.

    @param row Sequence of values; its first element is not examined
    @return bool True if every element of row[1:] equals '', including when
        row[1:] is empty
    '''
    # all() handles an empty row[1:] (a reduce() without an initial value
    # raises TypeError there) and works on Python 3, where reduce() is not a
    # builtin. NOTE(review): this assumes util.and_ was a plain boolean AND.
    return all(value == '' for value in row[1:])
|
52
|
|
53
|
def merge_values(*vals):
    '''Join the distinct non-empty values with "; ", keeping the order in
    which each value first appears.

    @param vals Strings to merge
    @return str The merged values, or '' if there are none
    '''
    merged = []
    for candidate in vals:
        # Skip empty values and values that were already added
        if candidate == '' or candidate in merged:
            continue
        merged.append(candidate)
    return '; '.join(merged)
|
58
|
|
59
|
def merge_rows(*rows):
    '''Merge rows column by column using merge_values().

    e.g. ['a','b'] + ['','y','z'] = ['a','b; y','z']

    @param rows Sequences of values; shorter rows are treated as if padded
        with ''
    @return list One merged value per column index of the longest row, or []
        if no rows are given
    '''
    if not rows:
        return []  # max() of an empty sequence would raise ValueError

    def get(row, i):
        # Indexes past the end of a row count as the empty value
        try:
            return row[i]
        except IndexError:
            return ''

    # range() rather than xrange(), which does not exist on Python 3
    width = max(map(len, rows))
    return [merge_values(*[get(row, i) for row in rows])
        for i in range(width)]
|
66
|
|
67
|
def merge_mappings(in_, out):
    '''Chain an input mapping row and an output mapping row into one row.

    e.g. ['in','join','in_comments'] + ['join','out','out_comments'] =
    ['in','out','in_comments; out_comments']

    @param in_ Row whose first element becomes the result's first element
    @param out Row whose second element becomes the result's second element
    @return list The new mapping followed by the merged remaining columns
    '''
    mapping = [in_[0], out[1]]
    # Everything after the first two columns is merged with merge_rows()
    comments = merge_rows(in_[2:], out[2:])
    return mapping + comments
|