Revision 274
Added by Aaron Marcuse-Kubitza about 13 years ago

Renamed util to bin. The utility scripts below were moved unchanged from util/ to bin/, and every reference to util/ in the Makefiles and wrapper scripts was updated to point at bin/.
util/env_password  (moved to bin/env_password)

    #!/bin/bash
    # Sets a password environment variable

    test "$#" -ge 1 || \
    { echo "Usage: . $0 env_var_name [msg] (note the initial \".\")" >&2; exit 2;}

    msg="$2"
    test -n "$msg" || msg="$1"

    if test -z "${!1+t}"; then # env var with name $1 is unset
        read -s -p "Enter $msg password: "; echo
        export "$1"="$REPLY"
    fi
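Because the variable must end up in the caller's shell, the script has to be sourced rather than executed. A usage sketch, mirroring how the test suite calls it (the variable name and prompt text are the ones used there):

    . bin/env_password mysql_password "your MySQL"
    # prompts "Enter your MySQL password: " only if $mysql_password is unset,
    # then exports it for the commands that follow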
util/ch_root  (moved to bin/ch_root)

    #!/usr/bin/env python
    # Transforms a map spreadsheet to use a different root

    import csv
    import os.path
    import re
    import sys

    sys.path.append(os.path.dirname(__file__)+"/../lib")

    import opts

    def all_not_none(list_):
        return reduce(lambda a, b: a and b, map(lambda e: e != None, list_))

    def main():
        # Get config from env vars
        config_names = ['in_root', 'out_root']
        env_names = []
        configs = map(lambda col: opts.get_env_vars(config_names, col, env_names),
            ['left', 'right'])
        if not all_not_none(configs): raise SystemExit(
            'Usage: '+opts.env_usage(env_names)+' '+sys.argv[0]+' <in_map >out_map')

        # Transform map
        reader = csv.reader(sys.stdin)
        writer = csv.writer(sys.stdout)
        cols = reader.next()
        for i in xrange(len(configs)):
            config = configs[i]
            label, sep, config['root'] = cols[i].partition(':')
            cols[i] = label+sep+config['out_root']
            config['in_root_re'] = r'^'+re.escape(config['in_root'])+r'\b'
        writer.writerow(cols)
        for row in reader:
            for i in xrange(len(configs)): row[i] = configs[i]['root']+row[i]
            def sub(i):
                row[i], n = re.subn(configs[i]['in_root_re'], r'', row[i])
                return n > 0
            if sub(0):
                if not sub(1): raise SystemExit('Map error: Root "'
                    +configs[1]['in_root']+'" is not contained in output mapping: '
                    +row[1])
            writer.writerow(row)

    main()
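A hypothetical invocation sketch. It assumes that opts.get_env_vars (in lib/opts.py, not shown in this revision) builds the variable names by prefixing each config name with the given column prefix, yielding left_in_root, left_out_root, right_in_root and right_out_root; running the script with nothing set prints the authoritative usage line. The root values and file names below are made up:

    left_in_root=/plot left_out_root=/location \
    right_in_root=/plotObservation right_out_root=/locationevent \
    bin/ch_root <VegX-VegBIEN.plots.csv >VegX-VegBIEN.plots.rerooted.csv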
util/map  (moved to bin/map)

    #!/usr/bin/env python
    # Maps one datasource to another, using a map spreadsheet if needed
    # For outputting an XML file to a PostgreSQL database, use the general format of
    # http://vegbank.org/vegdocs/xml/vegbank_example_ver1.0.2.xml

    import os.path
    import sys
    import xml.dom.minidom

    sys.path.append(os.path.dirname(__file__)+"/../lib")

    import opts
    from Parser import SyntaxException
    import sql
    import xml_dom
    import xml_func

    def metadata_value(name):
        if type(name) == str and name.startswith(':'): return name[1:]
        else: return None

    def main():
        env_names = []
        def usage_err():
            raise SystemExit('Usage: '+opts.env_usage(env_names, True)
                +' [commit=1] '+sys.argv[0]+' [map_path] [<input] [>output]')
        limit = opts.get_env_var('n', None, env_names)
        if limit != None: limit = int(limit)
        commit = opts.env_flag('commit')

        # Get db config from env vars
        db_config_names = ['engine', 'host', 'user', 'password', 'database']
        def get_db_config(prefix):
            return opts.get_env_vars(db_config_names, prefix, env_names)
        in_db_config = get_db_config('in')
        out_db_config = get_db_config('out')
        in_is_db = 'engine' in in_db_config
        out_is_db = 'engine' in out_db_config

        # Parse args
        map_path = None
        try: _prog_name, map_path = sys.argv
        except ValueError:
            if in_is_db: usage_err()

        # Load map header
        in_is_xpaths = True
        if map_path != None:
            import copy
            import csv

            import xpath

            metadata = []
            mappings = []
            stream = open(map_path, 'rb')
            reader = csv.reader(stream)
            in_label, out_label = reader.next()[:2]
            def split_col_name(name):
                name, sep, root = name.partition(':')
                return name, sep != '', root
            in_label, in_is_xpaths, in_root = split_col_name(in_label)
            out_label, out_is_xpaths, out_root = split_col_name(out_label)
            assert out_is_xpaths # CSV output not supported yet
            has_types = out_root.startswith('/*s/') # outer elements are types
            for row in reader:
                in_, out = row[:2]
                if out != '':
                    if out_is_xpaths: out = out_root+out
                    mappings.append((in_, out))
            stream.close()
        in_is_xml = in_is_xpaths and not in_is_db

        # Input datasource to XML tree, mapping if needed
        if in_is_xml:
            doc0 = xml.dom.minidom.parse(sys.stdin)
        if map_path != None:
            doc1 = xml_dom.create_doc(out_label)
            root = doc1.documentElement
            if in_is_db:
                assert in_is_xpaths

                import db_xml

                in_root_xml = xpath.path2xml(in_root)
                for i, mapping in enumerate(mappings):
                    in_, out = mapping
                    if metadata_value(in_) == None:
                        mappings[i] = (xpath.path2xml(in_root+'/'+in_), out)

                in_db = sql.connect(in_db_config)
                in_pkeys = {}
                for row_idx, row in enumerate(sql.rows(db_xml.get(in_db,
                    in_root_xml, in_pkeys, limit))):
                    row_id = str(row_idx)
                    pkey, = row
                    for in_, out in mappings:
                        value = metadata_value(in_)
                        if value == None:
                            in_ = in_.cloneNode(True) # don't modify orig value!
                            xml_dom.set_id(xpath.get(in_, in_root), pkey)
                            value = sql.value_or_none(db_xml.get(in_db, in_,
                                in_pkeys))
                        if value != None:
                            xpath.put_obj(root, out, row_id, has_types, str(value))
                in_db.close()
            elif in_is_xml:
                row = xpath.get(doc0.documentElement, in_root)
                for row_idx, row in enumerate(xml_dom.NodeElemIter(row.parentNode)):
                    if not (limit == None or row_idx < limit): break
                    row_id = str(row_idx)
                    for in_, out in mappings:
                        value = metadata_value(in_)
                        if value == None:
                            node = xpath.get(row, in_)
                            if node != None: value = xml_dom.value(node)
                        if value != None:
                            xpath.put_obj(root, out, row_id, has_types, value)
            else: # input is CSV
                map_ = dict(mappings)
                reader = csv.reader(sys.stdin)
                cols = reader.next()
                col_idxs = dict([(value, idx) for idx, value in enumerate(cols)])
                for i, mapping in enumerate(mappings):
                    in_, out = mapping
                    if metadata_value(in_) == None:
                        try: mappings[i] = (col_idxs[in_], out)
                        except KeyError: pass

                for row_idx, row in enumerate(reader):
                    if not (limit == None or row_idx < limit): break
                    row_id = str(row_idx)
                    for in_, out in mappings:
                        value = metadata_value(in_)
                        if value == None:
                            value = row[in_]
                            if value == '': value = None
                        if value != None:
                            xpath.put_obj(root, out, row_id, has_types, value)
            xml_func.process(root)
        else: doc1 = doc0

        # Output XML tree
        if out_is_db:
            from psycopg2.extensions import ISOLATION_LEVEL_SERIALIZABLE
            import db_xml

            out_db = sql.connect(out_db_config)
            out_db.set_isolation_level(ISOLATION_LEVEL_SERIALIZABLE)
            try:
                row_ct_ref = [0]
                db_xml.xml2db(out_db, doc1.documentElement, row_ct_ref)
                print 'Inserted '+str(row_ct_ref[0])+' rows'
                if commit: out_db.commit()
            finally:
                out_db.rollback()
                out_db.close()
        else: xml_dom.writexml(sys.stdout, doc1) # output is XML

    try: main()
    except SyntaxException, ex: raise SystemExit(str(ex))
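Two invocation sketches (the map and input file names are placeholders; the out_* variables are normally supplied by sourcing vegbien_dest or via the top-level map wrapper). Whether stdin is read as CSV, as XML, or ignored in favor of a source database is decided by the map spreadsheet's input column header and the in_* variables:

    # map stdin to VegBIEN XML on stdout, stopping after the first 10 input rows
    n=10 bin/map some_map.csv <input_data >output.xml

    # same, but load the result into PostgreSQL instead of printing XML;
    # without commit=1 the transaction is rolled back at the end
    out_engine=PostgreSQL out_host=localhost out_user=bien \
    out_password="$bien_password" out_database=vegbien commit=1 \
    bin/map some_map.csv <input_data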
util/vegbien_dest  (moved to bin/vegbien_dest)

    #!/bin/sh
    # Requires var bien_password

    export out_engine=PostgreSQL out_host=localhost out_user=bien \
        out_password="$bien_password" out_database=vegbien
util/psql_vegbien  (moved to bin/psql_vegbien)

    #!/bin/sh
    # Accesses the vegbien db

    selfDir="$(dirname -- "$0")"

    bien_password="$(cat "$selfDir/bien_password")"
    . "$selfDir/vegbien_dest"
    export PGHOST="$out_host" PGUSER="$out_user" PGPASSWORD="$out_password"

    psql --dbname="$out_database" "$@"
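psql options and arguments pass straight through, so the wrapper works for both interactive sessions and one-off commands; it reads the password from a bien_password file next to the script. For example:

    bin/psql_vegbien            # interactive session as the bien user
    bin/psql_vegbien -c '\dt'   # one-off command: list the tables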
util/join  (moved to bin/join)

    #!/usr/bin/env python
    # Inner-joins two map spreadsheets A->B and B->C to A->C

    import csv
    import sys

    def main():
        try: _prog_name, map_1_path = sys.argv
        except ValueError:
            raise SystemExit('Usage: '+sys.argv[0]+' <map_0 map_1 [| '+sys.argv[0]
                +' map_2]... >joined_map')

        # Get map 1
        map_1 = {}
        stream = open(map_1_path, 'rb')
        reader = csv.reader(stream)
        map_1_in, map_1_out = reader.next()[:2]
        for row in reader:
            if row[1] != '': map_1[row[0]] = row[1]
        stream.close()

        # Join map 1 to map 0
        reader = csv.reader(sys.stdin)
        writer = csv.writer(sys.stdout)
        cols = reader.next()
        if not cols[1] == map_1_in: raise SystemExit('Map error: '
            'Map 0 output column name doesn\'t match map 1 input column name')
        cols[1] = map_1_out
        writer.writerow(cols)
        for row in reader:
            try: row[1] = map_1[row[1]]
            except KeyError: continue # skip row
            writer.writerow(row)

    main()
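A worked sketch with made-up spreadsheet contents (the column labels and the VegBIEN path are illustrative, not taken from the real maps). map_0 arrives on stdin and map_1 is the file argument; a map_0 row whose output value has no entry in map_1 is dropped:

    # map_0.csv (A->B)          # map_1.csv (B->C)
    #   SrcCol,VegX             #   VegX,VegBIEN
    #   plot_code,plotName      #   plotName,/location/authorlocationcode
    #   stem_height,height
    bin/join <map_0.csv map_1.csv >joined.csv
    # joined.csv: header "SrcCol,VegBIEN"; plot_code now maps to
    # /location/authorlocationcode, and the stem_height row is skipped
    # because "height" has no entry in map_1.csv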
util/simplify_xpath  (moved to bin/simplify_xpath)

    #!/usr/bin/env python
    # Removes duplication from XPath expressions
    # Filters one XPath per line from stdin to stdout

    import re
    import sys

    def main():
        while True:
            line = sys.stdin.readline()
            if line == '': break
            # Forward * abbrs
            line = re.sub(r'(?i)(\w+)(?=\w*(?:->/[^/]*)?/\1\b)', r'*', line)
            # Backward * abbrs
            line = re.sub(r'(?i)((\w+)->/[^/]*/[^/]*\[)\2', r'\1*', line)
            sys.stdout.write(line)

    main()
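It is a plain line filter, so it slots directly into a pipeline; a minimal usage sketch (the file names are hypothetical):

    bin/simplify_xpath <xpaths.txt >xpaths.simplified.txt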
util/review  (moved to bin/review)

    #!/usr/bin/env python
    # Converts a map spreadsheet to human-readable (but machine unusable) form
    # Usage: self <in_map >out_map

    import csv
    import re
    import sys

    def sub_nested(regex, repl, str_):
        while True:
            str_, n = re.subn(regex, repl, str_)
            if n == 0: return str_

    def cleanup(xpath):
        truncated = False

        # Remove attrs
        xpath = sub_nested(r':\[[^\[\]]*?\]', r'', xpath)

        # Remove lookahead assertions
        xpath = sub_nested(r'\((/[^\)]*?)\)(?=/)', r'\1', xpath)

        # Remove pointers
        xpath, n = re.subn(r'^.*->', r'', xpath)
        if n > 0: truncated = True

        # Remove part of path before first key list, XML function, or path end
        # Leave enough to include the table of a user-defined value
        xpath, n = re.subn(r'^(?:/(?!_)[\w*]+)*(?=(?:/(?!_)[\w*]+){2}(?:\[|/_|$))',
            r'', xpath)
        if n > 0: truncated = True

        # Remove backward (child-to-parent) pointer's target ID attr
        xpath = re.sub(r'\[[\w*]+\]|(?<=\[)[\w*]+,', r'', xpath)

        # Remove negative keys
        xpath = re.sub(r',?!(?:[\w*]+/)*@?[\w*]+', r'', xpath)

        # Remove path before key
        xpath = re.sub(r'(?:[\w*]+/)*(@?[\w*]+)(?==)', r'\1', xpath)

        # Prepend / to show truncation
        if truncated: xpath = '/'+xpath

        return xpath

    def main():
        # Convert map
        reader = csv.reader(sys.stdin)
        writer = csv.writer(sys.stdout)
        writer.writerow(reader.next())
        for row in reader:
            for i in xrange(2): row[i] = cleanup(row[i])
            writer.writerow(row)

    main()
util/fix_permissions  (moved to bin/fix_permissions)

    #!/bin/sh
    # Sets correct permissions on shared bien files

    selfDir="$(dirname -- "$0")" # dir of symlink $0, not this script itself
    cd "$selfDir"

    opts='--quiet --changes'
    chgrp --recursive --no-dereference $opts bien .
    find . -type d -exec chmod $opts g+s "{}" \; # only dirs should be setgid
    for perm in r w x; do # extend user perms to group
        find . -not -type l -perm -u=$perm -exec chmod $opts g+$perm "{}" \;
        # don't chmod symlinks
    done
util/repl  (moved to bin/repl)

    #!/usr/bin/env python
    # Modifies a map spreadsheet A->B or any file using a replacements spreadsheet
    # A->C or B->C

    import csv
    import re
    import sys

    def main():
        try: _prog_name, repl_path = sys.argv[:2]
        except ValueError:
            raise SystemExit('Usage: '+sys.argv[0]+' <map repl [col_num] [| '+
                sys.argv[0]+' repl_1 [col_num_1]]... >new_map')
        col_num = None
        try: col_num = sys.argv[2]
        except IndexError: pass
        if col_num != None: col_num = int(col_num) # 0-based

        # Get replacements
        repls = []
        stream = open(repl_path, 'rb')
        reader = csv.reader(stream)
        repl_in, repl_out = reader.next()[:2]
        for row in reader:
            in_, out = row[:2]
            if in_ != '':
                if re.match(r'^\w+$', in_): in_ = r'\b'+in_+r'(?=(?:(?i)_id)?\b)'
                    # match whole word
                repls.append((in_, out))
        stream.close()
        def repl_all(str_):
            for repl, with_ in repls: str_ = re.sub(repl, with_, str_)
            return str_

        # Modify map or file
        if col_num != None:
            reader = csv.reader(sys.stdin)
            writer = csv.writer(sys.stdout)
            cols = reader.next()
            label, sep, root = cols[col_num].partition(':')
            if label != repl_in: raise SystemExit('Map error: Map column '+
                str(col_num)+' label "'+label+'" doesn\'t match replacements input '
                'column label "'+repl_in+'"')
            cols[col_num] = repl_out+sep+repl_all(root)
            writer.writerow(cols)
            for row in reader:
                row[col_num] = repl_all(row[col_num])
                writer.writerow(row)
        else:
            while True:
                line = sys.stdin.readline()
                if line == '': break
                sys.stdout.write(repl_all(line))

    main()
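A hypothetical run that renames terms in column 1 of a map (the term names and file names are made up). The replacements spreadsheet's header must carry the label of the map column being rewritten, and a bare-word pattern is matched as a whole word, including when it is followed by an _id suffix:

    # repls.csv:
    #   VegX,VegX
    #   plotUniqueIdentifier,plotName
    bin/repl repls.csv 1 <VegX-VegBIEN.plots.csv >VegX-VegBIEN.plots.new.csv
    # without a column number, the replacements are applied to every line of any file:
    bin/repl repls.csv <notes.txt >notes.new.txt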
util/join_sort  (moved to bin/join_sort)

    #!/bin/bash
    # Inner-joins two map spreadsheets and sorts the output on the output column

    selfDir="$(dirname -- "$0")"

    "$selfDir/join" "$@"|"$selfDir/sort" 1 0
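This is the one-step form used by inputs/input.Makefile below; it sorts the joined map on the output column, then the input column. With the Makefile's % stem filled in (the organisms stem is just an example), a representative call is:

    bin/join_sort <map.VegX.organisms.csv mappings/VegX-VegBIEN.organisms.csv \
        >map.VegBIEN.organisms.csv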
util/sort  (moved to bin/sort)

    #!/usr/bin/env python
    # Sorts a spreadsheet based on a column

    import csv
    import sys

    def by_idx(list_, idxes): return [list_[i] for i in idxes]

    def main():
        col_nums = sys.argv[1:]
        if col_nums == []:
            raise SystemExit('Usage: '+sys.argv[0]+' col_num... <map >sorted_map')
        col_nums = map(int, col_nums) # 0-based

        # Sort map
        reader = csv.reader(sys.stdin)
        cols = reader.next()
        def get_sort_by(row): return by_idx(row, col_nums)
        rows = list(reader)
        rows.sort(lambda *rows: cmp(*map(get_sort_by, rows)))
        writer = csv.writer(sys.stdout)
        writer.writerow(cols)
        map(writer.writerow, rows)

    main()
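Column numbers are 0-based and given in priority order. For example, to order a map by its output column and break ties on the input column (the combination join_sort uses; the file names are placeholders):

    bin/sort 1 0 <map.csv >sorted_map.csv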
util/union  (moved to bin/union)

    #!/usr/bin/env python
    # Combines two map spreadsheets A0->B and A1->C to A->B, with A0 overwriting A1

    import csv
    import sys

    def main():
        try: _prog_name, map_1_path = sys.argv
        except ValueError:
            raise SystemExit('Usage: '+sys.argv[0]+' <map_0 map_1 [| '+sys.argv[0]
                +' map_2]... >union_map')

        map_ = {}
        def add_map(reader):
            for row in reader:
                if row[1] != '': map_[row[0]] = row[1:]

        # Get map 1
        stream = open(map_1_path, 'rb')
        reader = csv.reader(stream)
        map_1_cols = reader.next()
        add_map(reader)
        stream.close()

        # Add map 0 to map 1, overwriting existing entries
        reader = csv.reader(sys.stdin)
        map_0_cols = reader.next()
        if not map_0_cols[0] == map_1_cols[0]: raise SystemExit('Map error: '
            'Map 1 column 0 name doesn\'t match map 0 column 0 name')
        add_map(reader)

        # Write combined map
        writer = csv.writer(sys.stdout)
        writer.writerow(map_0_cols)
        for in_, rest in map_.iteritems(): writer.writerow([in_]+rest)

    main()
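A usage sketch (file names are placeholders): entries from map_0 on stdin take precedence over entries with the same input value in the map_1 file. Rows are accumulated in a dict, so the output order is not preserved; pipe through sort if order matters:

    bin/union <map_0.csv map_1.csv >union_map.csv
    # or, with the result ordered on the output column:
    bin/union <map_0.csv map_1.csv | bin/sort 1 0 >union_map.csv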
util/join_passthru  (moved to bin/join_passthru)

    #!/bin/sh
    # Joins two map spreadsheets A->B0 and B0->B1 to A->B1, B1 a modification of B0

    selfDir="$(dirname -- "$0")"

    test "$#" -eq 2 || { echo "Usage: $0 map_0 map_1 >joined_map" >&2; exit 2;}

    "$selfDir/join" <"$1" "$2"|"$selfDir/union" "$1"
test/map

    @@ lines 12-18 @@
      test -n "$n" || export n="$tests_n" testMode=1

      test "$(hostname)" = nimoy && isNimoy=t || isNimoy=
    - test -n "$isNimoy" && . ../util/env_password mysql_password "your MySQL"
    + test -n "$isNimoy" && . ../bin/env_password mysql_password "your MySQL"

      bien_password="$(cat ../config/bien_password)"

    @@ lines 74-80 @@
          true # ignore last command's exit status
      }

    - vegbienDest=../util/vegbien_dest
    + vegbienDest=../bin/vegbien_dest

      function toDb()
      {
inputs/input.Makefile

    @@ lines 4-10 @@
      vegbienMaps := $(subst .VegX.,.VegBIEN.,$(vegxMaps))

      root := ../..
    - util := $(root)/util
    + bin := $(root)/bin
      mappings := $(root)/mappings
      map := $(root)/map
      map2vegbien := env out_database=vegbien $(map)

    @@ lines 30-36 @@
      #####

      map.VegBIEN.%.csv: map.VegX.%.csv
    -     $(util)/join_sort <$< $(mappings)/VegX-VegBIEN.$(*F).csv >$@
    +     $(bin)/join_sort <$< $(mappings)/VegX-VegBIEN.$(*F).csv >$@
      .PRECIOUS: map.VegBIEN.%.csv

      #####
bin/fix_permissions, bin/vegbien_dest, bin/join_sort, bin/env_password, bin/map, bin/psql_vegbien, bin/repl, bin/review, bin/sort, bin/join_passthru, bin/union, bin/join, bin/ch_root, bin/simplify_xpath

Added with contents identical to the corresponding util/ scripts listed above; only the directory changed.
Makefile

    @@ lines 69-75 @@

      psqlOpts := --set ON_ERROR_STOP=1 --quiet
      psqlAsAdmin := sudo -u postgres psql $(psqlOpts)
    - psqlAsBien := ./util/psql_vegbien $(psqlOpts)
    + psqlAsBien := ./bin/psql_vegbien $(psqlOpts)
      bienPassword := $(shell cat config/bien_password)

      postgres_user: _not_file
map

    @@ lines 2-8 @@
      # Runs map with BIEN defaults

      selfDir="$(dirname -- "$0")"
    - utilDir="$selfDir/util"
    + binDir="$selfDir/bin"

      shopt -s nullglob

    @@ lines 14-26 @@
          if test -z "$in_user"; then
              if test _"$(hostname)" = _nimoy -a _"$in_engine" = _MySQL; then
                  test -n "${mysql_password+t}" || \
    -                 . "$utilDir/env_password" mysql_password "your MySQL"
    +                 . "$binDir/env_password" mysql_password "your MySQL"
                  export in_user="$USER" in_password="$mysql_password"
              else
                  export in_user=bien in_password="$bien_password"
              fi
          fi
      fi
    - test _"$out_database" = _vegbien && . "$utilDir/vegbien_dest"
    + test _"$out_database" = _vegbien && . "$binDir/vegbien_dest"

    - "$utilDir/map" "$@"
    + "$binDir/map" "$@"
mappings/join

    @@ lines 9-14 @@
      for map in *-$1.*.csv; do
          type="${map%.*}" # remove extension
          type="${type#*.}" # remove up to last "."
    -     ../util/join <"$map" "$1-$2.$type.csv"|\
    -         ../util/sort 1 0 >"${map/$1/$2}" # sort on output col
    +     ../bin/join <"$map" "$1-$2.$type.csv"|\
    +         ../bin/sort 1 0 >"${map/$1/$2}" # sort on output col
      done
mappings/review

    @@ lines 9-13 @@
      mkdir -p for_review

      for in in *.{organisms,plots}.csv; do
    -     ../util/review <"$in" >"for_review/$in"
    +     ../bin/review <"$in" >"for_review/$in"
      done
mappings/Makefile

    @@ lines 30-38 @@
      misc: $(misc)
      .PRECIOUS: $(misc)

    - repl = ../util/repl
    - sort = ../util/sort 1 0
    - chRoot = ../util/ch_root <$< >$@
    + repl = ../bin/repl
    + sort = ../bin/sort 1 0
    + chRoot = ../bin/ch_root <$< >$@
      empty = $(SED) -n \
          's/^CREATE TABLE ([0-9A-Za-z_]+) \($$/TRUNCATE \1 CASCADE;/p' <$< >$@