Revision 161
Added by Aaron Marcuse-Kubitza about 13 years ago
scripts/test/map | ||
---|---|---|
8 | 8 |
shopt -s nullglob |
9 | 9 |
|
10 | 10 |
test -n "$n" || export n=2 |
11 |
|
|
12 | 11 |
. ../util/env_password in_password |
13 | 12 |
|
13 |
make --directory=../../mappings |
|
14 | 14 |
|
15 |
# Update generated mappings |
|
16 |
../../mappings/extract_plot_map |
|
17 |
../../mappings/join_all_vegbank |
|
18 |
|
|
19 | 15 |
function trace() |
20 | 16 |
{ |
21 | 17 |
( |
... | ... | |
25 | 21 |
) >&2 |
26 | 22 |
} |
27 | 23 |
|
28 |
function fromCsv()
|
|
24 |
function map()
|
|
29 | 25 |
{ |
30 |
(_in="$in"; trace) |
|
31 |
"$1" <"$in" || exit |
|
26 |
map="../../mappings/$src-$out_fmt.$table.csv" |
|
27 |
if test -e "$map" -a -e "$in"; then |
|
28 |
( |
|
29 |
ext="${in##*.}" # after last "." |
|
30 |
if test "$ext" == "sh"; then |
|
31 |
trace . "$in" |
|
32 |
(. "$in"; "$1") |
|
33 |
else |
|
34 |
(_in="$in"; trace) |
|
35 |
"$1" <"$in" |
|
36 |
fi |
|
37 |
) || exit # abort tester |
|
38 |
fi |
|
32 | 39 |
} |
33 | 40 |
|
34 |
function fromDb() |
|
35 |
{ |
|
36 |
trace . "$in" |
|
37 |
(. "$in"; "$1") || exit |
|
38 |
} |
|
39 |
|
|
40 | 41 |
function toXml() |
41 | 42 |
{ |
42 | 43 |
out="$stem.$out_fmt.xml" |
43 | 44 |
( |
44 | 45 |
set -x |
45 |
../map "../../mappings/$src-$out_fmt.$table.csv" >"output/$out" || exit
|
|
46 |
diff "accepted_output/$out" "output/$out" || true |
|
46 |
../map "$map" >"output/$out" || exit
|
|
47 |
diff "accepted_output/$out" "output/$out" || true # ignore exit status
|
|
47 | 48 |
) |
48 | 49 |
} |
49 | 50 |
|
50 | 51 |
function toDb() |
51 | 52 |
{ |
52 |
(set -x; ../map2vegbank "../../mappings/$src-VegBank.$table.csv")
|
|
53 |
(set -x; ../map2vegbank "$map")
|
|
53 | 54 |
} |
54 | 55 |
|
55 |
for in in input/*.{csv,sh}; do |
|
56 |
ext="${in##*.}" # after last "." |
|
56 |
for in in input/*; do |
|
57 | 57 |
stem="$(basename -- "${in%.*}")" # remove extension and dir |
58 | 58 |
src="${stem%.*}" # before last "." |
59 | 59 |
table="${stem##*.}" # after last "." |
60 | 60 |
|
61 |
# Test exporting to XML |
|
62 |
for out_fmt in VegX VegBank; do |
|
63 |
if test "$ext" == "csv"; then fromCsv toXml |
|
64 |
elif test "$ext" == "sh"; then fromDb toXml |
|
65 |
fi |
|
66 |
done |
|
67 |
|
|
68 |
# Test exporting to VegBank db |
|
69 |
if test "$ext" == "csv"; then fromCsv toDb |
|
70 |
elif test "$ext" == "sh"; then fromDb toDb |
|
71 |
fi |
|
61 |
for out_fmt in VegX VegBank; do map toXml; done # source to XML |
|
62 |
out_fmt=VegBank |
|
63 |
# VegX to VegBank |
|
64 |
( |
|
65 |
src=VegX |
|
66 |
in="output/$stem.$src.xml" |
|
67 |
stem="$stem.2-step" |
|
68 |
map toXml |
|
69 |
) || exit |
|
70 |
map toDb # source to VegBank db |
|
72 | 71 |
done |
scripts/map | ||
---|---|---|
55 | 55 |
mappings = [] |
56 | 56 |
stream = open(map_path, 'rb') |
57 | 57 |
reader = csv.reader(stream) |
58 |
src, dest = reader.next()[:2]
|
|
58 |
in_label, out_label = reader.next()[:2]
|
|
59 | 59 |
def split_col_name(name): |
60 | 60 |
name, sep, root = name.partition(':') |
61 | 61 |
return name, sep != '', root |
62 |
src, in_is_xpaths, src_root = split_col_name(src)
|
|
63 |
dest, out_is_xpaths, dest_root = split_col_name(dest)
|
|
62 |
in_label, in_is_xpaths, in_root = split_col_name(in_label)
|
|
63 |
out_label, out_is_xpaths, out_root = split_col_name(out_label)
|
|
64 | 64 |
assert out_is_xpaths # CSV output not supported yet |
65 |
has_types = dest_root.startswith('/*s/') # outer elements are types
|
|
65 |
has_types = out_root.startswith('/*s/') # outer elements are types
|
|
66 | 66 |
for row in reader: |
67 | 67 |
in_, out = row[:2] |
68 | 68 |
if out != '': |
69 | 69 |
value = metadata_value(in_) |
70 |
is_metadata = value != None |
|
71 |
if in_is_xpaths and not is_metadata: |
|
72 |
in_ = xpath.parse(src_root+in_) |
|
73 |
if out_is_xpaths: out = xpath.parse(dest_root+out) |
|
74 |
if is_metadata: metadata.append((value, out)) |
|
70 |
if out_is_xpaths: out = xpath.parse(out_root+out) |
|
71 |
if value != None: metadata.append((value, out)) |
|
75 | 72 |
else: mappings.append((in_, out)) |
76 | 73 |
stream.close() |
77 | 74 |
in_is_xml = in_is_xpaths and not in_is_db |
78 | 75 |
|
79 | 76 |
# Input datasource to XML tree, mapping if needed |
80 |
if in_is_xml: doc0 = xml.dom.minidom.parse(sys.stdin) |
|
77 |
if in_is_xml: |
|
78 |
doc0 = xml.dom.minidom.parse(sys.stdin) |
|
81 | 79 |
if map_path != None: |
82 |
doc1 = xml_dom.create_doc(dest)
|
|
80 |
doc1 = xml_dom.create_doc(out_label)
|
|
83 | 81 |
root = doc1.documentElement |
84 | 82 |
if in_is_db: |
85 | 83 |
assert in_is_xpaths |
86 | 84 |
|
87 | 85 |
import db_xml |
88 | 86 |
|
89 |
src_root = xpath.parse(src_root) |
|
90 |
src_root_xml = xpath.path2xml(src_root) |
|
91 |
mappings = [(xpath.path2xml(in_), out) for in_, out in mappings] |
|
87 |
in_root_xml = xpath.path2xml(in_root) |
|
88 |
mappings = [(xpath.path2xml(in_root+in_), out) for in_, out in mappings] |
|
92 | 89 |
|
93 | 90 |
in_db = sql.connect(in_db_config) |
94 | 91 |
in_pkeys = {} |
95 | 92 |
for row_idx, row in enumerate(sql.rows(db_xml.get(in_db, |
96 |
src_root_xml, in_pkeys, limit))):
|
|
93 |
in_root_xml, in_pkeys, limit))):
|
|
97 | 94 |
row_id, = row |
98 | 95 |
row_id = str(row_id) |
99 | 96 |
|
... | ... | |
102 | 99 |
for value, out in metadata: put_col(out, value) |
103 | 100 |
for in_, out in mappings: |
104 | 101 |
in_ = in_.cloneNode(True) # don't modify orig value! |
105 |
xml_dom.set_id(xpath.get(in_, src_root), row_id)
|
|
102 |
xml_dom.set_id(xpath.get(in_, in_root), row_id)
|
|
106 | 103 |
value = sql.value_or_none(db_xml.get(in_db, in_, in_pkeys)) |
107 | 104 |
if value != None: put_col(out, str(value)) |
108 | 105 |
in_db.close() |
109 |
elif in_is_xml: raise SystemExit('XML input not supported yet') |
|
106 |
elif in_is_xml: |
|
107 |
row = xpath.get(doc0.documentElement, in_root) |
|
108 |
for row_idx, row in enumerate(xml_dom.NodeElemIter(row.parentNode)): |
|
109 |
if not (limit == None or row_idx < limit): break |
|
110 |
row_id = str(row_idx) |
|
111 |
|
|
112 |
def put_col(path, value): |
|
113 |
xpath.put_obj(root, path, row_id, has_types, value) |
|
114 |
for value, out in metadata: put_col(out, value) |
|
115 |
for in_, out in mappings: |
|
116 |
node = xpath.get(row, in_) |
|
117 |
if node != None: put_col(out, xml_dom.value(node)) |
|
110 | 118 |
else: # input is CSV |
111 | 119 |
map_ = dict(mappings) |
112 | 120 |
reader = csv.reader(sys.stdin) |
Also available in: Unified diff
map: Added support for XML input