Revision 51
Added by Aaron Marcuse-Kubitza about 13 years ago
scripts/data2xml | ||
---|---|---|
1 | 1 |
#!/usr/bin/env python |
2 |
# Converts a CSV dataset to XML using a mappings spreadsheet
|
|
2 |
# Converts a CSV dataset to XML using a map spreadsheet |
|
3 | 3 |
|
4 |
import copy |
|
4 | 5 |
import csv |
5 | 6 |
import os.path |
6 | 7 |
import re |
7 | 8 |
import sys |
8 |
from copy import deepcopy |
|
9 | 9 |
from xml.dom.minidom import getDOMImplementation |
10 | 10 |
|
11 | 11 |
sys.path.append(os.path.dirname(__file__)+"/lib") |
... | ... | |
13 | 13 |
|
14 | 14 |
def main(): |
15 | 15 |
prog_name = sys.argv[0] |
16 |
try: mappings_path = sys.argv[1]
|
|
16 |
try: prog_name, map_path = sys.argv
|
|
17 | 17 |
except ValueError: |
18 |
raise Exception('Usage: '+prog_name+' mappings_path <dataset >output')
|
|
18 |
raise Exception('Usage: '+prog_name+' map_path <dataset >output') |
|
19 | 19 |
|
20 |
# Get mappings
|
|
21 |
mappings = {}
|
|
20 |
# Get map |
|
21 |
map = {} |
|
22 | 22 |
has_types = False # whether outer elements are type containers |
23 |
stream = open(mappings_path, 'rb')
|
|
23 |
stream = open(map_path, 'rb') |
|
24 | 24 |
reader = csv.reader(stream, delimiter=',', quotechar='"') |
25 | 25 |
src, dest = reader.next()[:2] |
26 | 26 |
for row in reader: |
... | ... | |
28 | 28 |
if name != '' and path != '': |
29 | 29 |
if path.startswith('/*s/'): has_types = True # *s used for type elem |
30 | 30 |
path = path.replace('<name>', name) |
31 |
mappings[name] = xpath.XpathParser(path).parse()
|
|
31 |
map[name] = xpath.XpathParser(path).parse() |
|
32 | 32 |
stream.close() |
33 | 33 |
|
34 | 34 |
# Process dataset |
... | ... | |
41 | 41 |
row_id = str(row_idx) |
42 | 42 |
for idx, name in enumerate(fieldnames): |
43 | 43 |
value = row[idx] |
44 |
if value != '' and name in mappings:
|
|
45 |
path = deepcopy(mappings[name]) # don't modify main value!
|
|
44 |
if value != '' and name in map: |
|
45 |
path = copy.deepcopy(map[name]) # don't modify main value!
|
|
46 | 46 |
xpath.set_id(path, row_id, has_types) |
47 | 47 |
xpath.set_value(path, value) |
48 | 48 |
xpath.get(doc, path, True) |
scripts/util/join_maps | ||
---|---|---|
1 |
#!/usr/bin/env python |
|
2 |
# Inner-joins two map spreadsheets A->B and B->C to A->C |
|
3 |
|
|
4 |
import csv |
|
5 |
import sys |
|
6 |
|
|
7 |
def _open_csv(path):
    """Open path for reading in the mode the csv module expects.

    Python 3's csv module needs a text stream opened with newline='' so it
    can process line endings itself; Python 2's needs a binary stream.
    """
    if sys.version_info[0] >= 3:
        return open(path, newline='')
    return open(path, 'rb')


def main():
    """Inner-join two map spreadsheets A->B and B->C into A->C.

    Map 1 (B->C) is read from the CSV file named by the sole command-line
    argument; map 0 (A->B) is read as CSV from stdin. The joined map is
    written as CSV to stdout: a header row made of map 0's input column name
    and map 1's output column name, then one row for every map 0 row whose
    second column is a key of map 1. Only the first two columns of each
    spreadsheet are used.

    Raises:
        Exception: with a usage message unless exactly one argument is given.
        AssertionError: if map 0's output column name differs from map 1's
            input column name.
    """
    prog_name = sys.argv[0]
    try:
        prog_name, map_1_path = sys.argv
    except ValueError:
        raise Exception('Usage: '+prog_name+' map_1 <map_0 >joined_map')

    csv_config = dict(delimiter=',', quotechar='"')

    # Get map 1
    map_1 = {}
    with _open_csv(map_1_path) as stream:  # closed even if parsing fails
        reader = csv.reader(stream, **csv_config)
        map_1_in, map_1_out = next(reader)[:2]
        for row in reader:
            # csv.reader yields [] for blank lines, so check the width
            # before indexing; entries with an empty output are dropped.
            if len(row) >= 2 and row[1] != '':
                map_1[row[0]] = row[1]

    # Join map 1 to map 0
    reader = csv.reader(sys.stdin, **csv_config)
    map_0_in, map_0_out = next(reader)[:2]
    if map_0_out != map_1_in:
        # Raised explicitly (rather than via `assert`) so the check still
        # runs under `python -O`; the exception type is unchanged.
        raise AssertionError('map_0 output column '+repr(map_0_out)
            +' does not match map_1 input column '+repr(map_1_in))
    writer = csv.writer(sys.stdout, **csv_config)
    writer.writerow([map_0_in, map_1_out])
    for row in reader:
        if len(row) >= 2 and row[1] in map_1:
            writer.writerow([row[0], map_1[row[1]]])
|
32 |
|
|
33 |
main() |
|
0 | 34 |
Also available in: Unified diff
Created join_maps to join two 2-column map spreadsheets