Project

General

Profile

1 11 aaronmk
#!/usr/bin/env python
2 51 aaronmk
# Converts a CSV dataset to XML using a map spreadsheet
3 11 aaronmk
4 51 aaronmk
import copy
5 11 aaronmk
import csv
6 42 aaronmk
import os.path
7 11 aaronmk
import re
8
import sys
9
from xml.dom.minidom import getDOMImplementation
10
11 43 aaronmk
# Add the lib/ directory located next to this script to the module search path
# (the xpath module imported below presumably lives there -- confirm)
sys.path.append(os.path.dirname(__file__)+"/lib")
12 21 aaronmk
import xpath
13 11 aaronmk
14
def main():
    """Convert the CSV dataset on stdin to XML on stdout using a map spreadsheet.

    Usage: ``prog map_path <dataset >output``

    The map spreadsheet at ``map_path`` is a CSV file. The first two cells of
    its header row are read as ``(src, dest)``; ``dest`` is used as the name of
    the output document's root element. Each following row supplies a dataset
    column name and a path; any ``<name>`` placeholder in the path is replaced
    by the column name before the path is parsed with ``xpath.XpathParser``.
    Map rows with an empty name or path, or with fewer than two cells, are
    ignored.

    The dataset's first row is its header. For every non-empty cell whose
    column is present in the map, a private copy of the column's parsed path is
    tagged with the row's 0-based index (as a string) and the cell value, then
    applied to the document through ``xpath.get``.

    Raises:
        Exception: if the command line does not consist of exactly one
            argument (the map path).
    """
    prog_name = sys.argv[0]
    try: prog_name, map_path = sys.argv
    except ValueError:
        raise Exception('Usage: '+prog_name+' map_path <dataset >output')

    # Get map
    mappings = {} # dataset column name -> parsed path
    has_types = False # whether outer elements are type containers
    # 'rb' is what the Python 2 csv module expects; the with-block guarantees
    # the file is closed even if reading or parsing a path raises
    with open(map_path, 'rb') as stream:
        reader = csv.reader(stream, delimiter=',', quotechar='"')
        _src, dest = next(reader)[:2] # only dest (root element name) is used
        for row in reader:
            if len(row) < 2: continue # blank or incomplete map row
            name, path = row[:2]
            if name != '' and path != '':
                if path.startswith('/*s/'): has_types = True # *s used for type elem
                path = path.replace('<name>', name)
                mappings[name] = xpath.XpathParser(path).parse()

    # Process dataset
    doc = getDOMImplementation().createDocument(None, dest, None)
    reader = csv.reader(sys.stdin, delimiter=',', quotechar='"')
    fieldnames = next(reader)
    for row_idx, row in enumerate(reader):
        row_id = str(row_idx)
        # zip() stops at the shorter sequence, so cells missing from a short
        # row are treated like empty values instead of raising IndexError
        for name, value in zip(fieldnames, row):
            if value != '' and name in mappings:
                path = copy.deepcopy(mappings[name]) # don't modify main value!
                xpath.set_id(path, row_id, has_types)
                xpath.set_value(path, value)
                # NOTE(review): the True flag presumably asks xpath.get to
                # create missing nodes -- confirm against lib/xpath
                xpath.get(doc, path, True)
    doc.writexml(sys.stdout, addindent='    ', newl='\n')
51
52
# Script entry point: runs the conversion unconditionally when this file is loaded
main()