Project

General

Profile

1 11 aaronmk
#!/usr/bin/env python
2
# Converts a CSV dataset to XML using a mappings spreadsheet
3
4
import csv
5 42 aaronmk
import os.path
6 11 aaronmk
import re
7
import sys
8
from copy import deepcopy
9
from xml.dom.minidom import getDOMImplementation
10
11 43 aaronmk
# Make the lib/ directory that sits next to this script importable (the
# `import xpath` that follows relies on it). The script's directory is made
# absolute first: when the script is started by bare file name,
# os.path.dirname(__file__) is '' and plain concatenation would yield "/lib".
sys.path.append(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), "lib"))
12 21 aaronmk
import xpath
13 11 aaronmk
14
def _open_csv_file(path):
    """Open *path* for reading in the mode the csv module expects.

    Python 2's csv module wants a file opened in binary mode, whereas
    Python 3's wants a text-mode file opened with newline=''.
    """
    if sys.version_info[0] >= 3:
        return open(path, newline='')
    return open(path, 'rb')


def _load_mappings(mappings_path, dest):
    """Read the mappings spreadsheet and return (mappings, has_types).

    The first row is the header; the column whose header equals *dest* holds
    the destination paths. Each later row whose first cell (the source field
    name) and destination cell are both non-empty contributes one entry:
    mappings[name] is the parsed path, with every '<name>' placeholder in
    the path replaced by the field name before parsing.

    has_types is True if any used path starts with '/*s/'.

    Raises ValueError if *dest* is not one of the header cells.
    """
    mappings = {}
    has_types = False # whether outer elements are type containers
    # `with` guarantees the file is closed even if parsing a row raises
    with _open_csv_file(mappings_path) as stream:
        reader = csv.reader(stream, delimiter=',', quotechar='"')
        fieldnames = next(reader)
        dest_idx = fieldnames.index(dest)
        for row in reader:
            # csv.reader yields [] for blank lines and short lists for
            # truncated rows; such rows have no destination cell to map
            if len(row) <= dest_idx: continue
            name = row[0]
            path = row[dest_idx]
            if name != '' and path != '':
                if path.startswith('/*s/'): has_types = True # *s used for type elem
                path = path.replace('<name>', name)
                mappings[name] = xpath.XpathParser(path).parse()
    return mappings, has_types


def main():
    """Convert the CSV dataset on stdin to XML on stdout.

    Command line: dest_mappings_column mappings_path <dataset >output

    dest_mappings_column names the column of the mappings spreadsheet to take
    paths from, and is also used as the name of the XML root element.
    The arguments are popped off sys.argv as they are read.

    Raises Exception carrying the usage message if either argument is
    missing.
    """
    prog_name = sys.argv.pop(0)
    try:
        dest = sys.argv.pop(0)
        mappings_path = sys.argv.pop(0)
    except IndexError:
        raise Exception('Usage: '+prog_name
            +' dest_mappings_column mappings_path <dataset >output')

    # Get mappings
    mappings, has_types = _load_mappings(mappings_path, dest)

    # Process dataset
    doc = getDOMImplementation().createDocument(None, dest, None)
    reader = csv.reader(sys.stdin, delimiter=',', quotechar='"')
    fieldnames = next(reader)
    for row_idx, row in enumerate(reader):
        row_id = str(row_idx) # 0-based position among the data rows
        # zip() stops at the shorter sequence, so cells missing from a short
        # or blank row are skipped just like empty cells, and cells beyond
        # the header are ignored
        for name, value in zip(fieldnames, row):
            if value != '' and name in mappings:
                path = deepcopy(mappings[name]) # don't modify main value!
                xpath.set_id(path, row_id, has_types)
                xpath.set_value(path, value)
                # NOTE(review): presumably the True flag makes xpath.get
                # create the addressed nodes in doc -- confirm in lib/xpath
                xpath.get(doc, path, True)
    doc.writexml(sys.stdout, addindent='    ', newl='\n')
56
57
# Run the conversion only when executed as a script, so that importing this
# module (e.g. from a test) does not consume sys.argv and stdin.
if __name__ == '__main__':
    main()