Project

General

Profile

1
#!/usr/bin/env python
2
# Converts a CSV dataset to XML using a mappings spreadsheet
3

    
4
import csv
5
import re
6
import sys
7
from copy import deepcopy
8
from xml.dom.minidom import getDOMImplementation
9

    
10
import xpath
11

    
12
def main():
    """Convert the CSV dataset on stdin to XML on stdout using a mappings file.

    Command line: ``prog dest_mappings_column mappings_path <dataset >output``

    The first column of the mappings spreadsheet holds dataset field names;
    the column whose header equals ``dest_mappings_column`` holds the path
    to use for that field.  Every ``<name>`` in a path is replaced by the
    field name before the path is parsed with ``xpath.XpathParser``.  Each
    non-empty dataset cell whose field has a mapping is then applied to the
    output document through the ``xpath`` helper module.

    Both arguments are consumed (popped) from ``sys.argv``.

    Raises:
        Exception: with a usage message if either argument is missing.
        ValueError: if the mappings header has no column named ``dest``.
    """
    prog_name = sys.argv.pop(0)
    try:
        dest = sys.argv.pop(0)
        mappings_path = sys.argv.pop(0)
    except IndexError:
        raise Exception('Usage: '+prog_name
            +' dest_mappings_column mappings_path <dataset >output')

    # Get mappings
    mappings = {}
    has_types = False # whether outer elements are type containers
    # 'rb' is the mode the Python 2 csv module expects.  The with-block
    # closes the file even if the header lookup or a path parse raises.
    with open(mappings_path, 'rb') as stream:
        reader = csv.reader(stream, delimiter=',', quotechar='"')
        fieldnames = next(reader)
        dest_idx = fieldnames.index(dest)
        for row in reader:
            # csv yields [] for blank lines, and rows may be short: a row
            # with no cell in the dest column carries no mapping.
            if len(row) <= dest_idx:
                continue
            name = row[0]
            path = row[dest_idx]
            if name != '' and path != '':
                if path.startswith('/*s/'): has_types = True # *s used for type elem
                path = path.replace('<name>', name)
                mappings[name] = xpath.XpathParser(path).parse()

    # Process dataset
    doc = getDOMImplementation().createDocument(None, dest, None)
    reader = csv.reader(sys.stdin, delimiter=',', quotechar='"')
    fieldnames = next(reader)
    # Blank/short rows still advance row_idx, so the ids of the remaining
    # rows keep matching their position in the dataset.
    for row_idx, row in enumerate(reader):
        row_id = str(row_idx)
        # zip stops at the shorter sequence: cells missing from a short row
        # are skipped instead of raising IndexError, and surplus cells
        # beyond the header are ignored as before.
        for name, value in zip(fieldnames, row):
            if value != '' and name in mappings:
                path = deepcopy(mappings[name]) # don't modify main value!
                xpath.set_id(path, row_id, has_types)
                xpath.set_value(path, value)
                # NOTE(review): the True flag presumably asks xpath.get to
                # create the node -- confirm against the xpath module.
                xpath.get(doc, path, True)
    doc.writexml(sys.stdout, addindent='    ', newl='\n')
54

    
55
# Script entry point. The call is unguarded, so it also runs on import.
main()
(5-5/10)