Project

General

Profile

1
#!/usr/bin/env python
2
# Converts a CSV dataset to XML using a mappings spreadsheet
3

    
4
import csv
5
import re
6
import sys
7
from copy import deepcopy
8
from xml.dom.minidom import getDOMImplementation
9

    
10
from XpathParser import XpathParser, XpathElem
11
from xml_dom import by_path
12

    
13
def main():
    '''Converts the CSV dataset on stdin to XML on stdout.
    
    Command line (read from, and popped off, sys.argv):
        prog dest_mappings_column mappings_path <dataset
    
    dest_mappings_column: header of the mappings spreadsheet column holding
        the output paths; also used as the name of the XML root element
    mappings_path: CSV file whose first column holds the input field names
    
    Raises Exception with a usage message if an argument is missing, and
    ValueError if dest_mappings_column is not a header of the mappings file.
    '''
    prog_name = sys.argv.pop(0)
    try:
        dest = sys.argv.pop(0)
        mappings_path = sys.argv.pop(0)
    except IndexError:
        raise Exception('Usage: '+prog_name
            +' dest_mappings_column mappings_path <dataset')
    
    # Get mappings
    mappings = {}
    # `with` closes the file even if a header lookup or path parse fails
    with open(mappings_path, 'rb') as stream:
        reader = csv.reader(stream, delimiter=',', quotechar='"')
        fieldnames = next(reader)
        src = fieldnames[0] # header of the input-names column; prefixes row ids
        dest_idx = fieldnames.index(dest)
        for row in reader:
            if not row: continue # blank line in the spreadsheet
            name = row[0]
            path = row[dest_idx]
            if path != '':
                # A leading '?' is dropped before parsing
                # NOTE(review): presumably marks a tentative mapping -- confirm
                if path.startswith('?'): path = path[1:]
                path = path.replace('<name>', name)
                # Expand a '*' path step that is followed by 's/<word>' into
                # <word>, e.g. '/*s/foo' -> '/foos/foo'
                path = re.sub(r'(?<=/)\*(?=s/(\w+))', r'\1', path)
                mappings[name] = XpathParser(path).parse()
    
    # Process dataset
    doc = getDOMImplementation().createDocument(None, dest, None)
    reader = csv.reader(sys.stdin, delimiter=',', quotechar='"')
    fieldnames = next(reader)
    # Blank rows are skipped below but still counted, so the ids of the other
    # rows do not depend on whether blank lines are present
    for row_idx, row in enumerate(reader):
        if not row: continue # blank line in the dataset
        row_id = src+'_'+str(row_idx)
        for idx, name in enumerate(fieldnames):
            value = row[idx]
            if value and name in mappings:
                path = deepcopy(mappings[name]) # don't modify main value!
                # Tag the second path step with this row's id
                path[1].attrs.append([XpathElem('id', row_id, is_attr=True)])
                path[-1].value = value
                # NOTE(review): presumably creates the nodes along path in doc
                # (third argument True) -- confirm against xml_dom.by_path
                by_path(doc, path, True)
    doc.writexml(sys.stdout, addindent='    ', newl='\n')
55

    
56
# Run the conversion immediately; there is no __name__ guard, so importing
# this module also executes main().
main()
(7-7/13)