Project

General

Profile

1
#!/usr/bin/env python
2
# Converts a CSV dataset to XML using a mappings spreadsheet
3

    
4
import csv
5
import re
6
import sys
7
from copy import deepcopy
8
from xml.dom.minidom import getDOMImplementation
9

    
10
from XpathParser import XpathParser, XpathElem
11
from xml_dom import by_path
12

    
13
def _open_csv(path):
    '''Opens a CSV file for reading in the mode the running Python expects.

    The csv module wants a binary-mode file on Python 2 and a text-mode file
    opened with newline='' on Python 3.
    '''
    if sys.version_info[0] >= 3: return open(path, 'r', newline='')
    return open(path, 'rb')


def _load_mappings(mappings_path, dest):
    '''Reads the mappings spreadsheet into parsed XPaths.

    The first column of the spreadsheet holds the source field name and the
    column whose header equals `dest` holds the destination path.

    @param mappings_path Path of the mappings CSV file
    @param dest Header of the column containing the destination paths
    @return (mappings, has_type_containers), where mappings maps each source
        field name to the result of XpathParser(path).parse(), and
        has_type_containers is True if any path used the `/*s/<word>` form
    @raise ValueError If no column of the header row is named `dest`
    '''
    # A `*` that follows `/` and precedes `s/<word>` stands for <word>
    type_container_re = re.compile(r'(?<=/)\*(?=s/(\w+))')

    mappings = {}
    has_type_containers = False # whether outer elements are type containers
    # `with` guarantees the file is closed even if parsing a row raises
    with _open_csv(mappings_path) as stream:
        reader = csv.reader(stream, delimiter=',', quotechar='"')
        fieldnames = next(reader)
        dest_idx = fieldnames.index(dest)
        for row in reader:
            # Blank or truncated rows have no usable mapping; skip them
            # instead of failing with IndexError
            if len(row) <= dest_idx: continue
            name = row[0]
            path = row[dest_idx]
            if name == '' or path == '': continue

            # NOTE(review): the meaning of the leading `?` marker is not
            # visible here; it is simply stripped
            if path.startswith('?'): path = path[1:]
            path = path.replace('<name>', name)
            path, repl_ct = type_container_re.subn(r'\1', path)
            if repl_ct > 0: has_type_containers = True # *s used for type elem
            mappings[name] = XpathParser(path).parse()
    return mappings, has_type_containers


def _convert_dataset(stream, doc, mappings, id_level):
    '''Adds every mapped, non-empty cell of a CSV dataset to an XML document.

    @param stream Open file-like object containing the CSV dataset; the first
        row is taken as the field names
    @param doc DOM document that by_path() is applied to
    @param mappings Source field name -> parsed XPath
    @param id_level Index of the path element that receives the row `id`
        attribute
    '''
    reader = csv.reader(stream, delimiter=',', quotechar='"')
    fieldnames = next(reader)
    # Row ids are the 0-based position of the row after the header
    for row_idx, row in enumerate(reader):
        row_id = str(row_idx)
        # zip() stops at the shorter sequence, so a short row just lacks its
        # trailing fields and surplus cells without a field name are ignored
        for name, value in zip(fieldnames, row):
            if value == '' or name not in mappings: continue
            path = deepcopy(mappings[name]) # don't modify main value!
            path[id_level].attrs.append([XpathElem('id', row_id,
                is_attr=True)])
            path[-1].value = value
            # NOTE(review): the third argument presumably tells by_path() to
            # create missing nodes; confirm against xml_dom
            by_path(doc, path, True)


def main():
    '''Converts the CSV dataset on stdin to XML on stdout.

    Command line: dest_mappings_column mappings_path <dataset >output
    The root element of the output document is named after
    dest_mappings_column.

    @raise Exception With a usage message if an argument is missing
    '''
    prog_name = sys.argv[0]
    args = sys.argv[1:] # work on a copy so that sys.argv is left intact
    try:
        dest = args.pop(0)
        mappings_path = args.pop(0)
    except IndexError: raise Exception('Usage: '+prog_name
        +' dest_mappings_column mappings_path <dataset >output')

    # Get mappings
    mappings, has_type_containers = _load_mappings(mappings_path, dest)
    # With type containers, the id goes on the element one level further in
    if has_type_containers: id_level = 1
    else: id_level = 0

    # Process dataset
    doc = getDOMImplementation().createDocument(None, dest, None)
    _convert_dataset(sys.stdin, doc, mappings, id_level)
    doc.writexml(sys.stdout, addindent='    ', newl='\n')
60

    
61
# Run the conversion only when executed as a script, so that importing this
# module does not start reading stdin
if __name__ == '__main__':
    main()
(5-5/9)