Project

General

Profile

1
#!/usr/bin/env python
2
# Converts a CSV dataset to XML using a mappings spreadsheet
3

    
4
import csv
5
import os.path
6
import re
7
import sys
8
from copy import deepcopy
9
from xml.dom.minidom import getDOMImplementation
10

    
11
sys.path.append(os.path.dirname(__file__)+"/lib")
12
import xpath
13

    
14
def _open_csv(path):
    """Open a CSV file in the mode the csv module expects on this Python."""
    if sys.version_info[0] >= 3:
        # Python 3's csv module wants text mode with newline translation off
        return open(path, 'r', newline='')
    # Python 2's csv module wants binary mode
    return open(path, 'rb')


def main():
    """Convert the CSV dataset on stdin to XML on stdout using a mappings file.

    Command line: ``prog dest_mappings_column mappings_path <dataset >output``

    The mappings spreadsheet's first column holds dataset column names; the
    column whose header equals ``dest_mappings_column`` holds the path each
    dataset column maps to (``<name>`` in a path is replaced by the column
    name). ``dest_mappings_column`` is also used as the root element name of
    the output document.

    Both arguments are popped off ``sys.argv``, so ``sys.argv`` is consumed.

    Blank or short CSV rows are tolerated: missing cells are treated like
    empty cells and skipped.

    Raises:
        Exception: with a usage message if either argument is missing.
        ValueError: if ``dest_mappings_column`` is not a header of the
            mappings file.
        StopIteration: if the mappings file or the dataset is completely
            empty (has no header row).
    """
    prog_name = sys.argv.pop(0)
    try:
        dest = sys.argv.pop(0)
        mappings_path = sys.argv.pop(0)
    except IndexError: raise Exception('Usage: '+prog_name
        +' dest_mappings_column mappings_path <dataset >output')
    
    # Get mappings
    mappings = {}
    has_types = False # whether outer elements are type containers
    # `with` guarantees the file is closed even if a path fails to parse
    with _open_csv(mappings_path) as stream:
        reader = csv.reader(stream, delimiter=',', quotechar='"')
        fieldnames = next(reader)
        dest_idx = fieldnames.index(dest)
        for row in reader:
            # Blank/short rows have no usable mapping for this column
            if len(row) <= dest_idx: continue
            name = row[0]
            path = row[dest_idx]
            if name != '' and path != '':
                # *s used for type elem
                if path.startswith('/*s/'): has_types = True
                path = path.replace('<name>', name)
                mappings[name] = xpath.XpathParser(path).parse()
    
    # Process dataset
    doc = getDOMImplementation().createDocument(None, dest, None)
    reader = csv.reader(sys.stdin, delimiter=',', quotechar='"')
    fieldnames = next(reader)
    for row_idx, row in enumerate(reader):
        row_id = str(row_idx)
        # zip() stops at the shorter sequence, so cells missing from a short
        # row are skipped and cells beyond the header are ignored
        for name, value in zip(fieldnames, row):
            if value == '' or name not in mappings: continue
            path = deepcopy(mappings[name]) # don't modify main value!
            xpath.set_id(path, row_id, has_types)
            xpath.set_value(path, value)
            # NOTE(review): presumably creates the node in doc (third arg
            # True) -- confirm against lib/xpath
            xpath.get(doc, path, True)
    doc.writexml(sys.stdout, addindent='    ', newl='\n')
56

    
57
# Run the conversion unconditionally: there is no __name__ guard, so this
# executes whenever the file is run or imported.
main()
(2-2/4)