Project

General

Profile

« Previous | Next » 

Revision 56

Started adding XML-XML mapping support to map

View differences:

scripts/map
3 3
# For outputting an XML file to a PostgreSQL database, use the general format of
4 4
# http://vegbank.org/vegdocs/xml/vegbank_example_ver1.0.2.xml
5 5

  
6
import copy
7
import csv
8 6
import os
9 7
import os.path
10
import re
11 8
import sys
12 9
import xml.dom.minidom
13 10

  
......
44 41
    
45 42
    csv_config = dict(delimiter=',', quotechar='"')
46 43
    
47
    # Input datasource to XML tree
48
    if uses_map: # input is CSV
49
        import xml_xpath
44
    # Load map
45
    is_xml = True
46
    if uses_map:
47
        import copy
48
        import csv
49
        
50 50
        import xpath
51 51
        
52
        # Load map
53 52
        map_ = {}
54 53
        has_types = False # whether outer elements are type containers
55 54
        stream = open(map_path, 'rb')
56 55
        reader = csv.reader(stream, **csv_config)
57 56
        src, dest = reader.next()[:2]
57
        src, sep, src_base = src.partition('/')
58
        is_xml = sep != ''
58 59
        for row in reader:
59 60
            name, path = row[:2]
60 61
            if name != '' and path != '':
......
62 63
                path = path.replace('<name>', name)
63 64
                map_[name] = xpath.XpathParser(path).parse()
64 65
        stream.close()
66
    
67
    # Input datasource to XML tree
68
    if is_xml: doc = xml.dom.minidom.parse(sys.stdin)
69
    if uses_map:
70
        import xml_xpath
65 71
        
66
        # Load and map CSV
67
        doc = xml.dom.minidom.getDOMImplementation().createDocument(None, dest,
68
            None)
69
        reader = csv.reader(sys.stdin, **csv_config)
70
        fieldnames = reader.next()
71
        row_idx = 0
72
        for row in reader:
73
            row_id = str(row_idx)
74
            for idx, name in enumerate(fieldnames):
75
                value = row[idx]
76
                if value != '' and name in map_:
77
                    path = copy.deepcopy(map_[name]) # don't modify main value!
78
                    xpath.set_id(path, row_id, has_types)
79
                    xpath.set_value(path, value)
80
                    xml_xpath.get(doc, path, True)
81
            row_idx += 1
82
    else: doc = xml.dom.minidom.parse(sys.stdin) # input is XML
72
        out_doc = xml.dom.minidom.getDOMImplementation().createDocument(None,
73
            dest, None)
74
        if is_xml: raise Exception('XML-XML mapping not supported yet')
75
        else: # input is CSV
76
            reader = csv.reader(sys.stdin, **csv_config)
77
            fieldnames = reader.next()
78
            row_idx = 0
79
            for row in reader:
80
                row_id = str(row_idx)
81
                for idx, name in enumerate(fieldnames):
82
                    value = row[idx]
83
                    if value != '' and name in map_:
84
                        path = copy.deepcopy(map_[name]) # don't modify value!
85
                        xpath.set_id(path, row_id, has_types)
86
                        xpath.set_value(path, value)
87
                        xml_xpath.get(out_doc, path, True)
88
                row_idx += 1
89
        doc = out_doc
83 90
    
84 91
    # Output XML tree
85 92
    if to_db_config != None: # output is database

Also available in: Unified diff