Project

General

Profile

« Previous | Next » 

Revision 55

Split the XML tree manipulation functionality of xpath.py off into xml_xpath.py

View differences:

scripts/lib/xpath.py
1
# XPath-based XML tree manipulation
1
# XPath parsing
2 2

  
3
from copy import deepcopy
4
from xml.dom import Node
3
import copy
5 4

  
6 5
from Parser import Parser
7
import xml_dom
8 6

  
9 7
class XpathElem:
10 8
    def __init__(self, name, value=None, attrs=None, is_attr=False,
......
100 98
instance_level = 1
101 99

  
102 100
def obj(path):
103
    obj_path = deepcopy(path[:instance_level+1])
101
    obj_path = copy.deepcopy(path[:instance_level+1])
104 102
    obj_path[-1].is_ptr = False # prevent pointer w/o target
105 103
    return obj_path
106 104

  
......
108 106
    if has_types: id_level = instance_level
109 107
    else: id_level = 0
110 108
    path[id_level].attrs.append([XpathElem('id', id_, is_attr=True)])
111

  
112
def get(doc, path, create=False, last_only=None, parent=None):
113
    # Warning: The last_only optimization may put data that should be together
114
    # into separate nodes
115
    if parent == None: parent = doc.documentElement
116
    if last_only == None: last_only = create
117
    elem_idx = 0
118
    for elem in path:
119
        # Find possible matches
120
        children = []
121
        if elem.is_attr:
122
            child = parent.getAttributeNode(elem.name)
123
            if child != None: children = [child]
124
        elif elem.name == '.': children = [parent]
125
        else: children = xml_dom.by_tag_name(parent, elem.name, last_only)
126
        
127
        # Check each match
128
        node = None
129
        for child in children:
130
            is_match = elem.value == None or xml_dom.value(child) == elem.value
131
            for attr in elem.attrs:
132
                if not is_match: break
133
                is_match = get(doc, attr, False, last_only, child) != None
134
            if is_match: node = child; break
135
        
136
        # Create node
137
        if node == None:
138
            if not create: return None
139
            if elem.is_attr:
140
                parent.setAttribute(elem.name, '')
141
                node = parent.getAttributeNode(elem.name)
142
            else: node = parent.appendChild(doc.createElement(elem.name))
143
            if elem.value != None: xml_dom.set_value(doc, node, elem.value)
144
            for attr in elem.attrs: get(doc, attr, create, last_only, node)
145
        
146
        # Follow pointer
147
        if elem.is_ptr:
148
            path = deepcopy(path[elem_idx+1:]) # rest of path
149
            id_elem = backward_id(path[instance_level])
150
            if id_elem != None:
151
                # backward (child-to-parent) pointer with target ID attr
152
                set_value(id_elem, xml_dom.get_id(node))
153
            else: # forward (parent-to-child) pointer
154
                id_ = xml_dom.value(node)
155
                obj_path = obj(path) # target object
156
                if id_ == None or get(doc, obj_path, False, True) == None:
157
                    # no target or target attrs don't match
158
                    if not create: return None
159
                    
160
                    # Use last target object's ID + 1
161
                    obj_path[-1].attrs = [] # just get by tag name
162
                    last = get(doc, obj_path, False, True)
163
                    if last != None: id_ = str(int(xml_dom.get_id(last)) + 1)
164
                    else: id_ = '0'
165
                    
166
                    # Will append if target attrs didn't match. Place ! in XPath
167
                    # after element to fork at to avoid this.
168
                    xml_dom.set_value(doc, node, id_)
169
                else: last_only = False
170
                set_id(path, id_)
171
            return get(doc, path, create, last_only)
172
        
173
        parent = node
174
        elem_idx += 1
175
    return parent
scripts/lib/xml_xpath.py
1
# XPath-based XML tree manipulation
2

  
3
import copy
4

  
5
import xml_dom
6
import xpath
7

  
8
def get(doc, path, create=False, last_only=None, parent=None):
9
    # Warning: The last_only optimization may put data that should be together
10
    # into separate nodes
11
    if parent == None: parent = doc.documentElement
12
    if last_only == None: last_only = create
13
    elem_idx = 0
14
    for elem in path:
15
        # Find possible matches
16
        children = []
17
        if elem.is_attr:
18
            child = parent.getAttributeNode(elem.name)
19
            if child != None: children = [child]
20
        elif elem.name == '.': children = [parent]
21
        else: children = xml_dom.by_tag_name(parent, elem.name, last_only)
22
        
23
        # Check each match
24
        node = None
25
        for child in children:
26
            is_match = elem.value == None or xml_dom.value(child) == elem.value
27
            for attr in elem.attrs:
28
                if not is_match: break
29
                is_match = get(doc, attr, False, last_only, child) != None
30
            if is_match: node = child; break
31
        
32
        # Create node
33
        if node == None:
34
            if not create: return None
35
            if elem.is_attr:
36
                parent.setAttribute(elem.name, '')
37
                node = parent.getAttributeNode(elem.name)
38
            else: node = parent.appendChild(doc.createElement(elem.name))
39
            if elem.value != None: xml_dom.set_value(doc, node, elem.value)
40
            for attr in elem.attrs: get(doc, attr, create, last_only, node)
41
        
42
        # Follow pointer
43
        if elem.is_ptr:
44
            path = copy.deepcopy(path[elem_idx+1:]) # rest of path
45
            id_elem = xpath.backward_id(path[xpath.instance_level])
46
            if id_elem != None:
47
                # backward (child-to-parent) pointer with target ID attr
48
                xpath.set_value(id_elem, xml_dom.get_id(node))
49
            else: # forward (parent-to-child) pointer
50
                id_ = xml_dom.value(node)
51
                obj_path = xpath.obj(path) # target object
52
                if id_ == None or get(doc, obj_path, False, True) == None:
53
                    # no target or target attrs don't match
54
                    if not create: return None
55
                    
56
                    # Use last target object's ID + 1
57
                    obj_path[-1].attrs = [] # just get by tag name
58
                    last = get(doc, obj_path, False, True)
59
                    if last != None: id_ = str(int(xml_dom.get_id(last)) + 1)
60
                    else: id_ = '0'
61
                    
62
                    # Will append if target attrs didn't match. Place ! in XPath
63
                    # after element to fork at to avoid this.
64
                    xml_dom.set_value(doc, node, id_)
65
                else: last_only = False
66
                xpath.set_id(path, id_)
67
            return get(doc, path, create, last_only)
68
        
69
        parent = node
70
        elem_idx += 1
71
    return parent
scripts/map
10 10
import re
11 11
import sys
12 12
import xml.dom.minidom
13
from xml.dom.minidom import getDOMImplementation
14 13

  
15 14
sys.path.append(os.path.dirname(__file__)+"/lib")
16 15

  
......
47 46
    
48 47
    # Input datasource to XML tree
49 48
    if uses_map: # input is CSV
49
        import xml_xpath
50 50
        import xpath
51 51
        
52 52
        # Load map
......
64 64
        stream.close()
65 65
        
66 66
        # Load and map CSV
67
        doc = getDOMImplementation().createDocument(None, dest, None)
67
        doc = xml.dom.minidom.getDOMImplementation().createDocument(None, dest,
68
            None)
68 69
        reader = csv.reader(sys.stdin, **csv_config)
69 70
        fieldnames = reader.next()
70 71
        row_idx = 0
......
76 77
                    path = copy.deepcopy(map_[name]) # don't modify main value!
77 78
                    xpath.set_id(path, row_id, has_types)
78 79
                    xpath.set_value(path, value)
79
                    xpath.get(doc, path, True)
80
                    xml_xpath.get(doc, path, True)
80 81
            row_idx += 1
81 82
    else: doc = xml.dom.minidom.parse(sys.stdin) # input is XML
82 83
    

Also available in: Unified diff