Project

General

Profile

1
# XML DOM tree manipulation
2

    
3
import cgi
4
from HTMLParser import HTMLParser
5
from xml.dom import Node
6
import xml.dom.minidom as minidom
7

    
8
import strings
9

    
10
def escape(str_):
    '''Escapes a string for inclusion in XML markup.

    The string is passed through cgi.escape() with quote escaping enabled,
    converted with strings.to_unicode(), and then encoded to ASCII using
    numeric character references for anything that is not ASCII.

    Returns the escaped, ASCII-encoded string.
    '''
    escaped = cgi.escape(str_, True) # True: also escape double quotes
    return strings.to_unicode(escaped).encode('ascii', 'xmlcharrefreplace')
13

    
14
def unescape(str_):
    '''Returns str_ with its entity and character references decoded by
    HTMLParser.unescape()'''
    parser = HTMLParser()
    return parser.unescape(str_)
15

    
16
def get_id(node):
    '''Returns the value of node's "id" attribute'''
    node_id = node.getAttribute('id')
    return node_id
17

    
18
def set_id(node, id_):
    '''Sets node's "id" attribute to id_'''
    attr_name = 'id'
    node.setAttribute(attr_name, id_)
19

    
20
def is_empty(node):
    '''Whether node has no children at all (of any node type)'''
    # Identity test: None is a singleton, so `is` is the idiomatic comparison
    return node.firstChild is None
21

    
22
def has_one_child(node):
    '''Whether node has exactly one child (of any node type)'''
    first = node.firstChild
    # Exactly one child: there is a first child and nothing follows it
    return first is not None and first.nextSibling is None
24

    
25
class NodeIter:
    '''Iterates over all the children of a node (of any node type), from
    firstChild onwards via nextSibling'''

    def __init__(self, node):
        self.child = node.firstChild

    def __iter__(self):
        return self

    def curr(self):
        '''Returns the child the iterator is positioned at, without advancing.
        Raises StopIteration once the children are exhausted.'''
        if self.child is not None:
            return self.child
        raise StopIteration

    def next(self):
        '''Returns the current child and advances to its next sibling.
        Raises StopIteration once the children are exhausted.'''
        child = self.curr()
        self.child = self.child.nextSibling
        return child

    __next__ = next # Python 3 name for the same iterator-protocol method
38

    
39
def is_elem(node):
    '''Whether node is an element node'''
    node_type = node.nodeType
    return node_type == Node.ELEMENT_NODE
40

    
41
class NodeElemIter:
    '''Iterates over the children of a node that are elements, first to last,
    skipping children of every other node type'''

    def __init__(self, node):
        self.child = node.firstChild

    def __iter__(self):
        return self

    def curr(self):
        '''Returns the element the iterator is positioned at, first skipping
        forward over any non-element siblings.
        Raises StopIteration when no element child remains.'''
        while self.child is not None:
            if is_elem(self.child):
                return self.child
            self.child = self.child.nextSibling
        raise StopIteration

    def next(self):
        '''Returns the current element child and advances past it.
        Raises StopIteration when no element child remains.'''
        child = self.curr()
        self.child = self.child.nextSibling
        return child

    __next__ = next # Python 3 name for the same iterator-protocol method
56

    
57
def first_elem(node):
    '''Returns the first child of node that is an element.
    Lets the StopIteration from NodeElemIter propagate if there is none.'''
    elem_iter = NodeElemIter(node)
    return elem_iter.next()
58

    
59
class NodeElemReverseIter:
    '''Iterates over the children of a node that are elements, last to first,
    skipping children of every other node type'''

    def __init__(self, node):
        self.child = node.lastChild

    def __iter__(self):
        return self

    def curr(self):
        '''Returns the element the iterator is positioned at, first skipping
        backward over any non-element siblings.
        Raises StopIteration when no element child remains.'''
        while self.child is not None:
            if is_elem(self.child):
                return self.child
            self.child = self.child.previousSibling
        raise StopIteration

    def next(self):
        '''Returns the current element child and moves to the one before it.
        Raises StopIteration when no element child remains.'''
        child = self.curr()
        self.child = self.child.previousSibling
        return child

    __next__ = next # Python 3 name for the same iterator-protocol method
74

    
75
def last_elem(node):
    '''Returns the last child of node that is an element.
    Lets the StopIteration from NodeElemReverseIter propagate if there is
    none.'''
    elem_iter = NodeElemReverseIter(node)
    return elem_iter.next()
76

    
77
class NodeParentIter:
    '''Iterates upward from a node through its parentNode chain, starting
    with the node itself. Stops at the first node that is None or is not an
    element.'''

    def __init__(self, node):
        self.node = node

    def __iter__(self):
        return self

    def curr(self):
        '''Returns the node the iterator is positioned at, without advancing.
        Raises StopIteration if it is None or not an element.'''
        if self.node is not None and is_elem(self.node):
            return self.node
        raise StopIteration

    def next(self):
        '''Returns the current node and moves up to its parent.
        Raises StopIteration under the same conditions as curr().'''
        node = self.curr()
        self.node = self.node.parentNode
        return node

    __next__ = next # Python 3 name for the same iterator-protocol method
90

    
91
def is_text_node(node):
    '''Whether node is itself a text node'''
    node_type = node.nodeType
    return node_type == Node.TEXT_NODE
92

    
93
def is_text(node):
    '''Whether node has exactly one child and that child is a text node'''
    if not has_one_child(node):
        return False
    return is_text_node(node.firstChild)
94

    
95
def value(node):
    '''Returns the nodeValue of node's first child if it has one, otherwise
    node's own nodeValue'''
    first = node.firstChild
    if first is not None:
        return first.nodeValue
    return node.nodeValue
98

    
99
def set_value(node, value):
    '''Stores value in node.

    For an element, a new text node containing value is appended to it; any
    existing children are left in place. For any other node type, value is
    assigned to the node's nodeValue.
    '''
    if is_elem(node):
        node.appendChild(node.ownerDocument.createTextNode(value))
    else:
        node.nodeValue = value
102

    
103
class NodeTextEntryIter:
    '''Iterates over the element children of a node whose only child is a
    text node, producing a (tag name, text value) tuple for each. Element
    children that do not satisfy is_text() are skipped.'''

    def __init__(self, node):
        self.iter_ = NodeElemIter(node)

    def __iter__(self):
        return self

    def curr(self):
        '''Returns the (tag name, value) entry the iterator is positioned at,
        first skipping forward over element children that are not text-only.
        Raises StopIteration (from the underlying NodeElemIter) when no
        element child remains.'''
        while True:
            child = self.iter_.curr() # ends the loop by raising StopIteration
            if is_text(child):
                return (child.tagName, value(child))
            self.iter_.next()

    def next(self):
        '''Returns the current entry and advances past it.
        Raises StopIteration when no element child remains.'''
        entry = self.curr()
        self.iter_.next()
        return entry

    __next__ = next # Python 3 name for the same iterator-protocol method
118

    
119
def set_child(node, name, value):
    '''Appends a new child element called name, holding value, to node.
    Note: does not remove any existing child of the same name'''
    child = node.ownerDocument.createElement(name)
    set_value(child, value)
    node.appendChild(child)
124

    
125
def replace(old_node, new_node):
    '''Puts new_node in old_node's place under old_node's parent'''
    parent = old_node.parentNode
    # note order reversed: replaceChild() takes the new child first
    parent.replaceChild(new_node, old_node)
127

    
128
def replace_with_text(node, str_):
    '''Replaces node, in its parent, with a new text node containing str_'''
    text_node = node.ownerDocument.createTextNode(str_)
    replace(node, text_node)
130

    
131
def by_tag_name(node, name, last_only=False):
    '''Returns a list of the element children of node whose tag name is name.

    The children are scanned last to first, so the list is in reverse order:
    the last matching child comes first.
    last_only optimization returns last matching node (as a list of at most
    one item) by stopping the scan at the first match.
    '''
    children = []
    for child in NodeElemReverseIter(node):
        if child.tagName != name:
            continue
        children.append(child)
        if last_only:
            break
    return children
139

    
140
def create_doc(root='_'):
    '''Creates a new minidom document whose root element is named root.
    No namespace URI and no doctype are supplied.'''
    dom_impl = minidom.getDOMImplementation()
    return dom_impl.createDocument(None, root, None)
142

    
143
# Keyword arguments for pretty-printed output: four-space indentation steps
# and a newline after each node
prettyxml_config = {'addindent': '    ', 'newl': '\n'}
144

    
145
# minidom modifications
146

    
147
def __write_data(writer, data):
    '''Replacement for minidom._write_data that writes data through escape()'''
    writer.write(escape(data))
minidom._write_data = __write_data

def __Node_iter(self):
    '''Makes a node iterable over its children'''
    return NodeIter(self)
minidom.Node.__iter__ = __Node_iter

def __Node_str(self):
    '''Makes str(node) produce the node's XML serialization'''
    return self.toxml()
minidom.Node.__str__ = __Node_str

def __Node_pop(self):
    '''Removes the node's last child and returns it'''
    return self.removeChild(self.lastChild)
minidom.Node.pop = __Node_pop
154

    
155
def __Node_clear(self):
    '''Removes all of the node's children, last to first'''
    while not is_empty(self):
        self.pop()
minidom.Node.clear = __Node_clear
158

    
159
def __Element_write_opening(self, writer, indent='', addindent='', newl=''):
    '''Writes only the element's start tag, with its attributes, to writer.

    indent is written before the tag and newl after it. addindent is accepted
    so the signature parallels writexml() but is not used here.
    '''
    writer.write(indent+'<'+self.tagName)
    attrs = self.attributes
    for attr_idx in range(attrs.length):
        attr = attrs.item(attr_idx)
        # Attribute values must be quoted to be well-formed XML. escape()
        # is given cgi.escape's quote flag, so an embedded double quote
        # cannot end the value early.
        writer.write(' '+attr.name+'="'+escape(attr.value)+'"')
    writer.write('>'+newl)
minidom.Element.write_opening = __Element_write_opening
166

    
167
def __Element_write_closing(self, writer, indent='', addindent='', newl=''):
    '''Writes only the element's end tag to writer, followed by newl.

    indent and addindent are accepted so the signature parallels
    write_opening() but are not used: the end tag is written unindented.
    '''
    end_tag = '</'+self.tagName+'>'
    writer.write(end_tag+newl)
minidom.Element.write_closing = __Element_write_closing
170

    
171
# Keep the stock implementation so non-text elements can fall back to it
_writexml_orig = minidom.Element.writexml
def __Element_writexml(self, writer, indent='', addindent='', newl=''):
    '''Replacement for Element.writexml() that keeps a text-only element on
    one line.

    indent may be given as an int, in which case it is taken as a number of
    addindent repetitions. An element whose only child is a text node is
    written as start tag, escaped text and end tag with no newline in
    between; every other element is handed to the original writexml().
    '''
    if isinstance(indent, int):
        indent = addindent*indent
    if not is_text(self):
        _writexml_orig(self, writer, indent, addindent, newl)
        return
    self.write_opening(writer, indent, addindent, '') # no newline
    writer.write(escape(value(self)))
    self.write_closing(writer, indent, addindent, newl)
minidom.Element.writexml = __Element_writexml
180

    
181
def __Document_write_opening(self, writer, indent='', addindent='', newl='',
    encoding=None):
    '''Writes the XML declaration followed by the root element's start tag.

    encoding, if given, is added to the XML declaration. The remaining
    arguments are passed on to the root element's write_opening().
    The document must have exactly one child and it must be an element;
    this is checked with assertions.
    '''
    xmlDecl = '<?xml version="1.0" '
    if encoding is not None:
        xmlDecl += 'encoding="'+escape(encoding)+'"'
    xmlDecl += '?>'+newl
    writer.write(xmlDecl)
    assert has_one_child(self)
    assert is_elem(self.firstChild)
    self.firstChild.write_opening(writer, indent, addindent, newl)
minidom.Document.write_opening = __Document_write_opening
191

    
192
def __Document_write_closing(self, writer, indent='', addindent='', newl=''):
    '''Writes the end tag of the document's first child by delegating to
    that child's write_closing() with the same arguments'''
    root = self.firstChild
    root.write_closing(writer, indent, addindent, newl)
minidom.Document.write_closing = __Document_write_closing
(9-9/11)