Project

General

Profile

1 21 aaronmk
# XML DOM tree manipulation
2
3 73 aaronmk
import cgi
4
from HTMLParser import HTMLParser
5 21 aaronmk
from xml.dom import Node
6 299 aaronmk
import xml.dom.minidom as minidom
7 21 aaronmk
8 73 aaronmk
import strings
9
10
def escape(str_):
    '''Escapes a string for use in XML text or attribute values.

    cgi.escape() replaces &, < and > (and, because quoting is enabled, ")
    with entities. The result is then encoded to ASCII, with every non-ASCII
    character turned into a numeric character reference.
    Note: strings.to_unicode() is assumed to return a unicode string.
    '''
    escaped = cgi.escape(str_, True) # True: also escape double quotes
    return strings.to_unicode(escaped).encode('ascii', 'xmlcharrefreplace')
13
14
def unescape(str_):
    '''Returns str_ with its entity and character references unescaped by
    HTMLParser.unescape()'''
    parser = HTMLParser()
    return parser.unescape(str_)
15
16 21 aaronmk
def get_id(node):
    '''Returns the value of node's "id" attribute'''
    id_ = node.getAttribute('id')
    return id_
17
18
def set_id(node, id_):
    '''Sets node's "id" attribute to id_'''
    node.setAttribute('id', id_)
19
20 135 aaronmk
def is_empty(node):
    '''Returns whether node has no children'''
    return node.firstChild is None
21
22 301 aaronmk
def has_one_child(node):
    '''Returns whether node has exactly one child (of any node type)'''
    first = node.firstChild
    return first is not None and first.nextSibling is None
24
25 305 aaronmk
class NodeIter:
    '''Iterates over all the children of a node, whatever their node type,
    from the first child to the last'''

    def __init__(self, node): self.child = node.firstChild

    def __iter__(self): return self

    def curr(self):
        '''Returns the current child without advancing.
        Raises StopIteration once the children are exhausted.'''
        if self.child is not None: return self.child
        raise StopIteration

    def next(self):
        '''Returns the current child and advances to its next sibling'''
        child = self.curr()
        self.child = self.child.nextSibling
        return child
38
39 298 aaronmk
def is_elem(node):
    '''Returns whether node is an element node'''
    node_type = node.nodeType
    return node_type == Node.ELEMENT_NODE
40
41 21 aaronmk
class NodeElemIter:
    '''Iterates over the element children of a node, from the first child to
    the last, skipping children that are not elements'''

    def __init__(self, node): self.child = node.firstChild

    def __iter__(self): return self

    def curr(self):
        '''Returns the current element child without advancing past it.
        Any non-element children in front of it are skipped (and consumed).
        Raises StopIteration when no element children are left.'''
        while self.child is not None:
            if is_elem(self.child): return self.child
            self.child = self.child.nextSibling
        raise StopIteration

    def next(self):
        '''Returns the current element child and advances past it'''
        child = self.curr()
        self.child = self.child.nextSibling
        return child
56
57
def first_elem(node):
    '''Returns the first element child of node.
    Raises StopIteration if node has no element children.'''
    elems = NodeElemIter(node)
    return elems.next()
58
59
class NodeElemReverseIter:
    '''Iterates over the element children of a node in reverse, from the last
    child to the first, skipping children that are not elements'''

    def __init__(self, node): self.child = node.lastChild

    def __iter__(self): return self

    def curr(self):
        '''Returns the current element child without moving past it.
        Any non-element children reached before it are skipped (and consumed).
        Raises StopIteration when no element children are left.'''
        while self.child is not None:
            if is_elem(self.child): return self.child
            self.child = self.child.previousSibling
        raise StopIteration

    def next(self):
        '''Returns the current element child and moves to its previous
        sibling'''
        child = self.curr()
        self.child = self.child.previousSibling
        return child
74
75
def last_elem(node):
    '''Returns the last element child of node.
    Raises StopIteration if node has no element children.'''
    elems = NodeElemReverseIter(node)
    return elems.next()
76
77
class NodeParentIter:
    '''Iterates over a node and then its ancestors, innermost first, stopping
    at the first node that is missing or is not an element'''

    def __init__(self, node): self.node = node

    def __iter__(self): return self

    def curr(self):
        '''Returns the current node without moving up.
        Raises StopIteration if there is no current node or it is not an
        element.'''
        if self.node is not None and is_elem(self.node): return self.node
        raise StopIteration

    def next(self):
        '''Returns the current node and moves up to its parent'''
        node = self.curr()
        self.node = self.node.parentNode
        return node
90
91 298 aaronmk
def is_text_node(node):
    '''Returns whether node is itself a text node'''
    node_type = node.nodeType
    return node_type == Node.TEXT_NODE
92
93 301 aaronmk
def is_text(node):
    '''Returns whether node's one and only child is a text node'''
    if not has_one_child(node): return False
    return is_text_node(node.firstChild)
94 21 aaronmk
95
def value(node):
    '''Returns the nodeValue of node's first child if node has any children
    (e.g. the text of a text-only element), otherwise node's own nodeValue'''
    first = node.firstChild
    if first is not None: return first.nodeValue
    return node.nodeValue
98
99 143 aaronmk
def set_value(node, value):
    '''Sets the value of a node.
    For an element, a new text node containing value is appended (existing
    children are left in place). For any other node, nodeValue is assigned.'''
    if is_elem(node):
        text = node.ownerDocument.createTextNode(value)
        node.appendChild(text)
    else: node.nodeValue = value
102
103 86 aaronmk
class NodeTextEntryIter:
    '''Iterates over the text-only element children of a node (see is_text())
    as (tag name, text value) pairs, skipping all other children'''

    def __init__(self, node): self.iter_ = NodeElemIter(node)

    def __iter__(self): return self

    def curr(self):
        '''Returns the current (tag name, value) entry without advancing past
        it. Element children that are not text-only are skipped.
        StopIteration from the underlying iterator ends the iteration.'''
        while True:
            child = self.iter_.curr() # raises StopIteration when exhausted
            if is_text(child): return (child.tagName, value(child))
            self.iter_.next()

    def next(self):
        '''Returns the current entry and advances past it'''
        entry = self.curr()
        self.iter_.next()
        return entry
118
119 135 aaronmk
def set_child(node, name, value):
    '''Appends to node a new child element called name, with its value set to
    value using set_value().
    Note: does not remove any existing child of the same name'''
    child = node.ownerDocument.createElement(name)
    set_value(child, value)
    node.appendChild(child)
124
125 86 aaronmk
def replace(old_node, new_node):
    '''Replaces old_node with new_node in old_node's parent'''
    parent = old_node.parentNode
    parent.replaceChild(new_node, old_node) # note order reversed
127
128 142 aaronmk
def replace_with_text(node, str_):
    '''Replaces node, in its parent, with a new text node containing str_'''
    text = node.ownerDocument.createTextNode(str_)
    replace(node, text)
130 86 aaronmk
131 22 aaronmk
def by_tag_name(node, name, last_only=False):
    '''Returns a list of the element children of node whose tag name is name.
    The children are visited last to first, so the list is in reverse document
    order.
    last_only optimization returns last matching node (as a list of at most
    one element), stopping the search at the first match found'''
    children = []
    for child in NodeElemReverseIter(node):
        if child.tagName == name:
            children.append(child)
            if last_only: break
    return children
139 28 aaronmk
140 133 aaronmk
def create_doc(root='_'):
    '''Creates a new minidom document whose root element is named root.
    The document has no namespace URI and no doctype.'''
    impl = minidom.getDOMImplementation()
    return impl.createDocument(None, root, None)
142 133 aaronmk
143 304 aaronmk
# Formatting options whose keys match the addindent and newl parameters of
# writexml(): a four-space indent step and a newline after each line
prettyxml_config = {'addindent': '    ', 'newl': '\n'}
144
145 299 aaronmk
# minidom modifications
146 73 aaronmk
147 299 aaronmk
# Write character data using this module's escape() instead of minidom's own
# escaping
minidom._write_data = lambda writer, data: writer.write(escape(data))

# Iterating over a node iterates over its children
minidom.Node.__iter__ = lambda self: NodeIter(self)

# str(node) returns the node's XML
minidom.Node.__str__ = lambda self: self.toxml()
152
153
def __Element_write_opening(self, writer, indent='', addindent='', newl=''):
    '''Writes the element's opening tag, including its attributes, to writer.
    indent is written before the tag and newl after it. addindent is not
    used; it is accepted so the signature parallels writexml().'''
    writer.write(indent+'<'+self.tagName)
    for attr_idx in xrange(self.attributes.length):
        attr = self.attributes.item(attr_idx)
        # Attribute values must be quoted for the output to be well-formed
        # XML. escape() also escapes double quotes, so a value can't terminate
        # the quoting early.
        writer.write(' '+attr.name+'="'+escape(attr.value)+'"')
    writer.write('>'+newl)
minidom.Element.write_opening = __Element_write_opening
160 73 aaronmk
161 301 aaronmk
def __Element_write_closing(self, writer, indent='', addindent='', newl=''):
    '''Writes the element's closing tag to writer, followed by newl.
    indent and addindent are not used (the closing tag is written without
    indentation); they are accepted so the signature parallels
    write_opening().'''
    writer.write('</'+self.tagName+'>'+newl)
minidom.Element.write_closing = __Element_write_closing
164 298 aaronmk
165 299 aaronmk
# Keep minidom's own implementation, for elements that are not text-only.
# This must be saved before writexml is replaced below.
_writexml_orig = minidom.Element.writexml
def __Element_writexml(self, writer, indent='', addindent='', newl=''):
    '''Replacement for Element.writexml() that writes a text-only element
    (see is_text()) on a single line: opening tag, escaped text, closing tag.
    All other elements are written by minidom's original writexml().
    indent may also be an int, in which case it is converted to that many
    copies of addindent.'''
    if isinstance(indent, int): indent = addindent*indent
    if is_text(self):
        self.write_opening(writer, indent, addindent, '') # no newline
        writer.write(escape(value(self)))
        self.write_closing(writer, indent, addindent, newl)
    else: _writexml_orig(self, writer, indent, addindent, newl)
minidom.Element.writexml = __Element_writexml
174 28 aaronmk
175 301 aaronmk
def __Document_write_opening(self, writer, indent='', addindent='', newl='',
    encoding=None):
    '''Writes the XML declaration followed by the root element's opening tag.
    The declaration includes an encoding attribute only if encoding is given.
    The document must consist of exactly one child, which must be an element;
    this is checked with assertions.'''
    xml_decl = '<?xml version="1.0" '
    if encoding is not None: xml_decl += 'encoding="'+escape(encoding)+'"'
    xml_decl += '?>'+newl
    writer.write(xml_decl)
    assert has_one_child(self)
    assert is_elem(self.firstChild)
    self.firstChild.write_opening(writer, indent, addindent, newl)
minidom.Document.write_opening = __Document_write_opening
185
186
def __Document_write_closing(self, writer, indent='', addindent='', newl=''):
    '''Writes the closing tag of the document's first child (the root
    element) by delegating to that node's write_closing()'''
    root = self.firstChild
    root.write_closing(writer, indent, addindent, newl)
minidom.Document.write_closing = __Document_write_closing