Project

General

Profile

1 21 aaronmk
# XML DOM tree manipulation
2
3 73 aaronmk
import cgi
4
from HTMLParser import HTMLParser
5 21 aaronmk
from xml.dom import Node
6 299 aaronmk
import xml.dom.minidom as minidom
7 21 aaronmk
8 73 aaronmk
import strings
9
10
def escape(str_):
    '''Escapes a string for use in XML text or inside a quoted attribute value.

    cgi.escape() (with quote escaping enabled) replaces &, <, > and " by
    entity references. The result is then encoded to ASCII, with every
    non-ASCII character replaced by a numeric character reference.

    Note: this does not add surrounding quotes; callers writing an attribute
    value must supply them.

    @param str_ The string to escape (presumably a str or unicode; it is passed
        through strings.to_unicode() before encoding -- confirm against that
        helper)
    @return The escaped, ASCII-only string
    '''
    return strings.to_unicode(cgi.escape(str_, True)).encode('ascii',
        'xmlcharrefreplace')
13
14
def unescape(str_):
    '''Reverses escaping by passing str_ through HTMLParser's unescape().
    @param str_ The escaped string
    @return The result of HTMLParser().unescape(str_)
    '''
    parser = HTMLParser()
    return parser.unescape(str_)
15
16 21 aaronmk
def get_id(node):
    '''Returns the value of a node's "id" attribute, as reported by the node's
    getAttribute()'''
    id_ = node.getAttribute('id')
    return id_
17
18
def set_id(node, id_):
    '''Sets a node's "id" attribute to id_'''
    attr_name = 'id'
    node.setAttribute(attr_name, id_)
19
20 135 aaronmk
def is_empty(node):
    '''Returns whether a node has no child nodes of any type'''
    # Identity test: None is a singleton, and this cannot be affected by any
    # custom equality on node objects
    return node.firstChild is None
21
22 301 aaronmk
def has_one_child(node):
    '''Returns whether a node has exactly one child node (of any type)'''
    first = node.firstChild
    return first is not None and first.nextSibling is None
24
25 298 aaronmk
def is_elem(node):
    '''Returns whether a node is an element node, judged by its nodeType'''
    node_type = node.nodeType
    return node_type == Node.ELEMENT_NODE
26
27 21 aaronmk
class NodeElemIter:
    '''Iterates over the element children of a node, from first to last.
    Children that are not elements (as judged by is_elem()) are skipped.'''

    def __init__(self, node):
        '''@param node The node whose element children will be iterated over'''
        self.child = node.firstChild

    def __iter__(self): return self

    def curr(self):
        '''Returns the current element child without moving past it.
        Skips forward over any non-element children first.
        @raise StopIteration if no element children remain
        '''
        while self.child is not None:
            if is_elem(self.child): return self.child
            self.child = self.child.nextSibling
        raise StopIteration

    def next(self):
        '''Returns the current element child and advances to its next sibling.
        @raise StopIteration if no element children remain
        '''
        child = self.curr()
        # After curr(), self.child is the element being returned
        self.child = self.child.nextSibling
        return child

    __next__ = next # so the iterator also satisfies the Python 3 protocol
42
43
def first_elem(node):
    '''Returns the first element child of a node.
    @raise StopIteration if the node has no element children
    '''
    elem_iter = NodeElemIter(node)
    return elem_iter.next()
44
45
class NodeElemReverseIter:
    '''Iterates over the element children of a node, from last to first.
    Children that are not elements (as judged by is_elem()) are skipped.'''

    def __init__(self, node):
        '''@param node The node whose element children will be iterated over'''
        self.child = node.lastChild

    def __iter__(self): return self

    def curr(self):
        '''Returns the current element child without moving past it.
        Skips backward over any non-element children first.
        @raise StopIteration if no element children remain
        '''
        while self.child is not None:
            if is_elem(self.child): return self.child
            self.child = self.child.previousSibling
        raise StopIteration

    def next(self):
        '''Returns the current element child and moves to its previous sibling.
        @raise StopIteration if no element children remain
        '''
        child = self.curr()
        # After curr(), self.child is the element being returned
        self.child = self.child.previousSibling
        return child

    __next__ = next # so the iterator also satisfies the Python 3 protocol
60
61
def last_elem(node):
    '''Returns the last element child of a node.
    @raise StopIteration if the node has no element children
    '''
    elem_iter = NodeElemReverseIter(node)
    return elem_iter.next()
62
63
class NodeParentIter:
    '''Iterates upward through a node's ancestry, starting with the node
    itself and following parentNode links. Iteration ends at the first node
    that is None or is not an element (as judged by is_elem()).'''

    def __init__(self, node):
        '''@param node The node to start from (returned first if an element)'''
        self.node = node

    def __iter__(self): return self

    def curr(self):
        '''Returns the current node without moving past it.
        @raise StopIteration if the current node is None or not an element
        '''
        if self.node is not None and is_elem(self.node): return self.node
        raise StopIteration

    def next(self):
        '''Returns the current node and moves up to its parent.
        @raise StopIteration if the current node is None or not an element
        '''
        node = self.curr()
        self.node = self.node.parentNode
        return node

    __next__ = next # so the iterator also satisfies the Python 3 protocol
76
77 298 aaronmk
def is_text_node(node):
    '''Returns whether a node is a text node, judged by its nodeType'''
    node_type = node.nodeType
    return node_type == Node.TEXT_NODE
78
79 301 aaronmk
def is_text(node):
    '''Returns whether a node's only child is a text node'''
    if not has_one_child(node): return False
    return is_text_node(node.firstChild)
80 21 aaronmk
81
def value(node):
    '''Returns the value of a node.
    If the node has children, this is the nodeValue of its first child (e.g.
    the text inside an element); otherwise it is the node's own nodeValue.
    '''
    first = node.firstChild
    if first is not None: return first.nodeValue
    return node.nodeValue
84
85 143 aaronmk
def set_value(node, value):
    '''Sets the value of a node.
    For an element, a new text node containing value is appended (existing
    children are not removed). For any other node, nodeValue is assigned.
    @param node The node to modify
    @param value The text to store
    '''
    if is_elem(node):
        node.appendChild(node.ownerDocument.createTextNode(value))
    else: node.nodeValue = value
88
89 86 aaronmk
class NodeTextEntryIter:
    '''Iterates over the element children of a node whose only child is a text
    node (as judged by is_text()), returning a (tag name, text value) pair for
    each. Element children that do not match are skipped.'''

    def __init__(self, node):
        '''@param node The node whose element children will be scanned'''
        self.iter_ = NodeElemIter(node)

    def __iter__(self): return self

    def curr(self):
        '''Returns the (tag name, value) pair for the current matching child
        without moving past it.
        @raise StopIteration (from the underlying iterator) if none remain
        '''
        while True:
            child = self.iter_.curr()
            if is_text(child): return (child.tagName, value(child))
            self.iter_.next() # not a text-only element, so skip it

    def next(self):
        '''Returns the current (tag name, value) pair and moves past it.
        @raise StopIteration (from the underlying iterator) if none remain
        '''
        entry = self.curr()
        self.iter_.next()
        return entry

    __next__ = next # so the iterator also satisfies the Python 3 protocol
104
105 135 aaronmk
def set_child(node, name, value):
    '''Appends a new child element with the given tag name and value.
    Note: does not remove any existing child of the same name
    @param node The parent node
    @param name The tag name of the new child element
    @param value The value to give the new child, via set_value()
    '''
    child = node.ownerDocument.createElement(name)
    set_value(child, value)
    node.appendChild(child)
110
111 86 aaronmk
def replace(old_node, new_node):
    '''Replaces old_node by new_node in old_node's parent.
    @param old_node The node to remove; it must have a parent
    @param new_node The node to put in its place
    '''
    parent = old_node.parentNode
    parent.replaceChild(new_node, old_node) # note order reversed
113
114 142 aaronmk
def replace_with_text(node, str_):
    '''Replaces a node by a new text node containing str_.
    @param node The node to replace; it must have a parent
    @param str_ The text for the new text node
    '''
    text_node = node.ownerDocument.createTextNode(str_)
    replace(node, text_node)
116 86 aaronmk
117 22 aaronmk
def by_tag_name(node, name, last_only=False):
    '''Finds the element children of a node that have a given tag name.
    Only direct children are examined. They are visited from last to first,
    so the returned list is in reverse document order.
    @param node The node whose children to search
    @param name The tag name to match
    @param last_only Optimization: stop at the first match found, which is the
        last matching child, so that at most one node is returned
    @return A list of the matching child elements (empty if there are none)
    '''
    children = []
    for child in NodeElemReverseIter(node):
        if child.tagName == name:
            children.append(child)
            if last_only: break
    return children
125 28 aaronmk
126 133 aaronmk
def create_doc(root='_'):
    '''Creates a new minidom document.
    @param root The tag name of the document's root element
    @return The new document, with no namespace and no doctype
    '''
    dom_impl = minidom.getDOMImplementation()
    return dom_impl.createDocument(None, root, None)
128 133 aaronmk
129 299 aaronmk
# minidom modifications
130 73 aaronmk
131 299 aaronmk
def __write_data(writer, data):
    '''Writes data to writer after passing it through escape().
    Installed in place of minidom's module-level _write_data().'''
    writer.write(escape(data))
minidom._write_data = __write_data
132 73 aaronmk
133 301 aaronmk
def __Node_str(self):
    '''Returns the node's XML serialization, as produced by toxml().
    Installed as minidom.Node.__str__ so that str(node) yields its XML.'''
    return self.toxml()
minidom.Node.__str__ = __Node_str
134
135
def __Node_writeprettyxml(self, writer, indent='', addindent='    ', newl='\n'):
    '''Writes the node using writexml(), but with pretty-printing defaults:
    four-space indentation per level and a newline after each line.
    Installed as minidom.Node.writeprettyxml.
    @param writer An object with a write() method
    @param indent The indentation of the node itself
    @param addindent The additional indentation for each nesting level
    @param newl The string written at the end of each line
    '''
    self.writexml(writer, indent, addindent, newl)
minidom.Node.writeprettyxml = __Node_writeprettyxml
138
139
def __Element_write_opening(self, writer, indent='', addindent='', newl=''):
    '''Writes just the opening tag of an element, including its attributes.
    Installed as minidom.Element.write_opening.
    @param writer An object with a write() method
    @param indent Written before the tag
    @param addindent Unused; accepted for symmetry with writexml()
    @param newl Written after the tag
    '''
    writer.write(indent+'<'+self.tagName)
    # range() rather than xrange() so this works on both Python 2 and 3; the
    # number of attributes is small
    for attr_idx in range(self.attributes.length):
        attr = self.attributes.item(attr_idx)
        # escape() does not add quotes itself (it only escapes any " in the
        # value), so they must be supplied here for the output to be
        # well-formed XML
        writer.write(' '+attr.name+'="'+escape(attr.value)+'"')
    writer.write('>'+newl)
minidom.Element.write_opening = __Element_write_opening
146 73 aaronmk
147 301 aaronmk
def __Element_write_closing(self, writer, indent='', addindent='', newl=''):
    '''Writes just the closing tag of an element.
    Installed as minidom.Element.write_closing.
    @param writer An object with a write() method
    @param indent Unused: no indentation is written before the closing tag
    @param addindent Unused; accepted for symmetry with writexml()
    @param newl Written after the tag
    '''
    writer.write('</'+self.tagName+'>'+newl)
minidom.Element.write_closing = __Element_write_closing
150 298 aaronmk
151 299 aaronmk
# Saved before patching so that the replacement below can delegate to it
_writexml_orig = minidom.Element.writexml
def __Element_writexml(self, writer, indent='', addindent='', newl=''):
    '''Replacement for minidom's Element.writexml().
    An element whose only child is a text node (as judged by is_text()) is
    written on a single line: opening tag, escaped text, closing tag. Every
    other element is written by minidom's original writexml().
    @param writer An object with a write() method
    @param indent The indentation of the element itself
    @param addindent The additional indentation for each nesting level
    @param newl The string written at the end of each line
    '''
    if is_text(self):
        self.write_opening(writer, indent, addindent, '') # no newline
        writer.write(escape(value(self)))
        self.write_closing(writer, indent, addindent, newl)
    else: _writexml_orig(self, writer, indent, addindent, newl)
minidom.Element.writexml = __Element_writexml
159 28 aaronmk
160 301 aaronmk
def __Document_write_opening(self, writer, indent='', addindent='', newl='',
    encoding=None):
    '''Writes the XML declaration followed by the opening tag of the
    document's root element, without the root's contents or closing tag.
    Installed as minidom.Document.write_opening.
    The document must have exactly one child, and it must be an element.
    @param writer An object with a write() method
    @param indent Passed to the root element's write_opening()
    @param addindent Passed to the root element's write_opening()
    @param newl Written after the XML declaration and after the opening tag
    @param encoding If not None, included in the XML declaration
    '''
    xml_decl = '<?xml version="1.0" '
    if encoding is not None: xml_decl += 'encoding="'+escape(encoding)+'"'
    xml_decl += '?>'+newl
    writer.write(xml_decl)
    assert has_one_child(self)
    assert is_elem(self.firstChild)
    self.firstChild.write_opening(writer, indent, addindent, newl)
minidom.Document.write_opening = __Document_write_opening
170
171
def __Document_write_closing(self, writer, indent='', addindent='', newl=''):
    '''Writes the closing tag of the document's first child (its root
    element), by calling that child's write_closing().
    Installed as minidom.Document.write_closing.
    @param writer An object with a write() method
    @param indent Passed to the child's write_closing()
    @param addindent Passed to the child's write_closing()
    @param newl Passed to the child's write_closing()
    '''
    root = self.firstChild
    root.write_closing(writer, indent, addindent, newl)
minidom.Document.write_closing = __Document_write_closing