Project

General

Profile

1
# XML DOM tree manipulation
2

    
3
import cgi
4
from HTMLParser import HTMLParser
5
from xml.dom import Node
6
import xml.dom.minidom as minidom
7

    
8
import strings
9

    
10
def escape(str_):
    '''Escapes the XML special characters in str_ and returns the result
    encoded as ASCII, with non-ASCII characters written as numeric character
    references'''
    escaped = cgi.escape(str_, True) # True: also escape double quotes
    as_unicode = strings.to_unicode(escaped)
    return as_unicode.encode('ascii', 'xmlcharrefreplace')
13

    
14
def unescape(str_):
    '''Replaces the entity and character references in str_ with the
    characters they stand for, using HTMLParser's unescape()'''
    parser = HTMLParser()
    return parser.unescape(str_)
15

    
16
def get_id(node):
    '''Returns the value of node's "id" attribute'''
    return node.getAttribute('id')
17

    
18
def set_id(node, id_):
    '''Sets node's "id" attribute to id_'''
    node.setAttribute('id', id_)
19

    
20
def is_empty(node):
    '''Returns whether node has no children of any kind'''
    # Identity check: None is a singleton, so `is` is the correct comparison
    return node.firstChild is None
21

    
22
def has_one_child(node):
    '''Returns whether node has exactly one child (of any node type)'''
    only_child = node.firstChild
    # Exactly one child: a first child exists and nothing follows it
    return only_child is not None and only_child.nextSibling is None
24

    
25
def is_elem(node):
    '''Returns whether node is an element node'''
    node_type = node.nodeType
    return node_type == Node.ELEMENT_NODE
26

    
27
class NodeElemIter:
    '''Iterator over the element children of a node, in document order.
    Children that are not elements are skipped.'''
    def __init__(self, node): self.child = node.firstChild
    
    def __iter__(self): return self
    
    def curr(self):
        '''Returns the next element child without advancing past it
        @raise StopIteration if no element children remain'''
        while self.child is not None:
            if is_elem(self.child): return self.child
            self.child = self.child.nextSibling # skip non-element child
        raise StopIteration
    
    def next(self):
        '''Returns the next element child and advances past it
        @raise StopIteration if no element children remain'''
        child = self.curr()
        self.child = self.child.nextSibling
        return child
    
    __next__ = next # also satisfy the Python 3 iterator protocol
42

    
43
def first_elem(node):
    '''Returns the first element child of node
    @raise StopIteration if node has no element children'''
    elems = NodeElemIter(node)
    return elems.next()
44

    
45
class NodeElemReverseIter:
    '''Iterator over the element children of a node, in reverse document
    order (last child first). Children that are not elements are skipped.'''
    def __init__(self, node): self.child = node.lastChild
    
    def __iter__(self): return self
    
    def curr(self):
        '''Returns the next element child without advancing past it
        @raise StopIteration if no element children remain'''
        while self.child is not None:
            if is_elem(self.child): return self.child
            self.child = self.child.previousSibling # skip non-element child
        raise StopIteration
    
    def next(self):
        '''Returns the next element child and advances past it
        @raise StopIteration if no element children remain'''
        child = self.curr()
        self.child = self.child.previousSibling
        return child
    
    __next__ = next # also satisfy the Python 3 iterator protocol
60

    
61
def last_elem(node):
    '''Returns the last element child of node
    @raise StopIteration if node has no element children'''
    elems = NodeElemReverseIter(node)
    return elems.next()
62

    
63
class NodeParentIter:
    '''Iterator that starts at a node and walks up through its ancestors.
    Iteration ends at the first node that is not an element, or when there is
    no parent left.'''
    def __init__(self, node): self.node = node
    
    def __iter__(self): return self
    
    def curr(self):
        '''Returns the current node without moving up to its parent
        @raise StopIteration if the current node is missing or not an element
        '''
        if self.node is not None and is_elem(self.node): return self.node
        raise StopIteration
    
    def next(self):
        '''Returns the current node and moves up to its parent
        @raise StopIteration if the current node is missing or not an element
        '''
        node = self.curr()
        self.node = self.node.parentNode
        return node
    
    __next__ = next # also satisfy the Python 3 iterator protocol
76

    
77
def is_text_node(node):
    '''Returns whether node is itself a text node'''
    node_type = node.nodeType
    return node_type == Node.TEXT_NODE
78

    
79
def is_text(node):
    '''Returns whether node's only child is a text node'''
    if not has_one_child(node): return False
    return is_text_node(node.firstChild)
80

    
81
def value(node):
    '''Returns the nodeValue of node's first child if it has one, otherwise
    node's own nodeValue'''
    first = node.firstChild
    source = first if first is not None else node
    return source.nodeValue
84

    
85
def set_value(node, value):
    '''Sets the value of node. For an element, appends a new text node
    containing value (any existing children are kept). For any other node,
    assigns value to its nodeValue.'''
    if not is_elem(node):
        node.nodeValue = value
        return
    text_node = node.ownerDocument.createTextNode(value)
    node.appendChild(text_node)
88

    
89
class NodeTextEntryIter:
    '''Iterator over the element children of a node whose only child is a
    text node, producing a (tag name, value) tuple for each. Other element
    children are skipped.'''
    def __init__(self, node): self.iter_ = NodeElemIter(node)
    
    def __iter__(self): return self
    
    def curr(self):
        '''Returns the next (tag name, value) entry without advancing past it
        @raise StopIteration if the underlying element iterator is exhausted
        '''
        while True:
            child = self.iter_.curr()
            if is_text(child): return (child.tagName, value(child))
            self.iter_.next() # skip element that is not text-only
    
    def next(self):
        '''Returns the next (tag name, value) entry and advances past it
        @raise StopIteration if the underlying element iterator is exhausted
        '''
        entry = self.curr()
        self.iter_.next()
        return entry
    
    __next__ = next # also satisfy the Python 3 iterator protocol
104

    
105
def set_child(node, name, value):
    '''Appends to node a new child element called name, set to value
    Note: does not remove any existing child of the same name'''
    doc = node.ownerDocument
    new_child = doc.createElement(name)
    set_value(new_child, value)
    node.appendChild(new_child)
110

    
111
def replace(old_node, new_node):
    '''Puts new_node in old_node's place under old_node's parent'''
    parent = old_node.parentNode
    # replaceChild() takes the new node first, then the node being replaced
    parent.replaceChild(new_node, old_node)
113

    
114
def replace_with_text(node, str_):
    '''Replaces node, in its parent, with a new text node containing str_'''
    text_node = node.ownerDocument.createTextNode(str_)
    replace(node, text_node)
116

    
117
def by_tag_name(node, name, last_only=False):
    '''Returns a list of the element children of node whose tag is name
    Note: children are scanned last to first, so the matches are listed in
    reverse document order
    @param last_only optimization: stop at the last matching child, so that
        the list contains at most that one node'''
    matches = []
    elems = NodeElemReverseIter(node)
    for elem in elems:
        if elem.tagName != name: continue
        matches.append(elem)
        if last_only: break
    return matches
125

    
126
def create_doc(root='_'):
    '''Creates a new minidom document whose root element is named root'''
    dom_impl = minidom.getDOMImplementation()
    return dom_impl.createDocument(None, root, None)
128

    
129
# Pretty-printing settings, keyed like the addindent and newl parameters of the
# write methods below: four spaces per indent step, newline after each item
prettyxml_config = {'addindent': '    ', 'newl': '\n'}
130

    
131
# minidom modifications
132

    
133
# Replace minidom's _write_data() so the data it is given goes through escape()
minidom._write_data = lambda out, text: out.write(escape(text))
134

    
135
# Make str(node) return the node's XML, as produced by toxml()
minidom.Node.__str__ = lambda node: node.toxml()
136

    
137
def __Element_write_opening(self, writer, indent='', addindent='', newl=''):
    '''Writes this element's opening tag, including its attributes
    @param writer object with a write() method
    @param indent string written before the tag
    @param addindent unused; accepted so that all the write methods share one
        signature
    @param newl string written after the tag'''
    writer.write(indent+'<'+self.tagName)
    for attr_idx in xrange(self.attributes.length):
        attr = self.attributes.item(attr_idx)
        # Attribute values must be quoted to be well-formed XML. escape() also
        # escapes double quotes, so the value cannot end the quoting early.
        writer.write(' '+attr.name+'="'+escape(attr.value)+'"')
    writer.write('>'+newl)
minidom.Element.write_opening = __Element_write_opening
144

    
145
def __Element_write_closing(self, writer, indent='', addindent='', newl=''):
    '''Writes this element's closing tag, followed by newl
    Note: indent and addindent are accepted but not used'''
    closing_tag = '</'+self.tagName+'>'
    writer.write(closing_tag+newl)
minidom.Element.write_closing = __Element_write_closing
148

    
149
_writexml_orig = minidom.Element.writexml
def __Element_writexml(self, writer, indent='', addindent='', newl=''):
    '''Replacement for Element.writexml() that writes an element whose only
    child is a text node on a single line: opening tag, escaped text, closing
    tag. Any other element is handed to the writexml() that was in place
    before this replacement was installed.'''
    if not is_text(self):
        _writexml_orig(self, writer, indent, addindent, newl)
        return
    self.write_opening(writer, indent, addindent, '') # no newline
    writer.write(escape(value(self)))
    self.write_closing(writer, indent, addindent, newl)
minidom.Element.writexml = __Element_writexml
157

    
158
def __Document_write_opening(self, writer, indent='', addindent='', newl='',
    encoding=None):
    '''Writes the XML declaration followed by the root element's opening tag
    @param writer object with a write() method
    @param indent, addindent, newl passed on to the root element's
        write_opening(); newl is also written after the XML declaration
    @param encoding if given, written as the declaration's encoding attribute
    '''
    xmlDecl = '<?xml version="1.0" '
    if encoding is not None: xmlDecl += 'encoding="'+escape(encoding)+'"'
    xmlDecl += '?>'+newl
    writer.write(xmlDecl)
    # The document must consist of exactly one child, which is an element
    assert has_one_child(self)
    assert is_elem(self.firstChild)
    self.firstChild.write_opening(writer, indent, addindent, newl)
minidom.Document.write_opening = __Document_write_opening
168

    
169
def __Document_write_closing(self, writer, indent='', addindent='', newl=''):
    '''Writes the closing tag of the document's first child by calling that
    child's write_closing() with the same arguments'''
    root = self.firstChild
    root.write_closing(writer, indent, addindent, newl)
minidom.Document.write_closing = __Document_write_closing
(9-9/11)