Project

General

Profile

1 1708 aaronmk
# XML parsing
2
3 1712 aaronmk
import re
4 1708 aaronmk
import xml.dom.minidom as minidom
5 1712 aaronmk
import xml.parsers.expat as expat
6 1708 aaronmk
7 1757 aaronmk
import exc
8 1712 aaronmk
import streams
9
10 1708 aaronmk
def parse_str(str_):
    '''Parses a complete XML document held in a string.
    @param str_ The XML text to parse
    @return The root element (documentElement) of the parsed document
    '''
    document = minidom.parseString(str_)
    return document.documentElement
11 1712 aaronmk
12
class ConsecXmlInputStream(streams.TracedStream):
    '''Wraps an input stream, inserting EOF after each consecutive XML document

    Document boundaries are detected line by line: the first line that starts
    with an element tag fixes the root element's name, and the first line
    (including that same line) whose trailing text is the matching close tag
    ends the document. The readline() call after that returns '' once, without
    reading from the underlying stream.

    NOTE: a self-closing root element (e.g. `<a/>`) is not recognized, because
    the captured tag name then includes the slash and no close tag matches it.
    '''
    def __init__(self, stream):
        '''
        @param stream The underlying input stream, passed to TracedStream
        '''
        self.eof = False # whether the next readline() should report EOF
        self.root_close_tag = None # close tag awaited; None when outside a doc

        # Handed to TracedStream as its trace callback; presumably invoked with
        # each line that is read -- confirm against streams.TracedStream
        def trace(line):
            if self.root_close_tag is None: # before XML document
                # Skip <?...?> declarations as well as <!-- comments --> and
                # <!DOCTYPE ...>, none of which is the root element
                match = re.match(r'^<(?![?!])([^\s>]+)', line)
                if match is None: return # root element not seen yet
                self.root_close_tag = '</'+match.group(1)+'>'
            # Inside XML document. The opening line is checked too, so that a
            # document written on a single line is also terminated.
            if line.rstrip().endswith(self.root_close_tag):
                self.root_close_tag = None # now outside XML document
                self.eof = True # next read will produce EOF

        streams.TracedStream.__init__(self, trace, stream)

    def readline(self):
        '''
        @return '' once after the end of each document, otherwise the next
            line from TracedStream.readline()
        '''
        if self.eof:
            self.eof = False
            return '' # don't read from underlying stream
        else: return streams.TracedStream.readline(self)

    def read(self, n): return self.readline() # forward all reads to readline()
37
38 1757 aaronmk
def parse_next(stream, on_error=exc.raise_):
    '''Parses the next document in a consecutive sequence of XML documents.
    @param stream The input stream; it is wrapped in a ConsecXmlInputStream so
        that parsing stops at the end of the next document
    @param on_error Called with the ExpatError for any parse error other than
        "no element found"
    @return The next document's root, or None if no more documents available.
        None is also returned if on_error() returns instead of raising.
    '''
    try: return minidom.parse(ConsecXmlInputStream(stream)).documentElement
    # `except ... as` is valid on Python 2.6+ and 3; the comma form is
    # Python-2-only and a SyntaxError on Python 3
    except expat.ExpatError as e:
        # Save in case another exception raised, overwriting sys.exc_info()
        exc.add_traceback(e)
        if str(e).startswith('no element found:'): return None # no more docs
        else: on_error(e)
48 1712 aaronmk
49 1757 aaronmk
def docs_iter(stream, *args, **kw_args):
    '''Iterates over the consecutive XML documents in a stream.
    @param stream The input stream handed to parse_next() on every step
    @param args, kw_args Extra arguments passed through to parse_next()
    @return An iterator that yields each parse_next() result and stops at the
        first None
    '''
    def next_doc(): return parse_next(stream, *args, **kw_args)

    # Two-argument iter(): call next_doc() repeatedly until it returns None
    return iter(next_doc, None)