1 |
11
|
aaronmk
|
# An XPath-style path parser built on the general recursive descent Parser
|
2 |
|
|
|
3 |
|
|
from Parser import Parser
|
4 |
|
|
|
5 |
|
|
class XpathElem:
    """One element (step) of a parsed path.

    Attributes:
        name: the element's name.
        value: the value attached to the element, or None when there is none.
        is_attr: truthy when the element is shown with a leading '@'.
        attrs: list of attributes attached to the element.
    """

    def __init__(self, name, value=None, is_attr=False, attrs=None):
        """Store the given fields; `attrs` defaults to a new empty list."""
        # Identity test, so an `attrs` object with a custom __eq__ is never
        # mistaken for "not supplied". A fresh list per instance also keeps
        # elements from sharing one mutable default.
        if attrs is None:
            attrs = []
        self.name = name
        self.value = value
        self.is_attr = is_attr
        self.attrs = attrs

    def __repr__(self):
        """Return '[@]name<attrs repr>=<value repr>'."""
        prefix = '@' if self.is_attr else ''
        return prefix + self.name + repr(self.attrs) + '=' + repr(self.value)

    def __eq__(self, other):
        """Compare all fields; defer to the other operand for foreign types."""
        # Objects such as ints or None have no __dict__; returning
        # NotImplemented lets the comparison fall back to "not equal"
        # instead of raising AttributeError.
        if not isinstance(other, XpathElem):
            return NotImplemented
        return self.__dict__ == other.__dict__

    def __ne__(self, other):
        """Inverse of __eq__ (Python 2 does not derive != from ==)."""
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
|
19 |
|
|
|
20 |
|
|
class XpathParser(Parser):
    r"""Parser for '/'-separated paths with optional [path=value,...] lists.

    Grammar, as implemented by the methods below:

        main   := '/' path ('->' '/' path)*
        path   := elem ('/' elem)*
        elem   := '@'? fields ('[' attrs ']')?
        fields := '{' field (',' field)* '}' | field
        attrs  := path '=' value (',' path '=' value)*
        field  := name
        name   := [\w.]+
        value  := [\w|]+

    All matching is delegated to the inherited _match_str/_match_re helpers.
    NOTE(review): their exact return values are defined in Parser; this class
    only relies on the truthiness of _match_str and on _match_re returning
    the matched text -- confirm against Parser.
    """

    def _main(self):
        """Parse '->'-separated absolute paths and return the last one."""
        while True:
            self._match_str('/', required=True)
            last_path = self._path()  # just use last path for now
            if self._match_str('->'):
                continue
            return last_path

    def _path(self):
        """Parse '/'-separated elements into a list of XpathElem."""
        steps = []
        while True:
            # The '@' marker must be tried before the name is consumed.
            attr_flag = self._match_str('@')
            step = XpathElem(name=self._fields(), is_attr=attr_flag)
            if self._match_str('['):
                step.attrs = self._attrs()
                self._match_str(']', required=True)
            steps.append(step)
            if not self._match_str('/'):
                return steps

    def _fields(self):
        """Parse a single field or a '{a,b,...}' group of fields."""
        if not self._match_str('{'):
            return self._field()
        alternatives = [self._field()]
        while self._match_str(','):
            alternatives.append(self._field())
        self._match_str('}', required=True)
        return alternatives[0]  # just use first field for now

    def _attrs(self):
        """Parse comma-separated 'path=value' entries into a list of paths.

        The value is stored on the last element of each entry's path.
        """
        entries = []
        while True:
            target = self._path()
            self._match_str('=', required=True)
            target[-1].value = self._value()
            entries.append(target)
            if not self._match_str(','):
                return entries

    def _field(self):
        """Parse one field; currently just a name."""
        return self._name()

    def _name(self):
        """Match a run of word characters and dots (required)."""
        return self._match_re(r'[\w.]+', required=True)

    def _value(self):
        """Match a run of word characters and '|' (required)."""
        return self._match_re(r'[\w|]+', required=True)
|