Skip to content

Commit 3186ecf

Browse files
committed
Add a minidom-based dom builder
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40445
1 parent 0674289 commit 3186ecf

3 files changed

Lines changed: 128 additions & 6 deletions

File tree

src/treebuilders/dom.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
import _base
2+
from xml.dom import minidom, Node
3+
4+
class AttrList:
    """Thin wrapper exposing a DOM element's attributes in a dict-like way.

    Only the operations visible here are supported: iteration over
    ``(name, value)`` pairs, item assignment, and ``items()``.
    """

    def __init__(self, element):
        # The wrapped DOM element; every operation is forwarded to it.
        self.element = element

    def __iter__(self):
        """Iterate over the ``(name, value)`` pairs of the element."""
        pairs = self.element.attributes.items()
        return iter(pairs)

    def __setitem__(self, name, value):
        """Set attribute *name* to *value* on the wrapped element."""
        self.element.setAttribute(name, value)

    def items(self):
        """Return the element's attributes as ``(name, value)`` pairs."""
        return self.element.attributes.items()
13+
14+
class NodeBuilder(_base.Node):
    """Tree-builder node backed by a DOM node.

    Each instance wraps one DOM node (``self.element``) and forwards the
    tree-construction operations to it.
    """

    def __init__(self, element):
        """Wrap *element*, using its ``nodeName`` as the node name."""
        _base.Node.__init__(self, element.nodeName)
        self.element = element

    def appendChild(self, node):
        """Append *node* as the last child of this node."""
        node.parent = self
        self.element.appendChild(node.element)

    def insertText(self, data, insertBefore=None):
        """Insert a text node holding *data*.

        The text is placed before *insertBefore* when that is given (and
        truthy); otherwise it is appended as the last child.
        """
        text_node = self.element.ownerDocument.createTextNode(data)
        if not insertBefore:
            self.element.appendChild(text_node)
            return
        self.element.insertBefore(text_node, insertBefore.element)

    def insertBefore(self, node, refNode):
        """Insert *node* as a child of this node, just before *refNode*."""
        self.element.insertBefore(node.element, refNode.element)
        node.parent = self

    def removeChild(self, node):
        """Detach *node* from this node and clear its parent link."""
        self.element.removeChild(node.element)
        node.parent = None

    def reparentChildren(self, newParent):
        """Move every DOM child of this node to the end of *newParent*."""
        source = self.element
        while source.hasChildNodes():
            moved = source.firstChild
            source.removeChild(moved)
            newParent.element.appendChild(moved)
        self.childNodes = []

    def getAttributes(self):
        """Return an ``AttrList`` view over the wrapped element."""
        return AttrList(self.element)

    def setAttributes(self, attributes):
        """Copy each ``(name, value)`` pair of *attributes* onto the element.

        A false value (``None`` or an empty mapping) is ignored, so in that
        case the wrapped element is never touched.
        """
        if not attributes:
            return
        for attr_name, attr_value in attributes.items():
            self.element.setAttribute(attr_name, attr_value)

    attributes = property(getAttributes, setAttributes)

    def cloneNode(self):
        """Return a new ``NodeBuilder`` around a shallow copy of the element."""
        shallow_copy = self.element.cloneNode(False)
        return NodeBuilder(shallow_copy)

    def hasContent(self):
        """Return whether the wrapped element has any child nodes."""
        return self.element.hasChildNodes()
60+
61+
class TreeBuilder(_base.TreeBuilder):
    """Tree builder that produces an ``xml.dom.minidom`` document.

    ``documentClass`` returns the builder itself, so the builder stands in
    for the document node; that is why it also provides ``appendChild``,
    ``insertText`` and a ``name`` attribute.
    """

    def documentClass(self):
        """Create the backing minidom document and return this builder."""
        self.dom = minidom.getDOMImplementation().createDocument(None, None, None)
        return self

    def doctypeClass(self, name):
        """Return a node wrapping a new document type called *name*."""
        domimpl = minidom.getDOMImplementation()
        return NodeBuilder(domimpl.createDocumentType(name, None, None))

    def elementClass(self, name):
        """Return a node wrapping a new element called *name*."""
        return NodeBuilder(self.dom.createElement(name))

    def commentClass(self, data):
        """Return a node wrapping a new comment containing *data*."""
        return NodeBuilder(self.dom.createComment(data))

    def appendChild(self, node):
        """Append *node* directly to the document."""
        self.dom.appendChild(node.element)

    def testSerializer(self, element):
        """Serialize *element* with the module-level ``testSerializer``."""
        return testSerializer(element)

    def getDocument(self):
        """Return the minidom document built so far."""
        return self.dom

    def insertText(self, data, parent=None):
        """Insert text *data* under *parent*.

        Any parent other than the builder itself is handled by the base
        class. Text aimed at the builder (i.e. the document) is appended
        to the minidom document directly.
        """
        # `!=` replaces the `<>` spelling, which is deprecated in Python 2
        # and a syntax error in Python 3; the comparison itself is unchanged.
        if parent != self:
            _base.TreeBuilder.insertText(self, data, parent)
        else:
            # HACK: allow text nodes as children of the document node
            if hasattr(self.dom, '_child_node_types'):
                if Node.TEXT_NODE not in self.dom._child_node_types:
                    # Rebind to a list on the instance before appending so
                    # the addition is possible whatever sequence type the
                    # attribute originally held.
                    self.dom._child_node_types = list(self.dom._child_node_types)
                    self.dom._child_node_types.append(Node.TEXT_NODE)
            self.dom.appendChild(self.dom.createTextNode(data))

    # The builder doubles as the document node (see documentClass);
    # presumably the base class reads ``name`` from it -- verify against _base.
    name = None
97+
98+
def testSerializer(element):
99+
element.normalize()
100+
rv = []
101+
def serializeElement(element, indent=0):
102+
if element.nodeType == Node.DOCUMENT_TYPE_NODE:
103+
rv.append("|%s<!DOCTYPE %s>"%(' '*indent, element.name))
104+
elif element.nodeType == Node.DOCUMENT_NODE:
105+
rv.append("#document")
106+
elif element.nodeType == Node.COMMENT_NODE:
107+
rv.append("|%s<!-- %s -->"%(' '*indent, element.nodeValue))
108+
elif element.nodeType == Node.TEXT_NODE:
109+
rv.append("|%s\"%s\"" %(' '*indent, element.nodeValue))
110+
else:
111+
rv.append("|%s<%s>"%(' '*indent, element.nodeName))
112+
if element.hasAttributes():
113+
for name, value in element.attributes.items():
114+
rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
115+
indent += 2
116+
for child in element.childNodes:
117+
serializeElement(child, indent)
118+
serializeElement(element, 0)
119+
120+
return "\n".join(rv)

tests/test_lxp.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src")))
66

77
from liberalxmlparser import *
8-
from treebuilders import simpletree
8+
from treebuilders import dom
99

1010
import unittest, re
1111

@@ -25,13 +25,14 @@ def ncr(match):
2525
class Xhtml5Test(unittest.TestCase):
2626

2727
def assertXmlEquals(self, input, expected=None, parser=XMLParser):
28-
document = parser(tree=simpletree.TreeBuilder).parse(input)
28+
document = parser(tree=dom.TreeBuilder).parse(input).documentElement
2929
if not expected:
3030
expected = xmlelem.sub(sortattrs, input)
3131
expected = re.sub('&#(\d+);', ncr, expected)
32-
self.assertEquals(expected, xmlelem.sub(sortattrs, document.toxml()))
32+
output = xmlelem.sub(sortattrs, document.toxml('utf-8'))
33+
self.assertEquals(expected, output)
3334
else:
34-
self.assertEquals(expected, document.toxml())
35+
self.assertEquals(expected, document.toxml('utf-8'))
3536

3637
def assertXhtmlEquals(self, input, expected=None, parser=XHTMLParser):
3738
self.assertXmlEquals(input, expected, parser)

tests/test_parser.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,11 @@
1212
import html5parser
1313
#Run tests over all treebuilders
1414
#XXX - it would be nice to automate finding all treebuilders or to allow running just one
15-
from treebuilders import simpletree, etree
15+
from treebuilders import simpletree, etree, dom
1616

1717
treetypes = {"simpletree":simpletree.TreeBuilder,
18-
"ElementTree":etree.TreeBuilder}
18+
"ElementTree":etree.TreeBuilder,
19+
"DOM":dom.TreeBuilder}
1920

2021
def parseTestcase(testString):
2122
testString = testString.split("\n")

0 commit comments

Comments
 (0)