Skip to content

Commit 91c9eff

Browse files
committed
Make DOM filter pass existing tests (more tests needed)
--HG-- branch : svgmathml extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/branches/svgmathml%401300
1 parent 7f9f651 commit 91c9eff

2 files changed

Lines changed: 40 additions & 20 deletions

File tree

src/html5lib/treebuilders/dom.py

Lines changed: 37 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
import _base
1+
22
from xml.dom import minidom, Node, XML_NAMESPACE, XMLNS_NAMESPACE
33
import new
4-
54
import re
6-
illegal_xml_chars = re.compile("[\x01-\x08\x0B\x0C\x0E-\x1F]")
5+
6+
import _base
7+
from html5lib import constants, ihatexml
78

89
moduleCache = {}
910

@@ -20,14 +21,15 @@ def getDomModule(DomImplementation):
2021

2122
def getDomBuilder(DomImplementation):
2223
Dom = DomImplementation
24+
infoset_filter = ihatexml.InfosetFilter()
2325
class AttrList:
2426
def __init__(self, element):
2527
self.element = element
2628
def __iter__(self):
2729
return self.element.attributes.items().__iter__()
2830
def __setitem__(self, name, value):
29-
value=illegal_xml_chars.sub(u'\uFFFD',value)
30-
self.element.setAttribute(name, value)
31+
self.element.setAttribute(infoset_filter.coerceAttribute(name),
32+
infoset_filter.coerceCharacters(value))
3133
def items(self):
3234
return self.element.attributes.items()
3335
def keys(self):
@@ -40,16 +42,15 @@ def __init__(self, element):
4042
_base.Node.__init__(self, element.localName)
4143
self.element = element
4244

43-
namespace = property(lambda self:(hasattr(self.element, "namespace")
44-
and self.element.namespace
45-
or None))
45+
namespace = property(lambda self:hasattr(self.element, "namespaceURI")
46+
and self.element.namespaceURI or None)
4647

4748
def appendChild(self, node):
4849
node.parent = self
4950
self.element.appendChild(node.element)
5051

5152
def insertText(self, data, insertBefore=None):
52-
data=illegal_xml_chars.sub(u'\uFFFD',data)
53+
data=infoset_filter.coerceCharacters(data)
5354
text = self.element.ownerDocument.createTextNode(data)
5455
if insertBefore:
5556
self.element.insertBefore(text, insertBefore.element)
@@ -78,17 +79,19 @@ def getAttributes(self):
7879
def setAttributes(self, attributes):
7980
if attributes:
8081
for name, value in attributes.items():
81-
value=illegal_xml_chars.sub(u'\uFFFD',value)
8282
if isinstance(name, tuple):
8383
if name[0] is not None:
84-
qualifiedName = name[0] + ":" + name[1]
84+
qualifiedName = (name[0] + ":" +
85+
infoset_filter.coerceAttribute(
86+
name[1]))
8587
else:
86-
qualifiedName = name[1]
88+
qualifiedName = infoset_filter.coerceAttribute(
89+
name[1])
8790
self.element.setAttributeNS(name[2], qualifiedName,
8891
value)
8992
else:
90-
self.element.setAttribute(name, value)
91-
93+
self.element.setAttribute(
94+
infoset_filter.coerceAttribute(name), value)
9295
attributes = property(getAttributes, setAttributes)
9396

9497
def cloneNode(self):
@@ -140,7 +143,7 @@ def getFragment(self):
140143
return _base.TreeBuilder.getFragment(self).element
141144

142145
def insertText(self, data, parent=None):
143-
data=illegal_xml_chars.sub(u'\uFFFD',data)
146+
data=infoset_filter.coerceCharacters(data)
144147
if parent <> self:
145148
_base.TreeBuilder.insertText(self, data, parent)
146149
else:
@@ -177,9 +180,26 @@ def serializeElement(element, indent=0):
177180
elif element.nodeType == Node.TEXT_NODE:
178181
rv.append("|%s\"%s\"" %(' '*indent, element.nodeValue))
179182
else:
180-
rv.append("|%s<%s>"%(' '*indent, element.nodeName))
183+
if (hasattr(element, "namespaceURI") and
184+
element.namespaceURI not in (None,
185+
constants.namespaces["html"])):
186+
name = "%s %s"%(constants.prefixes[element.namespaceURI],
187+
element.nodeName)
188+
else:
189+
name = element.nodeName
190+
rv.append("|%s<%s>"%(' '*indent, name))
181191
if element.hasAttributes():
182-
for name, value in element.attributes.items():
192+
i = 0
193+
attr = element.attributes.item(i)
194+
while attr:
195+
name = infoset_filter.fromXmlName(attr.localName)
196+
value = attr.value
197+
ns = attr.namespaceURI
198+
if ns:
199+
name = "%s %s"%(constants.prefixes[ns], name)
200+
i += 1
201+
attr = element.attributes.item(i)
202+
183203
rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
184204
indent += 2
185205
for child in element.childNodes:

tests/test_parser.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
import html5lib
1212
from html5lib import html5parser, treebuilders, constants
1313

14-
treeTypes = {"simpletree":treebuilders.getTreeBuilder("simpletree")}
15-
#"DOM":treebuilders.getTreeBuilder("dom")}
14+
treeTypes = {"simpletree":treebuilders.getTreeBuilder("simpletree"),
15+
"DOM":treebuilders.getTreeBuilder("dom")}
1616

1717
#Try whatever etree implementations are available from a list that are
1818
#"supposed" to work
@@ -55,7 +55,7 @@
5555
#"supposed" to work
5656
try:
5757
import pxdom
58-
#treeTypes["pxdom"] = treebuilders.getTreeBuilder("dom", pxdom)
58+
treeTypes["pxdom"] = treebuilders.getTreeBuilder("dom", pxdom)
5959
except ImportError:
6060
pass
6161

0 commit comments

Comments
 (0)