1- import _base
1+
22from xml .dom import minidom , Node , XML_NAMESPACE , XMLNS_NAMESPACE
33import new
4-
54import re
6- illegal_xml_chars = re .compile ("[\x01 -\x08 \x0B \x0C \x0E -\x1F ]" )
5+
6+ import _base
7+ from html5lib import constants , ihatexml
78
89moduleCache = {}
910
@@ -20,14 +21,15 @@ def getDomModule(DomImplementation):
2021
2122def getDomBuilder (DomImplementation ):
2223 Dom = DomImplementation
24+ infoset_filter = ihatexml .InfosetFilter ()
2325 class AttrList :
def __init__(self, element):
    """Remember the DOM element whose attributes this object exposes."""
    self.element = element
def __iter__(self):
    """Iterate over the (name, value) pairs of the wrapped element's attributes."""
    pairs = self.element.attributes.items()
    return iter(pairs)
def __setitem__(self, name, value):
    """Set attribute *name* to *value* on the wrapped element.

    Both the name and the value are passed through the enclosing scope's
    ``infoset_filter`` before they reach the DOM.
    """
    safe_name = infoset_filter.coerceAttribute(name)
    safe_value = infoset_filter.coerceCharacters(value)
    self.element.setAttribute(safe_name, safe_value)
def items(self):
    """Return whatever ``items()`` of the wrapped element's attribute map yields."""
    attribute_map = self.element.attributes
    return attribute_map.items()
3335 def keys (self ):
@@ -40,16 +42,15 @@ def __init__(self, element):
4042 _base .Node .__init__ (self , element .localName )
4143 self .element = element
4244
# Read-only namespace of the wrapped element: its ``namespaceURI`` when that
# attribute exists and is truthy, otherwise None.
namespace = property(
    lambda self: getattr(self.element, "namespaceURI", None) or None)
4647
def appendChild(self, node):
    """Make *node* a child of this node.

    Records this wrapper as ``node.parent`` and appends ``node.element``
    to the wrapped DOM element.
    """
    node.parent = self
    dom_parent = self.element
    dom_parent.appendChild(node.element)
5051
5152 def insertText (self , data , insertBefore = None ):
52- data = illegal_xml_chars . sub ( u' \uFFFD ' , data )
53+ data = infoset_filter . coerceCharacters ( data )
5354 text = self .element .ownerDocument .createTextNode (data )
5455 if insertBefore :
5556 self .element .insertBefore (text , insertBefore .element )
@@ -78,17 +79,19 @@ def getAttributes(self):
def setAttributes(self, attributes):
    """Copy every entry of *attributes* onto the wrapped DOM element.

    *attributes* is a mapping (anything with ``items()``); a falsy value
    is a no-op.  Each key is either a plain attribute name or a tuple.
    For tuple keys, index 0 is used as a prefix (joined with ":" when it
    is not None), index 1 as the local name and index 2 as the first
    argument to ``setAttributeNS`` -- presumably
    (prefix, localName, namespaceURI); confirm against the callers.

    Names are passed through ``infoset_filter.coerceAttribute`` and values
    through ``infoset_filter.coerceCharacters``, matching what
    ``AttrList.__setitem__`` does for single attributes.
    """
    if not attributes:
        return
    for name, value in attributes.items():
        # Values used to be sanitised here before the switch to
        # infoset_filter; keep filtering them so this path agrees with
        # AttrList.__setitem__ instead of handing raw text to the DOM.
        value = infoset_filter.coerceCharacters(value)
        if isinstance(name, tuple):
            localName = infoset_filter.coerceAttribute(name[1])
            if name[0] is not None:
                qualifiedName = name[0] + ":" + localName
            else:
                qualifiedName = localName
            self.element.setAttributeNS(name[2], qualifiedName, value)
        else:
            self.element.setAttribute(
                infoset_filter.coerceAttribute(name), value)
9295 attributes = property (getAttributes , setAttributes )
9396
9497 def cloneNode (self ):
@@ -140,7 +143,7 @@ def getFragment(self):
140143 return _base .TreeBuilder .getFragment (self ).element
141144
142145 def insertText (self , data , parent = None ):
143- data = illegal_xml_chars . sub ( u' \uFFFD ' , data )
146+ data = infoset_filter . coerceCharacters ( data )
144147 if parent <> self :
145148 _base .TreeBuilder .insertText (self , data , parent )
146149 else :
@@ -177,9 +180,26 @@ def serializeElement(element, indent=0):
177180 elif element .nodeType == Node .TEXT_NODE :
178181 rv .append ("|%s\" %s\" " % (' ' * indent , element .nodeValue ))
179182 else :
180- rv .append ("|%s<%s>" % (' ' * indent , element .nodeName ))
183+ if (hasattr (element , "namespaceURI" ) and
184+ element .namespaceURI not in (None ,
185+ constants .namespaces ["html" ])):
186+ name = "%s %s" % (constants .prefixes [element .namespaceURI ],
187+ element .nodeName )
188+ else :
189+ name = element .nodeName
190+ rv .append ("|%s<%s>" % (' ' * indent , name ))
181191 if element .hasAttributes ():
182- for name , value in element .attributes .items ():
192+ i = 0
193+ attr = element .attributes .item (i )
194+ while attr :
195+ name = infoset_filter .fromXmlName (attr .localName )
196+ value = attr .value
197+ ns = attr .namespaceURI
198+ if ns :
199+ name = "%s %s" % (constants .prefixes [ns ], name )
200+ i += 1
201+ attr = element .attributes .item (i )
202+
183203 rv .append ('|%s%s="%s"' % (' ' * (indent + 2 ), name , value ))
184204 indent += 2
185205 for child in element .childNodes :
0 commit comments