1212from filters .inject_meta_charset import Filter as InjectMetaCharsetFilter
1313
1414from constants import voidElements , booleanAttributes , spaceCharacters
15+ from constants import rcdataElements
16+
17+ from xml .sax .saxutils import escape
1518
1619spaceCharacters = u"" .join (spaceCharacters )
1720
@@ -49,11 +52,10 @@ def htmlentityreplace_errors(exc):
4952
5053 del register_error
5154
52- def escape_text (text , encoding ):
53- return text .replace ( "&" , "&amp;" ). encode (encoding , unicode_encode_errors )
55+ def encode (text , encoding ):
56+ return text .encode (encoding , unicode_encode_errors )
5457
5558class HTMLSerializer (object ):
56- cdata_elements = frozenset (("style" , "script" , "xmp" , "iframe" , "noembed" , "noframes" , "noscript" ))
5759
5860 quote_attr_values = False
5961 quote_char = '"'
@@ -109,16 +111,13 @@ def serialize(self, treewalker, encoding=None):
109111 else :
110112 yield token ["data" ]
111113 elif encoding :
112- yield escape_text ( token ["data" ], encoding )
114+ yield encode ( escape ( token ["data" ]) , encoding )
113115 else :
114- yield token ["data" ] \
115- .replace ("&" , "&amp;" ) \
116- .replace ("<" , "&lt;" ) \
117- .replace (">" , "&gt;" )
116+ yield escape (token ["data" ])
118117
119118 elif type in ("StartTag" , "EmptyTag" ):
120119 name = token ["name" ]
121- if name in self . cdata_elements :
120+ if name in rcdataElements :
122121 in_cdata = True
123122 elif in_cdata :
124123 self .serializeError (_ ("Unexpected child element of a CDATA element" ))
@@ -142,10 +141,9 @@ def serialize(self, treewalker, encoding=None):
142141 else :
143142 quote_attr = reduce (lambda x ,y : x or (y in v ),
144143 spaceCharacters + "<>\" '" , False )
144+ v = v .replace ("&" , "&amp;" )
145145 if encoding :
146- v = escape_text (v , encoding )
147- else :
148- v = v .replace ("&" , "&amp;" )
146+ v = encode (v , encoding )
149147 if quote_attr :
150148 quote_char = self .quote_char
151149 if self .use_best_quote_char :
@@ -174,7 +172,7 @@ def serialize(self, treewalker, encoding=None):
174172
175173 elif type == "EndTag" :
176174 name = token ["name" ]
177- if name in self . cdata_elements :
175+ if name in rcdataElements :
178176 in_cdata = False
179177 elif in_cdata :
180178 self .serializeError (_ ("Unexpected child element of a CDATA element" ))
0 commit comments