@@ -45,6 +45,9 @@ def htmlentityreplace_errors(exc):
4545
4646 del register_error
4747
48+ def escape_text (text , encoding ):
49+ return text .replace ("&" , "&amp;" ).encode (encoding , unicode_encode_errors )
50+
4851class OptionalTagFilter :
4952 def __init__ (self , source ):
5053 self .source = source
@@ -246,8 +249,7 @@ def __init__(self, **kwargs):
246249 "minimize_boolean_attributes" , "use_trailing_solidus" ,
247250 "space_before_trailing_solidus" , "omit_optional_tags" ,
248251 "strip_whitespace" , "inject_meta_charset" ):
249- if attr in kwargs :
250- setattr (self , attr , kwargs [attr ])
252+ setattr (self , attr , kwargs .get (attr , getattr (self , attr )))
251253 self .errors = []
252254 self .strict = False
253255
@@ -274,12 +276,11 @@ def serialize(self, treewalker, encoding=None):
274276 if in_cdata and token ["data" ].find ("</" ) >= 0 :
275277 self .serializeError (_ ("Unexpected </ in CDATA" ))
276278 if encoding :
277- yield token ["data" ].encode (encoding , errors or "strict" )
279+ yield token ["data" ].encode (encoding , "strict" )
278280 else :
279281 yield token ["data" ]
280282 elif encoding :
281- yield token ["data" ].replace ("&" , "&amp;" ) \
282- .encode (encoding , unicode_encode_errors )
283+ yield escape_text (token ["data" ], encoding )
283284 else :
284285 yield token ["data" ] \
285286 .replace ("&" , "&amp;" ) \
@@ -299,7 +300,7 @@ def serialize(self, treewalker, encoding=None):
299300 attributes = []
300301 for k ,v in attrs :
301302 if encoding :
302- k = k .encode (encoding )
303+ k = k .encode (encoding , "strict" )
303304 attributes .append (' ' )
304305
305306 attributes .append (k )
@@ -310,11 +311,12 @@ def serialize(self, treewalker, encoding=None):
310311 if self .quote_attr_values or not v :
311312 quote_attr = True
312313 else :
313- quote_attr = reduce (lambda x ,y : x or y in v ,
314+ quote_attr = reduce (lambda x ,y : x or ( y in v ) ,
314315 spaceCharacters + "<>\" '" , False )
315- v = v .replace ("&" , "&amp;" )
316316 if encoding :
317- v = v .encode (encoding , unicode_encode_errors )
317+ v = escape_text (v , encoding )
318+ else :
319+ v = v .replace ("&" , "&amp;" )
318320 if quote_attr :
319321 quote_char = self .quote_char
320322 if self .use_best_quote_char :
@@ -337,7 +339,7 @@ def serialize(self, treewalker, encoding=None):
337339 else :
338340 attributes .append ("/" )
339341 if encoding :
340- yield "<%s%s>" % (name .encode (encoding ), "" .join (attributes ))
342+ yield "<%s%s>" % (name .encode (encoding , "strict" ), "" .join (attributes ))
341343 else :
342344 yield u"<%s%s>" % (name , u"" .join (attributes ))
343345
@@ -349,7 +351,7 @@ def serialize(self, treewalker, encoding=None):
349351 self .serializeError (_ ("Unexpected child element of a CDATA element" ))
350352 end_tag = u"</%s>" % name
351353 if encoding :
352- end_tag = end_tag .encode (encoding )
354+ end_tag = end_tag .encode (encoding , "strict" )
353355 yield end_tag
354356
355357 elif type == "Comment" :
0 commit comments