Skip to content

Commit 0312190

Browse files
committed
Constructor of the serializer now duplicates class-level default options into instance variables (so that if you change the class variables, your already created serializers' options aren't impacted)
Added an escape_text function --HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40663
1 parent e5f06ff commit 0312190

1 file changed

Lines changed: 13 additions & 11 deletions

File tree

src/serializer.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ def htmlentityreplace_errors(exc):
4545

4646
del register_error
4747

48+
def escape_text(text, encoding):
49+
return text.replace("&", "&amp;").encode(encoding, unicode_encode_errors)
50+
4851
class OptionalTagFilter:
4952
def __init__(self, source):
5053
self.source = source
@@ -246,8 +249,7 @@ def __init__(self, **kwargs):
246249
"minimize_boolean_attributes", "use_trailing_solidus",
247250
"space_before_trailing_solidus", "omit_optional_tags",
248251
"strip_whitespace", "inject_meta_charset"):
249-
if attr in kwargs:
250-
setattr(self, attr, kwargs[attr])
252+
setattr(self, attr, kwargs.get(attr, getattr(self, attr)))
251253
self.errors = []
252254
self.strict = False
253255

@@ -274,12 +276,11 @@ def serialize(self, treewalker, encoding=None):
274276
if in_cdata and token["data"].find("</") >= 0:
275277
self.serializeError(_("Unexpected </ in CDATA"))
276278
if encoding:
277-
yield token["data"].encode(encoding, errors or "strict")
279+
yield token["data"].encode(encoding, "strict")
278280
else:
279281
yield token["data"]
280282
elif encoding:
281-
yield token["data"].replace("&", "&amp;") \
282-
.encode(encoding, unicode_encode_errors)
283+
yield escape_text(token["data"], encoding)
283284
else:
284285
yield token["data"] \
285286
.replace("&", "&amp;") \
@@ -299,7 +300,7 @@ def serialize(self, treewalker, encoding=None):
299300
attributes = []
300301
for k,v in attrs:
301302
if encoding:
302-
k = k.encode(encoding)
303+
k = k.encode(encoding, "strict")
303304
attributes.append(' ')
304305

305306
attributes.append(k)
@@ -310,11 +311,12 @@ def serialize(self, treewalker, encoding=None):
310311
if self.quote_attr_values or not v:
311312
quote_attr = True
312313
else:
313-
quote_attr = reduce(lambda x,y: x or y in v,
314+
quote_attr = reduce(lambda x,y: x or (y in v),
314315
spaceCharacters + "<>\"'", False)
315-
v = v.replace("&", "&amp;")
316316
if encoding:
317-
v = v.encode(encoding, unicode_encode_errors)
317+
v = escape_text(v, encoding)
318+
else:
319+
v = v.replace("&", "&amp;")
318320
if quote_attr:
319321
quote_char = self.quote_char
320322
if self.use_best_quote_char:
@@ -337,7 +339,7 @@ def serialize(self, treewalker, encoding=None):
337339
else:
338340
attributes.append("/")
339341
if encoding:
340-
yield "<%s%s>" % (name.encode(encoding), "".join(attributes))
342+
yield "<%s%s>" % (name.encode(encoding, "strict"), "".join(attributes))
341343
else:
342344
yield u"<%s%s>" % (name, u"".join(attributes))
343345

@@ -349,7 +351,7 @@ def serialize(self, treewalker, encoding=None):
349351
self.serializeError(_("Unexpected child element of a CDATA element"))
350352
end_tag = u"</%s>" % name
351353
if encoding:
352-
end_tag = end_tag.encode(encoding)
354+
end_tag = end_tag.encode(encoding, "strict")
353355
yield end_tag
354356

355357
elif type == "Comment":

0 commit comments

Comments
 (0)