Skip to content

Commit 2cb82d2

Browse files
miss-islington and asqui
authored and committed
bpo-36582: Make collections.UserString.encode() return bytes, not str (GH-13138) (GH-15557)
(cherry picked from commit 2a16eea) Co-authored-by: Daniel Fortunov <asqui@users.noreply.github.com>
1 parent 03c52f2 commit 2cb82d2

4 files changed

Lines changed: 20 additions & 6 deletions

File tree

Lib/collections/__init__.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1200,12 +1200,10 @@ def count(self, sub, start=0, end=_sys.maxsize):
12001200
if isinstance(sub, UserString):
12011201
sub = sub.data
12021202
return self.data.count(sub, start, end)
1203-
def encode(self, encoding=None, errors=None): # XXX improve this?
1204-
if encoding:
1205-
if errors:
1206-
return self.__class__(self.data.encode(encoding, errors))
1207-
return self.__class__(self.data.encode(encoding))
1208-
return self.__class__(self.data.encode())
1203+
def encode(self, encoding='utf-8', errors='strict'):
1204+
encoding = 'utf-8' if encoding is None else encoding
1205+
errors = 'strict' if errors is None else errors
1206+
return self.data.encode(encoding, errors)
12091207
def endswith(self, suffix, start=0, end=_sys.maxsize):
12101208
return self.data.endswith(suffix, start, end)
12111209
def expandtabs(self, tabsize=8):

Lib/test/test_userstring.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,20 @@ def __rmod__(self, other):
5151
str3 = ustr3('TEST')
5252
self.assertEqual(fmt2 % str3, 'value is TEST')
5353

54+
def test_encode_default_args(self):
55+
self.checkequal(b'hello', 'hello', 'encode')
56+
# Check that encoding defaults to utf-8
57+
self.checkequal(b'\xf0\xa3\x91\x96', '\U00023456', 'encode')
58+
# Check that errors defaults to 'strict'
59+
self.checkraises(UnicodeError, '\ud800', 'encode')
60+
61+
def test_encode_explicit_none_args(self):
62+
self.checkequal(b'hello', 'hello', 'encode', None, None)
63+
# Check that encoding defaults to utf-8
64+
self.checkequal(b'\xf0\xa3\x91\x96', '\U00023456', 'encode', None, None)
65+
# Check that errors defaults to 'strict'
66+
self.checkraises(UnicodeError, '\ud800', 'encode', None, None)
67+
5468

5569
if __name__ == "__main__":
5670
unittest.main()

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -509,6 +509,7 @@ Arnaud Fontaine
509509
Michael Foord
510510
Amaury Forgeot d'Arc
511511
Doug Fort
512+
Daniel Fortunov
512513
Evens Fortuné
513514
Chris Foster
514515
John Fouhy
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Fix ``UserString.encode()`` to correctly return ``bytes`` rather than a ``UserString`` instance.

0 commit comments

Comments
 (0)