Skip to content

Commit 6495136

Browse files
committed
#1466065: add validate option to base64.b64decode
Patch by Neil Tallim. This provides a mechanism for module users to achieve RFC 3548 compliance in the cases where ignoring non-base64-alphabet input characters is *not* mandated by the RFC that references RFC 3548.
1 parent 49afa38 commit 6495136

4 files changed

Lines changed: 37 additions & 9 deletions

File tree

Doc/library/base64.rst

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,17 +37,21 @@ The modern interface provides:
3737
The encoded byte string is returned.
3838

3939

40-
.. function:: b64decode(s, altchars=None)
40+
.. function:: b64decode(s, altchars=None, validate=False)
4141

4242
Decode a Base64 encoded byte string.
4343

4444
*s* is the byte string to decode. Optional *altchars* must be a string of
4545
at least length 2 (additional characters are ignored) which specifies the
4646
alternative alphabet used instead of the ``+`` and ``/`` characters.
4747

48-
The decoded byte string is returned. A :exc:`TypeError` is raised if *s* were
49-
incorrectly padded or if there are non-alphabet characters present in the
50-
string.
48+
The decoded byte string is returned. A :exc:`binascii.Error` is raised if *s* is
49+
incorrectly padded.
50+
51+
If *validate* is ``False`` (the default), non-base64-alphabet characters are
52+
discarded prior to the padding check. If *validate* is ``True``,
53+
non-base64-alphabet characters in the input result in a
54+
:exc:`binascii.Error`.
5155

5256

5357
.. function:: standard_b64encode(s)

Lib/base64.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,16 +65,19 @@ def b64encode(s, altchars=None):
6565
return encoded
6666

6767

68-
def b64decode(s, altchars=None):
68+
def b64decode(s, altchars=None, validate=False):
6969
"""Decode a Base64 encoded byte string.
7070
7171
s is the byte string to decode. Optional altchars must be a
7272
string of length 2 which specifies the alternative alphabet used
7373
instead of the '+' and '/' characters.
7474
75-
The decoded byte string is returned. binascii.Error is raised if
76-
s were incorrectly padded or if there are non-alphabet characters
77-
present in the string.
75+
The decoded string is returned. A binascii.Error is raised if s is
76+
incorrectly padded.
77+
78+
If validate is False (the default), non-base64-alphabet characters are
79+
discarded prior to the padding check. If validate is True,
80+
non-base64-alphabet characters in the input result in a binascii.Error.
7881
"""
7982
if not isinstance(s, bytes_types):
8083
raise TypeError("expected bytes, not %s" % s.__class__.__name__)
@@ -84,6 +87,8 @@ def b64decode(s, altchars=None):
8487
% altchars.__class__.__name__)
8588
assert len(altchars) == 2, repr(altchars)
8689
s = _translate(s, {chr(altchars[0]): b'+', chr(altchars[1]): b'/'})
90+
if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s):
91+
raise binascii.Error('Non-base64 digit found')
8792
return binascii.a2b_base64(s)
8893

8994

Lib/test/test_base64.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,9 +138,25 @@ def test_b64decode(self):
138138
eq(base64.urlsafe_b64decode(b'01a-b_cd'), b'\xd3V\xbeo\xf7\x1d')
139139
self.assertRaises(TypeError, base64.urlsafe_b64decode, "")
140140

141-
def test_b64decode_error(self):
141+
def test_b64decode_padding_error(self):
142142
self.assertRaises(binascii.Error, base64.b64decode, b'abc')
143143

144+
def test_b64decode_invalid_chars(self):
145+
# issue 1466065: Test some invalid characters.
146+
tests = ((b'%3d==', b'\xdd'),
147+
(b'$3d==', b'\xdd'),
148+
(b'[==', b''),
149+
(b'YW]3=', b'am'),
150+
(b'3{d==', b'\xdd'),
151+
(b'3d}==', b'\xdd'),
152+
(b'@@', b''),
153+
(b'!', b''),
154+
(b'YWJj\nYWI=', b'abcab'))
155+
for bstr, res in tests:
156+
self.assertEquals(base64.b64decode(bstr), res)
157+
with self.assertRaises(binascii.Error):
158+
base64.b64decode(bstr, validate=True)
159+
144160
def test_b32encode(self):
145161
eq = self.assertEqual
146162
eq(base64.b32encode(b''), b'')

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ Core and Builtins
6363
Library
6464
-------
6565

66+
- Issue #1466065: Add 'validate' option to base64.b64decode to raise
67+
an error if there are non-base64-alphabet characters in the input.
68+
6669
- Issue #10386: Add __all__ to token module; this simplifies importing
6770
in tokenize module and prevents leaking of private names through
6871
import *.

0 commit comments

Comments
 (0)