Skip to content
Merged
Changes from 1 commit
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
05ae5ad
Add Ascii85, base85, and Z85 support to binascii
kangtastic Mar 8, 2023
aa06c5d
Restore base64.py
kangtastic Apr 26, 2025
6377440
Create _base64 module with wrappers for accelerated functions
kangtastic Apr 26, 2025
6c0e4a3
Test both Python and C codepaths in base64
kangtastic Apr 26, 2025
ce4773c
Match behavior between Python and C base 85 functions
kangtastic Apr 26, 2025
4072e3b
Add Z85 tests to binascii
kangtastic Apr 27, 2025
bc9217f
Update generated files
kangtastic Apr 27, 2025
2c40ba0
Avoid importing functools
kangtastic Apr 28, 2025
fd9eaf7
Avoid circular import in _base64
kangtastic Apr 28, 2025
4746d18
Do not use a decorator for changing exception type
kangtastic Apr 28, 2025
d075593
Test Python and C codepaths in base64 using mixins
kangtastic Apr 28, 2025
6d65fec
Remove leading underscore from functions in private module
kangtastic Apr 29, 2025
a241356
Merge branch 'main' into gh-101178-rework-base85
serhiy-storchaka Dec 24, 2025
0df9a40
Use more modern C API.
serhiy-storchaka Dec 24, 2025
60fbd7c
Fix tests.
serhiy-storchaka Dec 24, 2025
a070887
Merge branch 'main' into gh-101178-rework-base85
serhiy-storchaka Dec 25, 2025
167e83e
Fix new tests.
serhiy-storchaka Dec 25, 2025
01df442
Optimize binascii.b2a_ascii85().
serhiy-storchaka Dec 26, 2025
7885918
Apply suggestions from code review
serhiy-storchaka Dec 27, 2025
1e928e3
Update C style to more closely adhere to PEP-7
kangtastic Dec 28, 2025
2691a0a
Remove pure-Python base-85-related codepaths in base64
kangtastic Dec 28, 2025
b9d27bd
Remove now-unnecessary _base64 module and fix tests
kangtastic Dec 28, 2025
780517a
Separate Z85 from Base85 on the Python API side
kangtastic Dec 28, 2025
bc9a66d
Fix tests after separating Base85 from Z85
kangtastic Dec 28, 2025
dc1d3fc
Merge branch 'main' into gh-101178-rework-base85
kangtastic Dec 28, 2025
c5de5a1
Update generated files after merging main
kangtastic Dec 28, 2025
3bb3b18
Update Misc/NEWS.d and Misc/ACKS
kangtastic Dec 28, 2025
6f09fa8
Update generated files again
kangtastic Dec 29, 2025
6d8f897
Fix typo in NEWS entry
kangtastic Dec 29, 2025
3582492
Merge branch 'main' into gh-101178-rework-base85
serhiy-storchaka Jan 14, 2026
879dd86
Move the NEWS entry to the correct section.
serhiy-storchaka Jan 14, 2026
3cdc3c5
Minor cleanups, align lookup tables to 64 bytes (NFC)
kangtastic Jan 17, 2026
8adaf2c
Allow up to sys.maxsize output length when encoding base 85
kangtastic Jan 18, 2026
2b2ecc4
Fix Ascii85 test from mainline
kangtastic Jan 18, 2026
da165d1
Allow up to sys.maxsize output length when decoding base 85
kangtastic Jan 18, 2026
bf32f99
Defer base 85 overflow check during decoding
kangtastic Jan 18, 2026
74f6ceb
Merge branch 'main' into gh-101178-rework-base85
kangtastic Jan 18, 2026
4ba3e50
Merge branch 'main' into gh-101178-rework-base85
serhiy-storchaka Feb 6, 2026
99e0bde
Rename parameters to match the base64 module.
serhiy-storchaka Feb 6, 2026
cc6d485
Remove parameters strict_mode and newline.
serhiy-storchaka Feb 6, 2026
30f54a1
Optimize ignorechars cache.
serhiy-storchaka Feb 6, 2026
37df735
Harmonize documentation.
serhiy-storchaka Feb 6, 2026
56a02b2
Add What's New entries.
serhiy-storchaka Feb 6, 2026
adb1922
Polish tests.
serhiy-storchaka Feb 6, 2026
0730fdf
Rename internal Base 85 codec functions to match Base 64 helpers
kangtastic Feb 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Restore base64.py
  • Loading branch information
kangtastic committed Apr 26, 2025
commit aa06c5dcfcab69e5e6f618d346b9531145e5c2c8
179 changes: 171 additions & 8 deletions Lib/base64.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,36 @@ def b16decode(s, casefold=False):
#
# Ascii85 encoding/decoding
#

_a85chars = None
_a85chars2 = None
_A85START = b"<~"
_A85END = b"~>"

def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
    """Encode b as base-85 text; shared back end of a85encode and b85encode.

    chars is indexed by a single digit value (0..84) and chars2 by a
    two-digit value (0..7224); both yield the corresponding symbol bytes.
    The input is zero-filled to a multiple of four bytes.  Unless pad is
    true, the symbols that only exist because of that fill are trimmed
    from the end of the output.  foldnuls abbreviates an all-zero word to
    b'z'; foldspaces abbreviates a word of four spaces to b'y'.
    """
    raw = b if isinstance(b, bytes_types) else memoryview(b).tobytes()

    # Zero-fill so the data divides evenly into big-endian 32-bit words.
    # A new object is built here so a caller's bytearray is never mutated.
    fill = -len(raw) % 4
    if fill:
        raw = raw + b'\0' * fill
    words = struct.unpack('!%dI' % (len(raw) // 4), raw)

    groups = []
    for word in words:
        if foldnuls and word == 0:
            groups.append(b'z')
        elif foldspaces and word == 0x20202020:
            groups.append(b'y')
        else:
            # Five base-85 digits: two pairs via chars2, the last via chars.
            upper, last = divmod(word, 85)
            first_pair, second_pair = divmod(upper, 7225)
            groups.append(
                chars2[first_pair] + chars2[second_pair] + chars[last])

    if fill and not pad:
        # The final group carries the fill bytes.  A folded 'z' must be
        # expanded to five symbols before the surplus can be cut off.
        if groups[-1] == b'z':
            groups[-1] = chars[0] * 5
        groups[-1] = groups[-1][:-fill]

    return b''.join(groups)

def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
"""Encode bytes-like object b using Ascii85 and return a bytes object.

Expand All @@ -307,8 +337,29 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
adobe controls whether the encoded byte sequence is framed with <~ and ~>,
which is used by the Adobe implementation.
"""
return binascii.b2a_ascii85(b, fold_spaces=foldspaces,
wrap=adobe, width=wrapcol, pad=pad)
global _a85chars, _a85chars2
# Delay the initialization of tables to not waste memory
# if the function is never called
if _a85chars2 is None:
_a85chars = [bytes((i,)) for i in range(33, 118)]
_a85chars2 = [(a + b) for a in _a85chars for b in _a85chars]

result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)

if adobe:
result = _A85START + result
if wrapcol:
wrapcol = max(2 if adobe else 1, wrapcol)
chunks = [result[i: i + wrapcol]
for i in range(0, len(result), wrapcol)]
if adobe:
if len(chunks[-1]) + 2 > wrapcol:
chunks.append(b'')
result = b'\n'.join(chunks)
if adobe:
result += _A85END

return result

def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
"""Decode the Ascii85 encoded bytes-like object or ASCII string b.
Expand All @@ -327,36 +378,148 @@ def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
The result is returned as a bytes object.
"""
b = _bytes_from_decode_data(b)
return binascii.a2b_ascii85(b, fold_spaces=foldspaces,
wrap=adobe, ignore=ignorechars)
if adobe:
if not b.endswith(_A85END):
raise ValueError(
"Ascii85 encoded byte sequences must end "
"with {!r}".format(_A85END)
)
if b.startswith(_A85START):
b = b[2:-2] # Strip off start/end markers
else:
b = b[:-2]
#
# We have to go through this stepwise, so as to ignore spaces and handle
# special short sequences
#
packI = struct.Struct('!I').pack
decoded = []
decoded_append = decoded.append
curr = []
curr_append = curr.append
curr_clear = curr.clear
for x in b + b'u' * 4:
if b'!'[0] <= x <= b'u'[0]:
curr_append(x)
if len(curr) == 5:
acc = 0
for x in curr:
acc = 85 * acc + (x - 33)
try:
decoded_append(packI(acc))
except struct.error:
raise ValueError('Ascii85 overflow') from None
curr_clear()
elif x == b'z'[0]:
if curr:
raise ValueError('z inside Ascii85 5-tuple')
decoded_append(b'\0\0\0\0')
elif foldspaces and x == b'y'[0]:
if curr:
raise ValueError('y inside Ascii85 5-tuple')
decoded_append(b'\x20\x20\x20\x20')
elif x in ignorechars:
# Skip whitespace
continue
else:
raise ValueError('Non-Ascii85 digit found: %c' % x)

result = b''.join(decoded)
padding = 4 - len(curr)
if padding:
# Throw away the extra padding
result = result[:-padding]
return result

# The following code is originally taken (with permission) from Mercurial

_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
_b85chars = None
_b85chars2 = None
_b85dec = None

def b85encode(b, pad=False):
    """Encode bytes-like object b in base85 format and return a bytes object.

    If pad is true, the input is padded with b'\\0' so its length is a multiple of
    4 bytes before encoding.
    """
    global _b85chars, _b85chars2
    # Delay the initialization of tables to not waste memory
    # if the function is never called
    if _b85chars2 is None:
        _b85chars = [bytes((i,)) for i in _b85alphabet]
        # Every ordered pair of symbols, so two digits cost one lookup.
        _b85chars2 = [(first + second)
                      for first in _b85chars
                      for second in _b85chars]
    return _85encode(b, _b85chars, _b85chars2, pad)

def b85decode(b):
    """Decode the base85-encoded bytes-like object or ASCII string b

    The result is returned as a bytes object.

    Raises ValueError if the input contains a byte outside the base85
    alphabet or if a 5-symbol group does not fit in 32 bits.
    """
    global _b85dec
    # Delay the initialization of tables to not waste memory
    # if the function is never called
    if _b85dec is None:
        # Indexed by byte value; None marks bytes outside the alphabet.
        _b85dec = [None] * 256
        for i, c in enumerate(_b85alphabet):
            _b85dec[c] = i

    b = _bytes_from_decode_data(b)
    # Complete the last group with the highest-valued symbol; the bytes
    # this adds to the output are cut off again at the end.
    padding = (-len(b)) % 5
    b = b + b'~' * padding
    out = []
    packI = struct.Struct('!I').pack
    for i in range(0, len(b), 5):
        chunk = b[i:i + 5]
        acc = 0
        try:
            for c in chunk:
                acc = acc * 85 + _b85dec[c]
        except TypeError:
            # A None table entry caused the failure; report where it is.
            for j, c in enumerate(chunk):
                if _b85dec[c] is None:
                    raise ValueError('bad base85 character at position %d'
                                     % (i + j)) from None
            raise
        try:
            out.append(packI(acc))
        except struct.error:
            raise ValueError('base85 overflow in hunk starting at byte %d'
                             % i) from None

    result = b''.join(out)
    if padding:
        result = result[:-padding]
    return result

# The 85 symbols of the Z85 alphabet, in digit-value order.
_z85alphabet = (b'0123456789'
                b'abcdefghijklmnopqrstuvwxyz'
                b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                b'.-:+=^!/*?&<>()[]{}@%$#')
# These bytes belong to the base85 alphabet but not to Z85.  The decode
# table sends them to b'\x00' so that, once Z85 input has been rewritten
# into base85 symbols, they are rejected instead of decoding as base85.
_z85_b85_decode_diff = b';_`|~'
_z85_decode_translation = bytes.maketrans(
    _z85alphabet + _z85_b85_decode_diff,
    _b85alphabet + bytes(len(_z85_b85_decode_diff)),
)
_z85_encode_translation = bytes.maketrans(_b85alphabet, _z85alphabet)

def z85encode(s):
    """Encode bytes-like object s in z85 format and return a bytes object."""
    # Encode as base85, then swap each symbol for its Z85 counterpart.
    return b85encode(s).translate(_z85_encode_translation)

def z85decode(s):
    """Decode the z85-encoded bytes-like object or ASCII string s

    The result is returned as a bytes object.

    Raises ValueError, with 'base85' replaced by 'z85' in the message,
    when the underlying base85 decoder rejects the input.
    """
    s = _bytes_from_decode_data(s)
    # Rewrite Z85 symbols as their base85 counterparts before decoding.
    s = s.translate(_z85_decode_translation)
    try:
        return b85decode(s)
    except ValueError as e:
        raise ValueError(e.args[0].replace('base85', 'z85')) from None

# Legacy interface. This code could be cleaned up since I don't believe
# binascii has any line length limitations. It just doesn't seem worth it
Expand Down