From 397ac7ab9a0fa014bca2ab9bed6adef788f20998 Mon Sep 17 00:00:00 2001 From: Vincent Gao Date: Tue, 16 Jun 2026 00:34:22 +0200 Subject: [PATCH] Fixed leading null bytes being dropped by big-integer base codecs The generic base_encode/base_decode convert the whole input to a single integer, so leading null bytes (high-order zeros) were silently lost: e.g. Base58 encoded b'\x00abc' to 'ZiCa' instead of '1ZiCa', and b'\x00' to an empty string. Per the Base58 spec each leading 0x00 byte maps to a leading charset[0] character. Preserve the leading-zero count on encode and restore it on decode, so values round-trip and match reference implementations. --- src/codext/base/_base.py | 7 ++++++- tests/test_base.py | 10 +++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/codext/base/_base.py b/src/codext/base/_base.py index f41df0b..c7db513 100644 --- a/src/codext/base/_base.py +++ b/src/codext/base/_base.py @@ -130,6 +130,10 @@ def base_encode(input, charset, errors="strict", exc=BaseEncodeError): while i > 0: i, c = divmod(i, n) r = charset[c] + r + # preserve leading zero bytes: big-integer bases such as Base58 map each + # leading null byte of the input to a leading charset[0] character + if not isinstance(input, int): + r = charset[0] * (len(input) - len(input.lstrip("\x00"))) + r return r @@ -151,7 +155,8 @@ def base_decode(input, charset, errors="strict", exc=BaseDecodeError): i = i * n + charset.index(c) except ValueError: handle_error("base", errors, exc, decode=True)(c, k, dec(i), "base%d" % n) - return dec(i) + # restore the leading zero bytes encoded as leading charset[0] characters + return chr(0) * (len(input) - len(input.lstrip(charset[0]))) + dec(i) # base codec factory functions diff --git a/tests/test_base.py b/tests/test_base.py index a37d1a6..7a12c11 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -172,7 +172,15 @@ def test_codec_base58(self): self.assertEqual(codecs.decode(B58, "base58-fl"), STR) self.assertEqual(codecs.encode(STR, "base58-short-url"), B58) self.assertEqual(codecs.encode(STR, "base58-url"), B58) - + # leading null bytes must be preserved as leading charset[0] ('1') + self.assertEqual(codecs.encode("\x00abc", "base58"), "1ZiCa") + self.assertEqual(codecs.encode("\x00", "base58"), "1") + self.assertEqual(codecs.encode("\x00\x00abc", "base58"), "11ZiCa") + self.assertEqual(codecs.decode("1ZiCa", "base58"), "\x00abc") + self.assertEqual(codecs.decode("11ZiCa", "base58"), "\x00\x00abc") + self.assertEqual(codecs.encode(b("\x00abc"), "base58"), b("1ZiCa")) + self.assertEqual(codecs.decode(b("1ZiCa"), "base58"), b("\x00abc")) + def test_codec_base62(self): for b62, enc in zip(["CsoB4HQ5gmgMyCenF7E", "M2yLERaFqwqW8MoxPHO"], ["base62", "base62-inv"]): self.assertEqual(codecs.encode(STR, enc), b62)