diff --git a/src/codext/base/_base.py b/src/codext/base/_base.py index f41df0b..c7db513 100644 --- a/src/codext/base/_base.py +++ b/src/codext/base/_base.py @@ -130,6 +130,10 @@ def base_encode(input, charset, errors="strict", exc=BaseEncodeError): while i > 0: i, c = divmod(i, n) r = charset[c] + r + # preserve leading zero bytes: big-integer bases such as Base58 map each + # leading null byte of the input to a leading charset[0] character + if not isinstance(input, int): + r = charset[0] * (len(input) - len(input.lstrip("\x00"))) + r return r @@ -151,7 +155,8 @@ def base_decode(input, charset, errors="strict", exc=BaseDecodeError): i = i * n + charset.index(c) except ValueError: handle_error("base", errors, exc, decode=True)(c, k, dec(i), "base%d" % n) - return dec(i) + # restore the leading zero bytes encoded as leading charset[0] characters + return chr(0) * (len(input) - len(input.lstrip(charset[0]))) + dec(i) # base codec factory functions diff --git a/tests/test_base.py b/tests/test_base.py index a37d1a6..7a12c11 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -172,7 +172,15 @@ def test_codec_base58(self): self.assertEqual(codecs.decode(B58, "base58-fl"), STR) self.assertEqual(codecs.encode(STR, "base58-short-url"), B58) self.assertEqual(codecs.encode(STR, "base58-url"), B58) - + # leading null bytes must be preserved as leading charset[0] ('1') + self.assertEqual(codecs.encode("\x00abc", "base58"), "1ZiCa") + self.assertEqual(codecs.encode("\x00", "base58"), "1") + self.assertEqual(codecs.encode("\x00\x00abc", "base58"), "11ZiCa") + self.assertEqual(codecs.decode("1ZiCa", "base58"), "\x00abc") + self.assertEqual(codecs.decode("11ZiCa", "base58"), "\x00\x00abc") + self.assertEqual(codecs.encode(b("\x00abc"), "base58"), b("1ZiCa")) + self.assertEqual(codecs.decode(b("1ZiCa"), "base58"), b("\x00abc")) + def test_codec_base62(self): for b62, enc in zip(["CsoB4HQ5gmgMyCenF7E", "M2yLERaFqwqW8MoxPHO"], ["base62", "base62-inv"]): self.assertEqual(codecs.encode(STR, enc), b62)