diff --git a/src/codext/base/base45.py b/src/codext/base/base45.py index 272c3e9..1590460 100644 --- a/src/codext/base/base45.py +++ b/src/codext/base/base45.py @@ -34,7 +34,10 @@ def base45_encode(mode): b45 = _get_charset(B45, mode) def encode(text, errors="strict"): t, s = b(text), "" - for i in range(0, len(text), 2): + # iterate over the byte sequence (t), not len(text): when the input + # holds non-ASCII characters, b(text) is longer than text and using + # len(text) silently drops the trailing bytes + for i in range(0, len(t), 2): n = 256 * __ord(t[i]) try: n += __ord(t[i+1]) @@ -54,7 +57,7 @@ def base45_decode(mode): def decode(text, errors="strict"): t, s = b(text), "" ehandler = handle_error("base45", errors, decode=True) - for i in range(0, len(text), 3): + for i in range(0, len(t), 3): try: n = b45[__chr(t[i])] except KeyError: diff --git a/tests/test_base.py b/tests/test_base.py index a37d1a6..193b173 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -211,6 +211,19 @@ def test_codec_base100(self): self.assertRaises(ValueError, codecs.decode, b(B100)[1:], "base100") self.assertIsNotNone(codecs.decode(b(B100) + b"\n", "base100", "ignore")) + def test_codec_base45(self): + # RFC 9285 test vectors + for s, b45 in [("AB", "BB8"), ("Hello!!", "%69 VD92EX0"), ("base-45", "UJCLQE7W581")]: + self.assertEqual(codecs.encode(s, "base45"), b45) + self.assertEqual(codecs.encode(b(s), "base45"), b(b45)) + self.assertEqual(codecs.decode(b45, "base45"), s) + self.assertEqual(codecs.decode(b(b45), "base45"), b(s)) + # a trailing non-ASCII byte must not be dropped (byte length, not str length, drives encoding) + self.assertEqual(codecs.encode(b"\xcf\xb1\x1b", "base45"), b"OBQR0") + self.assertEqual(codecs.decode(b"OBQR0", "base45"), b"\xcf\xb1\x1b") + for data in [b"\xff\xfe", b"hello", b"\x00", b"\x80\x81\x82\x83\x84"]: + self.assertEqual(codecs.decode(codecs.encode(data, "base45"), "base45"), data) + def test_codec_base_generic(self): for n in range(2, 255): bn = "base{}_generic".format(n)