Balance encodes and decodes, remove validate=True, cover str input.

Per review from Serhiy.
python · gpshead · Jan 3, 2026 · Dec 29, 2025 · Dec 29, 2025 · Dec 29, 2025
commit 1570cad1d7f32f5f415f864212d91c89bb2e3187
diff --git a/pyperformance/data-files/benchmarks/bm_base64/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_base64/run_benchmark.py
@@ -2,65 +2,68 @@
 
 Tests encoding and decoding performance across various variants
 and data sizes, split into _small (balanced small data) and _large variants.
+
+The _small variants are weighted towards measuring overhead; the _large
+variants measure the core algorithm loop implementation.
 """
 
 import base64
 import random
 import pyperf
 
 
-# Generate test data with fixed seed for reproducibility
+# Generate test data with a fixed seed for reproducibility
 random.seed(12345)
-DATA_TINY = bytes(random.randrange(256) for _ in range(20))
-DATA_SMALL = bytes(random.randrange(256) for _ in range(127))  # odd on purpose
-DATA_MEDIUM = bytes(random.randrange(256) for _ in range(3072))
-DATA_9K = bytes(random.randrange(256) for _ in range(9000))
-DATA_LARGE = bytes(random.randrange(256) for _ in range(102400))
-DATA_HUGE = bytes(random.randrange(256) for _ in range(1048576))
+DATA_TINY = random.randbytes(20)
+DATA_SMALL = random.randbytes(127)  # odd on purpose
+DATA_MEDIUM = random.randbytes(3072)
+DATA_9K = random.randbytes(9000)
+DATA_LARGE = random.randbytes(102400)
+DATA_HUGE = random.randbytes(1048574)  # 1M-2
 
 # Pre-encoded data for decode benchmarks
 B64_TINY = base64.b64encode(DATA_TINY)
 B64_SMALL = base64.b64encode(DATA_SMALL)
 B64_MEDIUM = base64.b64encode(DATA_MEDIUM)
-B64_9K = base64.b64encode(DATA_9K)
+B64_9K_STR = base64.b64encode(DATA_9K).decode('ascii')
 B64_LARGE = base64.b64encode(DATA_LARGE)
 B64_HUGE = base64.b64encode(DATA_HUGE)
 
 B64_URLSAFE_TINY = base64.urlsafe_b64encode(DATA_TINY)
 B64_URLSAFE_SMALL = base64.urlsafe_b64encode(DATA_SMALL)
 B64_URLSAFE_MEDIUM = base64.urlsafe_b64encode(DATA_MEDIUM)
-B64_URLSAFE_9K = base64.urlsafe_b64encode(DATA_9K)
+B64_URLSAFE_9K_STR = base64.urlsafe_b64encode(DATA_9K).decode('ascii')
 
 B32_TINY = base64.b32encode(DATA_TINY)
 B32_SMALL = base64.b32encode(DATA_SMALL)
 B32_MEDIUM = base64.b32encode(DATA_MEDIUM)
-B32_9K = base64.b32encode(DATA_9K)
+B32_9K_STR = base64.b32encode(DATA_9K).decode('ascii')
 B32_LARGE = base64.b32encode(DATA_LARGE)
 B32_HUGE = base64.b32encode(DATA_HUGE)
 
 B16_TINY = base64.b16encode(DATA_TINY)
 B16_SMALL = base64.b16encode(DATA_SMALL)
 B16_MEDIUM = base64.b16encode(DATA_MEDIUM)
-B16_9K = base64.b16encode(DATA_9K)
+B16_9K_STR = base64.b16encode(DATA_9K).decode('ascii')
 B16_LARGE = base64.b16encode(DATA_LARGE)
 B16_HUGE = base64.b16encode(DATA_HUGE)
 
 A85_TINY = base64.a85encode(DATA_TINY)
 A85_SMALL = base64.a85encode(DATA_SMALL)
 A85_MEDIUM = base64.a85encode(DATA_MEDIUM)
-A85_9K = base64.a85encode(DATA_9K)
+A85_9K_STR = base64.a85encode(DATA_9K).decode('ascii')
 A85_LARGE = base64.a85encode(DATA_LARGE)
 A85_HUGE = base64.a85encode(DATA_HUGE)
 
 B85_TINY = base64.b85encode(DATA_TINY)
 B85_SMALL = base64.b85encode(DATA_SMALL)
 B85_MEDIUM = base64.b85encode(DATA_MEDIUM)
-B85_9K = base64.b85encode(DATA_9K)
+B85_9K_STR = base64.b85encode(DATA_9K).decode('ascii')
 B85_LARGE = base64.b85encode(DATA_LARGE)
 B85_HUGE = base64.b85encode(DATA_HUGE)
 
 
-# --- Base64 (includes validate=True) ---
+# --- Base64 ---
 
 def bench_b64_small(loops):
     range_it = range(loops)
@@ -69,18 +72,14 @@ def bench_b64_small(loops):
         for _ in range(450):
             base64.b64encode(DATA_TINY)
             base64.b64decode(B64_TINY)
-            base64.b64decode(B64_TINY, validate=True)
         for _ in range(71):
             base64.b64encode(DATA_SMALL)
             base64.b64decode(B64_SMALL)
-            base64.b64decode(B64_SMALL, validate=True)
         for _ in range(3):
             base64.b64encode(DATA_MEDIUM)
             base64.b64decode(B64_MEDIUM)
-            base64.b64decode(B64_MEDIUM, validate=True)
         base64.b64encode(DATA_9K)
-        base64.b64decode(B64_9K)
-        base64.b64decode(B64_9K, validate=True)
+        base64.b64decode(B64_9K_STR)
     return pyperf.perf_counter() - t0
 
 
@@ -91,10 +90,8 @@ def bench_b64_large(loops):
         for _ in range(10):
             base64.b64encode(DATA_LARGE)
             base64.b64decode(B64_LARGE)
-            base64.b64decode(B64_LARGE, validate=True)
         base64.b64encode(DATA_HUGE)
         base64.b64decode(B64_HUGE)
-        base64.b64decode(B64_HUGE, validate=True)
     return pyperf.perf_counter() - t0
 
 
@@ -114,7 +111,7 @@ def bench_urlsafe_b64_small(loops):
             base64.urlsafe_b64encode(DATA_MEDIUM)
             base64.urlsafe_b64decode(B64_URLSAFE_MEDIUM)
         base64.urlsafe_b64encode(DATA_9K)
-        base64.urlsafe_b64decode(B64_URLSAFE_9K)
+        base64.urlsafe_b64decode(B64_URLSAFE_9K_STR)
     return pyperf.perf_counter() - t0
 
 
@@ -134,7 +131,7 @@ def bench_b32_small(loops):
             base64.b32encode(DATA_MEDIUM)
             base64.b32decode(B32_MEDIUM)
         base64.b32encode(DATA_9K)
-        base64.b32decode(B32_9K)
+        base64.b32decode(B32_9K_STR)
     return pyperf.perf_counter() - t0
 
 
@@ -166,7 +163,7 @@ def bench_b16_small(loops):
             base64.b16encode(DATA_MEDIUM)
             base64.b16decode(B16_MEDIUM)
         base64.b16encode(DATA_9K)
-        base64.b16decode(B16_9K)
+        base64.b16decode(B16_9K_STR)
     return pyperf.perf_counter() - t0
 
 
@@ -192,17 +189,21 @@ def bench_a85_small(loops):
             base64.a85encode(DATA_TINY)
             base64.a85encode(DATA_TINY, wrapcol=76)
             base64.a85decode(A85_TINY)
+            base64.a85decode(A85_TINY)  # balance enc+dec weight
         for _ in range(71):
             base64.a85encode(DATA_SMALL)
             base64.a85encode(DATA_SMALL, wrapcol=76)
             base64.a85decode(A85_SMALL)
+            base64.a85decode(A85_SMALL)  # balance enc+dec weight
         for _ in range(3):
             base64.a85encode(DATA_MEDIUM)
             base64.a85encode(DATA_MEDIUM, wrapcol=76)
             base64.a85decode(A85_MEDIUM)
+            base64.a85decode(A85_MEDIUM)  # balance enc+dec weight
         base64.a85encode(DATA_9K)
         base64.a85encode(DATA_9K, wrapcol=76)
-        base64.a85decode(A85_9K)
+        base64.a85decode(A85_9K_STR)
+        base64.a85decode(A85_9K_STR)  # balance enc+dec weight
     return pyperf.perf_counter() - t0
 
 
@@ -214,9 +215,11 @@ def bench_a85_large(loops):
             base64.a85encode(DATA_LARGE)
             base64.a85encode(DATA_LARGE, wrapcol=76)
             base64.a85decode(A85_LARGE)
+            base64.a85decode(A85_LARGE)  # balance enc+dec weight
         base64.a85encode(DATA_HUGE)
         base64.a85encode(DATA_HUGE, wrapcol=76)
         base64.a85decode(A85_HUGE)
+        base64.a85decode(A85_HUGE)  # balance enc+dec weight
     return pyperf.perf_counter() - t0
 
 
@@ -236,7 +239,7 @@ def bench_b85_small(loops):
             base64.b85encode(DATA_MEDIUM)
             base64.b85decode(B85_MEDIUM)
         base64.b85encode(DATA_9K)
-        base64.b85decode(B85_9K)
+        base64.b85decode(B85_9K_STR)
     return pyperf.perf_counter() - t0