Skip to content

Commit e443fd4

Browse files
committed
Fixed codec: base122
1 parent 60f88e5 commit e443fd4

3 files changed

Lines changed: 88 additions & 9 deletions

File tree

codext/base/base122.py

Lines changed: 78 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,88 @@
1010
from ..__common__ import *
1111

1212

13-
# no __examples__ ; handled manually in tests/test_base.py
13+
# Test vectors for this codec: fixed encodings checked under the names
# "base122" and "base-122", plus the "@random" round-trip marker for "base_122".
__examples__ = {
    'enc(base122|base-122)': {
        'this is a test': ":\x1aʗ\x19\x01Rs\x10\x18$\x07#\x15ft",
        b'This is another longer test string with d1g1t5 and sp3c141 characters !\n': (
            b"*\x1a\xca\x97\x19\x01Rs\x10\x18-f{QPe9\x08\xcb\x86{9Ne9\x08\x0eF+Mh 9]\x0e\xd3\x8b"
            b"9N ;Z.FA\x01H13L.C)\x01Bn2\x08\x0e7\x01MF1\x1a\x0c$\x06\x1b!Br0XnF+If \x10B@"
        ),
    },
    'enc-dec(base_122)': ["@random"],
}
1422

1523

16-
def base122_encode(input, errors="strict"):
17-
raise NotImplementedError
24+
# 7-bit values that are never written as a single character (NUL, LF, CR, '"',
# '&' and '\'); the encoder replaces each of them with a two-byte sequence that
# carries the value's position in this list.
_BAD = [0, 10, 13, 34, 38, 92]


def _i(c):
    """Return `c` as an integer.

    Items that are already integers (e.g. from iterating over bytes) are
    returned unchanged; anything else is passed to ord().
    """
    return c if isinstance(c, int) else ord(c)
1826

1927

20-
def base122_decode(input, errors="strict"):
21-
raise NotImplementedError
28+
if PY3:
29+
# inspired from: https://github.com/kevinAlbs/Base122/blob/master/base122.js
30+
def base122_encode(input, errors="strict"):
    """Encode `input` to Base122.

    The input is consumed as a stream of 7-bit chunks. A chunk is emitted as a
    single byte unless its value is listed in `_BAD`, in which case it is
    merged with the following chunk into a two-byte sequence.

    :param input:  data to encode; each item is converted to an int with `_i`
    :param errors: accepted for codec-interface compatibility; not used here
    :return:       tuple (encoded bytes, len(input))
    """
    size = len(input)
    pos, off, out = 0, 0, []

    def _take7(pos, off):
        # Returns the updated (byte position, bit offset) and the next 7-bit
        # chunk, or False as chunk when the input is exhausted.
        if pos >= size:
            return pos, off, False
        # bits of the current byte, from the bit offset on, aligned to 7 bits
        chunk = (((254 >> off) & _i(input[pos])) << off) >> 1
        off += 7
        if off >= 8:
            # the chunk spills over into the next byte, when there is one
            off -= 8
            pos += 1
            if pos < size:
                chunk |= (((65280 >> off) & _i(input[pos])) & 255) >> (8 - off)
        return pos, off, chunk

    while pos < size:
        pos, off, chunk = _take7(pos, off)
        if chunk not in _BAD:
            # legal value: emitted as is
            out.append(chunk)
            continue
        bad_idx = _BAD.index(chunk)
        pos, off, follow = _take7(pos, off)
        if follow is False:
            # no chunk left to pair with: index 7 marks a two-byte sequence
            # that only carries the illegal value itself
            follow, bad_idx = chunk, 7
        lead = 194 | ((7 & bad_idx) << 2) | int((follow & 64) > 0)
        trail = 128 | (follow & 63)
        out.extend([lead, trail])
    return "".join(map(chr, out)).encode("latin-1"), len(input)
2269

2370

24-
add("base122", base122_encode, base122_decode, r"^base[-_]?122$")
71+
# inspired from: https://github.com/kevinAlbs/Base122/blob/master/base122.js
72+
def base122_decode(input, errors="strict"):
    """Decode Base122 text back to the original data.

    Each input code point contributes one or two 7-bit chunks, which are
    re-packed into 8-bit values. Trailing bits that do not fill a whole byte
    are dropped.

    :param input:  Base122-encoded text; bytes (as returned by the encoder)
                   are also accepted and are first decoded as UTF-8 so that
                   each two-byte sequence is seen as a single code point
    :param errors: error handling scheme, only used for the UTF-8 decoding of
                   a bytes input
    :return:       tuple (decoded string with one character per decoded byte,
                   length of the input as received)
    """
    consumed = len(input)
    if isinstance(input, (bytes, bytearray)):
        input = input.decode("utf-8", errors)
    cur, filled, out = 0, 0, []
    for code in map(ord, input):
        if code >= 128:
            # two-byte character: bits 8-10 hold the position in _BAD of an
            # illegal chunk, the low 7 bits hold the chunk that follows it
            chunks = [code & 127]
            bad_idx = (code >> 8) & 7
            # positions beyond the table (7 is what the encoder writes for a
            # shortened sequence) carry no illegal chunk
            if bad_idx < len(_BAD):
                chunks.insert(0, _BAD[bad_idx])
        else:
            chunks = [code]
        for chunk in chunks:
            shifted = chunk << 1
            # align the chunk with the bits already collected
            cur |= shifted >> filled
            filled += 7
            if filled >= 8:
                out.append(cur)
                filled -= 8
                # keep the bits that did not fit for the next output byte
                cur = (shifted << (7 - filled)) & 255
    return "".join(map(chr, out)), consumed
94+
95+
96+
# Presumably registers the codec with the library (`add` is not defined in this
# file; it likely comes from the star import above - confirm). The name pattern
# accepts "base122", "base-122" and "base_122".
add("base122", base122_encode, base122_decode, r"^base[-_]?122$")
2597

docs/enc/base.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,7 @@ This encoding relies on the `base64` library and is only supported in Python 3.
134134
:---: | :---: | --- | ---
135135
`base85` | text <-> Base85 encoded text | `base[-_]?85` | Python 3 only (relies on `base64` module)
136136
`base100` | text <-> Base100 encoded text | `base[-_]?100|emoji` | Python 3 only
137-
138-
With Python 3, `base85` and `base100` (emoji's) are also supported.
137+
`base122` | text <-> Base122 encoded text | `base[-_]?122` | Python 3 only
139138

140139
```python
141140
>>> codecs.encode("this is a test", "base85")
@@ -151,3 +150,10 @@ With Python 3, `base85` and `base100` (emoji's) are also supported.
151150
'this is a test'
152151
```
153152

153+
```python
154+
>>> codecs.encode("this is a test", "base122")
155+
':\x1aʗ\x19\x01Rs\x10\x18$\x07#\x15ft'
156+
>>> codecs.decode(":\x1aʗ\x19\x01Rs\x10\x18$\x07#\x15ft", "base122")
157+
'this is a test'
158+
```
159+

docs/howto.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ __examples__ = {
192192

193193
```python
194194
__examples__ = {
195-
'enc-dec(codec)': ["test string", "TEST STRING"]
195+
'enc-dec(codec)': ["test string", "TEST STRING", "@random", "@random{1024}"]
196196
}
197197
```
198198

@@ -201,6 +201,7 @@ __examples__ = {
201201
- `__examples__` is the standard dunder, thus not specified in `add`/`add_map`.
202202
- `enc-dec` is used, meaning that a list of inputs is defined.
203203
- So, whatever the encoded output is, encoding and then decoding the input string shall give back the same string.
204+
- The special values `@random` and `@random{1024}` mean that test strings are randomly generated from any possible byte-character, with a length of 512 when not specified or the length given within `{...}` otherwise.
204205

205206
```python
206207
__examples__ = {

0 commit comments

Comments
 (0)