|
| 1 | +# -*- coding: UTF-8 -*- |
| 2 | +"""Bazeries Cipher Codec - bazeries content encoding. |
| 3 | +
|
| 4 | +The Bazeries cipher is an encryption system created by Étienne Bazeries that combines |
| 5 | +two Polybius grids (5×5 square arrays of letters) and a transposition based on a |
| 6 | +numeric key. The plaintext is split into groups whose sizes are the digits of the key, |
| 7 | +each group is reversed, and then a substitution is applied by mapping each letter's |
| 8 | +position in the first (standard) Polybius square to the same position in the second |
| 9 | +(key-based) Polybius square. When the key is a keyword instead of a number, the |
| 10 | +lengths of the words in the keyword are used as group sizes. |
| 11 | +
|
| 12 | +This codec: |
| 13 | +- en/decodes strings from str to str |
| 14 | +- en/decodes strings from bytes to bytes |
| 15 | +- decodes file content to str (read) |
| 16 | +- encodes file content from str to bytes (write) |
| 17 | +
|
| 18 | +Reference: https://www.dcode.fr/bazeries-cipher |
| 19 | +""" |
| 20 | +from ..__common__ import * |
| 21 | + |
| 22 | + |
# Sample plaintext/ciphertext pairs for the default key "137"
# (presumably consumed by the package's generic codec tests - confirm in __common__).
__examples__ = {
    'enc(bazeries-137)': {
        'HELLO': 'TSSUB',
        'ATTACK': 'OOLLYE',
    },
    'dec(bazeries-137)': {
        'TSSUB': 'HELLO',
        'OOLLYE': 'ATTACK',
    },
}
__guess__ = ["bazeries-137"]


# Key used when the codec is requested without an explicit key
_DEFAULT_KEY = "137"
# Standard 5×5 Polybius square alphabet: 25 letters, J is left out because it shares I's cell
_DEFAULT_ALPHABET = "ABCDEFGHIKLMNOPQRSTUVWXYZ"

# English names of 0..19, indexed by value; index 0 is empty (zero is handled separately
# by _num_to_words)
_ONES = [""] + ("ONE TWO THREE FOUR FIVE SIX SEVEN EIGHT NINE TEN ELEVEN TWELVE THIRTEEN "
                "FOURTEEN FIFTEEN SIXTEEN SEVENTEEN EIGHTEEN NINETEEN").split()
# English names of the tens, indexed by the tens digit; 0 and 1 are empty because every
# value below 20 is looked up in _ONES
_TENS = ["", ""] + "TWENTY THIRTY FORTY FIFTY SIXTY SEVENTY EIGHTY NINETY".split()
| 38 | + |
| 39 | + |
def _num_to_words(n):
    """ Spell a non-negative integer in uppercase English words, separated by single spaces.

    Examples: 0 -> "ZERO", 42 -> "FORTY TWO", 137 -> "ONE HUNDRED THIRTY SEVEN".
    No "AND" and no hyphens are emitted. Values of a billion and above are expressed as a
    recursively spelled count of billions followed by the remainder.
    """
    if n == 0:
        return "ZERO"
    if n < 20:
        return _ONES[n]
    if n < 100:
        tens, units = divmod(n, 10)
        words = _TENS[tens]
        if units:
            words += " " + _ONES[units]
        return words.strip()
    # Pick the largest scale that fits into n; n >= 100 here, so the last entry always matches.
    for scale, label in ((10 ** 9, "BILLION"), (10 ** 6, "MILLION"), (10 ** 3, "THOUSAND"), (100, "HUNDRED")):
        if n >= scale:
            break
    count, remainder = divmod(n, scale)
    words = _num_to_words(count) + " " + label
    if remainder:
        words += " " + _num_to_words(remainder)
    return words.strip()
| 60 | + |
| 61 | + |
def _parse_key(key):
    """ Parse the key into (phrase, group_sizes).

    The key is converted to an uppercase string in which "-" and "_" act as word separators.
    For a numeric key (only decimal digits once spaces are removed), the number is written in
    English words to build the phrase, and its individual non-zero digits form the group sizes
    for transposition.
    For any other key, the key itself is the phrase and its word lengths are the group sizes.
    An empty/falsy key falls back to the default key; when no group size can be derived, a
    single group size of 1 is returned.

    :param key: key as given by the user (string or number)
    :return:    tuple (phrase, list of positive integer group sizes)
    """
    if not key:
        key = _DEFAULT_KEY
    key_str = str(key).upper().replace("-", " ").replace("_", " ").strip()
    compact = key_str.replace(" ", "")
    # str.isdecimal() is used rather than str.isdigit(): the latter is also true for characters
    #  such as superscript digits ("²") that int() cannot convert, which made such keys crash
    #  with a ValueError instead of being handled as a keyword.
    if compact.isdecimal():
        n = int(compact)
        phrase = _num_to_words(n)
        # str(n) drops leading zeros and normalizes any Unicode decimal digits to ASCII
        digits = [int(d) for d in str(n) if d != '0']
    else:
        phrase = key_str
        digits = [len(w) for w in key_str.split()]
    return phrase, digits or [1]
| 84 | + |
| 85 | + |
def _build_key_alphabet(phrase):
    """ Build a 25-character cipher alphabet from the key phrase for the second Polybius square.

    Letters appear in the order they first occur in the phrase (with J merged into I),
    followed by the remaining letters of the standard alphabet. Any character of the phrase
    that is not part of the standard alphabet (spaces, digits, punctuation, accented or other
    non-ASCII letters) is ignored, so that the result is always a permutation of the standard
    25-letter alphabet.

    :param phrase: key phrase (any case)
    :return:       25-character string
    """
    seen = []
    for c in phrase.upper():
        if c == 'J':
            c = 'I'
        # membership in the standard alphabet is tested instead of c.isalpha(), which would
        #  also let letters like "É" into the square and push a regular letter out of it
        if c in _DEFAULT_ALPHABET and c not in seen:
            seen.append(c)
    seen.extend(c for c in _DEFAULT_ALPHABET if c not in seen)
    return "".join(seen)
| 102 | + |
| 103 | + |
def _build_squares(key_alphabet):
    """ Create the letter<->coordinate tables of both 5×5 Polybius squares.

    The first square is filled row by row with the standard alphabet, the second one with
    key_alphabet (only its first 25 characters are used). Coordinates are (row, col) tuples,
    both 1-indexed.

    Returns (sq1_pos, sq2_pos, sq1_lkp, sq2_lkp) where:
    - sq1_pos / sq2_pos map a letter to its coordinate ('J' is an alias of 'I')
    - sq1_lkp / sq2_lkp map a coordinate to its letter
    """
    cells = [(row, col) for row in range(1, 6) for col in range(1, 6)]

    def _square(alphabet):
        # explicit indexing is kept so that a too short alphabet raises an IndexError
        positions = {alphabet[n]: cell for n, cell in enumerate(cells)}
        letters = {cell: alphabet[n] for n, cell in enumerate(cells)}
        # J shares the cell with I
        positions['J'] = positions['I']
        return positions, letters

    std_pos, std_lkp = _square(_DEFAULT_ALPHABET)
    key_pos, key_lkp = _square(key_alphabet)
    return std_pos, key_pos, std_lkp, key_lkp
| 121 | + |
| 122 | + |
def _transpose(chars, digits):
    """ Split chars into consecutive groups of sizes given by digits (cycling) and reverse each group.

    The last group may be shorter than its nominal size. Non-positive sizes are skipped, as a
    zero size would never consume any character and loop forever; when no usable size is left,
    a size of 1 is used, which leaves the order unchanged (same fallback as in _parse_key).
    Applying the function twice with the same sizes restores the original order.

    :param chars:  sequence of symbols (list or string)
    :param digits: iterable of integer group sizes
    :return:       list of the symbols in transposed order
    """
    sizes = [d for d in digits if d > 0] or [1]
    result, start, turn = [], 0, 0
    while start < len(chars):
        size = sizes[turn % len(sizes)]
        turn += 1
        result.extend(reversed(chars[start:start + size]))
        start += size
    return result
| 133 | + |
| 134 | + |
def bazeries_encode(key=""):
    """ Build the encoding function for the given key.

    The key is parsed once into the second Polybius square and the transposition group sizes;
    the returned function takes (text, errors) and returns (ciphertext, len(text)).
    """
    phrase, group_sizes = _parse_key(key)
    # encoding only needs letter -> cell in the standard square and cell -> letter in the keyed one
    plain_pos, _, _, cipher_at = _build_squares(_build_key_alphabet(phrase))

    def encode(text, errors="strict"):
        on_error = handle_error("bazeries", errors)
        # keep alphabetic characters only, uppercased, with J folded into I
        letters = ['I' if ch == 'J' else ch for ch in ensure_str(text).upper() if ch.isalpha()]
        output = []
        for index, ch in enumerate(_transpose(letters, group_sizes)):
            cell = plain_pos.get(ch)
            if cell is None:
                # alphabetic but not in the square: delegate to the error handler
                output.append(on_error(ch, index, "".join(output)))
            else:
                output.append(cipher_at[cell])
        return "".join(output), len(text)
    return encode
| 153 | + |
| 154 | + |
def bazeries_decode(key=""):
    """ Build the decoding function for the given key.

    The key is parsed once into the second Polybius square and the transposition group sizes;
    the returned function takes (text, errors) and returns (plaintext, len(text)). Decoding
    undoes the substitution first and then reverses the groups again.
    """
    phrase, group_sizes = _parse_key(key)
    # decoding only needs letter -> cell in the keyed square and cell -> letter in the standard one
    _, cipher_pos, plain_at, _ = _build_squares(_build_key_alphabet(phrase))

    def decode(text, errors="strict"):
        on_error = handle_error("bazeries", errors, decode=True)
        substituted = []
        # only alphabetic characters (uppercased) take part in the decoding
        for index, ch in enumerate(filter(str.isalpha, ensure_str(text).upper())):
            cell = cipher_pos.get(ch)
            if cell is None:
                # alphabetic but not in the square: delegate to the error handler
                substituted.append(on_error(ch, index, "".join(substituted)))
            else:
                substituted.append(plain_at[cell])
        return "".join(_transpose(substituted, group_sizes)), len(text)
    return decode
| 173 | + |
| 174 | + |
# Register the codec with its encoder/decoder factories. The name pattern matches a bare
# "bazeries" or "bazeries" followed by "-" or "_" and a key, captured as group 1
# (presumably forwarded as the `key` argument of the factories - confirm against `add`
# in __common__).
add("bazeries", bazeries_encode, bazeries_decode,
    r"^bazeries(?:[-_](.+))?$",
    printables_rate=1., expansion_factor=1.)
0 commit comments