Skip to content

Commit c85f5c4

Browse files
committed
Moved hashing to new category
1 parent becd892 commit c85f5c4

13 files changed

Lines changed: 601 additions & 53 deletions

File tree

README.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,8 @@ o
250250
- [X] `pkzip_bzip2`: standard BZip2 compression/decompression
251251
- [X] `pkzip_lzma`: standard LZMA compression/decompression
252252

253+
> :warning: Compression functions are of course definitely **NOT** encoding functions ; they are implemented for leveraging the `.encode(...)` API from `codecs`.
254+
253255
#### Cryptography
254256

255257
- [X] `affine`: aka Affine Cipher
@@ -262,6 +264,19 @@ o
262264
- [X] `shiftN`: shift ordinals (*N* belongs to [1,255])
263265
- [X] `xorN`: XOR with a single byte (*N* belongs to [1,255])
264266

267+
> :warning: Crypto functions are of course definitely **NOT** encoding functions ; they are implemented for leveraging the `.encode(...)` API from `codecs`.
268+
269+
#### Hashing
270+
271+
- [X] `blake`: includes BLAKE2b and BLAKE2s (Python 3 only ; relies on `hashlib`)
272+
- [X] `checksums`: includes Adler32 and CRC32 (relies on `zlib`)
273+
- [X] `crypt`: Unix's crypt hash for passwords (Python 3 and Unix only ; relies on `crypt`)
274+
- [X] `md`: aka Message Digest ; includes MD4 and MD5 (relies on `hashlib`)
275+
- [X] `sha`: aka Secure Hash Algorithms ; includes SHA1, 224, 256, 384, 512 (Python2/3) but also SHA3-224, -256, -384 and -512 (Python 3 only ; relies on `hashlib`)
276+
- [X] `shake`: aka SHAKE hashing (Python 3 only ; relies on `hashlib`)
277+
278+
> :warning: Hash functions are of course definitely **NOT** encoding functions ; they are implemented for convenience with the `.encode(...)` API from `codecs` and useful for chaining codecs.
279+
265280
#### Languages
266281

267282
- [X] `braille`: well-known braille language (Python 3 only)

codext/__common__.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from inspect import currentframe
1313
from itertools import chain, product
1414
from math import log
15+
from platform import system
1516
from random import randint
1617
from six import binary_type, string_types, text_type, BytesIO
1718
from string import *
@@ -33,7 +34,8 @@
3334
__all__ = ["add", "add_macro", "add_map", "b", "clear", "codecs", "decode", "encode", "ensure_str", "examples", "guess",
3435
"isb", "generate_strings_from_regex", "get_alphabet_from_mask", "handle_error", "is_native",
3536
"list_categories", "list_encodings", "list_macros", "lookup", "maketrans", "os", "rank", "re", "register",
36-
"remove", "reset", "s2i", "search", "stopfunc", "BytesIO", "MASKS", "PY3", "_input", "_stripl", "CodecMacro"]
37+
"remove", "reset", "s2i", "search", "stopfunc", "BytesIO", "_input", "_stripl", "CodecMacro",
38+
"DARWIN", "LINUX", "MASKS", "PY3", "UNIX", "WINDOWS"]
3739
CODECS_REGISTRY = None
3840
CODECS_CATEGORIES = ["native", "custom"]
3941
MASKS = {
@@ -47,13 +49,19 @@
4749
's': " ",
4850
'u': ascii_uppercase,
4951
}
50-
PY3 = sys.version[0] == "3"
52+
5153
__codecs_registry = []
5254

5355
MACROS = {}
5456
PERS_MACROS = {}
5557
PERS_MACROS_FILE = os.path.expanduser("~/.codext-macros.json")
5658

59+
# Platform/interpreter flags exported through __all__.
# The platform name is queried once and reused for every flag.
_SYSTEM = system()
DARWIN = _SYSTEM == "Darwin"
LINUX = _SYSTEM == "Linux"
# Compare the structured version tuple rather than the first character of the human-readable version string.
PY3 = sys.version_info[0] == 3
# NOTE: only macOS and Linux are counted as Unix here; other Unix-like systems leave this flag False.
UNIX = DARWIN or LINUX
WINDOWS = _SYSTEM == "Windows"
64+
5765

5866
def entropy(s):
    """Return the Shannon entropy of *s*, in bits per symbol.

    The probability of each distinct symbol is its number of occurrences in *s* divided by len(s).
    An empty sequence yields 0.
    """
    size = len(s)
    probabilities = [float(s.count(symbol)) / size for symbol in set(s)]
    return -sum([p * log(p, 2) for p in probabilities])
5967

codext/common/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from .a1z26 import *
33
from .cases import *
44
from .dummy import *
5-
from .hashes import *
65
from .octal import *
76
from .ordinal import *
87

codext/hashing/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# -*- coding: UTF-8 -*-
2+
from .blake import *
3+
from .checksums import *
4+
from .crypt import *
5+
from .md import *
6+
from .sha import *
7+
from .shake import *
8+

codext/hashing/blake.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# -*- coding: UTF-8 -*-
2+
"""Hashing Codecs - string hashing with BLAKE2 (blake2b and blake2s).
3+
4+
These are codecs for hashing strings, for use with other codecs in encoding chains.
5+
6+
These codecs:
7+
- transform strings from str to str
8+
- transform strings from bytes to bytes
9+
- transform file content from str to bytes (write)
10+
"""
11+
import hashlib
12+
13+
from ..__common__ import add, b, PY3
14+
15+
16+
if PY3:
    def blake_hash(c):
        """Build the codec factory for the BLAKE2 variant *c*.

        :param c: variant letter appended to "blake2" to select the hashlib constructor ("b" or "s")
        :return:  a function taking the optional digest-size suffix of the codec name (e.g. "-16", "_32" or "")
                  and returning the encode function for that size
        """
        # Default digest size (in bytes) when the codec name carries no size suffix.
        default_size = "64" if c == "b" else "32"

        def _hash_transform(l):
            # The explicit parentheses matter: `l or "64" if c == "b" else "32"` parses as
            #  `(l or "64") if c == "b" else "32"`, which discarded the requested size for blake2s.
            # `l` is presumably the group captured by the `pattern` given to add(), hence the separator stripping.
            size = int((l or default_size).lstrip("_-"))

            def _encode(data, error="strict"):
                digest = getattr(hashlib, "blake2%s" % c)(b(data), digest_size=size)
                return digest.hexdigest(), len(data)
            return _encode
        return _hash_transform

    add("blake2b", blake_hash("b"), pattern=r"^blake2b(|[-_](?:[1-9]|[1-5]\d|6[0-4]))$", guess=None)
    add("blake2s", blake_hash("s"), pattern=r"^blake2s(|[-_](?:[1-9]|[1-2]\d|3[0-2]))$", guess=None)
27+

codext/hashing/checksums.py

Lines changed: 269 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
# -*- coding: UTF-8 -*-
2+
"""Hashing Codecs - common string checksums (Adler-32 and CRC).
3+
4+
These are codecs for hashing strings, for use with other codecs in encoding chains.
5+
6+
These codecs:
7+
- transform strings from str to str
8+
- transform strings from bytes to bytes
9+
- transform file content from str to bytes (write)
10+
"""
11+
from zlib import adler32
12+
13+
from ..__common__ import add, b
14+
15+
16+
# CRC parameter sets, keyed first by width in bits ('' holds the width-less "crca" family, which crc_checksum
#  computes as a 16-bit CRC) and then by variant name ('' being the default variant of that width).
# Each tuple is (poly, init, refl_in, refl_out, xor_out, check): the first five are passed positionally to crc();
#  the sixth is not used in this module - presumably the reference checksum of "123456789" (TODO confirm).
CRC = {
17+
'': {
18+
'a': (0x1021, 0xc6c6, True, True, 0, 0xbf05),
19+
},
20+
8: {
21+
'': (0x07, 0, False, False, 0, 0xf4),
22+
'aes': (0x1d, 0xff, True, True, 0, 0x97),
23+
'autosar': (0x2f, 0xff, False, False, 0xff, 0xdf),
24+
'bluetooth': (0xa7, 0, True, True, 0, 0x26),
25+
'cdma2000': (0x9b, 0xff, False, False, 0, 0xda),
26+
'dallas-1-wire': (0x31, 0, True, True, 0, 0xa1),
27+
'darc': (0x39, 0, True, True, 0, 0x15),
28+
'dvb-s2': (0xd5, 0, False, False, 0, 0xbc),
29+
'ebu': (0x1d, 0xff, True, True, 0, 0x97),
30+
'gsm-a': (0x1d, 0, False, False, 0, 0x37),
31+
'gsm-b': (0x49, 0, False, False, 0xff, 0x94),
32+
'hitag': (0x1d, 0xff, False, False, 0, 0xb4),
33+
'i-432-1': (0x07, 0, False, False, 0x55, 0xa1),
34+
'i-code': (0x1d, 0xfd, False, False, 0, 0x7e),
35+
'itu': (0x07, 0, False, False, 0x55, 0xa1),
36+
'lte': (0x9b, 0, False, False, 0, 0xea),
37+
'maxim': (0x31, 0, True, True, 0, 0xa1),
38+
'maxim-dow': (0x31, 0, True, True, 0, 0xa1),
39+
'mifare-mad': (0x1d, 0xc7, False, False, 0, 0x99),
40+
'nrsc-5': (0x31, 0xff, False, False, 0, 0xf7),
41+
'opensafety': (0x2f, 0, False, False, 0, 0x3e),
42+
'rohc': (0x07, 0xff, True, True, 0, 0xd0),
43+
'sae-j1850': (0x1d, 0xff, False, False, 0xff, 0x4b),
44+
'smbus': (0x07, 0, False, False, 0, 0xf4),
45+
'tech-3250': (0x1d, 0xff, True, True, 0, 0x97),
46+
'wcdma': (0x9b, 0, True, True, 0, 0x25),
47+
},
48+
10: {
49+
'': (0x233, 0, False, False, 0, 0x199),
50+
'atm': (0x233, 0, False, False, 0, 0x199),
51+
'cdma2000': (0x3d9, 0x3ff, False, False, 0, 0x233),
52+
'gsm': (0x175, 0, False, False, 0x3ff, 0x12a),
53+
'i-610': (0x233, 0, False, False, 0, 0x199),
54+
},
55+
11: {
56+
'': (0x385, 0x01a, False, False, 0, 0x5a3),
57+
'flexray': (0x385, 0x01a, False, False, 0, 0x5a3),
58+
'umts': (0x307, 0, False, False, 0, 0x061),
59+
},
60+
12: {
61+
'': (0x80f, 0, False, True, 0, 0xdaf),
62+
'3gpp': (0x80f, 0, False, True, 0, 0xdaf),
63+
'cdma2000': (0xf13, 0xfff, False, False, 0, 0xd4d),
64+
'dect': (0x80f, 0, False, False, 0, 0xf5b),
65+
'gsm': (0xd31, 0, False, False, 0xfff, 0xb34),
66+
'umts': (0x80f, 0, False, True, 0, 0xdaf),
67+
},
68+
13: {
69+
'': (0x1cf5, 0, False, False, 0, 0x04fa),
70+
'bbc': (0x1cf5, 0, False, False, 0, 0x04fa),
71+
},
72+
14: {
73+
'': (0x0805, 0, True, True, 0, 0x082d),
74+
'darc': (0x0805, 0, True, True, 0, 0x082d),
75+
'gsm': (0x202d, 0, False, False, 0x3fff, 0x30ae),
76+
},
77+
15: {
78+
'': (0x4599, 0, False, False, 0, 0x059e),
79+
'can': (0x4599, 0, False, False, 0, 0x059e),
80+
'mpt1327': (0x6815, 0, False, False, 1, 0x2566),
81+
},
82+
16: {
83+
'acorn': (0x1021, 0, False, False, 0, 0x31c3),
84+
'arc': (0x8005, 0, True, True, 0, 0xbb3d),
85+
'atom': (0x002d, 0, True, True, 0, 0x4287),
86+
'aug-2-ccitt': (0x1021, 0x84c0, False, False, 0, 0x19cf),
87+
'aug-2-citt': (0x1021, 0x84c0, False, False, 0, 0x19cf),
88+
'aug-ccitt': (0x1021, 0x1d0f, False, False, 0, 0xe5cc),
89+
'aug-citt': (0x1021, 0x1d0f, False, False, 0, 0xe5cc),
90+
'autosar': (0x1021, 0xffff, False, False, 0, 0x29b1),
91+
'bt-chip': (0x1021, 0xffff, True, False, 0, 0x89f6),
92+
'buypass': (0x8005, 0, False, False, 0, 0xfee8),
93+
'cms': (0x8005, 0xffff, False, False, 0, 0xaee7),
94+
'ccitt': (0x1021, 0, True, True, 0, 0x2189),
95+
'ccitt-false': (0x1021, 0xffff, False, False, 0, 0x29b1),
96+
'ccitt-true': (0x1021, 0, True, True, 0, 0x2189),
97+
'cdma2000': (0xc867, 0xffff, False, False, 0, 0x4c06),
98+
'darc': (0x1021, 0xffff, False, False, 0xffff, 0xd64e),
99+
'dds-110': (0x8005, 0x800d, False, False, 0, 0x9ecf),
100+
'dect-r': (0x0589, 0, False, False, 1, 0x007e),
101+
'dect-x': (0x0589, 0, False, False, 0, 0x007f),
102+
'dnp': (0x3d65, 0, True, True, 0xffff, 0xea82),
103+
'en-13757': (0x3d65, 0, False, False, 0xffff, 0xc2b7),
104+
'epc': (0x1021, 0xffff, False, False, 0xffff, 0xd64e),
105+
'epc-c1g2': (0x1021, 0xffff, False, False, 0xffff, 0xd64e),
106+
'genibus': (0x1021, 0xffff, False, False, 0xffff, 0xd64e),
107+
'gsm': (0x1021, 0, False, False, 0xffff, 0xce3c),
108+
'i-code': (0x1021, 0xffff, False, False, 0xffff, 0xd64e),
109+
'ibm': (0x8005, 0, True, True, 0, 0xbb3d),
110+
'ibm-3740': (0x1021, 0xffff, False, False, 0, 0x29b1),
111+
'ibm-sdlc': (0x1021, 0xffff, True, True, 0xffff, 0x906e),
112+
'iec-61158-2': (0x1dcf, 0xffff, False, False, 0xffff, 0xa819),
113+
'iso-hdlc': (0x1021, 0xffff, True, True, 0xffff, 0x906e),
114+
'kermit': (0x1021, 0, True, True, 0, 0x2189),
115+
'lha': (0x8005, 0, True, True, 0, 0xbb3d),
116+
'lj1200': (0x6f63, 0, False, False, 0, 0xbdf4),
117+
'maxim': (0x8005, 0, True, True, 0xffff, 0x44c2),
118+
'maxim-dom': (0x8005, 0, True, True, 0xffff, 0x44c2),
119+
'mcrf4xx': (0x1021, 0xffff, True, True, 0, 0x6f91),
120+
'modbus': (0x8005, 0xffff, True, True, 0, 0x4b37),
121+
'opensafety-a': (0x5935, 0, False, False, 0, 0x5d38),
122+
'opensafety-b': (0x755b, 0, False, False, 0, 0x20fe),
123+
'profibus': (0x1dcf, 0xffff, False, False, 0xffff, 0xa819),
124+
'riello': (0x1021, 0xb2aa, True, True, 0, 0x63d0),
125+
'spi-fujitsu': (0x1021, 0x84c0, False, False, 0, 0x19cf),
126+
't10-dif': (0x8bb7, 0, False, False, 0, 0xd0db),
127+
'teledisk': (0xa097, 0, False, False, 0, 0x0fb3),
128+
'tms37157': (0x1021, 0x89ec, True, True, 0, 0x26b1),
129+
'umts': (0x8005, 0, False, False, 0, 0xfee8),
130+
'usb': (0x8005, 0xffff, True, True, 0xffff, 0xb4c8),
131+
'v-41-lsb': (0x1021, 0, True, True, 0, 0x2189),
132+
'verifone': (0x8005, 0, False, False, 0, 0xfee8),
133+
'x-25': (0x1021, 0xffff, True, True, 0xffff, 0x906e),
134+
'x-kermit': (0x8408, 0, True, True, 0, 0x0c73),
135+
'x-xmodem': (0x8408, 0, True, True, 0, 0x0c73),
136+
'xmodem': (0x1021, 0, False, False, 0, 0x31c3),
137+
'zmodem': (0x1021, 0, False, False, 0, 0x31c3),
138+
},
139+
17: {
140+
'': (0x1685b, 0, False, False, 0, 0x04f03),
141+
'can-fd': (0x1685b, 0, False, False, 0, 0x04f03),
142+
},
143+
21: {
144+
'': (0x102899, 0, False, False, 0, 0x0ed841),
145+
'can-fd': (0x102899, 0, False, False, 0, 0x0ed841),
146+
},
147+
24: {
148+
'': (0x864cfb, 0xb704ce, False, False, 0, 0x21cf02),
149+
'ble': (0x00065b, 0x555555, True, True, 0, 0xc25a56),
150+
'flexray-a': (0x5d6dcb, 0xfedcba, False, False, 0, 0x7979bd),
151+
'flexray-b': (0x5d6dcb, 0xabcdef, False, False, 0, 0x1f23b8),
152+
'interlaken': (0x328b63, 0xffffff, False, False, 0xffffff, 0xb4f3e6),
153+
'lte-a': (0x864cfb, 0, False, False, 0, 0xcde703),
154+
'lte-b': (0x800063, 0, False, False, 0, 0x23ef52),
155+
'openpgp': (0x864cfb, 0xb704ce, False, False, 0, 0x21cf02),
156+
'os-9': (0x800063, 0xffffff, False, False, 0xffffff, 0x200fa5),
157+
'pgp': (0x864cfb, 0xb704ce, False, False, 0, 0x21cf02),
158+
},
159+
30: {
160+
'': (0x2030b9c7, 0x3fffffff, False, False, 0x3fffffff, 0x04c34abf),
161+
'cdma': (0x2030b9c7, 0x3fffffff, False, False, 0x3fffffff, 0x04c34abf),
162+
},
163+
31: {
164+
'': (0x04c11db7, 0x7fffffff, False, False, 0x7fffffff, 0x0ce9e46c),
165+
'philips': (0x04c11db7, 0x7fffffff, False, False, 0x7fffffff, 0x0ce9e46c),
166+
},
167+
32: {
168+
'': (0x04c11db7, 0xffffffff, True, True, 0xffffffff, 0xcbf43926),
169+
'aal5': (0x04c11db7, 0xffffffff, False, False, 0xffffffff, 0xfc891918),
170+
'adccp': (0x04C11db7, 0xffffffff, True, True, 0xffffffff, 0xcbf43926),
171+
'aixm': (0x814141ab, 0, False, False, 0, 0x3010bf7f),
172+
'autosar': (0xf4acfb13, 0xffffffff, True, True, 0xffffffff, 0x1697d06a),
173+
'b': (0x04c11db7, 0xffffffff, False, False, 0xffffffff, 0xfc891918),
174+
'base91-c': (0x1edc6f41, 0xffffffff, True, True, 0xffffffff, 0xe3069283),
175+
'base91-d': (0xa833982b, 0xffffffff, True, True, 0xffffffff, 0x87315576),
176+
'bzip2': (0x04c11db7, 0xffffffff, False, False, 0xffffffff, 0xfc891918),
177+
'c': (0x1edc6f41, 0xffffffff, True, True, 0xffffffff, 0xe3069283),
178+
'castagnoli': (0x1edc6f41, 0xffffffff, True, True, 0xffffffff, 0xe3069283),
179+
'cd-rom-edc': (0x8001801b, 0, True, True, 0, 0x6ec2edc4),
180+
'cksum': (0x04c11db7, 0, False, False, 0xffffffff, 0x765e7680),
181+
'd': (0xa833982b, 0xffffffff, True, True, 0xffffffff, 0x87315576),
182+
'dect-b': (0x04c11db7, 0xffffffff, False, False, 0xffffffff, 0xfc891918),
183+
'interlaken': (0x1edc6f41, 0xffffffff, True, True, 0xffffffff, 0xe3069283),
184+
'iscsi': (0x1edc6f41, 0xffffffff, True, True, 0xffffffff, 0xe3069283),
185+
'iso-hdlc': (0x04c11db7, 0xffffffff, True, True, 0xffffffff, 0xcbf43926),
186+
'jamcrc': (0x04c11db7, 0xffffffff, True, True, 0, 0x340bc6d9),
187+
'mpeg-2': (0x04c11db7, 0xffffffff, False, False, 0, 0x0376e6e7),
188+
'mpeg2': (0x04c11db7, 0xffffffff, False, False, 0, 0x0376e6e7),
189+
'posix': (0x04c11db7, 0, False, False, 0xffffffff, 0x765e7680),
190+
'q': (0x814141ab, 0, False, False, 0, 0x3010bf7f),
191+
'v-42': (0x04C11db7, 0xffffffff, True, True, 0xffffffff, 0xcbf43926),
192+
'xfer': (0x000000af, 0, False, False, 0, 0xbd0be338),
193+
'xz': (0x04C11db7, 0xffffffff, True, True, 0xffffffff, 0xcbf43926),
194+
'zip': (0x04C11DB7, 0xffffffff, True, True, 0xffffffff, 0xcbf43926),
195+
},
196+
40: {
197+
'': (0x0004820009, 0, False, False, 0xffffffffff, 0xd4164fc646),
198+
'gsm': (0x0004820009, 0, False, False, 0xffffffffff, 0xd4164fc646),
199+
},
200+
64: {
201+
'': (0x000000000000001b, 0xffffffffffffffff, True, True, 0xffffffffffffffff, 0xb90956c775a41001),
202+
'ecma': (0x42f0e1eba9ea3693, 0, False, False, 0, 0x6c40df5f0b497347),
203+
'ecma-182': (0x42f0e1eba9ea3693, 0, False, False, 0, 0x6c40df5f0b497347),
204+
'go-ecma': (0x42f0e1eba9ea3693, 0xffffffffffffffff, True, True, 0xffffffffffffffff, 0x995dc9bbdf1939fa),
205+
'go-iso': (0x000000000000001b, 0xffffffffffffffff, True, True, 0xffffffffffffffff, 0xb90956c775a41001),
206+
'iso': (0x000000000000001b, 0xffffffffffffffff, True, True, 0xffffffffffffffff, 0xb90956c775a41001),
207+
'we': (0x42f0e1eba9ea3693, 0xffffffffffffffff, False, False, 0xffffffffffffffff, 0x62ec59e3f1a4f00a),
208+
'xz': (0x42f0e1eba9ea3693, 0xffffffffffffffff, True, True, 0xffffffffffffffff, 0x995dc9bbdf1939fa),
209+
'xz64': (0x42f0e1eba9ea3693, 0xffffffffffffffff, True, True, 0xffffffffffffffff, 0x995dc9bbdf1939fa),
210+
},
211+
82: {
212+
'': (0x0308c0111011401440411, 0, True, True, 0, 0x09ea83f625023801fd612),
213+
'darc': (0x0308c0111011401440411, 0, True, True, 0, 0x09ea83f625023801fd612),
214+
},
215+
}
216+
217+
def _pattern(n=""):
    """Build the codec-name regex for the CRC family stored under key *n* of the CRC table.

    The regex captures the variant suffix (optionally introduced by "-" or "_") as its only group.
    NOTE(review): the empty variant always matches, even for a key whose table has no '' entry (e.g. 16).
    """
    variants = "|".join(name for name in CRC[n] if len(name) > 0)
    return r"^crc" + str(n) + r"(|[-_]?(?:%s))$" % variants
218+
def _rev_int(i, l=None):
    """Return *i* with its binary digits reversed.

    The digits are left-padded with zeros to *l* bits before being reversed; when *l* is omitted (or 0), the
    natural bit length of *i* is used.
    """
    bits = bin(i)[2:]
    return int(bits.zfill(l or len(bits))[::-1], 2)


def crc(data, length, poly, init=0, refl_in=False, refl_out=False, xor_out=0):
    """Generic table-driven CRC computation function.

    :param data:     input as bytes/bytearray (each item is one octet) or as text (each character contributes
                     its ordinal; in the non-reflected branch an ordinal above 255 indexes past the lookup table)
    :param length:   CRC width in bits (the non-reflected branch shifts by `length - 8`, so 8 or more is expected)
    :param poly:     generator polynomial, given in non-reflected form
    :param init:     initial register value, given in non-reflected form
    :param refl_in:  whether input octets are processed least-significant bit first
    :param refl_out: whether the final register is bit-reversed with respect to the non-reflected convention
    :param xor_out:  value XORed into the final register
    :return:         the checksum as an integer
    """
    # Iterating bytes yields integers on Python 3 but 1-char strings on Python 2, and text always yields
    #  characters: normalize everything to integers up front so that the loops below never call ord() on an int.
    if isinstance(data, (bytes, bytearray)):
        octets = bytearray(data)
    else:
        octets = [ord(char) for char in data]
    table = [0] * 256
    # input reflected
    if refl_in:
        init, poly = _rev_int(init, length), _rev_int(poly, length)
        # prepare the lookup table
        for x in range(256):
            reg = x
            for _ in range(8):
                reg = (reg >> 1) ^ (poly if reg & 0x1 else 0)
            table[x] = reg
        # compute CRC
        reg = init
        for octet in octets:
            reg = (reg >> 8) ^ table[(reg ^ octet) & 0xff]
    # input NOT reflected
    else:
        mask = (1 << length) - 1
        shift = length - 8
        top_bit = 1 << (length - 1)
        # prepare the lookup table
        for x in range(256):
            reg = x << shift
            for _ in range(8):
                reg = ((reg << 1) & mask) ^ (poly if reg & top_bit else 0)
            table[x] = reg
        # compute CRC
        reg = init
        for octet in octets:
            reg = ((reg << 8) & mask) ^ table[(reg >> shift) ^ octet]
    # the register is already in reflected form when refl_in is set, so it only needs reversing when the
    #  requested output orientation differs from the input one
    if refl_in ^ refl_out:
        reg = _rev_int(reg, length)
    return reg ^ xor_out
253+
254+
255+
def crc_checksum(n=""):
    """Build the codec factory for the CRC family stored under key *n* of the CRC table.

    :param n: key of the CRC table: a width in bits, or "" for the width-less family (computed as a 16-bit CRC)
    :return:  a function taking the variant suffix of the codec name (leading "-"/"_" are stripped) and returning
              the encode function, which yields the checksum as zero-padded lowercase hexadecimal
    """
    width = n or 16
    # Number of hex digits needed for `width` bits, i.e. ceil(width / 4). Integer arithmetic is used because
    #  round(width / 4 + .5) depends on the interpreter's tie-breaking rule (half-to-even on Python 3 pads 12-bit
    #  CRCs to 4 digits) and returns a float on Python 2.
    digits = (width + 3) // 4

    def _crc(name):
        def _encode(data, error="strict"):
            # only the first five fields are CRC parameters; the trailing field is not used here
            params = CRC[n][name.lstrip("-_")][:5]
            return "%0*x" % (digits, crc(data, width, *params)), len(data)
        return _encode
    return _crc
262+
263+
264+
# Adler-32: the result of zlib's adler32 is masked down to 32 bits.
add(
    "adler32",
    lambda data, error="strict": (adler32(b(data)) & 0xffffffff, len(data)),
)
# Width-less CRC family (parameters stored under the '' key of the CRC table).
add("crca", crc_checksum(), pattern=_pattern())
# One codec family per integer width of the CRC table; the '' key was handled just above.
for i in CRC:
    if not isinstance(i, int):
        continue
    add("crc%d" % i, crc_checksum(i), pattern=_pattern(i))
269+

0 commit comments

Comments
 (0)