Skip to content

Commit d737a21

Browse files
committed
Added new feature: macros
1 parent 75f9eb3 commit d737a21

7 files changed

Lines changed: 331 additions & 69 deletions

File tree

.coveragerc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ exclude_lines =
1111
def main\(\)\:
1212
def __stdin_pipe\(\)\:
1313
def __literal_eval\(o\)\:
14+
def __print_tabular\(lst, space\=4\)\:
1415
except ImportError:
1516
except NameError:
1617
raise NotImplementedError

README.md

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ $ echo -en "test" | codext encode base100
3838
👫👜👪👫
3939
```
4040

41-
Chaining codecs:
41+
### Chaining codecs
4242

4343
```sh
4444
$ echo -en "Test string" | codext encode reverse
@@ -57,6 +57,23 @@ $ echo -en "AGTCAGTCAGTGAGAAAGTCAGTGAGAAAGTGAGTGAGAAAGTGAGTCAGTGAGAAAGTCAGAAAGTG
5757
test string
5858
```
5959

60+
### Using macros
61+
62+
```sh
63+
$ codext add-macro my-encoding-chain gzip base63 lzma base64
64+
65+
$ codext list macros
66+
example-macro, my-encoding-chain
67+
68+
$ echo -en "Test string" | codext encode my-encoding-chain
69+
CQQFAF0AAIAAABuTgySPa7WaZC5Sunt6FS0ko71BdrYE8zHqg91qaqadZIR2LafUzpeYDBalvE///ug4AA==
70+
71+
$ codext remove-macro my-encoding-chain
72+
73+
$ codext list macros
74+
example-macro
75+
```
76+
6077
## :computer: Usage (base CLI tool) <a href="https://twitter.com/intent/tweet?text=Debase%20-%20Decode%20any%20multi-layer%20base-encoded%20string.%0D%0APython%20tool%20for%20decoding%20any%20base-encoded%20string,%20even%20when%20encoded%20with%20multiple%20layers.%0D%0Ahttps%3a%2f%2fgithub%2ecom%2fdhondta%2fpython-codext%0D%0A&hashtags=python,base,encodings,codecs,cryptography,stegano,steganography,ctftools"><img src="https://img.shields.io/badge/Tweet%20(debase)--lightgrey?logo=twitter&style=social" alt="Tweet on debase" height="20"/></a>
6178

6279
```session

codext/__common__.py

Lines changed: 147 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# -*- coding: UTF-8 -*-
22
import _codecs
33
import codecs
4+
import json
45
import os
56
import random
67
import re
@@ -11,6 +12,7 @@
1112
from inspect import currentframe
1213
from itertools import chain, product
1314
from math import log
15+
from random import randint
1416
from six import binary_type, string_types, text_type, BytesIO
1517
from string import *
1618
from types import FunctionType, ModuleType
@@ -28,10 +30,11 @@
2830
maketrans = str.maketrans
2931

3032

31-
__all__ = ["add", "add_map", "b", "clear", "codecs", "decode", "encode", "ensure_str", "examples", "guess", "isb",
32-
"generate_strings_from_regex", "get_alphabet_from_mask", "handle_error", "is_native", "list_categories",
33-
"list_encodings", "lookup", "maketrans", "rank", "re", "register", "remove", "reset", "s2i", "search",
34-
"stopfunc", "BytesIO", "MASKS", "PY3", "_input", "_stripl"]
33+
__all__ = ["add", "add_macro", "add_map", "b", "clear", "codecs", "decode", "encode", "ensure_str", "examples", "guess",
34+
"isb", "generate_strings_from_regex", "get_alphabet_from_mask", "handle_error", "is_native",
35+
"list_categories", "list_encodings", "list_macros", "lookup", "maketrans", "os", "rank", "re", "register",
36+
"remove", "remove_macro", "reset", "s2i", "search", "stopfunc", "BytesIO", "MASKS", "PY3", "_input",
37+
"_stripl", "CodecMacro"]
3538
CODECS_REGISTRY = None
3639
CODECS_CATEGORIES = ["native", "custom"]
3740
MASKS = {
@@ -48,6 +51,10 @@
4851
PY3 = sys.version[0] == "3"
4952
__codecs_registry = []
5053

54+
MACROS = {}
55+
PERS_MACROS = {}
56+
PERS_MACROS_FILE = os.path.expanduser("~/.codext-macros.json")
57+
5158

5259
entropy = lambda s: -sum([p * log(p, 2) for p in [float(s.count(c)) / len(s) for c in set(s)]])
5360

@@ -58,6 +65,74 @@
5865
s2i = lambda s: int(codecs.encode(s, "base16"), 16)
5966

6067

68+
class CodecMacro(tuple):
    """ Macro details when looking up the codec registry.

    A macro is a named chain of codecs: encoding applies each codec in order, decoding applies them in reverse
     order. The instance carries 'name', 'codecs' (the list of looked up codecs), 'parameters' and the
     incremental/stream classes bound to the macro's encode and decode methods.

    :param name: macro name, searched in the personal macros first, then in the embedded ones
    """
    def __new__(cls, name):
        """ Build the macro from its registered list of encoding names.

        :raise LookupError: when the macro name is unknown (or when one of its encodings cannot be looked up)
        :raise ValueError:  when the registered value is not a non-empty list or tuple
        """
        self = tuple.__new__(cls)
        self.name = name
        # get from personal macros first
        try:
            chain_names = PERS_MACROS[name]
        except KeyError:
            try:
                chain_names = MACROS[name]
            except KeyError:
                raise LookupError("unknown macro: %s" % name)
        # an empty chain is rejected too, as the first codec is needed hereafter for testing its examples
        if not isinstance(chain_names, (tuple, list)) or len(chain_names) == 0:
            raise ValueError("bad macro list: %s" % str(chain_names))
        # lookup(e, False) means that macros won't be nestable
        self.codecs = [lookup(e, False) for e in chain_names]
        self.parameters = {'name': name, 'category': "macro"}
        # test examples to check that the chain of encodings works
        for action, examples in (self.codecs[0].parameters.get('examples', {}) or {}).items():
            if re.match(r"enc(-dec)?\(", action):
                for e in (examples.keys() if action.startswith("enc(") else examples or []):
                    rd = re.match(r"\@random(?:\{(\d+(?:,(\d+))*?)\})?$", e)
                    if rd:
                        for n in (rd.group(1) or "512").split(","):
                            self.encode("".join(chr(randint(0, 255)) for _ in range(int(n))))
                        continue
                    self.encode(e)

        # keep an explicit reference to the macro ; inside the methods of the nested classes, 'self' is the
        #  incremental encoder/decoder instance, NOT the macro (calling self.encode there recurses forever)
        macro = self

        class Codec:
            decode = macro.decode
            encode = macro.encode

        class IncrementalEncoder(codecs.IncrementalEncoder):
            def encode(self, input, final=False):
                return b(macro.encode(input, self.errors)[0])
        self.incrementalencoder = IncrementalEncoder

        class IncrementalDecoder(codecs.IncrementalDecoder):
            def decode(self, input, final=False):
                return ensure_str(macro.decode(input, self.errors)[0])
        self.incrementaldecoder = IncrementalDecoder

        class StreamWriter(Codec, codecs.StreamWriter):
            charbuffertype = bytes
        self.streamwriter = StreamWriter

        class StreamReader(Codec, codecs.StreamReader):
            charbuffertype = bytes
        self.streamreader = StreamReader

        return self

    def decode(self, input, error="strict"):
        """ Decode with each codec in reverse order.

        :param input: data to be decoded
        :param error: error handling mode passed to each codec
        :return:      (decoded data, length reported by the last applied codec)
        """
        length = len(input)  # default so that an empty chain returns the input unchanged instead of crashing
        for ci in self.codecs[::-1]:
            input, length = ci.decode(input, error)
        return input, length

    def encode(self, input, error="strict"):
        """ Encode with each codec.

        :param input: data to be encoded
        :param error: error handling mode passed to each codec
        :return:      (encoded data, length reported by the last applied codec)
        """
        length = len(input)  # default so that an empty chain returns the input unchanged instead of crashing
        for ci in self.codecs:
            input, length = ci.encode(input, error)
        return input, length

    def __repr__(self):
        return "<codext.CodecMacro object for encoding %s at %#x>" % (self.name, id(self))
134+
135+
61136
def __stdin_pipe():
62137
""" Stdin pipe read function. """
63138
try:
@@ -215,6 +290,28 @@ class StreamReader(Codec, codecs.StreamReader):
215290
register(getregentry, add_to_codecs)
216291

217292

293+
def add_macro(mname, *encodings):
    """ This allows to define a macro, chaining multiple codecs one after the other. This relies on a default set of
         macros from a JSON file embedded in the package and a local JSON file from the home folder that takes
         precedence for defining personal macros. The new macro is stored in the personal set and immediately
         written to the personal macros file.

    :param mname:     macro name
    :param encodings: encoding names of the encodings to be chained with the macro
    :raise ValueError: when no encoding is given or when the name clashes with an existing macro or codec
    """
    # an empty chain would be persisted and then break any later lookup of this macro
    if len(encodings) == 0:
        raise ValueError("Macro must chain at least one encoding")
    # check for name clash with already existing macros and codecs
    if mname in MACROS or mname in PERS_MACROS:
        raise ValueError("Macro name already exists")
    try:
        ci = lookup(mname, False)
    except LookupError:
        pass  # no codec with this name, the macro name is free
    else:
        raise ValueError("Macro name clashes with codec '%s'" % ci.name)
    #TODO: test if the encodings sequence can work, using an example from the first codec
    # store as a list, that is, the same type as what is read back from the JSON file
    PERS_MACROS[mname] = list(encodings)
    with open(PERS_MACROS_FILE, 'w') as f:
        json.dump(PERS_MACROS, f)
codecs.add_macro = add_macro
313+
314+
218315
def add_map(ename, encmap, repl_char="?", sep="", ignore_case=None, no_error=False, intype=None, outype=None, **kwargs):
219316
""" This adds a new mapping codec (that is, declarable with a simple character mapping dictionary) to the codecs
220317
module dynamically setting its encode and/or decode functions, eventually dynamically naming the encoding with
@@ -474,7 +571,7 @@ def examples(encoding, number=10):
474571
while i < min(number, len(temp)):
475572
if not temp[i].isdigit():
476573
try:
477-
lookup(temp[i])
574+
lookup(temp[i], False)
478575
e.append(temp[i])
479576
except LookupError:
480577
pass
@@ -492,7 +589,7 @@ def examples(encoding, number=10):
492589

493590
def is_native(encoding):
494591
""" Determine if a given encoding is native or not. """
495-
return codecs.lookup(encoding).parameters['category'] == "native"
592+
return lookup(encoding, False).parameters['category'] == "native"
496593

497594

498595
def list_categories():
@@ -546,6 +643,11 @@ def list_encodings(*categories):
546643
return sorted(list(set(enc)), key=_human_keys)
547644

548645

646+
def list_macros():
    """ Get the sorted list of all macro names, merging the embedded macros with the personal ones. """
    names = set(MACROS)
    names.update(PERS_MACROS)
    return sorted(names)
649+
650+
549651
def remove(encoding):
550652
""" Remove all search functions matching the input encoding name from codext's local registry. """
551653
tbr = []
@@ -557,9 +659,23 @@ def remove(encoding):
557659
codecs.remove = remove
558660

559661

662+
def remove_macro(name):
    """ Remove the given macro from the macro registries.

    The macro is dropped from the embedded set (in memory only) and, if it is a personal macro, from the personal
     set too, in which case the personal macros file is rewritten. An unknown name is silently ignored.

    :param name: macro name
    """
    MACROS.pop(name, None)
    if name in PERS_MACROS:
        del PERS_MACROS[name]
        # persist the personal set only when it actually changed
        with open(PERS_MACROS_FILE, 'w') as f:
            json.dump(PERS_MACROS, f)
674+
675+
560676
def reset():
561-
""" Reset codext's local registry of search functions. """
562-
global CODECS_REGISTRY, __codecs_registry
677+
""" Reset codext's local registry of search functions and macros. """
678+
global CODECS_REGISTRY, MACROS, PERS_MACROS, __codecs_registry
563679
clear()
564680
d = os.path.dirname(__file__)
565681
for pkg in sorted(os.listdir(d)):
@@ -572,6 +688,14 @@ def reset():
572688
# restore codext's registry
573689
else:
574690
__codecs_registry = CODECS_REGISTRY[:]
691+
# restore codext's embedded set of macros
692+
with open(os.path.join(os.path.dirname(__file__), "macros.json")) as f:
693+
MACROS = json.load(f)
694+
# reload personal set of macros
695+
PERS_MACROS = {}
696+
if os.path.exists(PERS_MACROS_FILE):
697+
with open(PERS_MACROS_FILE) as f:
698+
PERS_MACROS = json.load(f)
575699
codecs.reset = reset
576700

577701

@@ -709,7 +833,7 @@ def encode(obj, encoding='utf-8', errors='strict'):
709833
codecs.encode = encode
710834

711835

712-
def lookup(encoding):
836+
def lookup(encoding, macro=True):
713837
""" Hooked lookup function for searching first for codecs in the local registry of this module. """
714838
# first, try to match the given encoding with codecs' search functions
715839
for search_function in __codecs_registry:
@@ -723,10 +847,18 @@ def lookup(encoding):
723847
codecinfo = search_function(generate_string_from_regex(search_function.__pattern__))
724848
if codecinfo is not None:
725849
return codecinfo
726-
# finally, get a CodecInfo with the original lookup function and refine it with a dictionary of parameters
727-
ci = __orig_lookup(encoding)
728-
ci.parameters = {'category': "native", 'module': "codecs", 'name': aliases.get(ci.name, ci.name)}
729-
return ci
850+
try:
851+
# finally, get a CodecInfo with the original lookup function and refine it with a dictionary of parameters
852+
ci = __orig_lookup(encoding)
853+
ci.parameters = {'category': "native", 'module': "codecs", 'name': aliases.get(ci.name, ci.name)}
854+
return ci
855+
except LookupError:
856+
if not macro:
857+
raise
858+
try:
859+
return CodecMacro(encoding)
860+
except LookupError:
861+
raise LookupError("unknown encoding: %s" % encoding)
730862
codecs.lookup = lookup
731863

732864

@@ -945,7 +1077,7 @@ def __develop(encodings):
9451077
enc = []
9461078
for e in (encodings or []):
9471079
try:
948-
ci = lookup(e)
1080+
ci = lookup(e, False)
9491081
g = ci.parameters['guess']
9501082
except:
9511083
g = [e]
@@ -1023,7 +1155,7 @@ def __init__(self, text, pad_char=None):
10231155

10241156
def __score(prev_input, input, codec, heuristic=False, extended=False):
10251157
""" Score relevant encodings given an input. """
1026-
obj, ci = None, lookup(codec) # NB: lookup(...) won't fail as the codec value comes from list_encodings(...)
1158+
obj, ci = None, lookup(codec, False) # NB: lookup(...) won't fail as the codec value comes from list_encodings(...)
10271159
sc = ci.parameters.get('scoring', {})
10281160
for encoding in ci.parameters.get('guess', [codec]):
10291161
# ignore encodings that fail to decode with their default errors handling value

0 commit comments

Comments
 (0)