sirodevasc
diff --git a/‎codext/__common__.py‎
Lines changed: 29 additions & 3 deletions b/‎codext/__common__.py‎
Lines changed: 29 additions & 3 deletions
diff --git a/‎codext/__init__.py‎
Lines changed: 18 additions & 28 deletions b/‎codext/__init__.py‎
Lines changed: 18 additions & 28 deletions
diff --git a/‎codext/base/__init__.py‎
Lines changed: 49 additions & 0 deletions b/‎codext/base/__init__.py‎
Lines changed: 49 additions & 0 deletions
diff --git a/‎codext/base/_base.py‎
Lines changed: 60 additions & 7 deletions b/‎codext/base/_base.py‎
Lines changed: 60 additions & 7 deletions
diff --git a/‎codext/base/base100.py‎
Lines changed: 13 additions & 5 deletions b/‎codext/base/base100.py‎
Lines changed: 13 additions & 5 deletions
diff --git a/‎codext/base/base122.py‎
Lines changed: 9 additions & 2 deletions b/‎codext/base/base122.py‎
Lines changed: 9 additions & 2 deletions
@@ -31,7 +31,7 @@
 __all__ = ["add", "add_map", "b", "clear", "codecs", "decode", "encode", "ensure_str", "examples", "guess",
            "generate_strings_from_regex", "get_alphabet_from_mask", "handle_error", "is_native", "list_categories",
            "list_encodings", "lookup", "maketrans", "rank", "re", "register", "remove", "reset", "s2i", "search",
-           "stopfunc", "BytesIO", "MASKS", "PY3"]
+           "stopfunc", "BytesIO", "MASKS", "PY3", "_input"]
 CODECS_REGISTRY = None
 CODECS_CATEGORIES = ["native", "custom"]
 MASKS = {
@@ -58,6 +58,29 @@
 s2i = lambda s: int(codecs.encode(s, "base16"), 16)
 
 
+def __stdin_pipe():
+    """ Lazily yield the lines available on standard input.
+
+    File descriptor 0 is first opened in binary mode ; if open() rejects the integer descriptor with a TypeError,
+     the lines are read from sys.stdin instead.
+    """
+    try:
+        with open(0, 'rb') as f:
+            for line in f:
+                yield line
+    except TypeError:
+        # imported locally so that this fallback does not depend on a module-level "import sys"
+        import sys
+        for line in sys.stdin:
+            yield line
+
+
+def _input(infile):
+    """ Read the whole input, either from a file or from standard input.
+
+    :param infile: path of the input file ; a falsy value (e.g. None) or "-" selects standard input
+    :return:       raw content of the file (read in binary mode) or of standard input
+    """
+    # "-" is advertised as an alias for standard input by the help messages of the command-line tools
+    if infile and infile != "-":
+        with open(infile, 'rb') as f:
+            return f.read()
+    # join the lines once instead of concatenating them one by one
+    return b("").join(__stdin_pipe())
+
+
 def add(ename, encode=None, decode=None, pattern=None, text=True, add_to_codecs=False, **kwargs):
     """ This adds a new codec to the codecs module setting its encode and/or decode functions, eventually dynamically
          naming the encoding with a pattern and with file handling.
@@ -612,15 +635,18 @@ def handle_error(ename, errors, sep="", repl_char="?", repl_minlen=1, decode=Fal
     glob = {'__name__': "__main__"}
     exec("class %s(ValueError): pass" % exc, glob)
 
-    def _handle_error(token, position):
+    def _handle_error(token, position, output=""):
         """ This handles an encoding/decoding error according to the selected handling mode.
         
         :param token:    input token to be encoded/decoded
         :param position: token position index
+        :param output:   output, as decoded up to the position of the error
         """
         if errors == "strict":
             msg = "'{}' codec can't {}code character '{}' in {} {}"
-            raise glob[exc](msg.format(ename, ["en", "de"][decode], token, item, position))
+            err = glob[exc](msg.format(ename, ["en", "de"][decode], token, item, position))
+            err.output = output
+            raise err
         elif errors == "leave":
             return token + sep
         elif errors == "replace":
 
@@ -18,6 +18,7 @@
 lookup   = codecs.lookup
 open     = codecs.open
 
+_lst = list
 list = list_encodings  # not included in __all__ because of shadow name
 
 
@@ -32,18 +33,6 @@ def __literal_eval(o):
         return literal_eval("'" + str(o) + "'")
 
 
-def __stdin_pipe():
-    """ Stdin pipe read function. """
-    try:
-        with open(0, 'rb') as f:
-            for l in f:
-                yield l
-    except TypeError:
-        import sys
-        for l in sys.stdin:
-            yield l
-
-
 def main():
     import argparse, os
     descr = "Codecs Extension (CodExt) {}\n\nAuthor   : {} ({})\nCopyright: {}\nLicense  : {}\nSource   : {}\n" \
@@ -79,10 +68,10 @@ def main():
                         help="error handling (default: strict)")
     guess = sparsers.add_parser("guess", help="try guessing the decoding codecs")
     guess.add_argument("encoding", nargs="*", help="list of known encodings to apply (default: none)")
-    guess.add_argument("-c", "--codec-categories", help="codec categories to be included in the search ; "
-                                                        "format: string|tuple|list(strings|tuples)")
-    guess.add_argument("-e", "--exclude-codecs", help="codecs to be explicitely not used ; "
-                                                      "format: string|tuple|list(strings|tuples)")
+    guess.add_argument("-c", "--codec-categories", nargs="*", help="codec categories to be included in the search ; "
+                                                                   "format: string|tuple")
+    guess.add_argument("-e", "--exclude-codecs", nargs="*", help="codecs to be explicitely not used ; "
+                                                                 "format: string|tuple")
     guess.add_argument("-f", "--stop-function", default="text", help="result checking function (default: text) ; "
                        "format: printables|text|flag|lang_[bigram]|[regex]")
     guess.add_argument("--max-depth", default=5, type=int, help="maximum codec search depth (default: 5)")
@@ -107,6 +96,15 @@ def main():
     search = sparsers.add_parser("search", help="search for codecs")
     search.add_argument("pattern", nargs="+", help="encoding pattern to search")
     args = parser.parse_args()
+    try:
+        args.codec_categories = _lst(map(__literal_eval, args.codec_categories))
+    except (AttributeError, TypeError):
+        pass
+    try:
+        args.exclude_codecs = _lst(map(__literal_eval, args.exclude_codecs))
+    except (AttributeError, TypeError):
+        pass
+    #print(args.codec_categories, args.exclude_codecs)
     # if a search pattern is given, only handle it
     if args.command == "search":
         results = []
@@ -115,14 +113,7 @@ def main():
         print(", ".join(results) or "No encoding found")
         return
     # handle input file or stdin
-    if args.infile:
-        with open(args.infile, 'rb') as f:
-            c = f.read()
-    else:
-        c = b("")
-        for line in __stdin_pipe():
-            c += line
-    # strip only the very last (CR)LF
+    c =_input(args.infile)
     c = c.rstrip("\r\n") if isinstance(c, str) else c.rstrip(b"\r\n")
     # strip any other (CR)LF
     if args.strip:
@@ -142,8 +133,8 @@ def main():
                          getattr(stopfunc, args.stop_function, args.stop_function),
                          args.min_depth,
                          args.max_depth,
-                         __literal_eval(args.codec_categories),
-                         __literal_eval(args.exclude_codecs),
+                         args.codec_categories,
+                         args.exclude_codecs,
                          args.encoding,
                          not args.do_not_stop,
                          True,  # show
@@ -162,8 +153,7 @@ def main():
         if len(r) == 0:
             print("Could not decode :-(")
     elif args.command == "rank":
-        for i, e in codecs.rank(c, args.extended, args.limit,
-                                __literal_eval(args.codec_categories), __literal_eval(args.exclude_codecs)):
+        for i, e in codecs.rank(c, args.extended, args.limit, args.codec_categories, args.exclude_codecs):
             s = "[+] %.5f: %s" % (i[0], e)
             print(s if len(s) <= 80 else s[:77] + "...")
 
@@ -1,9 +1,58 @@
 # -*- coding: UTF-8 -*-
+from argparse import ArgumentParser, RawTextHelpFormatter
+from types import MethodType
+
 from .ascii85 import *
 from .base45 import *
 from .base85 import *
 from .base91 import *
 from .base100 import *
 from .base122 import *
 from .baseN import *
+from ..__common__ import *
+from ..__info__ import __version__
+
+
+def main():
+    """ Entry point of the "debase" tool: guess-decode base-encoded input and print the decoded result.
+
+    The input is read from the FILE positional argument or from standard input ; the guess is restricted to the
+     "base" codec category.
+    """
+    descr = """Usage: debase [OPTION]... [FILE]
+Base decode multi-layer FILE, or standard input, to standard output.
+
+With no FILE, or when FILE is -, read standard input.
+
+Optional arguments:
+  -f, --stop-function   set the result checking function (default: text)
+                         format: printables|text|flag|lang_[bigram]|[regex]
+  -i, --ignore-generic  ignore generic base codecs while guess-decoding
+  -M, --max-depth       maximum codec search depth (default: 5)
+  -m, --min-depth       minimum codec search depth (default: 0)
+  -s, --do-not-stop     do not stop if a valid output is found
+
+      --help     display this help and exit
+      --verbose  show guessing information and steps
+      --version  output version information and exit
+
+Report debase bugs to <https://github.com/dhondta/python-codext/issues/new>
+Full documentation at: <https://python-codext.readthedocs.io/en/latest/enc/base.html>
+"""
+    parser = ArgumentParser(description=descr, formatter_class=RawTextHelpFormatter, add_help=False)
+    # the hand-written description above is displayed verbatim in place of the generated help
+    parser.format_help = MethodType(lambda s: s.description, parser)
+    parser.add_argument("file", nargs="?")
+    parser.add_argument("-f", "--stop-function", default="text")
+    parser.add_argument("-i", "--ignore-generic", action="store_true")
+    parser.add_argument("-M", "--max-depth", default=5, type=int)
+    parser.add_argument("-m", "--min-depth", default=0, type=int)
+    parser.add_argument("-s", "--do-not-stop", action="store_true")
+    parser.add_argument("--help", action="help")
+    parser.add_argument("--version", action="version")
+    # NOTE(review): --verbose is parsed but never read below (show=True is hard-coded) - confirm the intent
+    parser.add_argument("--verbose", action="store_true")
+    parser.version = "CodExt " + __version__
+    args = parser.parse_args()
+    # optionally exclude every generic base codec (base2 to base254) from the search
+    excl = ["base%d-generic" % i for i in range(2, 255)] if args.ignore_generic else []
+    # a name matching an attribute of stopfunc selects it ; otherwise the raw string is handed to guess()
+    sfunc = getattr(stopfunc, args.stop_function, args.stop_function)
+    c = _input(args.file)
+    # strip only the trailing (CR)LF characters
+    c = c.rstrip("\r\n") if isinstance(c, str) else c.rstrip(b"\r\n")
+    r = codecs.guess(c, sfunc, args.min_depth, args.max_depth, exclude=excl, codec_categories="base",
+                     stop=not args.do_not_stop, show=True, scoring_heuristic=False)
+    if not args.do_not_stop:
+        results = list(r.items())
+        # an empty result would otherwise crash with an IndexError ; report it instead
+        if not results:
+            print("Could not decode :-(")
+            return
+        print(ensure_str(results[0][1]))
 
@@ -2,12 +2,15 @@
 """Generic baseN functions.
 
 """
+from argparse import ArgumentParser, RawTextHelpFormatter
 from math import log
 from six import integer_types, string_types
 from string import ascii_lowercase as lower, ascii_uppercase as upper, digits, printable
-from types import FunctionType
+from textwrap import wrap
+from types import FunctionType, MethodType
 
 from ..__common__ import *
+from ..__info__ import __version__
 
 
 class BaseError(ValueError):
@@ -86,9 +89,7 @@ def base_encode(input, charset, errors="strict", exc=BaseEncodeError):
     :param errors:  errors handling marker
     :param exc:     exception to be raised in case of error
     """
-    i = input if isinstance(input, integer_types) else s2i(input)
-    n = len(charset)
-    r = ""
+    i, n, r = input if isinstance(input, integer_types) else s2i(input), len(charset), ""
     while i > 0:
         i, c = divmod(i, n)
         r = charset[c] + r
@@ -103,13 +104,13 @@ def base_decode(input, charset, errors="strict", exc=BaseDecodeError):
     :param errors:  errors handling marker
     :param exc:     exception to be raised in case of error
     """
-    i, n = 0, len(charset)
+    i, n, dec = 0, len(charset), lambda n: base_encode(n, [chr(x) for x in range(256)], errors, exc)
     for k, c in enumerate(input):
         try:
             i = i * n + charset.index(c)
         except ValueError:
-            handle_error("base", errors, exc, decode=True)(c, k)
-    return base_encode(i, [chr(j) for j in range(256)], errors, exc)
+            handle_error("base", errors, exc, decode=True)(c, k, dec(i))
+    return dec(i)
 
 
 # base codec factory functions
@@ -162,3 +163,55 @@ def _decode(input, errors="strict"):
         guess=["base%d-generic" % i for i in range(2, 255)], entropy=lambda e, n: log(int(n.split("-")[0][4:]), 2),
         len_charset=lambda n: int(n.split("-")[0][4:]), printables_rate=1., category="base-generic", penalty=.4)
 
+
+def main(n, ref=None, alt=None):
+    """ Build the entry point of a "base[n]" command-line tool.
+
+    :param n:   base number, used for naming the tool and for selecting the "base[n]" codec
+    :param ref: reference describing the alphabet, inserted in the help message when given
+    :param alt: alternative alphabet name, appended to the base number with a dash
+    :return:    function parsing the command-line arguments, encoding or decoding the input and returning an exit
+                 code (0 on success, 1 on invalid input)
+    """
+    base = str(n) + ("-" + alt.lstrip("-") if alt else "")
+    src = "The data are encoded as described for the base%(base)s alphabet in %(reference)s.\n" % \
+          {'base': base, 'reference': "\n" + ref if len(ref) > 10 else ref} if ref else ""
+    descr = """Usage: base%(base)s [OPTION]... [FILE]
+Base%(base)s encode or decode FILE, or standard input, to standard output.
+
+With no FILE, or when FILE is -, read standard input.
+
+Mandatory arguments to long options are mandatory for short options too.
+  -d, --decode          decode data
+  -i, --ignore-garbage  when decoding, ignore non-alphabet characters
+  -I, --invert          invert charsets from the base alphabet (e.g. lower- and uppercase)
+  -w, --wrap=COLS       wrap encoded lines after COLS character (default 76).
+                          Use 0 to disable line wrapping
+
+      --help     display this help and exit
+      --version  output version information and exit
+
+%(source)sWhen decoding, the input may contain newlines in addition to the bytes of
+the formal base%(base)s alphabet.  Use --ignore-garbage to attempt to recover
+from any other non-alphabet bytes in the encoded stream.
+
+Report base%(base)s translation bugs to <https://github.com/dhondta/python-codext/issues/new>
+Full documentation at: <https://python-codext.readthedocs.io/en/latest/enc/base.html>
+""" % {'base': base, 'source': src}
+
+    def _main():
+        parser = ArgumentParser(description=descr, formatter_class=RawTextHelpFormatter, add_help=False)
+        # the hand-written description above is displayed verbatim in place of the generated help
+        parser.format_help = MethodType(lambda s: s.description, parser)
+        parser.add_argument("file", nargs="?")
+        parser.add_argument("-d", "--decode", action="store_true")
+        parser.add_argument("-i", "--ignore-garbage", action="store_true")
+        parser.add_argument("-I", "--invert", action="store_true")
+        parser.add_argument("-w", "--wrap", type=int, default=76)
+        parser.add_argument("--help", action="help")
+        parser.add_argument("--version", action="version")
+        parser.version = "CodExt " + __version__
+        args = parser.parse_args()
+        c, f = _input(args.file), [encode, decode][args.decode]
+        # strip only the trailing (CR)LF characters
+        c = c.rstrip("\r\n") if isinstance(c, str) else c.rstrip(b"\r\n")
+        try:
+            c = f(c, "base" + base + ["", "-inv"][args.invert], ["strict", "ignore"][args.ignore_garbage])
+        except Exception as err:
+            # only the errors built by handle_error carry the partial output ; any other exception has no "output"
+            #  attribute and must not turn into an AttributeError here
+            print("%sbase%d: invalid input" % (getattr(err, "output", ""), n))
+            return 1
+        c = ensure_str(c)
+        # textwrap.wrap() rejects a width lower than 1, while 0 is documented above as "no wrapping"
+        if args.wrap > 0:
+            lines = wrap(c, args.wrap)
+        else:
+            lines = [c] if c else []
+        for l in lines:
+            print(l)
+        return 0
+    return _main
+
@@ -9,6 +9,7 @@
 - decodes file content to str (read)
 - encodes file content from str to bytes (write)
 """
+from ._base import main
 from ..__common__ import *
 
 
@@ -19,18 +20,18 @@
     class Base100DecodeError(ValueError):
         pass
 
-    
-    def base100_encode(input, errors='strict'):
+    def base100_encode(input, errors="strict"):
+        """ Encode each input byte as a 4-byte sequence starting with the bytes 240 and 159.
+
+        :param input:  data to be encoded, converted with b() first
+        :param errors: errors handling marker (not used by this function)
+        :return:       tuple (encoded bytes, length of the converted input)
+        """
         input = b(input)
         r = [240, 159, 0, 0] * len(input)
         for i, c in enumerate(input):
             r[4*i+2] = (c + 55) // 64 + 143
             r[4*i+3] = (c + 55) % 64 + 128
         return bytes(r), len(input)
 
-    
-    def base100_decode(input, errors='strict'):
+    def base100_decode(input, errors="strict"):
         input = b(input)
+        if errors == "ignore":
+            # the input was converted with b() and is searched with a bytes pattern, so the replacement must be
+            #  bytes too ; passing a str to bytes.replace raises a TypeError
+            input = input.replace(b"\n", b"")
         if len(input) % 4 != 0:
             raise Base100DecodeError("Bad input (length should be multiple of 4)")
         r = [None] * (len(input) // 4)
@@ -40,7 +41,14 @@ def base100_decode(input, errors='strict'):
             elif i % 4 == 3:
                 r[i//4] = (c - 128 + tmp - 55) & 0xff
         return bytes(r), len(input)
+else:
+    def base100_encode(input, errors='strict'):
+        """ Stub defined in the else branch ; always raises NotImplementedError. """
+        raise NotImplementedError
+
+    def base100_decode(input, errors='strict'):
+        """ Stub defined in the else branch ; always raises NotImplementedError. """
+        raise NotImplementedError
 
 
-    add("base100", base100_encode, base100_decode, r"^(?:base[-_]?100|emoji)$")
+add("base100", base100_encode, base100_decode, r"^(?:base[-_]?100|emoji)$")
+main = main(100, "<https://github.com/AdamNiederer/base100>")
 
@@ -7,6 +7,7 @@
 - decodes file content to str (read)
 - encodes file content from str to bytes (write)
 """
+from ._base import main
 from ..__common__ import *
 
 
@@ -67,7 +68,6 @@ def _get_7bits(idx, bit):
             r.extend([B1, B2])
         return "".join(map(chr, r)).encode("latin-1"), len(input)
 
-
     # inspired from: https://github.com/kevinAlbs/Base122/blob/master/base122.js
     def base122_decode(input, errors="strict"):
         currB, bob, r, input = 0, 0, [], list(map(ord, input))
@@ -91,7 +91,14 @@ def _get_7bits(currB, bob, B, decoded):
             else:
                 currB, bob = _get_7bits(currB, bob, input[i], r)
         return "".join(map(chr, r)), len(input)
+else:
+    def base122_encode(input, errors='strict'):
+        """ Stub defined in the else branch ; always raises NotImplementedError. """
+        raise NotImplementedError
+
+    def base122_decode(input, errors='strict'):
+        """ Stub defined in the else branch ; always raises NotImplementedError. """
+        raise NotImplementedError
 
 
-    add("base122", base122_encode, base122_decode, r"^base[-_]?122$")
+add("base122", base122_encode, base122_decode, r"^base[-_]?122$")
+main = main(122, "<http://blog.kevinalbs.com/base122>")