Added codecs: pkzip*

dhondta · dhondta · commit 66df98d9f4e5 · 2021-10-02T13:40:08.000+02:00
diff --git a/README.md b/README.md
@@ -191,6 +191,9 @@ o
 `navajo` | text <-> Navajo | only handles letters (not full words from the Navajo dictionary)
 `octal` | text <-> octal digits | dummy octal conversion (converts to 3-digits groups)
 `ordinal` | text <-> ordinal digits | dummy character ordinals conversion (converts to 3-digits groups)
+`pkzip_deflate` | text <-> deflated text | standard Zip-deflate compression/decompression
+`pkzip_bzip2` | text <-> Bzipped text | standard BZip2 compression/decompression
+`pkzip_lzma` | text <-> LZMA-compressed text | standard LZMA compression/decompression
 `radio` | text <-> radio words | aka NATO or radio phonetic alphabet
 `resistor` | text <-> resistor colors | aka resistor color codes
 `rot` | text <-> rot(N) ciphertext | aka Caesar cipher (N belongs to [1,25])
diff --git a/codext/VERSION.txt b/codext/VERSION.txt
@@ -1 +1 @@
-1.8.2
+1.8.3
diff --git a/codext/compressions/__init__.py b/codext/compressions/__init__.py
@@ -0,0 +1,4 @@
+# -*- coding: UTF-8 -*-
+from .gzipp import *
+from .pkzip import *
+
diff --git a/codext/compressions/gzipp.py b/codext/compressions/gzipp.py
@@ -16,7 +16,6 @@
 
 
 __examples__ = {'enc-dec(gzip)': ["test", "This is a test"]}
-__guess__ = ["gzip"]
 
 
 def gzip_encode(text, errors="strict"):
diff --git a/codext/compressions/pkzip.py b/codext/compressions/pkzip.py
@@ -0,0 +1,47 @@
+# -*- coding: UTF-8 -*-
+"""Pkzip Codec - pkzip content compression.
+
+NB: Not an encoding properly speaking.
+
+This codec:
+- en/decodes strings from str to str
+- en/decodes strings from bytes to bytes
+- decodes file content to str (read)
+- encodes file content from str to bytes (write)
+"""
+import zipfile
+
+from ..__common__ import *
+
+
+# Sample inputs shared by the three example sets below.
+# NOTE(review): "@random{1024}" is presumably a placeholder expanded by the examples machinery of
+#  ..__common__ (not visible here) - confirm before relying on it.
+_str          = ["test", "This is a test", "@random{1024}"]
+# One examples dict per codec, passed as `examples=` to the matching add() call at the end of this module.
+# The two names inside each "enc-dec(...)" key both match the name pattern given to that add() call.
+__examples1__ = {'enc-dec(pkzip-deflate|deflate)': _str}
+__examples2__ = {'enc-dec(pkzip_bz2|bzip2)':       _str}
+__examples3__ = {'enc-dec(pkzip-lzma|lzma)':       _str}
+
+
+if PY3:
+    # Build an encoding function bound to a single zipfile compression method.
+    #
+    # compression_type is handed unchanged to zipfile._get_compressor(); the codecs registered at the end of
+    #  this module use zipfile's ZIP_DEFLATED (8), ZIP_BZIP2 (12) and ZIP_LZMA (14) method identifiers.
+    # Returns a function (text, errors="strict") -> (compressed data, len(text)).
+    def pkzip_encode(compression_type):
+        def _encode(text, errors="strict"):
+            # NOTE: _get_compressor is a private helper of zipfile (leading underscore), hence not a documented
+            #  API; a fresh compressor object is created for every call
+            c = zipfile._get_compressor(compression_type)
+            # flush() is appended to collect what the compressor has not emitted yet; `errors` is accepted but
+            #  not used ; b() comes from ..__common__ - presumably converts text to bytes (confirm)
+            return c.compress(b(text)) + c.flush(), len(text)
+        return _encode
+
+
+    # Build a decoding function bound to a single zipfile compression method.
+    #
+    # compression_type is handed unchanged to zipfile._get_decompressor(); see the add() calls at the end of
+    #  this module for the values in use.
+    # Returns a function (data, errors="strict") -> (decompressed data, len(decompressed data)).
+    def pkzip_decode(compression_type):
+        def _decode(data, errors="strict"):
+            # NOTE: _get_decompressor is a private helper of zipfile (leading underscore), hence not a documented
+            #  API; a fresh decompressor object is created for every call
+            d = zipfile._get_decompressor(compression_type)
+            # the whole input is decompressed in a single call; `errors` is accepted but not used ; b() comes from
+            #  ..__common__ - presumably converts data to bytes (confirm)
+            r = d.decompress(b(data))
+            # NOTE(review): the second item is the length of the OUTPUT, whereas the codecs documentation
+            #  describes it as the length of input consumed - confirm this matches the project's convention
+            return r, len(r)
+        return _decode
+
+
+    # Register one codec per compression method supported by zipfile. The named zipfile constants are used
+    #  instead of their raw values (ZIP_DEFLATED == 8, ZIP_BZIP2 == 12, ZIP_LZMA == 14) so that each
+    #  registration documents which method it binds; each pattern makes the "zip_"/"pkzip_" (or "-") prefix
+    #  optional, so the short names (e.g. "deflate", "bz2", "lzma") are accepted too.
+    add("pkzip_deflate", pkzip_encode(zipfile.ZIP_DEFLATED), pkzip_decode(zipfile.ZIP_DEFLATED),
+        r"(?:(?:pk)?zip[-_])?deflate", entropy=7.9, examples=__examples1__, guess=["deflate"])
+
+    add("pkzip_bzip2", pkzip_encode(zipfile.ZIP_BZIP2), pkzip_decode(zipfile.ZIP_BZIP2),
+        r"(?:(?:pk)?zip[-_])?bz(?:ip)?2", entropy=7.9, examples=__examples2__, guess=["bz2"])
+
+    add("pkzip_lzma", pkzip_encode(zipfile.ZIP_LZMA), pkzip_decode(zipfile.ZIP_LZMA),
+        r"(?:(?:pk)?zip[-_])?lzma", entropy=7.9, examples=__examples3__, guess=["lzma"])
+
diff --git a/codext/others/__init__.py b/codext/others/__init__.py
@@ -1,6 +1,5 @@
 # -*- coding: UTF-8 -*-
 from .dna import *
-from .gzipp import *
 from .html import *
 from .letters import *
 from .markdown import *
diff --git a/docs/enc/compressions.md b/docs/enc/compressions.md
@@ -0,0 +1,50 @@
+`codext` provides a few common compression codecs.
+
+-----
+
+### GZip
+
+**Codec** | **Conversions** | **Aliases** | **Comment**
+:---: | :---: | --- | ---
+`gzip` | data <-> GZipped data |  | decoding tries with and without the file signature
+
+```python
+>>> codext.encode('test', "gzip")
+'\x1f\x8b\x08\x00\x0esÛ_\x02ÿ+I-.\x01\x00\x0c~\x7fØ\x04\x00\x00\x00'
+>>> codext.decode('\x1f\x8b\x08\x00\x0esÛ_\x02ÿ+I-.\x01\x00\x0c~\x7fØ\x04\x00\x00\x00', "gzip")
+'test'
+```
+
+-----
+
+### PKZip
+
+These codecs expose the compression methods (Deflate, BZip2 and LZMA) available in Python's standard [`zipfile`](https://docs.python.org/3/library/zipfile.html) library.
+
+**Codec** | **Conversions** | **Aliases** | **Comment**
+:---: | :---: | --- | ---
+`pkzip_deflate` | data <-> Deflated data | `deflate`, `zip_deflate` | Python3 only
+`pkzip_bzip2` | data <-> Bzipped data | `bz2`, `bzip2`, `zip_bz2` | Python3 only
+`pkzip_lzma` | data <-> LZMA-compressed data | `lzma`, `zip_lzma` | Python3 only
+
+```python
+>>> codext.encode("a test string", "deflate")
+'KT(I-.Q(.)ÊÌK\x07\x00'
+>>> codext.decode("KT(I-.Q(.)ÊÌK\x07\x00", "zip_deflate")
+'a test string'
+```
+
+```python
+>>> codext.encode("a test string", "bzip2")
+'BZh91AY&SY°\x92µÏ\x00\x00\x01\x11\x80@\x00"¡\x1c\x00 \x00"\x1a\x07¤ É\x88u\x95Á`Òñw$S\x85\t\x0b\t+\\ð'
+>>> codext.decode("BZh91AY&SY°\x92µÏ\x00\x00\x01\x11\x80@\x00\"¡\x1c\x00 \x00\"\x1a\x07¤ É\x88u\x95Á`Òñw$S\x85\t\x0b\t+\\ð", "bz2")
+'a test string'
+```
+
+```python
+>>> codext.encode("a test string", "lzma")
+'\t\x04\x05\x00]\x00\x00\x80\x00\x000\x88\n\x86\x94\\Uf\x14Þ\x82*\x11ëê\x93fÿý\x84 \x00'
+>>> codext.decode("\t\x04\x05\x00]\x00\x00\x80\x00\x000\x88\n\x86\x94\\Uf\x14Þ\x82*\x11ëê\x93fÿý\x84 \x00", "zip_lzma")
+'a test string'
+```
+
diff --git a/docs/enc/others.md b/docs/enc/others.md
@@ -29,23 +29,6 @@ CACTCGGTCGGCCATATGTTCGGCCATATGTTCGTCTGTTCACTCGCCCATACACT
 
 -----
 
-### GZip
-
-This is, of course, not an encoding properly speaking, but it is implemented for the sake of convenience.
-
-**Codec** | **Conversions** | **Aliases** | **Comment**
-:---: | :---: | --- | ---
-`gzip` | data <-> GZipped data |  | decoding tries with and without the file signature
-
-```python
->>> codext.encode('test', "gzip")
-'\x1f\x8b\x08\x00\x0esÛ_\x02ÿ+I-.\x01\x00\x0c~\x7fØ\x04\x00\x00\x00'
->>> codext.decode('\x1f\x8b\x08\x00\x0esÛ_\x02ÿ+I-.\x01\x00\x0c~\x7fØ\x04\x00\x00\x00', "gzip")
-'test'
-```
-
------
-
 ### HTML Entities
 
 This implements the full list of characters available at [this reference](https://dev.w3.org/html5/html-author/charref).
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -7,13 +7,14 @@ pages:
   - Features: features.md
   - Encodings: encodings.md
   - Encodings:
-    - 'Base': enc/base.md
-    - 'Binary': enc/binary.md
-    - 'Common': enc/common.md
-    - 'Cryptography': enc/crypto.md
-    - 'Languages': enc/languages.md
-    - 'Others': enc/others.md
-    - 'Steganography': enc/stegano.md
+    - Base: enc/base.md
+    - Binary: enc/binary.md
+    - Common: enc/common.md
+    - Compressions: enc/compressions.md
+    - Cryptography: enc/crypto.md
+    - Languages: enc/languages.md
+    - Others: enc/others.md
+    - Steganography: enc/stegano.md
   - 'String manipulations': manipulations.md
   - 'CLI tool': cli.md
   - 'Create your codec': howto.md

Original file line number	Diff line number	Diff line change
`@@ -16,7 +16,6 @@`
`16`	`16`
`17`	`17`
`18`	`18`	`__examples__ = {'enc-dec(gzip)': ["test", "This is a test"]}`
`19`		`-__guess__ = ["gzip"]`
`20`	`19`
`21`	`20`
`22`	`21`	`def gzip_encode(text, errors="strict"):`