Skip to content

Commit 202d95f

Browse files
committed
Added string manipulations
1 parent 346740f commit 202d95f

5 files changed

Lines changed: 43 additions & 10 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ o
235235

236236
- [X] `a1z26`: keeps words whitespace-separated and uses a custom character separator
237237
- [X] `cases`: set of case-related encodings (including camel-, kebab-, lower-, pascal-, upper-, snake- and swap-case, slugify, capitalize, title)
238-
- [X] `dummy`: set of simple encodings (including reverse and word-reverse)
238+
- [X] `dummy`: set of simple encodings (including replace, reverse, word-reverse, substitute and strip-spaces)
239239
- [X] `octal`: dummy octal conversion (converts to 3-digits groups)
240240
- [X] `octal-spaced`: variant of `octal` ; dummy octal conversion, handling whitespace separators
241241
- [X] `ordinal`: dummy character ordinals conversion (converts to 3-digits groups)

codext/__common__.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -968,7 +968,9 @@ def __gen_str_from_re(regex, star_plus_max, repeat_max, yield_max, parsed=False)
968968
if code in ["assert_not", "at"]:
969969
continue
970970
elif code == "any":
971-
tokens.append(printable.replace("\n", "")) # should be ord(x) with x belongs to [0, 256[
971+
charset = list(printable.replace("\n", ""))
972+
random.shuffle(charset)
973+
tokens.append(charset) # should be ord(x) with x belongs to [0, 256[
972974
elif code == "assert":
973975
tokens.append(list(__gen_str_from_re(value[1], star_plus_max, repeat_max, yield_max, True)))
974976
elif code == "branch":
@@ -977,10 +979,11 @@ def __gen_str_from_re(regex, star_plus_max, repeat_max, yield_max, parsed=False)
977979
result += list(__gen_str_from_re(r, star_plus_max, repeat_max, yield_max, True)) or [""]
978980
tokens.append(result)
979981
elif code == "category":
980-
charset = CATEGORIES[value[9:]]
982+
charset = list(CATEGORIES[value[9:]])
981983
if negate:
982984
negate = False
983985
charset = list(set(printable).difference(charset))
986+
random.shuffle(charset)
984987
tokens.append(charset)
985988
elif code == "groupref":
986989
tokens.extend(__groups[value])
@@ -1015,7 +1018,9 @@ def __gen_str_from_re(regex, star_plus_max, repeat_max, yield_max, parsed=False)
10151018
elif code == "negate":
10161019
negate = True
10171020
elif code == "not_literal":
1018-
tokens.append(printable.replace(chr(value), ""))
1021+
charset = list(printable.replace(chr(value), ""))
1022+
random.shuffle(charset)
1023+
tokens.append(charset)
10191024
elif code == "range":
10201025
tokens.append("".join(chr(i) for i in range(value[0], value[1] + 1)))
10211026
elif code == "subpattern":

codext/common/dummy.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,30 @@
1414
from ..__common__ import add
1515

1616

17+
def replace(pair, *args):
18+
def code(input, error="strict"):
19+
return input.replace(pair[0], pair[1]), len(input)
20+
return code
21+
add("replace", replace, replace, r"^replace[-_]?((?!.*(.).*\2)..)$", guess=None)
22+
# important note: ^
23+
# using "{2}" here instead will break the codec
24+
# this is due to the fact that codext.__common__.generate_string_from_regex DOES NOT handle ASSERT_NOT (?!) and will
25+
# fail to generate a valid instance in lookup(...) when an encoding name is to be generated to get the CodecInfo
26+
27+
28+
def substitute(token, replacement):
29+
def code(input, error="strict"):
30+
return input.replace(token, replacement), len(input)
31+
return code
32+
add("substitute", substitute, substitute, r"^substitute[-_]?(.*?)/(.*?)$", guess=None)
33+
34+
1735
reverse = lambda i, e="strict": (i[::-1], len(i))
1836
add("reverse", reverse, reverse)
1937

2038
word_reverse = lambda i, e="strict": (" ".join(w[::-1] for w in i.split()), len(i))
2139
add("reverse-words", word_reverse, word_reverse, r"^reverse[-_]words$")
2240

41+
strip_spaces = lambda i, e="strict": (i.replace(" ", ""), len(i))
42+
add("strip-spaces", strip_spaces, strip_spaces, guess=None)
43+

docs/manipulations.md

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ These transformation functions are simple string transformations, including `str
2020
`title` | text <-> titled text | | decoding "untitles" the text
2121
`uppercase` | text <-> uppercase text | `upper` | decoding is `lowercase`
2222

23-
Of course, these "encodings" have no interest while using them in Python as the `str` methods can be called. It can be useful while using `codext` from the terminal (see [*CLI tool*](cli.html)).
23+
Of course, these transformations are of little interest when used from Python, as the `str` methods can be called directly. They can be useful while using `codext` from the terminal (see [*CLI tool*](cli.html)).
2424

2525
Some simple examples:
2626

@@ -43,22 +43,25 @@ These transformation functions are simple string transformations.
4343

4444
**Codec** | **Conversions** | **Aliases** | **Comment**
4545
:---: | :---: | --- | ---
46+
`replace` | text <-> text with single-char replaced | |
4647
`reverse` | text <-> reversed text | |
4748
`reverse-words` | text <-> reversed words | | same as `reverse` but not on the whole text, only on the words (text split by whitespace)
49+
`strip-spaces` | text <-> text with all space characters removed | |
50+
`substitute` | text <-> text with token substituted | |
4851

49-
As in the previous section, these "encodings" have no interest while using them in Python but well while using `codext` from the terminal (see [*CLI tool*](cli.html)).
52+
As in the previous section, these transformations are of little interest when used from Python, but they are useful while using `codext` from the terminal (see [*CLI tool*](cli.html)).
5053

5154
A simple example:
5255

5356
```sh
54-
$ echo -en "test string" | codext encode reverse-words | codext encode reverse
55-
string test
57+
$ echo -en "test string" | codext encode reverse-words | codext encode reverse replace-\ _
58+
string_test
5659
```
5760

5861
Or using encodings chaining:
5962

6063
```sh
61-
$ echo -en "test string" | codext encode reverse-words reverse
62-
string test
64+
$ echo -en "test string" | codext encode reverse-words reverse substitute-string/phrase
65+
phrase test
6366
```
6467

tests/test_manual.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,10 @@ def test_codec_dummy_str_manips(self):
9696
self.assertEqual(codecs.decode(STR, "reverse"), "tset a si siht")
9797
self.assertEqual(codecs.decode(STR, "reverse_words"), "siht si a tset")
9898
self.assertEqual(codecs.decode(STR.split()[0], "reverse"), codecs.decode(STR.split()[0], "reverse-words"))
99+
self.assertEqual(codecs.encode(STR, "replace-i1"), STR.replace("i", "1"))
100+
self.assertEqual(codecs.decode(STR.replace("i", "1"), "replace-1i"), STR)
101+
self.assertEqual(codecs.encode(STR, "substitute-this/that"), STR.replace("this", "that"))
102+
self.assertEqual(codecs.decode(STR.replace("this", "that"), "substitute-that/this"), STR)
99103

100104
def test_codec_hash_functions(self):
101105
STR = b"This is a test string!"

0 commit comments

Comments
 (0)