Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 6.1.0

- Add `allow_unicode` flag to allow Unicode characters in the slug

## 6.0.1

- Rework regex_pattern to mean the opposite (disallowed chars instead of allowed)
Expand Down
16 changes: 15 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ def slugify(
stopwords=(),
regex_pattern=None,
lowercase=True,
replacements=()
replacements=(),
allow_unicode=False
):
"""
Make a slug from the given text.
Expand All @@ -58,6 +59,7 @@ def slugify(
:param regex_pattern (str): regex pattern for disallowed characters
:param lowercase (bool): convert the slug to lowercase; set it to False to preserve the original case
:param replacements (iterable): list of replacement rules e.g. [['|', 'or'], ['%', 'percent']]
:param allow_unicode (bool): keep Unicode characters in the slug instead of transliterating them to ASCII
:return (str): the slugified text
"""
```
Expand All @@ -75,6 +77,10 @@ txt = '影師嗎'
r = slugify(txt)
self.assertEqual(r, "ying-shi-ma")

txt = '影師嗎'
r = slugify(txt, allow_unicode=True)
self.assertEqual(r, "影師嗎")

txt = 'C\'est déjà l\'été.'
r = slugify(txt)
self.assertEqual(r, "c-est-deja-l-ete")
Expand Down Expand Up @@ -133,6 +139,14 @@ txt = 'ÜBER Über German Umlaut'
r = slugify(txt, replacements=[['Ü', 'UE'], ['ü', 'ue']])
self.assertEqual(r, "ueber-ueber-german-umlaut")

txt = 'i love 🦄'
r = slugify(txt, allow_unicode=True)
self.assertEqual(r, "i-love")

txt = 'i love 🦄'
r = slugify(txt, allow_unicode=True, regex_pattern=r'[^🦄]+')
self.assertEqual(r, "🦄")

```

For more examples, have a look at the [test.py](test.py) file.
Expand Down
5 changes: 4 additions & 1 deletion slugify/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ def parse_args(argv):
help="Activate case sensitivity")
parser.add_argument("--replacements", nargs='+',
help="""Additional replacement rules e.g. "|->or", "%%->percent".""")
parser.add_argument("--allow-unicode", action='store_true', default=False,
help="Allow unicode characters")

args = parser.parse_args(argv[1:])

Expand Down Expand Up @@ -73,7 +75,8 @@ def slugify_params(args):
separator=args.separator,
stopwords=args.stopwords,
lowercase=args.lowercase,
replacements=args.replacements
replacements=args.replacements,
allow_unicode=args.allow_unicode
)


Expand Down
2 changes: 1 addition & 1 deletion slugify/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
__url__ = 'https://github.com/un33k/python-slugify'
__license__ = 'MIT'
__copyright__ = 'Copyright 2022 Val Neekman @ Neekware Inc.'
__version__ = '6.0.1'
__version__ = '6.1.0'
20 changes: 16 additions & 4 deletions slugify/slugify.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
HEX_PATTERN = re.compile(r'&#x([\da-fA-F]+);')
QUOTE_PATTERN = re.compile(r'[\']+')
DISALLOWED_CHARS_PATTERN = re.compile(r'[^-a-zA-Z0-9]+')
DISALLOWED_UNICODE_CHARS_PATTERN = re.compile(r'[\W_]+')
DUPLICATE_DASH_PATTERN = re.compile(r'-{2,}')
NUMBERS_PATTERN = re.compile(r'(?<=\d),(?=\d)')
DEFAULT_SEPARATOR = '-'
Expand Down Expand Up @@ -66,7 +67,8 @@ def smart_truncate(string, max_length=0, word_boundary=False, separator=' ', sav

def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, word_boundary=False,
separator=DEFAULT_SEPARATOR, save_order=False, stopwords=(), regex_pattern=None, lowercase=True,
replacements: typing.Iterable[typing.Iterable[str]] = ()):
replacements: typing.Iterable[typing.Iterable[str]] = (),
allow_unicode=False):
"""
Make a slug from the given text.
:param text (str): initial text
Expand All @@ -81,6 +83,7 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
:param regex_pattern (str): regex pattern for disallowed characters
:param lowercase (bool): activate case sensitivity by setting it to False
:param replacements (iterable): list of replacement rules e.g. [['|', 'or'], ['%', 'percent']]
:param allow_unicode (bool): allow unicode characters
:return (str):
"""

Expand All @@ -97,7 +100,8 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
text = QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, text)

# decode unicode
text = unidecode.unidecode(text)
if not allow_unicode:
text = unidecode.unidecode(text)

# ensure text is still in unicode
if not isinstance(text, str):
Expand All @@ -122,7 +126,11 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
pass

# translate
text = unicodedata.normalize('NFKD', text)
if allow_unicode:
text = unicodedata.normalize('NFKC', text)
else:
text = unicodedata.normalize('NFKD', text)

if sys.version_info < (3,):
text = text.encode('ascii', 'ignore')

Expand All @@ -137,7 +145,11 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
text = NUMBERS_PATTERN.sub('', text)

# replace all other unwanted characters
pattern = regex_pattern or DISALLOWED_CHARS_PATTERN
if allow_unicode:
pattern = regex_pattern or DISALLOWED_UNICODE_CHARS_PATTERN
else:
pattern = regex_pattern or DISALLOWED_CHARS_PATTERN

text = re.sub(pattern, DEFAULT_SEPARATOR, text)

# remove redundant
Expand Down
Loading