Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 164 additions & 1 deletion slugify/slugify.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,19 @@
import unicodedata
from collections.abc import Callable, Iterable
from html.entities import name2codepoint
from typing import NamedTuple

try:
import unidecode
except ImportError:
import text_unidecode as unidecode # type: ignore[import-untyped, no-redef]

__all__ = ['slugify', 'smart_truncate']
__all__ = ['slugify', 'slugify_with_map', 'smart_truncate']


class SlugifyResult(NamedTuple):
    """Return value of ``slugify_with_map``.

    ``slug`` is the final slug text; the test suite asserts it equals the
    output of ``slugify`` for the same arguments.  ``steps`` is an ordered
    record of the pipeline stages that actually changed the text; each entry
    is a dict with the keys ``'step'``, ``'action'``, ``'before'`` and
    ``'after'``.
    """

    slug: str
    steps: list[dict[str, str]]


CHAR_ENTITY_PATTERN = re.compile(r'&(%s);' % '|'.join(name2codepoint))
Expand Down Expand Up @@ -195,3 +201,160 @@ def slugify(
text = text.replace(DEFAULT_SEPARATOR, separator)

return text


def slugify_with_map(
    text: str,
    entities: bool = True,
    decimal: bool = True,
    hexadecimal: bool = True,
    max_length: int = 0,
    word_boundary: bool = False,
    separator: str = DEFAULT_SEPARATOR,
    save_order: bool = False,
    stopwords: Iterable[str] = (),
    regex_pattern: re.Pattern[str] | str | None = None,
    lowercase: bool = True,
    replacements: Iterable[Iterable[str]] = (),
    allow_unicode: bool = False,
) -> SlugifyResult:
    """
    Make a slug from the given text and return transformation steps.

    The pipeline mirrors ``slugify`` stage for stage; every stage that
    actually changes the text is recorded as a dict with the keys
    ``'step'``, ``'action'``, ``'before'`` and ``'after'``.

    :param text (str): initial text
    :param entities (bool): converts html entities to unicode
    :param decimal (bool): converts html decimal to unicode
    :param hexadecimal (bool): converts html hexadecimal to unicode
    :param max_length (int): output string length
    :param word_boundary (bool): truncates to complete word even if length ends up shorter than max_length
    :param separator (str): separator between words
    :param save_order (bool): when set, does not include shorter subsequent words even if they fit
    :param stopwords (iterable): words to discount
    :param regex_pattern (str): regex pattern for disallowed characters
    :param lowercase (bool): activate case sensitivity by setting it to False
    :param replacements (iterable): list of replacement rules e.g. [['|', 'or'], ['%', 'percent']]
    :param allow_unicode (bool): allow unicode characters
    :return (SlugifyResult): slug and transformation steps
    """
    steps: list[dict[str, str]] = []
    current = text

    def _apply(step_name: str, action: str, transform: Callable[[str], str]) -> None:
        # Run one pipeline stage; record it only when it changed the text.
        # If ``transform`` raises, ``current`` stays unchanged and nothing
        # is recorded (callers rely on this for the decimal/hex stages).
        nonlocal current
        before = current
        current = transform(current)
        if before != current:
            steps.append({
                'step': step_name,
                'action': action,
                'before': before,
                'after': current
            })

    def _substitute(t: str) -> str:
        # Apply the caller-supplied (old, new) replacement pairs in order.
        for old, new in replacements:
            t = t.replace(old, new)
        return t

    # user-specific replacements - pre-process
    if replacements:
        _apply('replacements', 'replace', _substitute)

    # ensure text is unicode (defensive; no-op when ``text`` is already str)
    if not isinstance(current, str):
        _apply('ensure_unicode', 'convert', lambda t: str(t, 'utf-8', 'ignore'))

    # replace quotes with dashes - pre-process
    _apply('quotes', 'replace', lambda t: QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, t))

    def _normalize(t: str) -> str:
        # NFKC when unicode output is allowed, otherwise NFKD + ASCII
        # transliteration via unidecode.
        if allow_unicode:
            return unicodedata.normalize('NFKC', t)
        return unidecode.unidecode(unicodedata.normalize('NFKD', t))

    _apply('normalize', 'transliterate', _normalize)

    # ensure text is still unicode after transliteration
    if not isinstance(current, str):
        _apply('ensure_unicode', 'convert', lambda t: str(t, 'utf-8', 'ignore'))

    # named character entity references (e.g. &amp;)
    if entities:
        _apply('entities', 'decode',
               lambda t: CHAR_ENTITY_PATTERN.sub(lambda m: chr(name2codepoint[m.group(1)]), t))

    # decimal character references; invalid codepoints are silently ignored
    if decimal:
        try:
            _apply('decimal', 'decode',
                   lambda t: DECIMAL_PATTERN.sub(lambda m: chr(int(m.group(1))), t))
        except Exception:
            pass

    # hexadecimal character references; invalid codepoints are silently ignored
    if hexadecimal:
        try:
            _apply('hexadecimal', 'decode',
                   lambda t: HEX_PATTERN.sub(lambda m: chr(int(m.group(1), 16)), t))
        except Exception:
            pass

    # re-normalize after entity decoding
    _apply('renormalize', 'normalize',
           lambda t: unicodedata.normalize('NFKC' if allow_unicode else 'NFKD', t))

    # make the text lowercase (optional)
    if lowercase:
        _apply('lowercase', 'transform', lambda t: t.lower())

    # remove generated quotes - post-process
    _apply('remove_quotes', 'delete', lambda t: QUOTE_PATTERN.sub('', t))

    # cleanup numbers
    _apply('clean_numbers', 'clean', lambda t: NUMBERS_PATTERN.sub('', t))

    # replace all other disallowed characters with the default separator
    if allow_unicode:
        pattern = regex_pattern or DISALLOWED_UNICODE_CHARS_PATTERN
    else:
        pattern = regex_pattern or DISALLOWED_CHARS_PATTERN
    _apply('clean_chars', 'clean', lambda t: re.sub(pattern, DEFAULT_SEPARATOR, t))

    # collapse duplicate separators and trim the ends
    _apply('clean_duplicates', 'clean',
           lambda t: DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, t).strip(DEFAULT_SEPARATOR))

    # remove stopwords (case-insensitively when lowercase is on)
    if stopwords:
        def _drop_stopwords(t: str) -> str:
            banned = [s.lower() for s in stopwords] if lowercase else stopwords
            kept = [w for w in t.split(DEFAULT_SEPARATOR) if w not in banned]
            return DEFAULT_SEPARATOR.join(kept)

        _apply('stopwords', 'delete', _drop_stopwords)

    # finalize user-specific replacements
    if replacements:
        _apply('final_replacements', 'replace', _substitute)

    # smart truncate if requested
    if max_length > 0:
        _apply('truncate', 'truncate',
               lambda t: smart_truncate(t, max_length, word_boundary, DEFAULT_SEPARATOR, save_order))

    # swap in the requested separator last, exactly as slugify does
    if separator != DEFAULT_SEPARATOR:
        _apply('separator', 'replace', lambda t: t.replace(DEFAULT_SEPARATOR, separator))

    return SlugifyResult(slug=current, steps=steps)
102 changes: 101 additions & 1 deletion test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from contextlib import contextmanager

from slugify import PRE_TRANSLATIONS
from slugify import slugify
from slugify import slugify, slugify_with_map
from slugify import smart_truncate
from slugify.__main__ import slugify_params, parse_args

Expand Down Expand Up @@ -653,5 +653,105 @@ def test_multivalued_options_with_text(self):
self.assertEqual(params['stopwords'], ['the', 'in', 'a', 'hurry'])


class TestSlugifyWithMap(unittest.TestCase):
    """slugify_with_map must produce slugs identical to slugify and record
    an accurate per-stage transformation map."""

    def _get_step(self, result, step_name):
        """Return the first recorded step named *step_name*, failing if absent."""
        self.assertIn(step_name, [s['step'] for s in result.steps])
        return next(s for s in result.steps if s['step'] == step_name)

    def test_slug_compatibility(self):
        test_cases = [
            ("This is a test ---", {}),
            ("影師嗎", {}),
            ("C'est déjà l'été.", {}),
            ("10 | 20 %", {'replacements': [['|', 'or'], ['%', 'percent']]}),
            ("this has a stopword", {'stopwords': ['stopword']}),
            ("foo & bar", {}),
            ("Foo A FOO B foo C", {'stopwords': ['foo']}),
            ("jaja---lol-méméméoo--a", {'max_length': 15, 'word_boundary': True}),
        ]
        for text, kwargs in test_cases:
            with self.subTest(text=text, kwargs=kwargs):
                self.assertEqual(
                    slugify_with_map(text, **kwargs).slug,
                    slugify(text, **kwargs)
                )

    def test_slug_compatibility_unicode(self):
        test_cases = [
            ("影師嗎", {'allow_unicode': True}),
            ("C'est déjà l'été.", {'allow_unicode': True}),
            ("Компьютер", {'allow_unicode': True}),
            ("this has a Öländ", {'allow_unicode': True, 'stopwords': ['Öländ']}),
            ("foo & bår", {'allow_unicode': True, 'entities': False}),
        ]
        for text, kwargs in test_cases:
            with self.subTest(text=text, kwargs=kwargs):
                self.assertEqual(
                    slugify_with_map(text, **kwargs).slug,
                    slugify(text, **kwargs)
                )

    def test_replacements_steps(self):
        result = slugify_with_map('10 | 20 %', replacements=[['|', 'or'], ['%', 'percent']])
        replacement_step = self._get_step(result, 'replacements')
        self.assertEqual(replacement_step['action'], 'replace')
        self.assertIn('|', replacement_step['before'])
        self.assertIn('%', replacement_step['before'])
        self.assertIn('or', replacement_step['after'])
        self.assertIn('percent', replacement_step['after'])

    def test_stopwords_steps(self):
        result = slugify_with_map('the quick brown fox', stopwords=['the'])
        stopword_step = self._get_step(result, 'stopwords')
        self.assertEqual(stopword_step['action'], 'delete')
        self.assertIn('the', stopword_step['before'])
        self.assertNotIn('the', stopword_step['after'])

    def test_html_entities_steps(self):
        # The input must contain a real named entity ('&amp;'); a bare '&'
        # never matches CHAR_ENTITY_PATTERN, so the entities stage would be
        # a no-op and would not be recorded at all.
        result = slugify_with_map('foo &amp; bar')
        entity_step = self._get_step(result, 'entities')
        self.assertEqual(entity_step['action'], 'decode')
        self.assertIn('&amp;', entity_step['before'])
        self.assertIn('&', entity_step['after'])

    def test_allow_unicode_false(self):
        result = slugify_with_map('影師嗎', allow_unicode=False)
        normalize_step = self._get_step(result, 'normalize')
        self.assertEqual(normalize_step['action'], 'transliterate')
        self.assertIn('影師嗎', normalize_step['before'])
        self.assertNotEqual(normalize_step['after'], normalize_step['before'])
        self.assertEqual(result.slug, slugify('影師嗎', allow_unicode=False))

    def test_allow_unicode_true(self):
        # Even with allow_unicode, ASCII letters are still lowercased.
        result = slugify_with_map('Hello 影師嗎 WORLD', allow_unicode=True)
        lowercase_step = self._get_step(result, 'lowercase')
        self.assertIn('WORLD', lowercase_step['before'])
        self.assertIn('world', lowercase_step['after'])

    def test_lowercase_step(self):
        result = slugify_with_map('HELLO WORLD', lowercase=True)
        lowercase_step = self._get_step(result, 'lowercase')
        self.assertEqual(lowercase_step['action'], 'transform')
        self.assertIn('HELLO', lowercase_step['before'])
        self.assertIn('hello', lowercase_step['after'])

    def test_clean_chars_step(self):
        result = slugify_with_map('This -- is a ## test ---')
        clean_step = self._get_step(result, 'clean_chars')
        self.assertEqual(clean_step['action'], 'clean')
        self.assertIn('##', clean_step['before'])


# Allow running this test module directly: `python test.py`.
if __name__ == '__main__':  # pragma: nocover
    unittest.main()