JiangYongKang · JiangYongKang · Apr 1, 2026
diff --git a/slugify/slugify.py b/slugify/slugify.py
@@ -3,14 +3,22 @@
 import re
 import unicodedata
 from collections.abc import Iterable
+from dataclasses import dataclass
 from html.entities import name2codepoint
+from typing import Any
 
 try:
     import unidecode
 except ImportError:
     import text_unidecode as unidecode  # type: ignore[import-untyped, no-redef]
 
-__all__ = ['slugify', 'smart_truncate']
+__all__ = ['slugify', 'slugify_with_map', 'smart_truncate', 'SlugWithMap']
+
+
+@dataclass
+class SlugWithMap:  # result container returned by slugify_with_map
+    slug: str  # the final slug text
+    steps: list[dict[str, Any]]  # recorded steps in order; each dict has a 'type' key plus step details
 
 
 CHAR_ENTITY_PATTERN = re.compile(r'&(%s);' % '|'.join(name2codepoint))
@@ -72,7 +80,7 @@ def smart_truncate(
     return truncated.strip(separator)
 
 
-def slugify(
+def _slugify_core(
     text: str,
     entities: bool = True,
     decimal: bool = True,
@@ -86,112 +94,257 @@ def slugify(
     lowercase: bool = True,
     replacements: Iterable[Iterable[str]] = (),
     allow_unicode: bool = False,
-) -> str:
-    """
-    Make a slug from the given text.
-    :param text (str): initial text
-    :param entities (bool): converts html entities to unicode
-    :param decimal (bool): converts html decimal to unicode
-    :param hexadecimal (bool): converts html hexadecimal to unicode
-    :param max_length (int): output string length
-    :param word_boundary (bool): truncates to complete word even if length ends up shorter than max_length
-    :param save_order (bool): when set, does not include shorter subsequent words even if they fit
-    :param separator (str): separator between words
-    :param stopwords (iterable): words to discount
-    :param regex_pattern (str): regex pattern for disallowed characters
-    :param lowercase (bool): activate case sensitivity by setting it to False
-    :param replacements (iterable): list of replacement rules e.g. [['|', 'or'], ['%', 'percent']]
-    :param allow_unicode (bool): allow unicode characters
-    :return (str):
-    """
+    track_steps: bool = False,
+) -> tuple[str, list[dict[str, Any]]]:
+    steps: list[dict[str, Any]] = []
 
-    # user-specific replacements
+    def _record(step_type: str, **kwargs: Any) -> None:
+        if track_steps:
+            steps.append({'type': step_type, **kwargs})
+
+    # user-specific replacements (pre)
     if replacements:
         for old, new in replacements:
+            prev_text = text
             text = text.replace(old, new)
+            if track_steps and text != prev_text:
+                _record('replace', old=old, new=new, before=prev_text, after=text)
 
     # ensure text is unicode
     if not isinstance(text, str):
+        prev_text = text
         text = str(text, 'utf-8', 'ignore')
+        _record('normalize', before=prev_text, after=text)
 
     # replace quotes with dashes - pre-process
+    prev_text = text
     text = QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, text)
+    if track_steps and text != prev_text:
+        _record('replace', pattern='quotes', before=prev_text, after=text)
 
     # normalize text, convert to unicode if required
+    prev_text = text
     if allow_unicode:
         text = unicodedata.normalize('NFKC', text)
     else:
         text = unicodedata.normalize('NFKD', text)
         text = unidecode.unidecode(text)
+    if track_steps and text != prev_text:
+        _record('normalize', mode='unicode', before=prev_text, after=text)
 
     # ensure text is still in unicode
     if not isinstance(text, str):
+        prev_text = text
         text = str(text, 'utf-8', 'ignore')
+        _record('normalize', before=prev_text, after=text)
 
     # character entity reference
     if entities:
+        prev_text = text
         text = CHAR_ENTITY_PATTERN.sub(lambda m: chr(name2codepoint[m.group(1)]), text)
+        if track_steps and text != prev_text:
+            _record('entity', mode='named', before=prev_text, after=text)
 
     # decimal character reference
     if decimal:
         try:
+            prev_text = text
             text = DECIMAL_PATTERN.sub(lambda m: chr(int(m.group(1))), text)
+            if track_steps and text != prev_text:
+                _record('entity', mode='decimal', before=prev_text, after=text)
         except Exception:
             pass
 
     # hexadecimal character reference
     if hexadecimal:
         try:
+            prev_text = text
             text = HEX_PATTERN.sub(lambda m: chr(int(m.group(1), 16)), text)
+            if track_steps and text != prev_text:
+                _record('entity', mode='hexadecimal', before=prev_text, after=text)
         except Exception:
             pass
 
     # re normalize text
+    prev_text = text
     if allow_unicode:
         text = unicodedata.normalize('NFKC', text)
     else:
         text = unicodedata.normalize('NFKD', text)
+    if track_steps and text != prev_text:
+        _record('normalize', mode='final', before=prev_text, after=text)
 
     # make the text lowercase (optional)
     if lowercase:
+        prev_text = text
         text = text.lower()
+        if track_steps and text != prev_text:
+            _record('lowercase', before=prev_text, after=text)
 
     # remove generated quotes -- post-process
+    prev_text = text
     text = QUOTE_PATTERN.sub('', text)
+    if track_steps and text != prev_text:
+        _record('delete', pattern='quotes', before=prev_text, after=text)
 
     # cleanup numbers
+    prev_text = text
     text = NUMBERS_PATTERN.sub('', text)
+    if track_steps and text != prev_text:
+        _record('cleanup', pattern='numbers', before=prev_text, after=text)
 
     # replace all other unwanted characters
     if allow_unicode:
         pattern = regex_pattern or DISALLOWED_UNICODE_CHARS_PATTERN
     else:
         pattern = regex_pattern or DISALLOWED_CHARS_PATTERN
 
+    prev_text = text
     text = re.sub(pattern, DEFAULT_SEPARATOR, text)
+    if track_steps and text != prev_text:
+        _record('regex_cleanup', before=prev_text, after=text)
 
     # remove redundant
+    prev_text = text
     text = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, text).strip(DEFAULT_SEPARATOR)
+    if track_steps and text != prev_text:
+        _record('deduplicate', pattern='dashes', before=prev_text, after=text)
 
     # remove stopwords
     if stopwords:
+        prev_text = text
         if lowercase:
             stopwords_lower = [s.lower() for s in stopwords]
             words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords_lower]
+            removed = [w for w in text.split(DEFAULT_SEPARATOR) if w in stopwords_lower]
         else:
             words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords]
+            removed = [w for w in text.split(DEFAULT_SEPARATOR) if w in stopwords]
         text = DEFAULT_SEPARATOR.join(words)
+        if track_steps and removed:
+            _record('stopwords', removed=removed, before=prev_text, after=text)
 
     # finalize user-specific replacements
     if replacements:
         for old, new in replacements:
+            prev_text = text
             text = text.replace(old, new)
+            if track_steps and text != prev_text:
+                _record('replace', old=old, new=new, phase='final', before=prev_text, after=text)
 
     # smart truncate if requested
     if max_length > 0:
+        prev_text = text
         text = smart_truncate(text, max_length, word_boundary, DEFAULT_SEPARATOR, save_order)
+        if track_steps and text != prev_text:
+            _record('truncate', before=prev_text, after=text)
 
     if separator != DEFAULT_SEPARATOR:
+        prev_text = text
         text = text.replace(DEFAULT_SEPARATOR, separator)
+        if track_steps and text != prev_text:
+            _record('separator', old=DEFAULT_SEPARATOR, new=separator, before=prev_text, after=text)
+
+    return text, steps
+
 
-    return text
+def slugify(
+    text: str,
+    entities: bool = True,
+    decimal: bool = True,
+    hexadecimal: bool = True,
+    max_length: int = 0,
+    word_boundary: bool = False,
+    separator: str = DEFAULT_SEPARATOR,
+    save_order: bool = False,
+    stopwords: Iterable[str] = (),
+    regex_pattern: re.Pattern[str] | str | None = None,
+    lowercase: bool = True,
+    replacements: Iterable[Iterable[str]] = (),
+    allow_unicode: bool = False,
+) -> str:
+    """
+    Make a slug from the given text (delegates to _slugify_core with track_steps=False).
+    :param text (str): initial text
+    :param entities (bool): converts named html entities (e.g. &amp;) to unicode
+    :param decimal (bool): converts html decimal character references to unicode
+    :param hexadecimal (bool): converts html hexadecimal character references to unicode
+    :param max_length (int): maximum output length; truncation only happens when max_length > 0 (default 0 = off)
+    :param word_boundary (bool): truncates to complete word even if length ends up shorter than max_length
+    :param save_order (bool): when set, does not include shorter subsequent words even if they fit
+    :param separator (str): separator between words; replaces the default separator as the last step
+    :param stopwords (iterable): words to remove from the slug; they are lowercased first when lowercase is True
+    :param regex_pattern (str | re.Pattern | None): pattern for disallowed characters; a built-in default is used when falsy
+    :param lowercase (bool): lowercases the text; set to False to preserve case
+    :param replacements (iterable): (old, new) rules applied before and again after cleanup, e.g. [['|', 'or'], ['%', 'percent']]
+    :param allow_unicode (bool): allow unicode characters (NFKC-normalized) instead of passing the text through unidecode
+    :return (str): the resulting slug
+    """
+    result, _ = _slugify_core(
+        text=text,
+        entities=entities,
+        decimal=decimal,
+        hexadecimal=hexadecimal,
+        max_length=max_length,
+        word_boundary=word_boundary,
+        separator=separator,
+        save_order=save_order,
+        stopwords=stopwords,
+        regex_pattern=regex_pattern,
+        lowercase=lowercase,
+        replacements=replacements,
+        allow_unicode=allow_unicode,
+        track_steps=False,
+    )
+    return result
+
+
+def slugify_with_map(
+    text: str,
+    entities: bool = True,
+    decimal: bool = True,
+    hexadecimal: bool = True,
+    max_length: int = 0,
+    word_boundary: bool = False,
+    separator: str = DEFAULT_SEPARATOR,
+    save_order: bool = False,
+    stopwords: Iterable[str] = (),
+    regex_pattern: re.Pattern[str] | str | None = None,
+    lowercase: bool = True,
+    replacements: Iterable[Iterable[str]] = (),
+    allow_unicode: bool = False,
+) -> SlugWithMap:
+    """
+    Make a slug from the given text and record the transformation steps applied (_slugify_core with track_steps=True).
+    :param text (str): initial text
+    :param entities (bool): converts named html entities (e.g. &amp;) to unicode
+    :param decimal (bool): converts html decimal character references to unicode
+    :param hexadecimal (bool): converts html hexadecimal character references to unicode
+    :param max_length (int): maximum output length; truncation only happens when max_length > 0 (default 0 = off)
+    :param word_boundary (bool): truncates to complete word even if length ends up shorter than max_length
+    :param save_order (bool): when set, does not include shorter subsequent words even if they fit
+    :param separator (str): separator between words; replaces the default separator as the last step
+    :param stopwords (iterable): words to remove from the slug; they are lowercased first when lowercase is True
+    :param regex_pattern (str | re.Pattern | None): pattern for disallowed characters; a built-in default is used when falsy
+    :param lowercase (bool): lowercases the text; set to False to preserve case
+    :param replacements (iterable): (old, new) rules applied before and again after cleanup, e.g. [['|', 'or'], ['%', 'percent']]
+    :param allow_unicode (bool): allow unicode characters (NFKC-normalized) instead of passing the text through unidecode
+    :return (SlugWithMap): slug (str) plus steps, a list of dicts in application order, each with a 'type' key and step details
+    """
+    result, steps = _slugify_core(
+        text=text,
+        entities=entities,
+        decimal=decimal,
+        hexadecimal=hexadecimal,
+        max_length=max_length,
+        word_boundary=word_boundary,
+        separator=separator,
+        save_order=save_order,
+        stopwords=stopwords,
+        regex_pattern=regex_pattern,
+        lowercase=lowercase,
+        replacements=replacements,
+        allow_unicode=allow_unicode,
+        track_steps=True,
+    )
+    return SlugWithMap(slug=result, steps=steps)