python · xi · Sep 3, 2021 · Sep 3, 2021 · Sep 3, 2021 · Sep 3, 2021
diff --git a/Doc/library/textwrap.rst b/Doc/library/textwrap.rst
@@ -281,6 +281,14 @@ hyphenated words; only then will long words be broken if necessary, unless
       .. versionadded:: 3.4
 
 
+   .. attribute:: text_len
+
+      (default: ``len``) A callable that returns the length of a string. You
+      can provide a custom function, e.g. to account for wide characters.
+
+      .. versionadded:: 3.11
+
+
    .. index:: single: ...; placeholder
 
    .. attribute:: placeholder

diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
@@ -9,6 +9,7 @@
 #
 
 import unittest
+import unicodedata
 
 from textwrap import TextWrapper, wrap, fill, dedent, indent, shorten
 
@@ -1076,5 +1077,49 @@ def test_first_word_too_long_but_placeholder_fits(self):
         self.check_shorten("Helloo", 5, "[...]")
 
 
+class WideCharacterTestCase(BaseTestCase):
+    """Exercise the ``text_len`` option with double-width characters."""
+
+    def text_len(self, text):
+        """Return the width of *text*, counting East Asian F/W chars as 2."""
+        return sum(2 if unicodedata.east_asian_width(c) in {'F', 'W'} else 1
+                   for c in text)
+
+    def check_shorten(self, text, width, expect, **kwargs):
+        """Check shorten(text, width, **kwargs) against *expect*."""
+        result = shorten(text, width, **kwargs)
+        self.check(result, expect)
+
+    def test_wrap(self):
+        # With the default len() the emoji is one column, so the text fits;
+        # counted as two columns it exceeds the width and must wrap.
+        text = "123 🔧"
+        self.check_wrap(text, 5, ["123 🔧"])
+        self.check_wrap(text, 5, ["123", "🔧"], text_len=self.text_len)
+
+    def test_wrap_initial_indent(self):
+        text = "12 12"
+        self.check_wrap(text, 6, ["🔧12 12"], initial_indent="🔧")
+        self.check_wrap(text, 6, ["🔧12", "12"], initial_indent="🔧",
+                        text_len=self.text_len)
+
+    def test_wrap_subsequent_indent(self):
+        text = "12 12 12 12"
+        self.check_wrap(text, 6, ["12 12", "🔧12 12"], subsequent_indent="🔧")
+        self.check_wrap(text, 6, ["12 12", "🔧12", "🔧12"],
+                        subsequent_indent="🔧", text_len=self.text_len)
+
+    def test_shorten(self):
+        text = "123 1234🔧"
+        expected = "123 [...]"
+        self.check_shorten(text, 9, text)
+        self.check_shorten(text, 9, expected, text_len=self.text_len)
+
+    def test_shorten_placeholder(self):
+        text = "123 1 123"
+        self.check_shorten(text, 7, "123 1 🔧", placeholder=" 🔧")
+        self.check_shorten(text, 7, "123 🔧", placeholder=" 🔧",
+                           text_len=self.text_len)
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
@@ -125,7 +125,8 @@ def __init__(self,
                  tabsize=8,
                  *,
                  max_lines=None,
-                 placeholder=' [...]'):
+                 placeholder=' [...]',
+                 text_len=len):
         self.width = width
         self.initial_indent = initial_indent
         self.subsequent_indent = subsequent_indent
@@ -138,6 +139,7 @@ def __init__(self,
         self.tabsize = tabsize
         self.max_lines = max_lines
         self.placeholder = placeholder
+        self.text_len = text_len
 
 
     # -- Private methods -----------------------------------------------
@@ -217,7 +219,7 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
         if self.break_long_words:
             end = space_left
             chunk = reversed_chunks[-1]
-            if self.break_on_hyphens and len(chunk) > space_left:
+            if self.break_on_hyphens and self.text_len(chunk) > space_left:
                 # break after last hyphen, but only if there are
                 # non-hyphens before it
                 hyphen = chunk.rfind('-', 0, space_left)
@@ -259,7 +261,8 @@ def _wrap_chunks(self, chunks):
                 indent = self.subsequent_indent
             else:
                 indent = self.initial_indent
-            if len(indent) + len(self.placeholder.lstrip()) > self.width:
+            if (self.text_len(indent) +
+                    self.text_len(self.placeholder.lstrip()) > self.width):
                 raise ValueError("placeholder too large for max width")
 
         # Arrange in reverse order so items can be efficiently popped
@@ -280,15 +283,15 @@ def _wrap_chunks(self, chunks):
                 indent = self.initial_indent
 
             # Maximum width for this line.
-            width = self.width - len(indent)
+            width = self.width - self.text_len(indent)
 
             # First chunk on line is whitespace -- drop it, unless this
             # is the very beginning of the text (ie. no lines started yet).
             if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                 del chunks[-1]
 
             while chunks:
-                l = len(chunks[-1])
+                l = self.text_len(chunks[-1])
 
                 # Can at least squeeze this chunk onto the current line.
                 if cur_len + l <= width:
@@ -301,13 +304,13 @@ def _wrap_chunks(self, chunks):
 
             # The current line is full, and the next chunk is too big to
             # fit on *any* line (not just this one).
-            if chunks and len(chunks[-1]) > width:
+            if chunks and self.text_len(chunks[-1]) > width:
                 self._handle_long_word(chunks, cur_line, cur_len, width)
-                cur_len = sum(map(len, cur_line))
+                cur_len = sum(map(self.text_len, cur_line))
 
             # If the last chunk on this line is all whitespace, drop it.
             if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
-                cur_len -= len(cur_line[-1])
+                cur_len -= self.text_len(cur_line[-1])
                 del cur_line[-1]
 
             if cur_line:
@@ -323,16 +326,17 @@ def _wrap_chunks(self, chunks):
                 else:
                     while cur_line:
                         if (cur_line[-1].strip() and
-                            cur_len + len(self.placeholder) <= width):
+                            cur_len + self.text_len(self.placeholder) <= width):
                             cur_line.append(self.placeholder)
                             lines.append(indent + ''.join(cur_line))
                             break
-                        cur_len -= len(cur_line[-1])
+                        cur_len -= self.text_len(cur_line[-1])
                         del cur_line[-1]
                     else:
                         if lines:
                             prev_line = lines[-1].rstrip()
-                            if (len(prev_line) + len(self.placeholder) <=
+                            if (self.text_len(prev_line) +
+                                    self.text_len(self.placeholder) <=
                                     self.width):
                                 lines[-1] = prev_line + self.placeholder
                                 break