Skip to content
8 changes: 8 additions & 0 deletions Doc/library/textwrap.rst
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,14 @@ hyphenated words; only then will long words be broken if necessary, unless
.. versionadded:: 3.4


.. attribute:: text_len

(default: ``len``) Callable used to measure the width of a string: it is
called with a string and returns that string's width. Provide a custom
function to account for, e.g., wide characters that occupy more than one
column.
Comment thread
merwok marked this conversation as resolved.

.. versionadded:: 3.11


.. index:: single: ...; placeholder

.. attribute:: placeholder
Expand Down
45 changes: 45 additions & 0 deletions Lib/test/test_textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#

import unittest
import unicodedata

from textwrap import TextWrapper, wrap, fill, dedent, indent, shorten

Expand Down Expand Up @@ -1076,5 +1077,49 @@ def test_first_word_too_long_but_placeholder_fits(self):
self.check_shorten("Helloo", 5, "[...]")


class WideCharacterTestCase(BaseTestCase):
    """Tests for the ``text_len`` option using a wide-character measure.

    Each test runs the same input twice: once with the default measure,
    where the wrench emoji counts as one unit, and once with
    ``text_len=self.text_len``, where it counts as two.  The expected
    output differs between the two runs.
    """

    def text_len(self, text):
        """Return the width of *text*, counting wide characters as 2.

        A character is counted as 2 when its East Asian Width property is
        'F' (Fullwidth) or 'W' (Wide); every other character counts as 1.
        """
        return sum(
            2 if unicodedata.east_asian_width(c) in {'F', 'W'} else 1
            for c in text
        )

    def check_shorten(self, text, width, expect, **kwargs):
        """Call shorten() with the given arguments and check the result."""
        result = shorten(text, width, **kwargs)
        self.check(result, expect)

    def test_wrap(self):
        # 5 units with len (fits in width 5); 6 with text_len (must wrap).
        text = "123 🔧"
        self.check_wrap(text, 5, ["123 🔧"])
        self.check_wrap(text, 5, ["123", "🔧"], text_len=self.text_len)

    def test_wrap_initial_indent(self):
        # The indent is measured with text_len too: 1 + 5 fits in width 6,
        # 2 + 5 does not.
        text = "12 12"
        self.check_wrap(text, 6, ["🔧12 12"], initial_indent="🔧")
        self.check_wrap(text, 6, ["🔧12", "12"], initial_indent="🔧",
                        text_len=self.text_len)

    def test_wrap_subsequent_indent(self):
        # Same as above, but for the indent applied to continuation lines.
        text = "12 12 12 12"
        self.check_wrap(text, 6, ["12 12", "🔧12 12"],
                        subsequent_indent="🔧")
        self.check_wrap(text, 6, ["12 12", "🔧12", "🔧12"],
                        subsequent_indent="🔧", text_len=self.text_len)

    def test_shorten(self):
        # 9 units with len (returned unchanged at width 9); 10 with
        # text_len, so the last word is replaced by the placeholder.
        text = "123 1234🔧"
        self.check_shorten(text, 9, "123 1234🔧")
        self.check_shorten(text, 9, "123 [...]", text_len=self.text_len)

    def test_shorten_placeholder(self):
        # The placeholder is measured with text_len as well: " 🔧" is 2
        # units with len and 3 with text_len, so one more word is dropped.
        text = "123 1 123"
        self.check_shorten(text, 7, "123 1 🔧", placeholder=" 🔧")
        self.check_shorten(text, 7, "123 🔧", placeholder=" 🔧",
                           text_len=self.text_len)


if __name__ == '__main__':
unittest.main()
26 changes: 15 additions & 11 deletions Lib/textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ def __init__(self,
tabsize=8,
*,
max_lines=None,
placeholder=' [...]'):
placeholder=' [...]',
text_len=len):
Comment thread
merwok marked this conversation as resolved.
self.width = width
self.initial_indent = initial_indent
self.subsequent_indent = subsequent_indent
Expand All @@ -138,6 +139,7 @@ def __init__(self,
self.tabsize = tabsize
self.max_lines = max_lines
self.placeholder = placeholder
self.text_len = text_len


# -- Private methods -----------------------------------------------
Expand Down Expand Up @@ -217,7 +219,7 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
if self.break_long_words:
end = space_left
chunk = reversed_chunks[-1]
if self.break_on_hyphens and len(chunk) > space_left:
if self.break_on_hyphens and self.text_len(chunk) > space_left:
# break after last hyphen, but only if there are
# non-hyphens before it
hyphen = chunk.rfind('-', 0, space_left)
Expand Down Expand Up @@ -259,7 +261,8 @@ def _wrap_chunks(self, chunks):
indent = self.subsequent_indent
else:
indent = self.initial_indent
if len(indent) + len(self.placeholder.lstrip()) > self.width:
if (self.text_len(indent) +
self.text_len(self.placeholder.lstrip()) > self.width):
raise ValueError("placeholder too large for max width")

# Arrange in reverse order so items can be efficiently popped
Expand All @@ -280,15 +283,15 @@ def _wrap_chunks(self, chunks):
indent = self.initial_indent

# Maximum width for this line.
width = self.width - len(indent)
width = self.width - self.text_len(indent)

# First chunk on line is whitespace -- drop it, unless this
# is the very beginning of the text (ie. no lines started yet).
if self.drop_whitespace and chunks[-1].strip() == '' and lines:
del chunks[-1]

while chunks:
l = len(chunks[-1])
l = self.text_len(chunks[-1])

# Can at least squeeze this chunk onto the current line.
if cur_len + l <= width:
Expand All @@ -301,13 +304,13 @@ def _wrap_chunks(self, chunks):

# The current line is full, and the next chunk is too big to
# fit on *any* line (not just this one).
if chunks and len(chunks[-1]) > width:
if chunks and self.text_len(chunks[-1]) > width:
self._handle_long_word(chunks, cur_line, cur_len, width)
cur_len = sum(map(len, cur_line))
cur_len = sum(map(self.text_len, cur_line))

# If the last chunk on this line is all whitespace, drop it.
if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
cur_len -= len(cur_line[-1])
cur_len -= self.text_len(cur_line[-1])
del cur_line[-1]

if cur_line:
Expand All @@ -323,16 +326,17 @@ def _wrap_chunks(self, chunks):
else:
while cur_line:
if (cur_line[-1].strip() and
cur_len + len(self.placeholder) <= width):
cur_len + self.text_len(self.placeholder) <= width):
cur_line.append(self.placeholder)
lines.append(indent + ''.join(cur_line))
break
cur_len -= len(cur_line[-1])
cur_len -= self.text_len(cur_line[-1])
del cur_line[-1]
else:
if lines:
prev_line = lines[-1].rstrip()
if (len(prev_line) + len(self.placeholder) <=
if (self.text_len(prev_line) +
self.text_len(self.placeholder) <=
self.width):
lines[-1] = prev_line + self.placeholder
break
Expand Down