Skip to content

Commit 13f87a4

Browse files
committed
Fix ZWJ and edge cases
1 parent 1d402e0 commit 13f87a4

4 files changed

Lines changed: 47 additions & 15 deletions

File tree

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [14.3.2] - 2026-02-01
9+
10+
### Fixed
11+
12+
- Fixed a crash when a zero width joiner (ZWJ, `\u200d`) is the last character of a string
13+
- Fixed control codes reporting a width of 1 instead of 0
14+
815
## [14.3.1] - 2026-01-24
916

1017
### Fixed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
name = "rich"
33
homepage = "https://github.com/Textualize/rich"
44
documentation = "https://rich.readthedocs.io/en/latest/"
5-
version = "14.3.1"
5+
version = "14.3.2"
66
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
77
authors = ["Will McGugan <willmcgugan@gmail.com>"]
88
license = "MIT"

rich/cells.py

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -55,23 +55,32 @@ def get_character_cell_size(character: str, unicode_version: str = "auto") -> in
5555
int: Number of cells (0, 1 or 2) occupied by that character.
5656
"""
5757
codepoint = ord(character)
58+
if codepoint and codepoint < 32 or 0x07F <= codepoint < 0x0A0:
59+
return 0
5860
table = load_cell_table(unicode_version).widths
59-
if codepoint > table[-1][1]:
61+
62+
# Fast path: codepoint beyond table range
63+
last_entry = table[-1]
64+
if codepoint > last_entry[1]:
6065
return 1
66+
67+
# Binary search with fewer tuple unpacks
6168
lower_bound = 0
6269
upper_bound = len(table) - 1
63-
index = (lower_bound + upper_bound) // 2
64-
while True:
65-
start, end, width = table[index]
70+
71+
while lower_bound <= upper_bound:
72+
index = (lower_bound + upper_bound) >> 1 # Faster than // 2
73+
entry = table[index]
74+
start = entry[0]
75+
6676
if codepoint < start:
6777
upper_bound = index - 1
68-
elif codepoint > end:
78+
elif codepoint > entry[1]: # end
6979
lower_bound = index + 1
7080
else:
71-
return 0 if width == -1 else width
72-
if upper_bound < lower_bound:
73-
break
74-
index = (lower_bound + upper_bound) // 2
81+
# Found: codepoint is in range [start, end]
82+
return entry[2]
83+
7584
return 1
7685

7786

@@ -135,19 +144,20 @@ def _cell_len(text: str, unicode_version: str) -> int:
135144

136145
SPECIAL = {"\u200d", "\ufe0f"}
137146

138-
iter_characters = iter(text)
147+
index = 0
148+
character_count = len(text)
139149

140-
for character in iter_characters:
150+
while index < character_count:
151+
character = text[index]
141152
if character in SPECIAL:
142-
if character == "\u200d":
143-
next(iter_characters)
144-
elif last_measured_character:
153+
if character == "\ufe0f" and last_measured_character:
145154
total_width += last_measured_character in cell_table.narrow_to_wide
146155
last_measured_character = None
147156
else:
148157
if character_width := get_character_cell_size(character, unicode_version):
149158
last_measured_character = character
150159
total_width += character_width
160+
index += 1
151161

152162
return total_width
153163

tests/test_cells.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,3 +187,18 @@ def test_nerd_font():
187187
"""Regression test for https://github.com/Textualize/rich/issues/3943"""
188188
# Not allocated by unicode, but used by nerd fonts
189189
assert cell_len("\U000f024d") == 1
190+
191+
192+
def test_zwj():
    """Zero width joiners add no cells, alone or next to other characters."""
    # Maps each input string to the cell width it is expected to report.
    expected_widths = {
        "": 0,
        "\u200d": 0,
        "1\u200d": 1,
        "1\u200d2": 2,
    }
    for text, expected in expected_widths.items():
        assert cell_len(text) == expected
198+
199+
200+
def test_non_printable():
    """Non printable characters should report a width of 0."""
    # range(32) covers every C0 control code, 0x00 through 0x1F inclusive,
    # so the last one (0x1F, unit separator) is checked as well.
    for ordinal in range(32):
        assert cell_len(chr(ordinal)) == 0

0 commit comments

Comments
 (0)