Skip to content

Commit e009cc0

Browse files
fix: Swapcase must handle multibyte expansions (RustPython#7559)
`swapcase` used `to_ascii_lowercase` and `to_ascii_uppercase` to swap cases. This is fine for ASCII, but those methods leave non-ASCII letters unchanged, and a full Unicode case mapping may expand one code point into several, which leads to incorrect case swaps for some languages. The fix is to use `to_lowercase` and `to_uppercase` instead. Unfortunately, this can lead to a realloc in `swapcase` when the swapped string needs more bytes than the original. Part of RustPython#7526.
1 parent eed618d commit e009cc0

File tree

2 files changed

+5
-3
lines changed

2 files changed

+5
-3
lines changed

crates/vm/src/builtins/str.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,11 +1045,11 @@ impl PyStr {
10451045
let mut swapped_str = Wtf8Buf::with_capacity(self.data.len());
10461046
for c_orig in self.as_wtf8().code_points() {
10471047
let c = c_orig.to_char_lossy();
1048-
// to_uppercase returns an iterator, to_ascii_uppercase returns the char
1048+
// to_uppercase returns an iterator because one char's case mapping may expand to multiple chars
10491049
if c.is_lowercase() {
1050-
swapped_str.push_char(c.to_ascii_uppercase());
1050+
swapped_str.extend(c.to_uppercase());
10511051
} else if c.is_uppercase() {
1052-
swapped_str.push_char(c.to_ascii_lowercase());
1052+
swapped_str.extend(c.to_lowercase());
10531053
} else {
10541054
swapped_str.push(c_orig);
10551055
}

extra_tests/snippets/builtin_str.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,8 @@
235235
assert not "😂".isidentifier()
236236
assert not "123".isidentifier()
237237

238+
assert "Σίσυφος".swapcase() == "σΊΣΥΦΟΣ"
239+
238240
# String Formatting
239241
assert "{} {}".format(1, 2) == "1 2"
240242
assert "{0} {1}".format(2, 3) == "2 3"

0 commit comments

Comments
 (0)