diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py
index 6d0e935c1c..5317bfc4dc 100644
--- a/Lib/test/test_str.py
+++ b/Lib/test/test_str.py
@@ -974,7 +974,6 @@ def test_title(self):
         self.assertEqual('A\u03a3 \u1fa1xy'.title(), 'A\u03c2 \u1fa9xy')
         self.assertEqual('A\u03a3A'.title(), 'A\u03c3a')
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; + 𐐧
     def test_swapcase(self):
         string_tests.StringLikeTest.test_swapcase(self)
         self.assertEqual('\U0001044F'.swapcase(), '\U00010427')
diff --git a/crates/vm/src/builtins/str.rs b/crates/vm/src/builtins/str.rs
index 402cde304a..a4b3668c1b 100644
--- a/crates/vm/src/builtins/str.rs
+++ b/crates/vm/src/builtins/str.rs
@@ -1089,19 +1089,23 @@ impl PyStr {
 
     #[pymethod]
     fn swapcase(&self) -> Wtf8Buf {
-        let mut swapped_str = Wtf8Buf::with_capacity(self.data.len());
-        for c_orig in self.as_wtf8().code_points() {
-            let c = c_orig.to_char_lossy();
-            // to_uppercase returns an iterator because case changes may be multiple bytes
-            if c.is_lowercase() {
-                swapped_str.extend(c.to_uppercase());
-            } else if c.is_uppercase() {
-                swapped_str.extend(c.to_lowercase());
-            } else {
-                swapped_str.push(c_orig);
+        match self.as_str_kind() {
+            PyKindStr::Ascii(s) => crate::bytes_inner::swapcase_ascii(s.as_bytes()).into(),
+            PyKindStr::Utf8(s) => {
+                let mut out = VecFmtWriter(Vec::with_capacity(s.len()));
+                swapcase_utf8(s, &mut out);
+                out.0.into()
+            }
+            PyKindStr::Wtf8(s) => {
+                let mut out = VecFmtWriter(Vec::with_capacity(s.len()));
+                // Case-swap each valid UTF-8 run; copy the invalid bytes after it unchanged.
+                for chunk in s.as_bytes().utf8_chunks() {
+                    swapcase_utf8(chunk.valid(), &mut out);
+                    out.0.extend(chunk.invalid());
+                }
+                out.0.into()
             }
         }
-        swapped_str
     }
 
     #[pymethod]
@@ -1555,6 +1559,26 @@ impl PyStr {
     }
 }
 
+/// Swap the case of every character in `s`, appending the UTF-8 result to `out`.
+///
+/// Uppercase characters are handed to `lowercase_or_sigma`; lowercase
+/// characters are uppercased (one `char` may expand to several); characters
+/// that are neither are copied through unchanged.
+fn swapcase_utf8(s: &str, out: &mut VecFmtWriter) {
+    let mut buf = [0u8; 4];
+    for (i, ch) in s.char_indices() {
+        if ch.is_uppercase() {
+            lowercase_or_sigma(ch, s, i, out);
+        } else if ch.is_lowercase() {
+            for upper in ch.to_uppercase() {
+                out.0.extend(upper.encode_utf8(&mut buf).as_bytes());
+            }
+        } else {
+            out.0.extend(ch.encode_utf8(&mut buf).as_bytes());
+        }
+    }
+}
+
 impl PyRef {
     #[must_use]
     pub fn is_empty(&self) -> bool {
diff --git a/crates/vm/src/bytes_inner.rs b/crates/vm/src/bytes_inner.rs
index c864524561..9a8b182424 100644
--- a/crates/vm/src/bytes_inner.rs
+++ b/crates/vm/src/bytes_inner.rs
@@ -413,15 +413,7 @@ impl PyBytesInner {
     }
 
     pub fn swapcase(&self) -> Vec<u8> {
-        let mut new: Vec<u8> = Vec::with_capacity(self.elements.len());
-        for w in &self.elements {
-            match w {
-                b'A'..=b'Z' => new.push(w.to_ascii_lowercase()),
-                b'a'..=b'z' => new.push(w.to_ascii_uppercase()),
-                x => new.push(*x),
-            }
-        }
-        new
+        swapcase_ascii(self.as_bytes())
     }
 
     pub fn hex(
@@ -1236,3 +1228,14 @@ pub(crate) fn bytes_to_hex(
 pub(crate) const fn is_py_ascii_whitespace(b: u8) -> bool {
     matches!(b, b'\t' | b'\n' | b'\x0C' | b'\r' | b' ' | b'\x0B')
 }
+
+/// Return a copy of `bytes` with the case of every ASCII letter flipped.
+///
+/// ASCII upper- and lowercase letters differ only in bit `0x20`, so toggling
+/// that bit swaps the case; every other byte is copied unchanged.
+pub(crate) fn swapcase_ascii(bytes: &[u8]) -> Vec<u8> {
+    bytes
+        .iter()
+        .map(|&b| if b.is_ascii_alphabetic() { b ^ 0x20 } else { b })
+        .collect()
+}