Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 45 additions & 24 deletions crates/vm/src/anystr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@ use crate::{
convert::TryFromBorrowedObject,
function::OptionalOption,
};
use icu_properties::{
CodePointSetData,
props::{Alphabetic, ChangesWhenLowercased, ChangesWhenUppercased},
use icu_properties::props::{
BinaryProperty, EnumeratedProperty, GeneralCategory, GeneralCategoryGroup,
};
use num_traits::{cast::ToPrimitive, sign::Signed};

Expand Down Expand Up @@ -405,42 +404,64 @@ pub trait AnyStr {
rustpython_common::str::zfill(self.as_bytes(), width)
}

// Unified form of CPython functions:
// _Py_bytes_islower
// unicode_islower_impl
// _Py_bytes_islower
fn py_islower(&self) -> bool {
let case_change = CodePointSetData::new::<ChangesWhenLowercased>();
let alphabetic = CodePointSetData::new::<Alphabetic>();
let mut lower = false;
for chunk in self.as_bytes().utf8_chunks().map(|c| c.valid()) {
if chunk.chars().any(|c| case_change.contains(c)) {
for byte in self
.as_bytes()
.iter()
.copied()
.filter(u8::is_ascii_alphabetic)
{
if byte.is_ascii_uppercase() {
return false;
}

if !lower && chunk.chars().any(|c| alphabetic.contains(c)) {
lower = true;
}
lower = true;
}
lower
}

// Unified form of CPython functions:
// _Py_bytes_isupper
// unicode_isupper_impl
// _Py_bytes_isupper
fn py_isupper(&self) -> bool {
let case_change = CodePointSetData::new::<ChangesWhenUppercased>();
let alphabetic = CodePointSetData::new::<Alphabetic>();
let mut upper = false;
for chunk in self.as_bytes().utf8_chunks().map(|c| c.valid()) {
if chunk.chars().any(|c| case_change.contains(c)) {
for byte in self
.as_bytes()
.iter()
.copied()
.filter(u8::is_ascii_alphabetic)
{
if byte.is_ascii_lowercase() {
return false;
}
upper = true;
}
upper
}

if !upper && chunk.chars().any(|c| alphabetic.contains(c)) {
upper = true;
// Unified form of CPython functions:
// unicode_isupper_impl
// unicode_islower_impl
fn is_cased<VALID, INVALID>(&self) -> bool
where
VALID: BinaryProperty,
INVALID: BinaryProperty,
{
let mut all_cased = false;
for c in self
.as_bytes()
.utf8_chunks()
.flat_map(|c| c.valid().chars())
{
if INVALID::for_char(c)
|| GeneralCategoryGroup::TitlecaseLetter.contains(GeneralCategory::for_char(c))
{
return false;
}
if !all_cased && VALID::for_char(c) {
all_cased = true;
}
}
upper
all_cased
Comment thread
joshuamegnauth54 marked this conversation as resolved.
}
}

Expand Down
20 changes: 18 additions & 2 deletions crates/vm/src/builtins/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@ use super::{
builtins_iter,
},
};
use crate::common::lock::LazyLock;
use crate::{
AsObject, Context, Py, PyExact, PyObject, PyObjectRef, PyPayload, PyRef, PyRefExact, PyResult,
TryFromBorrowedObject, VirtualMachine,
anystr::{self, AnyStr, AnyStrContainer, AnyStrWrapper, adjust_indices},
atomic_func,
cformat::cformat_string,
class::PyClassImpl,
common::lock::LazyLock,
common::str::{PyKindStr, StrData, StrKind},
convert::{IntoPyException, ToPyException, ToPyObject, ToPyResult},
format::{format, format_map},
Expand Down Expand Up @@ -46,7 +46,7 @@ use rustpython_common::{

use icu_properties::props::{
BidiClass, BinaryProperty, EnumeratedProperty, GeneralCategory, GeneralCategoryGroup,
NumericType, XidContinue, XidStart,
Lowercase, NumericType, Uppercase, XidContinue, XidStart,
};
use unicode_casing::CharExt;

Expand Down Expand Up @@ -2330,6 +2330,14 @@ impl AnyStr for str {
}
splits
}

// `str` override of the lowercase test: delegates to `is_cased`, with
// `Lowercase` as the property at least one character must have and
// `Uppercase` as the property that makes the result false.
fn py_islower(&self) -> bool {
self.is_cased::<Lowercase, Uppercase>()
}

// `str` override of the uppercase test: delegates to `is_cased`, with
// `Uppercase` as the property at least one character must have and
// `Lowercase` as the property that makes the result false.
fn py_isupper(&self) -> bool {
self.is_cased::<Uppercase, Lowercase>()
}
}

impl AnyStrContainer<Wtf8> for Wtf8Buf {
Expand Down Expand Up @@ -2442,6 +2450,14 @@ impl AnyStr for Wtf8 {
}
splits
}

// `Wtf8` override of the lowercase test: delegates to `is_cased`, with
// `Lowercase` as the property at least one character must have and
// `Uppercase` as the property that makes the result false.
fn py_islower(&self) -> bool {
self.is_cased::<Lowercase, Uppercase>()
}

// `Wtf8` override of the uppercase test: delegates to `is_cased`, with
// `Uppercase` as the property at least one character must have and
// `Lowercase` as the property that makes the result false.
fn py_isupper(&self) -> bool {
self.is_cased::<Uppercase, Lowercase>()
}
}

impl AnyStrContainer<AsciiStr> for AsciiString {
Expand Down
2 changes: 1 addition & 1 deletion crates/vm/src/stdlib/_thread.rs
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ pub(crate) mod _thread {
{
// On Unix, use pthread ID from the handle
use std::os::unix::thread::JoinHandleExt;
handle.as_pthread_t() as u64
handle.as_pthread_t() as _
}
#[cfg(not(unix))]
{
Expand Down
9 changes: 8 additions & 1 deletion extra_tests/snippets/builtin_str.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import sys

from testutils import AssertRaises, assert_raises, skip_if_unsupported

assert "".__eq__(1) == NotImplemented
Expand Down Expand Up @@ -250,7 +252,10 @@
assert not "\U0001f431".islower()
assert "\U0001f431 CAT".isupper()
assert "\U0001f431 cat".islower()
assert "\u0295".islower()
if sys.version_info >= (3, 15):
assert not "\u0295".islower()
assert not "\u0295".isupper()
assert not "\u0295".istitle()
assert "\u1c89".isupper()
assert "hello, my name is".partition("my ") == ("hello, ", "my ", "name is")
assert "hello".partition("is") == ("hello", "", "")
Expand Down Expand Up @@ -525,6 +530,8 @@ def try_mutate_str():
assert "1a".islower()
assert "가나다a".islower()
assert "가나다A".isupper()
assert not "ジョジョ".isupper()
assert not "ジョジョ".islower()

# test str.format_map()
#
Expand Down
12 changes: 12 additions & 0 deletions extra_tests/snippets/builtin_str_encode.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,15 @@ def round_trip(s, encoding="utf-8"):
round_trip("👺♦ 𝐚Şđƒ ☆☝")
round_trip("☢🐣 ᖇ𝓤𝕊тⓟ𝕐𝕥卄σ𝔫 ♬👣")
round_trip("💀👌 ק𝔂tℍⓞ𝓷 3 🔥👤")

# Bytes should not assume an encoding for isupper/islower
assert "Æ".isupper()
assert not "Æ".encode().isupper()
assert "æ".islower()
assert not "æ".encode().islower()

# Invalid Unicode
assert not b"\x80\x80".islower()
assert not b"\x80\x80".isupper()
assert b"\x80cat\x80".islower()
assert b"\x80CAT\x80".isupper()
Loading