Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Fix isalpha/isalnum fallback for chars unassigned in Unicode 10; add regression tests

Agent-Logs-Url: https://github.com/RustPython/RustPython/sessions/43f9f7dc-f5af-48b8-b93b-6363bcda334c

Co-authored-by: youknowone <69878+youknowone@users.noreply.github.com>
  • Loading branch information
Copilot and youknowone committed Mar 29, 2026
commit b9cbd5133b3bea76e48e3a8a005e703477ebb0f6
23 changes: 11 additions & 12 deletions crates/sre_engine/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -443,18 +443,17 @@ pub(crate) const fn is_uni_linebreak(ch: u32) -> bool {
#[inline]
pub(crate) fn is_uni_alnum(ch: u32) -> bool {
char::try_from(ch)
.map(|c| {
matches!(
GeneralCategory::of(c),
GeneralCategory::UppercaseLetter
| GeneralCategory::LowercaseLetter
| GeneralCategory::TitlecaseLetter
| GeneralCategory::ModifierLetter
| GeneralCategory::OtherLetter
| GeneralCategory::DecimalNumber
| GeneralCategory::LetterNumber
| GeneralCategory::OtherNumber
)
.map(|c| match GeneralCategory::of(c) {
GeneralCategory::UppercaseLetter
| GeneralCategory::LowercaseLetter
| GeneralCategory::TitlecaseLetter
| GeneralCategory::ModifierLetter
| GeneralCategory::OtherLetter
| GeneralCategory::DecimalNumber
| GeneralCategory::LetterNumber
| GeneralCategory::OtherNumber => true,
GeneralCategory::Unassigned => c.is_alphanumeric(),
_ => false,
})
.unwrap_or(false)
}
Expand Down
40 changes: 19 additions & 21 deletions crates/vm/src/builtins/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -946,18 +946,17 @@ impl PyStr {
#[pymethod]
fn isalnum(&self) -> bool {
!self.data.is_empty()
&& self.char_all(|c| {
matches!(
GeneralCategory::of(c),
GeneralCategory::UppercaseLetter
| GeneralCategory::LowercaseLetter
| GeneralCategory::TitlecaseLetter
| GeneralCategory::ModifierLetter
| GeneralCategory::OtherLetter
| GeneralCategory::DecimalNumber
| GeneralCategory::LetterNumber
| GeneralCategory::OtherNumber
)
&& self.char_all(|c| match GeneralCategory::of(c) {
GeneralCategory::UppercaseLetter
| GeneralCategory::LowercaseLetter
| GeneralCategory::TitlecaseLetter
| GeneralCategory::ModifierLetter
| GeneralCategory::OtherLetter
| GeneralCategory::DecimalNumber
| GeneralCategory::LetterNumber
| GeneralCategory::OtherNumber => true,
GeneralCategory::Unassigned => c.is_alphanumeric(),
_ => false,
})
}

Expand Down Expand Up @@ -1070,15 +1069,14 @@ impl PyStr {
#[pymethod]
fn isalpha(&self) -> bool {
!self.data.is_empty()
&& self.char_all(|c| {
matches!(
GeneralCategory::of(c),
GeneralCategory::UppercaseLetter
| GeneralCategory::LowercaseLetter
| GeneralCategory::TitlecaseLetter
| GeneralCategory::ModifierLetter
| GeneralCategory::OtherLetter
)
&& self.char_all(|c| match GeneralCategory::of(c) {
GeneralCategory::UppercaseLetter
| GeneralCategory::LowercaseLetter
| GeneralCategory::TitlecaseLetter
| GeneralCategory::ModifierLetter
| GeneralCategory::OtherLetter => true,
GeneralCategory::Unassigned => c.is_alphabetic(),
_ => false,
})
}

Expand Down
10 changes: 10 additions & 0 deletions extra_tests/snippets/builtin_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -839,3 +839,13 @@ class MyString(str):
assert id(b) != id(b * 1)
assert id(b) != id(1 * b)
assert id(b) != id(b * 2)


# Regression tests for isalpha/isalnum Unicode General Category correctness.
# These code points belong to letter categories (Ll/Lo), so both methods must
# return True, but the older Unicode tables used by unic-ucd-category report
# them as unassigned.
# See: https://github.com/RustPython/RustPython/pull/7520#issuecomment-4148322294
for _cp in [1376, 1416, 1519, 2160, 2161, 2162, 2163, 2164, 2165, 2166]:
_c = chr(_cp)
assert _c.isalpha(), f"U+{_cp:04X} should be isalpha"
assert _c.isalnum(), f"U+{_cp:04X} should be isalnum"
Loading