Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/sre_engine/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ rustpython-wtf8 = { workspace = true }
num_enum = { workspace = true }
bitflags = { workspace = true }
optional = { workspace = true }
icu_properties = { workspace = true }

[dev-dependencies]
criterion = { workspace = true }
Expand Down
4 changes: 3 additions & 1 deletion crates/sre_engine/src/string.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use icu_properties::{CodePointMapData, props::CanonicalCombiningClass};
use rustpython_wtf8::Wtf8;

#[derive(Debug, Clone, Copy)]
Expand Down Expand Up @@ -442,8 +443,9 @@ pub(crate) const fn is_uni_linebreak(ch: u32) -> bool {
#[inline]
pub(crate) fn is_uni_alnum(ch: u32) -> bool {
// TODO: check with cpython
let map = CodePointMapData::<CanonicalCombiningClass>::new();
char::try_from(ch)
.map(|x| x.is_alphanumeric())
.map(|x| x.is_alphanumeric() && map.get(x) == CanonicalCombiningClass::NotReordered)
Copy link
Copy Markdown
Contributor

@ShaharNaveh ShaharNaveh Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove line 446 (the `let map = CodePointMapData::<CanonicalCombiningClass>::new();` binding) and apply:

Suggested change
.map(|x| x.is_alphanumeric() && map.get(x) == CanonicalCombiningClass::NotReordered)
.map(|x| x.is_alphanumeric() && CanonicalCombiningClass::for_char(x) == CanonicalCombiningClass::NotReordered)

.unwrap_or(false)
}
#[inline]
Expand Down
14 changes: 11 additions & 3 deletions crates/vm/src/builtins/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,12 @@ use rustpython_common::{
wtf8::{CodePoint, Wtf8, Wtf8Buf, Wtf8Chunk, Wtf8Concat},
};

use icu_properties::props::{
BidiClass, BinaryProperty, EnumeratedProperty, GeneralCategory, XidContinue, XidStart,
use icu_properties::{
CodePointMapData,
props::{
BidiClass, BinaryProperty, CanonicalCombiningClass, EnumeratedProperty, GeneralCategory,
XidContinue, XidStart,
},
};
use unicode_casing::CharExt;

Expand Down Expand Up @@ -946,7 +950,11 @@ impl PyStr {

#[pymethod]
fn isalnum(&self) -> bool {
!self.data.is_empty() && self.char_all(char::is_alphanumeric)
let map = CodePointMapData::<CanonicalCombiningClass>::new();
!self.data.is_empty()
&& self.char_all(|c| {
c.is_alphanumeric() && map.get(c) == CanonicalCombiningClass::NotReordered
Copy link
Copy Markdown
Contributor

@ShaharNaveh ShaharNaveh Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove line 953 (the `let map = CodePointMapData::<CanonicalCombiningClass>::new();` binding) and apply:

Suggested change
c.is_alphanumeric() && map.get(c) == CanonicalCombiningClass::NotReordered
c.is_alphanumeric() && CanonicalCombiningClass::for_char(c) == CanonicalCombiningClass::NotReordered

})
}

#[pymethod]
Expand Down
9 changes: 9 additions & 0 deletions extra_tests/snippets/builtin_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,15 @@
assert a.istitle()
assert a.isalpha()

# Rust's char::is_alphanumeric and Python's str.isalnum disagree on some combining characters (e.g. U+0345)
assert "\u006e".isalnum()
assert not "\u0303".isalnum()
assert not "\u006e\u0303".isalnum()
assert "\u00f1".isalnum()
assert not "\u0345".isalnum()
for raw in range(0x0363, 0x036f):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
for raw in range(0x0363, 0x036f):
for raw in range(0x0363, 0x036F):

assert not chr(raw).isalnum()

s = "1 2 3"
assert s.split(" ", 1) == ["1", "2 3"]
assert s.rsplit(" ", 1) == ["1 2", "3"]
Expand Down
3 changes: 3 additions & 0 deletions extra_tests/snippets/stdlib_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,6 @@
# Test of fix re.fullmatch POSSESSIVE_REPEAT, issue #7183
assert re.fullmatch(r"([0-9]++(?:\.[0-9]+)*+)", "1.25.38")
assert re.fullmatch(r"([0-9]++(?:\.[0-9]+)*+)", "1.25.38").group(0) == "1.25.38"

# Combining characters; issue #7518
assert not re.match(r"\w", "\u0345"), r"\w should not match U+0345 (category Mn)"
Loading