Skip to content

Commit 9b2ad34

Browse files
authored
check surrogates (#6547)
1 parent 5b20c45 commit 9b2ad34

File tree

3 files changed

+31
-6
lines changed

3 files changed

+31
-6
lines changed

Lib/test/test_builtin.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2399,8 +2399,6 @@ def test_type_nokwargs(self):
23992399
with self.assertRaises(TypeError):
24002400
type('a', (), dict={})
24012401

2402-
# TODO: RUSTPYTHON
2403-
@unittest.expectedFailure
24042402
def test_type_name(self):
24052403
for name in 'A', '\xc4', '\U0001f40d', 'B.A', '42', '':
24062404
with self.subTest(name=name):
@@ -2450,8 +2448,6 @@ def test_type_qualname(self):
24502448
A.__qualname__ = b'B'
24512449
self.assertEqual(A.__qualname__, 'D.E')
24522450

2453-
# TODO: RUSTPYTHON
2454-
@unittest.expectedFailure
24552451
def test_type_doc(self):
24562452
for doc in 'x', '\xc4', '\U0001f40d', 'x\x00y', b'x', 42, None:
24572453
A = type('A', (), {'__doc__': doc})

crates/vm/src/builtins/str.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@ impl PyStr {
441441
self.data.as_str()
442442
}
443443

444-
fn ensure_valid_utf8(&self, vm: &VirtualMachine) -> PyResult<()> {
444+
pub(crate) fn ensure_valid_utf8(&self, vm: &VirtualMachine) -> PyResult<()> {
445445
if self.is_utf8() {
446446
Ok(())
447447
} else {
@@ -1336,7 +1336,7 @@ impl PyStr {
13361336
}
13371337

13381338
#[pymethod]
1339-
fn isidentifier(&self) -> bool {
1339+
pub fn isidentifier(&self) -> bool {
13401340
let Some(s) = self.to_str() else { return false };
13411341
let mut chars = s.chars();
13421342
let is_identifier_start = chars.next().is_some_and(|c| c == '_' || is_xid_start(c));

crates/vm/src/builtins/type.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,6 +1163,7 @@ impl PyType {
11631163
if name.as_bytes().contains(&0) {
11641164
return Err(vm.new_value_error("type name must not contain null characters"));
11651165
}
1166+
name.ensure_valid_utf8(vm)?;
11661167

11671168
// Use std::mem::replace to swap the new value in and get the old value out,
11681169
// then drop the old value after releasing the lock (similar to CPython's Py_SETREF)
@@ -1254,6 +1255,7 @@ impl Constructor for PyType {
12541255
if name.as_bytes().contains(&0) {
12551256
return Err(vm.new_value_error("type name must not contain null characters"));
12561257
}
1258+
name.ensure_valid_utf8(vm)?;
12571259

12581260
let (metatype, base, bases, base_is_type) = if bases.is_empty() {
12591261
let base = vm.ctx.types.object_type.to_owned();
@@ -1306,6 +1308,13 @@ impl Constructor for PyType {
13061308
});
13071309
let mut attributes = dict.to_attributes(vm);
13081310

1311+
// A str __doc__ containing surrogates is not valid UTF-8: fail type creation here with UnicodeEncodeError
1312+
if let Some(doc) = attributes.get(identifier!(vm, __doc__))
1313+
&& let Some(doc_str) = doc.downcast_ref::<PyStr>()
1314+
{
1315+
doc_str.ensure_valid_utf8(vm)?;
1316+
}
1317+
13091318
if let Some(f) = attributes.get_mut(identifier!(vm, __init_subclass__))
13101319
&& f.class().is(vm.ctx.types.function_type)
13111320
{
@@ -1340,6 +1349,13 @@ impl Constructor for PyType {
13401349

13411350
let (heaptype_slots, add_dict): (Option<PyRef<PyTuple<PyStrRef>>>, bool) =
13421351
if let Some(x) = attributes.get(identifier!(vm, __slots__)) {
1352+
// A bytes object is not allowed as __slots__ itself; reject it before the str/iterable handling below
1353+
if x.class().is(vm.ctx.types.bytes_type) {
1354+
return Err(vm.new_type_error(
1355+
"__slots__ items must be strings, not 'bytes'".to_owned(),
1356+
));
1357+
}
1358+
13431359
let slots = if x.class().is(vm.ctx.types.str_type) {
13441360
let x = unsafe { x.downcast_unchecked_ref::<PyStr>() };
13451361
PyTuple::new_ref_typed(vec![x.to_owned()], &vm.ctx)
@@ -1348,6 +1364,12 @@ impl Constructor for PyType {
13481364
let elements = {
13491365
let mut elements = Vec::new();
13501366
while let PyIterReturn::Return(element) = iter.next(vm)? {
1367+
// Every __slots__ item must be a str; reject bytes items with a TypeError
1368+
if element.class().is(vm.ctx.types.bytes_type) {
1369+
return Err(vm.new_type_error(
1370+
"__slots__ items must be strings, not 'bytes'".to_owned(),
1371+
));
1372+
}
13511373
elements.push(element);
13521374
}
13531375
elements
@@ -1356,6 +1378,13 @@ impl Constructor for PyType {
13561378
tuple.try_into_typed(vm)?
13571379
};
13581380

1381+
// Validate that all slots are valid identifiers
1382+
for slot in slots.iter() {
1383+
if !slot.isidentifier() {
1384+
return Err(vm.new_type_error("__slots__ must be identifiers".to_owned()));
1385+
}
1386+
}
1387+
13591388
// Check if __dict__ is in slots
13601389
let dict_name = "__dict__";
13611390
let has_dict = slots.iter().any(|s| s.as_str() == dict_name);

0 commit comments

Comments
 (0)