Skip to content

Commit 58caf31

Browse files
committed
Remove PyStr::as_str, use as_wtf8/PyUtf8Str instead
- Remove as_str() from PyStr/Py<PyStr> (was panicking on surrogates)
- Add Wtf8Concat trait and concat! macro for WTF-8 formatting
- Add impl From<&str> for &Wtf8 conversion
- Add AsPyStr/DictKey impls for PyUtf8Str types
- Migrate all call sites to as_wtf8(), to_str(), or PyUtf8Str
- Fix exception message APIs to accept Wtf8Buf
- Deduplicate inner-scope imports across modules
1 parent c98215a commit 58caf31

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

121 files changed

+1947
-1450
lines changed

Lib/test/test_tstring.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44

55

66
class TestTString(unittest.TestCase, TStringBaseCase):
7-
@unittest.expectedFailure # TODO: RUSTPYTHON; + Template(strings=('Hello',), interpolations=())
87
def test_string_representation(self):
98
# Test __repr__
109
t = t"Hello"

crates/common/src/str.rs

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -258,6 +258,7 @@ impl StrData {
258258
&self.data
259259
}
260260

261+
// TODO: rename to to_str
261262
#[inline]
262263
pub fn as_str(&self) -> Option<&str> {
263264
self.kind
@@ -429,13 +430,13 @@ pub fn zfill(bytes: &[u8], width: usize) -> Vec<u8> {
429430

430431
/// Convert a string to ascii compatible, escaping unicode-s into escape
431432
/// sequences.
432-
pub fn to_ascii(value: &str) -> AsciiString {
433+
pub fn to_ascii(value: &Wtf8) -> AsciiString {
433434
let mut ascii = Vec::new();
434-
for c in value.chars() {
435-
if c.is_ascii() {
436-
ascii.push(c as u8);
435+
for cp in value.code_points() {
436+
if cp.is_ascii() {
437+
ascii.push(cp.to_u32() as u8);
437438
} else {
438-
let c = c as i64;
439+
let c = cp.to_u32();
439440
let hex = if c < 0x100 {
440441
format!("\\x{c:02x}")
441442
} else if c < 0x10000 {

crates/stdlib/src/_asyncio.rs

Lines changed: 43 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ pub(crate) use _asyncio::module_def;
66

77
#[pymodule]
88
pub(crate) mod _asyncio {
9+
use crate::common::wtf8::{Wtf8Buf, wtf8_concat};
910
use crate::{
1011
common::lock::PyRwLock,
1112
vm::{
@@ -859,7 +860,7 @@ pub(crate) mod _asyncio {
859860
}
860861
}
861862

862-
fn get_future_repr_info(future: &PyObject, vm: &VirtualMachine) -> PyResult<String> {
863+
fn get_future_repr_info(future: &PyObject, vm: &VirtualMachine) -> PyResult<Wtf8Buf> {
863864
// Try to use asyncio.base_futures._future_repr_info
864865
// Import from sys.modules if available, otherwise try regular import
865866
let sys_modules = vm.sys_module.get_attr("modules", vm)?;
@@ -892,29 +893,34 @@ pub(crate) mod _asyncio {
892893
Err(_) => return get_future_repr_info_fallback(future, vm),
893894
};
894895

895-
let parts: Vec<String> = list
896-
.borrow_vec()
897-
.iter()
898-
.filter_map(|x: &PyObjectRef| x.str(vm).ok().map(|s| s.as_str().to_string()))
899-
.collect();
900-
Ok(parts.join(" "))
896+
let mut result = Wtf8Buf::new();
897+
let parts = list.borrow_vec();
898+
for (i, x) in parts.iter().enumerate() {
899+
if i > 0 {
900+
result.push_str(" ");
901+
}
902+
if let Ok(s) = x.str(vm) {
903+
result.push_wtf8(s.as_wtf8());
904+
}
905+
}
906+
Ok(result)
901907
}
902908

903-
fn get_future_repr_info_fallback(future: &PyObject, vm: &VirtualMachine) -> PyResult<String> {
909+
fn get_future_repr_info_fallback(future: &PyObject, vm: &VirtualMachine) -> PyResult<Wtf8Buf> {
904910
// Fallback: build repr from properties directly
905911
if let Ok(Some(state)) =
906912
vm.get_attribute_opt(future.to_owned(), vm.ctx.intern_str("_state"))
907913
{
908-
let state_str = state
914+
let s = state
909915
.str(vm)
910-
.map(|s| s.as_str().to_lowercase())
911-
.unwrap_or_else(|_| "unknown".to_string());
912-
return Ok(state_str);
916+
.map(|s| s.as_wtf8().to_lowercase())
917+
.unwrap_or_else(|_| Wtf8Buf::from("unknown"));
918+
return Ok(s);
913919
}
914-
Ok("state=unknown".to_string())
920+
Ok(Wtf8Buf::from("state=unknown"))
915921
}
916922

917-
fn get_task_repr_info(task: &PyObject, vm: &VirtualMachine) -> PyResult<String> {
923+
fn get_task_repr_info(task: &PyObject, vm: &VirtualMachine) -> PyResult<Wtf8Buf> {
918924
// vm.import returns the top-level module, get base_tasks submodule
919925
match vm
920926
.import("asyncio.base_tasks", 0)
@@ -927,12 +933,15 @@ pub(crate) mod _asyncio {
927933
let list: PyListRef = info.downcast().map_err(|_| {
928934
vm.new_type_error("_task_repr_info should return a list")
929935
})?;
930-
let parts: Vec<String> = list
931-
.borrow_vec()
932-
.iter()
933-
.map(|x: &PyObjectRef| x.str(vm).map(|s| s.as_str().to_string()))
934-
.collect::<PyResult<Vec<_>>>()?;
935-
Ok(parts.join(" "))
936+
let mut result = Wtf8Buf::new();
937+
let parts = list.borrow_vec();
938+
for (i, x) in parts.iter().enumerate() {
939+
if i > 0 {
940+
result.push_str(" ");
941+
}
942+
result.push_wtf8(x.str(vm)?.as_wtf8());
943+
}
944+
Ok(result)
936945
}
937946
_ => get_future_repr_info(task, vm),
938947
}
@@ -1928,40 +1937,28 @@ pub(crate) mod _asyncio {
19281937
}
19291938

19301939
impl Representable for PyTask {
1931-
fn repr_str(zelf: &Py<Self>, vm: &VirtualMachine) -> PyResult<String> {
1940+
fn repr_wtf8(zelf: &Py<Self>, vm: &VirtualMachine) -> PyResult<Wtf8Buf> {
19321941
let class_name = zelf.class().name().to_string();
19331942

19341943
if let Some(_guard) = ReprGuard::enter(vm, zelf.as_object()) {
19351944
// Try to use _task_repr_info if available
19361945
if let Ok(info) = get_task_repr_info(zelf.as_object(), vm)
1937-
&& info != "state=unknown"
1946+
&& info.as_bytes() != b"state=unknown"
19381947
{
1939-
return Ok(format!("<{} {}>", class_name, info));
1948+
return Ok(wtf8_concat!("<", class_name, " ", info, ">"));
19401949
}
19411950

19421951
// Fallback: build repr from task properties directly
19431952
let state = zelf.base.fut_state.load().as_str().to_lowercase();
1944-
let name = zelf
1945-
.task_name
1946-
.read()
1947-
.as_ref()
1948-
.and_then(|n| n.str(vm).ok())
1949-
.map(|s| s.as_str().to_string())
1950-
.unwrap_or_else(|| "?".to_string());
1951-
let coro_repr = zelf
1952-
.task_coro
1953-
.read()
1954-
.as_ref()
1955-
.and_then(|c| c.repr(vm).ok())
1956-
.map(|s| s.as_str().to_string())
1957-
.unwrap_or_else(|| "?".to_string());
1958-
1959-
Ok(format!(
1960-
"<{} {} name='{}' coro={}>",
1961-
class_name, state, name, coro_repr
1953+
let name = zelf.task_name.read().as_ref().and_then(|n| n.str(vm).ok());
1954+
let coro_repr = zelf.task_coro.read().as_ref().and_then(|c| c.repr(vm).ok());
1955+
let name = name.as_ref().map_or("?".as_ref(), |s| s.as_wtf8());
1956+
let coro_repr = coro_repr.as_ref().map_or("?".as_ref(), |s| s.as_wtf8());
1957+
Ok(wtf8_concat!(
1958+
"<", class_name, " ", state, " name='", name, "' coro=", coro_repr, ">"
19621959
))
19631960
} else {
1964-
Ok(format!("<{} ...>", class_name))
1961+
Ok(Wtf8Buf::from(format!("<{class_name} ...>")))
19651962
}
19661963
}
19671964
}
@@ -2151,10 +2148,8 @@ pub(crate) mod _asyncio {
21512148
// Check if task awaits on itself
21522149
let task_obj: PyObjectRef = task.clone().into();
21532150
if result.is(&task_obj) {
2154-
let msg = format!(
2155-
"Task cannot await on itself: {}",
2156-
task_obj.repr(vm)?.as_str()
2157-
);
2151+
let task_repr = task_obj.repr(vm)?;
2152+
let msg = format!("Task cannot await on itself: {}", task_repr.as_wtf8());
21582153
task.base.fut_state.store(FutureState::Finished);
21592154
*task.base.fut_exception.write() = Some(vm.new_runtime_error(msg).into());
21602155
PyTask::schedule_callbacks(task, vm)?;
@@ -2254,7 +2249,8 @@ pub(crate) mod _asyncio {
22542249
vm.call_method(&loop_obj, "call_soon", (step_wrapper,))?;
22552250
}
22562251
} else {
2257-
let msg = format!("Task got bad yield: {}", result.repr(vm)?.as_str());
2252+
let result_repr = result.repr(vm)?;
2253+
let msg = format!("Task got bad yield: {}", result_repr.as_wtf8());
22582254
task.base.fut_state.store(FutureState::Finished);
22592255
*task.base.fut_exception.write() = Some(vm.new_runtime_error(msg).into());
22602256
PyTask::schedule_callbacks(task, vm)?;

crates/stdlib/src/_sqlite3.rs

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -89,7 +89,7 @@ mod _sqlite3 {
8989
$(
9090
#[allow(dead_code)]
9191
fn [<new_ $x:snake>](vm: &VirtualMachine, msg: String) -> PyBaseExceptionRef {
92-
vm.new_exception_msg([<$x:snake _type>]().to_owned(), msg)
92+
vm.new_exception_msg([<$x:snake _type>]().to_owned(), msg.into())
9393
}
9494
fn [<$x:snake _type>]() -> &'static Py<PyType> {
9595
[<$x:snake:upper>].get().expect("exception type not initialize")
@@ -723,7 +723,7 @@ mod _sqlite3 {
723723
converter: ArgCallable,
724724
vm: &VirtualMachine,
725725
) -> PyResult<()> {
726-
let name = typename.as_str().to_uppercase();
726+
let name = typename.expect_str().to_uppercase();
727727
converters().set_item(&name, converter.into(), vm)
728728
}
729729

@@ -2194,8 +2194,8 @@ mod _sqlite3 {
21942194
let Some(obj) = obj.downcast_ref::<PyStr>() else {
21952195
break;
21962196
};
2197-
let a_iter = name.as_str().chars().flat_map(|x| x.to_uppercase());
2198-
let b_iter = obj.as_str().chars().flat_map(|x| x.to_uppercase());
2197+
let a_iter = name.expect_str().chars().flat_map(|x| x.to_uppercase());
2198+
let b_iter = obj.expect_str().chars().flat_map(|x| x.to_uppercase());
21992199

22002200
if a_iter.eq(b_iter) {
22012201
return self.data.getitem_by_index(vm, i);
@@ -2918,7 +2918,7 @@ mod _sqlite3 {
29182918
};
29192919
let mut s = Vec::with_capacity(16);
29202920
s.extend(b"BEGIN ");
2921-
s.extend(isolation_level.as_str().bytes());
2921+
s.extend(isolation_level.expect_str().bytes());
29222922
s.push(b'\0');
29232923
self._exec(&s, vm)
29242924
}
@@ -3469,7 +3469,7 @@ mod _sqlite3 {
34693469
return e;
34703470
}
34713471

3472-
vm.new_exception_msg_dict(typ, msg, dict)
3472+
vm.new_exception_msg_dict(typ, msg.into(), dict)
34733473
}
34743474

34753475
static BEGIN_STATEMENTS: &[&[u8]] = &[

crates/stdlib/src/array.rs

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ mod array {
1919
builtins::{
2020
PositionIterInternal, PyByteArray, PyBytes, PyBytesRef, PyDictRef, PyFloat,
2121
PyGenericAlias, PyInt, PyList, PyListRef, PyStr, PyStrRef, PyTupleRef, PyType,
22-
PyTypeRef, builtins_iter,
22+
PyTypeRef, PyUtf8StrRef, builtins_iter,
2323
},
2424
class_or_notimplemented,
2525
convert::{ToPyObject, ToPyResult, TryFromBorrowedObject, TryFromObject},
@@ -559,7 +559,7 @@ mod array {
559559

560560
impl ArrayElement for WideChar {
561561
fn try_into_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult<Self> {
562-
PyStrRef::try_from_object(vm, obj)?
562+
PyUtf8StrRef::try_from_object(vm, obj)?
563563
.as_str()
564564
.chars()
565565
.exactly_one()
@@ -625,7 +625,7 @@ mod array {
625625
#[derive(FromArgs)]
626626
pub struct ArrayNewArgs {
627627
#[pyarg(positional)]
628-
spec: PyStrRef,
628+
spec: PyUtf8StrRef,
629629
#[pyarg(positional, optional)]
630630
init: OptionalArg<PyObjectRef>,
631631
}
@@ -884,7 +884,7 @@ mod array {
884884
if not_enough_bytes {
885885
Err(vm.new_exception_msg(
886886
vm.ctx.exceptions.eof_error.to_owned(),
887-
"read() didn't return enough bytes".to_owned(),
887+
"read() didn't return enough bytes".into(),
888888
))
889889
} else {
890890
Ok(())
@@ -1425,7 +1425,7 @@ mod array {
14251425
#[pyarg(positional)]
14261426
arraytype: PyTypeRef,
14271427
#[pyarg(positional)]
1428-
typecode: PyStrRef,
1428+
typecode: PyUtf8StrRef,
14291429
#[pyarg(positional)]
14301430
mformat_code: MachineFormatCode,
14311431
#[pyarg(positional)]
@@ -1568,7 +1568,7 @@ mod array {
15681568
Ok(typ)
15691569
}
15701570

1571-
fn check_type_code(spec: PyStrRef, vm: &VirtualMachine) -> PyResult<ArrayContentType> {
1571+
fn check_type_code(spec: PyUtf8StrRef, vm: &VirtualMachine) -> PyResult<ArrayContentType> {
15721572
let spec = spec.as_str().chars().exactly_one().map_err(|_| {
15731573
vm.new_type_error(
15741574
"_array_reconstructor() argument 2 must be a unicode character, not str",

crates/stdlib/src/binascii.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -849,7 +849,7 @@ mod decl {
849849
struct Base64DecodeError(base64::DecodeError);
850850

851851
fn new_binascii_error(msg: String, vm: &VirtualMachine) -> PyBaseExceptionRef {
852-
vm.new_exception_msg(decl::error_type(vm), msg)
852+
vm.new_exception_msg(decl::error_type(vm), msg.into())
853853
}
854854

855855
impl ToPyException for Base64DecodeError {

crates/stdlib/src/contextvars.rs

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ mod _contextvars {
1515
AsObject, Py, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, atomic_func,
1616
builtins::{PyGenericAlias, PyList, PyStrRef, PyType, PyTypeRef},
1717
class::StaticType,
18-
common::hash::PyHash,
18+
common::{hash::PyHash, wtf8::Wtf8Buf},
1919
function::{ArgCallable, FuncArgs, OptionalArg},
2020
protocol::{PyMappingMethods, PySequenceMethods},
2121
types::{AsMapping, AsSequence, Constructor, Hashable, Iterable, Representable},
@@ -333,7 +333,7 @@ mod _contextvars {
333333
if vars.swap_remove(zelf).is_none() {
334334
// TODO:
335335
// PyErr_SetObject(PyExc_LookupError, (PyObject *)var);
336-
let msg = zelf.as_object().repr(vm)?.as_str().to_owned();
336+
let msg = zelf.as_object().repr(vm)?.as_wtf8().to_owned();
337337
return Err(vm.new_lookup_error(msg));
338338
}
339339

@@ -409,7 +409,7 @@ mod _contextvars {
409409
default.clone()
410410
} else {
411411
let msg = zelf.as_object().repr(vm)?;
412-
return Err(vm.new_lookup_error(msg.as_str().to_owned()));
412+
return Err(vm.new_lookup_error(msg.as_wtf8().to_owned()));
413413
};
414414
Ok(Some(value))
415415
}
@@ -611,11 +611,14 @@ mod _contextvars {
611611

612612
impl Representable for ContextToken {
613613
#[inline]
614-
fn repr_str(zelf: &Py<Self>, vm: &VirtualMachine) -> PyResult<String> {
614+
fn repr_wtf8(zelf: &Py<Self>, vm: &VirtualMachine) -> PyResult<Wtf8Buf> {
615615
let used = if zelf.used.get() { " used" } else { "" };
616-
let var = Representable::repr_str(&zelf.var, vm)?;
616+
let var = Representable::repr_wtf8(&zelf.var, vm)?;
617617
let ptr = zelf.as_object().get_id() as *const u8;
618-
Ok(format!("<Token{used} var={var} at {ptr:p}>"))
618+
let mut result = Wtf8Buf::from(format!("<Token{used} var="));
619+
result.push_wtf8(&var);
620+
result.push_str(&format!(" at {ptr:p}>"));
621+
Ok(result)
619622
}
620623
}
621624

0 commit comments

Comments
 (0)