Skip to content

Commit 20cb884

Browse files
Add unicode & bytes c-api support (#7904)
* Add unicode & bytes c-api support * Check for negative size
1 parent a4579a9 commit 20cb884

4 files changed

Lines changed: 230 additions & 0 deletions

File tree

crates/capi/src/bytesobject.rs

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
use crate::PyObject;
2+
use crate::object::define_py_check;
3+
use crate::pystate::with_vm;
4+
use core::ffi::c_char;
5+
use rustpython_vm::builtins::PyBytes;
6+
7+
// Instantiates `define_py_check!` for the name `PyBytes_Check` against `types.bytes_type`.
// NOTE(review): presumably expands to the exported C-API type-check function for `bytes`
// (non-`exact` form) — confirm against the macro definition in `object.rs`.
define_py_check!(fn PyBytes_Check, types.bytes_type);
8+
// Instantiates the `exact` form of `define_py_check!` for `PyBytes_CheckExact` against
// `types.bytes_type`.
// NOTE(review): presumably the generated check rejects subclasses of `bytes` — confirm
// against the macro definition in `object.rs`.
define_py_check!(exact fn PyBytes_CheckExact, types.bytes_type);
9+
10+
#[unsafe(no_mangle)]
11+
#[allow(clippy::uninit_vec)]
12+
pub unsafe extern "C" fn PyBytes_FromStringAndSize(
13+
bytes: *mut c_char,
14+
len: isize,
15+
) -> *mut PyObject {
16+
with_vm(|vm| {
17+
let len = len.try_into().map_err(|_| {
18+
vm.new_system_error("Negative size passed to PyBytes_FromStringAndSize")
19+
})?;
20+
21+
let data = if bytes.is_null() {
22+
let mut data = Vec::with_capacity(len);
23+
unsafe { data.set_len(len) };
24+
data
25+
} else {
26+
unsafe { core::slice::from_raw_parts(bytes as *const u8, len) }.to_vec()
27+
};
28+
29+
Ok(vm.ctx.new_bytes(data))
30+
})
31+
}
32+
33+
#[unsafe(no_mangle)]
34+
pub unsafe extern "C" fn PyBytes_Size(bytes: *mut PyObject) -> isize {
35+
with_vm(|vm| {
36+
let bytes = unsafe { &*bytes }.try_downcast_ref::<PyBytes>(vm)?;
37+
Ok(bytes.as_bytes().len())
38+
})
39+
}
40+
41+
#[unsafe(no_mangle)]
42+
pub unsafe extern "C" fn PyBytes_AsString(bytes: *mut PyObject) -> *mut c_char {
43+
with_vm(|vm| {
44+
let bytes = unsafe { &*bytes }.try_downcast_ref::<PyBytes>(vm)?;
45+
Ok(bytes.as_bytes().as_ptr())
46+
})
47+
}
48+
49+
#[cfg(false)]
50+
mod tests {
51+
use pyo3::prelude::*;
52+
use pyo3::types::PyBytes;
53+
54+
#[test]
55+
fn test_bytes() {
56+
Python::attach(|py| {
57+
let bytes = PyBytes::new(py, b"Hello, World!");
58+
assert_eq!(bytes.as_bytes(), b"Hello, World!");
59+
})
60+
}
61+
62+
#[test]
63+
fn test_bytes_uninit() {
64+
Python::attach(|py| {
65+
let bytes = PyBytes::new_with(py, 13, |data| {
66+
data.copy_from_slice(b"Hello, World!");
67+
Ok(())
68+
})
69+
.unwrap();
70+
assert_eq!(bytes.as_bytes(), b"Hello, World!");
71+
})
72+
}
73+
}

crates/capi/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@ use std::sync::MutexGuard;
99
extern crate alloc;
1010

1111
pub mod abstract_;
12+
pub mod bytesobject;
1213
pub mod import;
1314
pub mod object;
1415
pub mod pyerrors;
1516
pub mod pylifecycle;
1617
pub mod pystate;
1718
pub mod refcount;
19+
pub mod unicodeobject;
1820
mod util;
1921

2022
/// Get main interpreter of this process. Will be None if it has not been initialized yet.

crates/capi/src/object.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ macro_rules! define_py_check {
3333
};
3434
}
3535

36+
pub(crate) use define_py_check;
3637
define_py_check!(fn PyType_Check, types.type_type);
3738
define_py_check!(exact fn PyType_CheckExact, types.type_type);
3839

crates/capi/src/unicodeobject.rs

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
use crate::PyObject;
2+
use crate::object::define_py_check;
3+
use crate::pystate::with_vm;
4+
use core::ffi::{CStr, c_char, c_int};
5+
use core::ptr::NonNull;
6+
use core::slice;
7+
use core::str;
8+
use rustpython_vm::PyObjectRef;
9+
use rustpython_vm::builtins::PyStr;
10+
11+
// Instantiates `define_py_check!` for the name `PyUnicode_Check` against `types.str_type`.
// NOTE(review): presumably expands to the exported C-API type-check function for `str`
// (non-`exact` form) — confirm against the macro definition in `object.rs`.
define_py_check!(fn PyUnicode_Check, types.str_type);
12+
// Instantiates the `exact` form of `define_py_check!` for `PyUnicode_CheckExact` against
// `types.str_type`.
// NOTE(review): presumably the generated check rejects subclasses of `str` — confirm
// against the macro definition in `object.rs`.
define_py_check!(exact fn PyUnicode_CheckExact, types.str_type);
13+
14+
#[unsafe(no_mangle)]
15+
pub unsafe extern "C" fn PyUnicode_FromStringAndSize(
16+
s: *const c_char,
17+
len: isize,
18+
) -> *mut PyObject {
19+
with_vm(|vm| {
20+
let len: usize = len
21+
.try_into()
22+
.map_err(|_| vm.new_system_error("length must be non-negative"))?;
23+
24+
let text = if s.is_null() {
25+
if len != 0 {
26+
return Err(vm.new_system_error(
27+
"PyUnicode_FromStringAndSize called with null data and non-zero len",
28+
));
29+
}
30+
""
31+
} else {
32+
let bytes = unsafe { slice::from_raw_parts(s.cast::<u8>(), len) };
33+
str::from_utf8(bytes).expect("PyUnicode_FromStringAndSize got non-UTF8 data")
34+
};
35+
36+
Ok(vm.ctx.new_str(text))
37+
})
38+
}
39+
40+
#[unsafe(no_mangle)]
41+
pub unsafe extern "C" fn PyUnicode_AsUTF8AndSize(
42+
obj: *mut PyObject,
43+
size: *mut isize,
44+
) -> *const c_char {
45+
with_vm(|vm| {
46+
let unicode = unsafe { &*obj }.try_downcast_ref::<PyStr>(vm)?;
47+
48+
let str = unicode.to_str().ok_or_else(|| {
49+
vm.new_system_error("PyUnicode_AsUTF8AndSize only supports UTF-8 or ASCII strings")
50+
})?;
51+
52+
if size.is_null() {
53+
// We do not support null size arguments because the returned string is not NULL terminated.
54+
return Err(
55+
vm.new_system_error("size argument to PyUnicode_AsUTF8AndSize cannot be null")
56+
);
57+
}
58+
59+
unsafe { *size = str.len() as isize };
60+
Ok(str.as_ptr())
61+
})
62+
}
63+
64+
#[unsafe(no_mangle)]
65+
pub unsafe extern "C" fn PyUnicode_AsEncodedString(
66+
unicode: *mut PyObject,
67+
encoding: *const c_char,
68+
errors: *const c_char,
69+
) -> *mut PyObject {
70+
with_vm(|vm| {
71+
let unicode = unsafe { &*unicode }
72+
.try_downcast_ref::<PyStr>(vm)?
73+
.to_owned();
74+
let encoding = if encoding.is_null() {
75+
"utf-8"
76+
} else {
77+
unsafe { CStr::from_ptr(encoding) }
78+
.to_str()
79+
.expect("encoding must be valid UTF-8")
80+
};
81+
let errors = if errors.is_null() {
82+
None
83+
} else {
84+
let errors = unsafe { CStr::from_ptr(errors) }
85+
.to_str()
86+
.expect("errors must be valid UTF-8");
87+
Some(vm.ctx.new_utf8_str(errors))
88+
};
89+
vm.state
90+
.codec_registry
91+
.encode_text(unicode, encoding, errors, vm)
92+
})
93+
}
94+
95+
#[unsafe(no_mangle)]
96+
pub unsafe extern "C" fn PyUnicode_InternInPlace(string: *mut *mut PyObject) {
97+
with_vm(|vm| {
98+
let old_str = unsafe { PyObjectRef::from_raw(NonNull::new_unchecked(*string)) }
99+
.downcast_exact::<PyStr>(vm)
100+
.expect("PyUnicode_InternInPlace called with non-string object");
101+
102+
let interned: PyObjectRef = vm.ctx.intern_str(old_str).to_owned().into();
103+
104+
unsafe { *string = interned.into_raw().as_ptr() }
105+
})
106+
}
107+
108+
#[unsafe(no_mangle)]
109+
pub unsafe extern "C" fn PyUnicode_EqualToUTF8AndSize(
110+
unicode: *mut PyObject,
111+
string: *const c_char,
112+
size: isize,
113+
) -> c_int {
114+
with_vm(|vm| {
115+
let size = size.try_into().map_err(|_| {
116+
vm.new_system_error("Negative size passed to PyUnicode_EqualToUTF8AndSize")
117+
})?;
118+
119+
let unicode = unsafe { &*unicode }.try_downcast_ref::<PyStr>(vm)?;
120+
let result = unsafe {
121+
let slice = slice::from_raw_parts(string as _, size);
122+
str::from_utf8(slice)
123+
}
124+
.ok()
125+
.and_then(|other| Some(unicode.to_str()? == other))
126+
.unwrap_or(false);
127+
128+
Ok(result)
129+
})
130+
}
131+
132+
#[cfg(false)]
133+
mod tests {
134+
use pyo3::intern;
135+
use pyo3::prelude::*;
136+
use pyo3::types::PyString;
137+
138+
#[test]
139+
fn test_unicode() {
140+
Python::attach(|py| {
141+
let string = PyString::new(py, "Hello, World!");
142+
assert!(string.is_instance_of::<PyString>());
143+
assert_eq!(string.to_str().unwrap(), "Hello, World!");
144+
assert_eq!(string, "Hello, World!");
145+
})
146+
}
147+
148+
#[test]
149+
fn test_intern_str() {
150+
Python::attach(|py| {
151+
let _string = intern!(py, "Hello, World!");
152+
})
153+
}
154+
}

0 commit comments

Comments
 (0)