-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Add unicode & bytes c-api support #7904
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,73 @@ | ||
| use crate::PyObject; | ||
| use crate::object::define_py_check; | ||
| use crate::pystate::with_vm; | ||
| use core::ffi::c_char; | ||
| use rustpython_vm::builtins::PyBytes; | ||
|
|
||
| define_py_check!(fn PyBytes_Check, types.bytes_type); | ||
| define_py_check!(exact fn PyBytes_CheckExact, types.bytes_type); | ||
|
|
||
| /// Creates a new `bytes` object holding `len` bytes. | ||
| /// | ||
| /// When `bytes` is non-null, `len` bytes are copied out of it. When `bytes` is | ||
| /// null, the object is created with `len` zero bytes. | ||
| /// | ||
| /// A negative `len` is reported through `vm.new_system_error`. | ||
| /// | ||
| /// # Safety | ||
| /// | ||
| /// If `bytes` is non-null it must be valid for reads of `len` bytes. | ||
| #[unsafe(no_mangle)] | ||
| pub unsafe extern "C" fn PyBytes_FromStringAndSize( | ||
| bytes: *mut c_char, | ||
| len: isize, | ||
| ) -> *mut PyObject { | ||
| with_vm(|vm| { | ||
| let len: usize = len.try_into().map_err(|_| { | ||
| vm.new_system_error("Negative size passed to PyBytes_FromStringAndSize") | ||
| })?; | ||
|
|
||
| let data = if bytes.is_null() { | ||
| // Zero-fill rather than calling `Vec::set_len` on uninitialised capacity: | ||
| // `set_len` requires the newly exposed elements to be initialised. | ||
| vec![0u8; len] | ||
| } else { | ||
| // SAFETY: the caller guarantees `bytes` is readable for `len` bytes. | ||
| unsafe { core::slice::from_raw_parts(bytes as *const u8, len) }.to_vec() | ||
| }; | ||
|
|
||
| Ok(vm.ctx.new_bytes(data)) | ||
| }) | ||
| } | ||
|
|
||
| /// Returns the number of bytes stored in the `bytes` object `bytes`. | ||
| /// | ||
| /// Fails (via the downcast error) when the object is not a `PyBytes`. | ||
| /// | ||
| /// # Safety | ||
| /// | ||
| /// `bytes` is dereferenced unconditionally, so it must point to a live object. | ||
| #[unsafe(no_mangle)] | ||
| pub unsafe extern "C" fn PyBytes_Size(bytes: *mut PyObject) -> isize { | ||
| with_vm(|vm| { | ||
| // SAFETY: the caller guarantees `bytes` points to a live object. | ||
| let object = unsafe { &*bytes }; | ||
| let payload = object.try_downcast_ref::<PyBytes>(vm)?; | ||
| let length = payload.as_bytes().len(); | ||
| Ok(length) | ||
| }) | ||
| } | ||
|
|
||
| /// Returns a pointer to the first byte of the `bytes` object's contents. | ||
| /// | ||
| /// Fails (via the downcast error) when the object is not a `PyBytes`. | ||
| /// | ||
| /// NOTE(review): C callers usually expect a NUL-terminated buffer here; | ||
| /// whether the underlying storage provides one is not visible from this | ||
| /// function. Confirm against the `PyBytes` payload. | ||
| /// | ||
| /// # Safety | ||
| /// | ||
| /// `bytes` is dereferenced unconditionally, so it must point to a live object. | ||
| #[unsafe(no_mangle)] | ||
| pub unsafe extern "C" fn PyBytes_AsString(bytes: *mut PyObject) -> *mut c_char { | ||
| with_vm(|vm| { | ||
| // SAFETY: the caller guarantees `bytes` points to a live object. | ||
| let object = unsafe { &*bytes }; | ||
| let payload = object.try_downcast_ref::<PyBytes>(vm)?; | ||
| let contents = payload.as_bytes(); | ||
| Ok(contents.as_ptr()) | ||
| }) | ||
| } | ||
|
|
||
| // Currently compiled out via `#[cfg(false)]`; exercises the bytes C-API through pyo3. | ||
| #[cfg(false)] | ||
| mod tests { | ||
| use pyo3::prelude::*; | ||
| use pyo3::types::PyBytes; | ||
|
|
||
| // Builds a bytes object from a slice and reads the same contents back. | ||
| #[test] | ||
| fn test_bytes() { | ||
| Python::attach(|py| { | ||
| let expected: &[u8] = b"Hello, World!"; | ||
| let object = PyBytes::new(py, expected); | ||
| assert_eq!(object.as_bytes(), expected); | ||
| }) | ||
| } | ||
|
|
||
| // Builds a bytes object through the fill-in-place constructor. | ||
| #[test] | ||
| fn test_bytes_uninit() { | ||
| Python::attach(|py| { | ||
| let expected: &[u8] = b"Hello, World!"; | ||
| let object = PyBytes::new_with(py, 13, |buffer| { | ||
| buffer.copy_from_slice(expected); | ||
| Ok(()) | ||
| }) | ||
| .unwrap(); | ||
| assert_eq!(object.as_bytes(), expected); | ||
| }) | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,154 @@ | ||
| use crate::PyObject; | ||
| use crate::object::define_py_check; | ||
| use crate::pystate::with_vm; | ||
| use core::ffi::{CStr, c_char, c_int}; | ||
| use core::ptr::NonNull; | ||
| use core::slice; | ||
| use core::str; | ||
| use rustpython_vm::PyObjectRef; | ||
| use rustpython_vm::builtins::PyStr; | ||
|
|
||
| define_py_check!(fn PyUnicode_Check, types.str_type); | ||
| define_py_check!(exact fn PyUnicode_CheckExact, types.str_type); | ||
|
|
||
| /// Creates a `str` object from `len` bytes of UTF-8 data starting at `s`. | ||
| /// | ||
| /// A null `s` is accepted only together with `len == 0` and yields the empty | ||
| /// string. A negative `len`, a null `s` with non-zero `len`, and data that is | ||
| /// not valid UTF-8 are all reported through `vm.new_system_error`. | ||
| /// | ||
| /// # Safety | ||
| /// | ||
| /// If `s` is non-null it must be valid for reads of `len` bytes. | ||
| #[unsafe(no_mangle)] | ||
| pub unsafe extern "C" fn PyUnicode_FromStringAndSize( | ||
| s: *const c_char, | ||
| len: isize, | ||
| ) -> *mut PyObject { | ||
| with_vm(|vm| { | ||
| let len: usize = len | ||
| .try_into() | ||
| .map_err(|_| vm.new_system_error("length must be non-negative"))?; | ||
|
|
||
| let text = if s.is_null() { | ||
| if len != 0 { | ||
| return Err(vm.new_system_error( | ||
| "PyUnicode_FromStringAndSize called with null data and non-zero len", | ||
| )); | ||
| } | ||
| "" | ||
| } else { | ||
| // SAFETY: the caller guarantees `s` is readable for `len` bytes. | ||
| let bytes = unsafe { slice::from_raw_parts(s.cast::<u8>(), len) }; | ||
| // Return an error instead of panicking: the data comes from the C caller, | ||
| // and a panic must not escape an `extern "C"` function. | ||
| str::from_utf8(bytes).map_err(|_| { | ||
| vm.new_system_error("PyUnicode_FromStringAndSize got non-UTF8 data") | ||
| })? | ||
| }; | ||
|
|
||
| Ok(vm.ctx.new_str(text)) | ||
| }) | ||
| } | ||
|
|
||
| /// Exposes the UTF-8 contents of the `str` object `obj`. | ||
| /// | ||
| /// On success the byte length is stored through `size` and a pointer to the | ||
| /// first byte is returned. The buffer is not NUL terminated, which is why a | ||
| /// null `size` is rejected. | ||
| /// | ||
| /// # Safety | ||
| /// | ||
| /// `obj` is dereferenced unconditionally and must point to a live object; | ||
| /// a non-null `size` must be valid for a write of one `isize`. | ||
| #[unsafe(no_mangle)] | ||
| pub unsafe extern "C" fn PyUnicode_AsUTF8AndSize( | ||
| obj: *mut PyObject, | ||
| size: *mut isize, | ||
| ) -> *const c_char { | ||
| with_vm(|vm| { | ||
| // SAFETY: the caller guarantees `obj` points to a live object. | ||
| let py_str = unsafe { &*obj }.try_downcast_ref::<PyStr>(vm)?; | ||
|
|
||
| let Some(utf8) = py_str.to_str() else { | ||
| return Err(vm.new_system_error( | ||
| "PyUnicode_AsUTF8AndSize only supports UTF-8 or ASCII strings", | ||
| )); | ||
| }; | ||
|
|
||
| // Without a length out-parameter the caller could not find the end of the | ||
| // (unterminated) buffer, so a null `size` is an error. | ||
| if size.is_null() { | ||
| return Err( | ||
| vm.new_system_error("size argument to PyUnicode_AsUTF8AndSize cannot be null") | ||
| ); | ||
| } | ||
|
|
||
| let byte_len = utf8.len() as isize; | ||
| // SAFETY: `size` was checked to be non-null just above. | ||
| unsafe { size.write(byte_len) }; | ||
| Ok(utf8.as_ptr()) | ||
| }) | ||
| } | ||
|
|
||
| /// Encodes the `str` object `unicode` with the codec named by `encoding`. | ||
| /// | ||
| /// A null `encoding` selects `"utf-8"`; a null `errors` passes no error | ||
| /// handler name to the codec registry. Encoding or handler names that are not | ||
| /// valid UTF-8 are reported through `vm.new_system_error`. | ||
| /// | ||
| /// # Safety | ||
| /// | ||
| /// `unicode` is dereferenced unconditionally and must point to a live object; | ||
| /// non-null `encoding` / `errors` must point to NUL-terminated C strings. | ||
| #[unsafe(no_mangle)] | ||
| pub unsafe extern "C" fn PyUnicode_AsEncodedString( | ||
| unicode: *mut PyObject, | ||
| encoding: *const c_char, | ||
| errors: *const c_char, | ||
| ) -> *mut PyObject { | ||
| with_vm(|vm| { | ||
| // SAFETY: the caller guarantees `unicode` points to a live object. | ||
| let unicode = unsafe { &*unicode } | ||
| .try_downcast_ref::<PyStr>(vm)? | ||
| .to_owned(); | ||
| let encoding = if encoding.is_null() { | ||
| "utf-8" | ||
| } else { | ||
| // SAFETY: the caller guarantees a NUL-terminated string. | ||
| // Return an error instead of panicking: a panic must not escape an | ||
| // `extern "C"` function because of caller-supplied bytes. | ||
| unsafe { CStr::from_ptr(encoding) } | ||
| .to_str() | ||
| .map_err(|_| vm.new_system_error("encoding must be valid UTF-8"))? | ||
| }; | ||
| let errors = if errors.is_null() { | ||
| None | ||
| } else { | ||
| // SAFETY: the caller guarantees a NUL-terminated string. | ||
| let errors = unsafe { CStr::from_ptr(errors) } | ||
| .to_str() | ||
| .map_err(|_| vm.new_system_error("errors must be valid UTF-8"))?; | ||
| Some(vm.ctx.new_utf8_str(errors)) | ||
| }; | ||
| vm.state | ||
| .codec_registry | ||
| .encode_text(unicode, encoding, errors, vm) | ||
| }) | ||
| } | ||
|
|
||
| /// Replaces the string referenced by `*string` with its interned counterpart. | ||
| /// | ||
| /// The reference stored in `*string` is consumed and a reference to the | ||
| /// interned string is written back. When `string` or `*string` is null, or the | ||
| /// object is not exactly a `str`, the slot is left untouched. | ||
| /// | ||
| /// # Safety | ||
| /// | ||
| /// A non-null `string` must be valid for reads and writes, and a non-null | ||
| /// `*string` must be an owned reference to a live object. | ||
| #[unsafe(no_mangle)] | ||
| pub unsafe extern "C" fn PyUnicode_InternInPlace(string: *mut *mut PyObject) { | ||
| with_vm(|vm| { | ||
| // Nothing to intern without a slot or with an empty slot; checking here | ||
| // avoids building a `NonNull` from a null pointer. | ||
| if string.is_null() { | ||
| return; | ||
| } | ||
| // SAFETY: `string` is non-null and the caller guarantees it is readable. | ||
| let Some(raw) = NonNull::new(unsafe { *string }) else { | ||
| return; | ||
| }; | ||
|
|
||
| // SAFETY: the caller owns the reference stored in `*string`; it is taken over here. | ||
| let old_str = match unsafe { PyObjectRef::from_raw(raw) }.downcast_exact::<PyStr>(vm) { | ||
| Ok(exact) => exact, | ||
| Err(other) => { | ||
| // Not an exact `str`: give the reference back to the caller's slot | ||
| // instead of dropping it and panicking inside an `extern "C"` function. | ||
| core::mem::forget(other); | ||
| return; | ||
| } | ||
| }; | ||
|
|
||
| let interned: PyObjectRef = vm.ctx.intern_str(old_str).to_owned().into(); | ||
|
|
||
| // SAFETY: `string` is non-null and the caller guarantees it is writable. | ||
| unsafe { *string = interned.into_raw().as_ptr() } | ||
| }) | ||
| } | ||
|
|
||
| /// Compares the `str` object `unicode` with `size` bytes of UTF-8 at `string`. | ||
| /// | ||
| /// The result is true only when the buffer is valid UTF-8, the object can be | ||
| /// viewed as UTF-8, and both texts are equal. A null `string` compares as the | ||
| /// empty buffer when `size` is zero and as "not equal" otherwise. | ||
| /// | ||
| /// A negative `size` is reported through `vm.new_system_error`. | ||
| /// NOTE(review): how `with_vm` maps that error onto the `c_int` result is not | ||
| /// visible here; confirm that C callers cannot mistake it for "equal". | ||
| /// | ||
| /// # Safety | ||
| /// | ||
| /// `unicode` is dereferenced unconditionally and must point to a live object; | ||
| /// a non-null `string` must be valid for reads of `size` bytes. | ||
| #[unsafe(no_mangle)] | ||
| pub unsafe extern "C" fn PyUnicode_EqualToUTF8AndSize( | ||
| unicode: *mut PyObject, | ||
| string: *const c_char, | ||
| size: isize, | ||
| ) -> c_int { | ||
| with_vm(|vm| { | ||
| let size: usize = size.try_into().map_err(|_| { | ||
| vm.new_system_error("Negative size passed to PyUnicode_EqualToUTF8AndSize") | ||
| })?; | ||
|
|
||
| // SAFETY: the caller guarantees `unicode` points to a live object. | ||
| let unicode = unsafe { &*unicode }.try_downcast_ref::<PyStr>(vm)?; | ||
|
|
||
| let bytes: &[u8] = if string.is_null() { | ||
| // `slice::from_raw_parts` requires a non-null pointer even for length 0. | ||
| if size != 0 { | ||
| return Ok(false); | ||
| } | ||
| &[] | ||
| } else { | ||
| // SAFETY: `string` is non-null and the caller guarantees `size` readable bytes. | ||
| unsafe { slice::from_raw_parts(string.cast::<u8>(), size) } | ||
| }; | ||
|
|
||
| // Invalid UTF-8 on either side can never compare equal. | ||
| let result = match str::from_utf8(bytes) { | ||
| Ok(other) => unicode.to_str() == Some(other), | ||
| Err(_) => false, | ||
| }; | ||
|
|
||
| Ok(result) | ||
| }) | ||
| } | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
|
|
||
| // Currently compiled out via `#[cfg(false)]`; exercises the unicode C-API through pyo3. | ||
| #[cfg(false)] | ||
| mod tests { | ||
| use pyo3::intern; | ||
| use pyo3::prelude::*; | ||
| use pyo3::types::PyString; | ||
|
|
||
| // Creates a string, checks its type and reads the text back two ways. | ||
| #[test] | ||
| fn test_unicode() { | ||
| Python::attach(|py| { | ||
| let text = "Hello, World!"; | ||
| let greeting = PyString::new(py, text); | ||
| assert!(greeting.is_instance_of::<PyString>()); | ||
| assert_eq!(greeting.to_str().unwrap(), text); | ||
| assert_eq!(greeting, text); | ||
| }) | ||
| } | ||
|
|
||
| // Interning a literal must complete without panicking. | ||
| #[test] | ||
| fn test_intern_str() { | ||
| Python::attach(|py| { | ||
| let _interned = intern!(py, "Hello, World!"); | ||
| }) | ||
| } | ||
| } | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.