Skip to content

Commit 7835f58

Browse files
committed
PyBytesRef and other input refactor
1 parent 71e0516 commit 7835f58

11 files changed

Lines changed: 350 additions & 126 deletions

File tree

src/deserialize/deserializer.rs

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
2-
// Copyright ijl (2022-2025), Aarni Koskela (2021), Eric Jolibois (2021)
2+
// Copyright ijl (2023-2026), Eric Jolibois (2021), Aarni Koskela (2021)
33

44
use crate::deserialize::DeserializeError;
5-
use crate::deserialize::utf8::read_input_to_buf;
5+
use crate::deserialize::input::read_input_to_buf;
66
use crate::typeref::EMPTY_UNICODE;
77
use core::ptr::NonNull;
88

@@ -15,16 +15,19 @@ pub(crate) fn deserialize(
1515

1616
if buffer.len() == 2 {
1717
cold_path!();
18-
if buffer == b"[]" {
19-
return Ok(nonnull!(ffi!(PyList_New(0))));
20-
} else if buffer == b"{}" {
21-
return Ok(nonnull!(ffi!(PyDict_New())));
22-
} else if buffer == b"\"\"" {
23-
unsafe { return Ok(nonnull!(use_immortal!(EMPTY_UNICODE))) }
18+
match buffer.as_bytes() {
19+
b"[]" => {
20+
return Ok(nonnull!(ffi!(PyList_New(0))));
21+
}
22+
b"{}" => {
23+
return Ok(nonnull!(ffi!(PyDict_New())));
24+
}
25+
b"\"\"" => {
26+
return Ok(nonnull!(use_immortal!(EMPTY_UNICODE)));
27+
}
28+
_ => {}
2429
}
2530
}
2631

27-
let buffer_str = unsafe { core::str::from_utf8_unchecked(buffer) };
28-
29-
crate::deserialize::backend::deserialize(buffer_str)
32+
crate::deserialize::backend::deserialize(buffer)
3033
}

src/deserialize/input.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// SPDX-License-Identifier: MPL-2.0
2+
// Copyright ijl (2025-2026)
3+
4+
use crate::deserialize::DeserializeError;
5+
use crate::ffi::{PyByteArrayRef, PyBytesRef, PyMemoryViewRef, PyStrRef};
6+
use crate::util::INVALID_STR;
7+
use std::borrow::Cow;
8+
9+
// Error text used when the input object is not one of the accepted types.
// This variant is compiled when the `CPython` cfg is set and lists memoryview.
#[cfg(CPython)]
10+
const INPUT_TYPE_MESSAGE: &str = "Input must be bytes, bytearray, memoryview, or str";
11+
12+
// Variant compiled when the `CPython` cfg is not set; the text omits memoryview.
#[cfg(not(CPython))]
13+
const INPUT_TYPE_MESSAGE: &str = "Input must be bytes, bytearray, or str";
14+
15+
/// Borrows the contents of a Python input object as a `&str` for deserialization.
///
/// `ptr` is tried, in order, as `PyBytesRef`, `PyStrRef`, `PyByteArrayRef`, and
/// `PyMemoryViewRef`; the first wrapper whose `from_ptr` succeeds supplies the
/// buffer through its `as_str()`.
///
/// # Errors
///
/// Returns `DeserializeError::invalid` carrying:
/// - `INPUT_TYPE_MESSAGE` when none of the wrappers accept `ptr`;
/// - `INVALID_STR` when the wrapper's `as_str()` returns `None`
///   (for `PyByteArrayRef` this means the bytes are not valid UTF-8;
///   presumably the same for the other wrappers — confirm);
/// - "Input is a zero-length, empty document" when the string is empty.
///
/// NOTE(review): the returned `'static` lifetime is not tied to `ptr`;
/// presumably the caller keeps the Python object alive while the string is
/// in use — confirm against callers.
pub(crate) fn read_input_to_buf(
16+
ptr: *mut crate::ffi::PyObject,
17+
) -> Result<&'static str, DeserializeError<'static>> {
18+
let buffer: Option<&'static str>;
19+
// Probe the supported input types one at a time; the first match wins.
if let Ok(ob) = PyBytesRef::from_ptr(ptr) {
20+
buffer = ob.as_str();
21+
} else if let Ok(ob) = PyStrRef::from_ptr(ptr) {
22+
buffer = ob.as_str();
23+
} else if let Ok(ob) = PyByteArrayRef::from_ptr(ptr) {
24+
buffer = ob.as_str();
25+
} else if let Ok(ob) = PyMemoryViewRef::from_ptr(ptr) {
26+
buffer = ob.as_str();
27+
} else {
28+
// No wrapper accepted the object: report an unsupported input type.
return Err(DeserializeError::invalid(Cow::Borrowed(INPUT_TYPE_MESSAGE)));
29+
}
30+
// `None` means the wrapper could not expose its contents as a string.
match buffer {
31+
Some(as_str) => {
32+
// An empty document is rejected here, before any parsing is attempted.
if as_str.is_empty() {
33+
cold_path!();
34+
Err(DeserializeError::invalid(Cow::Borrowed(
35+
"Input is a zero-length, empty document",
36+
)))
37+
} else {
38+
Ok(as_str)
39+
}
40+
}
41+
None => {
42+
cold_path!();
43+
Err(DeserializeError::invalid(Cow::Borrowed(INVALID_STR)))
44+
}
45+
}
46+
}

src/deserialize/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
2-
// Copyright ijl (2020-2025), Eric Jolibois (2021)
2+
// Copyright ijl (2020-2026), Eric Jolibois (2021)
33

44
mod backend;
55
#[cfg(not(Py_GIL_DISABLED))]
66
mod cache;
77
mod deserializer;
88
mod error;
9+
mod input;
910
mod pyobject;
10-
mod utf8;
1111

1212
#[cfg(not(Py_GIL_DISABLED))]
1313
pub(crate) use cache::{KEY_MAP, KeyMap};

src/deserialize/utf8.rs

Lines changed: 0 additions & 101 deletions
This file was deleted.

src/ffi/buffer.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
2-
// Copyright ijl (2021-2025), Baul (2020)
2+
// Copyright ijl (2021-2026), Baul (2020)
33

44
use crate::ffi::{Py_buffer, Py_hash_t, Py_ssize_t, PyObject, PyVarObject};
55
use core::ffi::c_int;
@@ -30,5 +30,5 @@ pub(crate) struct PyMemoryViewObject {
3030
#[allow(non_snake_case)]
3131
#[inline(always)]
3232
pub(crate) unsafe fn PyMemoryView_GET_BUFFER(op: *mut PyObject) -> *const Py_buffer {
33-
unsafe { &(*op.cast::<PyMemoryViewObject>()).view }
33+
unsafe { &raw const (*op.cast::<PyMemoryViewObject>()).view }
3434
}

src/ffi/mod.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,31 @@
33

44
#[cfg(Py_GIL_DISABLED)]
55
mod atomiculong;
6+
#[cfg(CPython)]
67
mod buffer;
78
mod bytes;
89
pub(crate) mod compat;
910
mod fragment;
1011
mod long;
12+
mod pybytearrayref;
13+
mod pybytesref;
14+
mod pymemoryview;
1115
mod pystrref;
16+
mod utf8;
1217

1318
pub(crate) use compat::*;
1419

1520
pub(crate) use long::pylong_is_unsigned;
1621
#[cfg(feature = "inline_int")]
1722
pub(crate) use long::{pylong_fits_in_i32, pylong_get_inline_value, pylong_is_zero};
1823

24+
#[allow(unused)]
1925
pub(crate) use {
20-
buffer::PyMemoryView_GET_BUFFER,
2126
bytes::{PyBytes_AS_STRING, PyBytes_GET_SIZE, PyBytesObject},
2227
fragment::{Fragment, orjson_fragmenttype_new},
28+
pybytearrayref::{PyByteArrayRef, PyByteArrayRefError},
29+
pybytesref::{PyBytesRef, PyBytesRefError},
30+
pymemoryview::{PyMemoryViewRef, PyMemoryViewRefError},
2331
pystrref::{PyStrRef, PyStrSubclassRef, set_str_create_fn},
2432
};
2533

@@ -58,6 +66,9 @@ pub(crate) use pyo3_ffi::PyErr_Restore;
5866
#[cfg(CPython)]
5967
pub(crate) use pyo3_ffi::{PyObject_CallMethodNoArgs, PyObject_CallMethodOneArg};
6068

69+
#[cfg(CPython)]
70+
pub(crate) use buffer::PyMemoryView_GET_BUFFER;
71+
6172
#[cfg(not(feature = "inline_str"))]
6273
pub(crate) use pyo3_ffi::{PyUnicode_DATA, PyUnicode_KIND};
6374

src/ffi/pybytearrayref.rs

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// SPDX-License-Identifier: MPL-2.0
2+
// Copyright ijl (2026)
3+
4+
/// Error returned by `PyByteArrayRef::from_ptr`.
pub(crate) enum PyByteArrayRefError {
5+
/// The object's type pointer is not `PyByteArray_Type`.
NotType,
6+
}
7+
8+
/// Wrapper around a non-null `PyObject` pointer whose type was checked to be
/// `PyByteArray_Type` by `from_ptr`.
///
/// `#[repr(transparent)]` gives it the same layout as the single pointer field.
/// NOTE(review): nothing visible here adjusts the Python reference count when
/// the wrapper is created or cloned; presumably it is a borrowed reference —
/// confirm.
#[derive(Clone)]
9+
#[repr(transparent)]
10+
pub(crate) struct PyByteArrayRef {
11+
// Pointer to the underlying Python object; guaranteed non-null.
ptr: core::ptr::NonNull<pyo3_ffi::PyObject>,
12+
}
13+
14+
// Manually asserts that the wrapper may be moved to and shared between threads.
// NOTE(review): the invariant that makes this sound is not stated in this file;
// presumably access is serialized by the interpreter — confirm and record it
// in a SAFETY comment.
unsafe impl Send for PyByteArrayRef {}
15+
unsafe impl Sync for PyByteArrayRef {}
16+
17+
// Equality is pointer identity: two wrappers are equal only when they refer to
// the same Python object. Byte contents are not compared.
impl PartialEq for PyByteArrayRef {
18+
fn eq(&self, other: &Self) -> bool {
19+
self.ptr == other.ptr
20+
}
21+
}
22+
23+
// Construction and accessors for `PyByteArrayRef`.
impl PyByteArrayRef {
24+
/// Wraps `ptr` if the object's type pointer equals `PyByteArray_Type`.
///
/// The comparison is on the type pointer itself, so only an exact type
/// match is accepted. `ptr` must be non-null: this is checked only by
/// `debug_assert!`, and `NonNull::new_unchecked` relies on it.
///
/// # Errors
///
/// Returns `PyByteArrayRefError::NotType` when the type pointer differs.
#[inline]
25+
pub fn from_ptr(ptr: *mut pyo3_ffi::PyObject) -> Result<Self, PyByteArrayRefError> {
26+
unsafe {
27+
debug_assert!(!ptr.is_null());
28+
if ob_type!(ptr) == &raw mut crate::ffi::PyByteArray_Type {
29+
Ok(Self {
30+
ptr: core::ptr::NonNull::new_unchecked(ptr),
31+
})
32+
} else {
33+
Err(PyByteArrayRefError::NotType)
34+
}
35+
}
36+
}
37+
38+
/// Returns the wrapped object as a raw `*mut PyObject`.
#[inline]
39+
pub fn as_ptr(&self) -> *mut pyo3_ffi::PyObject {
40+
self.ptr.as_ptr()
41+
}
42+
43+
/// Returns the wrapped object as a `NonNull<PyObject>`.
#[allow(unused)]
44+
#[inline]
45+
pub fn as_non_null_ptr(&self) -> core::ptr::NonNull<pyo3_ffi::PyObject> {
46+
self.ptr
47+
}
48+
49+
/// Views the bytearray's contents as a byte slice without copying.
///
/// The slice is built from the pointer returned by `PyByteArray_AsString`
/// and the length returned by `PyByteArray_Size`.
///
/// NOTE(review): the `'static` lifetime is not tied to `self` or to the
/// Python object; presumably callers guarantee that the bytearray stays
/// alive and is not resized or mutated while the slice is in use — confirm.
#[inline]
50+
pub fn as_bytes(&self) -> &'static [u8] {
51+
unsafe {
52+
core::slice::from_raw_parts(
53+
crate::ffi::PyByteArray_AsString(self.as_ptr())
54+
.cast::<u8>()
55+
.cast_const(),
56+
crate::util::isize_to_usize(crate::ffi::PyByteArray_Size(self.as_ptr())),
57+
)
58+
}
59+
}
60+
61+
/// Views the bytearray's contents as a `&str`.
///
/// Returns `None` when `crate::ffi::utf8::is_valid_utf8` rejects the bytes.
/// The same lifetime caveat as `as_bytes` applies to the returned string.
#[inline]
62+
pub fn as_str(&self) -> Option<&'static str> {
63+
let buffer = self.as_bytes();
64+
if !crate::ffi::utf8::is_valid_utf8(buffer) {
65+
cold_path!();
66+
None
67+
} else {
68+
// The bytes passed the validity check above, so the unchecked conversion is used.
unsafe { Some(core::str::from_utf8_unchecked(buffer)) }
69+
}
70+
}
71+
}

0 commit comments

Comments
 (0)