Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
array
  • Loading branch information
youknowone committed Nov 29, 2025
commit 2585c5e86bf832f037c23957686e47c8662c94f0
311 changes: 280 additions & 31 deletions crates/vm/src/stdlib/ctypes/array.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
use crate::builtins::PyBytes;
use crate::atomic_func;
use crate::builtins::{PyBytes, PyInt};
use crate::convert::ToPyObject;
use crate::protocol::PyNumberMethods;
use crate::types::{AsNumber, Callable};
use crate::{Py, PyObjectRef, PyPayload};
use crate::function::FuncArgs;
use crate::protocol::{PyNumberMethods, PySequenceMethods};
use crate::types::{AsNumber, AsSequence, Callable};
use crate::{AsObject, Py, PyObjectRef, PyPayload};
use crate::{
PyResult, VirtualMachine,
builtins::{PyType, PyTypeRef},
Expand All @@ -11,6 +13,7 @@ use crate::{
use crossbeam_utils::atomic::AtomicCell;
use num_traits::ToPrimitive;
use rustpython_common::lock::PyRwLock;
use rustpython_vm::stdlib::ctypes::_ctypes::get_size;
use rustpython_vm::stdlib::ctypes::base::PyCData;

#[pyclass(name = "PyCArrayType", base = PyType, module = "_ctypes")]
Expand All @@ -28,12 +31,34 @@ impl std::fmt::Debug for PyCArrayType {
}

impl Callable for PyCArrayType {
type Args = ();
fn call(zelf: &Py<Self>, _args: Self::Args, vm: &VirtualMachine) -> PyResult {
type Args = FuncArgs;
fn call(zelf: &Py<Self>, args: Self::Args, vm: &VirtualMachine) -> PyResult {
// Create an instance of the array
let element_type = zelf.inner.typ.read().clone();
let length = zelf.inner.length.load();
let element_size = zelf.inner.element_size.load();
let total_size = element_size * length;
let mut buffer = vec![0u8; total_size];

// Initialize from positional arguments
for (i, value) in args.args.iter().enumerate() {
if i >= length {
break;
}
let offset = i * element_size;
if let Ok(int_val) = value.try_int(vm) {
let bytes = PyCArray::int_to_bytes(int_val.as_bigint(), element_size);
if offset + element_size <= buffer.len() {
buffer[offset..offset + element_size].copy_from_slice(&bytes);
}
}
}

Ok(PyCArray {
typ: PyRwLock::new(zelf.inner.typ.read().clone()),
length: AtomicCell::new(zelf.inner.length.load()),
value: PyRwLock::new(zelf.inner.value.read().clone()),
typ: PyRwLock::new(element_type),
length: AtomicCell::new(length),
element_size: AtomicCell::new(element_size),
buffer: PyRwLock::new(buffer),
}
.into_pyobject(vm))
}
Expand All @@ -49,22 +74,42 @@ impl Constructor for PyCArrayType {

#[pyclass(flags(IMMUTABLETYPE), with(Callable, Constructor, AsNumber))]
impl PyCArrayType {
#[pygetset(name = "_type_")]
fn typ(&self) -> PyTypeRef {
self.inner.typ.read().clone()
}

#[pygetset(name = "_length_")]
fn length(&self) -> usize {
self.inner.length.load()
}

#[pymethod]
fn __mul__(zelf: &Py<Self>, n: isize, vm: &VirtualMachine) -> PyResult {
if n < 0 {
return Err(vm.new_value_error(format!("Array length must be >= 0, not {n}")));
}
// Create a nested array type: (inner_type * inner_length) * n
// The new array's element type is the current array type
let inner_type = zelf.inner.typ.read().clone();
// The new array has n elements, each element is the current array type
// e.g., (c_int * 5) * 3 = Array of 3 elements, each is (c_int * 5)
let inner_length = zelf.inner.length.load();
let inner_element_size = zelf.inner.element_size.load();

// The element type of the new array is the current array type itself
let obj_ref: PyObjectRef = zelf.to_owned().into();
let current_array_type = obj_ref
.downcast::<PyType>()
.expect("PyCArrayType should be a PyType");

// Element size is the total size of the inner array
let new_element_size = inner_length * inner_element_size;

// Create a new array type where the element is the current array
Ok(PyCArrayType {
inner: PyCArray {
typ: PyRwLock::new(inner_type),
length: AtomicCell::new(inner_length * n as usize),
value: PyRwLock::new(vm.ctx.none()),
typ: PyRwLock::new(current_array_type),
length: AtomicCell::new(n as usize),
element_size: AtomicCell::new(new_element_size),
buffer: PyRwLock::new(vec![]),
},
}
.to_pyobject(vm))
Expand Down Expand Up @@ -101,7 +146,8 @@ impl AsNumber for PyCArrayType {
pub struct PyCArray {
pub(super) typ: PyRwLock<PyTypeRef>,
pub(super) length: AtomicCell<usize>,
pub(super) value: PyRwLock<PyObjectRef>,
pub(super) element_size: AtomicCell<usize>,
pub(super) buffer: PyRwLock<Vec<u8>>,
}

impl std::fmt::Debug for PyCArray {
Expand All @@ -114,48 +160,251 @@ impl std::fmt::Debug for PyCArray {
}

impl Constructor for PyCArray {
type Args = (PyTypeRef, usize);
type Args = FuncArgs;

fn py_new(cls: PyTypeRef, args: Self::Args, vm: &VirtualMachine) -> PyResult {
Self {
typ: PyRwLock::new(args.0),
length: AtomicCell::new(args.1),
value: PyRwLock::new(vm.ctx.none()),
// Get _type_ and _length_ from the class
let type_attr = cls.as_object().get_attr("_type_", vm).ok();
let length_attr = cls.as_object().get_attr("_length_", vm).ok();

let element_type = type_attr.unwrap_or_else(|| vm.ctx.types.object_type.to_owned().into());
let length = if let Some(len_obj) = length_attr {
len_obj.try_int(vm)?.as_bigint().to_usize().unwrap_or(0)
} else {
0
};

// Get element size from _type_
let element_size = if let Ok(type_code) = element_type.get_attr("_type_", vm) {
if let Ok(s) = type_code.str(vm) {
let s = s.to_string();
if s.len() == 1 {
get_size(&s)
} else {
std::mem::size_of::<usize>()
}
} else {
std::mem::size_of::<usize>()
}
} else {
std::mem::size_of::<usize>()
};

let total_size = element_size * length;
let mut buffer = vec![0u8; total_size];

// Initialize from positional arguments
for (i, value) in args.args.iter().enumerate() {
if i >= length {
break;
}
let offset = i * element_size;
if let Ok(int_val) = value.try_int(vm) {
let bytes = Self::int_to_bytes(int_val.as_bigint(), element_size);
if offset + element_size <= buffer.len() {
buffer[offset..offset + element_size].copy_from_slice(&bytes);
}
}
}

let element_type_ref = element_type
.downcast::<PyType>()
.unwrap_or_else(|_| vm.ctx.types.object_type.to_owned());

PyCArray {
typ: PyRwLock::new(element_type_ref),
length: AtomicCell::new(length),
element_size: AtomicCell::new(element_size),
buffer: PyRwLock::new(buffer),
}
.into_ref_with_type(vm, cls)
.map(Into::into)
}
}

#[pyclass(flags(BASETYPE, IMMUTABLETYPE), with(Constructor))]
impl AsSequence for PyCArray {
fn as_sequence() -> &'static PySequenceMethods {
use std::sync::LazyLock;
static AS_SEQUENCE: LazyLock<PySequenceMethods> = LazyLock::new(|| PySequenceMethods {
length: atomic_func!(|seq, _vm| Ok(PyCArray::sequence_downcast(seq).length.load())),
item: atomic_func!(|seq, i, vm| {
PyCArray::getitem_by_index(PyCArray::sequence_downcast(seq), i, vm)
}),
ass_item: atomic_func!(|seq, i, value, vm| {
let zelf = PyCArray::sequence_downcast(seq);
match value {
Some(v) => PyCArray::setitem_by_index(zelf, i, v, vm),
None => Err(vm.new_type_error("cannot delete array elements".to_owned())),
}
}),
..PySequenceMethods::NOT_IMPLEMENTED
});
&AS_SEQUENCE
}
}

#[pyclass(flags(BASETYPE, IMMUTABLETYPE), with(Constructor, AsSequence))]
impl PyCArray {
fn int_to_bytes(i: &malachite_bigint::BigInt, size: usize) -> Vec<u8> {
match size {
1 => vec![i.to_i8().unwrap_or(0) as u8],
2 => i.to_i16().unwrap_or(0).to_ne_bytes().to_vec(),
4 => i.to_i32().unwrap_or(0).to_ne_bytes().to_vec(),
8 => i.to_i64().unwrap_or(0).to_ne_bytes().to_vec(),
_ => vec![0u8; size],
}
}

fn bytes_to_int(bytes: &[u8], size: usize, vm: &VirtualMachine) -> PyObjectRef {
match size {
1 => vm.ctx.new_int(bytes[0] as i8).into(),
2 => {
let val = i16::from_ne_bytes([bytes[0], bytes[1]]);
vm.ctx.new_int(val).into()
}
4 => {
let val = i32::from_ne_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
vm.ctx.new_int(val).into()
}
8 => {
let val = i64::from_ne_bytes([
bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
]);
vm.ctx.new_int(val).into()
}
_ => vm.ctx.new_int(0).into(),
}
}
Comment on lines +258 to +277
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Potential panic if buffer is too short.

The bytes_to_int function accesses bytes[0], bytes[1], etc. without verifying that the slice has enough elements. If called with an undersized slice, this will panic.

     fn bytes_to_int(bytes: &[u8], size: usize, vm: &VirtualMachine) -> PyObjectRef {
+        if bytes.len() < size {
+            return vm.ctx.new_int(0).into();
+        }
         match size {
             1 => vm.ctx.new_int(bytes[0] as i8).into(),
             2 => {
-                let val = i16::from_ne_bytes([bytes[0], bytes[1]]);
+                let val = i16::from_ne_bytes(bytes[..2].try_into().unwrap());
                 vm.ctx.new_int(val).into()
             }
             4 => {
-                let val = i32::from_ne_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
+                let val = i32::from_ne_bytes(bytes[..4].try_into().unwrap());
                 vm.ctx.new_int(val).into()
             }
             8 => {
-                let val = i64::from_ne_bytes([
-                    bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
-                ]);
+                let val = i64::from_ne_bytes(bytes[..8].try_into().unwrap());
                 vm.ctx.new_int(val).into()
             }
             _ => vm.ctx.new_int(0).into(),
         }
     }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
fn bytes_to_int(bytes: &[u8], size: usize, vm: &VirtualMachine) -> PyObjectRef {
match size {
1 => vm.ctx.new_int(bytes[0] as i8).into(),
2 => {
let val = i16::from_ne_bytes([bytes[0], bytes[1]]);
vm.ctx.new_int(val).into()
}
4 => {
let val = i32::from_ne_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
vm.ctx.new_int(val).into()
}
8 => {
let val = i64::from_ne_bytes([
bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
]);
vm.ctx.new_int(val).into()
}
_ => vm.ctx.new_int(0).into(),
}
}
fn bytes_to_int(bytes: &[u8], size: usize, vm: &VirtualMachine) -> PyObjectRef {
if bytes.len() < size {
return vm.ctx.new_int(0).into();
}
match size {
1 => vm.ctx.new_int(bytes[0] as i8).into(),
2 => {
let val = i16::from_ne_bytes(bytes[..2].try_into().unwrap());
vm.ctx.new_int(val).into()
}
4 => {
let val = i32::from_ne_bytes(bytes[..4].try_into().unwrap());
vm.ctx.new_int(val).into()
}
8 => {
let val = i64::from_ne_bytes(bytes[..8].try_into().unwrap());
vm.ctx.new_int(val).into()
}
_ => vm.ctx.new_int(0).into(),
}
}
🤖 Prompt for AI Agents
crates/vm/src/stdlib/ctypes/array.rs around lines 258 to 277: the bytes_to_int
function indexes bytes[0..7] without checking slice length which can panic on
short slices; validate the slice length against the requested size before
indexing and handle the error path instead of panicking — either return a Python
exception (e.g. raise a ValueError/IndexError via the VM context) or return a
safe default; implement a guard like if bytes.len() < size { return
vm.ctx.new_value_error("buffer too small").into() } (or equivalent using VM
APIs) then proceed to construct integers using safe indexing or by copying into
fixed-size arrays after confirming length.


fn getitem_by_index(zelf: &PyCArray, i: isize, vm: &VirtualMachine) -> PyResult {
let length = zelf.length.load() as isize;
let index = if i < 0 { length + i } else { i };
if index < 0 || index >= length {
return Err(vm.new_index_error("array index out of range".to_owned()));
}
let index = index as usize;
let element_size = zelf.element_size.load();
let offset = index * element_size;
let buffer = zelf.buffer.read();
if offset + element_size <= buffer.len() {
let bytes = &buffer[offset..offset + element_size];
Ok(Self::bytes_to_int(bytes, element_size, vm))
} else {
Ok(vm.ctx.new_int(0).into())
}
}

fn setitem_by_index(
zelf: &PyCArray,
i: isize,
value: PyObjectRef,
vm: &VirtualMachine,
) -> PyResult<()> {
let length = zelf.length.load() as isize;
let index = if i < 0 { length + i } else { i };
if index < 0 || index >= length {
return Err(vm.new_index_error("array index out of range".to_owned()));
}
let index = index as usize;
let element_size = zelf.element_size.load();
let offset = index * element_size;

let int_val = value.try_int(vm)?;
let bytes = Self::int_to_bytes(int_val.as_bigint(), element_size);

let mut buffer = zelf.buffer.write();
if offset + element_size <= buffer.len() {
buffer[offset..offset + element_size].copy_from_slice(&bytes);
}
Ok(())
}

#[pymethod]
fn __getitem__(&self, index: PyObjectRef, vm: &VirtualMachine) -> PyResult {
if let Some(i) = index.downcast_ref::<PyInt>() {
let i = i.as_bigint().to_isize().ok_or_else(|| {
vm.new_index_error("cannot fit index into an index-sized integer".to_owned())
})?;
Self::getitem_by_index(self, i, vm)
} else {
Err(vm.new_type_error("array indices must be integers".to_owned()))
}
}

#[pymethod]
fn __setitem__(
&self,
index: PyObjectRef,
value: PyObjectRef,
vm: &VirtualMachine,
) -> PyResult<()> {
if let Some(i) = index.downcast_ref::<PyInt>() {
let i = i.as_bigint().to_isize().ok_or_else(|| {
vm.new_index_error("cannot fit index into an index-sized integer".to_owned())
})?;
Self::setitem_by_index(self, i, value, vm)
} else {
Err(vm.new_type_error("array indices must be integers".to_owned()))
}
}

#[pymethod]
fn __len__(&self) -> usize {
self.length.load()
}

#[pygetset(name = "_type_")]
fn typ(&self) -> PyTypeRef {
self.typ.read().clone()
}

#[pygetset(name = "_length_")]
fn length(&self) -> usize {
fn length_getter(&self) -> usize {
self.length.load()
}

#[pygetset]
fn value(&self) -> PyObjectRef {
self.value.read().clone()
fn value(&self, vm: &VirtualMachine) -> PyObjectRef {
// Return bytes representation of the buffer
let buffer = self.buffer.read();
vm.ctx.new_bytes(buffer.clone()).into()
}

#[pygetset(setter)]
fn set_value(&self, value: PyObjectRef) {
*self.value.write() = value;
fn set_value(&self, value: PyObjectRef, _vm: &VirtualMachine) -> PyResult<()> {
if let Some(bytes) = value.downcast_ref::<PyBytes>() {
let mut buffer = self.buffer.write();
let src = bytes.as_bytes();
let len = std::cmp::min(src.len(), buffer.len());
buffer[..len].copy_from_slice(&src[..len]);
}
Ok(())
}
Comment on lines +374 to +382
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

set_value silently ignores non-bytes input.

When value is not a PyBytes, the function returns Ok(()) without any indication that the value wasn't set. This could lead to confusing behavior where assignments appear to succeed but have no effect.

Consider raising a type error for non-bytes input:

     fn set_value(&self, value: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> {
         if let Some(bytes) = value.downcast_ref::<PyBytes>() {
             let mut buffer = self.buffer.write();
             let src = bytes.as_bytes();
             let len = std::cmp::min(src.len(), buffer.len());
             buffer[..len].copy_from_slice(&src[..len]);
+            Ok(())
+        } else {
+            Err(vm.new_type_error("expected bytes".to_owned()))
         }
-        Ok(())
     }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
fn set_value(&self, value: PyObjectRef, _vm: &VirtualMachine) -> PyResult<()> {
if let Some(bytes) = value.downcast_ref::<PyBytes>() {
let mut buffer = self.buffer.write();
let src = bytes.as_bytes();
let len = std::cmp::min(src.len(), buffer.len());
buffer[..len].copy_from_slice(&src[..len]);
}
Ok(())
}
fn set_value(&self, value: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> {
if let Some(bytes) = value.downcast_ref::<PyBytes>() {
let mut buffer = self.buffer.write();
let src = bytes.as_bytes();
let len = std::cmp::min(src.len(), buffer.len());
buffer[..len].copy_from_slice(&src[..len]);
Ok(())
} else {
Err(vm.new_type_error("expected bytes".to_owned()))
}
}
🤖 Prompt for AI Agents
In crates/vm/src/stdlib/ctypes/array.rs around lines 374 to 382, the set_value
function currently returns Ok(()) when value is not a PyBytes which silently
ignores the assignment; change this to return a Python TypeError instead. Update
the function to check if value.downcast_ref::<PyBytes>() is Some and perform the
copy as before, but if it is None return Err(vm.new_type_error("expected a
bytes-like object".to_owned())) (or a similar clear message), so non-bytes input
raises an error rather than silently succeeding.


#[pygetset]
fn raw(&self, vm: &VirtualMachine) -> PyObjectRef {
let buffer = self.buffer.read();
vm.ctx.new_bytes(buffer.clone()).into()
}

#[pygetset(setter)]
fn set_raw(&self, value: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> {
if let Some(bytes) = value.downcast_ref::<PyBytes>() {
let mut buffer = self.buffer.write();
let src = bytes.as_bytes();
let len = std::cmp::min(src.len(), buffer.len());
buffer[..len].copy_from_slice(&src[..len]);
Ok(())
} else {
Err(vm.new_type_error("expected bytes".to_owned()))
}
}
}

impl PyCArray {
#[allow(unused)]
pub fn to_arg(&self, _vm: &VirtualMachine) -> PyResult<libffi::middle::Arg> {
let value = self.value.read();
let py_bytes = value.downcast_ref::<PyBytes>().unwrap();
let bytes = py_bytes.payload().to_vec();
Ok(libffi::middle::Arg::new(&bytes))
let buffer = self.buffer.read();
Ok(libffi::middle::Arg::new(&buffer.clone()))
}
Comment on lines 404 to 412
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Use-after-free: Arg::new references a temporary that is immediately dropped.

libffi::middle::Arg::new(&buffer.clone()) creates an Arg referencing a cloned Vec<u8> that is dropped at the end of the expression, leaving the Arg pointing to freed memory. This will cause undefined behavior when the Arg is used.

The to_arg implementation needs to ensure the buffer lives long enough. This likely requires a different approach, such as storing the buffer in a way that outlives the FFI call, or using a raw pointer approach that matches how ctypes handles memory for FFI.

     pub fn to_arg(&self, _vm: &VirtualMachine) -> PyResult<libffi::middle::Arg> {
-        let buffer = self.buffer.read();
-        Ok(libffi::middle::Arg::new(&buffer.clone()))
+        // TODO: This needs a different approach to ensure buffer lifetime
+        // The buffer must outlive the Arg returned here
+        let buffer = self.buffer.read();
+        let ptr = buffer.as_ptr();
+        Ok(libffi::middle::Arg::new(&ptr))
     }

Note: The suggested fix passes a pointer, but the overall lifetime management for FFI calls may need architectural review.

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In crates/vm/src/stdlib/ctypes/array.rs around lines 404–409, the current
implementation calls libffi::middle::Arg::new(&buffer.clone()), creating an Arg
that points at a temporary Vec<u8> that is dropped immediately (use-after-free).
Fix by ensuring the buffer outlives the Arg: do not pass a reference to a
temporary clone; instead make the byte storage owned and stored with a lifetime
tied to the PyCArray (e.g., store Arc<Vec<u8>> or Box<[u8]>/Vec<u8> as a field
or in a dedicated arg_storage on self), create the Arg from a reference to that
owned storage (or use .as_ptr() with explicit length and ensure ownership
remains valid until the ffi call completes). Update PyCArray fields and to_arg
to return an Arg that references the persistent storage rather than a dropped
temporary.

}
Loading