Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix ctypes
  • Loading branch information
youknowone committed Dec 23, 2025
commit 79abbf0b29d5183b0885e45ea9991bd8d046d702
102 changes: 76 additions & 26 deletions crates/vm/src/stdlib/ctypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ pub(crate) fn make_module(vm: &VirtualMachine) -> PyRef<PyModule> {
pointer::PyCPointerType::make_class(ctx);
structure::PyCStructType::make_class(ctx);
union::PyCUnionType::make_class(ctx);
function::PyCFuncPtrType::make_class(ctx);
extend_module!(vm, &module, {
"_CData" => PyCData::make_class(ctx),
"_SimpleCData" => PyCSimple::make_class(ctx),
Expand Down Expand Up @@ -385,12 +386,8 @@ pub(crate) mod _ctypes {
#[pyattr]
const RTLD_GLOBAL: i32 = 0;

#[cfg(target_os = "windows")]
#[pyattr]
const SIZEOF_TIME_T: usize = 8;
#[cfg(not(target_os = "windows"))]
#[pyattr]
const SIZEOF_TIME_T: usize = 4;
const SIZEOF_TIME_T: usize = std::mem::size_of::<libc::time_t>();

#[pyattr]
const CTYPES_MAX_ARGCOUNT: usize = 1024;
Expand Down Expand Up @@ -578,30 +575,42 @@ pub(crate) mod _ctypes {
#[pyfunction(name = "dlopen")]
fn load_library_unix(
name: Option<crate::function::FsPath>,
_load_flags: OptionalArg<i32>,
load_flags: OptionalArg<i32>,
vm: &VirtualMachine,
) -> PyResult<usize> {
// TODO: audit functions first
// TODO: load_flags
// Default mode: RTLD_NOW | RTLD_LOCAL, always force RTLD_NOW
let mode = load_flags.unwrap_or(libc::RTLD_NOW | libc::RTLD_LOCAL) | libc::RTLD_NOW;

match name {
Some(name) => {
let cache = library::libcache();
let mut cache_write = cache.write();
let os_str = name.as_os_str(vm)?;
let (id, _) = cache_write.get_or_insert_lib(&*os_str, vm).map_err(|e| {
// Include filename in error message for better diagnostics
let name_str = os_str.to_string_lossy();
vm.new_os_error(format!("{}: {}", name_str, e))
})?;
let (id, _) = cache_write
.get_or_insert_lib_with_mode(&*os_str, mode, vm)
.map_err(|e| {
let name_str = os_str.to_string_lossy();
vm.new_os_error(format!("{}: {}", name_str, e))
})?;
Ok(id)
}
None => {
// If None, call libc::dlopen(null, mode) to get the current process handle
let handle = unsafe { libc::dlopen(std::ptr::null(), libc::RTLD_NOW) };
// dlopen(NULL, mode) to get the current process handle (for pythonapi)
let handle = unsafe { libc::dlopen(std::ptr::null(), mode) };
if handle.is_null() {
return Err(vm.new_os_error("dlopen() error"));
let err = unsafe { libc::dlerror() };
let msg = if err.is_null() {
"dlopen() error".to_string()
} else {
unsafe { std::ffi::CStr::from_ptr(err).to_string_lossy().into_owned() }
};
return Err(vm.new_os_error(msg));
}
Ok(handle as usize)
// Add to library cache so symbol lookup works
let cache = library::libcache();
let mut cache_write = cache.write();
let id = cache_write.insert_raw_handle(handle);
Ok(id)
}
}
}
Expand All @@ -614,6 +623,48 @@ pub(crate) mod _ctypes {
Ok(())
}

#[cfg(not(windows))]
#[pyfunction]
fn dlclose(handle: usize, _vm: &VirtualMachine) -> PyResult<()> {
// Remove from cache, which triggers SharedLibrary drop.
// libloading::Library calls dlclose automatically on Drop.
let cache = library::libcache();
let mut cache_write = cache.write();
cache_write.drop_lib(handle);
Ok(())
}

#[cfg(not(windows))]
#[pyfunction]
fn dlsym(
handle: usize,
name: crate::builtins::PyStrRef,
vm: &VirtualMachine,
) -> PyResult<usize> {
let symbol_name = std::ffi::CString::new(name.as_str())
.map_err(|_| vm.new_value_error("symbol name contains null byte"))?;

// Clear previous error
unsafe { libc::dlerror() };

let ptr = unsafe { libc::dlsym(handle as *mut libc::c_void, symbol_name.as_ptr()) };

// Check for error via dlerror first
let err = unsafe { libc::dlerror() };
if !err.is_null() {
let msg = unsafe { std::ffi::CStr::from_ptr(err).to_string_lossy().into_owned() };
return Err(vm.new_os_error(msg));
}

// Treat NULL symbol address as error
// This handles cases like GNU IFUNCs that resolve to NULL
if ptr.is_null() {
return Err(vm.new_os_error(format!("symbol '{}' not found", name.as_str())));
}

Ok(ptr as usize)
}
Comment on lines +637 to +666
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

# First, let's see the full context around the dlsym and dlclose functions
sed -n '620,670p' crates/vm/src/stdlib/ctypes.rs

Repository: RustPython/RustPython

Length of output: 1784


🏁 Script executed:

# Search for handle validation patterns in the file
rg -n 'handle' crates/vm/src/stdlib/ctypes.rs | head -30

Repository: RustPython/RustPython

Length of output: 758


🏁 Script executed:

# Look for the cache structure and how handles are stored/retrieved
rg -n 'get_lib\|LIBRARIES\|cache' crates/vm/src/stdlib/ctypes.rs | head -20

Repository: RustPython/RustPython

Length of output: 47


🏁 Script executed:

# Check the dlopen implementation to see how handles are initially stored
rg -n -B5 -A10 'dlopen' crates/vm/src/stdlib/ctypes.rs | head -50

Repository: RustPython/RustPython

Length of output: 1742


🏁 Script executed:

# Let's find the cache implementation to understand what insert_raw_handle returns
find . -name "*.rs" -type f | xargs rg -l 'insert_raw_handle\|libcache' | head -5

Repository: RustPython/RustPython

Length of output: 47


🏁 Script executed:

# Look for the library cache module definition
fd -name "*.rs" | xargs rg -l 'struct.*Cache\|libcache' | head -5

Repository: RustPython/RustPython

Length of output: 295


🏁 Script executed:

# Search for the cache implementation more broadly
rg -n 'fn insert_raw_handle\|fn drop_lib\|fn get_lib' --type=rs

Repository: RustPython/RustPython

Length of output: 91


🏁 Script executed:

# Let's check what the cache actually stores and returns
rg -B5 -A15 'insert_raw_handle' crates/vm/src/stdlib/ctypes.rs

Repository: RustPython/RustPython

Length of output: 701


🏁 Script executed:

# Search for the library module implementation
find crates -name "*.rs" -type f | xargs grep -l 'insert_raw_handle'

Repository: RustPython/RustPython

Length of output: 134


🏁 Script executed:

# Look for the library module files
find crates -name "library.rs" -o -name "lib.rs" | grep -E '(library|lib)\.rs'

Repository: RustPython/RustPython

Length of output: 541


🏁 Script executed:

# Search more broadly for the cache implementation
rg -A20 'fn libcache' crates/ --type rust

Repository: RustPython/RustPython

Length of output: 519


🏁 Script executed:

# Check if there's a library submodule in the ctypes file
head -50 crates/vm/src/stdlib/ctypes.rs

Repository: RustPython/RustPython

Length of output: 1571


🏁 Script executed:

# Now let's look at the ExternalLibs struct and cache implementation
cat crates/vm/src/stdlib/ctypes/library.rs | head -150

Repository: RustPython/RustPython

Length of output: 4843


🏁 Script executed:

# Look specifically for insert_raw_handle and drop_lib implementations
rg -A10 'fn insert_raw_handle|fn drop_lib' crates/vm/src/stdlib/ctypes/library.rs

Repository: RustPython/RustPython

Length of output: 621


🏁 Script executed:

# Check the type of ID returned by insert_raw_handle
rg -B5 -A15 'struct ExternalLibs' crates/vm/src/stdlib/ctypes/library.rs

Repository: RustPython/RustPython

Length of output: 462


Validate the handle exists before casting to dlsym.

Line 650 casts handle directly to *mut libc::c_void without verifying it's a valid library handle in the cache. This could allow passing freed or completely invalid handles to libc::dlsym, causing undefined behavior. While dlerror checking (lines 653–657) will catch actual dlsym failures, it doesn't prevent undefined behavior from invalid pointer casts.

Add a validation check using cache.get_lib(handle) to ensure the handle exists in the cache before passing it to dlsym, and return an error if not found.

🤖 Prompt for AI Agents
In crates/vm/src/stdlib/ctypes.rs around lines 637 to 666, the code currently
casts the numeric handle to *mut libc::c_void and calls dlsym without validating
the handle; add a validation step that looks up the handle in the library cache
(e.g. cache.get_lib(handle)) before calling dlsym, return an appropriate
PyResult error (vm.new_os_error or vm.new_value_error) if the handle is not
found, and use the validated library pointer from the cache when calling
libc::dlsym instead of casting the raw usize handle.


#[pyfunction(name = "POINTER")]
fn create_pointer_type(cls: PyObjectRef, vm: &VirtualMachine) -> PyResult {
use crate::builtins::PyStr;
Expand Down Expand Up @@ -905,25 +956,24 @@ pub(crate) mod _ctypes {

#[pyfunction]
fn get_errno() -> i32 {
errno::errno().0
super::function::get_errno_value()
}

#[pyfunction]
fn set_errno(value: i32) {
errno::set_errno(errno::Errno(value));
fn set_errno(value: i32) -> i32 {
super::function::set_errno_value(value)
}

#[cfg(windows)]
#[pyfunction]
fn get_last_error() -> PyResult<u32> {
Ok(unsafe { windows_sys::Win32::Foundation::GetLastError() })
Ok(super::function::get_last_error_value())
}

#[cfg(windows)]
#[pyfunction]
fn set_last_error(value: u32) -> PyResult<()> {
unsafe { windows_sys::Win32::Foundation::SetLastError(value) };
Ok(())
fn set_last_error(value: u32) -> u32 {
super::function::set_last_error_value(value)
}

#[pyattr]
Expand Down Expand Up @@ -1084,9 +1134,9 @@ pub(crate) mod _ctypes {
ffi_args.push(Arg::new(val));
}

let cif = Cif::new(arg_types, Type::isize());
let cif = Cif::new(arg_types, Type::c_int());
let code_ptr = CodePtr::from_ptr(func_addr as *const _);
let result: isize = unsafe { cif.call(code_ptr, &ffi_args) };
let result: libc::c_int = unsafe { cif.call(code_ptr, &ffi_args) };
Ok(vm.ctx.new_int(result).into())
}

Expand Down
44 changes: 38 additions & 6 deletions crates/vm/src/stdlib/ctypes/array.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,32 @@
use super::StgInfo;
use super::base::{CDATA_BUFFER_METHODS, PyCData};
use super::type_info;
use crate::{
AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, VirtualMachine,
atomic_func,
builtins::{PyBytes, PyInt, PyList, PySlice, PyStr, PyType, PyTypeRef},
builtins::{
PyBytes, PyInt, PyList, PySlice, PyStr, PyType, PyTypeRef, genericalias::PyGenericAlias,
},
class::StaticType,
function::{ArgBytesLike, FuncArgs, PySetterValue},
protocol::{BufferDescriptor, PyBuffer, PyNumberMethods, PySequenceMethods},
types::{AsBuffer, AsNumber, AsSequence, Constructor, Initializer},
};
use num_traits::{Signed, ToPrimitive};

/// Get itemsize from a PEP 3118 format string
/// Extracts the type code (last char after endianness prefix) and returns its size
fn get_size_from_format(fmt: &str) -> usize {
// Format is like "<f", ">q", etc. - strip endianness prefix and get type code
let code = fmt
.trim_start_matches(['<', '>', '@', '=', '!', '&'])
.chars()
.next()
.map(|c| c.to_string());
code.map(|c| type_info(&c).map(|t| t.size).unwrap_or(1))
.unwrap_or(1)
}

/// Creates array type for (element_type, length)
/// Uses _array_type_cache to ensure identical calls return the same type object
pub(super) fn array_type_from_ctype(
Expand Down Expand Up @@ -444,6 +460,11 @@ impl AsSequence for PyCArray {
with(Constructor, AsSequence, AsBuffer)
)]
impl PyCArray {
#[pyclassmethod]
fn __class_getitem__(cls: PyTypeRef, args: PyObjectRef, vm: &VirtualMachine) -> PyGenericAlias {
PyGenericAlias::from_args(cls, args, vm)
}

fn int_to_bytes(i: &malachite_bigint::BigInt, size: usize) -> Vec<u8> {
// Try unsigned first (handles values like 0xFFFFFFFF that overflow signed)
// then fall back to signed (handles negative values)
Expand Down Expand Up @@ -1056,27 +1077,38 @@ impl AsBuffer for PyCArray {
.expect("PyCArray type must have StgInfo");
let format = stg_info.format.clone();
let shape = stg_info.shape.clone();
let element_size = stg_info.element_size;

let desc = if let Some(fmt) = format
&& !shape.is_empty()
{
// itemsize is the size of the base element type (item_info->size)
// For empty arrays, we still need the element size, not 0
let total_elements: usize = shape.iter().product();
let has_zero_dim = shape.contains(&0);
let itemsize = if total_elements > 0 && buffer_len > 0 {
buffer_len / total_elements
} else {
// For empty arrays, get itemsize from format type code
get_size_from_format(&fmt)
};

// Build dim_desc from shape (C-contiguous: row-major order)
// stride[i] = product(shape[i+1:]) * itemsize
// For empty arrays (any dimension is 0), all strides are 0
let mut dim_desc = Vec::with_capacity(shape.len());
let mut stride = element_size as isize;
let mut stride = itemsize as isize;

// Calculate strides from innermost to outermost dimension
for &dim_size in shape.iter().rev() {
dim_desc.push((dim_size, stride, 0));
let current_stride = if has_zero_dim { 0 } else { stride };
dim_desc.push((dim_size, current_stride, 0));
stride *= dim_size as isize;
}
dim_desc.reverse();

BufferDescriptor {
len: buffer_len,
readonly: false,
itemsize: element_size,
itemsize,
format: std::borrow::Cow::Owned(fmt),
dim_desc,
}
Expand Down
76 changes: 49 additions & 27 deletions crates/vm/src/stdlib/ctypes/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -300,15 +300,27 @@ pub(super) fn get_field_format(
big_endian: bool,
vm: &VirtualMachine,
) -> String {
let endian_prefix = if big_endian { ">" } else { "<" };

// 1. Check StgInfo for format
if let Some(type_obj) = field_type.downcast_ref::<PyType>()
&& let Some(stg_info) = type_obj.stg_info_opt()
&& let Some(fmt) = &stg_info.format
{
// Handle endian prefix for simple types
if fmt.len() == 1 {
let endian_prefix = if big_endian { ">" } else { "<" };
return format!("{}{}", endian_prefix, fmt);
// For structures (T{...}), arrays ((n)...), and pointers (&...), return as-is
// These complex types have their own endianness markers inside
if fmt.starts_with('T')
|| fmt.starts_with('(')
|| fmt.starts_with('&')
|| fmt.starts_with("X{")
{
return fmt.clone();
}

// For simple types, replace existing endian prefix with the correct one
let base_fmt = fmt.trim_start_matches(['<', '>', '@', '=', '!']);
if !base_fmt.is_empty() {
return format!("{}{}", endian_prefix, base_fmt);
}
return fmt.clone();
}
Expand All @@ -318,8 +330,7 @@ pub(super) fn get_field_format(
&& let Some(type_str) = type_attr.downcast_ref::<PyStr>()
{
let s = type_str.as_str();
if s.len() == 1 {
let endian_prefix = if big_endian { ">" } else { "<" };
if !s.is_empty() {
return format!("{}{}", endian_prefix, s);
}
return s.to_string();
Expand Down Expand Up @@ -1168,29 +1179,30 @@ impl PyCData {
.ok_or_else(|| vm.new_value_error("Invalid library handle"))?
};

// Get symbol address using platform-specific API
let symbol_name = std::ffi::CString::new(name.as_str())
.map_err(|_| vm.new_value_error("Invalid symbol name"))?;

#[cfg(windows)]
let ptr: *const u8 = unsafe {
match windows_sys::Win32::System::LibraryLoader::GetProcAddress(
handle as windows_sys::Win32::Foundation::HMODULE,
symbol_name.as_ptr() as *const u8,
) {
Some(p) => p as *const u8,
None => std::ptr::null(),
// Look up the library in the cache and use lib.get() for symbol lookup
let library_cache = super::library::libcache().read();
let library = library_cache
.get_lib(handle)
.ok_or_else(|| vm.new_value_error("Library not found"))?;
let inner_lib = library.lib.lock();

let symbol_name_with_nul = format!("{}\0", name.as_str());
let ptr: *const u8 = if let Some(lib) = &*inner_lib {
unsafe {
lib.get::<*const u8>(symbol_name_with_nul.as_bytes())
.map(|sym| *sym)
.map_err(|_| {
vm.new_value_error(format!("symbol '{}' not found", name.as_str()))
})?
}
} else {
return Err(vm.new_value_error("Library closed"));
};

#[cfg(not(windows))]
let ptr: *const u8 =
unsafe { libc::dlsym(handle as *mut libc::c_void, symbol_name.as_ptr()) as *const u8 };

// dlsym can return NULL for symbols that resolve to NULL (e.g., GNU IFUNC)
// Treat NULL addresses as errors
if ptr.is_null() {
return Err(
vm.new_value_error(format!("symbol '{}' not found in library", name.as_str()))
);
return Err(vm.new_value_error(format!("symbol '{}' not found", name.as_str())));
}

// PyCData_AtAddress
Expand Down Expand Up @@ -1593,7 +1605,7 @@ impl PyCField {
/// PyCField_set
#[pyslot]
fn descr_set(
zelf: &crate::PyObject,
zelf: &PyObject,
obj: PyObjectRef,
value: PySetterValue<PyObjectRef>,
vm: &VirtualMachine,
Expand Down Expand Up @@ -1804,7 +1816,7 @@ pub enum FfiArgValue {
F64(f64),
Pointer(usize),
/// Pointer with owned data. The PyObjectRef keeps the pointed data alive.
OwnedPointer(usize, #[allow(dead_code)] crate::PyObjectRef),
OwnedPointer(usize, #[allow(dead_code)] PyObjectRef),
}

impl FfiArgValue {
Expand Down Expand Up @@ -2145,6 +2157,16 @@ pub(super) fn read_ptr_from_buffer(buffer: &[u8]) -> usize {
}
}

/// Check if a type is a "simple instance" (direct subclass of a simple type)
/// Returns TRUE for c_int, c_void_p, etc. (simple types with _type_ attribute)
/// Returns FALSE for Structure, Array, POINTER(T), etc.
pub(super) fn is_simple_instance(typ: &Py<PyType>) -> bool {
// _ctypes_simple_instance
// Check if the type's metaclass is PyCSimpleType
let metaclass = typ.class();
metaclass.fast_issubclass(super::simple::PyCSimpleType::static_type())
}

/// Set or initialize StgInfo on a type
pub(super) fn set_or_init_stginfo(type_ref: &PyType, stg_info: StgInfo) {
if type_ref.init_type_data(stg_info.clone()).is_err()
Expand Down
Loading