From bd3172298f9158abf1411fa74aba10f214ef8e38 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Sat, 25 Apr 2026 17:58:58 -0600 Subject: [PATCH 01/22] WIP, ENH: allow larger than C int sized structured dtypes * Related to gh-30315 and gh-31308, but very much a work in progress. * Although the original failing regression test does pass here, it is not even close to safe to do this yet even though the full testsuite passes locally with `test_pickling_large` re-enabled. [skip azp] [skip cirrus] [skip actions] --- numpy/_core/src/multiarray/descriptor.c | 12 ++++++------ numpy/_core/tests/test_dtype.py | 18 ++++++++++++------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/numpy/_core/src/multiarray/descriptor.c b/numpy/_core/src/multiarray/descriptor.c index a45206e3b5ef..46ab733e5d58 100644 --- a/numpy/_core/src/multiarray/descriptor.c +++ b/numpy/_core/src/multiarray/descriptor.c @@ -316,7 +316,7 @@ _convert_from_tuple(PyObject *obj, int align) "dimension smaller then zero."); goto fail; } - if (shape.ptr[i] > NPY_MAX_INT) { + if (shape.ptr[i] > NPY_MAX_INTP) { PyErr_SetString(PyExc_ValueError, "invalid shape in fixed-type tuple: " "dimension does not fit into a C int."); @@ -325,13 +325,13 @@ _convert_from_tuple(PyObject *obj, int align) } npy_intp items = PyArray_OverflowMultiplyList(shape.ptr, shape.len); int overflowed; - int nbytes; - if (items < 0 || items > NPY_MAX_INT) { + long long nbytes; + if (items < 0 || items > NPY_MAX_INTP) { overflowed = 1; } else { - overflowed = npy_mul_with_overflow_int( - &nbytes, type->elsize, (int) items); + overflowed = npy_mul_with_overflow_longlong( + &nbytes, type->elsize, (npy_intp) items); } if (overflowed) { PyErr_SetString(PyExc_ValueError, @@ -343,7 +343,7 @@ _convert_from_tuple(PyObject *obj, int align) if (newdescr == NULL) { goto fail; } - newdescr->elsize = nbytes; + newdescr->elsize = (int)nbytes; newdescr->subarray = PyArray_malloc(sizeof(PyArray_ArrayDescr)); if (newdescr->subarray == NULL) { Py_DECREF(newdescr); diff --git a/numpy/_core/tests/test_dtype.py b/numpy/_core/tests/test_dtype.py index 6464ccd61f9d..05491daf99ea 100644 --- a/numpy/_core/tests/test_dtype.py +++ b/numpy/_core/tests/test_dtype.py @@ -737,13 +737,8 @@ def test_shape_matches_ndim(self): def test_shape_invalid(self): # Check that the shape is valid. - max_int = np.iinfo(np.intc).max max_intp = np.iinfo(np.intp).max # Too large values (the datatype is part of this) - assert_raises(ValueError, np.dtype, [('a', 'f4', max_int // 4 + 1)]) - assert_raises(ValueError, np.dtype, [('a', 'f4', max_int + 1)]) - assert_raises(ValueError, np.dtype, [('a', 'f4', (max_int, 2))]) - # Takes a different code path (fails earlier: assert_raises(ValueError, np.dtype, [('a', 'f4', max_intp + 1)]) # Negative values assert_raises(ValueError, np.dtype, [('a', 'f4', -1)]) @@ -1271,7 +1266,6 @@ def check_pickling(self, dtype): assert_equal(x[0], y[0]) @pytest.mark.skipif(not IS_64BIT, reason="test requires 64-bit system") - @pytest.mark.xfail(reason="dtype conversion doesn't allow this yet.") def test_pickling_large(self): # The actual itemsize is larger than a c-integer here. dtype = np.dtype(f"({2**31},)i") @@ -2049,3 +2043,15 @@ def test_signature_dtypes_classes(self, typename: str): params_actual = set(sig.parameters) assert params_actual == params_expect + + +def test_gh_31308(): + kind = [("x", np.float64, 2 ** 28)] + kind_dtype = np.dtype(kind) + + +@pytest.mark.xfail(run=False) +def test_gh_31308_materialized(): + kind = [("x", np.float64, 2 ** 28)] + kind_dtype = np.dtype(kind) + rec_arr = np.array((1,), dtype=kind_dtype) From de85568911f469c33cf762e16cc51aeaab0f4ce7 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Sun, 26 Apr 2026 09:31:06 -0600 Subject: [PATCH 02/22] TST, ENH: PR 31332 revisions * `test_gh_31308` has been improved to verify that `itemsize` is actually correctly populated on the newly-supported `dtype` construction. * Minor source changes have been made to allow the above regression test to pass. --- numpy/_core/src/multiarray/descriptor.c | 4 ++-- numpy/_core/tests/test_dtype.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/numpy/_core/src/multiarray/descriptor.c b/numpy/_core/src/multiarray/descriptor.c index 46ab733e5d58..7530a597e18e 100644 --- a/numpy/_core/src/multiarray/descriptor.c +++ b/numpy/_core/src/multiarray/descriptor.c @@ -343,7 +343,7 @@ _convert_from_tuple(PyObject *obj, int align) if (newdescr == NULL) { goto fail; } - newdescr->elsize = (int)nbytes; + newdescr->elsize = (long long)nbytes; newdescr->subarray = PyArray_malloc(sizeof(PyArray_ArrayDescr)); if (newdescr->subarray == NULL) { Py_DECREF(newdescr); @@ -410,7 +410,7 @@ _convert_from_array_descr(PyObject *obj, int align) /* Types with fields need the Python C API for field access */ npy_uint64 dtypeflags = NPY_NEEDS_PYAPI; int maxalign = 1; - int totalsize = 0; + npy_intp totalsize = 0; PyObject *fields = PyDict_New(); if (!fields) { Py_DECREF(nameslist); diff --git a/numpy/_core/tests/test_dtype.py b/numpy/_core/tests/test_dtype.py index 05491daf99ea..78dc7a1c7116 100644 --- a/numpy/_core/tests/test_dtype.py +++ b/numpy/_core/tests/test_dtype.py @@ -2048,6 +2048,7 @@ def test_signature_dtypes_classes(self, typename: str): def test_gh_31308(): kind = [("x", np.float64, 2 ** 28)] kind_dtype = np.dtype(kind) + assert kind_dtype.itemsize == (2 ** 28) * 8 @pytest.mark.xfail(run=False) From de8440f1c1d3ce9187bd7f19c6601c92440566f6 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Sun, 26 Apr 2026 13:46:56 -0600 Subject: [PATCH 03/22] TST: PR 31332 revisions `test_gh_31308_materialized()` is now passing, so it has been adjusted to be allowed to run if sufficient memory is available. The test was also improved to add a basic assertion about the recarray size that results. [skip azp] [skip cirrus] [skip actions] --- numpy/_core/tests/test_dtype.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/numpy/_core/tests/test_dtype.py b/numpy/_core/tests/test_dtype.py index 78dc7a1c7116..d8e71db3f907 100644 --- a/numpy/_core/tests/test_dtype.py +++ b/numpy/_core/tests/test_dtype.py @@ -26,7 +26,7 @@ assert_equal, assert_raises, ) -from numpy.testing._private.utils import requires_deep_recursion +from numpy.testing._private.utils import requires_deep_recursion, requires_memory def assert_dtype_equal(a, b): @@ -2051,8 +2051,9 @@ def test_gh_31308(): assert kind_dtype.itemsize == (2 ** 28) * 8 -@pytest.mark.xfail(run=False) +@requires_memory(free_bytes=2e9) def test_gh_31308_materialized(): kind = [("x", np.float64, 2 ** 28)] kind_dtype = np.dtype(kind) rec_arr = np.array((1,), dtype=kind_dtype) + assert rec_arr["x"].size == 2 ** 28 From d83e9b9b0355aa8d07d81d908e71a2e74c39cfe0 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Mon, 27 Apr 2026 11:30:49 -0600 Subject: [PATCH 04/22] ENH: PR 31332 revisions * `test_pickling_large` now passes thanks to a small type specification change in `arraydescr_reduce`. Note that the test now takes ~6 minutes to run locally on ARM Mac, so the newly-added `slow` marker probably isn't sufficient. [skip azp] [skip cirrus] [skip actions] --- numpy/_core/src/multiarray/descriptor.c | 3 ++- numpy/_core/tests/test_dtype.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/numpy/_core/src/multiarray/descriptor.c b/numpy/_core/src/multiarray/descriptor.c index 7530a597e18e..b8d59ed67b0b 100644 --- a/numpy/_core/src/multiarray/descriptor.c +++ b/numpy/_core/src/multiarray/descriptor.c @@ -2723,7 +2723,8 @@ arraydescr_reduce(PyArray_Descr *self, PyObject *NPY_UNUSED(args)) PyObject *ret, *mod, *obj; PyObject *state; char endian; - int elsize, alignment; + npy_intp elsize; + int alignment; ret = PyTuple_New(3); if (ret == NULL) { diff --git a/numpy/_core/tests/test_dtype.py b/numpy/_core/tests/test_dtype.py index d8e71db3f907..d102dbd80384 100644 --- a/numpy/_core/tests/test_dtype.py +++ b/numpy/_core/tests/test_dtype.py @@ -1265,6 +1265,7 @@ def check_pickling(self, dtype): assert_equal(x, y) assert_equal(x[0], y[0]) + @pytest.mark.slow() @pytest.mark.skipif(not IS_64BIT, reason="test requires 64-bit system") def test_pickling_large(self): # The actual itemsize is larger than a c-integer here. From 377836b4478d5dd4e4a25a0766b1f19d85eb0ed3 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Mon, 27 Apr 2026 11:49:14 -0600 Subject: [PATCH 05/22] MAINT: PR 31332 revisions * Simplified the error handling in `_convert_from_tuple()` function based on reviewer feedback. [skip azp] [skip cirrus] [skip actions] --- numpy/_core/src/multiarray/descriptor.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/numpy/_core/src/multiarray/descriptor.c b/numpy/_core/src/multiarray/descriptor.c index b8d59ed67b0b..7e854c4519cd 100644 --- a/numpy/_core/src/multiarray/descriptor.c +++ b/numpy/_core/src/multiarray/descriptor.c @@ -316,12 +316,6 @@ _convert_from_tuple(PyObject *obj, int align) "dimension smaller then zero."); goto fail; } - if (shape.ptr[i] > NPY_MAX_INTP) { - PyErr_SetString(PyExc_ValueError, - "invalid shape in fixed-type tuple: " - "dimension does not fit into a C int."); - goto fail; - } } npy_intp items = PyArray_OverflowMultiplyList(shape.ptr, shape.len); int overflowed; From d9bba0cc46d9a02c330162a5a1c1bc6c557b21c3 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Mon, 27 Apr 2026 12:58:21 -0600 Subject: [PATCH 06/22] MAINT: PR 31332 revisions * Simplify the error checking in `_convert_from_tuple()` and change a variable type in that function, based on reviewer feedback. [skip azp] [skip cirrus] [skip actions] --- numpy/_core/src/multiarray/descriptor.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numpy/_core/src/multiarray/descriptor.c b/numpy/_core/src/multiarray/descriptor.c index 7e854c4519cd..2718621dcf58 100644 --- a/numpy/_core/src/multiarray/descriptor.c +++ b/numpy/_core/src/multiarray/descriptor.c @@ -319,8 +319,8 @@ _convert_from_tuple(PyObject *obj, int align) } npy_intp items = PyArray_OverflowMultiplyList(shape.ptr, shape.len); int overflowed; - long long nbytes; - if (items < 0 || items > NPY_MAX_INTP) { + npy_intp nbytes; + if (items < 0) { overflowed = 1; } else { From ebcfa4a4f9ec229a0cfa7558920510d1bdc9d247 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Mon, 27 Apr 2026 13:07:51 -0600 Subject: [PATCH 07/22] TST: PR 31332 revisions * `test_shape_invalid()` now has two "later overflow" test cases restored, based on reviewer feedback. [skip azp] [skip cirrus] [skip actions] --- numpy/_core/tests/test_dtype.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/numpy/_core/tests/test_dtype.py b/numpy/_core/tests/test_dtype.py index d102dbd80384..9f99008342c8 100644 --- a/numpy/_core/tests/test_dtype.py +++ b/numpy/_core/tests/test_dtype.py @@ -739,6 +739,8 @@ def test_shape_invalid(self): # Check that the shape is valid. max_intp = np.iinfo(np.intp).max # Too large values (the datatype is part of this) + assert_raises(ValueError, np.dtype, [('a', 'f8', max_intp // 8 + 1)]) + assert_raises(ValueError, np.dtype, [('a', 'f4', max_intp // 4 + 1)]) assert_raises(ValueError, np.dtype, [('a', 'f4', max_intp + 1)]) # Negative values assert_raises(ValueError, np.dtype, [('a', 'f4', -1)]) From 8812144b5e492cb52a928d373e8fdb444eed9c4b Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Mon, 27 Apr 2026 13:16:10 -0600 Subject: [PATCH 08/22] TST: PR 31332 revisions * `test_gh_31308_materialized()` has been adjusted to also have a 64-bit machine guard, since that is required for this test. [skip azp] [skip cirrus] [skip actions] --- numpy/_core/tests/test_dtype.py | 1 + 1 file changed, 1 insertion(+) diff --git a/numpy/_core/tests/test_dtype.py b/numpy/_core/tests/test_dtype.py index 9f99008342c8..40d33597ee63 100644 --- a/numpy/_core/tests/test_dtype.py +++ b/numpy/_core/tests/test_dtype.py @@ -2054,6 +2054,7 @@ def test_gh_31308(): assert kind_dtype.itemsize == (2 ** 28) * 8 +@pytest.mark.skipif(not IS_64BIT, reason="test requires 64-bit system") @requires_memory(free_bytes=2e9) def test_gh_31308_materialized(): kind = [("x", np.float64, 2 ** 28)] From 9dbee219283e67d1cd6c80597b9c92388d14eff5 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Mon, 27 Apr 2026 13:28:26 -0600 Subject: [PATCH 09/22] MAINT: PR 31332 revisions * `_convert_from_tuple()` now uses a more appropriate C function, `npy_mul_sizes_with_overflow()`, to check for overflow, based on reviewer feedback. [skip azp] [skip cirrus] [skip actions] --- numpy/_core/src/multiarray/descriptor.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numpy/_core/src/multiarray/descriptor.c b/numpy/_core/src/multiarray/descriptor.c index 2718621dcf58..0b8a1b4422c0 100644 --- a/numpy/_core/src/multiarray/descriptor.c +++ b/numpy/_core/src/multiarray/descriptor.c @@ -324,8 +324,8 @@ _convert_from_tuple(PyObject *obj, int align) overflowed = 1; } else { - overflowed = npy_mul_with_overflow_longlong( - &nbytes, type->elsize, (npy_intp) items); + overflowed = npy_mul_sizes_with_overflow( + &nbytes, type->elsize, items); } if (overflowed) { PyErr_SetString(PyExc_ValueError, From b266f4cde14d668b2cb01a2ca1ea519ade79bd00 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Mon, 27 Apr 2026 13:37:48 -0600 Subject: [PATCH 10/22] MAINT: PR 31332 revisions * Remove an extraneous typecast in `_convert_from_tuple()` function. [skip azp] [skip cirrus] [skip actions] --- numpy/_core/src/multiarray/descriptor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/_core/src/multiarray/descriptor.c b/numpy/_core/src/multiarray/descriptor.c index 0b8a1b4422c0..f615d46514a0 100644 --- a/numpy/_core/src/multiarray/descriptor.c +++ b/numpy/_core/src/multiarray/descriptor.c @@ -337,7 +337,7 @@ _convert_from_tuple(PyObject *obj, int align) if (newdescr == NULL) { goto fail; } - newdescr->elsize = (long long)nbytes; + newdescr->elsize = nbytes; newdescr->subarray = PyArray_malloc(sizeof(PyArray_ArrayDescr)); if (newdescr->subarray == NULL) { Py_DECREF(newdescr); From 997f0c4b0f27e852144dbc5de3220b4cee92d6e2 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Tue, 28 Apr 2026 17:42:16 -0600 Subject: [PATCH 11/22] TST: PR 31332 revisions * Parametrize `test_gh_31308()` to include an `"i, i"` style structured dtype construction. That test case currently fails so will need to be repaired to avoid overflow. [skip azp] [skip cirrus] [skip actions] --- numpy/_core/tests/test_dtype.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/numpy/_core/tests/test_dtype.py b/numpy/_core/tests/test_dtype.py index 40d33597ee63..595dfd893e34 100644 --- a/numpy/_core/tests/test_dtype.py +++ b/numpy/_core/tests/test_dtype.py @@ -2048,10 +2048,13 @@ def test_signature_dtypes_classes(self, typename: str): assert params_actual == params_expect -def test_gh_31308(): - kind = [("x", np.float64, 2 ** 28)] +@pytest.mark.parametrize("kind, exp", [ + ([("x", np.float64, 2 ** 28)], (2 ** 28 * 8)), + ("2147483648i,2147483648i", 17179869184), +]) +def test_gh_31308(kind, exp): kind_dtype = np.dtype(kind) - assert kind_dtype.itemsize == (2 ** 28) * 8 + assert kind_dtype.itemsize == exp @pytest.mark.skipif(not IS_64BIT, reason="test requires 64-bit system") From 208193c9784418b056936681670a47476f411b9e Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Wed, 29 Apr 2026 14:07:47 -0600 Subject: [PATCH 12/22] ENH: PR 31332 revisions * Adjust a variable typing in `_convert_from_list()` function to allow this structured dtype specification to be properly processed: `"2147483648i,2147483648i"`. [skip azp] [skip cirrus] [skip actions] --- numpy/_core/src/multiarray/descriptor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/_core/src/multiarray/descriptor.c b/numpy/_core/src/multiarray/descriptor.c index f615d46514a0..8d81391c263c 100644 --- a/numpy/_core/src/multiarray/descriptor.c +++ b/numpy/_core/src/multiarray/descriptor.c @@ -627,7 +627,7 @@ _convert_from_list(PyObject *obj, int align) /* Types with fields need the Python C API for field access */ npy_uint64 dtypeflags = NPY_NEEDS_PYAPI; int maxalign = 1; - int totalsize = 0; + npy_intp totalsize = 0; for (int i = 0; i < n; i++) { PyArray_Descr *conv = _convert_from_any( PyList_GET_ITEM(obj, i), align); // noqa: borrowed-ref OK From 24ad411834cc60bb56889cdb3adf64f366a06e6f Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Wed, 29 Apr 2026 14:17:20 -0600 Subject: [PATCH 13/22] TST: PR 31332 revisions * `test_gh_31308()` has been augmented to include a new test case for "larger than C int" structured dtype specification via a dictionary. * `_convert_from_dict()` has had a variable type specification improved to support the above test case. [skip azp] [skip cirrus] [skip actions] --- numpy/_core/src/multiarray/descriptor.c | 2 +- numpy/_core/tests/test_dtype.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/numpy/_core/src/multiarray/descriptor.c b/numpy/_core/src/multiarray/descriptor.c index 8d81391c263c..9d7f4ccf9f95 100644 --- a/numpy/_core/src/multiarray/descriptor.c +++ b/numpy/_core/src/multiarray/descriptor.c @@ -1095,7 +1095,7 @@ _convert_from_dict(PyObject *obj, int align) /* Types with fields need the Python C API for field access */ npy_uint64 dtypeflags = NPY_NEEDS_PYAPI; - int totalsize = 0; + npy_intp totalsize = 0; int maxalign = 1; int has_out_of_order_fields = 0; for (int i = 0; i < n; i++) { diff --git a/numpy/_core/tests/test_dtype.py b/numpy/_core/tests/test_dtype.py index 595dfd893e34..57d5e60e5819 100644 --- a/numpy/_core/tests/test_dtype.py +++ b/numpy/_core/tests/test_dtype.py @@ -2051,6 +2051,7 @@ def test_signature_dtypes_classes(self, typename: str): @pytest.mark.parametrize("kind, exp", [ ([("x", np.float64, 2 ** 28)], (2 ** 28 * 8)), ("2147483648i,2147483648i", 17179869184), + (dict(names=["a"], formats=["2147483648i"]), 8589934592), ]) def test_gh_31308(kind, exp): kind_dtype = np.dtype(kind) From 3892f1dbdffc36ae70af7262dacd10c04b584ef5 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Wed, 29 Apr 2026 14:45:45 -0600 Subject: [PATCH 14/22] ENH: PR 31332 revisions * More dictionary-based structured dtype specification cases have been added to `test_gh_31308()`. A small typing patch for the `offset` variable in `_convert_from_dict()` has been added that allows the new test cases to pass. [skip azp] [skip cirrus] [skip actions] --- numpy/_core/src/multiarray/descriptor.c | 2 +- numpy/_core/tests/test_dtype.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/numpy/_core/src/multiarray/descriptor.c b/numpy/_core/src/multiarray/descriptor.c index 9d7f4ccf9f95..0b2c04717f12 100644 --- a/numpy/_core/src/multiarray/descriptor.c +++ b/numpy/_core/src/multiarray/descriptor.c @@ -1140,7 +1140,7 @@ _convert_from_dict(PyObject *obj, int align) Py_DECREF(ind); goto fail; } - long offset = PyArray_PyIntAsInt(off); + npy_intp offset = PyArray_PyIntAsIntp(off); if (error_converting(offset)) { Py_DECREF(off); Py_DECREF(tup); diff --git a/numpy/_core/tests/test_dtype.py b/numpy/_core/tests/test_dtype.py index 57d5e60e5819..20553010018c 100644 --- a/numpy/_core/tests/test_dtype.py +++ b/numpy/_core/tests/test_dtype.py @@ -2052,6 +2052,9 @@ def test_signature_dtypes_classes(self, typename: str): ([("x", np.float64, 2 ** 28)], (2 ** 28 * 8)), ("2147483648i,2147483648i", 17179869184), (dict(names=["a"], formats=["2147483648i"]), 8589934592), + (dict(names=["a"], formats=["2147483648i"], offsets=[1]), 8589934593), + (dict(names=["a"], formats=["2147483648i"], offsets=[2 ** 31 - 100]), 10737418140), + (dict(names=["a"], formats=["2147483648i"], offsets=[2 ** 31]), 10737418240), ]) def test_gh_31308(kind, exp): kind_dtype = np.dtype(kind) From 3dba943d822ad5f14151aff76e49364732f6c0f1 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Thu, 30 Apr 2026 10:24:47 -0600 Subject: [PATCH 15/22] TST: PR 31332 revisions * `test_gh_31308()` has been improved with several new/more complex structured dtype test cases (they all pass, as expected). [skip azp] [skip cirrus] [skip actions] --- numpy/_core/tests/test_dtype.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/numpy/_core/tests/test_dtype.py b/numpy/_core/tests/test_dtype.py index 20553010018c..10110149684b 100644 --- a/numpy/_core/tests/test_dtype.py +++ b/numpy/_core/tests/test_dtype.py @@ -2050,11 +2050,19 @@ def test_signature_dtypes_classes(self, typename: str): @pytest.mark.parametrize("kind, exp", [ ([("x", np.float64, 2 ** 28)], (2 ** 28 * 8)), + ([("x", np.float64, 2 ** 27), ("y", np.float64, 2 ** 27)], (2 ** 28 * 8)), + ([("x", np.float32, 2 ** 28), ("y", np.float64, 2 ** 27)], (2 ** 28 * 8)), + ([("x", np.float16, 2 ** 29), ("y", np.float64, 2 ** 27)], (2 ** 28 * 8)), ("2147483648i,2147483648i", 17179869184), + ("2147483648f,2147483648f", 17179869184), + ("2147483648d,2147483648d", 34359738368), + ("2b,2147483648b,2f,4i", 2147483674), (dict(names=["a"], formats=["2147483648i"]), 8589934592), (dict(names=["a"], formats=["2147483648i"], offsets=[1]), 8589934593), (dict(names=["a"], formats=["2147483648i"], offsets=[2 ** 31 - 100]), 10737418140), (dict(names=["a"], formats=["2147483648i"], offsets=[2 ** 31]), 10737418240), + (dict(names=["a", "b", "c"], formats=["2147483648b", "16i", "12f"], + offsets=[2 ** 31, 2 ** 32, 2 ** 32 + 69]), 4294967413), ]) def test_gh_31308(kind, exp): kind_dtype = np.dtype(kind) From 8fbdd2fd9ccf31e0adf2b96a4632b67bada7c75f Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Thu, 30 Apr 2026 10:45:43 -0600 Subject: [PATCH 16/22] TST: PR 31332 revisions * The `check_pickling` testing utility function has been adjusted to allow skipping the comparison of materialized arrays, because this can take several minutes for newly-supported large structured dtypes. To compensate for this, `check_pickling` has been augmented to additionally verify reconstitution of `itemsize` for serialized dtypes, which is a check that fails without the source patches in the above PR. * As a result, it is no longer necessary to mark `test_pickling_large` with `slow()`. --- numpy/_core/tests/test_dtype.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/numpy/_core/tests/test_dtype.py b/numpy/_core/tests/test_dtype.py index 10110149684b..8ae12a2b5488 100644 --- a/numpy/_core/tests/test_dtype.py +++ b/numpy/_core/tests/test_dtype.py @@ -1249,7 +1249,7 @@ def test_structured(self, dtype, random): class TestPickling: - def check_pickling(self, dtype): + def check_pickling(self, dtype, arr_assert=True): for proto in range(pickle.HIGHEST_PROTOCOL + 1): buf = pickle.dumps(dtype, proto) # The dtype pickling itself pickles `np.dtype` if it is pickled @@ -1259,22 +1259,25 @@ def check_pickling(self, dtype): pickled = pickle.loads(buf) assert_equal(pickled, dtype) assert_equal(pickled.descr, dtype.descr) + assert_equal(pickled.itemsize, dtype.itemsize) if dtype.metadata is not None: assert_equal(pickled.metadata, dtype.metadata) # Check the reconstructed dtype is functional x = np.zeros(3, dtype=dtype) y = np.zeros(3, dtype=pickled) - assert_equal(x, y) - assert_equal(x[0], y[0]) + # some large structured dtypes are too large to + # reasonably compare across all elements + if arr_assert: + assert_equal(x, y) + assert_equal(x[0], y[0]) - @pytest.mark.slow() @pytest.mark.skipif(not IS_64BIT, reason="test requires 64-bit system") def test_pickling_large(self): # The actual itemsize is larger than a c-integer here. dtype = np.dtype(f"({2**31},)i") - self.check_pickling(dtype) + self.check_pickling(dtype, False) dtype = np.dtype(f"({2**31},)i", metadata={"a": "b"}) - self.check_pickling(dtype) + self.check_pickling(dtype, False) @pytest.mark.parametrize('t', [int, float, complex, np.int32, str, object, bool]) From 4c5188350ada29d064b80e3d80e49298777445b9 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Thu, 30 Apr 2026 11:44:49 -0600 Subject: [PATCH 17/22] TST: PR 31332 revisions * `check_pickling` required too much memory for large structured dtypes, so the materializations of the arrays have been moved under the new `arr_assert` guard. * `test_gh_31308` was missing a `IS_64BIT` guard. [skip azp] [skip cirrus] [skip actions] --- numpy/_core/tests/test_dtype.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/numpy/_core/tests/test_dtype.py b/numpy/_core/tests/test_dtype.py index 8ae12a2b5488..5a9c61642b6b 100644 --- a/numpy/_core/tests/test_dtype.py +++ b/numpy/_core/tests/test_dtype.py @@ -1262,12 +1262,12 @@ def check_pickling(self, dtype, arr_assert=True): assert_equal(pickled.itemsize, dtype.itemsize) if dtype.metadata is not None: assert_equal(pickled.metadata, dtype.metadata) - # Check the reconstructed dtype is functional - x = np.zeros(3, dtype=dtype) - y = np.zeros(3, dtype=pickled) # some large structured dtypes are too large to # reasonably compare across all elements if arr_assert: + # Check the reconstructed dtype is functional + x = np.zeros(3, dtype=dtype) + y = np.zeros(3, dtype=pickled) assert_equal(x, y) assert_equal(x[0], y[0]) @@ -2067,6 +2067,7 @@ def test_signature_dtypes_classes(self, typename: str): (dict(names=["a", "b", "c"], formats=["2147483648b", "16i", "12f"], offsets=[2 ** 31, 2 ** 32, 2 ** 32 + 69]), 4294967413), ]) +@pytest.mark.skipif(not IS_64BIT, reason="test requires 64-bit system") def test_gh_31308(kind, exp): kind_dtype = np.dtype(kind) assert kind_dtype.itemsize == exp From ae6349549671528ddf7f96b48439373300f7039a Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Fri, 1 May 2026 14:28:05 -0600 Subject: [PATCH 18/22] ENH, TST: PR 31332 revisions * Several more size/elsize related typing fixes in the descriptor source to support the above PR, and to fix a Windows test failure observed there (confirmed locally on Windows box). * Augment `test_gh_31308()` with an additional assertion that is sensitive to the need for some of these source changes. --- numpy/_core/src/multiarray/descriptor.c | 18 +++++++++--------- numpy/_core/tests/test_dtype.py | 2 ++ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/numpy/_core/src/multiarray/descriptor.c b/numpy/_core/src/multiarray/descriptor.c index 0b2c04717f12..e37db292b6d8 100644 --- a/numpy/_core/src/multiarray/descriptor.c +++ b/numpy/_core/src/multiarray/descriptor.c @@ -364,7 +364,7 @@ _convert_from_tuple(PyObject *obj, int align) } for (int i=0; i < shape.len; i++) { PyTuple_SET_ITEM(newdescr->subarray->shape, i, - PyLong_FromLong((long)shape.ptr[i])); + PyLong_FromSsize_t(shape.ptr[i])); if (PyTuple_GET_ITEM(newdescr->subarray->shape, i) == NULL) { Py_DECREF(newdescr); @@ -521,7 +521,7 @@ _convert_from_array_descr(PyObject *obj, int align) goto fail; } PyTuple_SET_ITEM(tup, 0, (PyObject *)conv); - PyTuple_SET_ITEM(tup, 1, PyLong_FromLong((long) totalsize)); + PyTuple_SET_ITEM(tup, 1, PyLong_FromSsize_t(totalsize)); /* * Title can be "meta-data". Only insert it @@ -642,7 +642,7 @@ _convert_from_list(PyObject *obj, int align) } maxalign = PyArray_MAX(maxalign, _align); } - PyObject *size_obj = PyLong_FromLong((long) totalsize); + PyObject *size_obj = PyLong_FromSsize_t(totalsize); if (!size_obj) { Py_DECREF(conv); goto fail; @@ -1156,7 +1156,7 @@ _convert_from_dict(PyObject *obj, int align) goto fail; } - PyTuple_SET_ITEM(tup, 1, PyLong_FromLong(offset)); + PyTuple_SET_ITEM(tup, 1, PyLong_FromSsize_t(offset)); /* Flag whether the fields are specified out of order */ if (offset < totalsize) { has_out_of_order_fields = 1; @@ -1180,7 +1180,7 @@ _convert_from_dict(PyObject *obj, int align) if (align && _align > 1) { totalsize = NPY_NEXT_ALIGNED_OFFSET(totalsize, _align); } - PyTuple_SET_ITEM(tup, 1, PyLong_FromLong(totalsize)); + PyTuple_SET_ITEM(tup, 1, PyLong_FromSsize_t(totalsize)); totalsize += newdescr->elsize; } if (len == 3) { @@ -1797,7 +1797,7 @@ _convert_from_str(PyObject *obj, int align) } int check_num = NPY_NOTYPE + 10; - int elsize = 0; + npy_intp elsize = 0; /* A typecode like 'd' */ if (len == 1) { /* Python byte string characters are unsigned */ @@ -1810,7 +1810,7 @@ _convert_from_str(PyObject *obj, int align) /* Attempt to parse the integer, make sure it's the rest of the string */ errno = 0; - long result = strtol(type + 1, &typeend, 10); + npy_intp result = strtol(type + 1, &typeend, 10); npy_bool some_parsing_happened = !(type == typeend); npy_bool entire_string_consumed = *typeend == '\0'; npy_bool parsing_succeeded = @@ -1820,7 +1820,7 @@ _convert_from_str(PyObject *obj, int align) goto fail; } - elsize = (int)result; + elsize = result; if (parsing_succeeded && typeend - type == len) { @@ -2820,7 +2820,7 @@ arraydescr_reduce(PyArray_Descr *self, PyObject *NPY_UNUSED(args)) elsize = -1; alignment = -1; } - PyTuple_SET_ITEM(state, 5, PyLong_FromLong(elsize)); + PyTuple_SET_ITEM(state, 5, PyLong_FromSsize_t(elsize)); PyTuple_SET_ITEM(state, 6, PyLong_FromLong(alignment)); PyTuple_SET_ITEM(state, 7, PyLong_FromUnsignedLongLong( self->flags & ~NPY_NOT_TRIVIALLY_COPYABLE)); diff --git a/numpy/_core/tests/test_dtype.py b/numpy/_core/tests/test_dtype.py index 5a9c61642b6b..eaaa39fafc07 100644 --- a/numpy/_core/tests/test_dtype.py +++ b/numpy/_core/tests/test_dtype.py @@ -2071,6 +2071,8 @@ def test_signature_dtypes_classes(self, typename: str): def test_gh_31308(kind, exp): kind_dtype = np.dtype(kind) assert kind_dtype.itemsize == exp + for name in kind_dtype.names: + assert kind_dtype[name].shape[0] > 0 @pytest.mark.skipif(not IS_64BIT, reason="test requires 64-bit system") From 692a3f1fc4c4c3787df74fe2f181e09355997794 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Sun, 3 May 2026 11:05:34 -0600 Subject: [PATCH 19/22] DOC: PR 31332 revisions * Add a release note for the above PR. --- doc/release/upcoming_changes/31332.improvement.rst | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 doc/release/upcoming_changes/31332.improvement.rst diff --git a/doc/release/upcoming_changes/31332.improvement.rst b/doc/release/upcoming_changes/31332.improvement.rst new file mode 100644 index 000000000000..f8a0661ff0c3 --- /dev/null +++ b/doc/release/upcoming_changes/31332.improvement.rst @@ -0,0 +1,6 @@ +Structured dtypes now support larger field sizes +------------------------------------------------ +It is now possible to construct structured data types with +field sizes and offsets that exceed the size of a standard C +integer. Arrays using these structured data types are now +also possible to construct. From 673b19b9acc88b88145afb0ccc3deec7a363c253 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Mon, 11 May 2026 17:56:16 -0600 Subject: [PATCH 20/22] TST, MAINT: PR 31332 revisions * Fixed an incorrectly typed `elsize` in `array_fromfile_binary()`. * Added a matching regression test, though note that `test_recarray_fromfile_massive()` already passes without the type change above. [skip azp] [skip cirrus] [skip actions] --- numpy/_core/src/multiarray/ctors.c | 2 +- numpy/_core/tests/test_records.py | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/numpy/_core/src/multiarray/ctors.c b/numpy/_core/src/multiarray/ctors.c index 7d5bc2d79c41..68f684bec757 100644 --- a/numpy/_core/src/multiarray/ctors.c +++ b/numpy/_core/src/multiarray/ctors.c @@ -3460,7 +3460,7 @@ array_fromfile_binary(FILE *fp, PyArray_Descr *dtype, npy_intp num, size_t *nrea { PyArrayObject *r; npy_off_t start, numbytes; - int elsize; + npy_intp elsize; if (num < 0) { int fail = 0; diff --git a/numpy/_core/tests/test_records.py b/numpy/_core/tests/test_records.py index 9387e8aa9a83..089a48534ede 100644 --- a/numpy/_core/tests/test_records.py +++ b/numpy/_core/tests/test_records.py @@ -9,6 +9,7 @@ import numpy as np from numpy.testing import ( + IS_64BIT, assert_, assert_array_almost_equal, assert_array_equal, @@ -16,6 +17,7 @@ assert_raises, temppath, ) +from numpy.testing._private.utils import requires_memory class TestFromrecords: @@ -108,6 +110,17 @@ def test_recarray_fromfile(self): assert_equal(r1, r2) assert_equal(r2, r3) + @pytest.mark.skipif(not IS_64BIT, reason="test requires 64-bit system") + @requires_memory(free_bytes=2e9) + def test_recarray_fromfile_massive(self, tmpdir): + kind = [("x", np.float64, 2 ** 28)] + kind_dtype = np.dtype(kind) + rec_arr = np.array((1,), dtype=kind_dtype) + with tmpdir.as_cwd(): + rec_arr.tofile("f.data") + actual = np.fromfile("f.data", dtype=kind_dtype) + assert actual.itemsize == 2 ** 28 * 8 + def test_recarray_from_obj(self): count = 10 a = np.zeros(count, dtype='O') From 53126e0ec267ae698ff47797ae761e7481fea806 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Tue, 12 May 2026 12:06:24 -0600 Subject: [PATCH 21/22] TST: PR 31332 revisions * `test_recarray_fromfile_massive()` has been adjusted such that it only passes with the previous patch to `array_fromfile_binary()`. [skip azp] [skip cirrus] [skip actions] --- numpy/_core/tests/test_records.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/numpy/_core/tests/test_records.py b/numpy/_core/tests/test_records.py index 089a48534ede..9624589cb761 100644 --- a/numpy/_core/tests/test_records.py +++ b/numpy/_core/tests/test_records.py @@ -14,6 +14,7 @@ assert_array_almost_equal, assert_array_equal, assert_equal, + assert_allclose, assert_raises, temppath, ) @@ -111,15 +112,17 @@ def test_recarray_fromfile(self): assert_equal(r2, r3) @pytest.mark.skipif(not IS_64BIT, reason="test requires 64-bit system") - @requires_memory(free_bytes=2e9) + @requires_memory(free_bytes=4.3e9) def test_recarray_fromfile_massive(self, tmpdir): - kind = [("x", np.float64, 2 ** 28)] + kind = [("x", np.float64, 2 ** 29)] kind_dtype = np.dtype(kind) rec_arr = np.array((1,), dtype=kind_dtype) with tmpdir.as_cwd(): rec_arr.tofile("f.data") actual = np.fromfile("f.data", dtype=kind_dtype) - assert actual.itemsize == 2 ** 28 * 8 + assert actual.itemsize == 2 ** 29 * 8 + item = actual["x"][0][1] + assert_allclose(item, 1) def test_recarray_from_obj(self): count = 10 From b2856067d9368b734297e1261a351369438b9fb5 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Tue, 12 May 2026 15:19:51 -0600 Subject: [PATCH 22/22] TST: PR 31332 revisions * `test_gh_31308_materialized` has been adjusted to include a new test case that overflows--this will need to be fixed. [skip azp] [skip cirrus] [skip actions] --- numpy/_core/tests/test_dtype.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/numpy/_core/tests/test_dtype.py b/numpy/_core/tests/test_dtype.py index eaaa39fafc07..a503057ca48f 100644 --- a/numpy/_core/tests/test_dtype.py +++ b/numpy/_core/tests/test_dtype.py @@ -2077,8 +2077,11 @@ def test_gh_31308(kind, exp): @pytest.mark.skipif(not IS_64BIT, reason="test requires 64-bit system") @requires_memory(free_bytes=2e9) -def test_gh_31308_materialized(): - kind = [("x", np.float64, 2 ** 28)] +@pytest.mark.parametrize("val, kind, exp", [ + ((1,), [("x", np.float64, 2 ** 28)], 2 ** 28), + ((1, 1), [("x", np.float64, 2 ** 28), ("y", np.float64, 1)], 2 ** 28), +]) +def test_gh_31308_materialized(val, kind, exp): kind_dtype = np.dtype(kind) - rec_arr = np.array((1,), dtype=kind_dtype) - assert rec_arr["x"].size == 2 ** 28 + rec_arr = np.array(val, dtype=kind_dtype) + assert rec_arr["x"].size == exp