6 changes: 6 additions & 0 deletions doc/release/upcoming_changes/31332.improvement.rst
@@ -0,0 +1,6 @@
+Structured dtypes now support larger field sizes
+------------------------------------------------
+It is now possible to construct structured data types with
+field sizes and offsets that exceed the size of a standard C
+integer. It is now also possible to construct arrays that
+use these structured data types.
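
A minimal sketch of what the release note describes (assuming a 64-bit build with this change applied):

import numpy as np

# 2**31 float32 values -> an itemsize of 2**33 bytes, which does not fit
# in a 32-bit C int but now fits in npy_intp on 64-bit builds.
dt = np.dtype([("a", "f4", 2**31)])
assert dt.itemsize == 2**31 * 4  # 8589934592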
2 changes: 1 addition & 1 deletion numpy/_core/src/multiarray/ctors.c
@@ -3460,7 +3460,7 @@ array_fromfile_binary(FILE *fp, PyArray_Descr *dtype, npy_intp num, size_t *nread
 {
     PyArrayObject *r;
     npy_off_t start, numbytes;
-    int elsize;
+    npy_intp elsize;
 
     if (num < 0) {
         int fail = 0;
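Editorial note: the widening of elsize from int to npy_intp is purely about range; a quick Python illustration (values assume a typical 64-bit platform):

import numpy as np

itemsize = 2**31 * 4                       # an 8 GiB element, as above
print(itemsize > np.iinfo(np.intc).max)    # True: overflows a 32-bit C int
print(itemsize <= np.iinfo(np.intp).max)   # True on 64-bit: fits in npy_intp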
43 changes: 19 additions & 24 deletions numpy/_core/src/multiarray/descriptor.c
@@ -316,22 +316,16 @@ _convert_from_tuple(PyObject *obj, int align)
                 "dimension smaller then zero.");
             goto fail;
         }
-        if (shape.ptr[i] > NPY_MAX_INT) {
-            PyErr_SetString(PyExc_ValueError,
-                "invalid shape in fixed-type tuple: "
-                "dimension does not fit into a C int.");
-            goto fail;
-        }
     }
     npy_intp items = PyArray_OverflowMultiplyList(shape.ptr, shape.len);
     int overflowed;
-    int nbytes;
-    if (items < 0 || items > NPY_MAX_INT) {
+    npy_intp nbytes;
+    if (items < 0) {
        overflowed = 1;
     }
     else {
-        overflowed = npy_mul_with_overflow_int(
-            &nbytes, type->elsize, (int) items);
+        overflowed = npy_mul_sizes_with_overflow(
+            &nbytes, type->elsize, items);
     }
     if (overflowed) {
         PyErr_SetString(PyExc_ValueError,
@@ -370,7 +364,7 @@ _convert_from_tuple(PyObject *obj, int align)
     }
     for (int i=0; i < shape.len; i++) {
         PyTuple_SET_ITEM(newdescr->subarray->shape, i,
-                PyLong_FromLong((long)shape.ptr[i]));
+                PyLong_FromSsize_t(shape.ptr[i]));
 
         if (PyTuple_GET_ITEM(newdescr->subarray->shape, i) == NULL) {
             Py_DECREF(newdescr);
@@ -410,7 +404,7 @@ _convert_from_array_descr(PyObject *obj, int align)
     /* Types with fields need the Python C API for field access */
     npy_uint64 dtypeflags = NPY_NEEDS_PYAPI;
     int maxalign = 1;
-    int totalsize = 0;
+    npy_intp totalsize = 0;
     PyObject *fields = PyDict_New();
     if (!fields) {
         Py_DECREF(nameslist);
@@ -527,7 +521,7 @@ _convert_from_array_descr(PyObject *obj, int align)
             goto fail;
         }
         PyTuple_SET_ITEM(tup, 0, (PyObject *)conv);
-        PyTuple_SET_ITEM(tup, 1, PyLong_FromLong((long) totalsize));
+        PyTuple_SET_ITEM(tup, 1, PyLong_FromSsize_t(totalsize));
 
         /*
          * Title can be "meta-data". Only insert it
@@ -633,7 +627,7 @@ _convert_from_list(PyObject *obj, int align)
     /* Types with fields need the Python C API for field access */
     npy_uint64 dtypeflags = NPY_NEEDS_PYAPI;
     int maxalign = 1;
-    int totalsize = 0;
+    npy_intp totalsize = 0;
     for (int i = 0; i < n; i++) {
         PyArray_Descr *conv = _convert_from_any(
             PyList_GET_ITEM(obj, i), align); // noqa: borrowed-ref OK
@@ -648,7 +642,7 @@ _convert_from_list(PyObject *obj, int align)
         }
         maxalign = PyArray_MAX(maxalign, _align);
     }
-    PyObject *size_obj = PyLong_FromLong((long) totalsize);
+    PyObject *size_obj = PyLong_FromSsize_t(totalsize);
     if (!size_obj) {
         Py_DECREF(conv);
         goto fail;
@@ -1101,7 +1095,7 @@ _convert_from_dict(PyObject *obj, int align)
 
     /* Types with fields need the Python C API for field access */
     npy_uint64 dtypeflags = NPY_NEEDS_PYAPI;
-    int totalsize = 0;
+    npy_intp totalsize = 0;
     int maxalign = 1;
     int has_out_of_order_fields = 0;
     for (int i = 0; i < n; i++) {
@@ -1146,7 +1140,7 @@ _convert_from_dict(PyObject *obj, int align)
             Py_DECREF(ind);
             goto fail;
         }
-        long offset = PyArray_PyIntAsInt(off);
+        npy_intp offset = PyArray_PyIntAsIntp(off);
         if (error_converting(offset)) {
             Py_DECREF(off);
             Py_DECREF(tup);
@@ -1162,7 +1156,7 @@ _convert_from_dict(PyObject *obj, int align)
             goto fail;
         }
 
-        PyTuple_SET_ITEM(tup, 1, PyLong_FromLong(offset));
+        PyTuple_SET_ITEM(tup, 1, PyLong_FromSsize_t(offset));
         /* Flag whether the fields are specified out of order */
         if (offset < totalsize) {
             has_out_of_order_fields = 1;
@@ -1186,7 +1180,7 @@ _convert_from_dict(PyObject *obj, int align)
         if (align && _align > 1) {
             totalsize = NPY_NEXT_ALIGNED_OFFSET(totalsize, _align);
         }
-        PyTuple_SET_ITEM(tup, 1, PyLong_FromLong(totalsize));
+        PyTuple_SET_ITEM(tup, 1, PyLong_FromSsize_t(totalsize));
         totalsize += newdescr->elsize;
     }
     if (len == 3) {
@@ -1803,7 +1797,7 @@ _convert_from_str(PyObject *obj, int align)
     }
 
     int check_num = NPY_NOTYPE + 10;
-    int elsize = 0;
+    npy_intp elsize = 0;
     /* A typecode like 'd' */
     if (len == 1) {
         /* Python byte string characters are unsigned */
@@ -1816,7 +1810,7 @@
 
     /* Attempt to parse the integer, make sure it's the rest of the string */
     errno = 0;
-    long result = strtol(type + 1, &typeend, 10);
+    npy_intp result = strtol(type + 1, &typeend, 10);
     npy_bool some_parsing_happened = !(type == typeend);
     npy_bool entire_string_consumed = *typeend == '\0';
     npy_bool parsing_succeeded =
@@ -1826,7 +1820,7 @@
         goto fail;
     }
 
-    elsize = (int)result;
+    elsize = result;
 
 
     if (parsing_succeeded && typeend - type == len) {
@@ -2723,7 +2717,8 @@ arraydescr_reduce(PyArray_Descr *self, PyObject *NPY_UNUSED(args))
     PyObject *ret, *mod, *obj;
     PyObject *state;
     char endian;
-    int elsize, alignment;
+    npy_intp elsize;
+    int alignment;
 
     ret = PyTuple_New(3);
     if (ret == NULL) {
@@ -2825,7 +2820,7 @@ arraydescr_reduce(PyArray_Descr *self, PyObject *NPY_UNUSED(args))
         elsize = -1;
         alignment = -1;
     }
-    PyTuple_SET_ITEM(state, 5, PyLong_FromLong(elsize));
+    PyTuple_SET_ITEM(state, 5, PyLong_FromSsize_t(elsize));
     PyTuple_SET_ITEM(state, 6, PyLong_FromLong(alignment));
     PyTuple_SET_ITEM(state, 7, PyLong_FromUnsignedLongLong(
         self->flags & ~NPY_NOT_TRIVIALLY_COPYABLE));
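The Python-visible effect of switching these offsets and sizes to npy_intp, sketched under the assumption of a 64-bit build with this change applied:

import numpy as np

# A field offset beyond INT_MAX is now accepted and round-trips;
# the computed itemsize is offset + field size.
dt = np.dtype(dict(names=["a"], formats=["i4"], offsets=[2**31]))
assert dt.fields["a"][1] == 2**31
assert dt.itemsize == 2**31 + 4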
63 changes: 48 additions & 15 deletions numpy/_core/tests/test_dtype.py
@@ -26,7 +26,7 @@
     assert_equal,
     assert_raises,
 )
-from numpy.testing._private.utils import requires_deep_recursion
+from numpy.testing._private.utils import requires_deep_recursion, requires_memory
 
 
 def assert_dtype_equal(a, b):
@@ -737,13 +737,10 @@ def test_shape_matches_ndim(self):
 
     def test_shape_invalid(self):
         # Check that the shape is valid.
-        max_int = np.iinfo(np.intc).max
         max_intp = np.iinfo(np.intp).max
         # Too large values (the datatype is part of this)
-        assert_raises(ValueError, np.dtype, [('a', 'f4', max_int // 4 + 1)])
-        assert_raises(ValueError, np.dtype, [('a', 'f4', max_int + 1)])
-        assert_raises(ValueError, np.dtype, [('a', 'f4', (max_int, 2))])
-        # Takes a different code path (fails earlier:
-        assert_raises(ValueError, np.dtype, [('a', 'f8', max_intp // 8 + 1)])
+        assert_raises(ValueError, np.dtype, [('a', 'f4', max_intp // 4 + 1)])
+        assert_raises(ValueError, np.dtype, [('a', 'f4', max_intp + 1)])
         # Negative values
         assert_raises(ValueError, np.dtype, [('a', 'f4', -1)])
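A quick sanity check on the bound used above (plain Python arithmetic; max_intp is 2**63 - 1 on a 64-bit system):

import numpy as np

max_intp = np.iinfo(np.intp).max
items = max_intp // 4 + 1
assert items * 4 > max_intp   # the f4 field's byte size overflows npy_intp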
@@ -1252,7 +1249,7 @@ def test_structured(self, dtype, random):
 
 class TestPickling:
 
-    def check_pickling(self, dtype):
+    def check_pickling(self, dtype, arr_assert=True):
         for proto in range(pickle.HIGHEST_PROTOCOL + 1):
             buf = pickle.dumps(dtype, proto)
             # The dtype pickling itself pickles `np.dtype` if it is pickled
@@ -1262,22 +1259,25 @@ def check_pickling(self, dtype):
             pickled = pickle.loads(buf)
             assert_equal(pickled, dtype)
             assert_equal(pickled.descr, dtype.descr)
+            assert_equal(pickled.itemsize, dtype.itemsize)
             if dtype.metadata is not None:
                 assert_equal(pickled.metadata, dtype.metadata)
-            # Check the reconstructed dtype is functional
-            x = np.zeros(3, dtype=dtype)
-            y = np.zeros(3, dtype=pickled)
-            assert_equal(x, y)
-            assert_equal(x[0], y[0])
+            # some large structured dtypes are too large to
+            # reasonably compare across all elements
+            if arr_assert:
+                # Check the reconstructed dtype is functional
+                x = np.zeros(3, dtype=dtype)
+                y = np.zeros(3, dtype=pickled)
+                assert_equal(x, y)
+                assert_equal(x[0], y[0])
 
-    @pytest.mark.xfail(reason="dtype conversion doesn't allow this yet.")
+    @pytest.mark.skipif(not IS_64BIT, reason="test requires 64-bit system")
     def test_pickling_large(self):
         # The actual itemsize is larger than a c-integer here.
         dtype = np.dtype(f"({2**31},)i")
-        self.check_pickling(dtype)
+        self.check_pickling(dtype, False)
         dtype = np.dtype(f"({2**31},)i", metadata={"a": "b"})
-        self.check_pickling(dtype)
+        self.check_pickling(dtype, False)
 
     @pytest.mark.parametrize('t', [int, float, complex, np.int32, str, object,
                                    bool])
@@ -2049,3 +2049,36 @@ def test_signature_dtypes_classes(self, typename: str):
 
         params_actual = set(sig.parameters)
         assert params_actual == params_expect
+
+
+@pytest.mark.parametrize("kind, exp", [
+    ([("x", np.float64, 2 ** 28)], (2 ** 28 * 8)),
+    ([("x", np.float64, 2 ** 27), ("y", np.float64, 2 ** 27)], (2 ** 28 * 8)),
+    ([("x", np.float32, 2 ** 28), ("y", np.float64, 2 ** 27)], (2 ** 28 * 8)),
+    ([("x", np.float16, 2 ** 29), ("y", np.float64, 2 ** 27)], (2 ** 28 * 8)),
+    ("2147483648i,2147483648i", 17179869184),
+    ("2147483648f,2147483648f", 17179869184),
+    ("2147483648d,2147483648d", 34359738368),
+    ("2b,2147483648b,2f,4i", 2147483674),
+    (dict(names=["a"], formats=["2147483648i"]), 8589934592),
+    (dict(names=["a"], formats=["2147483648i"], offsets=[1]), 8589934593),
+    (dict(names=["a"], formats=["2147483648i"], offsets=[2 ** 31 - 100]), 10737418140),
+    (dict(names=["a"], formats=["2147483648i"], offsets=[2 ** 31]), 10737418240),
+    (dict(names=["a", "b", "c"], formats=["2147483648b", "16i", "12f"],
+          offsets=[2 ** 31, 2 ** 32, 2 ** 32 + 69]), 4294967413),
+])
+@pytest.mark.skipif(not IS_64BIT, reason="test requires 64-bit system")
+def test_gh_31308(kind, exp):
+    kind_dtype = np.dtype(kind)
+    assert kind_dtype.itemsize == exp
+    for name in kind_dtype.names:
+        assert kind_dtype[name].shape[0] > 0
+
+
+@pytest.mark.skipif(not IS_64BIT, reason="test requires 64-bit system")
+@requires_memory(free_bytes=2e9)
+def test_gh_31308_materialized():
+    kind = [("x", np.float64, 2 ** 28)]
+    kind_dtype = np.dtype(kind)
+    rec_arr = np.array((1,), dtype=kind_dtype)
+    assert rec_arr["x"].size == 2 ** 28
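
For reference, the expected itemsizes in the table above follow directly from element count times element size:

assert 2**31 * 4 == 8589934592            # "2147483648i": 2**31 int32 values
assert 2 * 2**31 * 4 == 17179869184       # "2147483648i,2147483648i"
assert 2 * 2**31 * 8 == 34359738368       # "2147483648d,2147483648d"
assert 2 + 2**31 + 8 + 16 == 2147483674   # "2b,2147483648b,2f,4i"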
13 changes: 13 additions & 0 deletions numpy/_core/tests/test_records.py
@@ -9,13 +9,15 @@
 
 import numpy as np
 from numpy.testing import (
+    IS_64BIT,
     assert_,
     assert_array_almost_equal,
     assert_array_equal,
     assert_equal,
     assert_raises,
     temppath,
 )
+from numpy.testing._private.utils import requires_memory
 
 
 class TestFromrecords:
@@ -108,6 +110,17 @@ def test_recarray_fromfile(self):
         assert_equal(r1, r2)
         assert_equal(r2, r3)
 
+    @pytest.mark.skipif(not IS_64BIT, reason="test requires 64-bit system")
+    @requires_memory(free_bytes=2e9)
+    def test_recarray_fromfile_massive(self, tmpdir):
+        kind = [("x", np.float64, 2 ** 28)]
+        kind_dtype = np.dtype(kind)
+        rec_arr = np.array((1,), dtype=kind_dtype)
+        with tmpdir.as_cwd():
+            rec_arr.tofile("f.data")
+            actual = np.fromfile("f.data", dtype=kind_dtype)
+            assert actual.itemsize == 2 ** 28 * 8
[Inline review thread on test_recarray_fromfile_massive]

Member:
Ohh, fun. OTOH, the error here would be that we are not reading everything (i.e. the last bit of the result not being -1). But I think there is a fun little thing happening here:

  • This is a signed int overflow: 2**31 -> -2**31.
  • Cast to size_t for reading, it will just wrap back to 2**31, since that is unsigned.

So, no problem until we reach 2**32, at which point the error would be reading nothing at all.

Possibly we could employ a funny trick here: just try to read 1 element of a 2**32+1 sized dtype from a short file (not empty, hence the +1).
With fromfile(..., count=1) that should try to read one element, fail, and return an empty array. But with the bug I think it'll actually return an array of length 1.
(That said, clearly that is more of a regression test designed for the specific error in this code...)
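
For concreteness, a rough sketch of that suggested trick (hypothetical and untested; it assumes the oversized dtype can be constructed at all, i.e. a 64-bit build with this PR applied):

import numpy as np

dt = np.dtype([("x", np.uint8, 2**32 + 1)])  # itemsize 2**32 + 1
with open("short.data", "wb") as f:
    f.write(b"\x00" * 8)                     # non-empty, but far too short
arr = np.fromfile("short.data", dtype=dt, count=1)
# Per the reasoning above: with a correct elsize no full element can be
# read, so arr should come back empty; with the 32-bit truncation bug the
# elsize wraps to 1 byte and a bogus "element" would be read.
print(arr.size)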

Contributor Author:

Playing with this a bit, something really strange/bad happens somewhere: I can retrieve items from the reconstituted array quickly, but attempting to perform assertions on even those individual elements is incredibly slow.

For example,

item = actual["x"][0][1].item()  # fast
assert item > 0  # prohibitively slow

It is only slow if it is destined to fail (!), with or without the npy_intp patch applied. How strange; I wonder if something is getting corrupted, or if I'm missing something obvious that gets expanded too large (item prints out innocently enough as either 0.0 or 1.0, depending on whether the patch is applied). I spent a few hours poking at it because it looked so strange. It will also hang on the mismatch of the tobytes patterns, which might suggest a corruption somewhere, I suppose (the same byte-pattern comparison is fine in the raw interpreter).

I didn't find any advantage to using count=1. Maybe I misunderstood, but our problem is that we have a single massive "item", so count=1 doesn't cut that down/discretize? The test works just fine with it, but the runtime/result is the same. Perhaps you meant reading back in with a separate, smaller dtype itemsize and looking at a slice of the data that way; I'm not sure.

For now, I've pushed a revision to test_recarray_fromfile_massive() so that it uses a larger itemsize (that part of your suggestion was indeed needed); it passes within 1-10 seconds locally with the earlier patch applied to array_fromfile_binary. Without that small patch, the test basically hangs, for reasons I don't fully understand. I suppose that's at least a bit closer to what I want, i.e. a test that doesn't reasonably pass without the patch, though it is still a bit annoying.

Member:

> Maybe I misunderstood, but our problem is that we have a single massive "item", so count=1 doesn't cut that down/discretize?

I think I just skipped over that; it seemed to me that if we pass count=1 it'll assume it can read 1 item, while otherwise it checks how big the file actually is.
I.e. my thought was that we can just put a few bytes into the file, and NumPy will incorrectly attempt to read an actual element.

> assert item > 0  # prohibitively slow

In general, or in pytest? Because if it is in pytest, I could imagine it wants to do something like print `actual` or so?

Contributor Author:

Ah yeah, thanks. It looks like suppressing the traceback lets the failure scenario surface in a second or two (when the type change is reverted): spin test -t numpy/_core/tests/test_records.py::TestFromrecords::test_recarray_fromfile_massive -- --tb=no

It's annoying that pytest doesn't auto-truncate/summarize.

This also "works", though it is not very nice:

--- a/numpy/_core/tests/test_records.py
+++ b/numpy/_core/tests/test_records.py
@@ -122,7 +122,9 @@ def test_recarray_fromfile_massive(self, tmpdir):
             actual = np.fromfile("f.data", dtype=kind_dtype)
             assert actual.itemsize == 2 ** 29 * 8
             item = actual["x"][0][1]
-            assert_allclose(item, 1)
+            if item != 1:
+                # avoid hang from pytest traceback dumping massive array
+                pytest.fail("fromfile elsize error", pytrace=False)

I'll leave this one alone for now; it seems to be getting closer to something sensible, perhaps...


     def test_recarray_from_obj(self):
         count = 10
         a = np.zeros(count, dtype='O')