From a16949a10a241d2d47d5121947b48644b01f4cde Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 30 Apr 2026 09:51:53 +0200 Subject: [PATCH 1/3] MAINT,ENH: Make DTypes (except `np.dtype`) proper HeapTypes This makes all DTypes HeapTypes. For the builtin ones (and non-user created ones) we also mark them all as immortal as it might help a bit with refcounting on threaded execution. Because it is IMO nonsense to limit that to free-threaded, added a helper and use it throughout. (Claude was used with guidance to execute what I wanted.) --- numpy/_core/_add_newdocs.py | 2 +- numpy/_core/function_base.py | 17 +- numpy/_core/src/common/npy_pycompat.h | 12 + numpy/_core/src/multiarray/abstractdtypes.c | 271 ++++++++---------- numpy/_core/src/multiarray/abstractdtypes.h | 23 +- numpy/_core/src/multiarray/array_method.c | 4 +- numpy/_core/src/multiarray/dtypemeta.c | 120 ++------ numpy/_core/src/multiarray/multiarraymodule.c | 30 +- numpy/_core/src/multiarray/usertypes.c | 12 +- numpy/_core/src/umath/extobj.c | 4 +- 10 files changed, 220 insertions(+), 275 deletions(-) diff --git a/numpy/_core/_add_newdocs.py b/numpy/_core/_add_newdocs.py index 63ede7cc2c0b..271e4ec287bb 100644 --- a/numpy/_core/_add_newdocs.py +++ b/numpy/_core/_add_newdocs.py @@ -7070,7 +7070,7 @@ def _array_method_doc(name: str, params: str, doc: str) -> None: {_extra_docs} See `numpy.dtype` for the typical way to create dtype instances and :ref:`arrays.dtypes` for additional information. 
- """) + """, warn_on_python=False) del _dtype_name, _signature, _sctype_name, _extra_docs # avoid namespace pollution diff --git a/numpy/_core/function_base.py b/numpy/_core/function_base.py index b01ba108d2c4..30ce0197ba42 100644 --- a/numpy/_core/function_base.py +++ b/numpy/_core/function_base.py @@ -472,6 +472,8 @@ def _needs_add_docstring(obj): def _add_docstring(obj, doc, warn_on_python): + doc = inspect.cleandoc(doc) + if warn_on_python and not _needs_add_docstring(obj): warnings.warn( f"add_newdoc was used on a pure-python object {obj}. " @@ -479,7 +481,20 @@ def _add_docstring(obj, doc, warn_on_python): UserWarning, stacklevel=3) - doc = inspect.cleandoc(doc) + # For types, try to assign ``__doc__`` directly (works for heap types). + # When that succeeds, ``add_docstring`` only needs to populate + # ``__text_signature__`` from any ``"\n--\n\n"`` stub. Static types + # (where ``__doc__`` is read-only) fall through unchanged. + if isinstance(obj, type): + head, sep, body = doc.partition("\n--\n\n") + try: + obj.__doc__ = body if sep else doc + except Exception: + pass # just assume we should use add_docstring. + else: + if not sep: + return + doc = head + sep # set only text-signature part try: add_docstring(obj, doc) diff --git a/numpy/_core/src/common/npy_pycompat.h b/numpy/_core/src/common/npy_pycompat.h index 52d44b17283a..5b438c6ec248 100644 --- a/numpy/_core/src/common/npy_pycompat.h +++ b/numpy/_core/src/common/npy_pycompat.h @@ -6,6 +6,18 @@ #define Npy_HashDouble _Py_HashDouble +/* No-op fallback for ``PyUnstable_SetImmortal`` on Python < 3.13. */ +static inline int +NpyUnstable_SetImmortal(PyObject *op) +{ +#if PY_VERSION_HEX >= 0x030D0000 + return PyUnstable_SetImmortal(op); +#else + (void)op; + return 1; +#endif +} + #ifdef Py_GIL_DISABLED // Specialized version of critical section locking to safely use // PySequence_Fast APIs without the GIL. 
For performance, the argument *to* diff --git a/numpy/_core/src/multiarray/abstractdtypes.c b/numpy/_core/src/multiarray/abstractdtypes.c index 120ada551e7f..86bddcd3c6ba 100644 --- a/numpy/_core/src/multiarray/abstractdtypes.c +++ b/numpy/_core/src/multiarray/abstractdtypes.c @@ -12,6 +12,7 @@ #include "abstractdtypes.h" #include "array_coercion.h" #include "common.h" +#include "npy_pycompat.h" static inline PyArray_Descr * @@ -82,79 +83,12 @@ discover_descriptor_from_pycomplex( } -NPY_NO_EXPORT int -initialize_and_map_pytypes_to_dtypes() -{ - if (PyType_Ready((PyTypeObject *)&PyArray_IntAbstractDType) < 0) { - return -1; - } - if (PyType_Ready((PyTypeObject *)&PyArray_FloatAbstractDType) < 0) { - return -1; - } - if (PyType_Ready((PyTypeObject *)&PyArray_ComplexAbstractDType) < 0) { - return -1; - } - /* - * Delayed assignments to avoid "error C2099: initializer is not a constant" - * in windows compilers. Can hopefully be done in structs in the future. - */ - ((PyTypeObject *)&PyArray_PyLongDType)->tp_base = - (PyTypeObject *)&PyArray_IntAbstractDType; - PyArray_PyLongDType.scalar_type = &PyLong_Type; - if (PyType_Ready((PyTypeObject *)&PyArray_PyLongDType) < 0) { - return -1; - } - ((PyTypeObject *)&PyArray_PyFloatDType)->tp_base = - (PyTypeObject *)&PyArray_FloatAbstractDType; - PyArray_PyFloatDType.scalar_type = &PyFloat_Type; - if (PyType_Ready((PyTypeObject *)&PyArray_PyFloatDType) < 0) { - return -1; - } - ((PyTypeObject *)&PyArray_PyComplexDType)->tp_base = - (PyTypeObject *)&PyArray_ComplexAbstractDType; - PyArray_PyComplexDType.scalar_type = &PyComplex_Type; - if (PyType_Ready((PyTypeObject *)&PyArray_PyComplexDType) < 0) { - return -1; - } - - /* Register the new DTypes for discovery */ - if (_PyArray_MapPyTypeToDType( - &PyArray_PyLongDType, &PyLong_Type, NPY_FALSE) < 0) { - return -1; - } - if (_PyArray_MapPyTypeToDType( - &PyArray_PyFloatDType, &PyFloat_Type, NPY_FALSE) < 0) { - return -1; - } - if (_PyArray_MapPyTypeToDType( - 
&PyArray_PyComplexDType, &PyComplex_Type, NPY_FALSE) < 0) { - return -1; - } - - /* - * Map str, bytes, and bool, for which we do not need abstract versions - * to the NumPy DTypes. This is done here using the `is_known_scalar_type` - * function. - * TODO: The `is_known_scalar_type` function is considered preliminary, - * the same could be achieved e.g. with additional abstract DTypes. - */ - PyArray_DTypeMeta *dtype; - dtype = typenum_to_dtypemeta(NPY_UNICODE); - if (_PyArray_MapPyTypeToDType(dtype, &PyUnicode_Type, NPY_FALSE) < 0) { - return -1; - } - - dtype = typenum_to_dtypemeta(NPY_STRING); - if (_PyArray_MapPyTypeToDType(dtype, &PyBytes_Type, NPY_FALSE) < 0) { - return -1; - } - dtype = typenum_to_dtypemeta(NPY_BOOL); - if (_PyArray_MapPyTypeToDType(dtype, &PyBool_Type, NPY_FALSE) < 0) { - return -1; - } - - return 0; -} +NPY_NO_EXPORT PyArray_DTypeMeta *PyArray_IntAbstractDTypePtr = NULL; +NPY_NO_EXPORT PyArray_DTypeMeta *PyArray_FloatAbstractDTypePtr = NULL; +NPY_NO_EXPORT PyArray_DTypeMeta *PyArray_ComplexAbstractDTypePtr = NULL; +NPY_NO_EXPORT PyArray_DTypeMeta *PyArray_PyLongDTypePtr = NULL; +NPY_NO_EXPORT PyArray_DTypeMeta *PyArray_PyFloatDTypePtr = NULL; +NPY_NO_EXPORT PyArray_DTypeMeta *PyArray_PyComplexDTypePtr = NULL; /* @@ -286,102 +220,139 @@ complex_common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other) } -/* - * Define abstract numerical DTypes that all regular ones can inherit from - * (in arraytypes.c.src). - * Here, also define types corresponding to the python scalars. 
- */ -NPY_NO_EXPORT PyArray_DTypeMeta PyArray_IntAbstractDType = {{{ - PyVarObject_HEAD_INIT(&PyArrayDTypeMeta_Type, 0) - .tp_name = "numpy.dtypes._IntegerAbstractDType", - .tp_base = &PyArrayDescr_Type, - .tp_basicsize = sizeof(PyArray_Descr), - .tp_flags = Py_TPFLAGS_DEFAULT, - },}, - .type_num = -1, - .flags = NPY_DT_ABSTRACT, -}; - -NPY_DType_Slots pylongdtype_slots = { +static NPY_DType_Slots pylongdtype_slots = { .discover_descr_from_pyobject = discover_descriptor_from_pylong, .default_descr = int_default_descriptor, .common_dtype = int_common_dtype, }; -NPY_NO_EXPORT PyArray_DTypeMeta PyArray_PyLongDType = {{{ - PyVarObject_HEAD_INIT(&PyArrayDTypeMeta_Type, 0) - .tp_name = "numpy.dtypes._PyLongDType", - .tp_base = NULL, /* set in initialize_and_map_pytypes_to_dtypes */ - .tp_basicsize = sizeof(PyArray_Descr), - .tp_flags = Py_TPFLAGS_DEFAULT, - },}, - .type_num = -1, - .dt_slots = &pylongdtype_slots, - .scalar_type = NULL, /* set in initialize_and_map_pytypes_to_dtypes */ -}; - -NPY_NO_EXPORT PyArray_DTypeMeta PyArray_FloatAbstractDType = {{{ - PyVarObject_HEAD_INIT(&PyArrayDTypeMeta_Type, 0) - .tp_name = "numpy.dtypes._FloatAbstractDType", - .tp_base = &PyArrayDescr_Type, - .tp_basicsize = sizeof(PyArray_Descr), - .tp_flags = Py_TPFLAGS_DEFAULT, - },}, - .type_num = -1, - .flags = NPY_DT_ABSTRACT, -}; - -NPY_DType_Slots pyfloatdtype_slots = { +static NPY_DType_Slots pyfloatdtype_slots = { .discover_descr_from_pyobject = discover_descriptor_from_pyfloat, .default_descr = float_default_descriptor, .common_dtype = float_common_dtype, }; -NPY_NO_EXPORT PyArray_DTypeMeta PyArray_PyFloatDType = {{{ - PyVarObject_HEAD_INIT(&PyArrayDTypeMeta_Type, 0) - .tp_name = "numpy.dtypes._PyFloatDType", - .tp_base = NULL, /* set in initialize_and_map_pytypes_to_dtypes */ - .tp_basicsize = sizeof(PyArray_Descr), - .tp_flags = Py_TPFLAGS_DEFAULT, - },}, - .type_num = -1, - .dt_slots = &pyfloatdtype_slots, - .scalar_type = NULL, /* set in initialize_and_map_pytypes_to_dtypes */ 
-}; - -NPY_NO_EXPORT PyArray_DTypeMeta PyArray_ComplexAbstractDType = {{{ - PyVarObject_HEAD_INIT(&PyArrayDTypeMeta_Type, 0) - .tp_name = "numpy.dtypes._ComplexAbstractDType", - .tp_base = &PyArrayDescr_Type, - .tp_basicsize = sizeof(PyArray_Descr), - .tp_flags = Py_TPFLAGS_DEFAULT, - },}, - .type_num = -1, - .flags = NPY_DT_ABSTRACT, -}; - -NPY_DType_Slots pycomplexdtype_slots = { +static NPY_DType_Slots pycomplexdtype_slots = { .discover_descr_from_pyobject = discover_descriptor_from_pycomplex, .default_descr = complex_default_descriptor, .common_dtype = complex_common_dtype, }; -NPY_NO_EXPORT PyArray_DTypeMeta PyArray_PyComplexDType = {{{ - PyVarObject_HEAD_INIT(&PyArrayDTypeMeta_Type, 0) - .tp_name = "numpy.dtypes._PyComplexDType", - .tp_base = NULL, /* set in initialize_and_map_pytypes_to_dtypes */ - .tp_basicsize = sizeof(PyArray_Descr), - .tp_flags = Py_TPFLAGS_DEFAULT, - },}, - .type_num = -1, - .dt_slots = &pycomplexdtype_slots, - .scalar_type = NULL, /* set in initialize_and_map_pytypes_to_dtypes */ -}; + +/* + * Create a heap-type DType class via ``PyType_FromMetaclass`` and fill in + * the NumPy-specific fields. If ``slots`` is NULL we allocate an empty + * ``NPY_DType_Slots`` (abstract DTypes have no functional slots; in + * principle we should route everything through ``DTypeMetaInitFromSpec`` + * here, but for now we just allocate directly). When ``scalar_type`` is + * non-NULL the new DType is also registered for scalar discovery. 
+ */ +static PyArray_DTypeMeta * +make_raw_dtype(const char *name, PyTypeObject *base, + npy_uint64 flags, NPY_DType_Slots *slots, + PyTypeObject *scalar_type) +{ + PyType_Slot type_slots[] = { + {Py_tp_base, base}, + {0, NULL}, + }; + PyType_Spec spec = { + .name = name, + .basicsize = sizeof(PyArray_Descr), + .itemsize = 0, + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE, + .slots = type_slots, + }; + if (flags & NPY_DT_ABSTRACT) { + /* abstract ones can be subclassed in C but also disallow instantiation here */ + spec.flags |= Py_TPFLAGS_BASETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION; + } + PyArray_DTypeMeta *dt = (PyArray_DTypeMeta *)PyType_FromMetaclass( + &PyArrayDTypeMeta_Type, NULL, &spec, NULL); + if (dt == NULL) { + return NULL; + } + if (slots == NULL) { + slots = PyMem_Calloc(1, sizeof(NPY_DType_Slots)); + if (slots == NULL) { + Py_DECREF(dt); + PyErr_NoMemory(); + return NULL; + } + } + dt->dt_slots = slots; + dt->type_num = -1; + Py_XINCREF(scalar_type); + dt->scalar_type = scalar_type; + dt->singleton = NULL; + dt->flags = flags; + NpyUnstable_SetImmortal((PyObject *)dt); + + if (scalar_type != NULL) { + if (_PyArray_MapPyTypeToDType(dt, scalar_type, NPY_FALSE) < 0) { + Py_DECREF(dt); + return NULL; + } + } + return dt; +} /* - * Additional functions to deal with Python literal int, float, complex + * Create the abstract integer/float/complex DType classes (which the + * legacy concrete DTypes inherit from in ``arraytypes.c.src``) and the + * implicit DType classes for Python ``int``/``float``/``complex`` + * literals, and register the latter for scalar discovery. + * + * Must be called before ``set_typeinfo``: ``dtypemeta_wrap_legacy_descriptor`` + * inherits from the abstract DTypes created here. + */ +NPY_NO_EXPORT int +initialize_abstract_dtypes(void) +{ + struct dtype_spec { + const char *name; + PyArray_DTypeMeta **out; + /* Indirected so Py-scalar entries below can reference an abstract + * DType created earlier in the same loop iteration. 
*/ + PyTypeObject **base_ptr; + npy_uint64 flags; + NPY_DType_Slots *slots; + PyTypeObject *scalar_type; + }; + PyTypeObject *descr_base = (PyTypeObject *)&PyArrayDescr_Type; + + struct dtype_spec specs[] = { + /* Abstract DTypes; concrete legacy DTypes may inherit from these. */ + {"numpy.dtypes._IntegerAbstractDType", &PyArray_IntAbstractDTypePtr, + &descr_base, NPY_DT_ABSTRACT, NULL, NULL}, + {"numpy.dtypes._FloatAbstractDType", &PyArray_FloatAbstractDTypePtr, + &descr_base, NPY_DT_ABSTRACT, NULL, NULL}, + {"numpy.dtypes._ComplexAbstractDType", &PyArray_ComplexAbstractDTypePtr, + &descr_base, NPY_DT_ABSTRACT, NULL, NULL}, + /* Py-scalar DTypes; bases are the abstract DTypes created above. */ + {"numpy.dtypes._PyLongDType", &PyArray_PyLongDTypePtr, + (PyTypeObject **)&PyArray_IntAbstractDTypePtr, + 0, &pylongdtype_slots, &PyLong_Type}, + {"numpy.dtypes._PyFloatDType", &PyArray_PyFloatDTypePtr, + (PyTypeObject **)&PyArray_FloatAbstractDTypePtr, + 0, &pyfloatdtype_slots, &PyFloat_Type}, + {"numpy.dtypes._PyComplexDType", &PyArray_PyComplexDTypePtr, + (PyTypeObject **)&PyArray_ComplexAbstractDTypePtr, + 0, &pycomplexdtype_slots, &PyComplex_Type}, + }; + for (size_t i = 0; i < sizeof(specs) / sizeof(specs[0]); i++) { + *specs[i].out = make_raw_dtype( + specs[i].name, *specs[i].base_ptr, specs[i].flags, + specs[i].slots, specs[i].scalar_type); + if (*specs[i].out == NULL) { + return -1; + } + } + return 0; +} + + /* * This function takes an existing array operand and if the new descr does * not match, replaces it with a new array that has the correct descriptor diff --git a/numpy/_core/src/multiarray/abstractdtypes.h b/numpy/_core/src/multiarray/abstractdtypes.h index a74b8f86e394..76cdcc8ba1e7 100644 --- a/numpy/_core/src/multiarray/abstractdtypes.h +++ b/numpy/_core/src/multiarray/abstractdtypes.h @@ -14,16 +14,25 @@ extern "C" { * These are mainly needed for value based promotion in ufuncs. 
It * may be necessary to make them (partially) public, to allow user-defined * dtypes to perform value based casting. + * Since types are historically not defined as references, we define + * dereferenced macro versions below for `&Type` style use. */ -NPY_NO_EXPORT extern PyArray_DTypeMeta PyArray_IntAbstractDType; -NPY_NO_EXPORT extern PyArray_DTypeMeta PyArray_FloatAbstractDType; -NPY_NO_EXPORT extern PyArray_DTypeMeta PyArray_ComplexAbstractDType; -NPY_NO_EXPORT extern PyArray_DTypeMeta PyArray_PyLongDType; -NPY_NO_EXPORT extern PyArray_DTypeMeta PyArray_PyFloatDType; -NPY_NO_EXPORT extern PyArray_DTypeMeta PyArray_PyComplexDType; +NPY_NO_EXPORT extern PyArray_DTypeMeta *PyArray_IntAbstractDTypePtr; +NPY_NO_EXPORT extern PyArray_DTypeMeta *PyArray_FloatAbstractDTypePtr; +NPY_NO_EXPORT extern PyArray_DTypeMeta *PyArray_ComplexAbstractDTypePtr; +NPY_NO_EXPORT extern PyArray_DTypeMeta *PyArray_PyLongDTypePtr; +NPY_NO_EXPORT extern PyArray_DTypeMeta *PyArray_PyFloatDTypePtr; +NPY_NO_EXPORT extern PyArray_DTypeMeta *PyArray_PyComplexDTypePtr; + +#define PyArray_IntAbstractDType (*PyArray_IntAbstractDTypePtr) +#define PyArray_FloatAbstractDType (*PyArray_FloatAbstractDTypePtr) +#define PyArray_ComplexAbstractDType (*PyArray_ComplexAbstractDTypePtr) +#define PyArray_PyLongDType (*PyArray_PyLongDTypePtr) +#define PyArray_PyFloatDType (*PyArray_PyFloatDTypePtr) +#define PyArray_PyComplexDType (*PyArray_PyComplexDTypePtr) NPY_NO_EXPORT int -initialize_and_map_pytypes_to_dtypes(void); +initialize_abstract_dtypes(void); /* diff --git a/numpy/_core/src/multiarray/array_method.c b/numpy/_core/src/multiarray/array_method.c index ffedca11d3d6..1958fbcf36fc 100644 --- a/numpy/_core/src/multiarray/array_method.c +++ b/numpy/_core/src/multiarray/array_method.c @@ -485,12 +485,10 @@ PyArrayMethod_FromSpec_int(PyArrayMethod_Spec *spec, int private) return NULL; } strcpy(res->method->name, spec->name); -#ifdef Py_GIL_DISABLED // Mark immortal to reduce reference count contention in 
PyArray_GetCastingImpl // If we ever allow replacing ArrayMethod objects or cleanup it DTypes or ufuncs, this may need to be reconsidered. // An alternative that might help is to store cast methods in a PyArrayIdentityHash instead of a dict. - PyUnstable_SetImmortal((PyObject *)res->method); -#endif + NpyUnstable_SetImmortal((PyObject *)res->method); return res; } diff --git a/numpy/_core/src/multiarray/dtypemeta.c b/numpy/_core/src/multiarray/dtypemeta.c index 7320c5c9fa9a..e086fc6b2020 100644 --- a/numpy/_core/src/multiarray/dtypemeta.c +++ b/numpy/_core/src/multiarray/dtypemeta.c @@ -11,6 +11,7 @@ #include #include "npy_import.h" +#include "npy_pycompat.h" #include "abstractdtypes.h" #include "arraytypes.h" @@ -45,32 +46,6 @@ dtypemeta_dealloc(PyArray_DTypeMeta *self) { PyType_Type.tp_dealloc((PyObject *) self); } -static PyObject * -dtypemeta_alloc(PyTypeObject *NPY_UNUSED(type), Py_ssize_t NPY_UNUSED(items)) -{ - PyErr_SetString(PyExc_TypeError, - "DTypes can only be created using the NumPy API."); - return NULL; -} - -static PyObject * -dtypemeta_new(PyTypeObject *NPY_UNUSED(type), - PyObject *NPY_UNUSED(args), PyObject *NPY_UNUSED(kwds)) -{ - PyErr_SetString(PyExc_TypeError, - "Preliminary-API: Cannot subclass DType."); - return NULL; -} - -static int -dtypemeta_init(PyTypeObject *NPY_UNUSED(type), - PyObject *NPY_UNUSED(args), PyObject *NPY_UNUSED(kwds)) -{ - PyErr_SetString(PyExc_TypeError, - "Preliminary-API: Cannot __init__ DType class."); - return -1; -} - static PyArray_DTypeMeta * dtype_does_not_promote( PyArray_DTypeMeta *NPY_UNUSED(self), PyArray_DTypeMeta *NPY_UNUSED(other)) @@ -412,15 +387,6 @@ dtypemeta_is_gc(PyObject *dtype_class) static int dtypemeta_traverse(PyArray_DTypeMeta *type, visitproc visit, void *arg) { - /* - * We have to traverse the base class (if it is a HeapType). - * PyType_Type will handle this logic for us. 
- * This function is currently not used, but will probably be necessary - * in the future when we implement HeapTypes (python/dynamically - * defined types). It should be revised at that time. - */ - assert(0); - assert(!NPY_DT_is_legacy(type) && (PyTypeObject *)type != &PyArrayDescr_Type); Py_VISIT(type->singleton); Py_VISIT(type->scalar_type); return PyType_Type.tp_traverse((PyObject *)type, visit, arg); @@ -1060,26 +1026,7 @@ object_common_dtype( /** * This function takes a PyArray_Descr and replaces its base class with - * a newly created dtype subclass (DTypeMeta instances). - * There are some subtleties that need to be remembered when doing this, - * first for the class objects itself it could be either a HeapType or not. - * Since we are defining the DType from C, we will not make it a HeapType, - * thus making it identical to a typical *static* type (except that we - * malloc it). We could do it the other way, but there seems no reason to - * do so. - * - * The DType instances (the actual dtypes or descriptors), are based on - * prototypes which are passed in. These should not be garbage collected - * and thus Py_TPFLAGS_HAVE_GC is not set. (We could allow this, but than - * would have to allocate a new object, since the GC needs information before - * the actual struct). - * - * The above is the reason why we should works exactly like we would for a - * static type here. - * Otherwise, we blurry the lines between C-defined extension classes - * and Python subclasses. e.g. `class MyInt(int): pass` is very different - * from our `class Float64(np.dtype): pass`, because the latter should not - * be a HeapType and its instances should be exact PyArray_Descr structs. + * a newly created DType (a heap type subclass of ``PyArray_DTypeMeta``). * * @param descr The descriptor that should be wrapped. * @param name The name for the DType. 
@@ -1128,46 +1075,27 @@ dtypemeta_wrap_legacy_descriptor( memset(dt_slots, '\0', sizeof(NPY_DType_Slots)); dt_slots->get_constant = default_get_constant; - PyArray_DTypeMeta *dtype_class = PyMem_Malloc(sizeof(PyArray_DTypeMeta)); + PyType_Slot type_slots[] = { + {Py_tp_new, (void *)legacy_dtype_default_new}, + {Py_tp_base, dtype_super_class}, + {0, NULL}, + }; + PyType_Spec spec = { + .name = name, + .basicsize = sizeof(_PyArray_LegacyDescr), + .itemsize = 0, + .flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_IMMUTABLETYPE, + .slots = type_slots, + }; + PyArray_DTypeMeta *dtype_class = (PyArray_DTypeMeta *)PyType_FromMetaclass( + &PyArrayDTypeMeta_Type, NULL, &spec, NULL); if (dtype_class == NULL) { PyMem_Free(dt_slots); - PyErr_NoMemory(); return NULL; } - - /* - * Initialize the struct fields identically to static code by copying - * a prototype instances for everything except our own fields which - * vary between the DTypes. - * In particular any Object initialization must be strictly copied from - * the untouched prototype to avoid complexities. - * Any Type slots need to be fixed before PyType_Ready, although most - * will be inherited automatically there. 
- */ - static PyArray_DTypeMeta prototype = { - {{ - PyVarObject_HEAD_INIT(&PyArrayDTypeMeta_Type, 0) - .tp_name = NULL, /* set below */ - .tp_basicsize = sizeof(_PyArray_LegacyDescr), - .tp_flags = Py_TPFLAGS_DEFAULT, - .tp_base = NULL, /* set below */ - .tp_new = (newfunc)legacy_dtype_default_new, - .tp_doc = NULL, /* set in python */ - },}, - .flags = NPY_DT_LEGACY, - /* Further fields are not common between DTypes */ - }; - memcpy(dtype_class, &prototype, sizeof(PyArray_DTypeMeta)); - /* Fix name and superclass of the Type*/ - ((PyTypeObject *)dtype_class)->tp_name = name; - ((PyTypeObject *)dtype_class)->tp_base = dtype_super_class, dtype_class->dt_slots = dt_slots; + dtype_class->flags = NPY_DT_LEGACY; - /* Let python finish the initialization */ - if (PyType_Ready((PyTypeObject *)dtype_class) < 0) { - Py_DECREF(dtype_class); - return NULL; - } dt_slots->castingimpls = PyDict_New(); if (dt_slots->castingimpls == NULL) { Py_DECREF(dtype_class); @@ -1284,6 +1212,7 @@ dtypemeta_wrap_legacy_descriptor( } } + NpyUnstable_SetImmortal((PyObject *)dtype_class); return dtype_class; } @@ -1371,16 +1300,19 @@ NPY_NO_EXPORT PyTypeObject PyArrayDTypeMeta_Type = { .tp_name = "numpy._DTypeMeta", .tp_basicsize = sizeof(PyArray_DTypeMeta), .tp_dealloc = (destructor)dtypemeta_dealloc, - /* Types are garbage collected (see dtypemeta_is_gc documentation) */ - .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + /* + * Types are garbage collected (see dtypemeta_is_gc documentation). + * ``Py_TPFLAGS_DISALLOW_INSTANTIATION`` blocks Python-level subclassing; + * a custom ``tp_new`` is not allowed because ``PyType_FromMetaclass`` + * forbids it on the metaclass. 
+ */ + .tp_flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC + | Py_TPFLAGS_DISALLOW_INSTANTIATION), .tp_doc = "Preliminary NumPy API: The Type of NumPy DTypes (metaclass)", .tp_traverse = (traverseproc)dtypemeta_traverse, .tp_members = dtypemeta_members, .tp_getset = dtypemeta_getset, .tp_base = NULL, /* set to PyType_Type at import time */ - .tp_init = (initproc)dtypemeta_init, - .tp_alloc = dtypemeta_alloc, - .tp_new = dtypemeta_new, .tp_is_gc = dtypemeta_is_gc, }; diff --git a/numpy/_core/src/multiarray/multiarraymodule.c b/numpy/_core/src/multiarray/multiarraymodule.c index 3c63bbaecaa3..855752a807df 100644 --- a/numpy/_core/src/multiarray/multiarraymodule.c +++ b/numpy/_core/src/multiarray/multiarraymodule.c @@ -5148,11 +5148,34 @@ _multiarray_umath_exec(PyObject *m) { (PyObject *)&NpyBusDayCalendar_Type); set_flaginfo(d); + /* Create all abstract DType classes */ + if (initialize_abstract_dtypes() < 0) { + return -1; + } + /* Finalize scalar types and expose them via namespace or typeinfo dict */ if (set_typeinfo(d) != 0) { return -1; } + /* + * Map ``str``/``bytes``/``bool`` to the matching legacy DTypes. Done + * after ``set_typeinfo`` since that is what wraps those DTypes. 
+ */ + PyArray_DTypeMeta *dt; + dt = typenum_to_dtypemeta(NPY_UNICODE); + if (_PyArray_MapPyTypeToDType(dt, &PyUnicode_Type, NPY_FALSE) < 0) { + return -1; + } + dt = typenum_to_dtypemeta(NPY_STRING); + if (_PyArray_MapPyTypeToDType(dt, &PyBytes_Type, NPY_FALSE) < 0) { + return -1; + } + dt = typenum_to_dtypemeta(NPY_BOOL); + if (_PyArray_MapPyTypeToDType(dt, &PyBool_Type, NPY_FALSE) < 0) { + return -1; + } + if (PyType_Ready(&PyArrayFunctionDispatcher_Type) < 0) { return -1; } @@ -5173,9 +5196,6 @@ _multiarray_umath_exec(PyObject *m) { if (PyType_Ready(&PyBoundArrayMethod_Type) < 0) { return -1; } - if (initialize_and_map_pytypes_to_dtypes() < 0) { - return -1; - } if (PyArray_InitializeCasts() < 0) { return -1; @@ -5193,13 +5213,11 @@ _multiarray_umath_exec(PyObject *m) { if (PyDataMem_DefaultHandler == NULL) { return -1; } -#ifdef Py_GIL_DISABLED - if (PyUnstable_SetImmortal(PyDataMem_DefaultHandler) == 0) { + if (NpyUnstable_SetImmortal(PyDataMem_DefaultHandler) == 0) { PyErr_SetString(PyExc_RuntimeError, "Could not mark memory handler capsule as immortal"); return -1; } -#endif /* * Initialize the context-local current handler * with the default PyDataMem_Handler capsule. diff --git a/numpy/_core/src/multiarray/usertypes.c b/numpy/_core/src/multiarray/usertypes.c index 78559fe9c80e..37f9c879c88d 100644 --- a/numpy/_core/src/multiarray/usertypes.c +++ b/numpy/_core/src/multiarray/usertypes.c @@ -234,15 +234,7 @@ PyArray_RegisterDataType(PyArray_DescrProto *descr_proto) return -1; } - /* - * Legacy user DTypes classes cannot have a name, since the user never - * defined one. So we create a name for them here. These DTypes are - * effectively static types. - * - * Note: we have no intention of freeing the memory again since this - * behaves identically to static type definition. - */ - + /* Build a name for the dynamically created new DType class. 
*/ const char *scalar_name = descr_proto->typeobj->tp_name; /* * We have to take only the name, and ignore the module to get @@ -308,13 +300,13 @@ PyArray_RegisterDataType(PyArray_DescrProto *descr_proto) descr_proto->type_num = typenum; PyArray_DTypeMeta *wrapped_dtype = dtypemeta_wrap_legacy_descriptor( descr, descr_proto->f, &PyArrayDescr_Type, name, NULL); + PyMem_Free(name); if (wrapped_dtype == NULL) { descr->type_num = -1; NPY_NUMUSERTYPES--; /* Override the type, it might be wrong and then decref crashes */ Py_SET_TYPE(descr, &PyArrayDescr_Type); Py_DECREF(descr); - PyMem_Free(name); /* free the name only on failure */ return -1; } if (use_void_clearimpl) { diff --git a/numpy/_core/src/umath/extobj.c b/numpy/_core/src/umath/extobj.c index cf3f517b4c6a..e47706ef73eb 100644 --- a/numpy/_core/src/umath/extobj.c +++ b/numpy/_core/src/umath/extobj.c @@ -146,13 +146,11 @@ init_extobj(void) if (npy_static_pydata.default_extobj_capsule == NULL) { return -1; } -#ifdef Py_GIL_DISABLED - if (PyUnstable_SetImmortal(npy_static_pydata.default_extobj_capsule) == 0) { + if (NpyUnstable_SetImmortal(npy_static_pydata.default_extobj_capsule) == 0) { PyErr_SetString(PyExc_RuntimeError, "Could not mark extobj capsule as immortal"); Py_CLEAR(npy_static_pydata.default_extobj_capsule); return -1; } -#endif npy_static_pydata.npy_extobj_contextvar = PyContextVar_New( "numpy.ufunc.extobj", npy_static_pydata.default_extobj_capsule); if (npy_static_pydata.npy_extobj_contextvar == NULL) { From e665aaf0565a5d06ff54a1584c5d2008cdf4b5c6 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 30 Apr 2026 13:41:29 +0200 Subject: [PATCH 2/3] Undo array-method always-immortalize: actually leaked and CI fails --- numpy/_core/src/common/npy_pycompat.h | 5 ++++- numpy/_core/src/multiarray/array_method.c | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/numpy/_core/src/common/npy_pycompat.h b/numpy/_core/src/common/npy_pycompat.h index 5b438c6ec248..f6ff53817b62 100644 --- 
a/numpy/_core/src/common/npy_pycompat.h +++ b/numpy/_core/src/common/npy_pycompat.h @@ -6,7 +6,10 @@ #define Npy_HashDouble _Py_HashDouble -/* No-op fallback for ``PyUnstable_SetImmortal`` on Python < 3.13. */ +/* + * No-op fallback for ``PyUnstable_SetImmortal`` on Python < 3.13, use + * `PyUnstable_SetImmortal` directly when only targeting free-threaded. + */ static inline int NpyUnstable_SetImmortal(PyObject *op) { diff --git a/numpy/_core/src/multiarray/array_method.c b/numpy/_core/src/multiarray/array_method.c index 1958fbcf36fc..ffedca11d3d6 100644 --- a/numpy/_core/src/multiarray/array_method.c +++ b/numpy/_core/src/multiarray/array_method.c @@ -485,10 +485,12 @@ PyArrayMethod_FromSpec_int(PyArrayMethod_Spec *spec, int private) return NULL; } strcpy(res->method->name, spec->name); +#ifdef Py_GIL_DISABLED // Mark immortal to reduce reference count contention in PyArray_GetCastingImpl // If we ever allow replacing ArrayMethod objects or cleanup it DTypes or ufuncs, this may need to be reconsidered. // An alternative that might help is to store cast methods in a PyArrayIdentityHash instead of a dict. 
- NpyUnstable_SetImmortal((PyObject *)res->method); + PyUnstable_SetImmortal((PyObject *)res->method); +#endif return res; } From 2d2c2ff3fcf94f1c6791665dfc0e0e2f58498923 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Mon, 11 May 2026 21:50:09 +0200 Subject: [PATCH 3/3] TST: Skip recursion test on x86 macos (cpython issue, but will open with nightlies) --- numpy/_core/tests/test_dtype.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/numpy/_core/tests/test_dtype.py b/numpy/_core/tests/test_dtype.py index 6ecb55ee1233..464f91be76b7 100644 --- a/numpy/_core/tests/test_dtype.py +++ b/numpy/_core/tests/test_dtype.py @@ -4,6 +4,7 @@ import operator import os import pickle +import platform import sys import types import warnings @@ -888,6 +889,10 @@ def test_list_recursion(self): np.dtype(l) @requires_deep_recursion + @pytest.mark.skipif( + sys.platform.startswith("darwin") and platform.machine() == "x86_64", + reason="test now segfaults on x86 macs", + ) def test_tuple_recursion(self): d = np.int32 for i in range(100000):