From 094c3e33b0be3bb73d4b8f09427b15a50879548d Mon Sep 17 00:00:00 2001
From: Jim Kitchen <jim22k@gmail.com>
Date: Sat, 21 Mar 2026 15:32:37 -0500
Subject: [PATCH] fix segfault

---
 graphblas/core/formatting.py   | 32 +++++++++++++++++++++-----------
 graphblas/core/ss/matrix.py    | 23 ++++++++++++++++-------
 graphblas/core/ss/vector.py    | 23 ++++++++++++++++-------
 graphblas/tests/test_matrix.py |  2 +-
 graphblas/tests/test_vector.py |  7 ++-----
 5 files changed, 56 insertions(+), 31 deletions(-)

diff --git a/graphblas/core/formatting.py b/graphblas/core/formatting.py
index 0b6252101..5fe9b6972 100644
--- a/graphblas/core/formatting.py
+++ b/graphblas/core/formatting.py
@@ -119,7 +119,7 @@
 """
 
 
-def _update_matrix_dataframe(df, matrix, rows, row_offset, columns, column_offset, *, mask=None):
+def _update_matrix_array(arr, matrix, rows, row_offset, columns, column_offset, *, mask=None):
     if rows is None and columns is None:
         if mask is None:
             submatrix = matrix
@@ -167,13 +167,17 @@ def _update_matrix_dataframe(df, matrix, rows, row_offset, columns, column_offse
     np_type = submatrix.dtype.np_type
     if submatrix.dtype._is_udt and np_type.subdtype is not None:
         vals = vals.tolist()
-    df.values[rows, cols] = vals
+    if isinstance(vals, np.ndarray) and vals.dtype.names is not None:
+        # Structured array: convert numpy.void elements to tuples for consistent display
+        arr[rows, cols] = [tuple(v) for v in vals]
+    else:
+        arr[rows, cols] = vals
     if np.issubdtype(np_type, np.inexact):
         nulls = np.isnan(vals)
-        df.values[rows[nulls], cols[nulls]] = "nan"
+        arr[rows[nulls], cols[nulls]] = "nan"
 
 
-def _update_vector_dataframe(df, vector, columns, column_offset, *, mask=None):
+def _update_vector_array(arr, vector, columns, column_offset, *, mask=None):
     if columns is None:
         if mask is None:
             subvector = vector
@@ -205,9 +209,13 @@ def _update_vector_dataframe(df, vector, columns, column_offset, *, mask=None):
     np_type = subvector.dtype.np_type
     if subvector.dtype._is_udt and np_type.subdtype is not None:
         vals = vals.tolist()
-    df.values[0, cols] = vals
+    if isinstance(vals, np.ndarray) and vals.dtype.names is not None:
+        # Structured array: convert numpy.void elements to tuples for consistent display
+        arr[0, cols] = [tuple(v) for v in vals]
+    else:
+        arr[0, cols] = vals
     if np.issubdtype(np_type, np.inexact):
-        df.values[0, cols[np.isnan(vals)]] = "nan"
+        arr[0, cols[np.isnan(vals)]] = "nan"
 
 
 def _get_max_columns():
@@ -244,11 +252,11 @@ def _get_matrix_dataframe(matrix, max_rows, min_rows, max_columns, *, mask=None)
         max_columns = _get_max_columns()
     rows, row_groups = _get_chunk(matrix._nrows, min_rows, max_rows)
     columns, column_groups = _get_chunk(matrix._ncols, max_columns, max_columns)
-    df = pd.DataFrame(columns=columns, index=rows)
+    arr = np.full((len(rows), len(columns)), np.nan, dtype=object)
     for row_group, row_offset in row_groups:
         for column_group, column_offset in column_groups:
-            _update_matrix_dataframe(
-                df,
+            _update_matrix_array(
+                arr,
                 matrix,
                 row_group,
                 row_offset,
@@ -256,6 +264,7 @@ def _get_matrix_dataframe(matrix, max_rows, min_rows, max_columns, *, mask=None)
                 column_offset,
                 mask=mask,
             )
+    df = pd.DataFrame(arr, columns=columns, index=rows)
     if (
         (mask is None or mask.structure)
         and df.shape != matrix.shape
@@ -306,9 +315,10 @@ def _get_vector_dataframe(vector, max_rows, min_rows, max_columns, *, mask=None)
     if max_columns is None:  # pragma: no branch
         max_columns = _get_max_columns()
     columns, column_groups = _get_chunk(vector._size, max_columns, max_columns)
-    df = pd.DataFrame(columns=columns, index=[""])
+    arr = np.full((1, len(columns)), np.nan, dtype=object)
     for column_group, column_offset in column_groups:
-        _update_vector_dataframe(df, vector, column_group, column_offset, mask=mask)
+        _update_vector_array(arr, vector, column_group, column_offset, mask=mask)
+    df = pd.DataFrame(arr, columns=columns, index=[""])
     if (
         (mask is None or mask.structure)
         and df.size != vector._size
diff --git a/graphblas/core/ss/matrix.py b/graphblas/core/ss/matrix.py
index 509c56113..de6395cf6 100644
--- a/graphblas/core/ss/matrix.py
+++ b/graphblas/core/ss/matrix.py
@@ -4088,13 +4088,22 @@ def serialize(self, compression="default", level=None, **opts):
             dtype_size = ffi_new("size_t*")
             status = lib.GrB_Type_get_SIZE(parent.dtype.gb_obj[0], dtype_size, lib.GrB_NAME)
             check_status_carg(status, "Type", parent.dtype.gb_obj[0])
-            # Then get the name
-            dtype_char = ffi_new(f"char[{dtype_size[0]}]")
-            status = lib.GrB_Type_get_String(parent.dtype.gb_obj[0], dtype_char, lib.GrB_NAME)
-            check_status_carg(status, "Type", parent.dtype.gb_obj[0])
-            # Then set the name
-            status = lib.GrB_Matrix_set_String(parent._carg, dtype_char, lib.GrB_NAME)
-            check_status_carg(status, "Matrix", parent._carg)
+            if dtype_size[0] >= lib.GxB_MAX_NAME_LEN:
+                # The dtype name is too long to safely store in the blob (GxB_Serialized_get_SIZE
+                # segfaults on names >= GxB_MAX_NAME_LEN). For named UDTs, use the short
+                # registered name instead; anonymous UDTs cannot round-trip without dtype=.
+                if not parent.dtype._is_anonymous:
+                    val_obj = ffi.new("char[]", parent.dtype.name.encode())
+                    status = lib.GrB_Matrix_set_String(parent._carg, val_obj, lib.GrB_NAME)
+                    check_status_carg(status, "Matrix", parent._carg)
+            else:
+                # Then get the name
+                dtype_char = ffi_new(f"char[{dtype_size[0]}]")
+                status = lib.GrB_Type_get_String(parent.dtype.gb_obj[0], dtype_char, lib.GrB_NAME)
+                check_status_carg(status, "Type", parent.dtype.gb_obj[0])
+                # Then set the name
+                status = lib.GrB_Matrix_set_String(parent._carg, dtype_char, lib.GrB_NAME)
+                check_status_carg(status, "Matrix", parent._carg)
 
         check_status(
             lib.GxB_Matrix_serialize(
diff --git a/graphblas/core/ss/vector.py b/graphblas/core/ss/vector.py
index fdde7eb92..4acef3fe5 100644
--- a/graphblas/core/ss/vector.py
+++ b/graphblas/core/ss/vector.py
@@ -1659,13 +1659,22 @@ def serialize(self, compression="default", level=None, **opts):
             dtype_size = ffi_new("size_t*")
             status = lib.GrB_Type_get_SIZE(parent.dtype.gb_obj[0], dtype_size, lib.GrB_NAME)
             check_status_carg(status, "Type", parent.dtype.gb_obj[0])
-            # Then get the name
-            dtype_char = ffi_new(f"char[{dtype_size[0]}]")
-            status = lib.GrB_Type_get_String(parent.dtype.gb_obj[0], dtype_char, lib.GrB_NAME)
-            check_status_carg(status, "Type", parent.dtype.gb_obj[0])
-            # Then set the name
-            status = lib.GrB_Vector_set_String(parent._carg, dtype_char, lib.GrB_NAME)
-            check_status_carg(status, "Vector", parent._carg)
+            if dtype_size[0] >= lib.GxB_MAX_NAME_LEN:
+                # The dtype name is too long to safely store in the blob (GxB_Serialized_get_SIZE
+                # segfaults on names >= GxB_MAX_NAME_LEN). For named UDTs, use the short
+                # registered name instead; anonymous UDTs cannot round-trip without dtype=.
+                if not parent.dtype._is_anonymous:
+                    val_obj = ffi.new("char[]", parent.dtype.name.encode())
+                    status = lib.GrB_Vector_set_String(parent._carg, val_obj, lib.GrB_NAME)
+                    check_status_carg(status, "Vector", parent._carg)
+            else:
+                # Then get the name
+                dtype_char = ffi_new(f"char[{dtype_size[0]}]")
+                status = lib.GrB_Type_get_String(parent.dtype.gb_obj[0], dtype_char, lib.GrB_NAME)
+                check_status_carg(status, "Type", parent.dtype.gb_obj[0])
+                # Then set the name
+                status = lib.GrB_Vector_set_String(parent._carg, dtype_char, lib.GrB_NAME)
+                check_status_carg(status, "Vector", parent._carg)
 
         check_status(
             lib.GxB_Vector_serialize(
diff --git a/graphblas/tests/test_matrix.py b/graphblas/tests/test_matrix.py
index 24f0e73d7..b972ef260 100644
--- a/graphblas/tests/test_matrix.py
+++ b/graphblas/tests/test_matrix.py
@@ -2794,7 +2794,7 @@ def test_ss_concat(A, v):
     expected[:, A.ncols] = v
     assert B5.isequal(expected)
 
-    with pytest.raises(TypeError, match=""):
+    with pytest.raises(TypeError):
         gb.ss.concat([v, [v]])
     with pytest.raises(TypeError):
         gb.ss.concat([[v], v])
diff --git a/graphblas/tests/test_vector.py b/graphblas/tests/test_vector.py
index db80cdf71..d9bf84495 100644
--- a/graphblas/tests/test_vector.py
+++ b/graphblas/tests/test_vector.py
@@ -2221,11 +2221,8 @@ def test_udt():
     if suitesparse:
         vv = Vector.ss.deserialize(v.ss.serialize(), dtype=long_udt)
         assert v.isequal(vv, check_dtype=True)
-        if ss_version_major < 9:
-            with pytest.raises(SyntaxError):
-                # The size of the UDT name is limited
-                Vector.ss.deserialize(v.ss.serialize())
-        else:
+        with pytest.raises(SyntaxError):
+            # The dtype name is too long to embed in the blob; dtype= must be provided
             Vector.ss.deserialize(v.ss.serialize())
     # May be able to look up non-anonymous dtypes by name if their names are too long
     named_long_dtype = np.dtype([("x", np.bool_), ("y" * 1000, np.float64)], align=False)