From 094c3e33b0be3bb73d4b8f09427b15a50879548d Mon Sep 17 00:00:00 2001 From: Jim Kitchen Date: Sat, 21 Mar 2026 15:32:37 -0500 Subject: [PATCH] fix segfault --- graphblas/core/formatting.py | 32 +++++++++++++++++++++----------- graphblas/core/ss/matrix.py | 23 ++++++++++++++++------- graphblas/core/ss/vector.py | 23 ++++++++++++++++------- graphblas/tests/test_matrix.py | 2 +- graphblas/tests/test_vector.py | 7 ++----- 5 files changed, 56 insertions(+), 31 deletions(-) diff --git a/graphblas/core/formatting.py b/graphblas/core/formatting.py index 0b6252101..5fe9b6972 100644 --- a/graphblas/core/formatting.py +++ b/graphblas/core/formatting.py @@ -119,7 +119,7 @@ """ -def _update_matrix_dataframe(df, matrix, rows, row_offset, columns, column_offset, *, mask=None): +def _update_matrix_array(arr, matrix, rows, row_offset, columns, column_offset, *, mask=None): if rows is None and columns is None: if mask is None: submatrix = matrix @@ -167,13 +167,17 @@ def _update_matrix_dataframe(df, matrix, rows, row_offset, columns, column_offse np_type = submatrix.dtype.np_type if submatrix.dtype._is_udt and np_type.subdtype is not None: vals = vals.tolist() - df.values[rows, cols] = vals + if isinstance(vals, np.ndarray) and vals.dtype.names is not None: + # Structured array: convert numpy.void elements to tuples for consistent display + arr[rows, cols] = [tuple(v) for v in vals] + else: + arr[rows, cols] = vals if np.issubdtype(np_type, np.inexact): nulls = np.isnan(vals) - df.values[rows[nulls], cols[nulls]] = "nan" + arr[rows[nulls], cols[nulls]] = "nan" -def _update_vector_dataframe(df, vector, columns, column_offset, *, mask=None): +def _update_vector_array(arr, vector, columns, column_offset, *, mask=None): if columns is None: if mask is None: subvector = vector @@ -205,9 +209,13 @@ def _update_vector_dataframe(df, vector, columns, column_offset, *, mask=None): np_type = subvector.dtype.np_type if subvector.dtype._is_udt and np_type.subdtype is not None: vals = vals.tolist() - df.values[0, cols] = vals + if isinstance(vals, np.ndarray) and vals.dtype.names is not None: + # Structured array: convert numpy.void elements to tuples for consistent display + arr[0, cols] = [tuple(v) for v in vals] + else: + arr[0, cols] = vals if np.issubdtype(np_type, np.inexact): - df.values[0, cols[np.isnan(vals)]] = "nan" + arr[0, cols[np.isnan(vals)]] = "nan" def _get_max_columns(): @@ -244,11 +252,11 @@ def _get_matrix_dataframe(matrix, max_rows, min_rows, max_columns, *, mask=None) max_columns = _get_max_columns() rows, row_groups = _get_chunk(matrix._nrows, min_rows, max_rows) columns, column_groups = _get_chunk(matrix._ncols, max_columns, max_columns) - df = pd.DataFrame(columns=columns, index=rows) + arr = np.full((len(rows), len(columns)), np.nan, dtype=object) for row_group, row_offset in row_groups: for column_group, column_offset in column_groups: - _update_matrix_dataframe( - df, + _update_matrix_array( + arr, matrix, row_group, row_offset, @@ -256,6 +264,7 @@ def _get_matrix_dataframe(matrix, max_rows, min_rows, max_columns, *, mask=None) column_offset, mask=mask, ) + df = pd.DataFrame(arr, columns=columns, index=rows) if ( (mask is None or mask.structure) and df.shape != matrix.shape @@ -306,9 +315,10 @@ def _get_vector_dataframe(vector, max_rows, min_rows, max_columns, *, mask=None) if max_columns is None: # pragma: no branch max_columns = _get_max_columns() columns, column_groups = _get_chunk(vector._size, max_columns, max_columns) - df = pd.DataFrame(columns=columns, index=[""]) + arr = np.full((1, len(columns)), np.nan, dtype=object) for column_group, column_offset in column_groups: - _update_vector_dataframe(df, vector, column_group, column_offset, mask=mask) + _update_vector_array(arr, vector, column_group, column_offset, mask=mask) + df = pd.DataFrame(arr, columns=columns, index=[""]) if ( (mask is None or mask.structure) and df.size != vector._size diff --git a/graphblas/core/ss/matrix.py b/graphblas/core/ss/matrix.py index 509c56113..de6395cf6 100644 --- a/graphblas/core/ss/matrix.py +++ b/graphblas/core/ss/matrix.py @@ -4088,13 +4088,22 @@ def serialize(self, compression="default", level=None, **opts): dtype_size = ffi_new("size_t*") status = lib.GrB_Type_get_SIZE(parent.dtype.gb_obj[0], dtype_size, lib.GrB_NAME) check_status_carg(status, "Type", parent.dtype.gb_obj[0]) - # Then get the name - dtype_char = ffi_new(f"char[{dtype_size[0]}]") - status = lib.GrB_Type_get_String(parent.dtype.gb_obj[0], dtype_char, lib.GrB_NAME) - check_status_carg(status, "Type", parent.dtype.gb_obj[0]) - # Then set the name - status = lib.GrB_Matrix_set_String(parent._carg, dtype_char, lib.GrB_NAME) - check_status_carg(status, "Matrix", parent._carg) + if dtype_size[0] >= lib.GxB_MAX_NAME_LEN: + # The dtype name is too long to safely store in the blob (GxB_Serialized_get_SIZE + # segfaults on names >= GxB_MAX_NAME_LEN). For named UDTs, use the short + # registered name instead; anonymous UDTs cannot round-trip without dtype=. + if not parent.dtype._is_anonymous: + val_obj = ffi.new("char[]", parent.dtype.name.encode()) + status = lib.GrB_Matrix_set_String(parent._carg, val_obj, lib.GrB_NAME) + check_status_carg(status, "Matrix", parent._carg) + else: + # Then get the name + dtype_char = ffi_new(f"char[{dtype_size[0]}]") + status = lib.GrB_Type_get_String(parent.dtype.gb_obj[0], dtype_char, lib.GrB_NAME) + check_status_carg(status, "Type", parent.dtype.gb_obj[0]) + # Then set the name + status = lib.GrB_Matrix_set_String(parent._carg, dtype_char, lib.GrB_NAME) + check_status_carg(status, "Matrix", parent._carg) check_status( lib.GxB_Matrix_serialize( diff --git a/graphblas/core/ss/vector.py b/graphblas/core/ss/vector.py index fdde7eb92..4acef3fe5 100644 --- a/graphblas/core/ss/vector.py +++ b/graphblas/core/ss/vector.py @@ -1659,13 +1659,22 @@ def serialize(self, compression="default", level=None, **opts): dtype_size = ffi_new("size_t*") status = lib.GrB_Type_get_SIZE(parent.dtype.gb_obj[0], dtype_size, lib.GrB_NAME) check_status_carg(status, "Type", parent.dtype.gb_obj[0]) - # Then get the name - dtype_char = ffi_new(f"char[{dtype_size[0]}]") - status = lib.GrB_Type_get_String(parent.dtype.gb_obj[0], dtype_char, lib.GrB_NAME) - check_status_carg(status, "Type", parent.dtype.gb_obj[0]) - # Then set the name - status = lib.GrB_Vector_set_String(parent._carg, dtype_char, lib.GrB_NAME) - check_status_carg(status, "Vector", parent._carg) + if dtype_size[0] >= lib.GxB_MAX_NAME_LEN: + # The dtype name is too long to safely store in the blob (GxB_Serialized_get_SIZE + # segfaults on names >= GxB_MAX_NAME_LEN). For named UDTs, use the short + # registered name instead; anonymous UDTs cannot round-trip without dtype=. + if not parent.dtype._is_anonymous: + val_obj = ffi.new("char[]", parent.dtype.name.encode()) + status = lib.GrB_Vector_set_String(parent._carg, val_obj, lib.GrB_NAME) + check_status_carg(status, "Vector", parent._carg) + else: + # Then get the name + dtype_char = ffi_new(f"char[{dtype_size[0]}]") + status = lib.GrB_Type_get_String(parent.dtype.gb_obj[0], dtype_char, lib.GrB_NAME) + check_status_carg(status, "Type", parent.dtype.gb_obj[0]) + # Then set the name + status = lib.GrB_Vector_set_String(parent._carg, dtype_char, lib.GrB_NAME) + check_status_carg(status, "Vector", parent._carg) check_status( lib.GxB_Vector_serialize( diff --git a/graphblas/tests/test_matrix.py b/graphblas/tests/test_matrix.py index 24f0e73d7..b972ef260 100644 --- a/graphblas/tests/test_matrix.py +++ b/graphblas/tests/test_matrix.py @@ -2794,7 +2794,7 @@ def test_ss_concat(A, v): expected[:, A.ncols] = v assert B5.isequal(expected) - with pytest.raises(TypeError, match=""): + with pytest.raises(TypeError): gb.ss.concat([v, [v]]) with pytest.raises(TypeError): gb.ss.concat([[v], v]) diff --git a/graphblas/tests/test_vector.py b/graphblas/tests/test_vector.py index db80cdf71..d9bf84495 100644 --- a/graphblas/tests/test_vector.py +++ b/graphblas/tests/test_vector.py @@ -2221,11 +2221,8 @@ def test_udt(): if suitesparse: vv = Vector.ss.deserialize(v.ss.serialize(), dtype=long_udt) assert v.isequal(vv, check_dtype=True) - if ss_version_major < 9: - with pytest.raises(SyntaxError): - # The size of the UDT name is limited - Vector.ss.deserialize(v.ss.serialize()) - else: + with pytest.raises(SyntaxError): + # The dtype name is too long to embed in the blob; dtype= must be provided Vector.ss.deserialize(v.ss.serialize()) # May be able to look up non-anonymous dtypes by name if their names are too long named_long_dtype = np.dtype([("x", np.bool_), ("y" * 1000, np.float64)], align=False)