@@ -87,6 +87,9 @@ def test_python_values_to_proto_values_bool(values):
8787 (np .array ([None ]), ValueType .BYTES_LIST , None ),
8888 (np .array ([None ]), ValueType .STRING_LIST , None ),
8989 (np .array ([None ]), ValueType .UNIX_TIMESTAMP_LIST , None ),
90+ ([np .array ([], dtype = np .int32 )], ValueType .INT32_LIST , []),
91+ ([np .array ([], dtype = np .float32 )], ValueType .FLOAT_LIST , []),
92+ ([np .array ([], dtype = np .bool_ )], ValueType .BOOL_LIST , []),
9093 ([b"[1,2,3]" ], ValueType .INT64_LIST , [1 , 2 , 3 ]),
9194 ([b"[1,2,3]" ], ValueType .INT32_LIST , [1 , 2 , 3 ]),
9295 ([b"[1.5,2.5,3.5]" ], ValueType .FLOAT_LIST , [1.5 , 2.5 , 3.5 ]),
@@ -2065,3 +2068,164 @@ def test_proto_field_name_in_map(self):
20652068 from feast .type_map import PROTO_VALUE_TO_VALUE_TYPE_MAP
20662069
20672070 assert PROTO_VALUE_TO_VALUE_TYPE_MAP ["scalar_map_val" ] == ValueType .SCALAR_MAP
2071+
2072+
2073+ class TestArrowArrayStringListMaterialization :
2074+ """Regression tests for Array(String) columns from Arrow/Athena materialization.
2075+
2076+ Arrow/Athena deserializes Array(String) feature columns as numpy.ndarray with
2077+ object dtype. Two bugs were triggered:
2078+
2079+ 1. ValueError: "The truth value of an empty array is ambiguous"
2080+ — when an empty ndarray reached the scalar null-check `elif not pd.isnull(value)`.
2081+
2082+ 2. TypeError: "bad argument type for built-in operation"
2083+ — when proto_type(val=<ndarray>) was called; protobuf rejects ndarrays.
2084+
2085+ Both are fixed by _sanitize_list_value, which converts ndarrays to plain Python
2086+ lists and replaces None elements with a type-appropriate zero/empty default
2087+ (see _LIST_NONE_DEFAULTS).
2088+ """
2089+
2090+ def test_sanitize_list_value_ndarray (self ):
2091+ """ndarray is converted to a plain Python list."""
2092+ from feast .type_map import _sanitize_list_value
2093+
2094+ arr = np .array (["foo" , "bar" ], dtype = object )
2095+ result = _sanitize_list_value (arr , ValueType .STRING_LIST )
2096+ assert result == ["foo" , "bar" ]
2097+ assert isinstance (result , list )
2098+
2099+ def test_sanitize_list_value_empty_ndarray (self ):
2100+ """Empty ndarray is converted to an empty Python list."""
2101+ from feast .type_map import _sanitize_list_value
2102+
2103+ arr = np .array ([], dtype = object )
2104+ result = _sanitize_list_value (arr , ValueType .STRING_LIST )
2105+ assert result == []
2106+
2107+ def test_sanitize_list_value_ndarray_with_none (self ):
2108+ """None elements inside a STRING_LIST ndarray are replaced with empty string."""
2109+ from feast .type_map import _sanitize_list_value
2110+
2111+ arr = np .array (["foo" , None , "baz" ], dtype = object )
2112+ result = _sanitize_list_value (arr , ValueType .STRING_LIST )
2113+ assert result == ["foo" , "" , "baz" ]
2114+
2115+ def test_sanitize_list_value_plain_list (self ):
2116+ """Plain Python lists without None pass through unchanged."""
2117+ from feast .type_map import _sanitize_list_value
2118+
2119+ lst = ["foo" , "bar" ]
2120+ result = _sanitize_list_value (lst , ValueType .STRING_LIST )
2121+ assert result == ["foo" , "bar" ]
2122+
2123+ def test_sanitize_list_value_plain_list_with_none (self ):
2124+ """None elements in a STRING_LIST plain list are replaced with empty string."""
2125+ from feast .type_map import _sanitize_list_value
2126+
2127+ lst = ["foo" , None ]
2128+ result = _sanitize_list_value (lst , ValueType .STRING_LIST )
2129+ assert result == ["foo" , "" ]
2130+
2131+ def test_sanitize_list_value_numeric_none_replaced (self ):
2132+ """None elements in numeric lists are replaced with a type-appropriate default."""
2133+ from feast .type_map import _sanitize_list_value
2134+
2135+ assert _sanitize_list_value ([1 , None , 2 ], ValueType .INT32_LIST ) == [1 , 0 , 2 ]
2136+ assert _sanitize_list_value ([1 , None , 2 ], ValueType .INT64_LIST ) == [1 , 0 , 2 ]
2137+ assert _sanitize_list_value ([1.0 , None , 2.0 ], ValueType .FLOAT_LIST ) == [
2138+ 1.0 ,
2139+ 0.0 ,
2140+ 2.0 ,
2141+ ]
2142+ assert _sanitize_list_value ([1.0 , None , 2.0 ], ValueType .DOUBLE_LIST ) == [
2143+ 1.0 ,
2144+ 0.0 ,
2145+ 2.0 ,
2146+ ]
2147+ assert _sanitize_list_value ([True , None , False ], ValueType .BOOL_LIST ) == [
2148+ True ,
2149+ False ,
2150+ False ,
2151+ ]
2152+
2153+ def test_sanitize_list_value_bytes_none_replaced (self ):
2154+ """None elements in BYTES_LIST are replaced with b''."""
2155+ from feast .type_map import _sanitize_list_value
2156+
2157+ result = _sanitize_list_value ([b"x" , None ], ValueType .BYTES_LIST )
2158+ assert result == [b"x" , b"" ]
2159+
2160+ def test_sanitize_list_value_scalar_passthrough (self ):
2161+ """Non-list, non-ndarray values are returned unchanged."""
2162+ from feast .type_map import _sanitize_list_value
2163+
2164+ assert _sanitize_list_value ("hello" , ValueType .STRING_LIST ) == "hello"
2165+ assert _sanitize_list_value (42 , ValueType .INT32_LIST ) == 42
2166+
2167+ def test_string_list_from_ndarray (self ):
2168+ """STRING_LIST column with ndarray values materializes without TypeError."""
2169+ values = [
2170+ np .array (["foo" , "bar" ], dtype = object ),
2171+ np .array (["baz" ], dtype = object ),
2172+ ]
2173+ protos = python_values_to_proto_values (values , ValueType .STRING_LIST )
2174+ assert len (protos ) == 2
2175+ assert list (protos [0 ].string_list_val .val ) == ["foo" , "bar" ]
2176+ assert list (protos [1 ].string_list_val .val ) == ["baz" ]
2177+
2178+ def test_string_list_from_empty_ndarray (self ):
2179+ """Empty ndarray in a STRING_LIST column must not raise ValueError."""
2180+ values = [
2181+ np .array ([], dtype = object ),
2182+ np .array (["foo" ], dtype = object ),
2183+ ]
2184+ protos = python_values_to_proto_values (values , ValueType .STRING_LIST )
2185+ assert list (protos [0 ].string_list_val .val ) == []
2186+ assert list (protos [1 ].string_list_val .val ) == ["foo" ]
2187+
2188+ def test_string_list_from_ndarray_with_none_elements (self ):
2189+ """None elements inside an ndarray must not cause TypeError in protobuf."""
2190+ values = [
2191+ np .array (["foo" , None , "baz" ], dtype = object ),
2192+ ]
2193+ protos = python_values_to_proto_values (values , ValueType .STRING_LIST )
2194+ # None is replaced with empty string
2195+ assert list (protos [0 ].string_list_val .val ) == ["foo" , "" , "baz" ]
2196+
2197+ def test_string_list_null_row_produces_empty_proto (self ):
2198+ """A None row (missing user) produces an empty ProtoValue."""
2199+ from feast .protos .feast .types .Value_pb2 import Value as ProtoValue
2200+
2201+ values = [
2202+ None ,
2203+ np .array (["foo" ], dtype = object ),
2204+ ]
2205+ protos = python_values_to_proto_values (values , ValueType .STRING_LIST )
2206+ assert protos [0 ] == ProtoValue ()
2207+ assert list (protos [1 ].string_list_val .val ) == ["foo" ]
2208+
2209+ def test_mixed_batch_simulating_athena_chunk (self ):
2210+ """Simulate a real Athena chunk: mix of ndarray, empty ndarray, and None rows.
2211+
2212+ This is the exact scenario that triggered the TypeError during
2213+ string_list_features materialization.
2214+ """
2215+ from feast .protos .feast .types .Value_pb2 import Value as ProtoValue
2216+
2217+ # tags / labels column from Athena
2218+ values = [
2219+ np .array (["foo" , "bar" ], dtype = object ), # normal entity
2220+ np .array ([], dtype = object ), # entity with no values set
2221+ None , # missing entity (NULL row)
2222+ np .array (["baz" ], dtype = object ), # normal entity
2223+ np .array (["qux" , None ], dtype = object ), # entity with partial null
2224+ ]
2225+ protos = python_values_to_proto_values (values , ValueType .STRING_LIST )
2226+
2227+ assert list (protos [0 ].string_list_val .val ) == ["foo" , "bar" ]
2228+ assert list (protos [1 ].string_list_val .val ) == []
2229+ assert protos [2 ] == ProtoValue ()
2230+ assert list (protos [3 ].string_list_val .val ) == ["baz" ]
2231+ assert list (protos [4 ].string_list_val .val ) == ["qux" , "" ]
0 commit comments