diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py
index 691a398c8e..fd1750a5ba 100644
--- a/sdk/python/feast/type_map.py
+++ b/sdk/python/feast/type_map.py
@@ -929,16 +929,35 @@ def _convert_scalar_values_to_proto(
         return [ProtoValue()] * len(values)
 
     if feast_value_type == ValueType.UNIX_TIMESTAMP:
-        int_timestamps = _python_datetime_to_int_timestamp(values)
-        return [ProtoValue(unix_timestamp_val=ts) for ts in int_timestamps]  # type: ignore
+        out = []
+        for value in values:
+            if (
+                value is None
+                or isinstance(value, np.ndarray)
+                or (hasattr(value, "__len__") and not isinstance(value, (str, bytes)))
+            ):
+                # None or array-like value in a scalar UNIX_TIMESTAMP column: null.
+                out.append(ProtoValue())
+            else:
+                (ts,) = _python_datetime_to_int_timestamp([value])
+                out.append(ProtoValue(unix_timestamp_val=ts))  # type: ignore
+        return out
 
     field_name, func, valid_scalar_types = PYTHON_SCALAR_VALUE_TYPE_TO_PROTO_VALUE[
         feast_value_type
     ]
 
-    # Validate scalar types
-    if valid_scalar_types:
-        if (sample == 0 or sample == 0.0) and feast_value_type != ValueType.BOOL:
+    # Validate scalar types — skip for array-like samples (they are always
+    # converted to an empty ProtoValue() by the conversion loops below).
+    if valid_scalar_types and not (
+        isinstance(sample, np.ndarray)
+        or (hasattr(sample, "__len__") and not isinstance(sample, (str, bytes)))
+    ):
+        try:
+            is_zero = sample == 0 or sample == 0.0
+        except (ValueError, TypeError):
+            is_zero = False
+        if is_zero and feast_value_type != ValueType.BOOL:
             # Numpy converts 0 to int, but column type may be float
             allowed_types = {np.int64, int, np.float64, float, decimal.Decimal}
             assert type(sample) in allowed_types, (
@@ -951,20 +970,39 @@ def _convert_scalar_values_to_proto(
 
     # Handle BOOL specially due to np.bool_ conversion requirement
     if feast_value_type == ValueType.BOOL:
-        return [
-            ProtoValue(
-                **{field_name: func(bool(value) if type(value) is np.bool_ else value)}
-            )  # type: ignore
-            if not pd.isnull(value)
-            else ProtoValue()
-            for value in values
-        ]
+        out = []
+        for value in values:
+            if isinstance(value, np.ndarray) or (
+                hasattr(value, "__len__") and not isinstance(value, (str, bytes))
+            ):
+                # Array-like value in a scalar BOOL column: treat as null.
+                out.append(ProtoValue())
+            elif not pd.isnull(value):
+                out.append(
+                    ProtoValue(
+                        **{
+                            field_name: func(
+                                bool(value) if type(value) is np.bool_ else value
+                            )
+                        }
+                    )  # type: ignore
+                )
+            else:
+                out.append(ProtoValue())
+        return out
 
     # Generic scalar conversion
     out = []
     for value in values:
         if isinstance(value, ProtoValue):
             out.append(value)
+        elif isinstance(value, np.ndarray) or (
+            hasattr(value, "__len__") and not isinstance(value, (str, bytes))
+        ):
+            # Array-like value in a scalar column: always treat as null.
+            # pd.isnull() is vectorised and would return an ndarray here,
+            # making `not pd.isnull(value)` raise ValueError.
+            out.append(ProtoValue())
         elif not pd.isnull(value):
             out.append(ProtoValue(**{field_name: func(value)}))
         else:
diff --git a/sdk/python/tests/unit/test_type_map.py b/sdk/python/tests/unit/test_type_map.py
index 8ec854d64a..4f87aa46f1 100644
--- a/sdk/python/tests/unit/test_type_map.py
+++ b/sdk/python/tests/unit/test_type_map.py
@@ -1845,3 +1845,111 @@ def test_pa_to_feast_value_type_nested(self):
             pa_to_feast_value_type("list>")
             == ValueType.VALUE_LIST
         )
+
+
+class TestEmptyArrayAsNull:
+    """Regression tests for https://github.com/feast-dev/feast/issues/6255
+    Ensure that an empty numpy array in a scalar feature column is treated as
+    null rather than raising ``ValueError: The truth value of an empty array is
+    ambiguous``.
+    """
+
+    def test_empty_numpy_array_treated_as_null_double(self):
+        from feast.protos.feast.types.Value_pb2 import Value as ProtoValue
+
+        result = python_values_to_proto_values(
+            [np.array([]), 1.0, None], ValueType.DOUBLE
+        )
+        assert result[0] == ProtoValue(), (
+            "empty array should produce an empty ProtoValue"
+        )
+        assert result[1].double_val == 1.0
+        assert result[2] == ProtoValue(), (
+            "None should still produce an empty ProtoValue"
+        )
+
+    def test_empty_numpy_array_treated_as_null_int64(self):
+        from feast.protos.feast.types.Value_pb2 import Value as ProtoValue
+
+        result = python_values_to_proto_values(
+            [np.array([]), 42, None], ValueType.INT64
+        )
+        assert result[0] == ProtoValue(), (
+            "empty array should produce an empty ProtoValue"
+        )
+        assert result[1].int64_val == 42
+        assert result[2] == ProtoValue()
+
+    def test_empty_numpy_array_treated_as_null_bool(self):
+        from feast.protos.feast.types.Value_pb2 import Value as ProtoValue
+
+        result = python_values_to_proto_values(
+            [np.array([]), True, None], ValueType.BOOL
+        )
+        assert result[0] == ProtoValue(), (
+            "empty array should produce an empty ProtoValue"
+        )
+        assert result[1].bool_val is True
+        assert result[2] == ProtoValue()
+
+    def test_array_with_null_element_treated_as_null(self):
+        """A non-empty array containing any null element in a scalar column is treated as null."""
+        from feast.protos.feast.types.Value_pb2 import Value as ProtoValue
+
+        result = python_values_to_proto_values(
+            [np.array([np.nan, 1.0]), 3.0], ValueType.DOUBLE
+        )
+        assert result[0] == ProtoValue(), (
+            "array with null element should produce an empty ProtoValue"
+        )
+        assert result[1].double_val == 3.0
+
+    def test_non_empty_array_without_nulls_is_treated_as_null(self):
+        """A non-empty numpy array in a scalar column is always treated as null.
+
+        A scalar feature column cannot hold an ndarray value (protobuf would
+        reject it), so any array-like value – empty or not – is mapped to an
+        empty ProtoValue() rather than crashing with ValueError.
+        """
+        from feast.protos.feast.types.Value_pb2 import Value as ProtoValue
+
+        result = python_values_to_proto_values(
+            [np.array([1.0, 2.0]), 3.0, None], ValueType.DOUBLE
+        )
+        # array-like value in a scalar column → null, not a crash
+        assert result[0] == ProtoValue(), (
+            "non-empty array in scalar column should be null"
+        )
+        assert result[1].double_val == 3.0
+        assert result[2] == ProtoValue()
+
+    def test_empty_numpy_array_treated_as_null_unix_timestamp(self):
+        """Array-like values in a scalar UNIX_TIMESTAMP column must not crash."""
+        from datetime import datetime, timezone
+
+        from feast.protos.feast.types.Value_pb2 import Value as ProtoValue
+
+        ts = datetime(2024, 1, 1, tzinfo=timezone.utc)
+        result = python_values_to_proto_values(
+            [np.array([]), ts, None], ValueType.UNIX_TIMESTAMP
+        )
+        assert result[0] == ProtoValue(), (
+            "empty array in UNIX_TIMESTAMP scalar column should produce null"
+        )
+        assert result[1].unix_timestamp_val == int(ts.timestamp())
+        assert result[2] == ProtoValue()
+
+    def test_non_empty_array_treated_as_null_unix_timestamp(self):
+        """Non-empty array in a UNIX_TIMESTAMP scalar column should produce null, not crash."""
+        from datetime import datetime, timezone
+
+        from feast.protos.feast.types.Value_pb2 import Value as ProtoValue
+
+        ts = datetime(2024, 6, 15, tzinfo=timezone.utc)
+        result = python_values_to_proto_values(
+            [np.array([1, 2, 3]), ts], ValueType.UNIX_TIMESTAMP
+        )
+        assert result[0] == ProtoValue(), (
+            "non-empty array in UNIX_TIMESTAMP scalar column should produce null"
+        )
+        assert result[1].unix_timestamp_val == int(ts.timestamp())