Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 63 additions & 14 deletions sdk/python/feast/type_map.py
Comment thread
devin-ai-integration[bot] marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -909,6 +909,16 @@ def _convert_list_values_to_proto(
]


def _is_array_like(value: Any) -> bool:
"""Return True if *value* is array-like (numpy array or any sized,
non-string, non-bytes container). Array-like values in a scalar
feature column cannot be mapped to a protobuf scalar field and are
therefore always treated as null."""
return isinstance(value, np.ndarray) or (
hasattr(value, "__len__") and not isinstance(value, (str, bytes))
)


def _convert_scalar_values_to_proto(
feast_value_type: ValueType,
values: List[Any],
Expand All @@ -929,16 +939,34 @@ def _convert_scalar_values_to_proto(
return [ProtoValue()] * len(values)

if feast_value_type == ValueType.UNIX_TIMESTAMP:
int_timestamps = _python_datetime_to_int_timestamp(values)
return [ProtoValue(unix_timestamp_val=ts) for ts in int_timestamps] # type: ignore
out: List[Any] = [None] * len(values)
clean_indices: List[int] = []
clean_values: List[Any] = []
for i, value in enumerate(values):
if _is_array_like(value) or value is None:
out[i] = ProtoValue()
else:
clean_indices.append(i)
clean_values.append(value)
if clean_values:
timestamps = _python_datetime_to_int_timestamp(clean_values)
for i, ts in zip(clean_indices, timestamps):
out[i] = ProtoValue(unix_timestamp_val=ts) # type: ignore
return out

field_name, func, valid_scalar_types = PYTHON_SCALAR_VALUE_TYPE_TO_PROTO_VALUE[
feast_value_type
]

# Validate scalar types
# Validate scalar types. The caller guarantees that *sample* is not
# array-like (array-like values are filtered out when picking the sample
# for scalar columns in python_values_to_proto_values).
if valid_scalar_types:
if (sample == 0 or sample == 0.0) and feast_value_type != ValueType.BOOL:
try:
is_zero = sample == 0 or sample == 0.0
except (ValueError, TypeError):
is_zero = False
if is_zero and feast_value_type != ValueType.BOOL:
# Numpy converts 0 to int, but column type may be float
allowed_types = {np.int64, int, np.float64, float, decimal.Decimal}
assert type(sample) in allowed_types, (
Expand All @@ -951,20 +979,35 @@ def _convert_scalar_values_to_proto(

# Handle BOOL specially due to np.bool_ conversion requirement
if feast_value_type == ValueType.BOOL:
return [
ProtoValue(
**{field_name: func(bool(value) if type(value) is np.bool_ else value)}
) # type: ignore
if not pd.isnull(value)
else ProtoValue()
for value in values
]
out = []
for value in values:
if _is_array_like(value):
# Array-like value in a scalar BOOL column: treat as null.
out.append(ProtoValue())
elif not pd.isnull(value):
out.append(
ProtoValue(
**{
field_name: func(
bool(value) if type(value) is np.bool_ else value
)
}
) # type: ignore
)
else:
out.append(ProtoValue())
return out

# Generic scalar conversion
out = []
for value in values:
if isinstance(value, ProtoValue):
out.append(value)
elif _is_array_like(value):
# Array-like value in a scalar column: always treat as null.
# pd.isnull() is vectorised and would return an ndarray here,
# making `not pd.isnull(value)` raise ValueError.
out.append(ProtoValue())
elif not pd.isnull(value):
out.append(ProtoValue(**{field_name: func(value)}))
else:
Expand Down Expand Up @@ -1107,12 +1150,18 @@ def _python_value_to_proto_value(
if "set" in type_name_lower:
return _python_set_to_proto_values(feast_value_type, values)

# Scalar types
# Scalar types — pick a sample that is not array-like so that the type
# validation in _convert_scalar_values_to_proto always receives a plain
# scalar (array-like values in a scalar column are treated as null).
if (
feast_value_type in PYTHON_SCALAR_VALUE_TYPE_TO_PROTO_VALUE
or feast_value_type == ValueType.UNIX_TIMESTAMP
):
return _convert_scalar_values_to_proto(feast_value_type, values, sample)
scalar_sample = next(
(v for v in values if _non_empty_value(v) and not _is_array_like(v)),
Comment thread
ntkathole marked this conversation as resolved.
None,
)
return _convert_scalar_values_to_proto(feast_value_type, values, scalar_sample)

raise Exception(f"Unsupported data type: {feast_value_type}")

Expand Down
108 changes: 108 additions & 0 deletions sdk/python/tests/unit/test_type_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -1845,3 +1845,111 @@ def test_pa_to_feast_value_type_nested(self):
pa_to_feast_value_type("list<item: list<item: double>>")
== ValueType.VALUE_LIST
)


class TestEmptyArrayAsNull:
    """Regression tests for https://github.com/feast-dev/feast/issues/6255

    An array-like value found in a scalar feature column must be converted to
    an empty ``ProtoValue`` (i.e. null) instead of raising ``ValueError: The
    truth value of an empty array is ambiguous``.
    """

    def test_empty_numpy_array_treated_as_null_double(self):
        """DOUBLE column: the empty array and None become null, 1.0 is kept."""
        from feast.protos.feast.types.Value_pb2 import Value as ProtoValue

        null_proto = ProtoValue()
        column = [np.array([]), 1.0, None]

        protos = python_values_to_proto_values(column, ValueType.DOUBLE)

        assert protos[0] == null_proto, (
            "empty array should produce an empty ProtoValue"
        )
        assert protos[1].double_val == 1.0
        assert protos[2] == null_proto, (
            "None should still produce an empty ProtoValue"
        )

    def test_empty_numpy_array_treated_as_null_int64(self):
        """INT64 column: the empty array and None become null, 42 is kept."""
        from feast.protos.feast.types.Value_pb2 import Value as ProtoValue

        null_proto = ProtoValue()
        column = [np.array([]), 42, None]

        protos = python_values_to_proto_values(column, ValueType.INT64)

        assert protos[0] == null_proto, (
            "empty array should produce an empty ProtoValue"
        )
        assert protos[1].int64_val == 42
        assert protos[2] == null_proto

    def test_empty_numpy_array_treated_as_null_bool(self):
        """BOOL column: the empty array and None become null, True is kept."""
        from feast.protos.feast.types.Value_pb2 import Value as ProtoValue

        null_proto = ProtoValue()
        column = [np.array([]), True, None]

        protos = python_values_to_proto_values(column, ValueType.BOOL)

        assert protos[0] == null_proto, (
            "empty array should produce an empty ProtoValue"
        )
        assert protos[1].bool_val is True
        assert protos[2] == null_proto

    def test_array_with_null_element_treated_as_null(self):
        """A non-empty array containing any null element in a scalar column is treated as null."""
        from feast.protos.feast.types.Value_pb2 import Value as ProtoValue

        null_proto = ProtoValue()
        column = [np.array([np.nan, 1.0]), 3.0]

        protos = python_values_to_proto_values(column, ValueType.DOUBLE)

        assert protos[0] == null_proto, (
            "array with null element should produce an empty ProtoValue"
        )
        assert protos[1].double_val == 3.0

    def test_non_empty_array_without_nulls_is_treated_as_null(self):
        """A non-empty numpy array in a scalar column is always treated as null.

        A scalar feature column cannot hold an ndarray value (protobuf would
        reject it), so every array-like value, empty or not, is expected to
        come back as an empty ProtoValue() rather than crash with ValueError.
        """
        from feast.protos.feast.types.Value_pb2 import Value as ProtoValue

        null_proto = ProtoValue()
        column = [np.array([1.0, 2.0]), 3.0, None]

        protos = python_values_to_proto_values(column, ValueType.DOUBLE)

        # The array-like entry must map to null instead of raising.
        assert protos[0] == null_proto, (
            "non-empty array in scalar column should be null"
        )
        assert protos[1].double_val == 3.0
        assert protos[2] == null_proto

    def test_empty_numpy_array_treated_as_null_unix_timestamp(self):
        """Array-like values in a scalar UNIX_TIMESTAMP column must not crash."""
        from datetime import datetime, timezone

        from feast.protos.feast.types.Value_pb2 import Value as ProtoValue

        null_proto = ProtoValue()
        ts = datetime(2024, 1, 1, tzinfo=timezone.utc)
        column = [np.array([]), ts, None]

        protos = python_values_to_proto_values(column, ValueType.UNIX_TIMESTAMP)

        assert protos[0] == null_proto, (
            "empty array in UNIX_TIMESTAMP scalar column should produce null"
        )
        assert protos[1].unix_timestamp_val == int(ts.timestamp())
        assert protos[2] == null_proto

    def test_non_empty_array_treated_as_null_unix_timestamp(self):
        """Non-empty array in a UNIX_TIMESTAMP scalar column should produce null, not crash."""
        from datetime import datetime, timezone

        from feast.protos.feast.types.Value_pb2 import Value as ProtoValue

        null_proto = ProtoValue()
        ts = datetime(2024, 6, 15, tzinfo=timezone.utc)
        column = [np.array([1, 2, 3]), ts]

        protos = python_values_to_proto_values(column, ValueType.UNIX_TIMESTAMP)

        assert protos[0] == null_proto, (
            "non-empty array in UNIX_TIMESTAMP scalar column should produce null"
        )
        assert protos[1].unix_timestamp_val == int(ts.timestamp())
Loading