Skip to content

Commit d21d32c

Browse files
pr feedback: none default values per type
Signed-off-by: Alan Gauthier <alan.gauthier@jobteaser.com>
1 parent cf2f0c4 commit d21d32c

2 files changed

Lines changed: 83 additions & 77 deletions

File tree

sdk/python/feast/type_map.py

Lines changed: 31 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -825,43 +825,38 @@ def convert_set_to_list(value: Any) -> Any:
825825
]
826826

827827

828-
# Sentinel value used by _to_proto_safe_list to indicate that None elements
829-
# should simply be filtered (dropped) rather than replaced with a default.
830-
_DROP_NONE = object()
831-
832828
# Per-type default values substituted for None elements inside list columns.
833-
# Only STRING_LIST uses ""; numeric/bytes types drop None entirely because
834-
# there is no meaningful in-band sentinel (protobuf rejects wrong scalar types).
835-
_LIST_TYPE_NONE_REPLACEMENT: Dict[ValueType, Any] = {
829+
# Protobuf repeated fields do not accept None, so we replace with a
830+
# type-appropriate zero/empty value.
831+
_LIST_NONE_DEFAULTS: Dict[ValueType, Any] = {
836832
ValueType.STRING_LIST: "",
833+
ValueType.BYTES_LIST: b"",
834+
ValueType.INT32_LIST: 0,
835+
ValueType.INT64_LIST: 0,
836+
ValueType.FLOAT_LIST: 0.0,
837+
ValueType.DOUBLE_LIST: 0.0,
838+
ValueType.BOOL_LIST: False,
839+
ValueType.UNIX_TIMESTAMP_LIST: NULL_TIMESTAMP_INT_VALUE,
840+
ValueType.UUID_LIST: "",
841+
ValueType.TIME_UUID_LIST: "",
842+
ValueType.DECIMAL_LIST: "",
837843
}
838844

839845

840-
def _to_proto_safe_list(
841-
value: Any, feast_value_type: ValueType = ValueType.STRING_LIST
842-
) -> Any:
843-
"""Convert an array/list column value to a proto-safe Python list.
844-
845-
Arrow/Athena returns Array columns as numpy.ndarray (object dtype).
846-
Protobuf repeated fields reject ndarrays and (for non-string types) None
847-
elements, so we:
848-
1. Call .tolist() to convert any numpy.ndarray to a plain Python list.
849-
2. For STRING_LIST: replace None elements with "" (empty string).
850-
For all other list types: drop None elements, since there is no valid
851-
in-band default for numeric/bytes protobuf fields.
846+
def _sanitize_list_value(value: Any, feast_value_type: ValueType) -> Any:
847+
"""Convert ndarray to list and replace None elements with a type-appropriate default.
852848
853-
Args:
854-
value: The raw column value (ndarray, list, or scalar).
855-
feast_value_type: The Feast ValueType of the list column. Controls how
856-
None elements are handled. Defaults to STRING_LIST.
849+
Arrow/Athena may deserialize array columns as numpy.ndarray with object dtype
850+
instead of plain Python lists. Protobuf repeated fields do not accept ndarrays
851+
or None elements, so we normalise here before building proto messages.
857852
"""
858853
if isinstance(value, np.ndarray):
859854
value = value.tolist()
860-
if isinstance(value, list):
861-
none_replacement = _LIST_TYPE_NONE_REPLACEMENT.get(feast_value_type, _DROP_NONE)
862-
if none_replacement is _DROP_NONE:
863-
return [x for x in value if x is not None]
864-
return [x if x is not None else none_replacement for x in value]
855+
if isinstance(value, list) and len(value) == 0:
856+
return None
857+
none_default = _LIST_NONE_DEFAULTS.get(feast_value_type)
858+
if none_default is not None and isinstance(value, list):
859+
value = [none_default if v is None else v for v in value]
865860
return value
866861

867862

@@ -887,6 +882,13 @@ def _convert_list_values_to_proto(
887882
feast_value_type
888883
]
889884

885+
values = [
886+
_sanitize_list_value(v, feast_value_type) if v is not None else v
887+
for v in values
888+
]
889+
if sample is not None:
890+
sample = _sanitize_list_value(sample, feast_value_type)
891+
890892
# Bytes to array type conversion
891893
if isinstance(sample, (bytes, bytearray)):
892894
if feast_value_type == ValueType.BYTES_LIST:
@@ -943,14 +945,8 @@ def _convert_list_values_to_proto(
943945
]
944946

945947
# Generic list conversion
946-
# Arrow/Athena deserializes Array columns as numpy.ndarray (object dtype).
947-
# _to_proto_safe_list converts to a plain Python list and sanitizes None
948-
# elements in a type-appropriate way (replaced with "" for STRING_LIST,
949-
# dropped for numeric/bytes types).
950948
return [
951-
ProtoValue(
952-
**{field_name: proto_type(val=_to_proto_safe_list(value, feast_value_type))} # type: ignore[arg-type]
953-
)
949+
ProtoValue(**{field_name: proto_type(val=value)}) # type: ignore[arg-type]
954950
if value is not None
955951
else ProtoValue()
956952
for value in values

sdk/python/tests/unit/test_type_map.py

Lines changed: 52 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1967,77 +1967,87 @@ class TestArrowArrayStringListMaterialization:
19671967
2. TypeError: "bad argument type for built-in operation"
19681968
— when proto_type(val=<ndarray>) was called; protobuf rejects ndarrays.
19691969
1970-
Both are fixed by _to_proto_safe_list, which converts ndarrays to plain Python
1971-
lists and sanitizes None elements in a type-appropriate way:
1972-
- STRING_LIST: None → "" (empty string)
1973-
- All other list types: None elements are dropped (filtered out)
1970+
Both are fixed by _sanitize_list_value, which converts ndarrays to plain Python
1971+
lists and replaces None elements with a type-appropriate zero/empty default
1972+
(see _LIST_NONE_DEFAULTS).
19741973
"""
19751974

1976-
def test_to_proto_safe_list_ndarray(self):
1975+
def test_sanitize_list_value_ndarray(self):
19771976
"""ndarray is converted to a plain Python list."""
1978-
from feast.type_map import _to_proto_safe_list
1977+
from feast.type_map import _sanitize_list_value
19791978

19801979
arr = np.array(["foo", "bar"], dtype=object)
1981-
result = _to_proto_safe_list(arr)
1980+
result = _sanitize_list_value(arr, ValueType.STRING_LIST)
19821981
assert result == ["foo", "bar"]
19831982
assert isinstance(result, list)
19841983

1985-
def test_to_proto_safe_list_empty_ndarray(self):
1986-
"""Empty ndarray is converted to an empty list."""
1987-
from feast.type_map import _to_proto_safe_list
1984+
def test_sanitize_list_value_empty_ndarray(self):
1985+
"""Empty ndarray is converted to None (treated as a missing row)."""
1986+
from feast.type_map import _sanitize_list_value
19881987

19891988
arr = np.array([], dtype=object)
1990-
result = _to_proto_safe_list(arr)
1991-
assert result == []
1992-
assert isinstance(result, list)
1989+
result = _sanitize_list_value(arr, ValueType.STRING_LIST)
1990+
assert result is None
19931991

1994-
def test_to_proto_safe_list_ndarray_with_none(self):
1992+
def test_sanitize_list_value_ndarray_with_none(self):
19951993
"""None elements inside a STRING_LIST ndarray are replaced with empty string."""
1996-
from feast.type_map import _to_proto_safe_list
1994+
from feast.type_map import _sanitize_list_value
19971995

19981996
arr = np.array(["foo", None, "baz"], dtype=object)
1999-
result = _to_proto_safe_list(arr, ValueType.STRING_LIST)
1997+
result = _sanitize_list_value(arr, ValueType.STRING_LIST)
20001998
assert result == ["foo", "", "baz"]
20011999

2002-
def test_to_proto_safe_list_plain_list(self):
2003-
"""Plain Python lists pass through unchanged (no None replacement needed)."""
2004-
from feast.type_map import _to_proto_safe_list
2000+
def test_sanitize_list_value_plain_list(self):
2001+
"""Plain Python lists without None pass through unchanged."""
2002+
from feast.type_map import _sanitize_list_value
20052003

20062004
lst = ["foo", "bar"]
2007-
result = _to_proto_safe_list(lst)
2005+
result = _sanitize_list_value(lst, ValueType.STRING_LIST)
20082006
assert result == ["foo", "bar"]
20092007

2010-
def test_to_proto_safe_list_plain_list_with_none(self):
2008+
def test_sanitize_list_value_plain_list_with_none(self):
20112009
"""None elements in a STRING_LIST plain list are replaced with empty string."""
2012-
from feast.type_map import _to_proto_safe_list
2010+
from feast.type_map import _sanitize_list_value
20132011

20142012
lst = ["foo", None]
2015-
result = _to_proto_safe_list(lst, ValueType.STRING_LIST)
2013+
result = _sanitize_list_value(lst, ValueType.STRING_LIST)
20162014
assert result == ["foo", ""]
20172015

2018-
def test_to_proto_safe_list_numeric_list_none_dropped(self):
2019-
"""None elements in non-string lists are dropped, not replaced with a sentinel."""
2020-
from feast.type_map import _to_proto_safe_list
2016+
def test_sanitize_list_value_numeric_none_replaced(self):
2017+
"""None elements in numeric lists are replaced with a type-appropriate default."""
2018+
from feast.type_map import _sanitize_list_value
20212019

2022-
for vt in (
2023-
ValueType.FLOAT_LIST,
2024-
ValueType.DOUBLE_LIST,
2025-
ValueType.INT32_LIST,
2026-
ValueType.INT64_LIST,
2027-
ValueType.BYTES_LIST,
2028-
):
2029-
result = _to_proto_safe_list([1.0, None, 2.0], vt)
2030-
assert result == [1.0, 2.0], (
2031-
f"Expected None dropped for {vt}, got {result!r}"
2032-
)
2020+
assert _sanitize_list_value([1, None, 2], ValueType.INT32_LIST) == [1, 0, 2]
2021+
assert _sanitize_list_value([1, None, 2], ValueType.INT64_LIST) == [1, 0, 2]
2022+
assert _sanitize_list_value([1.0, None, 2.0], ValueType.FLOAT_LIST) == [
2023+
1.0,
2024+
0.0,
2025+
2.0,
2026+
]
2027+
assert _sanitize_list_value([1.0, None, 2.0], ValueType.DOUBLE_LIST) == [
2028+
1.0,
2029+
0.0,
2030+
2.0,
2031+
]
2032+
assert _sanitize_list_value([True, None, False], ValueType.BOOL_LIST) == [
2033+
True,
2034+
False,
2035+
False,
2036+
]
2037+
2038+
def test_sanitize_list_value_bytes_none_replaced(self):
2039+
"""None elements in BYTES_LIST are replaced with b''."""
2040+
from feast.type_map import _sanitize_list_value
2041+
2042+
result = _sanitize_list_value([b"x", None], ValueType.BYTES_LIST)
2043+
assert result == [b"x", b""]
20332044

2034-
def test_to_proto_safe_list_scalar_passthrough(self):
2045+
def test_sanitize_list_value_scalar_passthrough(self):
20352046
"""Non-list, non-ndarray values are returned unchanged."""
2036-
from feast.type_map import _to_proto_safe_list
2047+
from feast.type_map import _sanitize_list_value
20372048

2038-
assert _to_proto_safe_list("hello") == "hello"
2039-
assert _to_proto_safe_list(None) is None
2040-
assert _to_proto_safe_list(42) == 42
2049+
assert _sanitize_list_value("hello", ValueType.STRING_LIST) == "hello"
2050+
assert _sanitize_list_value(42, ValueType.INT32_LIST) == 42
20412051

20422052
def test_string_list_from_ndarray(self):
20432053
"""STRING_LIST column with ndarray values materializes without TypeError."""

0 commit comments

Comments
 (0)