Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix: Fix JSON deserialization, schema inference, and silent fallback …
…for nested collection types

- Add nested list handling in proto_json from_json_object (list of lists
  was raising ParseError since no branch matched list-typed elements)
- Fix pa_to_feast_value_type to recognize nested list PyArrow types
  (list<item: list<item: T>>) instead of crashing with KeyError
- Replace silent String fallback in _str_to_feast_type with ValueError
  to surface corrupted tag values instead of silently losing type info
- Strengthen test coverage: type str roundtrip, inner value verification,
  multi-value batch, proto JSON roundtrip, PyArrow nested type inference

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: soojin <soojin@dable.io>
  • Loading branch information
2 people authored and ntkathole committed Apr 2, 2026
commit e57ddd23d0670bd8d72eb107a0343662a54d506f
7 changes: 4 additions & 3 deletions sdk/python/feast/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,9 +272,10 @@ def _str_to_feast_type(type_str: str) -> FeastType:
try:
return PrimitiveFeastType[type_str]
except KeyError:
from feast.types import String

return String
raise ValueError(
f"Unknown FeastType: {type_str!r}. "
f"Valid primitive types: {[t.name for t in PrimitiveFeastType]}"
)


def _serialize_struct_schema(struct_type: Struct) -> str:
Expand Down
9 changes: 9 additions & 0 deletions sdk/python/feast/proto_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,15 @@ def from_json_object(
if len(value) == 0:
# Clear will mark the struct as modified so it will be created even if there are no values
message.int64_list_val.Clear()
elif isinstance(value[0], list):
# Nested collection (list of lists).
# Default to list_list_val since JSON transport loses the
# outer/inner set distinction.
rv = RepeatedValue()
for inner in value:
inner_val = rv.val.add()
from_json_object(parser, inner, inner_val)
message.list_list_val.CopyFrom(rv)
Comment thread
devin-ai-integration[bot] marked this conversation as resolved.
Outdated
elif isinstance(value[0], bool):
message.bool_list_val.val.extend(value)
elif isinstance(value[0], str):
Expand Down
6 changes: 5 additions & 1 deletion sdk/python/feast/type_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -1258,7 +1258,11 @@ def pa_to_feast_value_type(pa_type_as_str: str) -> ValueType:
is_list = False
if pa_type_as_str.startswith("list<item: "):
is_list = True
pa_type_as_str = pa_type_as_str.replace("list<item: ", "").replace(">", "")
inner_str = pa_type_as_str[len("list<item: ") : -1]
# Check for nested list (list of lists) before stripping
if inner_str.startswith("list<item: "):
return ValueType.LIST_LIST
pa_type_as_str = inner_str

if pa_type_as_str.startswith("timestamp"):
value_type = ValueType.UNIX_TIMESTAMP
Expand Down
27 changes: 27 additions & 0 deletions sdk/python/tests/unit/test_proto_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,33 @@ def test_feature_list(proto_json_patch):
)


def test_nested_collection_json_roundtrip(proto_json_patch):
"""Nested collection values (list of lists) should survive JSON roundtrip."""
from feast.protos.feast.types.Value_pb2 import Value

# Build a Value with list_list_val containing [[1,2],[3,4,5]]
value_proto = Value()
inner1 = value_proto.list_list_val.val.add()
inner1.int64_list_val.val.extend([1, 2])
inner2 = value_proto.list_list_val.val.add()
inner2.int64_list_val.val.extend([3, 4, 5])

# Serialize to JSON
value_json = MessageToDict(value_proto)
assert isinstance(value_json, list)
assert len(value_json) == 2
assert value_json[0] == [1, 2]
assert value_json[1] == [3, 4, 5]

# Deserialize back from JSON
feature_vector_str = '{"values": [[[1, 2], [3, 4, 5]]]}'
feature_vector_proto = FeatureVector()
Parse(feature_vector_str, feature_vector_proto)
assert len(feature_vector_proto.values) == 1
assert feature_vector_proto.values[0].WhichOneof("val") == "list_list_val"
assert len(feature_vector_proto.values[0].list_list_val.val) == 2


@pytest.fixture(scope="module")
def proto_json_patch():
proto_json.patch()
56 changes: 50 additions & 6 deletions sdk/python/tests/unit/test_type_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -1664,10 +1664,54 @@ def test_set_list_no_dedup_inner(self):
assert result[0] == [1, 1, 2]
assert result[1] == [3, 3]

def test_feast_value_type_to_pa_nested(self):
"""Test feast_value_type_to_pa for nested collection types."""
pa_type = feast_value_type_to_pa(ValueType.LIST_LIST)
assert pa_type == pyarrow.list_(pyarrow.list_(pyarrow.string()))
def test_list_list_proto_roundtrip_values(self):
"""Test that LIST_LIST roundtrip preserves actual inner values."""
values = [[[1, 2, 3], [4, 5]]]
protos = python_values_to_proto_values(values, ValueType.LIST_LIST)
result = feast_value_type_to_python_type(protos[0])
assert result[0] == [1, 2, 3]
assert result[1] == [4, 5]

def test_set_list_proto_roundtrip_values(self):
"""Test that SET_LIST roundtrip preserves actual inner values."""
values = [[["a", "b"], ["c"]]]
protos = python_values_to_proto_values(values, ValueType.SET_LIST)
result = feast_value_type_to_python_type(protos[0])
assert result[0] == ["a", "b"]
assert result[1] == ["c"]

def test_multi_value_batch_nested(self):
"""Test multiple nested collection values in a single batch."""
values = [[[1, 2], [3]], [[4], [5, 6]]]
protos = python_values_to_proto_values(values, ValueType.LIST_LIST)
assert len(protos) == 2
r0 = feast_value_type_to_python_type(protos[0])
r1 = feast_value_type_to_python_type(protos[1])
assert r0 == [[1, 2], [3]]
assert r1 == [[4], [5, 6]]

pa_type = feast_value_type_to_pa(ValueType.SET_SET)
assert pa_type == pyarrow.list_(pyarrow.list_(pyarrow.string()))
def test_feast_value_type_to_pa_nested(self):
"""Test feast_value_type_to_pa for all nested collection types."""
for vt in (
ValueType.LIST_LIST,
ValueType.LIST_SET,
ValueType.SET_LIST,
ValueType.SET_SET,
):
pa_type = feast_value_type_to_pa(vt)
assert pa_type == pyarrow.list_(pyarrow.list_(pyarrow.string()))

def test_pa_to_feast_value_type_nested(self):
"""Test pa_to_feast_value_type recognizes nested list PyArrow types."""
assert (
pa_to_feast_value_type("list<item: list<item: int64>>")
== ValueType.LIST_LIST
)
assert (
pa_to_feast_value_type("list<item: list<item: string>>")
== ValueType.LIST_LIST
)
assert (
pa_to_feast_value_type("list<item: list<item: double>>")
== ValueType.LIST_LIST
)
40 changes: 39 additions & 1 deletion sdk/python/tests/unit/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def test_nested_field_roundtrip():
"""Field with nested collection type should survive to_proto -> from_proto."""
test_cases = [
("aa", Array(Array(String))),
("as", Array(Set(Int32))),
("as_field", Array(Set(Int32))),
("sa", Set(Array(Float64))),
("ss", Set(Set(Bool))),
]
Expand All @@ -146,6 +146,10 @@ def test_nested_field_roundtrip():
assert restored.dtype.to_value_type() == dtype.to_value_type(), (
f"dtype mismatch for {name}: {restored.dtype} vs {dtype}"
)
# Verify inner type is preserved (not just ValueType equality)
assert str(restored.dtype) == str(dtype), (
f"Inner type lost for {name}: got {restored.dtype}, expected {dtype}"
)
assert restored.tags == {"user_tag": "value"}, (
f"Tags should not contain internal tags for {name}"
)
Expand Down Expand Up @@ -178,6 +182,40 @@ def test_uuid_set_feast_type():
assert from_value_type(set_time_uuid.to_value_type()) == set_time_uuid


def test_feast_type_str_roundtrip():
"""_feast_type_to_str and _str_to_feast_type should roundtrip for nested types."""
from feast.field import _feast_type_to_str, _str_to_feast_type

test_cases = [
Array(Array(String)),
Array(Array(Int32)),
Array(Array(Float64)),
Array(Set(Int64)),
Array(Set(Bool)),
Set(Array(String)),
Set(Array(Float32)),
Set(Set(Int32)),
Set(Set(Float64)),
]
for dtype in test_cases:
s = _feast_type_to_str(dtype)
restored = _str_to_feast_type(s)
assert str(restored) == str(dtype), (
f"Roundtrip failed: {dtype} -> '{s}' -> {restored}"
)


def test_str_to_feast_type_invalid():
"""_str_to_feast_type should raise ValueError on unrecognized type names."""
from feast.field import _str_to_feast_type

with pytest.raises(ValueError, match="Unknown FeastType"):
_str_to_feast_type("INVALID_TYPE")

with pytest.raises(ValueError, match="Unknown FeastType"):
_str_to_feast_type("Strig")


def test_all_value_types():
for value in ValueType:
# We do not support the NULL type.
Expand Down