Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next commit
feat: Support nested collection types (Array/Set of Array/Set) (#5947)
Add support for 2-level nested collection types: Array(Array(T)),
Array(Set(T)), Set(Array(T)), and Set(Set(T)).

- Add 4 generic ValueType enums (LIST_LIST, LIST_SET, SET_LIST, SET_SET)
  backed by RepeatedValue proto messages
- Persist the full nested type string (e.g. Array(Set(T))) in Field tags
  under the feast:nested_inner_type key, following the existing Struct
  schema tag pattern
- Handle edge cases: empty inner collections, Set dedup at inner level,
  depth limit enforcement (2 levels max)
- Add proto/JSON/remote transport serialization support
- Add 25 unit tests covering all combinations and edge cases

Signed-off-by: Soojin Lee <lsjin0602@gmail.com>
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: soojin <soojin@dable.io>
  • Loading branch information
2 people authored and ntkathole committed Apr 2, 2026
commit 5425a9299da9178f07863fd2f25d64de1623abf4
8 changes: 8 additions & 0 deletions protos/feast/types/Value.proto
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ message ValueType {
TIME_UUID_LIST = 39;
UUID_SET = 40;
TIME_UUID_SET = 41;
LIST_LIST = 42;
LIST_SET = 43;
SET_LIST = 44;
SET_SET = 45;
}
}

Expand Down Expand Up @@ -108,6 +112,10 @@ message Value {
StringList time_uuid_list_val = 39;
StringSet uuid_set_val = 40;
StringSet time_uuid_set_val = 41;
RepeatedValue list_list_val = 42;
RepeatedValue list_set_val = 43;
RepeatedValue set_list_val = 44;
RepeatedValue set_set_val = 45;
}
}

Expand Down
37 changes: 33 additions & 4 deletions sdk/python/feast/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from feast.value_type import ValueType

STRUCT_SCHEMA_TAG = "feast:struct_schema"
NESTED_COLLECTION_INNER_TYPE_TAG = "feast:nested_inner_type"


@typechecked
Expand Down Expand Up @@ -118,7 +119,7 @@ def __str__(self):

def to_proto(self) -> FieldProto:
"""Converts a Field object to its protobuf representation."""
from feast.types import Array
from feast.types import Array, Set

value_type = self.dtype.to_value_type()
vector_search_metric = self.vector_search_metric or ""
Expand All @@ -128,6 +129,11 @@ def to_proto(self) -> FieldProto:
tags[STRUCT_SCHEMA_TAG] = _serialize_struct_schema(self.dtype)
elif isinstance(self.dtype, Array) and isinstance(self.dtype.base_type, Struct):
tags[STRUCT_SCHEMA_TAG] = _serialize_struct_schema(self.dtype.base_type)
# Persist nested collection type info in tags
if isinstance(self.dtype, (Array, Set)) and isinstance(
self.dtype.base_type, (Array, Set)
):
tags[NESTED_COLLECTION_INNER_TYPE_TAG] = _feast_type_to_str(self.dtype)
return FieldProto(
name=self.name,
value_type=value_type.value,
Expand Down Expand Up @@ -155,17 +161,30 @@ def from_proto(cls, field_proto: FieldProto):
# Reconstruct Struct type from persisted schema in tags
from feast.types import Array

internal_tags = {STRUCT_SCHEMA_TAG, NESTED_COLLECTION_INNER_TYPE_TAG}
dtype: FeastType
if value_type == ValueType.STRUCT and STRUCT_SCHEMA_TAG in tags:
dtype = _deserialize_struct_schema(tags[STRUCT_SCHEMA_TAG])
user_tags = {k: v for k, v in tags.items() if k != STRUCT_SCHEMA_TAG}
user_tags = {k: v for k, v in tags.items() if k not in internal_tags}
elif value_type == ValueType.STRUCT_LIST and STRUCT_SCHEMA_TAG in tags:
inner_struct = _deserialize_struct_schema(tags[STRUCT_SCHEMA_TAG])
dtype = Array(inner_struct)
user_tags = {k: v for k, v in tags.items() if k != STRUCT_SCHEMA_TAG}
user_tags = {k: v for k, v in tags.items() if k not in internal_tags}
elif (
value_type
in (
ValueType.LIST_LIST,
ValueType.LIST_SET,
ValueType.SET_LIST,
ValueType.SET_SET,
)
and NESTED_COLLECTION_INNER_TYPE_TAG in tags
):
dtype = _str_to_feast_type(tags[NESTED_COLLECTION_INNER_TYPE_TAG])
user_tags = {k: v for k, v in tags.items() if k not in internal_tags}
else:
dtype = from_value_type(value_type=value_type)
user_tags = tags
user_tags = {k: v for k, v in tags.items() if k not in internal_tags}

return cls(
name=field_proto.name,
Expand Down Expand Up @@ -198,6 +217,7 @@ def _feast_type_to_str(feast_type: FeastType) -> str:
from feast.types import (
Array,
PrimitiveFeastType,
Set,
)

if isinstance(feast_type, PrimitiveFeastType):
Expand All @@ -209,6 +229,8 @@ def _feast_type_to_str(feast_type: FeastType) -> str:
return json.dumps({"__struct__": nested})
elif isinstance(feast_type, Array):
return f"Array({_feast_type_to_str(feast_type.base_type)})"
elif isinstance(feast_type, Set):
return f"Set({_feast_type_to_str(feast_type.base_type)})"
else:
return str(feast_type)

Expand All @@ -218,6 +240,7 @@ def _str_to_feast_type(type_str: str) -> FeastType:
from feast.types import (
Array,
PrimitiveFeastType,
Set,
)

# Check if it's an Array type
Expand All @@ -226,6 +249,12 @@ def _str_to_feast_type(type_str: str) -> FeastType:
base_type = _str_to_feast_type(inner)
return Array(base_type)

# Check if it's a Set type
if type_str.startswith("Set(") and type_str.endswith(")"):
inner = type_str[4:-1]
base_type = _str_to_feast_type(inner)
return Set(base_type)

# Check if it's a nested Struct (JSON encoded)
if type_str.startswith("{"):
try:
Expand Down
5 changes: 5 additions & 0 deletions sdk/python/feast/infra/online_stores/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,11 @@ def _proto_value_to_transport_value(proto_value: ValueProto) -> Any:
if val_attr == "json_list_val":
return list(getattr(proto_value, val_attr).val)

# Nested collection types use feast_value_type_to_python_type
# which handles recursive conversion of RepeatedValue protos.
if val_attr in ("list_list_val", "list_set_val", "set_list_val", "set_set_val"):
return feast_value_type_to_python_type(proto_value)

# Map/Struct types are converted to Python dicts by
# feast_value_type_to_python_type. Serialise them to JSON strings
# so the server-side DataFrame gets VARCHAR columns instead of
Expand Down
6 changes: 6 additions & 0 deletions sdk/python/feast/proto_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ def to_json_object(printer: _Printer, message: ProtoMessage) -> JsonObject:
# to JSON. The parse back result will be different from original message.
if which is None or which == "null_val":
return None
elif which in ("list_list_val", "list_set_val", "set_list_val", "set_set_val"):
# Nested collection: RepeatedValue containing Values
repeated = getattr(message, which)
value = [
printer._MessageToJsonObject(inner_val) for inner_val in repeated.val
]
elif "_list_" in which:
value = list(getattr(message, which).val)
else:
Expand Down
Loading