diff --git a/docs/reference/type-system.md b/docs/reference/type-system.md index f0739df8cd3..eb39b5e7948 100644 --- a/docs/reference/type-system.md +++ b/docs/reference/type-system.md @@ -86,6 +86,25 @@ All primitive types (except `Map` and `Json`) have corresponding set types for s - Set types are best suited for **online serving** use cases where feature values are written as Python sets and retrieved via `get_online_features`. {% endhint %} +### Nested Collection Types + +Feast supports arbitrarily nested collections using a recursive `VALUE_LIST` / `VALUE_SET` design. The outer container determines the proto enum (`VALUE_LIST` for `Array(…)`, `VALUE_SET` for `Set(…)`), while the full inner type structure is persisted via a mandatory `feast:nested_inner_type` Field tag. + +| Feast Type | Python Type | ValueType | Description | +|------------|-------------|-----------|-------------| +| `Array(Array(T))` | `List[List[T]]` | `VALUE_LIST` | List of lists | +| `Array(Set(T))` | `List[List[T]]` | `VALUE_LIST` | List of sets | +| `Set(Array(T))` | `List[List[T]]` | `VALUE_SET` | Set of lists | +| `Set(Set(T))` | `List[List[T]]` | `VALUE_SET` | Set of sets | +| `Array(Array(Array(T)))` | `List[List[List[T]]]` | `VALUE_LIST` | 3-level nesting | + +Where `T` is any supported primitive type (Int32, Int64, Float32, Float64, String, Bytes, Bool, UnixTimestamp) or another nested collection type. + +**Notes:** +- Nesting depth is **unlimited**. `Array(Array(Array(T)))`, `Set(Array(Set(T)))`, etc. are all supported. +- Inner type information is preserved via Field tags (`feast:nested_inner_type`) and restored during deserialization. This tag is mandatory for nested collection types. +- Empty inner collections (`[]`) are stored as empty proto values and round-trip as `None`. For example, `[[1, 2], [], [3]]` becomes `[[1, 2], None, [3]]` after a write-read cycle. 
+ ### Map Types Map types allow storing dictionary-like data structures: @@ -233,6 +252,10 @@ user_features = FeatureView( Field(name="metadata", dtype=Map), Field(name="activity_log", dtype=Array(Map)), + # Nested collection types + Field(name="weekly_scores", dtype=Array(Array(Float64))), + Field(name="unique_tags_per_category", dtype=Array(Set(String))), + # JSON type Field(name="raw_event", dtype=Json), @@ -290,6 +313,30 @@ related_sessions = [uuid.uuid4(), uuid.uuid4(), uuid.uuid4()] unique_devices = {uuid.uuid4(), uuid.uuid4()} ``` +### Nested Collection Type Usage Examples + +Nested collections allow storing multi-dimensional data with unlimited depth: + +```python +# List of lists — e.g., weekly score history per user +weekly_scores = [[85.0, 90.5, 78.0], [92.0, 88.5], [95.0, 91.0, 87.5]] + +# List of sets — e.g., unique tags assigned per category +unique_tags_per_category = [["python", "ml"], ["rust", "systems"], ["python", "web"]] + +# 3-level nesting — e.g., multi-dimensional matrices +Field(name="tensor", dtype=Array(Array(Array(Float64)))) + +# Mixed nesting +Field(name="grouped_tags", dtype=Array(Set(Array(String)))) +``` + +**Limitation:** Empty inner collections round-trip as `None`: +```python +# Input: [[1, 2], [], [3]] +# Output: [[1, 2], None, [3]] (empty [] becomes None after write-read cycle) +``` + ### Map Type Usage Examples Maps can store complex nested data structures: diff --git a/protos/feast/types/Value.proto b/protos/feast/types/Value.proto index 69922eb0e8e..4194c19bac5 100644 --- a/protos/feast/types/Value.proto +++ b/protos/feast/types/Value.proto @@ -63,6 +63,8 @@ message ValueType { TIME_UUID_LIST = 39; UUID_SET = 40; TIME_UUID_SET = 41; + VALUE_LIST = 42; + VALUE_SET = 43; } } @@ -108,6 +110,8 @@ message Value { StringList time_uuid_list_val = 39; StringSet uuid_set_val = 40; StringSet time_uuid_set_val = 41; + RepeatedValue list_val = 42; + RepeatedValue set_val = 43; } } diff --git a/sdk/python/feast/field.py 
b/sdk/python/feast/field.py index c61ed6a5c5e..e155836467b 100644 --- a/sdk/python/feast/field.py +++ b/sdk/python/feast/field.py @@ -23,6 +23,7 @@ from feast.value_type import ValueType STRUCT_SCHEMA_TAG = "feast:struct_schema" +NESTED_COLLECTION_INNER_TYPE_TAG = "feast:nested_inner_type" @typechecked @@ -118,7 +119,7 @@ def __str__(self): def to_proto(self) -> FieldProto: """Converts a Field object to its protobuf representation.""" - from feast.types import Array + from feast.types import Array, Set value_type = self.dtype.to_value_type() vector_search_metric = self.vector_search_metric or "" @@ -128,6 +129,11 @@ def to_proto(self) -> FieldProto: tags[STRUCT_SCHEMA_TAG] = _serialize_struct_schema(self.dtype) elif isinstance(self.dtype, Array) and isinstance(self.dtype.base_type, Struct): tags[STRUCT_SCHEMA_TAG] = _serialize_struct_schema(self.dtype.base_type) + # Persist nested collection type info in tags + if isinstance(self.dtype, (Array, Set)) and isinstance( + self.dtype.base_type, (Array, Set) + ): + tags[NESTED_COLLECTION_INNER_TYPE_TAG] = _feast_type_to_str(self.dtype) return FieldProto( name=self.name, value_type=value_type.value, @@ -155,17 +161,24 @@ def from_proto(cls, field_proto: FieldProto): # Reconstruct Struct type from persisted schema in tags from feast.types import Array + internal_tags = {STRUCT_SCHEMA_TAG, NESTED_COLLECTION_INNER_TYPE_TAG} dtype: FeastType if value_type == ValueType.STRUCT and STRUCT_SCHEMA_TAG in tags: dtype = _deserialize_struct_schema(tags[STRUCT_SCHEMA_TAG]) - user_tags = {k: v for k, v in tags.items() if k != STRUCT_SCHEMA_TAG} + user_tags = {k: v for k, v in tags.items() if k not in internal_tags} elif value_type == ValueType.STRUCT_LIST and STRUCT_SCHEMA_TAG in tags: inner_struct = _deserialize_struct_schema(tags[STRUCT_SCHEMA_TAG]) dtype = Array(inner_struct) - user_tags = {k: v for k, v in tags.items() if k != STRUCT_SCHEMA_TAG} + user_tags = {k: v for k, v in tags.items() if k not in internal_tags} + elif ( + 
value_type in (ValueType.VALUE_LIST, ValueType.VALUE_SET) + and NESTED_COLLECTION_INNER_TYPE_TAG in tags + ): + dtype = _str_to_feast_type(tags[NESTED_COLLECTION_INNER_TYPE_TAG]) + user_tags = {k: v for k, v in tags.items() if k not in internal_tags} else: dtype = from_value_type(value_type=value_type) - user_tags = tags + user_tags = {k: v for k, v in tags.items() if k not in internal_tags} return cls( name=field_proto.name, @@ -198,6 +211,7 @@ def _feast_type_to_str(feast_type: FeastType) -> str: from feast.types import ( Array, PrimitiveFeastType, + Set, ) if isinstance(feast_type, PrimitiveFeastType): @@ -209,6 +223,8 @@ def _feast_type_to_str(feast_type: FeastType) -> str: return json.dumps({"__struct__": nested}) elif isinstance(feast_type, Array): return f"Array({_feast_type_to_str(feast_type.base_type)})" + elif isinstance(feast_type, Set): + return f"Set({_feast_type_to_str(feast_type.base_type)})" else: return str(feast_type) @@ -218,6 +234,7 @@ def _str_to_feast_type(type_str: str) -> FeastType: from feast.types import ( Array, PrimitiveFeastType, + Set, ) # Check if it's an Array type @@ -226,6 +243,12 @@ def _str_to_feast_type(type_str: str) -> FeastType: base_type = _str_to_feast_type(inner) return Array(base_type) + # Check if it's a Set type + if type_str.startswith("Set(") and type_str.endswith(")"): + inner = type_str[4:-1] + base_type = _str_to_feast_type(inner) + return Set(base_type) + # Check if it's a nested Struct (JSON encoded) if type_str.startswith("{"): try: @@ -243,9 +266,10 @@ def _str_to_feast_type(type_str: str) -> FeastType: try: return PrimitiveFeastType[type_str] except KeyError: - from feast.types import String - - return String + raise ValueError( + f"Unknown FeastType: {type_str!r}. 
" + f"Valid primitive types: {[t.name for t in PrimitiveFeastType]}" + ) def _serialize_struct_schema(struct_type: Struct) -> str: diff --git a/sdk/python/feast/infra/offline_stores/offline_utils.py b/sdk/python/feast/infra/offline_stores/offline_utils.py index 0c478adb2c4..5664e6f45a6 100644 --- a/sdk/python/feast/infra/offline_stores/offline_utils.py +++ b/sdk/python/feast/infra/offline_stores/offline_utils.py @@ -1,3 +1,4 @@ +import logging import uuid from dataclasses import asdict, dataclass from datetime import datetime, timedelta, timezone @@ -21,6 +22,7 @@ from feast.repo_config import RepoConfig from feast.type_map import feast_value_type_to_pa from feast.utils import _get_requested_feature_views_to_features_dict, to_naive_utc +from feast.value_type import ValueType DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL = "event_timestamp" @@ -241,6 +243,37 @@ def get_offline_store_from_config(offline_store_config: Any) -> OfflineStore: return offline_store_class() +_PA_BASIC_TYPES = { + "int32": pa.int32(), + "int64": pa.int64(), + "double": pa.float64(), + "float": pa.float32(), + "string": pa.string(), + "binary": pa.binary(), + "bool": pa.bool_(), + "large_string": pa.large_string(), + "null": pa.null(), +} + + +def _parse_pa_type_str(pa_type_str: str) -> pa.DataType: + """Parse a PyArrow type string to preserve inner element types for nested lists.""" + pa_type_str = pa_type_str.strip() + if pa_type_str.startswith("list"): + inner = pa_type_str[len("list Tuple[pa.Schema, List[str]]: @@ -250,15 +283,12 @@ def get_pyarrow_schema_from_batch_source( pa_schema = [] column_names = [] for column_name, column_type in column_names_and_types: - pa_schema.append( - ( - column_name, - feast_value_type_to_pa( - batch_source.source_datatype_to_feast_value_type()(column_type), - timestamp_unit=timestamp_unit, - ), - ) - ) + value_type = batch_source.source_datatype_to_feast_value_type()(column_type) + if value_type in (ValueType.VALUE_LIST, ValueType.VALUE_SET): + pa_type = 
_parse_pa_type_str(column_type) + else: + pa_type = feast_value_type_to_pa(value_type, timestamp_unit=timestamp_unit) + pa_schema.append((column_name, pa_type)) column_names.append(column_name) return pa.schema(pa_schema), column_names diff --git a/sdk/python/feast/infra/online_stores/remote.py b/sdk/python/feast/infra/online_stores/remote.py index f8e8dfce483..9bead1fcb9d 100644 --- a/sdk/python/feast/infra/online_stores/remote.py +++ b/sdk/python/feast/infra/online_stores/remote.py @@ -106,6 +106,11 @@ def _proto_value_to_transport_value(proto_value: ValueProto) -> Any: if val_attr == "json_list_val": return list(getattr(proto_value, val_attr).val) + # Nested collection types use feast_value_type_to_python_type + # which handles recursive conversion of RepeatedValue protos. + if val_attr in ("list_val", "set_val"): + return feast_value_type_to_python_type(proto_value) + # Map/Struct types are converted to Python dicts by # feast_value_type_to_python_type. Serialise them to JSON strings # so the server-side DataFrame gets VARCHAR columns instead of @@ -204,6 +209,12 @@ def online_read( logger.debug("Able to retrieve the online features from feature server.") response_json = json.loads(response.text) event_ts = self._get_event_ts(response_json) + # Build feature name -> ValueType mapping so we can reconstruct + # complex types (nested collections, sets, etc.) that cannot be + # inferred from raw JSON values alone. + feature_type_map: Dict[str, ValueType] = { + f.name: f.dtype.to_value_type() for f in table.features + } # Iterating over results and converting the API results in column format to row format. 
result_tuples: List[ Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]] @@ -223,13 +234,16 @@ def online_read( ] == "PRESENT" ): + feature_value_type = feature_type_map.get( + feature_name, ValueType.UNKNOWN + ) message = python_values_to_proto_values( [ response_json["results"][index]["values"][ feature_value_index ] ], - ValueType.UNKNOWN, + feature_value_type, ) feature_values_dict[feature_name] = message[0] else: diff --git a/sdk/python/feast/proto_json.py b/sdk/python/feast/proto_json.py index 487dc4284f3..d663e316b03 100644 --- a/sdk/python/feast/proto_json.py +++ b/sdk/python/feast/proto_json.py @@ -63,6 +63,12 @@ def to_json_object(printer: _Printer, message: ProtoMessage) -> JsonObject: # to JSON. The parse back result will be different from original message. if which is None or which == "null_val": return None + elif which in ("list_val", "set_val"): + # Nested collection: RepeatedValue containing Values + repeated = getattr(message, which) + value = [ + printer._MessageToJsonObject(inner_val) for inner_val in repeated.val + ] elif "_list_" in which: value = list(getattr(message, which).val) else: @@ -86,6 +92,19 @@ def from_json_object( if len(value) == 0: # Clear will mark the struct as modified so it will be created even if there are no values message.int64_list_val.Clear() + elif isinstance(value[0], list) or ( + value[0] is None and any(isinstance(v, list) for v in value) + ): + # Nested collection (list of lists). + # Check any() to handle cases where the first element is None + # (empty inner collections round-trip through proto as None). + # Default to list_val since JSON transport loses the + # outer/inner set distinction. 
+ rv = RepeatedValue() + for inner in value: + inner_val = rv.val.add() + from_json_object(parser, inner, inner_val) + message.list_val.CopyFrom(rv) elif isinstance(value[0], bool): message.bool_list_val.val.extend(value) elif isinstance(value[0], str): diff --git a/sdk/python/feast/protos/feast/types/Value_pb2.py b/sdk/python/feast/protos/feast/types/Value_pb2.py index 16b0a7a961c..44ad6f115c2 100644 --- a/sdk/python/feast/protos/feast/types/Value_pb2.py +++ b/sdk/python/feast/protos/feast/types/Value_pb2.py @@ -14,7 +14,7 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x17\x66\x65\x61st/types/Value.proto\x12\x0b\x66\x65\x61st.types\"\xc3\x04\n\tValueType\"\xb5\x04\n\x04\x45num\x12\x0b\n\x07INVALID\x10\x00\x12\t\n\x05\x42YTES\x10\x01\x12\n\n\x06STRING\x10\x02\x12\t\n\x05INT32\x10\x03\x12\t\n\x05INT64\x10\x04\x12\n\n\x06\x44OUBLE\x10\x05\x12\t\n\x05\x46LOAT\x10\x06\x12\x08\n\x04\x42OOL\x10\x07\x12\x12\n\x0eUNIX_TIMESTAMP\x10\x08\x12\x0e\n\nBYTES_LIST\x10\x0b\x12\x0f\n\x0bSTRING_LIST\x10\x0c\x12\x0e\n\nINT32_LIST\x10\r\x12\x0e\n\nINT64_LIST\x10\x0e\x12\x0f\n\x0b\x44OUBLE_LIST\x10\x0f\x12\x0e\n\nFLOAT_LIST\x10\x10\x12\r\n\tBOOL_LIST\x10\x11\x12\x17\n\x13UNIX_TIMESTAMP_LIST\x10\x12\x12\x08\n\x04NULL\x10\x13\x12\x07\n\x03MAP\x10\x14\x12\x0c\n\x08MAP_LIST\x10\x15\x12\r\n\tBYTES_SET\x10\x16\x12\x0e\n\nSTRING_SET\x10\x17\x12\r\n\tINT32_SET\x10\x18\x12\r\n\tINT64_SET\x10\x19\x12\x0e\n\nDOUBLE_SET\x10\x1a\x12\r\n\tFLOAT_SET\x10\x1b\x12\x0c\n\x08\x42OOL_SET\x10\x1c\x12\x16\n\x12UNIX_TIMESTAMP_SET\x10\x1d\x12\x08\n\x04JSON\x10 \x12\r\n\tJSON_LIST\x10!\x12\n\n\x06STRUCT\x10\"\x12\x0f\n\x0bSTRUCT_LIST\x10#\x12\x08\n\x04UUID\x10$\x12\r\n\tTIME_UUID\x10%\x12\r\n\tUUID_LIST\x10&\x12\x12\n\x0eTIME_UUID_LIST\x10\'\x12\x0c\n\x08UUID_SET\x10(\x12\x11\n\rTIME_UUID_SET\x10)\"\xfa\x0b\n\x05Value\x12\x13\n\tbytes_val\x18\x01 \x01(\x0cH\x00\x12\x14\n\nstring_val\x18\x02 \x01(\tH\x00\x12\x13\n\tint32_val\x18\x03 \x01(\x05H\x00\x12\x13\n\tint64_val\x18\x04 
\x01(\x03H\x00\x12\x14\n\ndouble_val\x18\x05 \x01(\x01H\x00\x12\x13\n\tfloat_val\x18\x06 \x01(\x02H\x00\x12\x12\n\x08\x62ool_val\x18\x07 \x01(\x08H\x00\x12\x1c\n\x12unix_timestamp_val\x18\x08 \x01(\x03H\x00\x12\x30\n\x0e\x62ytes_list_val\x18\x0b \x01(\x0b\x32\x16.feast.types.BytesListH\x00\x12\x32\n\x0fstring_list_val\x18\x0c \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12\x30\n\x0eint32_list_val\x18\r \x01(\x0b\x32\x16.feast.types.Int32ListH\x00\x12\x30\n\x0eint64_list_val\x18\x0e \x01(\x0b\x32\x16.feast.types.Int64ListH\x00\x12\x32\n\x0f\x64ouble_list_val\x18\x0f \x01(\x0b\x32\x17.feast.types.DoubleListH\x00\x12\x30\n\x0e\x66loat_list_val\x18\x10 \x01(\x0b\x32\x16.feast.types.FloatListH\x00\x12.\n\rbool_list_val\x18\x11 \x01(\x0b\x32\x15.feast.types.BoolListH\x00\x12\x39\n\x17unix_timestamp_list_val\x18\x12 \x01(\x0b\x32\x16.feast.types.Int64ListH\x00\x12%\n\x08null_val\x18\x13 \x01(\x0e\x32\x11.feast.types.NullH\x00\x12#\n\x07map_val\x18\x14 \x01(\x0b\x32\x10.feast.types.MapH\x00\x12,\n\x0cmap_list_val\x18\x15 \x01(\x0b\x32\x14.feast.types.MapListH\x00\x12.\n\rbytes_set_val\x18\x16 \x01(\x0b\x32\x15.feast.types.BytesSetH\x00\x12\x30\n\x0estring_set_val\x18\x17 \x01(\x0b\x32\x16.feast.types.StringSetH\x00\x12.\n\rint32_set_val\x18\x18 \x01(\x0b\x32\x15.feast.types.Int32SetH\x00\x12.\n\rint64_set_val\x18\x19 \x01(\x0b\x32\x15.feast.types.Int64SetH\x00\x12\x30\n\x0e\x64ouble_set_val\x18\x1a \x01(\x0b\x32\x16.feast.types.DoubleSetH\x00\x12.\n\rfloat_set_val\x18\x1b \x01(\x0b\x32\x15.feast.types.FloatSetH\x00\x12,\n\x0c\x62ool_set_val\x18\x1c \x01(\x0b\x32\x14.feast.types.BoolSetH\x00\x12\x37\n\x16unix_timestamp_set_val\x18\x1d \x01(\x0b\x32\x15.feast.types.Int64SetH\x00\x12\x12\n\x08json_val\x18 \x01(\tH\x00\x12\x30\n\rjson_list_val\x18! 
\x01(\x0b\x32\x17.feast.types.StringListH\x00\x12&\n\nstruct_val\x18\" \x01(\x0b\x32\x10.feast.types.MapH\x00\x12/\n\x0fstruct_list_val\x18# \x01(\x0b\x32\x14.feast.types.MapListH\x00\x12\x12\n\x08uuid_val\x18$ \x01(\tH\x00\x12\x17\n\rtime_uuid_val\x18% \x01(\tH\x00\x12\x30\n\ruuid_list_val\x18& \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12\x35\n\x12time_uuid_list_val\x18\' \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12.\n\x0cuuid_set_val\x18( \x01(\x0b\x32\x16.feast.types.StringSetH\x00\x12\x33\n\x11time_uuid_set_val\x18) \x01(\x0b\x32\x16.feast.types.StringSetH\x00\x42\x05\n\x03val\"\x18\n\tBytesList\x12\x0b\n\x03val\x18\x01 \x03(\x0c\"\x19\n\nStringList\x12\x0b\n\x03val\x18\x01 \x03(\t\"\x18\n\tInt32List\x12\x0b\n\x03val\x18\x01 \x03(\x05\"\x18\n\tInt64List\x12\x0b\n\x03val\x18\x01 \x03(\x03\"\x19\n\nDoubleList\x12\x0b\n\x03val\x18\x01 \x03(\x01\"\x18\n\tFloatList\x12\x0b\n\x03val\x18\x01 \x03(\x02\"\x17\n\x08\x42oolList\x12\x0b\n\x03val\x18\x01 \x03(\x08\"\x17\n\x08\x42ytesSet\x12\x0b\n\x03val\x18\x01 \x03(\x0c\"\x18\n\tStringSet\x12\x0b\n\x03val\x18\x01 \x03(\t\"\x17\n\x08Int32Set\x12\x0b\n\x03val\x18\x01 \x03(\x05\"\x17\n\x08Int64Set\x12\x0b\n\x03val\x18\x01 \x03(\x03\"\x18\n\tDoubleSet\x12\x0b\n\x03val\x18\x01 \x03(\x01\"\x17\n\x08\x46loatSet\x12\x0b\n\x03val\x18\x01 \x03(\x02\"\x16\n\x07\x42oolSet\x12\x0b\n\x03val\x18\x01 \x03(\x08\"m\n\x03Map\x12&\n\x03val\x18\x01 \x03(\x0b\x32\x19.feast.types.Map.ValEntry\x1a>\n\x08ValEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.feast.types.Value:\x02\x38\x01\"(\n\x07MapList\x12\x1d\n\x03val\x18\x01 \x03(\x0b\x32\x10.feast.types.Map\"0\n\rRepeatedValue\x12\x1f\n\x03val\x18\x01 \x03(\x0b\x32\x12.feast.types.Value*\x10\n\x04Null\x12\x08\n\x04NULL\x10\x00\x42Q\n\x11\x66\x65\x61st.proto.typesB\nValueProtoZ0github.com/feast-dev/feast/go/protos/feast/typesb\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\x17\x66\x65\x61st/types/Value.proto\x12\x0b\x66\x65\x61st.types\"\xe2\x04\n\tValueType\"\xd4\x04\n\x04\x45num\x12\x0b\n\x07INVALID\x10\x00\x12\t\n\x05\x42YTES\x10\x01\x12\n\n\x06STRING\x10\x02\x12\t\n\x05INT32\x10\x03\x12\t\n\x05INT64\x10\x04\x12\n\n\x06\x44OUBLE\x10\x05\x12\t\n\x05\x46LOAT\x10\x06\x12\x08\n\x04\x42OOL\x10\x07\x12\x12\n\x0eUNIX_TIMESTAMP\x10\x08\x12\x0e\n\nBYTES_LIST\x10\x0b\x12\x0f\n\x0bSTRING_LIST\x10\x0c\x12\x0e\n\nINT32_LIST\x10\r\x12\x0e\n\nINT64_LIST\x10\x0e\x12\x0f\n\x0b\x44OUBLE_LIST\x10\x0f\x12\x0e\n\nFLOAT_LIST\x10\x10\x12\r\n\tBOOL_LIST\x10\x11\x12\x17\n\x13UNIX_TIMESTAMP_LIST\x10\x12\x12\x08\n\x04NULL\x10\x13\x12\x07\n\x03MAP\x10\x14\x12\x0c\n\x08MAP_LIST\x10\x15\x12\r\n\tBYTES_SET\x10\x16\x12\x0e\n\nSTRING_SET\x10\x17\x12\r\n\tINT32_SET\x10\x18\x12\r\n\tINT64_SET\x10\x19\x12\x0e\n\nDOUBLE_SET\x10\x1a\x12\r\n\tFLOAT_SET\x10\x1b\x12\x0c\n\x08\x42OOL_SET\x10\x1c\x12\x16\n\x12UNIX_TIMESTAMP_SET\x10\x1d\x12\x08\n\x04JSON\x10 \x12\r\n\tJSON_LIST\x10!\x12\n\n\x06STRUCT\x10\"\x12\x0f\n\x0bSTRUCT_LIST\x10#\x12\x08\n\x04UUID\x10$\x12\r\n\tTIME_UUID\x10%\x12\r\n\tUUID_LIST\x10&\x12\x12\n\x0eTIME_UUID_LIST\x10\'\x12\x0c\n\x08UUID_SET\x10(\x12\x11\n\rTIME_UUID_SET\x10)\x12\x0e\n\nVALUE_LIST\x10*\x12\r\n\tVALUE_SET\x10+\"\xd9\x0c\n\x05Value\x12\x13\n\tbytes_val\x18\x01 \x01(\x0cH\x00\x12\x14\n\nstring_val\x18\x02 \x01(\tH\x00\x12\x13\n\tint32_val\x18\x03 \x01(\x05H\x00\x12\x13\n\tint64_val\x18\x04 \x01(\x03H\x00\x12\x14\n\ndouble_val\x18\x05 \x01(\x01H\x00\x12\x13\n\tfloat_val\x18\x06 \x01(\x02H\x00\x12\x12\n\x08\x62ool_val\x18\x07 \x01(\x08H\x00\x12\x1c\n\x12unix_timestamp_val\x18\x08 \x01(\x03H\x00\x12\x30\n\x0e\x62ytes_list_val\x18\x0b \x01(\x0b\x32\x16.feast.types.BytesListH\x00\x12\x32\n\x0fstring_list_val\x18\x0c \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12\x30\n\x0eint32_list_val\x18\r \x01(\x0b\x32\x16.feast.types.Int32ListH\x00\x12\x30\n\x0eint64_list_val\x18\x0e 
\x01(\x0b\x32\x16.feast.types.Int64ListH\x00\x12\x32\n\x0f\x64ouble_list_val\x18\x0f \x01(\x0b\x32\x17.feast.types.DoubleListH\x00\x12\x30\n\x0e\x66loat_list_val\x18\x10 \x01(\x0b\x32\x16.feast.types.FloatListH\x00\x12.\n\rbool_list_val\x18\x11 \x01(\x0b\x32\x15.feast.types.BoolListH\x00\x12\x39\n\x17unix_timestamp_list_val\x18\x12 \x01(\x0b\x32\x16.feast.types.Int64ListH\x00\x12%\n\x08null_val\x18\x13 \x01(\x0e\x32\x11.feast.types.NullH\x00\x12#\n\x07map_val\x18\x14 \x01(\x0b\x32\x10.feast.types.MapH\x00\x12,\n\x0cmap_list_val\x18\x15 \x01(\x0b\x32\x14.feast.types.MapListH\x00\x12.\n\rbytes_set_val\x18\x16 \x01(\x0b\x32\x15.feast.types.BytesSetH\x00\x12\x30\n\x0estring_set_val\x18\x17 \x01(\x0b\x32\x16.feast.types.StringSetH\x00\x12.\n\rint32_set_val\x18\x18 \x01(\x0b\x32\x15.feast.types.Int32SetH\x00\x12.\n\rint64_set_val\x18\x19 \x01(\x0b\x32\x15.feast.types.Int64SetH\x00\x12\x30\n\x0e\x64ouble_set_val\x18\x1a \x01(\x0b\x32\x16.feast.types.DoubleSetH\x00\x12.\n\rfloat_set_val\x18\x1b \x01(\x0b\x32\x15.feast.types.FloatSetH\x00\x12,\n\x0c\x62ool_set_val\x18\x1c \x01(\x0b\x32\x14.feast.types.BoolSetH\x00\x12\x37\n\x16unix_timestamp_set_val\x18\x1d \x01(\x0b\x32\x15.feast.types.Int64SetH\x00\x12\x12\n\x08json_val\x18 \x01(\tH\x00\x12\x30\n\rjson_list_val\x18! 
\x01(\x0b\x32\x17.feast.types.StringListH\x00\x12&\n\nstruct_val\x18\" \x01(\x0b\x32\x10.feast.types.MapH\x00\x12/\n\x0fstruct_list_val\x18# \x01(\x0b\x32\x14.feast.types.MapListH\x00\x12\x12\n\x08uuid_val\x18$ \x01(\tH\x00\x12\x17\n\rtime_uuid_val\x18% \x01(\tH\x00\x12\x30\n\ruuid_list_val\x18& \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12\x35\n\x12time_uuid_list_val\x18\' \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12.\n\x0cuuid_set_val\x18( \x01(\x0b\x32\x16.feast.types.StringSetH\x00\x12\x33\n\x11time_uuid_set_val\x18) \x01(\x0b\x32\x16.feast.types.StringSetH\x00\x12.\n\x08list_val\x18* \x01(\x0b\x32\x1a.feast.types.RepeatedValueH\x00\x12-\n\x07set_val\x18+ \x01(\x0b\x32\x1a.feast.types.RepeatedValueH\x00\x42\x05\n\x03val\"\x18\n\tBytesList\x12\x0b\n\x03val\x18\x01 \x03(\x0c\"\x19\n\nStringList\x12\x0b\n\x03val\x18\x01 \x03(\t\"\x18\n\tInt32List\x12\x0b\n\x03val\x18\x01 \x03(\x05\"\x18\n\tInt64List\x12\x0b\n\x03val\x18\x01 \x03(\x03\"\x19\n\nDoubleList\x12\x0b\n\x03val\x18\x01 \x03(\x01\"\x18\n\tFloatList\x12\x0b\n\x03val\x18\x01 \x03(\x02\"\x17\n\x08\x42oolList\x12\x0b\n\x03val\x18\x01 \x03(\x08\"\x17\n\x08\x42ytesSet\x12\x0b\n\x03val\x18\x01 \x03(\x0c\"\x18\n\tStringSet\x12\x0b\n\x03val\x18\x01 \x03(\t\"\x17\n\x08Int32Set\x12\x0b\n\x03val\x18\x01 \x03(\x05\"\x17\n\x08Int64Set\x12\x0b\n\x03val\x18\x01 \x03(\x03\"\x18\n\tDoubleSet\x12\x0b\n\x03val\x18\x01 \x03(\x01\"\x17\n\x08\x46loatSet\x12\x0b\n\x03val\x18\x01 \x03(\x02\"\x16\n\x07\x42oolSet\x12\x0b\n\x03val\x18\x01 \x03(\x08\"m\n\x03Map\x12&\n\x03val\x18\x01 \x03(\x0b\x32\x19.feast.types.Map.ValEntry\x1a>\n\x08ValEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.feast.types.Value:\x02\x38\x01\"(\n\x07MapList\x12\x1d\n\x03val\x18\x01 \x03(\x0b\x32\x10.feast.types.Map\"0\n\rRepeatedValue\x12\x1f\n\x03val\x18\x01 
\x03(\x0b\x32\x12.feast.types.Value*\x10\n\x04Null\x12\x08\n\x04NULL\x10\x00\x42Q\n\x11\x66\x65\x61st.proto.typesB\nValueProtoZ0github.com/feast-dev/feast/go/protos/feast/typesb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -24,48 +24,48 @@ _globals['DESCRIPTOR']._serialized_options = b'\n\021feast.proto.typesB\nValueProtoZ0github.com/feast-dev/feast/go/protos/feast/types' _globals['_MAP_VALENTRY']._options = None _globals['_MAP_VALENTRY']._serialized_options = b'8\001' - _globals['_NULL']._serialized_start=2717 - _globals['_NULL']._serialized_end=2733 + _globals['_NULL']._serialized_start=2843 + _globals['_NULL']._serialized_end=2859 _globals['_VALUETYPE']._serialized_start=41 - _globals['_VALUETYPE']._serialized_end=620 + _globals['_VALUETYPE']._serialized_end=651 _globals['_VALUETYPE_ENUM']._serialized_start=55 - _globals['_VALUETYPE_ENUM']._serialized_end=620 - _globals['_VALUE']._serialized_start=623 - _globals['_VALUE']._serialized_end=2153 - _globals['_BYTESLIST']._serialized_start=2155 - _globals['_BYTESLIST']._serialized_end=2179 - _globals['_STRINGLIST']._serialized_start=2181 - _globals['_STRINGLIST']._serialized_end=2206 - _globals['_INT32LIST']._serialized_start=2208 - _globals['_INT32LIST']._serialized_end=2232 - _globals['_INT64LIST']._serialized_start=2234 - _globals['_INT64LIST']._serialized_end=2258 - _globals['_DOUBLELIST']._serialized_start=2260 - _globals['_DOUBLELIST']._serialized_end=2285 - _globals['_FLOATLIST']._serialized_start=2287 - _globals['_FLOATLIST']._serialized_end=2311 - _globals['_BOOLLIST']._serialized_start=2313 - _globals['_BOOLLIST']._serialized_end=2336 - _globals['_BYTESSET']._serialized_start=2338 - _globals['_BYTESSET']._serialized_end=2361 - _globals['_STRINGSET']._serialized_start=2363 - _globals['_STRINGSET']._serialized_end=2387 - _globals['_INT32SET']._serialized_start=2389 - _globals['_INT32SET']._serialized_end=2412 - _globals['_INT64SET']._serialized_start=2414 - 
_globals['_INT64SET']._serialized_end=2437 - _globals['_DOUBLESET']._serialized_start=2439 - _globals['_DOUBLESET']._serialized_end=2463 - _globals['_FLOATSET']._serialized_start=2465 - _globals['_FLOATSET']._serialized_end=2488 - _globals['_BOOLSET']._serialized_start=2490 - _globals['_BOOLSET']._serialized_end=2512 - _globals['_MAP']._serialized_start=2514 - _globals['_MAP']._serialized_end=2623 - _globals['_MAP_VALENTRY']._serialized_start=2561 - _globals['_MAP_VALENTRY']._serialized_end=2623 - _globals['_MAPLIST']._serialized_start=2625 - _globals['_MAPLIST']._serialized_end=2665 - _globals['_REPEATEDVALUE']._serialized_start=2667 - _globals['_REPEATEDVALUE']._serialized_end=2715 + _globals['_VALUETYPE_ENUM']._serialized_end=651 + _globals['_VALUE']._serialized_start=654 + _globals['_VALUE']._serialized_end=2279 + _globals['_BYTESLIST']._serialized_start=2281 + _globals['_BYTESLIST']._serialized_end=2305 + _globals['_STRINGLIST']._serialized_start=2307 + _globals['_STRINGLIST']._serialized_end=2332 + _globals['_INT32LIST']._serialized_start=2334 + _globals['_INT32LIST']._serialized_end=2358 + _globals['_INT64LIST']._serialized_start=2360 + _globals['_INT64LIST']._serialized_end=2384 + _globals['_DOUBLELIST']._serialized_start=2386 + _globals['_DOUBLELIST']._serialized_end=2411 + _globals['_FLOATLIST']._serialized_start=2413 + _globals['_FLOATLIST']._serialized_end=2437 + _globals['_BOOLLIST']._serialized_start=2439 + _globals['_BOOLLIST']._serialized_end=2462 + _globals['_BYTESSET']._serialized_start=2464 + _globals['_BYTESSET']._serialized_end=2487 + _globals['_STRINGSET']._serialized_start=2489 + _globals['_STRINGSET']._serialized_end=2513 + _globals['_INT32SET']._serialized_start=2515 + _globals['_INT32SET']._serialized_end=2538 + _globals['_INT64SET']._serialized_start=2540 + _globals['_INT64SET']._serialized_end=2563 + _globals['_DOUBLESET']._serialized_start=2565 + _globals['_DOUBLESET']._serialized_end=2589 + _globals['_FLOATSET']._serialized_start=2591 
+ _globals['_FLOATSET']._serialized_end=2614 + _globals['_BOOLSET']._serialized_start=2616 + _globals['_BOOLSET']._serialized_end=2638 + _globals['_MAP']._serialized_start=2640 + _globals['_MAP']._serialized_end=2749 + _globals['_MAP_VALENTRY']._serialized_start=2687 + _globals['_MAP_VALENTRY']._serialized_end=2749 + _globals['_MAPLIST']._serialized_start=2751 + _globals['_MAPLIST']._serialized_end=2791 + _globals['_REPEATEDVALUE']._serialized_start=2793 + _globals['_REPEATEDVALUE']._serialized_end=2841 # @@protoc_insertion_point(module_scope) diff --git a/sdk/python/feast/protos/feast/types/Value_pb2.pyi b/sdk/python/feast/protos/feast/types/Value_pb2.pyi index 75487088939..53a7c800f04 100644 --- a/sdk/python/feast/protos/feast/types/Value_pb2.pyi +++ b/sdk/python/feast/protos/feast/types/Value_pb2.pyi @@ -92,6 +92,8 @@ class ValueType(google.protobuf.message.Message): TIME_UUID_LIST: ValueType._Enum.ValueType # 39 UUID_SET: ValueType._Enum.ValueType # 40 TIME_UUID_SET: ValueType._Enum.ValueType # 41 + VALUE_LIST: ValueType._Enum.ValueType # 42 + VALUE_SET: ValueType._Enum.ValueType # 43 class Enum(_Enum, metaclass=_EnumEnumTypeWrapper): ... INVALID: ValueType.Enum.ValueType # 0 @@ -132,6 +134,8 @@ class ValueType(google.protobuf.message.Message): TIME_UUID_LIST: ValueType.Enum.ValueType # 39 UUID_SET: ValueType.Enum.ValueType # 40 TIME_UUID_SET: ValueType.Enum.ValueType # 41 + VALUE_LIST: ValueType.Enum.ValueType # 42 + VALUE_SET: ValueType.Enum.ValueType # 43 def __init__( self, @@ -179,6 +183,8 @@ class Value(google.protobuf.message.Message): TIME_UUID_LIST_VAL_FIELD_NUMBER: builtins.int UUID_SET_VAL_FIELD_NUMBER: builtins.int TIME_UUID_SET_VAL_FIELD_NUMBER: builtins.int + LIST_VAL_FIELD_NUMBER: builtins.int + SET_VAL_FIELD_NUMBER: builtins.int bytes_val: builtins.bytes string_val: builtins.str int32_val: builtins.int @@ -241,6 +247,10 @@ class Value(google.protobuf.message.Message): def uuid_set_val(self) -> global___StringSet: ... 
@property def time_uuid_set_val(self) -> global___StringSet: ... + @property + def list_val(self) -> global___RepeatedValue: ... + @property + def set_val(self) -> global___RepeatedValue: ... def __init__( self, *, @@ -281,10 +291,12 @@ class Value(google.protobuf.message.Message): time_uuid_list_val: global___StringList | None = ..., uuid_set_val: global___StringSet | None = ..., time_uuid_set_val: global___StringSet | None = ..., + list_val: global___RepeatedValue | None = ..., + set_val: global___RepeatedValue | None = ..., ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["bool_list_val", b"bool_list_val", "bool_set_val", b"bool_set_val", "bool_val", b"bool_val", "bytes_list_val", b"bytes_list_val", "bytes_set_val", b"bytes_set_val", "bytes_val", b"bytes_val", "double_list_val", b"double_list_val", "double_set_val", b"double_set_val", "double_val", b"double_val", "float_list_val", b"float_list_val", "float_set_val", b"float_set_val", "float_val", b"float_val", "int32_list_val", b"int32_list_val", "int32_set_val", b"int32_set_val", "int32_val", b"int32_val", "int64_list_val", b"int64_list_val", "int64_set_val", b"int64_set_val", "int64_val", b"int64_val", "json_list_val", b"json_list_val", "json_val", b"json_val", "map_list_val", b"map_list_val", "map_val", b"map_val", "null_val", b"null_val", "string_list_val", b"string_list_val", "string_set_val", b"string_set_val", "string_val", b"string_val", "struct_list_val", b"struct_list_val", "struct_val", b"struct_val", "time_uuid_list_val", b"time_uuid_list_val", "time_uuid_set_val", b"time_uuid_set_val", "time_uuid_val", b"time_uuid_val", "unix_timestamp_list_val", b"unix_timestamp_list_val", "unix_timestamp_set_val", b"unix_timestamp_set_val", "unix_timestamp_val", b"unix_timestamp_val", "uuid_list_val", b"uuid_list_val", "uuid_set_val", b"uuid_set_val", "uuid_val", b"uuid_val", "val", b"val"]) -> builtins.bool: ... 
- def ClearField(self, field_name: typing_extensions.Literal["bool_list_val", b"bool_list_val", "bool_set_val", b"bool_set_val", "bool_val", b"bool_val", "bytes_list_val", b"bytes_list_val", "bytes_set_val", b"bytes_set_val", "bytes_val", b"bytes_val", "double_list_val", b"double_list_val", "double_set_val", b"double_set_val", "double_val", b"double_val", "float_list_val", b"float_list_val", "float_set_val", b"float_set_val", "float_val", b"float_val", "int32_list_val", b"int32_list_val", "int32_set_val", b"int32_set_val", "int32_val", b"int32_val", "int64_list_val", b"int64_list_val", "int64_set_val", b"int64_set_val", "int64_val", b"int64_val", "json_list_val", b"json_list_val", "json_val", b"json_val", "map_list_val", b"map_list_val", "map_val", b"map_val", "null_val", b"null_val", "string_list_val", b"string_list_val", "string_set_val", b"string_set_val", "string_val", b"string_val", "struct_list_val", b"struct_list_val", "struct_val", b"struct_val", "time_uuid_list_val", b"time_uuid_list_val", "time_uuid_set_val", b"time_uuid_set_val", "time_uuid_val", b"time_uuid_val", "unix_timestamp_list_val", b"unix_timestamp_list_val", "unix_timestamp_set_val", b"unix_timestamp_set_val", "unix_timestamp_val", b"unix_timestamp_val", "uuid_list_val", b"uuid_list_val", "uuid_set_val", b"uuid_set_val", "uuid_val", b"uuid_val", "val", b"val"]) -> None: ... 
- def WhichOneof(self, oneof_group: typing_extensions.Literal["val", b"val"]) -> typing_extensions.Literal["bytes_val", "string_val", "int32_val", "int64_val", "double_val", "float_val", "bool_val", "unix_timestamp_val", "bytes_list_val", "string_list_val", "int32_list_val", "int64_list_val", "double_list_val", "float_list_val", "bool_list_val", "unix_timestamp_list_val", "null_val", "map_val", "map_list_val", "bytes_set_val", "string_set_val", "int32_set_val", "int64_set_val", "double_set_val", "float_set_val", "bool_set_val", "unix_timestamp_set_val", "json_val", "json_list_val", "struct_val", "struct_list_val", "uuid_val", "time_uuid_val", "uuid_list_val", "time_uuid_list_val", "uuid_set_val", "time_uuid_set_val"] | None: ... + def HasField(self, field_name: typing_extensions.Literal["bool_list_val", b"bool_list_val", "bool_set_val", b"bool_set_val", "bool_val", b"bool_val", "bytes_list_val", b"bytes_list_val", "bytes_set_val", b"bytes_set_val", "bytes_val", b"bytes_val", "double_list_val", b"double_list_val", "double_set_val", b"double_set_val", "double_val", b"double_val", "float_list_val", b"float_list_val", "float_set_val", b"float_set_val", "float_val", b"float_val", "int32_list_val", b"int32_list_val", "int32_set_val", b"int32_set_val", "int32_val", b"int32_val", "int64_list_val", b"int64_list_val", "int64_set_val", b"int64_set_val", "int64_val", b"int64_val", "json_list_val", b"json_list_val", "json_val", b"json_val", "list_val", b"list_val", "map_list_val", b"map_list_val", "map_val", b"map_val", "null_val", b"null_val", "set_val", b"set_val", "string_list_val", b"string_list_val", "string_set_val", b"string_set_val", "string_val", b"string_val", "struct_list_val", b"struct_list_val", "struct_val", b"struct_val", "time_uuid_list_val", b"time_uuid_list_val", "time_uuid_set_val", b"time_uuid_set_val", "time_uuid_val", b"time_uuid_val", "unix_timestamp_list_val", b"unix_timestamp_list_val", "unix_timestamp_set_val", b"unix_timestamp_set_val", 
"unix_timestamp_val", b"unix_timestamp_val", "uuid_list_val", b"uuid_list_val", "uuid_set_val", b"uuid_set_val", "uuid_val", b"uuid_val", "val", b"val"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["bool_list_val", b"bool_list_val", "bool_set_val", b"bool_set_val", "bool_val", b"bool_val", "bytes_list_val", b"bytes_list_val", "bytes_set_val", b"bytes_set_val", "bytes_val", b"bytes_val", "double_list_val", b"double_list_val", "double_set_val", b"double_set_val", "double_val", b"double_val", "float_list_val", b"float_list_val", "float_set_val", b"float_set_val", "float_val", b"float_val", "int32_list_val", b"int32_list_val", "int32_set_val", b"int32_set_val", "int32_val", b"int32_val", "int64_list_val", b"int64_list_val", "int64_set_val", b"int64_set_val", "int64_val", b"int64_val", "json_list_val", b"json_list_val", "json_val", b"json_val", "list_val", b"list_val", "map_list_val", b"map_list_val", "map_val", b"map_val", "null_val", b"null_val", "set_val", b"set_val", "string_list_val", b"string_list_val", "string_set_val", b"string_set_val", "string_val", b"string_val", "struct_list_val", b"struct_list_val", "struct_val", b"struct_val", "time_uuid_list_val", b"time_uuid_list_val", "time_uuid_set_val", b"time_uuid_set_val", "time_uuid_val", b"time_uuid_val", "unix_timestamp_list_val", b"unix_timestamp_list_val", "unix_timestamp_set_val", b"unix_timestamp_set_val", "unix_timestamp_val", b"unix_timestamp_val", "uuid_list_val", b"uuid_list_val", "uuid_set_val", b"uuid_set_val", "uuid_val", b"uuid_val", "val", b"val"]) -> None: ... 
+ def WhichOneof(self, oneof_group: typing_extensions.Literal["val", b"val"]) -> typing_extensions.Literal["bytes_val", "string_val", "int32_val", "int64_val", "double_val", "float_val", "bool_val", "unix_timestamp_val", "bytes_list_val", "string_list_val", "int32_list_val", "int64_list_val", "double_list_val", "float_list_val", "bool_list_val", "unix_timestamp_list_val", "null_val", "map_val", "map_list_val", "bytes_set_val", "string_set_val", "int32_set_val", "int64_set_val", "double_set_val", "float_set_val", "bool_set_val", "unix_timestamp_set_val", "json_val", "json_list_val", "struct_val", "struct_list_val", "uuid_val", "time_uuid_val", "uuid_list_val", "time_uuid_list_val", "uuid_set_val", "time_uuid_set_val", "list_val", "set_val"] | None: ... global___Value = Value diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 4478445f4e4..3c1cc5a9380 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -53,6 +53,7 @@ Int64Set, Map, MapList, + RepeatedValue, StringList, StringSet, ) @@ -105,6 +106,10 @@ def feast_value_type_to_python_type( result.append(v) return result + # Handle nested collection types (list_val, set_val) + if val_attr in ("list_val", "set_val"): + return _handle_nested_collection_value(val) + # Handle Struct types — stored using Map proto, returned as dicts if val_attr == "struct_val": return _handle_map_value(val) @@ -198,6 +203,18 @@ def _handle_map_list_value(map_list_message) -> List[Dict[str, Any]]: return result +def _handle_nested_collection_value(repeated_value) -> List[Any]: + """Handle nested collection proto (RepeatedValue containing Values). + + Each inner Value is itself a list/set proto. We recursively convert + each inner Value to a Python list/set via feast_value_type_to_python_type. 
+ """ + result = [] + for inner_value in repeated_value.val: + result.append(feast_value_type_to_python_type(inner_value)) + return result + + def feast_value_type_to_pandas_type(value_type: ValueType) -> Any: value_type_to_pandas_type: Dict[ValueType, str] = { ValueType.FLOAT: "float", @@ -212,7 +229,7 @@ def feast_value_type_to_pandas_type(value_type: ValueType) -> Any: ValueType.TIME_UUID: "str", } if ( - value_type.name in ("MAP", "JSON", "STRUCT") + value_type.name in ("MAP", "JSON", "STRUCT", "VALUE_LIST", "VALUE_SET") or value_type.name.endswith("_LIST") or value_type.name.endswith("_SET") ): @@ -306,8 +323,9 @@ def python_type_to_feast_value_type( if not recurse: raise ValueError( f"Value type for field {name} is {type(value)} but " - f"recursion is not allowed. Array types can only be one level " - f"deep." + f"recursion is not allowed. Nested collection types cannot be " + f"inferred automatically; use an explicit Field dtype instead " + f"(e.g., dtype=Array(Array(Int32)))." ) # This is the final type which we infer from the list @@ -446,6 +464,8 @@ def _convert_value_type_str_to_value_type(type_str: str) -> ValueType: "TIME_UUID_LIST": ValueType.TIME_UUID_LIST, "UUID_SET": ValueType.UUID_SET, "TIME_UUID_SET": ValueType.TIME_UUID_SET, + "VALUE_LIST": ValueType.VALUE_LIST, + "VALUE_SET": ValueType.VALUE_SET, } return type_map.get(type_str, ValueType.STRING) @@ -916,6 +936,10 @@ def _python_value_to_proto_value( Returns: List of Feast Value Proto """ + # Handle nested collection types (VALUE_LIST, VALUE_SET) + if feast_value_type in (ValueType.VALUE_LIST, ValueType.VALUE_SET): + return _convert_nested_collection_to_proto(feast_value_type, values) + # Handle Map types if feast_value_type == ValueType.MAP: result = [] @@ -1043,6 +1067,48 @@ def _python_value_to_proto_value( raise Exception(f"Unsupported data type: {feast_value_type}") +def _convert_nested_collection_to_proto( + feast_value_type: ValueType, values: List[Any] +) -> List[ProtoValue]: + 
"""Convert nested collection values (list-of-lists, list-of-sets, etc.) to proto.""" + val_attr = "list_val" if feast_value_type == ValueType.VALUE_LIST else "set_val" + + result = [] + for value in values: + if value is None: + result.append(ProtoValue()) + else: + inner_values = [] + for inner_collection in value: + if inner_collection is None: + inner_values.append(ProtoValue()) + else: + inner_list = list(inner_collection) + if len(inner_list) == 0: + # Empty inner collection: store as empty ProtoValue + inner_values.append(ProtoValue()) + elif any( + isinstance(item, (list, set, tuple, np.ndarray)) + for item in inner_list + ): + # Deeper nesting (3+ levels): recurse using VALUE_LIST + inner_proto = _convert_nested_collection_to_proto( + ValueType.VALUE_LIST, [inner_list] + ) + inner_values.append(inner_proto[0]) + else: + # Leaf level: wrap as a single list-typed Value + proto_vals = python_values_to_proto_values( + [inner_list], ValueType.UNKNOWN + ) + inner_values.append(proto_vals[0]) + repeated = RepeatedValue(val=inner_values) + proto = ProtoValue() + getattr(proto, val_attr).CopyFrom(repeated) + result.append(proto) + return result + + def _python_dict_to_map_proto(python_dict: Dict[str, Any]) -> Map: """Convert a Python dictionary to a Map proto message.""" map_proto = Map() @@ -1135,6 +1201,8 @@ def python_values_to_proto_values( "json_list_val": ValueType.JSON_LIST, "struct_val": ValueType.STRUCT, "struct_list_val": ValueType.STRUCT_LIST, + "list_val": ValueType.VALUE_LIST, + "set_val": ValueType.VALUE_SET, "int32_set_val": ValueType.INT32_SET, "int64_set_val": ValueType.INT64_SET, "double_set_val": ValueType.DOUBLE_SET, @@ -1176,7 +1244,11 @@ def pa_to_feast_value_type(pa_type_as_str: str) -> ValueType: is_list = False if pa_type_as_str.startswith("list", "") + inner_str = pa_type_as_str[len("list ValueType: if isinstance(self.base_type, Struct): return ValueType.STRUCT_LIST + if isinstance(self.base_type, (Array, Set)): + return 
ValueType.VALUE_LIST assert isinstance(self.base_type, PrimitiveFeastType) value_type_name = PRIMITIVE_FEAST_TYPES_TO_VALUE_TYPES[self.base_type.name] value_type_list_name = value_type_name + "_LIST" return ValueType[value_type_list_name] + def __eq__(self, other): + if isinstance(other, Array): + return self.base_type == other.base_type + return False + + def __hash__(self): + return hash(("Array", hash(self.base_type))) + def __str__(self): return f"Array({self.base_type})" @@ -207,21 +220,33 @@ class Set(ComplexFeastType): base_type: Union[PrimitiveFeastType, ComplexFeastType] def __init__(self, base_type: Union[PrimitiveFeastType, ComplexFeastType]): - # Sets do not support MAP as a base type - supported_set_types = [t for t in SUPPORTED_BASE_TYPES if t not in (Map,)] - if base_type not in supported_set_types: - raise ValueError( - f"Type {type(base_type)} is currently not supported as a base type for Set." - ) + # Allow Array and Set as base types for nested collections + if not isinstance(base_type, (Array, Set)): + # Sets do not support MAP as a base type + supported_set_types = [t for t in SUPPORTED_BASE_TYPES if t not in (Map,)] + if base_type not in supported_set_types: + raise ValueError( + f"Type {type(base_type)} is currently not supported as a base type for Set." 
+ ) self.base_type = base_type def to_value_type(self) -> ValueType: + if isinstance(self.base_type, (Array, Set)): + return ValueType.VALUE_SET assert isinstance(self.base_type, PrimitiveFeastType) value_type_name = PRIMITIVE_FEAST_TYPES_TO_VALUE_TYPES[self.base_type.name] value_type_set_name = value_type_name + "_SET" return ValueType[value_type_set_name] + def __eq__(self, other): + if isinstance(other, Set): + return self.base_type == other.base_type + return False + + def __hash__(self): + return hash(("Set", hash(self.base_type))) + def __str__(self): return f"Set({self.base_type})" @@ -365,6 +390,8 @@ def from_feast_to_pyarrow_type(feast_type: FeastType) -> pyarrow.DataType: base_type = feast_type.base_type if isinstance(base_type, Struct): return pyarrow.list_(base_type.to_pyarrow_type()) + if isinstance(base_type, (Array, Set)): + return pyarrow.list_(from_feast_to_pyarrow_type(base_type)) if isinstance(base_type, PrimitiveFeastType): if base_type == Map: return pyarrow.list_(pyarrow.map_(pyarrow.string(), pyarrow.string())) @@ -372,6 +399,8 @@ def from_feast_to_pyarrow_type(feast_type: FeastType) -> pyarrow.DataType: return pyarrow.list_(FEAST_TYPES_TO_PYARROW_TYPES[base_type]) elif isinstance(feast_type, Set): base_type = feast_type.base_type + if isinstance(base_type, (Array, Set)): + return pyarrow.list_(from_feast_to_pyarrow_type(base_type)) if isinstance(base_type, PrimitiveFeastType): if base_type in FEAST_TYPES_TO_PYARROW_TYPES: return pyarrow.list_(FEAST_TYPES_TO_PYARROW_TYPES[base_type]) @@ -402,6 +431,13 @@ def from_value_type( if value_type == ValueType.STRUCT_LIST: return Array(Struct({"_value": String})) + # Nested collection types use placeholder inner types. + # Real inner type is restored from Field tags during deserialization. 
+ if value_type == ValueType.VALUE_LIST: + return Array(Array(String)) + if value_type == ValueType.VALUE_SET: + return Set(Array(String)) + raise ValueError(f"Could not convert value type {value_type} to FeastType.") @@ -426,6 +462,12 @@ def from_feast_type( if isinstance(feast_type, Array) and isinstance(feast_type.base_type, Struct): return ValueType.STRUCT_LIST + # Handle nested collection types + if isinstance(feast_type, Array) and isinstance(feast_type.base_type, (Array, Set)): + return ValueType.VALUE_LIST + if isinstance(feast_type, Set) and isinstance(feast_type.base_type, (Array, Set)): + return ValueType.VALUE_SET + if feast_type in VALUE_TYPES_TO_FEAST_TYPES.values(): return list(VALUE_TYPES_TO_FEAST_TYPES.keys())[ list(VALUE_TYPES_TO_FEAST_TYPES.values()).index(feast_type) diff --git a/sdk/python/feast/value_type.py b/sdk/python/feast/value_type.py index 0575d25a1f1..508493de6d8 100644 --- a/sdk/python/feast/value_type.py +++ b/sdk/python/feast/value_type.py @@ -77,6 +77,8 @@ class ValueType(enum.Enum): TIME_UUID_LIST = 39 UUID_SET = 40 TIME_UUID_SET = 41 + VALUE_LIST = 42 + VALUE_SET = 43 ListType = Union[ diff --git a/sdk/python/tests/unit/test_proto_json.py b/sdk/python/tests/unit/test_proto_json.py index b5e01744e44..6b3c7181ad7 100644 --- a/sdk/python/tests/unit/test_proto_json.py +++ b/sdk/python/tests/unit/test_proto_json.py @@ -103,6 +103,33 @@ def test_feature_list(proto_json_patch): ) +def test_nested_collection_json_roundtrip(proto_json_patch): + """Nested collection values (list of lists) should survive JSON roundtrip.""" + from feast.protos.feast.types.Value_pb2 import Value + + # Build a Value with list_val containing [[1,2],[3,4,5]] + value_proto = Value() + inner1 = value_proto.list_val.val.add() + inner1.int64_list_val.val.extend([1, 2]) + inner2 = value_proto.list_val.val.add() + inner2.int64_list_val.val.extend([3, 4, 5]) + + # Serialize to JSON + value_json = MessageToDict(value_proto) + assert isinstance(value_json, list) + assert 
len(value_json) == 2 + assert value_json[0] == [1, 2] + assert value_json[1] == [3, 4, 5] + + # Deserialize back from JSON + feature_vector_str = '{"values": [[[1, 2], [3, 4, 5]]]}' + feature_vector_proto = FeatureVector() + Parse(feature_vector_str, feature_vector_proto) + assert len(feature_vector_proto.values) == 1 + assert feature_vector_proto.values[0].WhichOneof("val") == "list_val" + assert len(feature_vector_proto.values[0].list_val.val) == 2 + + @pytest.fixture(scope="module") def proto_json_patch(): proto_json.patch() diff --git a/sdk/python/tests/unit/test_type_map.py b/sdk/python/tests/unit/test_type_map.py index 6715817d3cb..e135f44ac6e 100644 --- a/sdk/python/tests/unit/test_type_map.py +++ b/sdk/python/tests/unit/test_type_map.py @@ -1553,3 +1553,120 @@ def test_pg_uuid_type_mapping(self): """PostgreSQL uuid type maps to ValueType.UUID.""" assert pg_type_to_feast_value_type("uuid") == ValueType.UUID assert pg_type_to_feast_value_type("uuid[]") == ValueType.UUID_LIST + + +class TestNestedCollectionTypes: + """Tests for nested collection type proto conversion (VALUE_LIST, VALUE_SET).""" + + def test_value_list_proto_roundtrip(self): + """Test python_values_to_proto_values and feast_value_type_to_python_type for VALUE_LIST.""" + values = [[[1, 2, 3], [4, 5]]] + protos = python_values_to_proto_values(values, ValueType.VALUE_LIST) + assert len(protos) == 1 + assert protos[0].WhichOneof("val") == "list_val" + result = feast_value_type_to_python_type(protos[0]) + assert isinstance(result, list) + assert len(result) == 2 + + def test_value_set_proto_roundtrip(self): + """Test VALUE_SET proto conversion.""" + values = [[["a", "b"], ["c"]]] + protos = python_values_to_proto_values(values, ValueType.VALUE_SET) + assert len(protos) == 1 + assert protos[0].WhichOneof("val") == "set_val" + result = feast_value_type_to_python_type(protos[0]) + assert isinstance(result, list) + assert len(result) == 2 + + def test_nested_collection_null_handling(self): + """Test 
that None values are handled correctly.""" + values = [None] + protos = python_values_to_proto_values(values, ValueType.VALUE_LIST) + assert len(protos) == 1 + assert protos[0].WhichOneof("val") is None + + def test_convert_value_type_str_nested(self): + """Test _convert_value_type_str_to_value_type for nested types.""" + assert ( + _convert_value_type_str_to_value_type("VALUE_LIST") == ValueType.VALUE_LIST + ) + assert _convert_value_type_str_to_value_type("VALUE_SET") == ValueType.VALUE_SET + + def test_nested_collection_empty_inner_list(self): + """Test that empty inner collections are handled gracefully.""" + values = [[[], [1, 2]]] + protos = python_values_to_proto_values(values, ValueType.VALUE_LIST) + result = feast_value_type_to_python_type(protos[0]) + assert isinstance(result, list) + assert len(result) == 2 + # Empty inner list should round-trip as None (empty ProtoValue) + assert result[0] is None + assert result[1] == [1, 2] + + def test_nested_collection_inner_none(self): + """Test that None inner elements are handled.""" + values = [[[1, 2], None, [3]]] + protos = python_values_to_proto_values(values, ValueType.VALUE_LIST) + result = feast_value_type_to_python_type(protos[0]) + assert len(result) == 3 + assert result[0] == [1, 2] + assert result[1] is None + assert result[2] == [3] + + def test_value_list_no_dedup(self): + """Test that VALUE_LIST does NOT deduplicate (Array semantics).""" + values = [[[1, 1, 2], [3, 3]]] + protos = python_values_to_proto_values(values, ValueType.VALUE_LIST) + result = feast_value_type_to_python_type(protos[0]) + assert result[0] == [1, 1, 2] + assert result[1] == [3, 3] + + def test_value_list_proto_roundtrip_values(self): + """Test that VALUE_LIST roundtrip preserves actual inner values.""" + values = [[[1, 2, 3], [4, 5]]] + protos = python_values_to_proto_values(values, ValueType.VALUE_LIST) + result = feast_value_type_to_python_type(protos[0]) + assert result[0] == [1, 2, 3] + assert result[1] == [4, 5] + + def 
test_value_set_proto_roundtrip_values(self): + """Test that VALUE_SET roundtrip preserves actual inner values.""" + values = [[["a", "b"], ["c"]]] + protos = python_values_to_proto_values(values, ValueType.VALUE_SET) + result = feast_value_type_to_python_type(protos[0]) + assert result[0] == ["a", "b"] + assert result[1] == ["c"] + + def test_multi_value_batch_nested(self): + """Test multiple nested collection values in a single batch.""" + values = [[[1, 2], [3]], [[4], [5, 6]]] + protos = python_values_to_proto_values(values, ValueType.VALUE_LIST) + assert len(protos) == 2 + r0 = feast_value_type_to_python_type(protos[0]) + r1 = feast_value_type_to_python_type(protos[1]) + assert r0 == [[1, 2], [3]] + assert r1 == [[4], [5, 6]] + + def test_feast_value_type_to_pa_nested(self): + """Test feast_value_type_to_pa for nested collection types.""" + for vt in ( + ValueType.VALUE_LIST, + ValueType.VALUE_SET, + ): + pa_type = feast_value_type_to_pa(vt) + assert pa_type == pyarrow.list_(pyarrow.list_(pyarrow.string())) + + def test_pa_to_feast_value_type_nested(self): + """Test pa_to_feast_value_type recognizes nested list PyArrow types.""" + assert ( + pa_to_feast_value_type("list<item: list<item: int32>>") + == ValueType.VALUE_LIST + ) + assert ( + pa_to_feast_value_type("list<item: list<item: string>>") + == ValueType.VALUE_LIST + ) + assert ( + pa_to_feast_value_type("list<item: list<item: double>>") + == ValueType.VALUE_LIST + ) diff --git a/sdk/python/tests/unit/test_types.py b/sdk/python/tests/unit/test_types.py index ed4b1383879..638a9bc7297 100644 --- a/sdk/python/tests/unit/test_types.py +++ b/sdk/python/tests/unit/test_types.py @@ -1,6 +1,22 @@ +import pyarrow import pytest -from feast.types import Array, Float32, Set, String, TimeUuid, Uuid, from_value_type +from feast.field import Field +from feast.types import ( + Array, + Bool, + Float32, + Float64, + Int32, + Int64, + Set, + String, + TimeUuid, + Uuid, + from_feast_to_pyarrow_type, + from_feast_type, + from_value_type, +) from feast.value_type import ValueType @@ -23,9 +39,6 @@
def test_array_feast_type(): with pytest.raises(ValueError): _ = Array(Array) - with pytest.raises(ValueError): - _ = Array(Array(String)) - def test_set_feast_type(): set_string = Set(String) @@ -39,8 +52,119 @@ def test_set_feast_type(): with pytest.raises(ValueError): _ = Set(Set) - with pytest.raises(ValueError): - _ = Set(Set(String)) + +def test_nested_array_array(): + """Array(Array(T)) should produce VALUE_LIST.""" + t = Array(Array(String)) + assert t.to_value_type() == ValueType.VALUE_LIST + assert from_feast_type(t) == ValueType.VALUE_LIST + + t2 = Array(Array(Int32)) + assert t2.to_value_type() == ValueType.VALUE_LIST + + +def test_nested_array_set(): + """Array(Set(T)) should produce VALUE_LIST.""" + t = Array(Set(String)) + assert t.to_value_type() == ValueType.VALUE_LIST + assert from_feast_type(t) == ValueType.VALUE_LIST + + +def test_nested_set_array(): + """Set(Array(T)) should produce VALUE_SET.""" + t = Set(Array(String)) + assert t.to_value_type() == ValueType.VALUE_SET + assert from_feast_type(t) == ValueType.VALUE_SET + + +def test_nested_set_set(): + """Set(Set(T)) should produce VALUE_SET.""" + t = Set(Set(String)) + assert t.to_value_type() == ValueType.VALUE_SET + assert from_feast_type(t) == ValueType.VALUE_SET + + +def test_nested_unbounded_depth(): + """Nesting depth should be unbounded.""" + # 3-level + t3 = Array(Array(Array(String))) + assert t3.to_value_type() == ValueType.VALUE_LIST + + t3_mixed = Array(Set(Array(String))) + assert t3_mixed.to_value_type() == ValueType.VALUE_LIST + + t3_set = Set(Array(Array(String))) + assert t3_set.to_value_type() == ValueType.VALUE_SET + + t3_set2 = Set(Set(Set(String))) + assert t3_set2.to_value_type() == ValueType.VALUE_SET + + # 4-level + t4 = Array(Array(Array(Array(Int32)))) + assert t4.to_value_type() == ValueType.VALUE_LIST + + +def test_nested_from_value_type_roundtrip(): + """from_value_type should return a placeholder for nested types.""" + for vt in ( + ValueType.VALUE_LIST, + 
ValueType.VALUE_SET, + ): + ft = from_value_type(vt) + assert ft.to_value_type() == vt + + +def test_nested_pyarrow_conversion(): + """Nested collection types should convert to pyarrow list(list(...)).""" + # Array(Array(String)) -> list(list(string)) + pa_type = from_feast_to_pyarrow_type(Array(Array(String))) + assert pa_type == pyarrow.list_(pyarrow.list_(pyarrow.string())) + + # Array(Set(Int64)) -> list(list(int64)) + pa_type = from_feast_to_pyarrow_type(Array(Set(Int64))) + assert pa_type == pyarrow.list_(pyarrow.list_(pyarrow.int64())) + + # Set(Array(Float64)) -> list(list(float64)) + pa_type = from_feast_to_pyarrow_type(Set(Array(Float64))) + assert pa_type == pyarrow.list_(pyarrow.list_(pyarrow.float64())) + + # Set(Set(Bool)) -> list(list(bool)) + pa_type = from_feast_to_pyarrow_type(Set(Set(Bool))) + assert pa_type == pyarrow.list_(pyarrow.list_(pyarrow.bool_())) + + # 3-level: Array(Array(Array(Int32))) -> list(list(list(int32))) + pa_type = from_feast_to_pyarrow_type(Array(Array(Array(Int32)))) + assert pa_type == pyarrow.list_(pyarrow.list_(pyarrow.list_(pyarrow.int32()))) + + +def test_nested_field_roundtrip(): + """Field with nested collection type should survive to_proto -> from_proto.""" + test_cases = [ + ("aa", Array(Array(String))), + ("as_field", Array(Set(Int32))), + ("sa", Set(Array(Float64))), + ("ss", Set(Set(Bool))), + # 3-level nesting + ("aaa", Array(Array(Array(Int32)))), + ("asa", Array(Set(Array(String)))), + # 4-level nesting + ("aaaa", Array(Array(Array(Array(Float64))))), + ] + for name, dtype in test_cases: + field = Field(name=name, dtype=dtype, tags={"user_tag": "value"}) + proto = field.to_proto() + restored = Field.from_proto(proto) + assert restored.name == name, f"Name mismatch for {dtype}" + assert restored.dtype.to_value_type() == dtype.to_value_type(), ( + f"dtype mismatch for {name}: {restored.dtype} vs {dtype}" + ) + # Verify inner type is preserved (not just ValueType equality) + assert str(restored.dtype) == 
str(dtype), ( + f"Inner type lost for {name}: got {restored.dtype}, expected {dtype}" + ) + assert restored.tags == {"user_tag": "value"}, ( + f"Tags should not contain internal tags for {name}" + ) def test_uuid_feast_type(): @@ -70,6 +194,44 @@ def test_uuid_set_feast_type(): assert from_value_type(set_time_uuid.to_value_type()) == set_time_uuid +def test_feast_type_str_roundtrip(): + """_feast_type_to_str and _str_to_feast_type should roundtrip for nested types.""" + from feast.field import _feast_type_to_str, _str_to_feast_type + + test_cases = [ + Array(Array(String)), + Array(Array(Int32)), + Array(Array(Float64)), + Array(Set(Int64)), + Array(Set(Bool)), + Set(Array(String)), + Set(Array(Float32)), + Set(Set(Int32)), + Set(Set(Float64)), + # 3+ level nesting + Array(Array(Array(String))), + Array(Set(Array(Int32))), + Set(Set(Set(Float64))), + ] + for dtype in test_cases: + s = _feast_type_to_str(dtype) + restored = _str_to_feast_type(s) + assert str(restored) == str(dtype), ( + f"Roundtrip failed: {dtype} -> '{s}' -> {restored}" + ) + + +def test_str_to_feast_type_invalid(): + """_str_to_feast_type should raise ValueError on unrecognized type names.""" + from feast.field import _str_to_feast_type + + with pytest.raises(ValueError, match="Unknown FeastType"): + _str_to_feast_type("INVALID_TYPE") + + with pytest.raises(ValueError, match="Unknown FeastType"): + _str_to_feast_type("Strig") + + def test_all_value_types(): for value in ValueType: # We do not support the NULL type.