From 59a1f7749afa46f3e4228a7c8a2debe2e86847ae Mon Sep 17 00:00:00 2001 From: jyejare Date: Thu, 20 Mar 2025 21:05:38 +0530 Subject: [PATCH 1/6] Fixed Retrive online documents for serialization ver 3 Signed-off-by: jyejare --- sdk/python/feast/feature_store.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 82e30c5df80..164f8df09c1 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -1895,12 +1895,13 @@ def retrieve_online_documents( for feature_view in available_feature_views: if feature_view.name == requested_feature_view_name: requested_feature_view = feature_view + break if not requested_feature_view: raise ValueError( f"Feature view {requested_feature_view} not found in the registry." ) - - requested_feature_view = available_feature_views[0] + # Unnecessary code here a its overriding the requested_feature_view set from the above for loop + # requested_feature_view = available_feature_views[0] provider = self._get_provider() document_features = self._retrieve_from_online_store( @@ -1933,7 +1934,7 @@ def retrieve_online_documents( online_features_response=online_features_response, data={ **join_key_values, - requested_feature: document_feature_vals, + "embedding": document_feature_vals, # Replace the hardcoded "embedding" with the actual feature name "distance": document_feature_distance_vals, }, ) From 9c07b0a8a7b80bde81412dc7ed47b281a5ce6e2a Mon Sep 17 00:00:00 2001 From: jyejare Date: Fri, 21 Mar 2025 21:11:33 +0530 Subject: [PATCH 2/6] Dynamic vector field name Signed-off-by: jyejare --- sdk/python/feast/feature_store.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 164f8df09c1..322c215eb5b 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -1930,13 +1930,25 @@ def retrieve_online_documents( document_feature_distance_vals = [feature[5] for feature in document_features] online_features_response = GetOnlineFeaturesResponse(results=[]) requested_feature = requested_feature or requested_features[0] + if vector_field_metadata := _get_feature_view_vector_field_metadata( + requested_feature_view + ): + vector_field_name = vector_field_metadata.name + data = { + **join_key_values, + vector_field_name: document_feature_vals, + "distance": document_feature_distance_vals, + } + _requested_features = [_feature.split(":")[-1] for _feature in feature_list] + requested_features_data = { + _feature: data[_feature] + for _feature in _requested_features + if _feature in data + } + # TODO currently the requested 'features' list is not functioning as expected utils._populate_result_rows_from_columnar( online_features_response=online_features_response, - data={ - **join_key_values, - "embedding": document_feature_vals, # Replace the hardcoded "embedding" with the actual feature name - "distance": document_feature_distance_vals, - }, + data=requested_features_data, ) return OnlineResponse(online_features_response) From b1a66628638fcd2ae4debef91a52522aa618ed6c Mon Sep 17 00:00:00 2001 From: jyejare Date: Sun, 23 Mar 2025 00:43:33 +0530 Subject: [PATCH 3/6] Requested only features with values Signed-off-by: jyejare --- sdk/python/feast/feature_store.py | 25 ++++++------------- .../postgres_online_store/postgres.py | 8 +++++- .../online_store/test_online_retrieval.py | 4 ++- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 322c215eb5b..6f314ad96b4 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -1869,39 +1869,31 @@ def retrieve_online_documents( allow_cache=True, hide_dummy_entity=False, ) - if features: + if feature_list: feature_view_set = set() - for feature in features: - feature_view_name = feature.split(":")[0] + for _feature in feature_list: + feature_view_name = _feature.split(":")[0] feature_view = self.get_feature_view(feature_view_name) feature_view_set.add(feature_view.name) if len(feature_view_set) > 1: raise ValueError( "Document retrieval only supports a single feature view." ) - requested_feature = None - requested_features = [ - f.split(":")[1] for f in features if isinstance(f, str) and ":" in f - ] - else: requested_feature = ( feature.split(":")[1] if isinstance(feature, str) else feature ) - requested_features = [requested_feature] if requested_feature else [] - - requested_feature_view_name = ( - feature.split(":")[0] if feature else list(feature_view_set)[0] - ) + requested_features = [ + f.split(":")[1] for f in feature_list if isinstance(f, str) and ":" in f + ] + requested_feature_view_name = list(feature_view_set)[0] for feature_view in available_feature_views: if feature_view.name == requested_feature_view_name: requested_feature_view = feature_view break - if not requested_feature_view: + else: raise ValueError( f"Feature view {requested_feature_view} not found in the registry." ) - # Unnecessary code here a its overriding the requested_feature_view set from the above for loop - # requested_feature_view = available_feature_views[0] provider = self._get_provider() document_features = self._retrieve_from_online_store( @@ -1945,7 +1937,6 @@ def retrieve_online_documents( for _feature in _requested_features if _feature in data } - # TODO currently the requested 'features' list is not functioning as expected utils._populate_result_rows_from_columnar( online_features_response=online_features_response, data=requested_features_data, diff --git a/sdk/python/feast/infra/online_stores/postgres_online_store/postgres.py b/sdk/python/feast/infra/online_stores/postgres_online_store/postgres.py index 4f519003d61..61baed31f98 100644 --- a/sdk/python/feast/infra/online_stores/postgres_online_store/postgres.py +++ b/sdk/python/feast/infra/online_stores/postgres_online_store/postgres.py @@ -394,6 +394,11 @@ def retrieve_online_documents( f"Distance metric {distance_metric} is not supported. Supported distance metrics are {SUPPORTED_DISTANCE_METRICS_DICT.keys()}" ) + if requested_features: + required_feature_names = ", ".join( + [feature for feature in requested_features] + ) + distance_metric_sql = SUPPORTED_DISTANCE_METRICS_DICT[distance_metric] result: List[ @@ -415,7 +420,7 @@ def retrieve_online_documents( """ SELECT entity_key, - feature_name, + {feature_names}, value, vector_value, vector_value {distance_metric_sql} %s::vector as distance, @@ -427,6 +432,7 @@ def retrieve_online_documents( ).format( distance_metric_sql=sql.SQL(distance_metric_sql), table_name=sql.Identifier(table_name), + feature_names=required_feature_names, feature_name=sql.Literal(requested_feature), top_k=sql.Literal(top_k), ), diff --git a/sdk/python/tests/unit/online_store/test_online_retrieval.py b/sdk/python/tests/unit/online_store/test_online_retrieval.py index 6aa88d3f258..e7fca47bb55 100644 --- a/sdk/python/tests/unit/online_store/test_online_retrieval.py +++ b/sdk/python/tests/unit/online_store/test_online_retrieval.py @@ -718,7 +718,9 @@ def test_sqlite_get_online_documents() -> None: vector_length, ) result = store.retrieve_online_documents( - feature="document_embeddings:Embeddings", query=query_embedding, top_k=3 + query=query_embedding, + top_k=3, + features=["document_embeddings:Embeddings", "document_embeddings:distance"], ).to_dict() assert "Embeddings" in result From 912e30b03e622770226bdaae2e60c30fc5fb489a Mon Sep 17 00:00:00 2001 From: jyejare Date: Wed, 26 Mar 2025 23:26:30 +0530 Subject: [PATCH 4/6] Removing the singular feature parameter for doc retrieval Signed-off-by: jyejare --- sdk/python/feast/feature_store.py | 56 +++++++------------ .../elasticsearch.py | 3 +- .../infra/online_stores/faiss_online_store.py | 3 +- .../feast/infra/online_stores/online_store.py | 10 +--- .../postgres_online_store/postgres.py | 4 -- .../qdrant_online_store/qdrant.py | 3 +- .../feast/infra/online_stores/sqlite.py | 5 +- .../feast/infra/passthrough_provider.py | 2 - sdk/python/feast/infra/provider.py | 2 - 9 files changed, 28 insertions(+), 60 deletions(-) diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 6f314ad96b4..2450942d792 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -1831,19 +1831,15 @@ async def get_online_features_async( def retrieve_online_documents( self, - feature: Optional[str], query: Union[str, List[float]], top_k: int, - features: Optional[List[str]] = None, + features: List[str], distance_metric: Optional[str] = "L2", ) -> OnlineResponse: """ Retrieves the top k closest document features. Note, embeddings are a subset of features. Args: - feature: The list of document features that should be retrieved from the online document store. These features can be - specified either as a list of string document feature references or as a feature service. String feature - references must have format "feature_view:feature", e.g, "document_fv:document_embeddings". features: The list of features that should be retrieved from the online store. query: The query to retrieve the closest document features for. top_k: The number of closest document features to retrieve. @@ -1853,11 +1849,6 @@ def retrieve_online_documents( raise ValueError( "Using embedding functionality is not supported for document retrieval. Please embed the query before calling retrieve_online_documents." ) - feature_list: List[str] = ( - features - if features is not None - else ([feature] if feature is not None else []) - ) ( available_feature_views, @@ -1865,26 +1856,20 @@ def retrieve_online_documents( ) = utils._get_feature_views_to_use( registry=self._registry, project=self.project, - features=feature_list, + features=features, allow_cache=True, hide_dummy_entity=False, ) - if feature_list: - feature_view_set = set() - for _feature in feature_list: - feature_view_name = _feature.split(":")[0] - feature_view = self.get_feature_view(feature_view_name) - feature_view_set.add(feature_view.name) - if len(feature_view_set) > 1: - raise ValueError( - "Document retrieval only supports a single feature view." - ) - requested_feature = ( - feature.split(":")[1] if isinstance(feature, str) else feature - ) - requested_features = [ - f.split(":")[1] for f in feature_list if isinstance(f, str) and ":" in f - ] + feature_view_set = set() + for _feature in features: + feature_view_name = _feature.split(":")[0] + feature_view = self.get_feature_view(feature_view_name) + feature_view_set.add(feature_view.name) + if len(feature_view_set) > 1: + raise ValueError("Document retrieval only supports a single feature view.") + requested_features = [ + f.split(":")[1] for f in features if isinstance(f, str) and ":" in f + ] requested_feature_view_name = list(feature_view_set)[0] for feature_view in available_feature_views: if feature_view.name == requested_feature_view_name: @@ -1899,15 +1884,20 @@ def retrieve_online_documents( document_features = self._retrieve_from_online_store( provider, requested_feature_view, - requested_feature, requested_features, query, top_k, distance_metric, ) + # TODO currently not return the vector value since it is same as feature value, if embedding is supported, # the feature value can be raw text before embedded - entity_key_vals = [feature[1] for feature in document_features] + def _doc_feature(x): + return [feature[x] for feature in document_features] + + entity_key_vals, document_feature_vals, document_feature_distance_vals = map( + _doc_feature, (1, 4, 5) + ) join_key_values: Dict[str, List[ValueProto]] = {} for entity_key_val in entity_key_vals: if entity_key_val is not None: @@ -1917,11 +1907,7 @@ def retrieve_online_documents( if join_key not in join_key_values: join_key_values[join_key] = [] join_key_values[join_key].append(entity_value) - - document_feature_vals = [feature[4] for feature in document_features] - document_feature_distance_vals = [feature[5] for feature in document_features] online_features_response = GetOnlineFeaturesResponse(results=[]) - requested_feature = requested_feature or requested_features[0] if vector_field_metadata := _get_feature_view_vector_field_metadata( requested_feature_view ): @@ -1931,7 +1917,7 @@ def retrieve_online_documents( vector_field_name: document_feature_vals, "distance": document_feature_distance_vals, } - _requested_features = [_feature.split(":")[-1] for _feature in feature_list] + _requested_features = [_feature.split(":")[-1] for _feature in features] requested_features_data = { _feature: data[_feature] for _feature in _requested_features @@ -2016,7 +2002,6 @@ def _retrieve_from_online_store( self, provider: Provider, table: FeatureView, - requested_feature: Optional[str], requested_features: Optional[List[str]], query: List[float], top_k: int, @@ -2036,7 +2021,6 @@ def _retrieve_from_online_store( documents = provider.retrieve_online_documents( config=self.config, table=table, - requested_feature=requested_feature, requested_features=requested_features, query=query, top_k=top_k, diff --git a/sdk/python/feast/infra/online_stores/elasticsearch_online_store/elasticsearch.py b/sdk/python/feast/infra/online_stores/elasticsearch_online_store/elasticsearch.py index af328141520..d025486a976 100644 --- a/sdk/python/feast/infra/online_stores/elasticsearch_online_store/elasticsearch.py +++ b/sdk/python/feast/infra/online_stores/elasticsearch_online_store/elasticsearch.py @@ -213,8 +213,7 @@ def retrieve_online_documents( self, config: RepoConfig, table: FeatureView, - requested_feature: Optional[str], - requested_features: Optional[List[str]], + requested_features: List[str], embedding: List[float], top_k: int, *args, diff --git a/sdk/python/feast/infra/online_stores/faiss_online_store.py b/sdk/python/feast/infra/online_stores/faiss_online_store.py index fd4d6768abd..4b666f60f40 100644 --- a/sdk/python/feast/infra/online_stores/faiss_online_store.py +++ b/sdk/python/feast/infra/online_stores/faiss_online_store.py @@ -176,8 +176,7 @@ def retrieve_online_documents( self, config: RepoConfig, table: FeatureView, - requested_feature: Optional[str], - requested_featres: Optional[List[str]], + requested_featres: List[str], embedding: List[float], top_k: int, distance_metric: Optional[str] = None, diff --git a/sdk/python/feast/infra/online_stores/online_store.py b/sdk/python/feast/infra/online_stores/online_store.py index 5111bcd47bd..b77185229d5 100644 --- a/sdk/python/feast/infra/online_stores/online_store.py +++ b/sdk/python/feast/infra/online_stores/online_store.py @@ -392,8 +392,7 @@ def retrieve_online_documents( self, config: RepoConfig, table: FeatureView, - requested_feature: Optional[str], - requested_features: Optional[List[str]], + requested_features: List[str], embedding: List[float], top_k: int, distance_metric: Optional[str] = None, @@ -413,7 +412,6 @@ def retrieve_online_documents( distance_metric: distance metric to use for retrieval. config: The config for the current feature store. table: The feature view whose feature values should be read. - requested_feature: The name of the feature whose embeddings should be used for retrieval. requested_features: The list of features whose embeddings should be used for retrieval. embedding: The embeddings to use for retrieval. top_k: The number of documents to retrieve. @@ -423,10 +421,8 @@ def retrieve_online_documents( where the first item is the event timestamp for the row, and the second item is a dict of feature name to embeddings. """ - if not requested_feature and not requested_features: - raise ValueError( - "Either requested_feature or requested_features must be specified" - ) + if not requested_features: + raise ValueError("Requested_features must be specified") raise NotImplementedError( f"Online store {self.__class__.__name__} does not support online retrieval" ) diff --git a/sdk/python/feast/infra/online_stores/postgres_online_store/postgres.py b/sdk/python/feast/infra/online_stores/postgres_online_store/postgres.py index 61baed31f98..b5c1dd05f3a 100644 --- a/sdk/python/feast/infra/online_stores/postgres_online_store/postgres.py +++ b/sdk/python/feast/infra/online_stores/postgres_online_store/postgres.py @@ -354,7 +354,6 @@ def retrieve_online_documents( self, config: RepoConfig, table: FeatureView, - requested_feature: Optional[str], requested_features: Optional[List[str]], embedding: List[float], top_k: int, @@ -373,7 +372,6 @@ def retrieve_online_documents( Args: config: Feast configuration object table: FeatureView object as the table to search - requested_feature: The requested feature as the column to search requested_features: The list of features whose embeddings should be used for retrieval. embedding: The query embedding to search for top_k: The number of items to return @@ -425,7 +423,6 @@ def retrieve_online_documents( vector_value, vector_value {distance_metric_sql} %s::vector as distance, event_ts FROM {table_name} - WHERE feature_name = {feature_name} ORDER BY distance LIMIT {top_k}; """ @@ -433,7 +430,6 @@ def retrieve_online_documents( distance_metric_sql=sql.SQL(distance_metric_sql), table_name=sql.Identifier(table_name), feature_names=required_feature_names, - feature_name=sql.Literal(requested_feature), top_k=sql.Literal(top_k), ), (embedding,), diff --git a/sdk/python/feast/infra/online_stores/qdrant_online_store/qdrant.py b/sdk/python/feast/infra/online_stores/qdrant_online_store/qdrant.py index 81652c3e2a9..2a2fafdab1c 100644 --- a/sdk/python/feast/infra/online_stores/qdrant_online_store/qdrant.py +++ b/sdk/python/feast/infra/online_stores/qdrant_online_store/qdrant.py @@ -248,8 +248,7 @@ def retrieve_online_documents( self, config: RepoConfig, table: FeatureView, - requested_feature: Optional[str], - requested_features: Optional[List[str]], + requested_features: List[str], embedding: List[float], top_k: int, distance_metric: Optional[str] = "cosine", diff --git a/sdk/python/feast/infra/online_stores/sqlite.py b/sdk/python/feast/infra/online_stores/sqlite.py index 15ef81188b0..4785d1f6ba1 100644 --- a/sdk/python/feast/infra/online_stores/sqlite.py +++ b/sdk/python/feast/infra/online_stores/sqlite.py @@ -322,8 +322,7 @@ def retrieve_online_documents( self, config: RepoConfig, table: FeatureView, - requested_feature: Optional[str], - requested_featuers: Optional[List[str]], + requested_featuers: List[str], embedding: List[float], top_k: int, distance_metric: Optional[str] = None, @@ -341,7 +340,7 @@ def retrieve_online_documents( Args: config: Feast configuration object table: FeatureView object as the table to search - requested_feature: The requested feature as the column to search + requested_features: The list of requested features to retrieve embedding: The query embedding to search for top_k: The number of items to return Returns: diff --git a/sdk/python/feast/infra/passthrough_provider.py b/sdk/python/feast/infra/passthrough_provider.py index 4e504997d2a..f5df0f2eb1a 100644 --- a/sdk/python/feast/infra/passthrough_provider.py +++ b/sdk/python/feast/infra/passthrough_provider.py @@ -294,7 +294,6 @@ def retrieve_online_documents( self, config: RepoConfig, table: FeatureView, - requested_feature: Optional[str], requested_features: Optional[List[str]], query: List[float], top_k: int, @@ -305,7 +304,6 @@ def retrieve_online_documents( result = self.online_store.retrieve_online_documents( config, table, - requested_feature, requested_features, query, top_k, diff --git a/sdk/python/feast/infra/provider.py b/sdk/python/feast/infra/provider.py index 18fbd051771..15917420af0 100644 --- a/sdk/python/feast/infra/provider.py +++ b/sdk/python/feast/infra/provider.py @@ -419,7 +419,6 @@ def retrieve_online_documents( self, config: RepoConfig, table: FeatureView, - requested_feature: Optional[str], requested_features: Optional[List[str]], query: List[float], top_k: int, @@ -440,7 +439,6 @@ def retrieve_online_documents( distance_metric: distance metric to use for the search. config: The config for the current feature store. table: The feature view whose embeddings should be searched. - requested_feature: the requested document feature name. requested_features: the requested document feature names. query: The query embedding to search for. top_k: The number of documents to return. From e298d1d1edc184bfe26e71481025559d766636aa Mon Sep 17 00:00:00 2001 From: jyejare Date: Thu, 27 Mar 2025 00:06:13 +0530 Subject: [PATCH 5/6] Documentation and other DBs test fixes Signed-off-by: jyejare --- docs/reference/online-stores/elasticsearch.md | 2 +- docs/reference/online-stores/qdrant.md | 2 +- .../tests/integration/online_store/test_universal_online.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/reference/online-stores/elasticsearch.md b/docs/reference/online-stores/elasticsearch.md index 81d267a1c65..509913f8113 100644 --- a/docs/reference/online-stores/elasticsearch.md +++ b/docs/reference/online-stores/elasticsearch.md @@ -68,7 +68,7 @@ top_k = 5 # Retrieve the top k closest features to the query vector feature_values = feature_store.retrieve_online_documents( - feature="my_feature", + features=["my_feature"], query=query_vector, top_k=top_k ) diff --git a/docs/reference/online-stores/qdrant.md b/docs/reference/online-stores/qdrant.md index d3f1eebf319..18ddbb7fc0d 100644 --- a/docs/reference/online-stores/qdrant.md +++ b/docs/reference/online-stores/qdrant.md @@ -70,7 +70,7 @@ top_k = 5 # the vector to use can be specified in the repo config. # Reference: https://qdrant.tech/documentation/concepts/vectors/#named-vectors feature_values = feature_store.retrieve_online_documents( - feature="my_feature", + features=["my_feature"], query=query_vector, top_k=top_k ) diff --git a/sdk/python/tests/integration/online_store/test_universal_online.py b/sdk/python/tests/integration/online_store/test_universal_online.py index 4a820e833d0..3445cd27aa3 100644 --- a/sdk/python/tests/integration/online_store/test_universal_online.py +++ b/sdk/python/tests/integration/online_store/test_universal_online.py @@ -870,7 +870,7 @@ def test_retrieve_online_documents(environment, fake_document_data): fs.write_to_online_store("item_embeddings", df) documents = fs.retrieve_online_documents( - feature="item_embeddings:embedding_float", + features=["item_embeddings:embedding_float"], query=[1.0, 2.0], top_k=2, distance_metric="L2", @@ -881,7 +881,7 @@ def test_retrieve_online_documents(environment, fake_document_data): assert len(documents["item_id"]) == 2 documents = fs.retrieve_online_documents( - feature="item_embeddings:embedding_float", + features=["item_embeddings:embedding_float"], query=[1.0, 2.0], top_k=2, distance_metric="L1", @@ -890,7 +890,7 @@ def test_retrieve_online_documents(environment, fake_document_data): with pytest.raises(ValueError): fs.retrieve_online_documents( - feature="item_embeddings:embedding_float", + features=["item_embeddings:embedding_float"], query=[1.0, 2.0], top_k=2, distance_metric="wrong", From 436c333bec79b2d503b0974fad5f221dbbdcdd2c Mon Sep 17 00:00:00 2001 From: jyejare Date: Thu, 27 Mar 2025 15:08:40 +0530 Subject: [PATCH 6/6] Review Fixes Signed-off-by: jyejare --- sdk/python/feast/infra/online_stores/sqlite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/python/feast/infra/online_stores/sqlite.py b/sdk/python/feast/infra/online_stores/sqlite.py index 4785d1f6ba1..12d94ff25fb 100644 --- a/sdk/python/feast/infra/online_stores/sqlite.py +++ b/sdk/python/feast/infra/online_stores/sqlite.py @@ -322,7 +322,7 @@ def retrieve_online_documents( self, config: RepoConfig, table: FeatureView, - requested_featuers: List[str], + requested_features: List[str], embedding: List[float], top_k: int, distance_metric: Optional[str] = None,