Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Previous commit
fix linter 2
Signed-off-by: yassinnouh21 <yassinnouh21@gmail.com>
  • Loading branch information
YassinNouh21 committed Mar 31, 2025
commit 79548cc7cd76675fd50d1f11d71a3fb913dda01e
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,6 @@ def retrieve_online_documents_v2(
]:
"""
Retrieve documents using vector similarity search or keyword search in Milvus.

Args:
config: Feast configuration object
table: FeatureView object as the table to search
Expand All @@ -504,7 +503,6 @@ def retrieve_online_documents_v2(
top_k: Number of items to return
distance_metric: Distance metric to use (optional)
query_string: The query string to search for using keyword search (optional)

Returns:
List of tuples containing the event timestamp, entity key, and feature values
"""
Expand Down Expand Up @@ -539,22 +537,30 @@ def retrieve_online_documents_v2(
if embedding is not None:
for field in collection["fields"]:
if (
field["type"] in [DataType.FLOAT_VECTOR, DataType.BINARY_VECTOR]
and field["name"] in output_fields
field["type"] in [DataType.FLOAT_VECTOR, DataType.BINARY_VECTOR]
and field["name"] in output_fields
):
ann_search_field = field["name"]
break

self.client.load_collection(collection_name)

if embedding is not None and query_string is not None and config.online_store.vector_enabled:
if (
embedding is not None
and query_string is not None
and config.online_store.vector_enabled
):
string_field_list = [
f.name for f in table.features if
isinstance(f.dtype, PrimitiveFeastType) and f.dtype.to_value_type() == ValueType.STRING
f.name
for f in table.features
if isinstance(f.dtype, PrimitiveFeastType)
and f.dtype.to_value_type() == ValueType.STRING
]

if not string_field_list:
raise ValueError("No string fields found in the feature view for text search in hybrid mode")
raise ValueError(
"No string fields found in the feature view for text search in hybrid mode"
)

# Create a filter expression for text search
filter_expressions = []
Expand Down Expand Up @@ -600,12 +606,16 @@ def retrieve_online_documents_v2(

elif query_string is not None:
string_field_list = [
f.name for f in table.features if
isinstance(f.dtype, PrimitiveFeastType) and f.dtype.to_value_type() == ValueType.STRING
f.name
for f in table.features
if isinstance(f.dtype, PrimitiveFeastType)
and f.dtype.to_value_type() == ValueType.STRING
]

if not string_field_list:
raise ValueError("No string fields found in the feature view for text search")
raise ValueError(
"No string fields found in the feature view for text search"
)

filter_expressions = []
for field in string_field_list:
Expand All @@ -615,7 +625,9 @@ def retrieve_online_documents_v2(
filter_expr = " OR ".join(filter_expressions)

if not filter_expr:
raise ValueError("No text fields found in requested features for search")
raise ValueError(
"No text fields found in requested features for search"
)

query_results = self.client.query(
collection_name=collection_name,
Expand All @@ -624,7 +636,9 @@ def retrieve_online_documents_v2(
limit=top_k,
)

results = [[{"entity": entity, "distance": -1.0}] for entity in query_results]
results = [
[{"entity": entity, "distance": -1.0}] for entity in query_results
]
else:
raise ValueError(
"Either vector_enabled must be True for embedding search or query_string must be provided for keyword search"
Expand Down
36 changes: 19 additions & 17 deletions sdk/python/tests/unit/online_store/test_online_retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1502,16 +1502,16 @@ def test_milvus_keyword_search() -> None:
teardown=False,
) as store:
from datetime import timedelta

from feast import Entity, FeatureView, Field, FileSource
from feast.types import Array, Float32, Int64, String, UnixTimestamp

rag_documents_source = FileSource(
path="data/embedded_documents.parquet",
timestamp_field="event_timestamp",
created_timestamp_column="created_timestamp",
)

item = Entity(
name="item_id",
join_keys=["item_id"],
Expand All @@ -1522,7 +1522,7 @@ def test_milvus_keyword_search() -> None:
join_keys=["author_id"],
value_type=ValueType.STRING,
)

document_embeddings = FeatureView(
name="text_documents",
entities=[item, author],
Expand All @@ -1543,13 +1543,13 @@ def test_milvus_keyword_search() -> None:
source=rag_documents_source,
ttl=timedelta(hours=24),
)

store.apply([rag_documents_source, item, document_embeddings])

# Write some data with specific text content for keyword search
document_embeddings_fv = store.get_feature_view(name="text_documents")
provider = store._get_provider()

contents = [
"Feast is an open source feature store for machine learning",
"Feature stores solve the problem of coordinating features for training and serving",
Expand All @@ -1562,7 +1562,7 @@ def test_milvus_keyword_search() -> None:
"Offline stores are used for batch feature retrieval during training",
"Feast enables data scientists to define, manage, and share features",
]

titles = [
"Introduction to Feast",
"Feature Store Benefits",
Expand All @@ -1575,7 +1575,7 @@ def test_milvus_keyword_search() -> None:
"Offline Training Support",
"Feast for Data Scientists",
]

item_keys = [
EntityKeyProto(
join_keys=["item_id", "author_id"],
Expand Down Expand Up @@ -1604,14 +1604,14 @@ def test_milvus_keyword_search() -> None:
_utc_now(),
)
)

provider.online_write_batch(
config=store.config,
table=document_embeddings_fv,
data=data,
progress=None,
)

# Test keyword search for "Milvus"
result_milvus = store.retrieve_online_documents_v2(
features=[
Expand All @@ -1621,11 +1621,11 @@ def test_milvus_keyword_search() -> None:
query_string="Milvus",
top_k=3,
).to_dict()

# Verify that documents containing "Milvus" are returned
assert len(result_milvus["content"]) > 0
assert any("Milvus" in content for content in result_milvus["content"])

# Test keyword search for "machine learning"
result_ml = store.retrieve_online_documents_v2(
features=[
Expand All @@ -1635,11 +1635,13 @@ def test_milvus_keyword_search() -> None:
query_string="machine learning",
top_k=3,
).to_dict()

# Verify that documents containing "machine learning" are returned
assert len(result_ml["content"]) > 0
assert any("machine learning" in content.lower() for content in result_ml["content"])

assert any(
"machine learning" in content.lower() for content in result_ml["content"]
)

# Test hybrid search (vector + keyword)
query_embedding = np.random.random(vector_length).tolist()
result_hybrid = store.retrieve_online_documents_v2(
Expand All @@ -1652,7 +1654,7 @@ def test_milvus_keyword_search() -> None:
query_string="Feast",
top_k=3,
).to_dict()

# Verify hybrid search results
assert len(result_hybrid["content"]) > 0
assert any("Feast" in content for content in result_hybrid["content"])
Expand Down
Loading