Skip to content

Commit 56c5910

Browse files
authored
feat: Added support for image search (#5577)
1 parent 697b226 commit 56c5910

31 files changed

+7916
-7110
lines changed

pyproject.toml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,12 @@ ibis = [
9090
ikv = [
9191
"ikvpy>=0.0.36",
9292
]
93+
image = [
94+
"feast[pytorch]",
95+
"timm>=0.6.0",
96+
"Pillow>=8.0.0",
97+
"scikit-learn>=1.0.0",
98+
]
9399
k8s = ["kubernetes<=20.13.0"]
94100
milvus = [
95101
"pymilvus==2.4.9",
@@ -168,9 +174,9 @@ ci = [
168174
"types-setuptools",
169175
"types-tabulate",
170176
"virtualenv<20.24.2",
171-
"feast[aws, azure, cassandra, clickhouse, couchbase, delta, docling, duckdb, elasticsearch, faiss, gcp, ge, go, grpcio, hazelcast, hbase, ibis, ikv, k8s, mcp, milvus, mssql, mysql, opentelemetry, spark, trino, postgres, pytorch, qdrant, rag, ray, redis, singlestore, snowflake, sqlite_vec]"
177+
"feast[aws, azure, cassandra, clickhouse, couchbase, delta, docling, duckdb, elasticsearch, faiss, gcp, ge, go, grpcio, hazelcast, hbase, ibis, ikv, image, k8s, mcp, milvus, mssql, mysql, opentelemetry, spark, trino, postgres, pytorch, qdrant, rag, ray, redis, singlestore, snowflake, sqlite_vec]"
172178
]
173-
nlp = ["feast[docling, milvus, pytorch, rag]"]
179+
nlp = ["feast[docling, image, milvus, pytorch, rag]"]
174180
dev = ["feast[ci]"]
175181
docs = ["feast[ci]"]
176182
# used for the 'feature-server' container image build

sdk/python/feast/feature_store.py

Lines changed: 103 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2239,6 +2239,12 @@ def retrieve_online_documents_v2(
22392239
query: Optional[List[float]] = None,
22402240
query_string: Optional[str] = None,
22412241
distance_metric: Optional[str] = "L2",
2242+
query_image_bytes: Optional[bytes] = None,
2243+
query_image_model: Optional[str] = "resnet34",
2244+
combine_with_text: bool = False,
2245+
text_weight: float = 0.5,
2246+
image_weight: float = 0.5,
2247+
combine_strategy: str = "weighted_sum",
22422248
) -> OnlineResponse:
22432249
"""
22442250
Retrieves the top k closest document features. Note, embeddings are a subset of features.
@@ -2247,13 +2253,105 @@ def retrieve_online_documents_v2(
22472253
features: The list of features that should be retrieved from the online document store. These features can be
22482254
specified either as a list of string document feature references or as a feature service. String feature
22492255
references must have format "feature_view:feature", e.g, "document_fv:document_embeddings".
2250-
query: The embedded query to retrieve the closest document features for (optional)
22512256
top_k: The number of closest document features to retrieve.
2257+
query_string: Text query for hybrid search (alternative to query parameter)
22522258
distance_metric: The distance metric to use for retrieval.
2253-
query_string: The query string to retrieve the closest document features using keyword search (bm25).
2259+
query_image_bytes: Query image as bytes (for image similarity search)
2260+
query_image_model: Model name for image embedding generation (defaults to "resnet34")
2261+
combine_with_text: Whether to combine text and image embeddings for multi-modal search (requires both query and query_image_bytes)
2262+
text_weight: Weight for text embedding in combined search (0.0 to 1.0); text_weight + image_weight must equal 1.0 when combining
2263+
image_weight: Weight for image embedding in combined search (0.0 to 1.0); text_weight + image_weight must equal 1.0 when combining
2264+
combine_strategy: Strategy for combining embeddings ("weighted_sum", "concatenate", "average")
2265+
2266+
Returns:
2267+
OnlineResponse with similar documents and metadata
2268+
2269+
Examples:
2270+
Text search only::
2271+
2272+
results = store.retrieve_online_documents_v2(
2273+
features=["documents:embedding", "documents:title"],
2274+
query=[0.1, 0.2, 0.3], # text embedding vector
2275+
top_k=5
2276+
)
2277+
2278+
Image search only::
2279+
2280+
results = store.retrieve_online_documents_v2(
2281+
features=["images:embedding", "images:filename"],
2282+
query_image_bytes=b"image_data", # image bytes
2283+
top_k=5
2284+
)
2285+
2286+
Combined text + image search::
2287+
2288+
results = store.retrieve_online_documents_v2(
2289+
features=["documents:embedding", "documents:title"],
2290+
query=[0.1, 0.2, 0.3], # text embedding vector
2291+
query_image_bytes=b"image_data", # image bytes
2292+
combine_with_text=True,
2293+
text_weight=0.3,
2294+
image_weight=0.7,
2295+
top_k=5
2296+
)
22542297
"""
2255-
assert query is not None or query_string is not None, (
2256-
"Either query or query_string must be provided."
2298+
if query is None and not query_image_bytes and not query_string:
2299+
raise ValueError(
2300+
"Must provide either query (text embedding), "
2301+
"query_image_bytes, or query_string"
2302+
)
2303+
2304+
if combine_with_text and not (query is not None and query_image_bytes):
2305+
raise ValueError(
2306+
"combine_with_text=True requires both query (text embedding) "
2307+
"and query_image_bytes"
2308+
)
2309+
2310+
if combine_with_text and abs(text_weight + image_weight - 1.0) > 1e-6:
2311+
raise ValueError("text_weight + image_weight must equal 1.0 when combining")
2312+
2313+
image_embedding = None
2314+
if query_image_bytes is not None:
2315+
try:
2316+
from feast.image_utils import ImageFeatureExtractor
2317+
2318+
model_name = query_image_model or "resnet34"
2319+
extractor = ImageFeatureExtractor(model_name)
2320+
image_embedding = extractor.extract_embedding(query_image_bytes)
2321+
except ImportError:
2322+
raise ImportError(
2323+
"Image processing dependencies are not installed. "
2324+
"Please install with: pip install feast[image]"
2325+
)
2326+
2327+
text_embedding = query
2328+
2329+
if (
2330+
combine_with_text
2331+
and text_embedding is not None
2332+
and image_embedding is not None
2333+
):
2334+
# Combine text and image embeddings
2335+
from feast.image_utils import combine_embeddings
2336+
2337+
final_query = combine_embeddings(
2338+
text_embedding=text_embedding,
2339+
image_embedding=image_embedding,
2340+
strategy=combine_strategy,
2341+
text_weight=text_weight,
2342+
image_weight=image_weight,
2343+
)
2344+
elif image_embedding is not None:
2345+
final_query = image_embedding
2346+
elif text_embedding is not None:
2347+
final_query = text_embedding
2348+
else:
2349+
final_query = None
2350+
2351+
effective_query = final_query
2352+
2353+
assert effective_query is not None or query_string is not None, (
2354+
"Either query embedding or query_string must be provided."
22572355
)
22582356

22592357
(
@@ -2295,7 +2393,7 @@ def retrieve_online_documents_v2(
22952393
provider,
22962394
requested_feature_view,
22972395
requested_features,
2298-
query,
2396+
effective_query,
22992397
top_k,
23002398
distance_metric,
23012399
query_string,

0 commit comments

Comments
 (0)