Skip to content

Commit e9dc0e0

Browse files
patelchaitanyntkathole
authored andcommitted
fix the retrival in the postgres
Signed-off-by: Chaitany patel <patelchaitany93@gmail.com>
1 parent 18ede88 commit e9dc0e0

5 files changed

Lines changed: 127 additions & 47 deletions

File tree

.secrets.baseline

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1460,14 +1460,14 @@
14601460
"filename": "sdk/python/tests/universal/feature_repos/repo_configuration.py",
14611461
"hashed_secret": "d90e76ef629fb00c95f4e84fec29fbda111e2392",
14621462
"is_verified": false,
1463-
"line_number": 452
1463+
"line_number": 459
14641464
},
14651465
{
14661466
"type": "Secret Keyword",
14671467
"filename": "sdk/python/tests/universal/feature_repos/repo_configuration.py",
14681468
"hashed_secret": "5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8",
14691469
"is_verified": false,
1470-
"line_number": 454
1470+
"line_number": 461
14711471
}
14721472
],
14731473
"sdk/python/tests/universal/feature_repos/universal/data_sources/file.py": [
@@ -1539,5 +1539,5 @@
15391539
}
15401540
]
15411541
},
1542-
"generated_at": "2026-03-14T16:01:28Z"
1542+
"generated_at": "2026-03-18T08:09:25Z"
15431543
}

sdk/python/feast/infra/online_stores/postgres_online_store/postgres.py

Lines changed: 93 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -543,50 +543,114 @@ def retrieve_online_documents_v2(
543543
tsquery_str = " & ".join(query_string.split())
544544
query = sql.SQL(
545545
"""
546+
WITH vector_candidates AS (
547+
SELECT entity_key,
548+
MIN(vector_value {distance_metric_sql} %s::vector) as distance
549+
FROM {table_name}
550+
WHERE vector_value IS NOT NULL
551+
GROUP BY entity_key
552+
ORDER BY distance
553+
LIMIT {top_k}
554+
),
555+
text_candidates AS (
556+
SELECT entity_key,
557+
MAX(ts_rank(to_tsvector('english', value_text), to_tsquery('english', %s))) as text_rank
558+
FROM {table_name}
559+
WHERE feature_name = ANY(%s)
560+
AND to_tsvector('english', value_text) @@ to_tsquery('english', %s)
561+
GROUP BY entity_key
562+
ORDER BY text_rank DESC
563+
LIMIT {top_k}
564+
),
565+
all_candidates AS (
566+
SELECT entity_key FROM vector_candidates
567+
UNION
568+
SELECT entity_key FROM text_candidates
569+
),
570+
scored AS (
571+
SELECT
572+
ac.entity_key,
573+
COALESCE(vc.distance,
574+
(SELECT MIN(t.vector_value {distance_metric_sql} %s::vector)
575+
FROM {table_name} t
576+
WHERE t.entity_key = ac.entity_key AND t.vector_value IS NOT NULL)
577+
) as distance,
578+
COALESCE(tc.text_rank,
579+
COALESCE(
580+
(SELECT MAX(ts_rank(to_tsvector('english', ft.value_text), to_tsquery('english', %s)))
581+
FROM {table_name} ft
582+
WHERE ft.entity_key = ac.entity_key AND ft.feature_name = ANY(%s) AND ft.value_text IS NOT NULL),
583+
0
584+
)
585+
) as text_rank
586+
FROM all_candidates ac
587+
LEFT JOIN vector_candidates vc ON ac.entity_key = vc.entity_key
588+
LEFT JOIN text_candidates tc ON ac.entity_key = tc.entity_key
589+
ORDER BY text_rank DESC, distance
590+
LIMIT {top_k}
591+
)
546592
SELECT
547-
entity_key,
548-
feature_name,
549-
value,
550-
vector_value,
551-
vector_value {distance_metric_sql} %s::vector as distance,
552-
ts_rank(to_tsvector('english', value_text), to_tsquery('english', %s)) as text_rank,
553-
event_ts,
554-
created_ts
555-
FROM {table_name}
556-
WHERE feature_name = ANY(%s) AND to_tsvector('english', value_text) @@ to_tsquery('english', %s)
557-
ORDER BY distance
558-
LIMIT {top_k}
593+
t1.entity_key,
594+
t1.feature_name,
595+
t1.value,
596+
t1.vector_value,
597+
s.distance,
598+
s.text_rank,
599+
t1.event_ts,
600+
t1.created_ts
601+
FROM {table_name} t1
602+
INNER JOIN scored s ON t1.entity_key = s.entity_key
603+
WHERE t1.feature_name = ANY(%s)
604+
ORDER BY s.text_rank DESC, s.distance
559605
"""
560606
).format(
561607
distance_metric_sql=sql.SQL(distance_metric_sql),
562608
table_name=sql.Identifier(table_name),
563609
top_k=sql.Literal(top_k),
564610
)
565-
params = (embedding, tsquery_str, string_fields, tsquery_str)
566-
611+
params = (
612+
embedding,
613+
tsquery_str,
614+
string_fields,
615+
tsquery_str,
616+
embedding,
617+
tsquery_str,
618+
string_fields,
619+
requested_features,
620+
)
567621
elif embedding is not None:
568622
# Case 2: Vector Search Only
569623
query = sql.SQL(
570624
"""
625+
WITH vector_matches AS (
626+
SELECT entity_key,
627+
MIN(vector_value {distance_metric_sql} %s::vector) as distance
628+
FROM {table_name}
629+
WHERE vector_value IS NOT NULL
630+
GROUP BY entity_key
631+
ORDER BY distance
632+
LIMIT {top_k}
633+
)
571634
SELECT
572-
entity_key,
573-
feature_name,
574-
value,
575-
vector_value,
576-
vector_value {distance_metric_sql} %s::vector as distance,
577-
NULL as text_rank, -- Keep consistent columns
578-
event_ts,
579-
created_ts
580-
FROM {table_name}
581-
ORDER BY distance
582-
LIMIT {top_k}
635+
t1.entity_key,
636+
t1.feature_name,
637+
t1.value,
638+
t1.vector_value,
639+
t2.distance,
640+
NULL as text_rank,
641+
t1.event_ts,
642+
t1.created_ts
643+
FROM {table_name} t1
644+
INNER JOIN vector_matches t2 ON t1.entity_key = t2.entity_key
645+
WHERE t1.feature_name = ANY(%s)
646+
ORDER BY t2.distance
583647
"""
584648
).format(
585649
distance_metric_sql=sql.SQL(distance_metric_sql),
586650
table_name=sql.Identifier(table_name),
587651
top_k=sql.Literal(top_k),
588652
)
589-
params = (embedding,)
653+
params = (embedding, requested_features)
590654

591655
elif query_string is not None and string_fields:
592656
# Case 3: Text Search Only
@@ -686,9 +750,10 @@ def retrieve_online_documents_v2(
686750
sorted_entities = sorted(
687751
entities_dict.values(),
688752
key=lambda x: (
689-
x["vector_distance"] if embedding is not None else x["text_rank"]
753+
(-x["text_rank"], x["vector_distance"])
754+
if query_string is not None
755+
else (x["vector_distance"],)
690756
),
691-
reverse=(embedding is None),
692757
)[:top_k]
693758

694759
result: List[

sdk/python/tests/integration/online_store/test_universal_online.py

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1159,6 +1159,12 @@ def test_retrieve_online_documents_v2(environment, fake_document_data):
11591159
assert len(vector_results["text_field"]) == 5
11601160
assert len(vector_results["category"]) == 5
11611161

1162+
assert all(isinstance(v, list) for v in vector_results["embedding"])
1163+
assert all(isinstance(v, float) for v in vector_results["distance"])
1164+
assert all(isinstance(v, str) for v in vector_results["text_field"])
1165+
assert all(isinstance(v, str) for v in vector_results["category"])
1166+
assert all(isinstance(v, int) for v in vector_results["item_id"])
1167+
11621168
# Test 2: Vector similarity search with Cosine distance
11631169
vector_results = fs.retrieve_online_documents_v2(
11641170
features=[
@@ -1177,6 +1183,12 @@ def test_retrieve_online_documents_v2(environment, fake_document_data):
11771183
assert len(vector_results["text_field"]) == 5
11781184
assert len(vector_results["category"]) == 5
11791185

1186+
assert all(isinstance(v, list) for v in vector_results["embedding"])
1187+
assert all(isinstance(v, float) for v in vector_results["distance"])
1188+
assert all(isinstance(v, str) for v in vector_results["text_field"])
1189+
assert all(isinstance(v, str) for v in vector_results["category"])
1190+
assert all(isinstance(v, int) for v in vector_results["item_id"])
1191+
11801192
# Test 3: Full text search
11811193
text_results = fs.retrieve_online_documents_v2(
11821194
features=[
@@ -1195,6 +1207,11 @@ def test_retrieve_online_documents_v2(environment, fake_document_data):
11951207
assert len(text_results["category"]) == 5
11961208
assert len(text_results["item_id"]) == 5
11971209

1210+
assert all(isinstance(v, str) for v in text_results["text_field"])
1211+
assert all(isinstance(v, float) for v in text_results["text_rank"])
1212+
assert all(isinstance(v, str) for v in text_results["category"])
1213+
assert all(isinstance(v, int) for v in text_results["item_id"])
1214+
11981215
# Verify text rank values are between 0 and 1
11991216
assert all(0 <= rank <= 1 for rank in text_results["text_rank"])
12001217

@@ -1224,22 +1241,12 @@ def test_retrieve_online_documents_v2(environment, fake_document_data):
12241241
assert len(hybrid_results["category"]) == 5
12251242
assert len(hybrid_results["item_id"]) == 5
12261243

1227-
# Test 5: Hybrid search with different text query
1228-
hybrid_results = fs.retrieve_online_documents_v2(
1229-
features=[
1230-
"item_embeddings:embedding",
1231-
"item_embeddings:text_field",
1232-
"item_embeddings:category",
1233-
"item_embeddings:item_id",
1234-
],
1235-
query=query_embedding,
1236-
query_string="Category-1",
1237-
top_k=5,
1238-
distance_metric="L2",
1239-
).to_dict()
1240-
1241-
# Verify results contain only documents from Category-1
1242-
assert all(cat == "Category-1" for cat in hybrid_results["category"])
1244+
assert all(isinstance(v, list) for v in hybrid_results["embedding"])
1245+
assert all(isinstance(v, float) for v in hybrid_results["distance"])
1246+
assert all(isinstance(v, str) for v in hybrid_results["text_field"])
1247+
assert all(isinstance(v, float) for v in hybrid_results["text_rank"])
1248+
assert all(isinstance(v, str) for v in hybrid_results["category"])
1249+
assert all(isinstance(v, int) for v in hybrid_results["item_id"])
12431250

12441251
# Test 6: Full text search with no matches
12451252
no_match_results = fs.retrieve_online_documents_v2(

sdk/python/tests/universal/feature_repos/repo_configuration.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,9 @@
193193
from tests.universal.feature_repos.universal.online_store.milvus import (
194194
MilvusOnlineStoreCreator,
195195
)
196+
from tests.universal.feature_repos.universal.online_store.postgres import (
197+
PGVectorOnlineStoreCreator,
198+
)
196199
from tests.universal.feature_repos.universal.online_store.redis import (
197200
RedisOnlineStoreCreator,
198201
)
@@ -206,6 +209,10 @@
206209
"datastore": ("datastore", DatastoreOnlineStoreCreator),
207210
"bigtable": ("bigtable", BigtableOnlineStoreCreator),
208211
}
212+
AVAILABLE_ONLINE_STORES["pgvector"] = (
213+
{"type": "postgres", "vector_enabled": True, "sslmode": "disable"},
214+
PGVectorOnlineStoreCreator,
215+
)
209216

210217
for key, replacement in replacements.items():
211218
if key in AVAILABLE_ONLINE_STORES:

sdk/python/tests/universal/feature_repos/universal/online_store/postgres.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def create_online_store(self) -> Dict[str, Any]:
6969
"database": "test",
7070
"vector_enabled": True,
7171
"port": self.container.get_exposed_port(5432),
72+
"sslmode": "disable",
7273
}
7374

7475
def teardown(self):

0 commit comments

Comments
 (0)