Skip to content

Commit 994a632

Browse files
committed
fix: Add get_table_query_string_with_alias() for PostgreSQL subquery aliasing
PostgreSQL requires all subqueries in FROM clauses to have aliases. This adds a new method get_table_query_string_with_alias() to PostgreSQLSource that automatically adds an alias when the source is query-based. The existing get_table_query_string() is preserved for backward compatibility and updated with documentation explaining when to use the new method. Fixes #5605 Signed-off-by: Yassin Nouh <yassinmnouh@gmail.com> Signed-off-by: yassinnouh21 <yassinnouh21@gmail.com>
1 parent ce35ce6 commit 994a632

File tree

2 files changed

+168
-0
lines changed

2 files changed

+168
-0
lines changed

sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres_source.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,11 +145,44 @@ def get_table_column_names_and_types(
145145
)
146146

147147
def get_table_query_string(self) -> str:
148+
"""Returns a string that can be used to reference this table in SQL.
149+
150+
For query-based sources, returns the query wrapped in parentheses.
151+
152+
Note:
153+
When using the returned string directly in a FROM clause with PostgreSQL,
154+
you may need to add an alias if this is a query-based source. PostgreSQL
155+
requires all subqueries in FROM clauses to have aliases. Consider using
156+
get_table_query_string_with_alias() for automatic aliasing.
157+
"""
148158
if self._postgres_options._table:
149159
return f"{self._postgres_options._table}"
150160
else:
151161
return f"({self._postgres_options._query})"
152162

163+
def get_table_query_string_with_alias(self, alias: str = "subquery") -> str:
164+
"""Returns a string for use in FROM clause with alias for PostgreSQL compatibility.
165+
166+
PostgreSQL requires all subqueries in FROM clauses to have aliases. This method
167+
automatically adds an alias when the source is query-based.
168+
169+
Args:
170+
alias: The alias to use for query-based sources. Defaults to "subquery".
171+
172+
Returns:
173+
For table-based sources: the table name (no alias needed).
174+
For query-based sources: "(query) AS alias".
175+
176+
Example:
177+
>>> source = PostgreSQLSource(query="SELECT * FROM my_table", ...)
178+
>>> entity_sql = f"SELECT id, ts FROM {source.get_table_query_string_with_alias()}"
179+
# Results in: "SELECT id, ts FROM (SELECT * FROM my_table) AS subquery"
180+
"""
181+
if self._postgres_options._table:
182+
return f"{self._postgres_options._table}"
183+
else:
184+
return f"({self._postgres_options._query}) AS {alias}"
185+
153186

154187
class PostgreSQLOptions:
155188
def __init__(

sdk/python/tests/unit/infra/offline_stores/contrib/postgres_offline_store/test_postgres.py

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -950,3 +950,138 @@ def test_cli_date_combinations(self):
950950
# Should not fail on parameter validation
951951
stderr_output = result.stderr.decode()
952952
assert "must be provided" not in stderr_output
953+
954+
955+
class TestPostgreSQLSourceQueryStringAlias:
956+
"""Test suite for get_table_query_string_with_alias() method.
957+
958+
This addresses GitHub issue #5605: PostgreSQL requires all subqueries
959+
in FROM clauses to have aliases.
960+
"""
961+
962+
def test_table_source_get_table_query_string(self):
963+
"""Test get_table_query_string() with table-based source"""
964+
source = PostgreSQLSource(
965+
name="test_source",
966+
table="my_schema.my_table",
967+
timestamp_field="event_timestamp",
968+
)
969+
result = source.get_table_query_string()
970+
assert result == "my_schema.my_table"
971+
972+
def test_query_source_get_table_query_string(self):
973+
"""Test get_table_query_string() with query-based source"""
974+
source = PostgreSQLSource(
975+
name="test_source",
976+
query="SELECT * FROM my_table WHERE active = true",
977+
timestamp_field="event_timestamp",
978+
)
979+
result = source.get_table_query_string()
980+
assert result == "(SELECT * FROM my_table WHERE active = true)"
981+
982+
def test_table_source_with_alias(self):
983+
"""Test get_table_query_string_with_alias() with table-based source returns table without alias"""
984+
source = PostgreSQLSource(
985+
name="test_source",
986+
table="my_schema.my_table",
987+
timestamp_field="event_timestamp",
988+
)
989+
result = source.get_table_query_string_with_alias()
990+
# Table sources don't need aliases
991+
assert result == "my_schema.my_table"
992+
993+
def test_query_source_with_default_alias(self):
994+
"""Test get_table_query_string_with_alias() with query-based source uses default alias"""
995+
source = PostgreSQLSource(
996+
name="test_source",
997+
query="SELECT * FROM my_table WHERE active = true",
998+
timestamp_field="event_timestamp",
999+
)
1000+
result = source.get_table_query_string_with_alias()
1001+
assert result == "(SELECT * FROM my_table WHERE active = true) AS subquery"
1002+
1003+
def test_query_source_with_custom_alias(self):
1004+
"""Test get_table_query_string_with_alias() with custom alias"""
1005+
source = PostgreSQLSource(
1006+
name="test_source",
1007+
query="SELECT id, name FROM users",
1008+
timestamp_field="event_timestamp",
1009+
)
1010+
result = source.get_table_query_string_with_alias(alias="user_data")
1011+
assert result == "(SELECT id, name FROM users) AS user_data"
1012+
1013+
def test_table_source_with_custom_alias_ignored(self):
1014+
"""Test get_table_query_string_with_alias() ignores alias for table-based sources"""
1015+
source = PostgreSQLSource(
1016+
name="test_source",
1017+
table="events",
1018+
timestamp_field="event_timestamp",
1019+
)
1020+
result = source.get_table_query_string_with_alias(alias="ignored_alias")
1021+
# Alias should be ignored for table sources
1022+
assert result == "events"
1023+
1024+
def test_sql_query_with_alias_is_valid(self):
1025+
"""Test that SQL using get_table_query_string_with_alias() is syntactically valid"""
1026+
source = PostgreSQLSource(
1027+
name="test_source",
1028+
query="SELECT id, ts FROM raw_data",
1029+
timestamp_field="ts",
1030+
)
1031+
1032+
# Construct a SQL query using the new method
1033+
entity_sql = f"SELECT id, ts FROM {source.get_table_query_string_with_alias()}"
1034+
1035+
# Verify SQL is valid using sqlglot
1036+
parsed = sqlglot.parse(entity_sql, dialect="postgres")
1037+
assert len(parsed) == 1
1038+
assert parsed[0] is not None
1039+
1040+
def test_sql_query_without_alias_fails_in_postgres(self):
1041+
"""Test that SQL using get_table_query_string() for query source produces invalid PostgreSQL
1042+
1043+
This demonstrates the issue that get_table_query_string_with_alias() fixes:
1044+
PostgreSQL requires all subqueries in FROM clauses to have aliases.
1045+
"""
1046+
source = PostgreSQLSource(
1047+
name="test_source",
1048+
query="SELECT id, ts FROM raw_data",
1049+
timestamp_field="ts",
1050+
)
1051+
1052+
# Using the old method (without alias) for query-based source
1053+
entity_sql_without_alias = (
1054+
f"SELECT id, ts FROM {source.get_table_query_string()}"
1055+
)
1056+
1057+
# This produces: SELECT id, ts FROM (SELECT id, ts FROM raw_data)
1058+
# which is invalid in PostgreSQL (subquery needs alias)
1059+
# sqlglot is lenient and may parse it, but PostgreSQL would reject it
1060+
assert (
1061+
"AS" not in entity_sql_without_alias
1062+
), "get_table_query_string() should not add alias"
1063+
1064+
# Using the new method (with alias) produces valid SQL
1065+
entity_sql_with_alias = (
1066+
f"SELECT id, ts FROM {source.get_table_query_string_with_alias()}"
1067+
)
1068+
assert "AS subquery" in entity_sql_with_alias
1069+
1070+
def test_complex_query_with_alias(self):
1071+
"""Test get_table_query_string_with_alias() with complex nested query"""
1072+
complex_query = """
1073+
SELECT u.id, u.name, o.total
1074+
FROM users u
1075+
JOIN orders o ON u.id = o.user_id
1076+
WHERE o.created_at > '2023-01-01'
1077+
"""
1078+
source = PostgreSQLSource(
1079+
name="test_source",
1080+
query=complex_query,
1081+
timestamp_field="created_at",
1082+
)
1083+
1084+
result = source.get_table_query_string_with_alias(alias="user_orders")
1085+
assert result.startswith("(")
1086+
assert result.endswith(") AS user_orders")
1087+
assert "SELECT u.id" in result

0 commit comments

Comments
 (0)