From 2ab8fc3ce6ba81548dd6556aa3100cdb8a257d01 Mon Sep 17 00:00:00 2001 From: Galen Date: Fri, 16 Feb 2024 19:23:01 +0200 Subject: [PATCH 1/6] Add SQLAlchemy Engine settings to registry config Signed-off-by: Galen --- sdk/python/feast/infra/registry/sql.py | 7 +++++-- sdk/python/feast/repo_config.py | 3 +++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/sdk/python/feast/infra/registry/sql.py b/sdk/python/feast/infra/registry/sql.py index d57bcc7c0a..46e1223f22 100644 --- a/sdk/python/feast/infra/registry/sql.py +++ b/sdk/python/feast/infra/registry/sql.py @@ -4,7 +4,7 @@ from enum import Enum from pathlib import Path from threading import Lock -from typing import Any, Callable, List, Optional, Set, Union +from typing import Any, Callable, Dict, List, Optional, Set, Union from pydantic import StrictStr from sqlalchemy import ( # type: ignore @@ -190,6 +190,9 @@ class SqlRegistryConfig(RegistryConfig): """ str: Path to metadata store. If registry_type is 'sql', then this is a database URL as expected by SQLAlchemy """ + sqlalchemy_config_kwargs: Optional[Dict[str, str]] = {"echo": False} + """ Dict[str, str]: Extra arguments to pass to SQLAlchemy.create_engine. """ + class SqlRegistry(BaseRegistry): def __init__( @@ -199,7 +202,7 @@ def __init__( repo_path: Optional[Path], ): assert registry_config is not None, "SqlRegistry needs a valid registry_config" - self.engine: Engine = create_engine(registry_config.path, echo=False) + self.engine: Engine = create_engine(registry_config.path, **registry_config.sqlalchemy_config_kwargs) metadata.create_all(self.engine) self.cached_registry_proto = self.proto() proto_registry_utils.init_project_metadata(self.cached_registry_proto, project) diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index d500059c6b..730af5d76d 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -125,6 +125,9 @@ class RegistryConfig(FeastBaseModel): s3_additional_kwargs: Optional[Dict[str, str]] = None """ Dict[str, str]: Extra arguments to pass to boto3 when writing the registry file to S3. """ + sqlalchemy_config_kwargs: Optional[Dict[str, str]] = {} + """ Dict[str, str]: Extra arguments to pass to SQLAlchemy.create_engine. """ + class RepoConfig(FeastBaseModel): """Repo config. Typically loaded from `feature_store.yaml`""" From a38f493ba2187c99684e024a669885b8d41cb83e Mon Sep 17 00:00:00 2001 From: Galen Terziysky <105213101+galen-ft@users.noreply.github.com> Date: Fri, 16 Feb 2024 20:17:53 +0200 Subject: [PATCH 2/6] Allow for dict values of any-type Signed-off-by: Galen Terziysky <105213101+galen-ft@users.noreply.github.com> --- sdk/python/feast/infra/registry/sql.py | 4 ++-- sdk/python/feast/repo_config.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sdk/python/feast/infra/registry/sql.py b/sdk/python/feast/infra/registry/sql.py index 46e1223f22..b0adb2b501 100644 --- a/sdk/python/feast/infra/registry/sql.py +++ b/sdk/python/feast/infra/registry/sql.py @@ -190,8 +190,8 @@ class SqlRegistryConfig(RegistryConfig): """ str: Path to metadata store. If registry_type is 'sql', then this is a database URL as expected by SQLAlchemy """ - sqlalchemy_config_kwargs: Optional[Dict[str, str]] = {"echo": False} - """ Dict[str, str]: Extra arguments to pass to SQLAlchemy.create_engine. """ + sqlalchemy_config_kwargs: Optional[Dict[str, Any]] = {"echo": False} + """ Dict[str, Any]: Extra arguments to pass to SQLAlchemy.create_engine. """ class SqlRegistry(BaseRegistry): diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index 730af5d76d..d8cc2a4b6a 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -125,8 +125,8 @@ class RegistryConfig(FeastBaseModel): s3_additional_kwargs: Optional[Dict[str, str]] = None """ Dict[str, str]: Extra arguments to pass to boto3 when writing the registry file to S3. """ - sqlalchemy_config_kwargs: Optional[Dict[str, str]] = {} - """ Dict[str, str]: Extra arguments to pass to SQLAlchemy.create_engine. """ + sqlalchemy_config_kwargs: Optional[Dict[str, Any]] = {} + """ Dict[str, Any]: Extra arguments to pass to SQLAlchemy.create_engine. """ class RepoConfig(FeastBaseModel): From b3037c05e3b76796a66deac7d7cee3baa4ea5eb5 Mon Sep 17 00:00:00 2001 From: Galen Terziysky <105213101+galen-ft@users.noreply.github.com> Date: Thu, 7 Mar 2024 17:55:40 +0200 Subject: [PATCH 3/6] Extend PostgreSQL and MySQL registry tests and add examples to the docs Signed-off-by: Galen Terziysky <105213101+galen-ft@users.noreply.github.com> --- docs/getting-started/concepts/registry.md | 2 ++ docs/tutorials/using-scalable-registry.md | 2 ++ sdk/python/tests/unit/test_sql_registry.py | 2 ++ 3 files changed, 6 insertions(+) diff --git a/docs/getting-started/concepts/registry.md b/docs/getting-started/concepts/registry.md index f7d4a5b3e1..41d6330e77 100644 --- a/docs/getting-started/concepts/registry.md +++ b/docs/getting-started/concepts/registry.md @@ -57,6 +57,8 @@ registry: registry_type: sql path: postgresql://postgres:mysecretpassword@127.0.0.1:55001/feast cache_ttl_seconds: 60 + sqlalchemy_config_kwargs: + pool_pre_ping: true ``` This supports any SQLAlchemy compatible database as a backend. The exact schema can be seen in [sql.py](https://github.com/feast-dev/feast/blob/master/sdk/python/feast/infra/registry/sql.py) diff --git a/docs/tutorials/using-scalable-registry.md b/docs/tutorials/using-scalable-registry.md index a87aedd9b9..79021127f2 100644 --- a/docs/tutorials/using-scalable-registry.md +++ b/docs/tutorials/using-scalable-registry.md @@ -29,6 +29,8 @@ registry: registry_type: sql path: postgresql://postgres:mysecretpassword@127.0.0.1:55001/feast cache_ttl_seconds: 60 + sqlalchemy_config_kwargs: + pool_pre_ping: true ``` Specifically, the registry_type needs to be set to sql in the registry config block. On doing so, the path should refer to the [Database URL](https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls) for the database to be used, as expected by SQLAlchemy. No other additional commands are currently needed to configure this registry. diff --git a/sdk/python/tests/unit/test_sql_registry.py b/sdk/python/tests/unit/test_sql_registry.py index 722e318b0c..4ca41423c1 100644 --- a/sdk/python/tests/unit/test_sql_registry.py +++ b/sdk/python/tests/unit/test_sql_registry.py @@ -71,6 +71,7 @@ def pg_registry(): registry_config = RegistryConfig( registry_type="sql", path=f"postgresql://{POSTGRES_USER}:{POSTGRES_PASSWORD}@{container_host}:{container_port}/{POSTGRES_DB}", + sqlalchemy_config_kwargs={"echo": False, "pool_pre_ping": True}, ) yield SqlRegistry(registry_config, "project", None) @@ -106,6 +107,7 @@ def mysql_registry(): registry_config = RegistryConfig( registry_type="sql", path=f"mysql+pymysql://{POSTGRES_USER}:{POSTGRES_PASSWORD}@{container_host}:{container_port}/{POSTGRES_DB}", + sqlalchemy_config_kwargs={"echo": False, "pool_pre_ping": True}, ) yield SqlRegistry(registry_config, "project", None) From 16da7c9c350bbecde7539ee15c7c3419c20e7e64 Mon Sep 17 00:00:00 2001 From: Galen Terziysky <105213101+galen-ft@users.noreply.github.com> Date: Thu, 7 Mar 2024 18:42:26 +0200 Subject: [PATCH 4/6] Add "echo=False" to registry docs Signed-off-by: Galen Terziysky <105213101+galen-ft@users.noreply.github.com> --- docs/getting-started/concepts/registry.md | 1 + docs/tutorials/using-scalable-registry.md | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/getting-started/concepts/registry.md b/docs/getting-started/concepts/registry.md index 41d6330e77..8ac32ce87b 100644 --- a/docs/getting-started/concepts/registry.md +++ b/docs/getting-started/concepts/registry.md @@ -58,6 +58,7 @@ registry: path: postgresql://postgres:mysecretpassword@127.0.0.1:55001/feast cache_ttl_seconds: 60 sqlalchemy_config_kwargs: + echo: false pool_pre_ping: true ``` diff --git a/docs/tutorials/using-scalable-registry.md b/docs/tutorials/using-scalable-registry.md index 79021127f2..30b8e01ed5 100644 --- a/docs/tutorials/using-scalable-registry.md +++ b/docs/tutorials/using-scalable-registry.md @@ -30,6 +30,7 @@ registry: path: postgresql://postgres:mysecretpassword@127.0.0.1:55001/feast cache_ttl_seconds: 60 sqlalchemy_config_kwargs: + echo: false pool_pre_ping: true ``` From 809c863ea512d0c8167d1abb66a31350bc527053 Mon Sep 17 00:00:00 2001 From: Galen Terziysky <105213101+galen-ft@users.noreply.github.com> Date: Fri, 8 Mar 2024 14:21:41 +0200 Subject: [PATCH 5/6] Fix mypy type checker error Signed-off-by: Galen Terziysky <105213101+galen-ft@users.noreply.github.com> --- sdk/python/feast/infra/registry/sql.py | 2 +- sdk/python/feast/repo_config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/python/feast/infra/registry/sql.py b/sdk/python/feast/infra/registry/sql.py index b0adb2b501..b6032ccec4 100644 --- a/sdk/python/feast/infra/registry/sql.py +++ b/sdk/python/feast/infra/registry/sql.py @@ -190,7 +190,7 @@ class SqlRegistryConfig(RegistryConfig): """ str: Path to metadata store. If registry_type is 'sql', then this is a database URL as expected by SQLAlchemy """ - sqlalchemy_config_kwargs: Optional[Dict[str, Any]] = {"echo": False} + sqlalchemy_config_kwargs: Dict[str, Any] = {"echo": False} """ Dict[str, Any]: Extra arguments to pass to SQLAlchemy.create_engine. """ diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index d8cc2a4b6a..b96aa17484 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -125,7 +125,7 @@ class RegistryConfig(FeastBaseModel): s3_additional_kwargs: Optional[Dict[str, str]] = None """ Dict[str, str]: Extra arguments to pass to boto3 when writing the registry file to S3. """ - sqlalchemy_config_kwargs: Optional[Dict[str, Any]] = {} + sqlalchemy_config_kwargs: Dict[str, Any] = {} """ Dict[str, Any]: Extra arguments to pass to SQLAlchemy.create_engine. """ From 0b6b9579b053f0a47c26dcf432025526f5983b8d Mon Sep 17 00:00:00 2001 From: Galen Terziysky <105213101+galen-ft@users.noreply.github.com> Date: Fri, 8 Mar 2024 17:03:23 +0200 Subject: [PATCH 6/6] Accept change by the Black code formatter Signed-off-by: Galen Terziysky <105213101+galen-ft@users.noreply.github.com> --- sdk/python/feast/infra/registry/sql.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sdk/python/feast/infra/registry/sql.py b/sdk/python/feast/infra/registry/sql.py index b6032ccec4..1e5c2a8725 100644 --- a/sdk/python/feast/infra/registry/sql.py +++ b/sdk/python/feast/infra/registry/sql.py @@ -202,7 +202,9 @@ def __init__( repo_path: Optional[Path], ): assert registry_config is not None, "SqlRegistry needs a valid registry_config" - self.engine: Engine = create_engine(registry_config.path, **registry_config.sqlalchemy_config_kwargs) + self.engine: Engine = create_engine( + registry_config.path, **registry_config.sqlalchemy_config_kwargs + ) metadata.create_all(self.engine) self.cached_registry_proto = self.proto() proto_registry_utils.init_project_metadata(self.cached_registry_proto, project)