Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Refactor OfflineStoreConfig classes into their owning modules
Signed-off-by: Achal Shah <achals@gmail.com>
  • Loading branch information
achals committed Jun 21, 2021
commit c642b4280d4dd2387a84a91dde6b756a0f41070c
6 changes: 3 additions & 3 deletions sdk/python/feast/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,10 @@ def __init__(self, online_store_class_name: str):
)


class FeastOnlineStoreConfigInvalidName(Exception):
def __init__(self, online_store_config_class_name: str):
class FeastStoreConfigInvalidName(Exception):
def __init__(self, online_store_config_class_name: str, store_type="Online"):
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason to set a default here?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope, going to clean this up

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, should online_store_config_class_name be online store agnostic?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

super().__init__(
f"Online Store Config Class '{online_store_config_class_name}' should end with the string `OnlineStoreConfig`.'"
f"Online Store Config Class '{online_store_config_class_name}' should end with the string `{store_type}StoreConfig`.'"
)


Expand Down
14 changes: 13 additions & 1 deletion sdk/python/feast/infra/offline_stores/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import pandas
import pyarrow
from jinja2 import BaseLoader, Environment
from pydantic import StrictStr
from pydantic.typing import Literal
from tenacity import retry, stop_after_delay, wait_fixed

from feast import errors
Expand All @@ -20,7 +22,7 @@
_get_requested_feature_views_to_features_dict,
)
from feast.registry import Registry
from feast.repo_config import BigQueryOfflineStoreConfig, RepoConfig
from feast.repo_config import FeastConfigBaseModel, RepoConfig

try:
from google.api_core.exceptions import NotFound
Expand All @@ -34,6 +36,16 @@
raise FeastExtrasDependencyImportError("gcp", str(e))


class BigQueryOfflineStoreConfig(FeastConfigBaseModel):
""" Offline store config for GCP BigQuery """

type: Literal["bigquery"] = "bigquery"
""" Offline store type selector"""

dataset: StrictStr = "feast"
""" (optional) BigQuery Dataset name for temporary tables """


class BigQueryOfflineStore(OfflineStore):
@staticmethod
def pull_latest_from_table_or_query(
Expand Down
10 changes: 9 additions & 1 deletion sdk/python/feast/infra/offline_stores/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pandas as pd
import pyarrow
import pytz
from pydantic.typing import Literal

from feast.data_source import DataSource, FileSource
from feast.errors import FeastJoinKeysDuringMaterialization
Expand All @@ -15,7 +16,14 @@
_run_field_mapping,
)
from feast.registry import Registry
from feast.repo_config import RepoConfig
from feast.repo_config import FeastConfigBaseModel, RepoConfig


class FileOfflineStoreConfig(FeastConfigBaseModel):
""" Offline store config for local (file-based) store """

type: Literal["file"] = "file"
""" Offline store type selector"""


class FileRetrievalJob(RetrievalJob):
Expand Down
64 changes: 28 additions & 36 deletions sdk/python/feast/infra/offline_stores/helpers.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,33 @@
from feast.data_source import BigQuerySource, DataSource, FileSource
from feast.errors import FeastOfflineStoreUnsupportedDataSource
import importlib
from typing import Any

from feast import errors
from feast.infra.offline_stores.offline_store import OfflineStore
from feast.repo_config import (
BigQueryOfflineStoreConfig,
FileOfflineStoreConfig,
OfflineStoreConfig,
)


def get_offline_store_from_config(
offline_store_config: OfflineStoreConfig,
) -> OfflineStore:
def get_offline_store_from_config(offline_store_config: Any,) -> OfflineStore:
"""Get the offline store from offline store config"""

if isinstance(offline_store_config, FileOfflineStoreConfig):
from feast.infra.offline_stores.file import FileOfflineStore

return FileOfflineStore()
elif isinstance(offline_store_config, BigQueryOfflineStoreConfig):
from feast.infra.offline_stores.bigquery import BigQueryOfflineStore

return BigQueryOfflineStore()

raise ValueError(f"Unsupported offline store config '{offline_store_config}'")


def assert_offline_store_supports_data_source(
offline_store_config: OfflineStoreConfig, data_source: DataSource
):
if (
isinstance(offline_store_config, FileOfflineStoreConfig)
and isinstance(data_source, FileSource)
) or (
isinstance(offline_store_config, BigQueryOfflineStoreConfig)
and isinstance(data_source, BigQuerySource)
):
return
raise FeastOfflineStoreUnsupportedDataSource(
offline_store_config.type, data_source.__class__.__name__
)
module_name = offline_store_config.__module__
qualified_name = type(offline_store_config).__name__
store_class_name = qualified_name.replace("Config", "")
try:
module = importlib.import_module(module_name)
except Exception as e:
# The original exception can be anything - either module not found,
# or any other kind of error happening during the module import time.
# So we should include the original error as well in the stack trace.
raise errors.FeastModuleImportError(
module_name, module_type="OfflineStore"
) from e

# Try getting the provider class definition
try:
offline_store_class = getattr(module, store_class_name)
except AttributeError:
# This can only be one type of error, when class_name attribute does not exist in the module
# So we don't have to include the original exception here
raise errors.FeastClassImportError(
module_name, store_class_name, class_type="OfflineStore"
) from None
return offline_store_class()
94 changes: 41 additions & 53 deletions sdk/python/feast/repo_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,23 @@
import yaml
from pydantic import BaseModel, StrictInt, StrictStr, ValidationError, root_validator
from pydantic.error_wrappers import ErrorWrapper
from pydantic.typing import Dict, Literal, Optional, Union
from pydantic.typing import Dict, Optional, Union

from feast import errors
from feast.telemetry import log_exceptions

# This dict exists so that:
# These dict exists so that:
# - existing values for the online store type in featurestore.yaml files continue to work in a backwards compatible way
# - first party and third party implementations can use the same class loading code path.
ONLINE_CONFIG_CLASS_FOR_TYPE = {
"sqlite": "feast.infra.online_stores.sqlite.SqliteOnlineStore",
"datastore": "feast.infra.online_stores.datastore.DatastoreOnlineStore",
"redis": "feast.infra.online_stores.redis.RedisOnlineStore",
"sqlite": "feast.infra.online_stores.sqlite.SqliteOnlineStoreConfig",
"datastore": "feast.infra.online_stores.datastore.DatastoreOnlineStoreConfig",
"redis": "feast.infra.online_stores.redis.RedisOnlineStoreConfig",
}

OFFLINE_CONFIG_CLASS_FOR_TYPE = {
"file": "feast.infra.offline_stores.file.FileOfflineStoreConfig",
"bigquery": "feast.infra.offline_stores.bigquery.BigQueryOfflineStoreConfig",
}


Expand All @@ -36,29 +41,6 @@ class Config:
extra = "forbid"


class FileOfflineStoreConfig(FeastBaseModel):
""" Offline store config for local (file-based) store """

type: Literal["file"] = "file"
""" Offline store type selector"""


class BigQueryOfflineStoreConfig(FeastBaseModel):
""" Offline store config for GCP BigQuery """

type: Literal["bigquery"] = "bigquery"
""" Offline store type selector"""

dataset: StrictStr = "feast"
""" (optional) BigQuery dataset name used for the BigQuery offline store """

project_id: Optional[StrictStr] = None
""" (optional) GCP project name used for the BigQuery offline store """


OfflineStoreConfig = Union[FileOfflineStoreConfig, BigQueryOfflineStoreConfig]


class RegistryConfig(FeastBaseModel):
""" Metadata Store Configuration. Configuration that relates to reading from and writing to the Feast registry."""

Expand Down Expand Up @@ -90,7 +72,7 @@ class RepoConfig(FeastBaseModel):
online_store: Any
""" OnlineStoreConfig: Online store configuration (optional depending on provider) """

offline_store: OfflineStoreConfig = FileOfflineStoreConfig()
offline_store: Any
""" OfflineStoreConfig: Offline store configuration (optional depending on provider) """

repo_path: Optional[Path] = None
Expand All @@ -101,6 +83,10 @@ def __init__(self, **data: Any):
self.online_store = get_online_config_from_type(self.online_store["type"])(
**self.online_store
)
if isinstance(self.offline_store, Dict):
self.offline_store = get_offline_config_from_type(
self.offline_store["type"]
)(**self.offline_store)

def get_registry_config(self):
if isinstance(self.registry, str):
Expand Down Expand Up @@ -172,22 +158,13 @@ def _validate_offline_store_config(cls, values):

offline_store_type = values["offline_store"]["type"]

# Make sure the user hasn't provided the wrong type
assert offline_store_type in ["file", "bigquery"]

# Validate the dict to ensure one of the union types match
try:
if offline_store_type == "file":
FileOfflineStoreConfig(**values["offline_store"])
elif offline_store_type == "bigquery":
BigQueryOfflineStoreConfig(**values["offline_store"])
else:
raise ValidationError(
f"Invalid offline store type {offline_store_type}"
)
offline_config_class = get_offline_config_from_type(offline_store_type)
offline_config_class(**values["offline_store"])
except ValidationError as e:
raise ValidationError(
[ErrorWrapper(e, loc="offline_store")], model=FileOfflineStoreConfig,
[ErrorWrapper(e, loc="offline_store")], model=RepoConfig,
)

return values
Expand All @@ -208,14 +185,11 @@ def __repr__(self) -> str:
)


def get_online_config_from_type(online_store_type: str):
if online_store_type in ONLINE_CONFIG_CLASS_FOR_TYPE:
online_store_type = ONLINE_CONFIG_CLASS_FOR_TYPE[online_store_type]
module_name, class_name = online_store_type.rsplit(".", 1)

if not class_name.endswith("OnlineStore"):
raise errors.FeastOnlineStoreConfigInvalidName(class_name)
config_class_name = f"{class_name}Config"
def get_config_class_from_type(
module_name: str, config_class_name: str, store_type: str
):
if not config_class_name.endswith(f"{store_type}Config"):
raise errors.FeastStoreConfigInvalidName(config_class_name)

# Try importing the module that contains the custom provider
try:
Expand All @@ -224,9 +198,7 @@ def get_online_config_from_type(online_store_type: str):
# The original exception can be anything - either module not found,
# or any other kind of error happening during the module import time.
# So we should include the original error as well in the stack trace.
raise errors.FeastModuleImportError(
module_name, module_type="OnlineStore"
) from e
raise errors.FeastModuleImportError(module_name, module_type=store_type) from e

# Try getting the provider class definition
try:
Expand All @@ -235,11 +207,27 @@ def get_online_config_from_type(online_store_type: str):
# This can only be one type of error, when class_name attribute does not exist in the module
# So we don't have to include the original exception here
raise errors.FeastClassImportError(
module_name, config_class_name, class_type="OnlineStoreConfig"
module_name, config_class_name, class_type=f"{store_type}Config"
) from None
return online_store_config_class


def get_online_config_from_type(online_store_type: str):
if online_store_type in ONLINE_CONFIG_CLASS_FOR_TYPE:
online_store_type = ONLINE_CONFIG_CLASS_FOR_TYPE[online_store_type]
module_name, config_class_name = online_store_type.rsplit(".", 1)

return get_config_class_from_type(module_name, config_class_name, "OnlineStore")


def get_offline_config_from_type(offline_store_type: str):
if offline_store_type in OFFLINE_CONFIG_CLASS_FOR_TYPE:
offline_store_type = OFFLINE_CONFIG_CLASS_FOR_TYPE[offline_store_type]
module_name, config_class_name = offline_store_type.rsplit(".", 1)

return get_config_class_from_type(module_name, config_class_name, "OfflineStore")


def load_repo_config(repo_path: Path) -> RepoConfig:
config_path = repo_path / "feature_store.yaml"

Expand Down
4 changes: 0 additions & 4 deletions sdk/python/feast/repo_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
infer_entity_value_type_from_feature_views,
update_data_sources_with_inferred_event_timestamp_col,
)
from feast.infra.offline_stores.helpers import assert_offline_store_supports_data_source
from feast.infra.provider import get_provider
from feast.names import adjectives, animals
from feast.registry import Registry
Expand Down Expand Up @@ -156,9 +155,6 @@ def apply_total(repo_config: RepoConfig, repo_path: Path):

# Make sure the data source used by this feature view is supported by Feast
for data_source in data_sources:
assert_offline_store_supports_data_source(
repo_config.offline_store, data_source
)
data_source.validate()

update_data_sources_with_inferred_event_timestamp_col(data_sources)
Expand Down
2 changes: 1 addition & 1 deletion sdk/python/tests/test_historical_retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
from feast.feature import Feature
from feast.feature_store import FeatureStore
from feast.feature_view import FeatureView
from feast.infra.offline_stores.bigquery import BigQueryOfflineStoreConfig
from feast.infra.online_stores.sqlite import SqliteOnlineStoreConfig
from feast.infra.provider import DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL
from feast.repo_config import BigQueryOfflineStoreConfig
from feast.value_type import ValueType

np.random.seed(0)
Expand Down