From 1dddebcb8ecb530449095c99c506feb9ed245d0a Mon Sep 17 00:00:00 2001 From: Achal Shah Date: Wed, 6 Jul 2022 11:14:45 -0700 Subject: [PATCH 1/2] feat: Add to_remote_storage method to RetrievalJob Signed-off-by: Achal Shah --- .../feast/infra/offline_stores/offline_store.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sdk/python/feast/infra/offline_stores/offline_store.py b/sdk/python/feast/infra/offline_stores/offline_store.py index 439911fe2a3..b50204b0f2e 100644 --- a/sdk/python/feast/infra/offline_stores/offline_store.py +++ b/sdk/python/feast/infra/offline_stores/offline_store.py @@ -163,6 +163,23 @@ def metadata(self) -> Optional[RetrievalMetadata]: """ pass + def supports_remote_storage_export(self) -> bool: + """ + This method should return True if the RetrievalJob supports `to_remote_storage()`. + """ + return False + + def to_remote_storage(self) -> List[str]: + """ + This method should export the result of this RetrievalJob to + remote storage (such as S3, GCS, HDFS, etc). + Implementations of this method should export the results as + multiple parquet files, each file sized appropriately + depending on how much data is being returned by the retrieval + job. + """ + raise NotImplementedError() + class OfflineStore(ABC): """ From 7532e645ff82e77f8dcb1643d2ae142938b77ba5 Mon Sep 17 00:00:00 2001 From: Achal Shah Date: Wed, 6 Jul 2022 11:43:01 -0700 Subject: [PATCH 2/2] docstring Signed-off-by: Achal Shah --- sdk/python/feast/infra/offline_stores/offline_store.py | 3 +++ .../offline_store/test_universal_historical_retrieval.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/offline_store.py b/sdk/python/feast/infra/offline_stores/offline_store.py index b50204b0f2e..e24317e8599 100644 --- a/sdk/python/feast/infra/offline_stores/offline_store.py +++ b/sdk/python/feast/infra/offline_stores/offline_store.py @@ -177,6 +177,9 @@ def to_remote_storage(self) -> List[str]: multiple parquet files, each file sized appropriately depending on how much data is being returned by the retrieval job. + + Returns: + A list of parquet file paths in remote storage. """ raise NotImplementedError() diff --git a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py index 2076ab2aedf..0b2965084d8 100644 --- a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py +++ b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py @@ -280,7 +280,7 @@ def get_expected_training_df( @pytest.mark.integration @pytest.mark.universal_offline_stores -@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v)) +@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: f"full:{v}") def test_historical_features(environment, universal_data_sources, full_feature_names): store = environment.feature_store @@ -410,7 +410,7 @@ def test_historical_features(environment, universal_data_sources, full_feature_n @pytest.mark.integration -@pytest.mark.universal +@pytest.mark.universal_offline_stores @pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v)) def test_historical_features_with_shared_batch_source( environment, universal_data_sources, full_feature_names