Skip to content

Commit 8e4bbe5

Browse files
authored
Provide descriptive error on invalid table reference (feast-dev#1627)
* Initial commit to catch nonexistent table Signed-off-by: Cody Lin <codyjlin@yahoo.com> Signed-off-by: Cody Lin <codyl@twitter.com> * simplify nonexistent BQ table test Signed-off-by: Cody Lin <codyl@twitter.com> * clean up table_exists exception Signed-off-by: Cody Lin <codyl@twitter.com> * remove unneeded variable Signed-off-by: Cody Lin <codyl@twitter.com> * function name change to _assert_table_exists Signed-off-by: Cody Lin <codyl@twitter.com> * Initial commit to catch nonexistent table Signed-off-by: Cody Lin <codyjlin@yahoo.com> Signed-off-by: Cody Lin <codyl@twitter.com> * simplify nonexistent BQ table test Signed-off-by: Cody Lin <codyl@twitter.com> * clean up table_exists exception Signed-off-by: Cody Lin <codyl@twitter.com> * function name change to _assert_table_exists Signed-off-by: Cody Lin <codyl@twitter.com> * fix lint errors and rebase Signed-off-by: Cody Lin <codyl@twitter.com> * Fix get_table(None) error Signed-off-by: Cody Lin <codyl@twitter.com> * custom exception for both missing file and BQ source Signed-off-by: Cody Lin <codyl@twitter.com> * revert FileSource checks Signed-off-by: Cody Lin <codyl@twitter.com> * Use DataSourceNotFoundException instead of subclassing Signed-off-by: Cody Lin <codyl@twitter.com> * Moved assert_table_exists out of the BQ constructor to apply_total Signed-off-by: Cody Lin <codyl@twitter.com> * rename test and test asset Signed-off-by: Cody Lin <codyl@twitter.com> * move validate logic back to data_source Signed-off-by: Cody Lin <codyl@twitter.com> * fixed tests Signed-off-by: Cody Lin <codyl@twitter.com> * Set pytest.integration for tests that access BQ Signed-off-by: Cody Lin <codyl@twitter.com> * Import pytest in failed test files Signed-off-by: Cody Lin <codyl@twitter.com>
1 parent 5d6ee4f commit 8e4bbe5

8 files changed

Lines changed: 93 additions & 1 deletion

sdk/python/feast/data_source.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
from feast import type_map
2222
from feast.data_format import FileFormat, StreamFormat
23+
from feast.errors import DataSourceNotFoundException
2324
from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto
2425
from feast.value_type import ValueType
2526

@@ -519,6 +520,12 @@ def to_proto(self) -> DataSourceProto:
519520
"""
520521
raise NotImplementedError
521522

523+
def validate(self):
524+
"""
525+
Validates the underlying data source.
526+
"""
527+
raise NotImplementedError
528+
522529

523530
class FileSource(DataSource):
524531
def __init__(
@@ -615,6 +622,10 @@ def to_proto(self) -> DataSourceProto:
615622

616623
return data_source_proto
617624

625+
def validate(self):
626+
# TODO: validate a FileSource
627+
pass
628+
618629
@staticmethod
619630
def source_datatype_to_feast_value_type() -> Callable[[str], ValueType]:
620631
return type_map.pa_to_feast_value_type
@@ -692,6 +703,17 @@ def to_proto(self) -> DataSourceProto:
692703

693704
return data_source_proto
694705

706+
def validate(self):
707+
if not self.query:
708+
from google.api_core.exceptions import NotFound
709+
from google.cloud import bigquery
710+
711+
client = bigquery.Client()
712+
try:
713+
client.get_table(self.table_ref)
714+
except NotFound:
715+
raise DataSourceNotFoundException(self.table_ref)
716+
695717
def get_table_query_string(self) -> str:
696718
"""Returns a string that can directly be used to reference this table in SQL"""
697719
if self.table_ref:

sdk/python/feast/errors.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,13 @@
33
from colorama import Fore, Style
44

55

6+
class DataSourceNotFoundException(Exception):
7+
def __init__(self, path):
8+
super().__init__(
9+
f"Unable to find table at '{path}'. Please check that table exists."
10+
)
11+
12+
613
class FeastObjectNotFoundException(Exception):
714
pass
815

sdk/python/feast/repo_operations.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,11 +154,12 @@ def apply_total(repo_config: RepoConfig, repo_path: Path):
154154

155155
data_sources = [t.input for t in repo.feature_views]
156156

157-
# Make sure the data source used by this feature view is supported by
157+
# Make sure the data source used by this feature view is supported by Feast
158158
for data_source in data_sources:
159159
assert_offline_store_supports_data_source(
160160
repo_config.offline_store, data_source
161161
)
162+
data_source.validate()
162163

163164
update_data_sources_with_inferred_event_timestamp_col(data_sources)
164165
for view in repo.feature_views:
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from datetime import timedelta
2+
3+
from feast import BigQuerySource, Entity, Feature, FeatureView, ValueType
4+
5+
nonexistent_source = BigQuerySource(
6+
table_ref="project.dataset.nonexistent_table", event_timestamp_column=""
7+
)
8+
9+
driver = Entity(name="driver", value_type=ValueType.INT64, description="driver id",)
10+
11+
nonexistent_features = FeatureView(
12+
name="driver_locations",
13+
entities=["driver"],
14+
ttl=timedelta(days=1),
15+
features=[
16+
Feature(name="lat", dtype=ValueType.FLOAT),
17+
Feature(name="lon", dtype=ValueType.STRING),
18+
],
19+
input=nonexistent_source,
20+
)

sdk/python/tests/test_cli_gcp.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,38 @@ def test_basic() -> None:
5353

5454
result = runner.run(["teardown"], cwd=repo_path)
5555
assert result.returncode == 0
56+
57+
58+
@pytest.mark.integration
59+
def test_missing_bq_source_fail() -> None:
60+
project_id = "".join(
61+
random.choice(string.ascii_lowercase + string.digits) for _ in range(10)
62+
)
63+
runner = CliRunner()
64+
with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:
65+
66+
repo_path = Path(repo_dir_name)
67+
data_path = Path(data_dir_name)
68+
69+
repo_config = repo_path / "feature_store.yaml"
70+
71+
repo_config.write_text(
72+
dedent(
73+
f"""
74+
project: {project_id}
75+
registry: {data_path / "registry.db"}
76+
provider: gcp
77+
"""
78+
)
79+
)
80+
81+
repo_example = repo_path / "example.py"
82+
repo_example.write_text(
83+
(
84+
Path(__file__).parent / "example_feature_repo_with_missing_bq_source.py"
85+
).read_text()
86+
)
87+
88+
returncode, output = runner.run_with_output(["apply"], cwd=repo_path)
89+
assert returncode == 1
90+
assert b"DataSourceNotFoundException" in output

sdk/python/tests/test_cli_local.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@
44
from textwrap import dedent
55

66
import assertpy
7+
import pytest
78

89
from feast.feature_store import FeatureStore
910
from tests.cli_utils import CliRunner
1011
from tests.online_read_write_test import basic_rw_test
1112

1213

14+
@pytest.mark.integration
1315
def test_workflow() -> None:
1416
"""
1517
Test running apply on a sample repo, and make sure the infra gets created.
@@ -78,6 +80,7 @@ def test_workflow() -> None:
7880
assertpy.assert_that(result.returncode).is_equal_to(0)
7981

8082

83+
@pytest.mark.integration
8184
def test_non_local_feature_repo() -> None:
8285
"""
8386
Test running apply on a sample repo, and make sure the infra gets created.

sdk/python/tests/test_online_retrieval.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from tests.cli_utils import CliRunner, get_example_repo
1515

1616

17+
@pytest.mark.integration
1718
def test_online() -> None:
1819
"""
1920
Test reading from the online store in local mode.
@@ -238,6 +239,7 @@ def test_online() -> None:
238239
os.rename(store.config.registry + "_fake", store.config.registry)
239240

240241

242+
@pytest.mark.integration
241243
def test_online_to_df():
242244
"""
243245
Test dataframe conversion. Make sure the response columns and rows are

sdk/python/tests/test_partial_apply.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1+
import pytest
12
from google.protobuf.duration_pb2 import Duration
23

34
from feast import BigQuerySource, Feature, FeatureView, ValueType
45
from tests.cli_utils import CliRunner, get_example_repo
56
from tests.online_read_write_test import basic_rw_test
67

78

9+
@pytest.mark.integration
810
def test_partial() -> None:
911
"""
1012
Add another table to existing repo using partial apply API. Make sure both the table

0 commit comments

Comments
 (0)