Skip to content

Commit 866b81e

Browse files
committed
add test
Signed-off-by: HaoXuAI <sduxuhao@gmail.com>
1 parent d37b027 commit 866b81e

File tree

1 file changed

+163
-0
lines changed

1 file changed

+163
-0
lines changed
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
"""Tests for RetrievalJob FeastDataFrame integration."""
2+
3+
from unittest.mock import Mock
4+
5+
import pandas as pd
6+
import pyarrow as pa
7+
8+
from feast.dataframe import DataFrameEngine, FeastDataFrame
9+
from feast.infra.offline_stores.offline_store import RetrievalJob
10+
11+
12+
class MockRetrievalJob(RetrievalJob):
    """In-memory RetrievalJob double backed by a pre-built Arrow table.

    The job performs no query: it stores the table, feature list and
    on-demand feature views handed to the constructor and returns them
    from the hooks it overrides.
    """

    def __init__(
        self, arrow_table: pa.Table, features: list = None, odfvs: list = None
    ):
        """Store the canned inputs.

        Args:
            arrow_table: Table returned verbatim by ``_to_arrow_internal``.
            features: Stored on ``self.features``; any falsy value
                (``None`` or an empty list) is replaced by a new empty list.
            odfvs: Stored on ``self.odfvs`` and exposed through
                ``on_demand_feature_views``; falsy values are replaced by a
                new empty list.
        """
        self.odfvs = odfvs if odfvs else []
        self.features = features if features else []
        self.arrow_table = arrow_table

    @property
    def on_demand_feature_views(self):
        """Return the on-demand feature views supplied at construction."""
        return self.odfvs

    @property
    def full_feature_names(self):
        """Always report ``False``."""
        return False

    def _to_arrow_internal(self, timeout=None):
        """Return the stored Arrow table; ``timeout`` is accepted but unused."""
        return self.arrow_table
32+
33+
34+
class TestRetrievalJobFeastDataFrame:
    """Checks for ``RetrievalJob.to_feast_df`` and its coexistence with ``to_df``."""

    def test_to_feast_df_basic(self):
        """to_feast_df wraps the Arrow table in an ARROW-engine FeastDataFrame."""
        columns = {
            "feature1": [1, 2, 3],
            "feature2": ["a", "b", "c"],
            "timestamp": pd.to_datetime(["2023-01-01", "2023-01-02", "2023-01-03"]),
        }
        source_table = pa.table(columns)

        retrieval_job = MockRetrievalJob(
            source_table, features=["feature1", "feature2"]
        )
        result = retrieval_job.to_feast_df()

        # Wrapper type and engine first, then the shape of the wrapped table.
        assert isinstance(result, FeastDataFrame)
        assert result.engine == DataFrameEngine.ARROW
        assert isinstance(result.data, pa.Table)
        assert result.data.num_rows == 3
        assert result.data.num_columns == 3

    def test_to_feast_df_metadata(self):
        """Metadata carries the feature list and the names of the ODFVs."""
        source_table = pa.table({"feature1": [1, 2, 3], "feature2": [4.0, 5.0, 6.0]})

        # ``name`` is assigned after construction on purpose: passing it to
        # the Mock constructor would configure the mock's repr instead of
        # creating a ``name`` attribute.
        odfv_stubs = []
        for odfv_name in ("odfv1", "odfv2"):
            stub = Mock()
            stub.name = odfv_name
            odfv_stubs.append(stub)

        retrieval_job = MockRetrievalJob(
            source_table, features=["feature1", "feature2"], odfvs=odfv_stubs
        )
        result = retrieval_job.to_feast_df()

        # Both keys must exist before their values are compared.
        assert "features" in result.metadata
        assert "on_demand_feature_views" in result.metadata
        assert result.metadata["features"] == ["feature1", "feature2"]
        assert result.metadata["on_demand_feature_views"] == ["odfv1", "odfv2"]

    def test_to_feast_df_with_timeout(self):
        """Passing ``timeout`` to to_feast_df is accepted without error."""
        retrieval_job = MockRetrievalJob(pa.table({"feature1": [1, 2, 3]}))

        result = retrieval_job.to_feast_df(timeout=30)

        assert isinstance(result, FeastDataFrame)
        assert result.engine == DataFrameEngine.ARROW

    def test_to_feast_df_empty_metadata(self):
        """With no features or ODFVs supplied, both metadata entries are empty lists."""
        # Only the table is passed; features and ODFVs are left at their defaults.
        retrieval_job = MockRetrievalJob(pa.table({"feature1": [1, 2, 3]}))

        result = retrieval_job.to_feast_df()

        assert result.metadata["features"] == []
        assert result.metadata["on_demand_feature_views"] == []

    def test_to_feast_df_preserves_arrow_data(self):
        """The wrapped table equals the input in values, schema, names and types."""
        # Explicit Arrow types so the schema comparison is meaningful.
        typed_columns = {
            "int_feature": pa.array([1, 2, 3], type=pa.int64()),
            "float_feature": pa.array([1.1, 2.2, 3.3], type=pa.float64()),
            "string_feature": pa.array(["a", "b", "c"], type=pa.string()),
            "bool_feature": pa.array([True, False, True], type=pa.bool_()),
        }
        source_table = pa.table(typed_columns)

        result = MockRetrievalJob(source_table).to_feast_df()

        # Whole-table and whole-schema equality.
        assert result.data.equals(source_table)
        assert result.data.schema == source_table.schema

        # Column names, then each field's type position by position.
        assert result.data.column_names == source_table.column_names
        for position, expected_field in enumerate(source_table.schema):
            actual_field = result.data.schema.field(position)
            assert actual_field.type == expected_field.type

    def test_to_df_still_works(self):
        """to_df keeps returning a pandas DataFrame with the original contents."""
        expected = {"feature1": [1, 2, 3], "feature2": ["a", "b", "c"]}
        retrieval_job = MockRetrievalJob(pa.table(expected))

        frame = retrieval_job.to_df()

        assert isinstance(frame, pd.DataFrame)
        assert len(frame) == 3
        # dicts preserve insertion order, so this is ["feature1", "feature2"].
        assert list(frame.columns) == list(expected)
        for column_name, values in expected.items():
            assert frame[column_name].tolist() == values

    def test_both_methods_return_same_data(self):
        """to_df and to_feast_df agree once the latter is converted to pandas."""
        source_table = pa.table(
            {"feature1": [1, 2, 3, 4], "feature2": [10.5, 20.5, 30.5, 40.5]}
        )
        retrieval_job = MockRetrievalJob(source_table)

        pandas_frame = retrieval_job.to_df()
        wrapped = retrieval_job.to_feast_df()

        # Bring the Arrow payload to pandas with a fresh positional index
        # before comparing the two frames.
        converted = wrapped.data.to_pandas()
        converted = converted.reset_index(drop=True)

        pd.testing.assert_frame_equal(pandas_frame, converted)

0 commit comments

Comments
 (0)