Skip to content

Commit 37923de

Browse files
committed
Add FeastDataFrame
Signed-off-by: HaoXuAI <sduxuhao@gmail.com>
1 parent a66d890 commit 37923de

File tree

3 files changed

+180
-0
lines changed

3 files changed

+180
-0
lines changed

sdk/python/feast/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
from .batch_feature_view import BatchFeatureView
1313
from .data_source import KafkaSource, KinesisSource, PushSource, RequestSource
14+
from .dataframe import DataFrameEngine, FeastDataFrame
1415
from .entity import Entity
1516
from .feature import Feature
1617
from .feature_service import FeatureService
@@ -32,9 +33,11 @@
3233

3334
__all__ = [
3435
"BatchFeatureView",
36+
"DataFrameEngine",
3537
"Entity",
3638
"KafkaSource",
3739
"KinesisSource",
40+
"FeastDataFrame",
3841
"Feature",
3942
"Field",
4043
"FeatureService",

sdk/python/feast/dataframe.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
"""FeastDataFrame: A lightweight container for DataFrame-like objects in Feast."""
2+
3+
from enum import Enum
4+
from typing import Any, Dict, Optional, Union
5+
6+
import pandas as pd
7+
import pyarrow as pa
8+
9+
10+
class DataFrameEngine(str, Enum):
11+
"""Supported DataFrame engines."""
12+
13+
PANDAS = "pandas"
14+
SPARK = "spark"
15+
DASK = "dask"
16+
RAY = "ray"
17+
ARROW = "arrow"
18+
POLARS = "polars"
19+
UNKNOWN = "unknown"
20+
21+
22+
class FeastDataFrame:
23+
"""
24+
A lightweight container for DataFrame-like objects in Feast.
25+
26+
This class wraps any DataFrame implementation and provides metadata
27+
about the engine type for proper routing in Feast's processing pipeline.
28+
"""
29+
30+
def __init__(
31+
self,
32+
data: Any,
33+
engine: Optional[DataFrameEngine] = None,
34+
metadata: Optional[Dict[str, Any]] = None,
35+
):
36+
"""
37+
Initialize a FeastDataFrame.
38+
39+
Args:
40+
data: The wrapped DataFrame object (pandas, Spark, Dask, etc.)
41+
engine: Explicitly specify the engine type (auto-detected if None)
42+
metadata: Additional metadata (schema hints, etc.)
43+
"""
44+
self.data = data
45+
self.metadata = metadata or {}
46+
self._engine = engine or self._detect_engine()
47+
48+
def _detect_engine(self) -> DataFrameEngine:
49+
"""Auto-detect the DataFrame engine based on type."""
50+
if isinstance(self.data, pd.DataFrame):
51+
return DataFrameEngine.PANDAS
52+
elif isinstance(self.data, pa.Table):
53+
return DataFrameEngine.ARROW
54+
55+
# For optional dependencies, check module name to avoid import errors
56+
module = type(self.data).__module__
57+
if "pyspark" in module:
58+
return DataFrameEngine.SPARK
59+
elif "dask" in module:
60+
return DataFrameEngine.DASK
61+
elif "ray" in module:
62+
return DataFrameEngine.RAY
63+
elif "polars" in module:
64+
return DataFrameEngine.POLARS
65+
else:
66+
return DataFrameEngine.UNKNOWN
67+
68+
@property
69+
def engine(self) -> DataFrameEngine:
70+
"""Get the detected or specified engine type."""
71+
return self._engine
72+
73+
def __repr__(self):
74+
return f"FeastDataFrame(engine={self.engine}, type={type(self.data).__name__})"
75+
76+
@property
77+
def is_lazy(self) -> bool:
78+
"""Check if the underlying DataFrame is lazy (Spark, Dask, Ray)."""
79+
return self.engine in [
80+
DataFrameEngine.SPARK,
81+
DataFrameEngine.DASK,
82+
DataFrameEngine.RAY,
83+
]
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
"""Unit tests for FeastDataFrame."""
2+
3+
import pandas as pd
4+
import pyarrow as pa
5+
import pytest
6+
7+
from feast.dataframe import DataFrameEngine, FeastDataFrame
8+
9+
10+
class TestFeastDataFrame:
11+
"""Test suite for FeastDataFrame functionality."""
12+
13+
def test_pandas_detection(self):
14+
"""Test auto-detection of pandas DataFrame."""
15+
df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
16+
feast_df = FeastDataFrame(df)
17+
18+
assert feast_df.engine == DataFrameEngine.PANDAS
19+
assert not feast_df.is_lazy
20+
assert isinstance(feast_df.data, pd.DataFrame)
21+
22+
def test_arrow_detection(self):
23+
"""Test auto-detection of Arrow Table."""
24+
table = pa.table({"a": [1, 2, 3], "b": [4, 5, 6]})
25+
feast_df = FeastDataFrame(table)
26+
27+
assert feast_df.engine == DataFrameEngine.ARROW
28+
assert not feast_df.is_lazy
29+
assert isinstance(feast_df.data, pa.Table)
30+
31+
def test_explicit_engine(self):
32+
"""Test explicit engine specification."""
33+
data = {"mock": "data"}
34+
feast_df = FeastDataFrame(data, engine=DataFrameEngine.SPARK)
35+
36+
assert feast_df.engine == DataFrameEngine.SPARK
37+
assert feast_df.is_lazy
38+
39+
def test_unknown_engine(self):
40+
"""Test handling of unknown DataFrame types."""
41+
data = {"some": "dict"}
42+
feast_df = FeastDataFrame(data)
43+
44+
assert feast_df.engine == DataFrameEngine.UNKNOWN
45+
46+
def test_metadata(self):
47+
"""Test metadata handling."""
48+
df = pd.DataFrame({"a": [1, 2, 3]})
49+
metadata = {"features": ["a"], "source": "test"}
50+
feast_df = FeastDataFrame(df, metadata=metadata)
51+
52+
assert feast_df.metadata == metadata
53+
assert feast_df.metadata["features"] == ["a"]
54+
55+
def test_repr(self):
56+
"""Test string representation."""
57+
df = pd.DataFrame({"a": [1, 2, 3]})
58+
feast_df = FeastDataFrame(df)
59+
60+
repr_str = repr(feast_df)
61+
assert "FeastDataFrame" in repr_str
62+
assert "engine=pandas" in repr_str
63+
assert "DataFrame" in repr_str
64+
65+
@pytest.mark.parametrize(
66+
"engine,expected_lazy",
67+
[
68+
(DataFrameEngine.PANDAS, False),
69+
(DataFrameEngine.ARROW, False),
70+
(DataFrameEngine.POLARS, False),
71+
(DataFrameEngine.SPARK, True),
72+
(DataFrameEngine.DASK, True),
73+
(DataFrameEngine.RAY, True),
74+
(DataFrameEngine.UNKNOWN, False),
75+
],
76+
)
77+
def test_is_lazy_property(self, engine, expected_lazy):
78+
"""Test is_lazy property for different engines."""
79+
feast_df = FeastDataFrame({"mock": "data"}, engine=engine)
80+
assert feast_df.is_lazy == expected_lazy
81+
82+
def test_polars_detection(self):
83+
"""Test detection of polars DataFrame (using mock)."""
84+
# Mock polars DataFrame
85+
class MockPolarsDF:
86+
def __init__(self):
87+
self.__module__ = "polars.dataframe.frame"
88+
self.__class__.__name__ = "DataFrame"
89+
90+
polars_df = MockPolarsDF()
91+
feast_df = FeastDataFrame(polars_df)
92+
93+
assert feast_df.engine == DataFrameEngine.POLARS
94+
assert not feast_df.is_lazy

0 commit comments

Comments
 (0)