fix localcompute

Signed-off-by: HaoXuAI <sduxuhao@gmail.com>
feast-dev · franciscojavierarceo · Jun 3, 2025 · May 14, 2025 · May 14, 2025 · May 15, 2025
commit 3b4912e1937a1e04d39baa1dbbdb05fd0057c82e
diff --git a/README.md b/README.md
@@ -9,7 +9,7 @@
 
 [![PyPI - Downloads](https://img.shields.io/pypi/dm/feast)](https://pypi.org/project/feast/)
 [![GitHub contributors](https://img.shields.io/github/contributors/feast-dev/feast)](https://github.com/feast-dev/feast/graphs/contributors)
-[![unit-tests](https://github.com/feast-dev/feast/actions/workflows/unit_tests.yml/badge.svg?branch=master)](https://github.com/feast-dev/feast/actions/workflows/unit_tests.yml)
+[![unit-tests](https://github.com/feast-dev/feast/actions/workflows/unit_tests.yml/badge.svg?branch=master&event=pull_request)](https://github.com/feast-dev/feast/actions/workflows/unit_tests.yml)
 [![integration-tests-and-build](https://github.com/feast-dev/feast/actions/workflows/master_only.yml/badge.svg?branch=master&event=push)](https://github.com/feast-dev/feast/actions/workflows/master_only.yml)
 [![java-integration-tests](https://github.com/feast-dev/feast/actions/workflows/java_master_only.yml/badge.svg?branch=master&event=push)](https://github.com/feast-dev/feast/actions/workflows/java_master_only.yml)
 [![linter](https://github.com/feast-dev/feast/actions/workflows/linter.yml/badge.svg?branch=master&event=push)](https://github.com/feast-dev/feast/actions/workflows/linter.yml)
@@ -21,6 +21,8 @@
 ## Join us on Slack!
 👋👋👋 [Come say hi on Slack!](https://communityinviter.com/apps/feastopensource/feast-the-open-source-feature-store)
 
+[Check out our DeepWiki!](https://deepwiki.com/feast-dev/feast)
+
 ## Overview
 
 Feast (**Fea**ture **St**ore) is an open source feature store for machine learning. Feast is the fastest path to manage existing infrastructure to productionize analytic data for model training and online inference.
@@ -257,4 +259,4 @@ Thanks goes to these incredible people:
 
 <a href="https://github.com/feast-dev/feast/graphs/contributors">
   <img src="https://contrib.rocks/image?repo=feast-dev/feast" />
-</a>
+</a>
@@ -23,10 +23,10 @@
 
 class ComputeEngine(ABC):
     """
-    The interface that Feast uses to control the compute system that handles materialization and get_historical_features.
+    The interface that Feast uses to control the compute system that handles materialization and get_historical_features.
     Each engine must implement:
         - materialize(): to generate and persist features
-        - get_historical_features(): to perform point-in-time correct joins
+        - get_historical_features(): to perform historical retrieval of features
     Engines should use FeatureBuilder and DAGNode abstractions to build modular, pluggable workflows.
     """
 

@@ -23,7 +23,11 @@ def from_name(name: str) -> DataFrameBackend:
 
     @staticmethod
     def infer_from_entity_df(entity_df) -> Optional[DataFrameBackend]:
-        if isinstance(entity_df, pyarrow.Table) or isinstance(entity_df, pd.DataFrame):
+        if (
+            not entity_df
+            or isinstance(entity_df, pyarrow.Table)
+            or isinstance(entity_df, pd.DataFrame)
+        ):
             return PandasBackend()
 
         if BackendFactory._is_polars(entity_df):

@@ -26,7 +26,10 @@ def __init__(
         self.backend = backend
 
     def build_source_node(self):
-        node = LocalSourceReadNode("source", self.feature_view, self.task)
+        source = self.feature_view.batch_source
+        start_time = self.task.start_time
+        end_time = self.task.end_time
+        node = LocalSourceReadNode("source", source, start_time, end_time)
         self.nodes.append(node)
         return node
 

@@ -9,6 +9,10 @@
 from feast.infra.compute_engines.local.arrow_table_value import ArrowTableValue
 from feast.infra.compute_engines.local.backends.base import DataFrameBackend
 from feast.infra.compute_engines.local.local_node import LocalNode
+from feast.infra.compute_engines.utils import (
+    create_offline_store_retrieval_job,
+    get_partition_columns,
+)
 from feast.infra.offline_stores.offline_utils import (
     infer_event_timestamp_from_entity_df,
 )
@@ -31,24 +35,11 @@ def __init__(
         self.end_time = end_time
 
     def execute(self, context: ExecutionContext) -> ArrowTableValue:
-        offline_store = context.offline_store
-        (
-            join_key_columns,
-            feature_name_columns,
-            timestamp_field,
-            created_timestamp_column,
-        ) = context.column_info
-
-        # 📥 Reuse Feast's robust query resolver
-        retrieval_job = offline_store.pull_all_from_table_or_query(
-            config=context.repo_config,
+        retrieval_job = create_offline_store_retrieval_job(
             data_source=self.source,
-            join_key_columns=join_key_columns,
-            feature_name_columns=feature_name_columns,
-            timestamp_field=timestamp_field,
-            created_timestamp_column=created_timestamp_column,
-            start_date=self.start_time,
-            end_date=self.end_time,
+            context=context,
+            start_time=self.start_time,
+            end_time=self.end_time,
         )
         arrow_table = retrieval_job.to_arrow()
         return ArrowTableValue(data=arrow_table)
@@ -63,8 +54,9 @@ def execute(self, context: ExecutionContext) -> ArrowTableValue:
         feature_table = self.get_single_table(context).data
 
         if context.entity_df is None:
-            context.node_outputs[self.name] = feature_table
-            return feature_table
+            output = ArrowTableValue(feature_table)
+            context.node_outputs[self.name] = output
+            return output
 
         entity_table = pa.Table.from_pandas(context.entity_df)
         feature_df = self.backend.from_arrow(feature_table)
@@ -105,18 +97,18 @@ def execute(self, context: ExecutionContext) -> ArrowTableValue:
         input_table = self.get_single_table(context).data
         df = self.backend.from_arrow(input_table)
 
-        _, _, ts_col, _ = context.column_info
+        timestamp_column = context.column_info.timestamp_column
 
         if ENTITY_TS_ALIAS in self.backend.columns(df):
             # filter where feature.ts <= entity.event_timestamp
-            df = df[df[ts_col] <= df[ENTITY_TS_ALIAS]]
+            df = df[df[timestamp_column] <= df[ENTITY_TS_ALIAS]]
 
             # TTL: feature.ts >= entity.event_timestamp - ttl
             if self.ttl:
                 lower_bound = df[ENTITY_TS_ALIAS] - self.backend.to_timedelta_value(
                     self.ttl
                 )
-                df = df[df[ts_col] >= lower_bound]
+                df = df[df[timestamp_column] >= lower_bound]
 
         # Optional user-defined filter expression (e.g., "value > 0")
         if self.filter_expr:
@@ -157,14 +149,14 @@ def execute(self, context: ExecutionContext) -> ArrowTableValue:
         df = self.backend.from_arrow(input_table)
 
         # Extract join_keys, timestamp, and created_ts from context
-        join_keys, _, ts_col, created_ts_col = context.column_info
+        column_info = context.column_info
 
         # Dedup strategy: sort and drop_duplicates
-        sort_keys = [ts_col]
-        if created_ts_col:
-            sort_keys.append(created_ts_col)
+        dedup_keys = get_partition_columns(context)
+        sort_keys = [column_info.timestamp_column]
+        if column_info.created_timestamp_column:
+            sort_keys.append(column_info.created_timestamp_column)
 
-        dedup_keys = join_keys + [ENTITY_TS_ALIAS]
         df = self.backend.drop_duplicates(
             df, keys=dedup_keys, sort_by=sort_keys, ascending=False
         )

@@ -13,7 +13,10 @@
 from feast.infra.compute_engines.dag.node import DAGNode
 from feast.infra.compute_engines.dag.value import DAGValue
 from feast.infra.compute_engines.spark.utils import map_in_arrow
-from feast.infra.compute_engines.utils import get_partition_columns
+from feast.infra.compute_engines.utils import (
+    create_offline_store_retrieval_job,
+    get_partition_columns,
+)
 from feast.infra.offline_stores.contrib.spark_offline_store.spark import (
     SparkRetrievalJob,
     _get_entity_schema,
@@ -63,19 +66,12 @@ def __init__(
         self.end_time = end_time
 
     def execute(self, context: ExecutionContext) -> DAGValue:
-        offline_store = context.offline_store
         column_info = context.column_info
-
-        # 📥 Reuse Feast's robust query resolver
-        retrieval_job = offline_store.pull_all_from_table_or_query(
-            config=context.repo_config,
+        retrieval_job = create_offline_store_retrieval_job(
             data_source=self.source,
-            join_key_columns=column_info.join_keys,
-            feature_name_columns=column_info.feature_cols,
-            timestamp_field=column_info.ts_col,
-            created_timestamp_column=column_info.created_ts_col,
-            start_date=self.start_time,
-            end_date=self.end_time,
+            context=context,
+            start_time=self.start_time,
+            end_time=self.end_time,
         )
         spark_df = cast(SparkRetrievalJob, retrieval_job).to_spark_df()
 

@@ -1,6 +1,9 @@
-from typing import List
+from datetime import datetime
+from typing import List, Optional
 
+from feast.data_source import DataSource
 from feast.infra.compute_engines.dag.context import ExecutionContext
+from feast.infra.offline_stores.offline_store import RetrievalJob
 
 ENTITY_TS_ALIAS = "__entity_event_timestamp"
 
@@ -12,3 +15,36 @@ def get_partition_columns(context: ExecutionContext) -> List[str]:
         else context.column_info.timestamp_column
     ]
     return [col for col in partition_columns if col]
+
+
+def create_offline_store_retrieval_job(
+    data_source: DataSource,
+    context: ExecutionContext,
+    start_time: Optional[datetime] = None,
+    end_time: Optional[datetime] = None,
+) -> RetrievalJob:
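+    """
+    Create a retrieval job for the offline store.
+    Args:
+        data_source: The data source to pull from.
+        context: Execution context supplying the offline store, repo config and column info.
+        start_time: Optional datetime forwarded to the offline store as start_date.
+        end_time: Optional datetime forwarded to the offline store as end_date.
+
+    Returns:
+        The RetrievalJob returned by the offline store's pull_all_from_table_or_query.
+    """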
+    offline_store = context.offline_store
+    column_info = context.column_info
+    # 📥 Reuse Feast's robust query resolver
+    retrieval_job = offline_store.pull_all_from_table_or_query(
+        config=context.repo_config,
+        data_source=data_source,
+        join_key_columns=column_info.join_keys,
+        feature_name_columns=column_info.feature_cols,
+        timestamp_field=column_info.ts_col,
+        created_timestamp_column=column_info.created_ts_col,
+        start_date=start_time,
+        end_date=end_time,
+    )
+    return retrieval_job