Skip to content

Commit b3ba611

Browse files
committed
feat: support staging for spark materialization ([#5671](#5671))
Signed-off-by: Jacob Weinhold <29459386+jfw-ppi@users.noreply.github.com>
1 parent 8b4190d commit b3ba611

File tree

3 files changed

+2
-10
lines changed
  • docs/reference/offline-stores
  • sdk/python
    • feast/infra/offline_stores/contrib/spark_offline_store
    • tests/unit/infra/offline_stores/contrib/spark_offline_store

3 files changed

+2
-10
lines changed

docs/reference/offline-stores/spark.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ offline_store:
3434
spark.sql.execution.arrow.pyspark.enabled: "true"
3535
# Optional: spill large materializations to the staging location instead of collecting in the driver
3636
staging_location: "s3://my-bucket/tmp/feast"
37-
staging_allow_materialize: true
3837
online_store:
3938
path: data/online_store.db
4039
```

sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
import pyarrow.dataset as ds
2929
import pyarrow.parquet as pq
3030
import pyspark
31-
from pydantic import StrictBool, StrictStr
31+
from pydantic import StrictStr
3232
from pyspark import SparkConf
3333
from pyspark.sql import SparkSession
3434

@@ -68,9 +68,6 @@ class SparkOfflineStoreConfig(FeastConfigBaseModel):
6868
staging_location: Optional[StrictStr] = None
6969
""" Remote path for batch materialization jobs"""
7070

71-
staging_allow_materialize: StrictBool = False
72-
""" Enable use of staging_location during materialization to avoid driver OOM """
73-
7471
region: Optional[StrictStr] = None
7572
""" AWS Region if applicable for s3-based staging locations"""
7673

@@ -459,7 +456,6 @@ def _should_use_staging_for_arrow(self) -> bool:
459456
offline_store = getattr(self._config, "offline_store", None)
460457
return bool(
461458
isinstance(offline_store, SparkOfflineStoreConfig)
462-
and getattr(offline_store, "staging_allow_materialize", False)
463459
and getattr(offline_store, "staging_location", None)
464460
)
465461

sdk/python/tests/unit/infra/offline_stores/contrib/spark_offline_store/test_spark.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,6 @@ def test_to_arrow_uses_staging_when_enabled(monkeypatch, tmp_path):
518518
offline_store=SparkOfflineStoreConfig(
519519
type="spark",
520520
staging_location=str(tmp_path),
521-
staging_allow_materialize=True,
522521
),
523522
)
524523

@@ -555,7 +554,6 @@ def test_to_arrow_normalizes_local_staging_paths(monkeypatch, tmp_path):
555554
offline_store=SparkOfflineStoreConfig(
556555
type="spark",
557556
staging_location=str(tmp_path / "local"),
558-
staging_allow_materialize=True,
559557
),
560558
)
561559

@@ -590,8 +588,7 @@ def test_to_arrow_falls_back_to_pandas_when_staging_disabled(monkeypatch):
590588
provider="local",
591589
offline_store=SparkOfflineStoreConfig(
592590
type="spark",
593-
staging_location="/tmp",
594-
staging_allow_materialize=False,
591+
staging_location=None,
595592
),
596593
)
597594

0 commit comments

Comments
 (0)