Skip to content

Commit a92e56d

Browse files
committed
feat: Support GCS filesystem for bytewax engine
Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>
1 parent 6a728fe commit a92e56d

File tree

2 files changed

+11
-2
lines changed

2 files changed

+11
-2
lines changed

sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_dataflow.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
import pyarrow as pa
44
import pyarrow.parquet as pq
5-
import s3fs
65
from bytewax.dataflow import Dataflow # type: ignore
76
from bytewax.execution import cluster_main
87
from bytewax.inputs import ManualInputConfig, distribute
@@ -29,7 +28,16 @@ def __init__(
2928
self._run_dataflow()
3029

3130
def process_path(self, path):
32-
fs = s3fs.S3FileSystem()
31+
if path.startswith("s3://"):
32+
import s3fs
33+
34+
fs = s3fs.S3FileSystem()
35+
elif path.startswith("gs://"):
36+
import gcsfs
37+
38+
fs = gcsfs.GCSFileSystem()
39+
else:
40+
raise NotImplementedError(f"Unsupported path: {path}")
3341
dataset = pq.ParquetDataset(path, filesystem=fs, use_legacy_dataset=False)
3442
batches = []
3543
for fragment in dataset.fragments:

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@
9191
"google-cloud-datastore>=2.1.0,<3",
9292
"google-cloud-storage>=1.34.0,<3",
9393
"google-cloud-bigtable>=2.11.0,<3",
94+
"gcsfs>=2023.3.0,<2024.0.0",
9495
]
9596

9697
REDIS_REQUIRED = [

0 commit comments

Comments
 (0)