Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
feat: adjust ray offline store to support abfs(s) ADLS Azure Storage
Signed-off-by: Jonas Bauer <jbauer@easy2parts.com>
  • Loading branch information
p3s-jbauer authored and ntkathole committed Jan 30, 2026
commit d07896c1e0e33b5d91e413ebad53c163f812e9d6
Original file line number Diff line number Diff line change
Expand Up @@ -1160,13 +1160,17 @@ def persist(
f"Ray offline store only supports SavedDatasetFileStorage, got {type(storage)}"
)
destination_path = storage.file_options.uri
if not destination_path.startswith(("s3://", "gs://", "hdfs://")):
if not destination_path.startswith(
("s3://", "gs://", "hdfs://", "abfs://", "abfss://")
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

# Remote storage URI schemes supported by the Ray offline store
# S3: Amazon S3
# GCS: Google Cloud Storage
# HDFS: Hadoop Distributed File System
# Azure: Azure Data Lake Storage Gen2 (abfs:// and abfss://)
REMOTE_STORAGE_SCHEMES = ("s3://", "gs://", "hdfs://", "abfs://", "abfss://")

Can we define a constant for the supported remote storage URI schemes at the top of the module and use it at all three locations?

):
if not allow_overwrite and os.path.exists(destination_path):
raise SavedDatasetLocationAlreadyExists(location=destination_path)
try:
ray_ds = self._get_ray_dataset()

if not destination_path.startswith(("s3://", "gs://", "hdfs://")):
if not destination_path.startswith(
("s3://", "gs://", "hdfs://", "abfs://", "abfss://")
):
os.makedirs(os.path.dirname(destination_path), exist_ok=True)

ray_ds.write_parquet(destination_path)
Expand Down Expand Up @@ -1959,7 +1963,7 @@ def normalize_timestamps(batch: pd.DataFrame) -> pd.DataFrame:
path_obj = Path(resolved_path)
if path_obj.suffix == ".parquet":
path_obj = path_obj.with_suffix("")
if not absolute_path.startswith(("s3://", "gs://")):
if not absolute_path.startswith(("s3://", "gs://", "abfs://", "abfss://")):
Comment thread
devin-ai-integration[bot] marked this conversation as resolved.
Outdated
path_obj.mkdir(parents=True, exist_ok=True)
ds.write_parquet(str(path_obj))
except Exception as e:
Expand Down
1 change: 1 addition & 0 deletions test_registry
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1"$da30d864-b635-49a1-aa21-43386ad9101b* ԩ������