Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions openml/_api_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,17 +193,18 @@ def _download_minio_bucket(source: str, destination: str | Path) -> None:
parsed_url = urllib.parse.urlparse(source)

# expect path format: /BUCKET/path/to/file.ext
bucket = parsed_url.path[1:]
_, bucket, *prefixes, _file = parsed_url.path.split("/")
prefix = "/".join(prefixes)

client = minio.Minio(endpoint=parsed_url.netloc, secure=False)

for file_object in client.list_objects(bucket, recursive=True):
for file_object in client.list_objects(bucket, prefix=prefix, recursive=True):
if file_object.object_name is None:
raise ValueError("Object name is None.")

_download_minio_file(
source=source + "/" + file_object.object_name,
destination=Path(destination, file_object.object_name),
source=source.rsplit("/", 1)[0] + "/" + file_object.object_name.rsplit("/", 1)[1],
destination=Path(destination, file_object.object_name.rsplit("/", 1)[1]),
exists_ok=True,
)

Expand Down
3 changes: 0 additions & 3 deletions openml/datasets/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1211,9 +1211,6 @@ def _get_dataset_parquet(
# For now, it would be the only way for the user to fetch the additional
# files in the bucket (no function exists on an OpenMLDataset to do this).
if download_all_files:
if url.endswith(".pq"):
url, _ = url.rsplit("/", maxsplit=1)

openml._api_calls._download_minio_bucket(source=url, destination=cache_directory)

if not output_file_path.is_file():
Expand Down