Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/progress.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Changelog

0.11.1
~~~~~~
* MAINT #671: Improved the performance of ``check_datasets_active`` by querying only the given list of datasets instead of all datasets. Modified the corresponding unit test.

0.11.0
~~~~~~
Expand Down
19 changes: 16 additions & 3 deletions openml/datasets/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,27 +333,40 @@ def _load_features_from_file(features_file: str) -> Dict:
return xml_dict["oml:data_features"]


def check_datasets_active(dataset_ids: List[int]) -> Dict[int, bool]:
def check_datasets_active(
dataset_ids: List[int],
raise_error_if_not_exist: bool = True,
) -> Dict[int, bool]:
"""
Check if the dataset ids provided are active.

If ``raise_error_if_not_exist`` is True, raises an error when a dataset_id
in the given list of dataset_ids does not exist on the server.

Parameters
----------
dataset_ids : List[int]
A list of integers representing dataset ids.
raise_error_if_not_exist : bool (default=True)
If True, raise a ValueError when one or more of the given dataset ids
do not exist on the server. If False, such ids are omitted from the result.

Returns
-------
dict
A dictionary with items {did: bool}
"""
dataset_list = list_datasets(status="all")
dataset_list = list_datasets(
data_id=dataset_ids,
status="all",
)
active = {}

for did in dataset_ids:
dataset = dataset_list.get(did, None)
if dataset is None:
raise ValueError("Could not find dataset {} in OpenML dataset list.".format(did))
if raise_error_if_not_exist:
raise ValueError(f'Could not find dataset {did} in OpenML dataset list.')
else:
active[did] = dataset["status"] == "active"

Expand Down
6 changes: 5 additions & 1 deletion tests/test_datasets/test_dataset_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,9 +227,13 @@ def test_list_datasets_empty(self):
def test_check_datasets_active(self):
# Have to test on live because there is no deactivated dataset on the test server.
openml.config.server = self.production_server
active = openml.datasets.check_datasets_active([2, 17])
active = openml.datasets.check_datasets_active(
[2, 17, 79],
raise_error_if_not_exist=False,
)
self.assertTrue(active[2])
self.assertFalse(active[17])
self.assertIsNone(active.get(79))
self.assertRaisesRegex(
ValueError,
"Could not find dataset 79 in OpenML dataset list.",
Expand Down