Skip to content

Commit 55c7196

Browse files
author
Mohamed Adel
committed
Merge branch 'feature/give_possibility_to_not_download_the_dataset_qualities' of https://github.com/a-moadel/openml-python into feature/give_possibility_to_not_download_the_dataset_qualities
2 parents 63b791d + 84b9a8f commit 55c7196

File tree

2 files changed

+17
-7
lines changed

2 files changed

+17
-7
lines changed

openml/datasets/functions.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ def _name_to_id(
290290
error_if_multiple : bool (default=False)
291291
If `False`, if multiple datasets match, return the least recent active dataset.
292292
If `True`, if multiple datasets match, raise an error.
293-
download_qualities : bool, optional
293+
download_qualities : bool, optional (default=True)
294294
If `True`, also download qualities.xml file. If false use the file if it was cached.
295295
296296
Returns
@@ -328,7 +328,7 @@ def get_datasets(
328328
make the operation noticeably slower. Metadata is also still retrieved.
329329
If False, create the OpenMLDataset and only populate it with the metadata.
330330
The data may later be retrieved through the `OpenMLDataset.get_data` method.
331-
download_qualities : bool, optional
331+
download_qualities : bool, optional (default=True)
332332
If True, also download qualities.xml file. If false use the file if it was cached.
333333
334334
Returns
@@ -1005,22 +1005,27 @@ def _get_dataset_qualities_file(did_cache_dir, dataset_id, download_qualities=Tr
10051005
dataset_id : int
10061006
Dataset ID
10071007
1008+
download_qualities : bool
1009+
Whether to download the qualities file; if False, the cache is bypassed and an empty path is returned.
10081010
Returns
10091011
-------
10101012
str
10111013
Path of the cached qualities file
10121014
"""
1015+
# Return an empty path to avoid using the cached version; this makes the output consistent
1016+
# regardless of the cache state.
1017+
if not download_qualities:
1018+
return ""
10131019
# Dataset qualities are subject to change and must be fetched every time
10141020
qualities_file = os.path.join(did_cache_dir, "qualities.xml")
10151021
try:
10161022
with io.open(qualities_file, encoding="utf8") as fh:
10171023
qualities_xml = fh.read()
10181024
except (OSError, IOError):
1019-
if download_qualities:
1020-
url_extension = "data/qualities/{}".format(dataset_id)
1021-
qualities_xml = openml._api_calls._perform_api_call(url_extension, "get")
1022-
with io.open(qualities_file, "w", encoding="utf8") as fh:
1023-
fh.write(qualities_xml)
1025+
url_extension = "data/qualities/{}".format(dataset_id)
1026+
qualities_xml = openml._api_calls._perform_api_call(url_extension, "get")
1027+
with io.open(qualities_file, "w", encoding="utf8") as fh:
1028+
fh.write(qualities_xml)
10241029
return qualities_file
10251030

10261031

tests/test_datasets/test_dataset_functions.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,11 @@ def test__get_dataset_qualities(self):
433433
qualities_xml_path = os.path.join(self.workdir, "qualities.xml")
434434
self.assertTrue(os.path.exists(qualities_xml_path))
435435

436+
def test__get_dataset_qualities_skip_download(self):
437+
qualities = _get_dataset_qualities_file(self.workdir, 2, False)
438+
self.assertIsInstance(qualities, str)
439+
self.assertEqual(qualities, "")
440+
436441
def test_deletion_of_cache_dir(self):
437442
# Simple removal
438443
did_cache_dir = _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, 1,)

0 commit comments

Comments
 (0)