Skip to content
Prev Previous commit
Next Next commit
Make download_qualities default to True
  • Loading branch information
Mohamed Adel committed Jan 7, 2021
commit 6cc19526a2f9ab0c450c5c7222715efc4fdd5055
15 changes: 8 additions & 7 deletions openml/datasets/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,8 +310,7 @@ def _name_to_id(


def get_datasets(
dataset_ids: List[Union[str, int]], download_data: bool = True,
download_qualities: bool = True
dataset_ids: List[Union[str, int]], download_data: bool = True, download_qualities: bool = True
) -> List[OpenMLDataset]:
"""Download datasets.

Expand Down Expand Up @@ -346,7 +345,8 @@ def get_dataset(
version: int = None,
error_if_multiple: bool = False,
cache_format: str = "pickle",
download_qualities: bool = True) -> OpenMLDataset:
download_qualities: bool = True,
) -> OpenMLDataset:
""" Download the OpenML dataset representation, optionally also download actual data file.

This function is thread/multiprocessing safe.
Expand Down Expand Up @@ -406,7 +406,9 @@ def get_dataset(
features_file = _get_dataset_features_file(did_cache_dir, dataset_id)

try:
qualities_file = _get_dataset_qualities_file(did_cache_dir, dataset_id,download_qualities)
qualities_file = _get_dataset_qualities_file(
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it makes more sense to check `download_qualities` here and skip the call to `_get_dataset_qualities_file` entirely when we don't want to process the qualities.

did_cache_dir, dataset_id, download_qualities
)
except OpenMLServerException as e:
if e.code == 362 and str(e) == "No qualities found - None":
logger.warning("No qualities found for dataset {}".format(dataset_id))
Expand Down Expand Up @@ -982,7 +984,7 @@ def _get_dataset_features_file(did_cache_dir: str, dataset_id: int) -> str:
return features_file


def _get_dataset_qualities_file(did_cache_dir, dataset_id,download_qualities):
def _get_dataset_qualities_file(did_cache_dir, dataset_id, download_qualities=True):
"""API call to load dataset qualities. Loads from cache or downloads them.

Qualities are metafeatures (number of features, number of classes, ...)
Expand All @@ -1002,7 +1004,7 @@ def _get_dataset_qualities_file(did_cache_dir, dataset_id,download_qualities):
str
Path of the cached qualities file
"""
if download_qualities == True:
if download_qualities:
# Dataset qualities are subject to change and must be fetched every time
qualities_file = os.path.join(did_cache_dir, "qualities.xml")
try:
Expand All @@ -1017,7 +1019,6 @@ def _get_dataset_qualities_file(did_cache_dir, dataset_id,download_qualities):
return qualities_file
else:
pass



def _create_dataset_from_description(
Expand Down