Merged
34 commits
0639882  Merge pull request #1274 from openml/develop (PGijsbers, Jul 20, 2023)
12c3d30  [pre-commit.ci] pre-commit autoupdate (pre-commit-ci[bot], Aug 1, 2023)
2397231  Raise correct TypeError and improve type check (PGijsbers, Aug 1, 2023)
2801e9d  Type check with isinstance instead of type() == (PGijsbers, Aug 1, 2023)
241608a  Docker enhancement #1277 (#1278) (PGijsbers, Aug 15, 2023)
1cc1169  fix: carefully replaced minio_url with parquet_url (#1280) (varshneydevansh, Aug 17, 2023)
5dc10b6  Pytest/utils (#1269) (PGijsbers, Sep 20, 2023)
895aefa  Documented remaining Attributes of classes and functions (#1283) (v-parmar, Oct 31, 2023)
8a039ed  [pre-commit.ci] pre-commit autoupdate (#1281) (pre-commit-ci[bot], Nov 14, 2023)
abf7caa  [pre-commit.ci] pre-commit autoupdate (#1291) (pre-commit-ci[bot], Jan 2, 2024)
2547217  Bump actions/checkout from 3 to 4 (#1284) (dependabot[bot], Jan 2, 2024)
8014987  Bump docker/login-action from 2 to 3 (#1285) (dependabot[bot], Jan 2, 2024)
e340838  Bump docker/metadata-action from 4 to 5 (#1286) (dependabot[bot], Jan 2, 2024)
015f3cd  Bump docker/setup-qemu-action from 2 to 3 (#1287) (dependabot[bot], Jan 2, 2024)
0d100db  Bump docker/build-push-action from 4 to 5 (#1288) (dependabot[bot], Jan 2, 2024)
c186c64  Add more type annotations (#1261) (mfeurer, Jan 2, 2024)
f750f99  Bump docker/setup-buildx-action from 2 to 3 (#1292) (dependabot[bot], Jan 4, 2024)
fe5403a  Bump actions/setup-python from 4 to 5 (#1293) (dependabot[bot], Jan 4, 2024)
0eb0b72  Rework Tagging Tests for New Server Specification (#1294) (LennartPurucker, Jan 5, 2024)
054ec33  ci: Update tooling (#1298) (eddiebergman, Jan 8, 2024)
ee1231b  ci: Disable 3.6 tests (#1302) (eddiebergman, Jan 8, 2024)
b217a3f  fix: Chipping away at ruff lints (#1303) (eddiebergman, Jan 8, 2024)
88cfa21  Tagging constraints (#1305) (janvanrijn, Jan 9, 2024)
3c5a4c0  [pre-commit.ci] pre-commit autoupdate (#1306) (pre-commit-ci[bot], Jan 9, 2024)
c771a78  Linting Everything - Fix All mypy and ruff Errors (#1307) (LennartPurucker, Jan 9, 2024)
6e13e85  ci: Remove Python 3.6/7 (#1308) (eddiebergman, Jan 9, 2024)
1b27d14  ci: remove 3.7 patch (#1309) (LennartPurucker, Jan 10, 2024)
bd193bb  Make Test Work Again After Ruff and Linter Changes (#1310) (LennartPurucker, Jan 12, 2024)
18ead2f  Add Feature Descriptions Rebase Clean (#1316) (LennartPurucker, Jan 12, 2024)
c93b022  Make Class Label Retrieval More Lenient (#1315) (LennartPurucker, Jan 12, 2024)
a7cb9a5  [pre-commit.ci] pre-commit autoupdate (#1318) (pre-commit-ci[bot], Jan 15, 2024)
f730ebc  Fix: update fetching a bucket from MinIO (#1314) (eddiebergman, Jan 17, 2024)
d3f9de6  Update progress.rst for minor release (#1319) (LennartPurucker, Jan 18, 2024)
1d26ad5  Prepare Develop for Merge with Main (#1321) (LennartPurucker, Jan 18, 2024)
Pytest/utils (#1269)
* Extract mocked_perform_api_call because it's independent of the object

* Remove _multiprocess_can_split_ as it is a nose directive and we use pytest

* Convert test_list_all

* Add markers and refactor test_list_all_for_tasks for pytest

* Add cache marker

* Converted remainder of tests to pytest
PGijsbers authored and eddiebergman committed Jan 18, 2024
commit 5dc10b666293c07babb73a60c86bc4a3f5812eea
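
The conversion described above follows the usual unittest-to-pytest pattern: test methods bound to a TestCase subclass with self.assert* calls become module-level functions with plain assert statements, and shared setup moves into fixtures. A minimal sketch of that pattern; the example_* names below are illustrative and not part of this diff:

import pytest


# Before (unittest/nose style): assertions bound to a TestCase instance.
# class ExampleTest(unittest.TestCase):
#     def test_sum(self):
#         self.assertEqual(sum([1, 2]), 3)


@pytest.fixture
def example_numbers():
    # Shared setup expressed as a fixture instead of setUp().
    return [1, 2]


def test_sum(example_numbers):
    # After (pytest style): a plain function, a plain assert,
    # and the fixture injected by parameter name.
    assert sum(example_numbers) == 3
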
10 changes: 10 additions & 0 deletions openml/testing.py
@@ -19,6 +19,15 @@
import logging


def _check_dataset(dataset):
    assert isinstance(dataset, dict)
    assert 2 <= len(dataset)
    assert "did" in dataset
    assert isinstance(dataset["did"], int)
    assert "status" in dataset
    assert dataset["status"] in ["in_preparation", "active", "deactivated"]


class TestBase(unittest.TestCase):
"""Base class for tests

@@ -177,6 +186,7 @@ def _add_sentinel_to_flow_name(self, flow, sentinel=None):
        return flow, sentinel

    def _check_dataset(self, dataset):
        _check_dataset(dataset)
        self.assertEqual(type(dataset), dict)
        self.assertGreaterEqual(len(dataset), 2)
        self.assertIn("did", dataset)
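
Extracting _check_dataset to module level means pytest-style test functions can validate a dataset listing entry without instantiating TestBase, while the old method now simply delegates to it. A small sketch of calling the free function from a plain test; the dataset dict below is made up for illustration:

from openml.testing import _check_dataset


def test_check_dataset_accepts_minimal_listing_entry():
    # Hypothetical listing entry containing the fields _check_dataset asserts on.
    dataset = {"did": 61, "status": "active"}
    _check_dataset(dataset)  # raises AssertionError if the entry is malformed
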
6 changes: 6 additions & 0 deletions setup.cfg
@@ -4,3 +4,9 @@ description-file = README.md
[tool:pytest]
filterwarnings =
    ignore:the matrix subclass:PendingDeprecationWarning
markers=
    server: anything that connects to a server
    upload: anything that uploads to a server
    production: any interaction with the production server
    cache: anything that interacts with the (test) cache
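
Registering the markers in setup.cfg lets tests be tagged by the kind of server interaction they need and then selected or excluded on the command line without pytest warning about unknown marks. A hedged sketch of applying one of the registered markers; the test body is illustrative only:

import pytest


@pytest.mark.server  # one of the markers declared under [tool:pytest] above
def test_talks_to_a_server():
    # Run only server tests with `pytest -m server`,
    # or skip them with `pytest -m "not server"`.
    assert True
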

277 changes: 163 additions & 114 deletions tests/test_utils/test_utils.py
@@ -1,118 +1,167 @@
import os
import tempfile
import unittest.mock

import openml
from openml.testing import TestBase


class OpenMLTaskTest(TestBase):
    _multiprocess_can_split_ = True

    def mocked_perform_api_call(call, request_method):
        # TODO: JvR: Why is this not a staticmethod?
        url = openml.config.server + "/" + call
        return openml._api_calls._download_text_file(url)

    def test_list_all(self):
        openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks)
        openml.utils._list_all(
            listing_call=openml.tasks.functions._list_tasks, output_format="dataframe"
        )

    def test_list_all_with_multiple_batches(self):
        res = openml.utils._list_all(
            listing_call=openml.tasks.functions._list_tasks, output_format="dict", batch_size=1050
        )
        # Verify that test server state is still valid for this test to work as intended
        # -> If the number of results is less than 1050, the test can not test the
        # batching operation. By having more than 1050 results we know that batching
        # was triggered. 1050 appears to be a number of tasks that is available on a fresh
        # test server.
        assert len(res) > 1050
        openml.utils._list_all(
            listing_call=openml.tasks.functions._list_tasks,
            output_format="dataframe",
            batch_size=1050,
        )
        # Comparing the number of tasks is not possible as other unit tests running in
        # parallel might be adding or removing tasks!
        # assert len(res) <= len(res2)

    @unittest.mock.patch("openml._api_calls._perform_api_call", side_effect=mocked_perform_api_call)
    def test_list_all_few_results_available(self, _perform_api_call):
        # we want to make sure that the number of api calls is only 1.
        # Although we have multiple versions of the iris dataset, there is only
        # one with this name/version combination

        datasets = openml.datasets.list_datasets(
            size=1000, data_name="iris", data_version=1, output_format="dataframe"
        )
        self.assertEqual(len(datasets), 1)
        self.assertEqual(_perform_api_call.call_count, 1)

    def test_list_all_for_datasets(self):
        required_size = 127  # default test server reset value
        datasets = openml.datasets.list_datasets(
            batch_size=100, size=required_size, output_format="dataframe"
        )

        self.assertEqual(len(datasets), required_size)
        for dataset in datasets.to_dict(orient="index").values():
            self._check_dataset(dataset)

    def test_list_all_for_tasks(self):
        required_size = 1068  # default test server reset value
        tasks = openml.tasks.list_tasks(
            batch_size=1000, size=required_size, output_format="dataframe"
        )
        self.assertEqual(len(tasks), required_size)

    def test_list_all_for_flows(self):
        required_size = 15  # default test server reset value
        flows = openml.flows.list_flows(
            batch_size=25, size=required_size, output_format="dataframe"
        )
        self.assertEqual(len(flows), required_size)

    def test_list_all_for_setups(self):
        required_size = 50
        # TODO apparently list_setups function does not support kwargs
        setups = openml.setups.list_setups(size=required_size)

        # might not be on test server after reset, please rerun test at least once if fails
        self.assertEqual(len(setups), required_size)

    def test_list_all_for_runs(self):
        required_size = 21
        runs = openml.runs.list_runs(batch_size=25, size=required_size)

        # might not be on test server after reset, please rerun test at least once if fails
        self.assertEqual(len(runs), required_size)

    def test_list_all_for_evaluations(self):
        required_size = 22
        # TODO apparently list_evaluations function does not support kwargs
        evaluations = openml.evaluations.list_evaluations(
            function="predictive_accuracy", size=required_size
        )

        # might not be on test server after reset, please rerun test at least once if fails
        self.assertEqual(len(evaluations), required_size)

    @unittest.mock.patch("openml.config.get_cache_directory")
    @unittest.skipIf(os.name == "nt", "https://github.com/openml/openml-python/issues/1033")
    def test__create_cache_directory(self, config_mock):
        with tempfile.TemporaryDirectory(dir=self.workdir) as td:
            config_mock.return_value = td
            openml.utils._create_cache_directory("abc")
            self.assertTrue(os.path.exists(os.path.join(td, "abc")))
            subdir = os.path.join(td, "def")
            os.mkdir(subdir)
            os.chmod(subdir, 0o444)
            config_mock.return_value = subdir
            with self.assertRaisesRegex(
                openml.exceptions.OpenMLCacheException,
                r"Cannot create cache directory",
            ):
                openml.utils._create_cache_directory("ghi")
from openml.testing import _check_dataset

import pytest


@pytest.fixture(autouse=True)
def as_robot():
    policy = openml.config.retry_policy
    n_retries = openml.config.connection_n_retries
    openml.config.set_retry_policy("robot", n_retries=20)
    yield
    openml.config.set_retry_policy(policy, n_retries)


@pytest.fixture(autouse=True)
def with_test_server():
    openml.config.start_using_configuration_for_example()
    yield
    openml.config.stop_using_configuration_for_example()


@pytest.fixture
def min_number_tasks_on_test_server() -> int:
    """After a reset at least 1068 tasks are on the test server"""
    return 1068


@pytest.fixture
def min_number_datasets_on_test_server() -> int:
    """After a reset at least 127 datasets are on the test server"""
    return 127


@pytest.fixture
def min_number_flows_on_test_server() -> int:
"""After a reset at least 127 flows are on the test server"""
return 15


@pytest.fixture
def min_number_setups_on_test_server() -> int:
    """After a reset at least 50 setups are on the test server"""
    return 50


@pytest.fixture
def min_number_runs_on_test_server() -> int:
"""After a reset at least 50 runs are on the test server"""
return 21


@pytest.fixture
def min_number_evaluations_on_test_server() -> int:
    """After a reset at least 22 evaluations are on the test server"""
    return 22


def _mocked_perform_api_call(call, request_method):
    url = openml.config.server + "/" + call
    return openml._api_calls._download_text_file(url)


@pytest.mark.server
def test_list_all():
    openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks)
    openml.utils._list_all(
        listing_call=openml.tasks.functions._list_tasks, output_format="dataframe"
    )


@pytest.mark.server
def test_list_all_for_tasks(min_number_tasks_on_test_server):
    tasks = openml.tasks.list_tasks(
        batch_size=1000,
        size=min_number_tasks_on_test_server,
        output_format="dataframe",
    )
    assert min_number_tasks_on_test_server == len(tasks)


@pytest.mark.server
def test_list_all_with_multiple_batches(min_number_tasks_on_test_server):
    # By setting the batch size one lower than the minimum we guarantee at least two
    # batches and at the same time do as few batches (roundtrips) as possible.
    batch_size = min_number_tasks_on_test_server - 1
    res = openml.utils._list_all(
        listing_call=openml.tasks.functions._list_tasks,
        output_format="dataframe",
        batch_size=batch_size,
    )
    assert min_number_tasks_on_test_server <= len(res)


@pytest.mark.server
def test_list_all_for_datasets(min_number_datasets_on_test_server):
    datasets = openml.datasets.list_datasets(
        batch_size=100, size=min_number_datasets_on_test_server, output_format="dataframe"
    )

    assert min_number_datasets_on_test_server == len(datasets)
    for dataset in datasets.to_dict(orient="index").values():
        _check_dataset(dataset)


@pytest.mark.server
def test_list_all_for_flows(min_number_flows_on_test_server):
    flows = openml.flows.list_flows(
        batch_size=25, size=min_number_flows_on_test_server, output_format="dataframe"
    )
    assert min_number_flows_on_test_server == len(flows)


@pytest.mark.server
@pytest.mark.flaky  # Other tests might need to upload runs first
def test_list_all_for_setups(min_number_setups_on_test_server):
    # TODO apparently list_setups function does not support kwargs
    setups = openml.setups.list_setups(size=min_number_setups_on_test_server)
    assert min_number_setups_on_test_server == len(setups)


@pytest.mark.server
@pytest.mark.flaky  # Other tests might need to upload runs first
def test_list_all_for_runs(min_number_runs_on_test_server):
    runs = openml.runs.list_runs(batch_size=25, size=min_number_runs_on_test_server)
    assert min_number_runs_on_test_server == len(runs)


@pytest.mark.server
@pytest.mark.flaky  # Other tests might need to upload runs first
def test_list_all_for_evaluations(min_number_evaluations_on_test_server):
    # TODO apparently list_evaluations function does not support kwargs
    evaluations = openml.evaluations.list_evaluations(
        function="predictive_accuracy", size=min_number_evaluations_on_test_server
    )
    assert min_number_evaluations_on_test_server == len(evaluations)


@pytest.mark.server
@unittest.mock.patch("openml._api_calls._perform_api_call", side_effect=_mocked_perform_api_call)
def test_list_all_few_results_available(_perform_api_call):
    datasets = openml.datasets.list_datasets(
        size=1000, data_name="iris", data_version=1, output_format="dataframe"
    )
    assert 1 == len(datasets), "only one iris dataset version 1 should be present"
    assert 1 == _perform_api_call.call_count, "expect just one call to get one dataset"


@unittest.skipIf(os.name == "nt", "https://github.com/openml/openml-python/issues/1033")
@unittest.mock.patch("openml.config.get_cache_directory")
def test__create_cache_directory(config_mock, tmp_path):
    config_mock.return_value = tmp_path
    openml.utils._create_cache_directory("abc")
    assert (tmp_path / "abc").exists()

    subdir = tmp_path / "def"
    subdir.mkdir()
    subdir.chmod(0o444)
    config_mock.return_value = subdir
    with pytest.raises(
        openml.exceptions.OpenMLCacheException,
        match="Cannot create cache directory",
    ):
        openml.utils._create_cache_directory("ghi")
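
The two autouse fixtures near the top of the new file wrap every test in the module: as_robot saves the current retry policy, switches to a more persistent one, and restores the original afterwards, while with_test_server points the client at the test server for the duration of each test. A minimal sketch of the same save-yield-restore pattern, using a hypothetical module-level setting rather than the real OpenML config:

import pytest

# Hypothetical setting used only to illustrate the pattern.
SETTINGS = {"timeout": 5}


@pytest.fixture(autouse=True)
def short_timeout():
    # Applied to every test in the module without being requested by name.
    previous = SETTINGS["timeout"]  # save the current value
    SETTINGS["timeout"] = 1         # change it for the duration of the test
    yield                           # the test body runs here
    SETTINGS["timeout"] = previous  # restore it, even if the test failed


def test_uses_short_timeout():
    assert SETTINGS["timeout"] == 1
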