Skip to content

Commit 5dc10b6

Browse files
PGijsbers and eddiebergman
authored and committed
Pytest/utils (#1269)
* Extract mocked_perform_api_call because it is independent of the object
* Remove _multiprocess_can_split_ as it is a nose directive and we use pytest
* Convert test_list_all
* Add markers and refactor test_list_all_for_tasks for pytest
* Add cache marker
* Converted remainder of tests to pytest
1 parent 1cc1169 commit 5dc10b6

File tree

3 files changed

+179
-114
lines changed

3 files changed

+179
-114
lines changed

openml/testing.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,15 @@
1919
import logging
2020

2121

22+
def _check_dataset(dataset):
23+
assert isinstance(dataset, dict)
24+
assert 2 <= len(dataset)
25+
assert "did" in dataset
26+
assert isinstance(dataset["did"], int)
27+
assert "status" in dataset
28+
assert dataset["status"] in ["in_preparation", "active", "deactivated"]
29+
30+
2231
class TestBase(unittest.TestCase):
2332
"""Base class for tests
2433
@@ -177,6 +186,7 @@ def _add_sentinel_to_flow_name(self, flow, sentinel=None):
177186
return flow, sentinel
178187

179188
def _check_dataset(self, dataset):
189+
_check_dataset(dataset)
180190
self.assertEqual(type(dataset), dict)
181191
self.assertGreaterEqual(len(dataset), 2)
182192
self.assertIn("did", dataset)

setup.cfg

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,9 @@ description-file = README.md
44
[tool:pytest]
55
filterwarnings =
66
ignore:the matrix subclass:PendingDeprecationWarning
7+
markers=
8+
server: anything that connects to a server
9+
upload: anything that uploads to a server
10+
production: any interaction with the production server
11+
cache: anything that interacts with the (test) cache
12+

tests/test_utils/test_utils.py

Lines changed: 163 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -1,118 +1,167 @@
11
import os
2-
import tempfile
32
import unittest.mock
43

54
import openml
6-
from openml.testing import TestBase
7-
8-
9-
class OpenMLTaskTest(TestBase):
10-
_multiprocess_can_split_ = True
11-
12-
def mocked_perform_api_call(call, request_method):
13-
# TODO: JvR: Why is this not a staticmethod?
14-
url = openml.config.server + "/" + call
15-
return openml._api_calls._download_text_file(url)
16-
17-
def test_list_all(self):
18-
openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks)
19-
openml.utils._list_all(
20-
listing_call=openml.tasks.functions._list_tasks, output_format="dataframe"
21-
)
22-
23-
def test_list_all_with_multiple_batches(self):
24-
res = openml.utils._list_all(
25-
listing_call=openml.tasks.functions._list_tasks, output_format="dict", batch_size=1050
26-
)
27-
# Verify that test server state is still valid for this test to work as intended
28-
# -> If the number of results is less than 1050, the test can not test the
29-
# batching operation. By having more than 1050 results we know that batching
30-
# was triggered. 1050 appears to be a number of tasks that is available on a fresh
31-
# test server.
32-
assert len(res) > 1050
33-
openml.utils._list_all(
34-
listing_call=openml.tasks.functions._list_tasks,
35-
output_format="dataframe",
36-
batch_size=1050,
37-
)
38-
# Comparing the number of tasks is not possible as other unit tests running in
39-
# parallel might be adding or removing tasks!
40-
# assert len(res) <= len(res2)
41-
42-
@unittest.mock.patch("openml._api_calls._perform_api_call", side_effect=mocked_perform_api_call)
43-
def test_list_all_few_results_available(self, _perform_api_call):
44-
# we want to make sure that the number of api calls is only 1.
45-
# Although we have multiple versions of the iris dataset, there is only
46-
# one with this name/version combination
47-
48-
datasets = openml.datasets.list_datasets(
49-
size=1000, data_name="iris", data_version=1, output_format="dataframe"
50-
)
51-
self.assertEqual(len(datasets), 1)
52-
self.assertEqual(_perform_api_call.call_count, 1)
53-
54-
def test_list_all_for_datasets(self):
55-
required_size = 127 # default test server reset value
56-
datasets = openml.datasets.list_datasets(
57-
batch_size=100, size=required_size, output_format="dataframe"
58-
)
59-
60-
self.assertEqual(len(datasets), required_size)
61-
for dataset in datasets.to_dict(orient="index").values():
62-
self._check_dataset(dataset)
63-
64-
def test_list_all_for_tasks(self):
65-
required_size = 1068 # default test server reset value
66-
tasks = openml.tasks.list_tasks(
67-
batch_size=1000, size=required_size, output_format="dataframe"
68-
)
69-
self.assertEqual(len(tasks), required_size)
70-
71-
def test_list_all_for_flows(self):
72-
required_size = 15 # default test server reset value
73-
flows = openml.flows.list_flows(
74-
batch_size=25, size=required_size, output_format="dataframe"
75-
)
76-
self.assertEqual(len(flows), required_size)
77-
78-
def test_list_all_for_setups(self):
79-
required_size = 50
80-
# TODO apparently list_setups function does not support kwargs
81-
setups = openml.setups.list_setups(size=required_size)
82-
83-
# might not be on test server after reset, please rerun test at least once if fails
84-
self.assertEqual(len(setups), required_size)
85-
86-
def test_list_all_for_runs(self):
87-
required_size = 21
88-
runs = openml.runs.list_runs(batch_size=25, size=required_size)
89-
90-
# might not be on test server after reset, please rerun test at least once if fails
91-
self.assertEqual(len(runs), required_size)
92-
93-
def test_list_all_for_evaluations(self):
94-
required_size = 22
95-
# TODO apparently list_evaluations function does not support kwargs
96-
evaluations = openml.evaluations.list_evaluations(
97-
function="predictive_accuracy", size=required_size
98-
)
99-
100-
# might not be on test server after reset, please rerun test at least once if fails
101-
self.assertEqual(len(evaluations), required_size)
102-
103-
@unittest.mock.patch("openml.config.get_cache_directory")
104-
@unittest.skipIf(os.name == "nt", "https://github.com/openml/openml-python/issues/1033")
105-
def test__create_cache_directory(self, config_mock):
106-
with tempfile.TemporaryDirectory(dir=self.workdir) as td:
107-
config_mock.return_value = td
108-
openml.utils._create_cache_directory("abc")
109-
self.assertTrue(os.path.exists(os.path.join(td, "abc")))
110-
subdir = os.path.join(td, "def")
111-
os.mkdir(subdir)
112-
os.chmod(subdir, 0o444)
113-
config_mock.return_value = subdir
114-
with self.assertRaisesRegex(
115-
openml.exceptions.OpenMLCacheException,
116-
r"Cannot create cache directory",
117-
):
118-
openml.utils._create_cache_directory("ghi")
5+
from openml.testing import _check_dataset
6+
7+
import pytest
8+
9+
10+
@pytest.fixture(autouse=True)
def as_robot():
    """Switch to the lenient 'robot' retry policy for each test, then restore."""
    previous_policy = openml.config.retry_policy
    previous_n_retries = openml.config.connection_n_retries
    openml.config.set_retry_policy("robot", n_retries=20)
    yield
    openml.config.set_retry_policy(previous_policy, previous_n_retries)
17+
18+
19+
@pytest.fixture(autouse=True)
def with_test_server():
    """Point every test at the example/test server, restoring the configuration afterwards."""
    openml.config.start_using_configuration_for_example()
    yield
    openml.config.stop_using_configuration_for_example()
24+
25+
26+
@pytest.fixture
def min_number_tasks_on_test_server() -> int:
    """Lower bound on the number of tasks present right after a test-server reset."""
    return 1068
30+
31+
32+
@pytest.fixture
def min_number_datasets_on_test_server() -> int:
    """Lower bound on the number of datasets present right after a test-server reset."""
    return 127
36+
37+
38+
@pytest.fixture
def min_number_flows_on_test_server() -> int:
    """After a reset at least 15 flows are on the test server"""
    # NOTE: the docstring previously claimed 127 (copy-paste from the
    # datasets fixture); the fixture value is and was 15.
    return 15
42+
43+
44+
@pytest.fixture
def min_number_setups_on_test_server() -> int:
    """Lower bound on the number of setups present right after a test-server reset."""
    return 50
48+
49+
50+
@pytest.fixture
def min_number_runs_on_test_server() -> int:
    """After a reset at least 21 runs are on the test server"""
    # NOTE: the docstring previously claimed 50 (copy-paste from the
    # setups fixture); the fixture value is and was 21.
    return 21
54+
55+
56+
@pytest.fixture
def min_number_evaluations_on_test_server() -> int:
    """Lower bound on the number of evaluations present right after a test-server reset."""
    return 22
60+
61+
62+
def _mocked_perform_api_call(call, request_method):
    """Fetch ``call`` from the configured server directly, bypassing the API-call layer.

    ``request_method`` is unused; it is kept so the signature matches the
    patched ``_perform_api_call``.
    """
    full_url = openml.config.server + "/" + call
    return openml._api_calls._download_text_file(full_url)
65+
66+
67+
@pytest.mark.server
def test_list_all():
    """Smoke test: _list_all works for tasks in both output formats."""
    list_tasks = openml.tasks.functions._list_tasks
    openml.utils._list_all(listing_call=list_tasks)
    openml.utils._list_all(listing_call=list_tasks, output_format="dataframe")
73+
74+
75+
@pytest.mark.server
def test_list_all_for_tasks(min_number_tasks_on_test_server):
    """Listing tasks with batching returns exactly the requested number."""
    expected = min_number_tasks_on_test_server
    tasks = openml.tasks.list_tasks(
        batch_size=1000,
        size=expected,
        output_format="dataframe",
    )
    assert len(tasks) == expected
83+
84+
85+
@pytest.mark.server
def test_list_all_with_multiple_batches(min_number_tasks_on_test_server):
    """_list_all must transparently stitch together more than one batch."""
    # A batch size one below the known minimum guarantees at least two
    # batches while keeping the number of round trips as small as possible.
    listing = openml.utils._list_all(
        listing_call=openml.tasks.functions._list_tasks,
        output_format="dataframe",
        batch_size=min_number_tasks_on_test_server - 1,
    )
    assert len(listing) >= min_number_tasks_on_test_server
96+
97+
98+
@pytest.mark.server
def test_list_all_for_datasets(min_number_datasets_on_test_server):
    """Dataset listing honours the requested size and every entry is well-formed."""
    expected = min_number_datasets_on_test_server
    datasets = openml.datasets.list_datasets(
        batch_size=100, size=expected, output_format="dataframe"
    )

    assert len(datasets) == expected
    for entry in datasets.to_dict(orient="index").values():
        _check_dataset(entry)
107+
108+
109+
@pytest.mark.server
def test_list_all_for_flows(min_number_flows_on_test_server):
    """Flow listing honours the requested size across batches."""
    expected = min_number_flows_on_test_server
    flows = openml.flows.list_flows(
        batch_size=25, size=expected, output_format="dataframe"
    )
    assert len(flows) == expected
115+
116+
117+
@pytest.mark.server
@pytest.mark.flaky  # Other tests might need to upload runs first
def test_list_all_for_setups(min_number_setups_on_test_server):
    """Setup listing returns the requested number of entries."""
    # TODO apparently list_setups function does not support kwargs
    setups = openml.setups.list_setups(size=min_number_setups_on_test_server)
    assert len(setups) == min_number_setups_on_test_server
123+
124+
125+
@pytest.mark.server
@pytest.mark.flaky  # Other tests might need to upload runs first
def test_list_all_for_runs(min_number_runs_on_test_server):
    """Run listing honours the requested size across batches."""
    expected = min_number_runs_on_test_server
    runs = openml.runs.list_runs(batch_size=25, size=expected)
    assert len(runs) == expected
130+
131+
132+
@pytest.mark.server
@pytest.mark.flaky  # Other tests might need to upload runs first
def test_list_all_for_evaluations(min_number_evaluations_on_test_server):
    """Evaluation listing returns the requested number of entries."""
    # TODO apparently list_evaluations function does not support kwargs
    expected = min_number_evaluations_on_test_server
    evaluations = openml.evaluations.list_evaluations(
        function="predictive_accuracy", size=expected
    )
    assert len(evaluations) == expected
140+
141+
142+
@pytest.mark.server
@unittest.mock.patch("openml._api_calls._perform_api_call", side_effect=_mocked_perform_api_call)
def test_list_all_few_results_available(_perform_api_call):
    """A query matching a single dataset should require exactly one API call."""
    # Several iris versions exist, but only one with this name/version pair.
    datasets = openml.datasets.list_datasets(
        size=1000, data_name="iris", data_version=1, output_format="dataframe"
    )
    assert len(datasets) == 1, "only one iris dataset version 1 should be present"
    assert _perform_api_call.call_count == 1, "expect just one call to get one dataset"
150+
151+
152+
@unittest.skipIf(os.name == "nt", "https://github.com/openml/openml-python/issues/1033")
@unittest.mock.patch("openml.config.get_cache_directory")
def test__create_cache_directory(config_mock, tmp_path):
    """_create_cache_directory makes subdirectories and raises when it cannot."""
    config_mock.return_value = tmp_path
    openml.utils._create_cache_directory("abc")
    assert (tmp_path / "abc").exists()

    # A read-only cache root must surface as an OpenMLCacheException.
    read_only_root = tmp_path / "def"
    read_only_root.mkdir()
    read_only_root.chmod(0o444)
    config_mock.return_value = read_only_root
    with pytest.raises(
        openml.exceptions.OpenMLCacheException,
        match="Cannot create cache directory",
    ):
        openml.utils._create_cache_directory("ghi")

0 commit comments

Comments
 (0)