|
1 | 1 | import os |
2 | | -import tempfile |
3 | 2 | import unittest.mock |
4 | 3 |
|
5 | 4 | import openml |
6 | | -from openml.testing import TestBase |
7 | | - |
8 | | - |
9 | | -class OpenMLTaskTest(TestBase): |
10 | | - _multiprocess_can_split_ = True |
11 | | - |
12 | | - def mocked_perform_api_call(call, request_method): |
13 | | - # TODO: JvR: Why is this not a staticmethod? |
14 | | - url = openml.config.server + "/" + call |
15 | | - return openml._api_calls._download_text_file(url) |
16 | | - |
17 | | - def test_list_all(self): |
18 | | - openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks) |
19 | | - openml.utils._list_all( |
20 | | - listing_call=openml.tasks.functions._list_tasks, output_format="dataframe" |
21 | | - ) |
22 | | - |
23 | | - def test_list_all_with_multiple_batches(self): |
24 | | - res = openml.utils._list_all( |
25 | | - listing_call=openml.tasks.functions._list_tasks, output_format="dict", batch_size=1050 |
26 | | - ) |
27 | | - # Verify that test server state is still valid for this test to work as intended |
28 | | - # -> If the number of results is less than 1050, the test can not test the |
29 | | - # batching operation. By having more than 1050 results we know that batching |
30 | | - # was triggered. 1050 appears to be a number of tasks that is available on a fresh |
31 | | - # test server. |
32 | | - assert len(res) > 1050 |
33 | | - openml.utils._list_all( |
34 | | - listing_call=openml.tasks.functions._list_tasks, |
35 | | - output_format="dataframe", |
36 | | - batch_size=1050, |
37 | | - ) |
38 | | - # Comparing the number of tasks is not possible as other unit tests running in |
39 | | - # parallel might be adding or removing tasks! |
40 | | - # assert len(res) <= len(res2) |
41 | | - |
42 | | - @unittest.mock.patch("openml._api_calls._perform_api_call", side_effect=mocked_perform_api_call) |
43 | | - def test_list_all_few_results_available(self, _perform_api_call): |
44 | | - # we want to make sure that the number of api calls is only 1. |
45 | | - # Although we have multiple versions of the iris dataset, there is only |
46 | | - # one with this name/version combination |
47 | | - |
48 | | - datasets = openml.datasets.list_datasets( |
49 | | - size=1000, data_name="iris", data_version=1, output_format="dataframe" |
50 | | - ) |
51 | | - self.assertEqual(len(datasets), 1) |
52 | | - self.assertEqual(_perform_api_call.call_count, 1) |
53 | | - |
54 | | - def test_list_all_for_datasets(self): |
55 | | - required_size = 127 # default test server reset value |
56 | | - datasets = openml.datasets.list_datasets( |
57 | | - batch_size=100, size=required_size, output_format="dataframe" |
58 | | - ) |
59 | | - |
60 | | - self.assertEqual(len(datasets), required_size) |
61 | | - for dataset in datasets.to_dict(orient="index").values(): |
62 | | - self._check_dataset(dataset) |
63 | | - |
64 | | - def test_list_all_for_tasks(self): |
65 | | - required_size = 1068 # default test server reset value |
66 | | - tasks = openml.tasks.list_tasks( |
67 | | - batch_size=1000, size=required_size, output_format="dataframe" |
68 | | - ) |
69 | | - self.assertEqual(len(tasks), required_size) |
70 | | - |
71 | | - def test_list_all_for_flows(self): |
72 | | - required_size = 15 # default test server reset value |
73 | | - flows = openml.flows.list_flows( |
74 | | - batch_size=25, size=required_size, output_format="dataframe" |
75 | | - ) |
76 | | - self.assertEqual(len(flows), required_size) |
77 | | - |
78 | | - def test_list_all_for_setups(self): |
79 | | - required_size = 50 |
80 | | - # TODO apparently list_setups function does not support kwargs |
81 | | - setups = openml.setups.list_setups(size=required_size) |
82 | | - |
83 | | - # might not be on test server after reset, please rerun test at least once if fails |
84 | | - self.assertEqual(len(setups), required_size) |
85 | | - |
86 | | - def test_list_all_for_runs(self): |
87 | | - required_size = 21 |
88 | | - runs = openml.runs.list_runs(batch_size=25, size=required_size) |
89 | | - |
90 | | - # might not be on test server after reset, please rerun test at least once if fails |
91 | | - self.assertEqual(len(runs), required_size) |
92 | | - |
93 | | - def test_list_all_for_evaluations(self): |
94 | | - required_size = 22 |
95 | | - # TODO apparently list_evaluations function does not support kwargs |
96 | | - evaluations = openml.evaluations.list_evaluations( |
97 | | - function="predictive_accuracy", size=required_size |
98 | | - ) |
99 | | - |
100 | | - # might not be on test server after reset, please rerun test at least once if fails |
101 | | - self.assertEqual(len(evaluations), required_size) |
102 | | - |
103 | | - @unittest.mock.patch("openml.config.get_cache_directory") |
104 | | - @unittest.skipIf(os.name == "nt", "https://github.com/openml/openml-python/issues/1033") |
105 | | - def test__create_cache_directory(self, config_mock): |
106 | | - with tempfile.TemporaryDirectory(dir=self.workdir) as td: |
107 | | - config_mock.return_value = td |
108 | | - openml.utils._create_cache_directory("abc") |
109 | | - self.assertTrue(os.path.exists(os.path.join(td, "abc"))) |
110 | | - subdir = os.path.join(td, "def") |
111 | | - os.mkdir(subdir) |
112 | | - os.chmod(subdir, 0o444) |
113 | | - config_mock.return_value = subdir |
114 | | - with self.assertRaisesRegex( |
115 | | - openml.exceptions.OpenMLCacheException, |
116 | | - r"Cannot create cache directory", |
117 | | - ): |
118 | | - openml.utils._create_cache_directory("ghi") |
| 5 | +from openml.testing import _check_dataset |
| 6 | + |
| 7 | +import pytest |
| 8 | + |
| 9 | + |
| 10 | +@pytest.fixture(autouse=True) |
| 11 | +def as_robot(): |
| 12 | + policy = openml.config.retry_policy |
| 13 | + n_retries = openml.config.connection_n_retries |
| 14 | + openml.config.set_retry_policy("robot", n_retries=20) |
| 15 | + yield |
| 16 | + openml.config.set_retry_policy(policy, n_retries) |
| 17 | + |
| 18 | + |
| 19 | +@pytest.fixture(autouse=True) |
| 20 | +def with_test_server(): |
| 21 | + openml.config.start_using_configuration_for_example() |
| 22 | + yield |
| 23 | + openml.config.stop_using_configuration_for_example() |
| 24 | + |
| 25 | + |
| 26 | +@pytest.fixture |
| 27 | +def min_number_tasks_on_test_server() -> int: |
| 28 | + """After a reset at least 1068 tasks are on the test server""" |
| 29 | + return 1068 |
| 30 | + |
| 31 | + |
| 32 | +@pytest.fixture |
| 33 | +def min_number_datasets_on_test_server() -> int: |
| 34 | + """After a reset at least 127 datasets are on the test server""" |
| 35 | + return 127 |
| 36 | + |
| 37 | + |
| 38 | +@pytest.fixture |
| 39 | +def min_number_flows_on_test_server() -> int: |
| 40 | +    """After a reset at least 15 flows are on the test server"""
| 41 | + return 15 |
| 42 | + |
| 43 | + |
| 44 | +@pytest.fixture |
| 45 | +def min_number_setups_on_test_server() -> int: |
| 46 | + """After a reset at least 50 setups are on the test server""" |
| 47 | + return 50 |
| 48 | + |
| 49 | + |
| 50 | +@pytest.fixture |
| 51 | +def min_number_runs_on_test_server() -> int: |
| 52 | +    """After a reset at least 21 runs are on the test server"""
| 53 | + return 21 |
| 54 | + |
| 55 | + |
| 56 | +@pytest.fixture |
| 57 | +def min_number_evaluations_on_test_server() -> int: |
| 58 | + """After a reset at least 22 evaluations are on the test server""" |
| 59 | + return 22 |
| 60 | + |
| 61 | + |
| 62 | +def _mocked_perform_api_call(call, request_method): |
| 63 | + url = openml.config.server + "/" + call |
| 64 | + return openml._api_calls._download_text_file(url) |
| 65 | + |
| 66 | + |
| 67 | +@pytest.mark.server |
| 68 | +def test_list_all(): |
| 69 | + openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks) |
| 70 | + openml.utils._list_all( |
| 71 | + listing_call=openml.tasks.functions._list_tasks, output_format="dataframe" |
| 72 | + ) |
| 73 | + |
| 74 | + |
| 75 | +@pytest.mark.server |
| 76 | +def test_list_all_for_tasks(min_number_tasks_on_test_server): |
| 77 | + tasks = openml.tasks.list_tasks( |
| 78 | + batch_size=1000, |
| 79 | + size=min_number_tasks_on_test_server, |
| 80 | + output_format="dataframe", |
| 81 | + ) |
| 82 | + assert min_number_tasks_on_test_server == len(tasks) |
| 83 | + |
| 84 | + |
| 85 | +@pytest.mark.server |
| 86 | +def test_list_all_with_multiple_batches(min_number_tasks_on_test_server): |
| 87 | + # By setting the batch size one lower than the minimum we guarantee at least two |
| 88 | + # batches and at the same time do as few batches (roundtrips) as possible. |
| 89 | + batch_size = min_number_tasks_on_test_server - 1 |
| 90 | + res = openml.utils._list_all( |
| 91 | + listing_call=openml.tasks.functions._list_tasks, |
| 92 | + output_format="dataframe", |
| 93 | + batch_size=batch_size, |
| 94 | + ) |
| 95 | + assert min_number_tasks_on_test_server <= len(res) |
| 96 | + |
| 97 | + |
| 98 | +@pytest.mark.server |
| 99 | +def test_list_all_for_datasets(min_number_datasets_on_test_server): |
| 100 | + datasets = openml.datasets.list_datasets( |
| 101 | + batch_size=100, size=min_number_datasets_on_test_server, output_format="dataframe" |
| 102 | + ) |
| 103 | + |
| 104 | + assert min_number_datasets_on_test_server == len(datasets) |
| 105 | + for dataset in datasets.to_dict(orient="index").values(): |
| 106 | + _check_dataset(dataset) |
| 107 | + |
| 108 | + |
| 109 | +@pytest.mark.server |
| 110 | +def test_list_all_for_flows(min_number_flows_on_test_server): |
| 111 | + flows = openml.flows.list_flows( |
| 112 | + batch_size=25, size=min_number_flows_on_test_server, output_format="dataframe" |
| 113 | + ) |
| 114 | + assert min_number_flows_on_test_server == len(flows) |
| 115 | + |
| 116 | + |
| 117 | +@pytest.mark.server |
| 118 | +@pytest.mark.flaky # Other tests might need to upload runs first |
| 119 | +def test_list_all_for_setups(min_number_setups_on_test_server): |
| 120 | + # TODO apparently list_setups function does not support kwargs |
| 121 | + setups = openml.setups.list_setups(size=min_number_setups_on_test_server) |
| 122 | + assert min_number_setups_on_test_server == len(setups) |
| 123 | + |
| 124 | + |
| 125 | +@pytest.mark.server |
| 126 | +@pytest.mark.flaky # Other tests might need to upload runs first |
| 127 | +def test_list_all_for_runs(min_number_runs_on_test_server): |
| 128 | + runs = openml.runs.list_runs(batch_size=25, size=min_number_runs_on_test_server) |
| 129 | + assert min_number_runs_on_test_server == len(runs) |
| 130 | + |
| 131 | + |
| 132 | +@pytest.mark.server |
| 133 | +@pytest.mark.flaky # Other tests might need to upload runs first |
| 134 | +def test_list_all_for_evaluations(min_number_evaluations_on_test_server): |
| 135 | + # TODO apparently list_evaluations function does not support kwargs |
| 136 | + evaluations = openml.evaluations.list_evaluations( |
| 137 | + function="predictive_accuracy", size=min_number_evaluations_on_test_server |
| 138 | + ) |
| 139 | + assert min_number_evaluations_on_test_server == len(evaluations) |
| 140 | + |
| 141 | + |
| 142 | +@pytest.mark.server |
| 143 | +@unittest.mock.patch("openml._api_calls._perform_api_call", side_effect=_mocked_perform_api_call) |
| 144 | +def test_list_all_few_results_available(_perform_api_call): |
| 145 | + datasets = openml.datasets.list_datasets( |
| 146 | + size=1000, data_name="iris", data_version=1, output_format="dataframe" |
| 147 | + ) |
| 148 | + assert 1 == len(datasets), "only one iris dataset version 1 should be present" |
| 149 | + assert 1 == _perform_api_call.call_count, "expect just one call to get one dataset" |
| 150 | + |
| 151 | + |
| 152 | +@unittest.skipIf(os.name == "nt", "https://github.com/openml/openml-python/issues/1033") |
| 153 | +@unittest.mock.patch("openml.config.get_cache_directory") |
| 154 | +def test__create_cache_directory(config_mock, tmp_path): |
| 155 | + config_mock.return_value = tmp_path |
| 156 | + openml.utils._create_cache_directory("abc") |
| 157 | + assert (tmp_path / "abc").exists() |
| 158 | + |
| 159 | + subdir = tmp_path / "def" |
| 160 | + subdir.mkdir() |
| 161 | + subdir.chmod(0o444) |
| 162 | + config_mock.return_value = subdir |
| 163 | + with pytest.raises( |
| 164 | + openml.exceptions.OpenMLCacheException, |
| 165 | + match="Cannot create cache directory", |
| 166 | + ): |
| 167 | + openml.utils._create_cache_directory("ghi") |
0 commit comments