forked from openml/openml-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconftest.py
More file actions
348 lines (259 loc) · 11.8 KB
/
conftest.py
File metadata and controls
348 lines (259 loc) · 11.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
"""This file is recognized by pytest for defining specified behaviour
'conftest.py' files are directory-scope files that are shared by all
sub-directories from where this file is placed. pytest recognises
'conftest.py' for any unit test executed from within this directory
tree. This file is used to define fixtures, hooks, plugins, and other
functionality that can be shared by the unit tests.
This file has been created for the OpenML testing to primarily make use
of the pytest hooks 'pytest_sessionstart' and 'pytest_sessionfinish',
which are being used for managing the deletion of local and remote files
created by the unit tests, run across more than one process.
This design allows one to comment or remove the conftest.py file to
disable file deletions, without editing any of the test case files.
Possible Future: class TestBase from openml/testing.py can be included
under this file and there would not be any requirements to import
testing.py in each of the unit test modules.
"""
# License: BSD 3-Clause
from __future__ import annotations
import multiprocessing
multiprocessing.set_start_method("spawn", force=True)
from collections.abc import Iterator
import logging
import os
import shutil
from pathlib import Path
import pytest
import openml_sklearn
from openml._api import HTTPClient, MinIOClient
from openml.enums import APIVersion
import openml
from openml.testing import TestBase
import inspect
# Logger used by the helpers and session hooks in this file to report which
# local files and remote entities were deleted.
logger = logging.getLogger("unit_tests")
logger.setLevel(logging.DEBUG)
# Snapshot of the files returned by read_file_list(); assigned in
# pytest_sessionstart and read again in pytest_sessionfinish.
file_list = []
def worker_id() -> str:
    """Return the name of the pytest process executing this call.

    The name is taken from the ``PYTEST_XDIST_WORKER`` environment variable.
    When that variable is not set the process is reported as ``'master'``;
    this also covers the case where only ``PYTEST_XDIST_WORKER_COUNT`` is
    present, which previously raised a ``KeyError``.

    :return: str
        Possible outputs from the set of {'master', 'gw0', 'gw1', ..., 'gw(n-1)'}
        where n is the number of workers being used by pytest-xdist
    """
    # A single lookup with a default avoids indexing a variable that may be absent.
    return os.environ.get("PYTEST_XDIST_WORKER", "master")
def read_file_list() -> list[Path]:
    """Collect every regular file below the 'files' directory next to this file.

    The directory is searched recursively; directories themselves are not
    included in the result.

    :return: List[Path]
    """
    files_root = Path(__file__).parent / "files"
    found = []
    for entry in files_root.rglob("*"):
        if entry.is_file():
            found.append(entry)
    return found
def compare_delete_files(old_list: list[Path], new_list: list[Path]) -> None:
    """Remove from disk every file listed in new_list that is absent from old_list.

    Each removal is reported through the module logger.

    :param old_list: List[Path]
    :param new_list: List[Path]
    :return: None
    """
    after = set(new_list)
    before = set(old_list)
    # Only paths that appeared since the earlier snapshot are removed.
    for created in after - before:
        os.remove(created)
        logger.info(f"Deleted from local: {created}")
def delete_remote_files(tracker, flow_names) -> None:
    """Delete the entities listed in ``tracker`` from the OpenML test server.

    ``tracker`` is expected to have the structure of ``TestBase.publish_tracker``
    from openml/testing.py: a dictionary of lists whose keys are entity types.
    Every listed entry is passed to ``openml.utils._delete_entity()``. A failed
    deletion is logged as a warning and does not stop the remaining deletions.

    Side effect: when ``tracker`` contains the key ``'flow'``, that entry is
    replaced by the re-ordered list of flows that is used for deletion.

    NOTE(review): flows are paired with ``flow_names`` by position using ``zip``,
    so flows without a matching name are dropped from the deletion list --
    confirm that both sequences are always filled together.

    :param tracker: Dict
    :param flow_names: names of the tracked flows, in the same order as tracker['flow']
    :return: None
    """
    openml.config.use_test_servers()

    # reordering to delete sub flows at the end of flows
    # sub-flows have shorter names, hence, sorting by descending order of flow name length
    if "flow" in tracker:
        by_name_length = sorted(
            zip(tracker["flow"], flow_names),
            key=lambda pair: len(pair[1]),
            reverse=True,
        )
        # Keep each tracked flow entry exactly once, in deletion order. The
        # previous code repeated the element at index 1 for every entry.
        tracker["flow"] = [flow_entry for flow_entry, _ in by_name_length]

    # deleting all collected entities published to test server
    # 'run's are deleted first to prevent dependency issue of entities on deletion
    entity_types = ["run", "data", "flow", "task", "study"]
    logger.info(f"Entity Types: {entity_types}")
    for entity_type in entity_types:
        logger.info(f"Deleting {entity_type}s...")
        # An entity type that was never tracked simply has nothing to delete.
        for entity in tracker.get(entity_type, []):
            try:
                openml.utils._delete_entity(entity_type, entity)
                logger.info(f"Deleted ({entity_type}, {entity})")
            except Exception as e:
                # Best-effort clean-up: report the failure and keep going.
                logger.warning(f"Cannot delete ({entity_type},{entity}): {e}")
def pytest_sessionstart() -> None:
    """Pytest hook that runs before any unit test starts.

    The hook is invoked by the master process and by each worker process when
    they are spawned, which happens even before test collection. With n=4
    workers there are 5 calls in total (1 master + 4 workers). The master
    process is named 'master' and the workers 'gw{i}' with i = 0, 1, ..., n-1.
    The order of process spawning is: 'master' -> random ordering of the
    'gw{i}' workers.

    Only the 'master' process takes the pre-test snapshot of the files
    directory, which it stores in the module-level ``file_list``.

    :return: None
    """
    # file_list is global to maintain the directory snapshot during tear down
    global file_list
    if worker_id() != "master":
        return
    file_list = read_file_list()
def pytest_sessionfinish() -> None:
    """Pytest hook that runs after all unit tests of a process have ended.

    The hook is invoked by the master process and by each worker process once
    they are done with the unit tests allocated to them. With n=4 workers there
    are 5 calls in total (1 master + 4 workers). The master process is named
    'master' and the workers 'gw{i}' with i = 0, 1, ..., n-1.
    The order of invocation is: random ordering of the 'gw{i}' workers -> 'master'.

    Every process deletes the remote entities recorded in
    ``TestBase.publish_tracker``. The 'master' process additionally:
        * compares the files directory with the pre-test snapshot and deletes
          all local files generated since then
        * removes remaining 'tests.*' directories inside the 'openml' directory
          located one level above this file's directory

    :return: None
    """
    current = worker_id()
    logger.info(f"Finishing worker {current}")

    # Remote clean-up of the entities published to the test server
    logger.info(f"Deleting files uploaded to test server for worker {current}")
    delete_remote_files(TestBase.publish_tracker, TestBase.flow_name_tracker)

    if current == "master":
        # Local clean-up: file_list holds the snapshot taken at session start
        compare_delete_files(file_list, read_file_list())

        # Delete any test dirs that remain
        # In edge cases due to a mixture of pytest parametrization and oslo concurrency,
        # some file lock are created after leaving the test. This removes these files!
        package_dir = Path(__file__).parent.parent / "openml"
        for leftover in package_dir.glob("tests.*"):
            if not leftover.is_dir():
                continue
            shutil.rmtree(leftover)

        logger.info("Local files deleted")

    logger.info(f"{current} is killed")
def pytest_configure(config):
    """Pytest hook that registers the custom ``sklearn`` marker.

    :param config: object providing ``addinivalue_line``
    :return: None
    """
    sklearn_marker = "sklearn: marks tests that use scikit-learn"
    config.addinivalue_line("markers", sklearn_marker)
def pytest_addoption(parser):
    """Pytest hook that adds the ``--long`` command line flag.

    The flag is a boolean switch that defaults to ``False``.

    :param parser: object providing ``addoption``
    :return: None
    """
    long_flag = {
        "action": "store_true",
        "default": False,
        "help": "Run the long version of tests which support both short and long scenarios.",
    }
    parser.addoption("--long", **long_flag)
def _expected_static_cache_state(root_dir: Path) -> list[Path]:
    """List the paths that make up the static test cache below ``root_dir``.

    The result contains ``root_dir`` itself, the 'org/openml/test' directory
    inside it, the per-entity directories, and the individual dataset, run,
    setup and task files.

    :param root_dir: Path
    :return: List[Path]
    """
    server_root = root_dir / "org" / "openml" / "test"
    datasets_dir = server_root / "datasets"
    tasks_dir = server_root / "tasks"

    expected = [root_dir, server_root]
    expected += [server_root / kind for kind in ("datasets", "tasks", "runs", "setups")]

    # Datasets cached as arff together with their xml metadata
    expected += [
        datasets_dir / dataset_id / file_name
        for dataset_id in ("-1", "2")
        for file_name in ("dataset.arff", "features.xml", "qualities.xml", "description.xml")
    ]

    expected += [
        datasets_dir / "30" / "dataset_30.pq",
        server_root / "runs" / "1" / "description.xml",
        server_root / "setups" / "1" / "description.xml",
    ]

    # Tasks cached with their data splits and task description
    expected += [
        tasks_dir / task_id / file_name
        for task_id in ("1", "3", "1882")
        for file_name in ("datasplits.arff", "task.xml")
    ]
    return expected
def assert_static_test_cache_correct(root_dir: Path) -> None:
    """Assert that every path of the expected static cache exists below ``root_dir``.

    :param root_dir: Path
    :return: None
    :raises AssertionError: for the first expected path that does not exist
    """
    expected_paths = _expected_static_cache_state(root_dir)
    for path in expected_paths:
        assert path.exists(), f"Expected path {path} exists"
@pytest.fixture(scope="class")
def long_version(request):
    """Store the value of the ``--long`` option as ``long_version`` on the requesting class."""
    run_long = request.config.getoption("--long")
    request.cls.long_version = run_long
@pytest.fixture(scope="session")
def test_files_directory() -> Path:
    """Return the 'files' directory located next to this file."""
    here = Path(__file__).parent
    return here / "files"
@pytest.fixture(scope="session")
def test_server_v1() -> str:
    """Return the 'server' entry of the test-server configuration for API version V1."""
    v1_settings = openml.config.get_test_servers()[APIVersion.V1]
    return v1_settings["server"]
@pytest.fixture(scope="session")
def test_apikey_v1() -> str:
    """Return the 'apikey' entry of the test-server configuration for API version V1."""
    v1_settings = openml.config.get_test_servers()[APIVersion.V1]
    return v1_settings["apikey"]
@pytest.fixture(scope="session")
def test_server_v2() -> str:
    """Return the 'server' entry of the test-server configuration for API version V2."""
    v2_settings = openml.config.get_test_servers()[APIVersion.V2]
    return v2_settings["server"]
@pytest.fixture(scope="session")
def test_apikey_v2() -> str:
    """Return the 'apikey' entry of the test-server configuration for API version V2."""
    v2_settings = openml.config.get_test_servers()[APIVersion.V2]
    return v2_settings["apikey"]
@pytest.fixture(autouse=True, scope="function")
def verify_cache_state(test_files_directory) -> Iterator[None]:
    """Check the static test cache both before and after every test.

    :param test_files_directory: root directory of the static test cache
    """
    cache_root = test_files_directory
    # Pre-condition: the cache is complete when the test starts
    assert_static_test_cache_correct(cache_root)
    yield
    # Post-condition: the test did not remove any expected cache path
    assert_static_test_cache_correct(cache_root)
@pytest.fixture(autouse=True, scope="session")
def as_robot() -> Iterator[None]:
    """Use the 'robot' retry policy with 20 retries for the whole test session.

    The retry policy and retry count that were configured beforehand are
    restored once the session is over.
    """
    previous_settings = (openml.config.retry_policy, openml.config.connection_n_retries)
    openml.config.set_retry_policy("robot", n_retries=20)
    yield
    openml.config.set_retry_policy(*previous_settings)
@pytest.fixture(autouse=True)
def with_server(request):
    """Select API version V1 and the servers to use for every test.

    Tests carrying the ``production_server`` keyword use the production
    servers, all other tests use the test servers.
    """
    openml.config.set_api_version(APIVersion.V1)
    wants_production = "production_server" in request.keywords
    if wants_production:
        openml.config.use_production_servers()
    else:
        openml.config.use_test_servers()
    yield
@pytest.fixture(autouse=True)
def with_test_cache(test_files_directory, request):
    """Give every test its own root cache directory below the test files directory.

    The directory name is derived from the test's node id. After the test the
    previously configured root cache directory is restored and the per-test
    directory is removed if it exists.

    :raises ValueError: if the test files directory does not exist
    """
    # Skip this fixture for TestBase subclasses - they manage their own cache directory
    # in setUp()/tearDown(). Having both mechanisms fight over the global config
    # causes race conditions.
    # isinstance() is already False for a missing (None) instance.
    if isinstance(request.instance, TestBase):
        yield
        return

    if not test_files_directory.exists():
        raise ValueError(
            f"Cannot find test cache dir, expected it to be {test_files_directory!s}!",
        )

    previous_root = openml.config._root_cache_directory
    # Turn the node id into a single directory name
    dir_name = request.node.nodeid.replace("/", ".").replace("::", ".")
    tmp_cache = test_files_directory / dir_name
    openml.config.set_root_cache_directory(tmp_cache)
    yield
    openml.config.set_root_cache_directory(previous_root)
    if tmp_cache.exists():
        shutil.rmtree(tmp_cache)
@pytest.fixture
def static_cache_dir():
    """Return the 'files' directory located next to this file."""
    conftest_dir = Path(__file__).parent
    return conftest_dir / "files"
@pytest.fixture
def workdir(tmp_path):
    """Run the test with ``tmp_path`` as the current working directory.

    Yields ``tmp_path`` and changes back to the previous working directory
    afterwards.
    """
    previous_dir = Path.cwd()
    os.chdir(tmp_path)
    yield tmp_path
    # Return to where the process was before the test
    os.chdir(previous_dir)
@pytest.fixture
def http_client_v1() -> HTTPClient:
    """Return an ``HTTPClient`` constructed for API version V1."""
    client = HTTPClient(api_version=APIVersion.V1)
    return client
@pytest.fixture
def http_client_v2() -> HTTPClient:
    """Return an ``HTTPClient`` constructed for API version V2."""
    client = HTTPClient(api_version=APIVersion.V2)
    return client
@pytest.fixture
def minio_client() -> MinIOClient:
    """Return a ``MinIOClient`` constructed with its default arguments."""
    client = MinIOClient()
    return client