# Vendored from a fork of openml/openml-python: openml/config.py (341 lines, 266 loc, 11 KB).
"""
Store module level information like the API key, cache directory and the server
"""
# License: BSD 3-Clause
import logging
import logging.handlers
import os
from pathlib import Path
import platform
from typing import Tuple, cast
from io import StringIO
import configparser
from urllib.parse import urlparse
# Module-level logger for messages about this config module itself.
logger = logging.getLogger(__name__)
# Parent logger for the whole "openml" package; handlers are attached to it.
openml_logger = logging.getLogger("openml")
# Created lazily by _create_log_handlers(); None until then.
console_handler = None
file_handler = None
def _create_log_handlers(create_file_handler=True):
    """ Create (but do not attach) the console and, optionally, file log handlers.

    Idempotent: if either handler already exists, nothing is (re)created.
    The file handler writes to ``openml_python.log`` inside the cache directory.
    """
    global console_handler, file_handler
    if console_handler is not None or file_handler is not None:
        logger.debug("Requested to create log handlers, but they are already created.")
        return

    formatter = logging.Formatter(
        "[%(levelname)s] [%(asctime)s:%(name)s] %(message)s", datefmt="%H:%M:%S"
    )

    console_handler = logging.StreamHandler()
    console_handler.setFormatter(formatter)

    if not create_file_handler:
        return

    log_path = os.path.join(cache_directory, "openml_python.log")
    # Rotate at 1 MB, keep one backup; delay=True postpones opening the file
    # until the first record is emitted.
    file_handler = logging.handlers.RotatingFileHandler(
        log_path, maxBytes=2 ** 20, backupCount=1, delay=True
    )
    file_handler.setFormatter(formatter)
def _convert_log_levels(log_level: int) -> Tuple[int, int]:
""" Converts a log level that's either defined by OpenML/Python to both specifications. """
# OpenML verbosity level don't match Python values directly:
openml_to_python = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG}
python_to_openml = {
logging.DEBUG: 2,
logging.INFO: 1,
logging.WARNING: 0,
logging.CRITICAL: 0,
logging.ERROR: 0,
}
# Because the dictionaries share no keys, we use `get` to convert as necessary:
openml_level = python_to_openml.get(log_level, log_level)
python_level = openml_to_python.get(log_level, log_level)
return openml_level, python_level
def _set_level_register_and_store(handler: logging.Handler, log_level: int):
    """ Set the handler's log level and register it with the openml logger if needed.

    ``log_level`` may be given in either the OpenML (0-2) or Python ``logging``
    convention; it is normalised via ``_convert_log_levels``.

    NOTE(review): despite the original docstring's claim, nothing here writes
    the setting to the config file; it only lives for the current session.
    """
    # Only the Python-convention level is needed; drop the OpenML one.
    _, py_level = _convert_log_levels(log_level)
    handler.setLevel(py_level)
    # The logger itself must be at least as permissive as the handler,
    # otherwise records are filtered out before the handler sees them.
    if openml_logger.level > py_level or openml_logger.level == logging.NOTSET:
        openml_logger.setLevel(py_level)
    if handler not in openml_logger.handlers:
        openml_logger.addHandler(handler)
def set_console_log_level(console_output_level: int):
    """ Set console output to the desired level and register the console handler
    with the openml logger if needed.

    Parameters
    ----------
    console_output_level : int
        Level in either the OpenML (0-2) or Python ``logging`` convention.
    """
    # ``console_handler`` is only read here, so no ``global`` statement is needed.
    _set_level_register_and_store(cast(logging.Handler, console_handler), console_output_level)
def set_file_log_level(file_output_level: int):
    """ Set file output to the desired level and register the file handler
    with the openml logger if needed.

    Parameters
    ----------
    file_output_level : int
        Level in either the OpenML (0-2) or Python ``logging`` convention.
    """
    # ``file_handler`` is only read here, so no ``global`` statement is needed.
    _set_level_register_and_store(cast(logging.Handler, file_handler), file_output_level)
# Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards)
# All values are strings; they are parsed into their real types in _setup().
_defaults = {
    "apikey": "",
    "server": "https://www.openml.org/api/v1/xml",
    # On Linux, honour the XDG base-directory spec; elsewhere use ~/.openml.
    # NOTE(review): when XDG_CACHE_HOME is set it is used verbatim, without an
    # "openml" subdirectory appended (unlike the fallback) — confirm intended.
    "cachedir": (
        os.environ.get("XDG_CACHE_HOME", os.path.join("~", ".cache", "openml",))
        if platform.system() == "Linux"
        else os.path.join("~", ".openml")
    ),
    "avoid_duplicate_runs": "True",
    "connection_n_retries": "10",
    "max_retries": "20",
}

# Default values are actually added here in the _setup() function which is
# called at the end of this module
server = str(_defaults["server"])  # so mypy knows it is a string
def get_server_base_url() -> str:
    """Return the base URL of the currently configured server.

    For example, turns ``"https://www.openml.org/api/v1/xml"`` into
    ``"https://www.openml.org/"``.

    Returns
    =======
    str
    """
    # Everything before the first "/api" is the base URL.
    base, _, _ = server.partition("/api")
    return base
# Materialise the defaults as typed module-level settings; _setup() overwrites
# them with values from the config file on import.
apikey = _defaults["apikey"]
# The current cache directory (without the server name)
cache_directory = str(_defaults["cachedir"])  # so mypy knows it is a string
# Compare directly to "True" instead of the redundant `True if ... else False`.
avoid_duplicate_runs = _defaults["avoid_duplicate_runs"] == "True"
# Number of retries if the connection breaks
connection_n_retries = int(_defaults["connection_n_retries"])
# Hard upper bound on connection_n_retries (enforced in _setup()).
max_retries = int(_defaults["max_retries"])
class ConfigurationForExamples:
    """ Allows easy switching to and from a test configuration, used for examples. """

    _last_used_server = None
    _last_used_key = None
    _start_last_called = False
    _test_server = "https://test.openml.org/api/v1/xml"
    _test_apikey = "c0c42819af31e706efe1f4b88c23c6c1"

    @classmethod
    def start_using_configuration_for_example(cls):
        """ Sets the configuration to connect to the test server with a valid apikey.

        The configuration as it was before this call is stored, and can be
        recovered by using the `stop_using_configuration_for_example` method.
        """
        global server
        global apikey

        if cls._start_last_called and server == cls._test_server and apikey == cls._test_apikey:
            # Method is called more than once in a row without modifying the server or apikey.
            # We don't want to save the current test configuration as a last used configuration.
            return

        cls._last_used_server = server
        cls._last_used_key = apikey
        cls._start_last_called = True

        # Test server key for examples
        server = cls._test_server
        apikey = cls._test_apikey

    @classmethod
    def stop_using_configuration_for_example(cls):
        """ Return to the configuration as it was before
        `start_using_configuration_for_example`.
        """
        if not cls._start_last_called:
            # We don't want to allow this because it will (likely) result in the `server` and
            # `apikey` variables being set to None.
            # Fixed: the two literals previously concatenated without a separating space.
            raise RuntimeError(
                "`stop_use_example_configuration` called without a saved config. "
                "`start_use_example_configuration` must be called first."
            )

        global server
        global apikey

        server = cls._last_used_server
        apikey = cls._last_used_key
        cls._start_last_called = False
def _setup(config=None):
    """Set up the openml package. Called on first import.

    Reads the config file and sets up apikey, server, cache appropriately.
    key and server can be set by the user simply using

    openml.config.apikey = THEIRKEY
    openml.config.server = SOMESERVER

    We could also make it a property but that's less clear.

    Parameters
    ----------
    config : optional
        If given, a mapping supplying the configuration values directly
        (via ``.get(key)``) instead of reading the on-disk config file.
    """
    global apikey
    global server
    global cache_directory
    global avoid_duplicate_runs
    global connection_n_retries
    global max_retries

    # Linux follows the XDG base-directory spec; other platforms use ~/.openml.
    if platform.system() == "Linux":
        config_dir = Path(os.environ.get("XDG_CONFIG_HOME", Path("~") / ".config" / "openml"))
    else:
        config_dir = Path("~") / ".openml"
    # Still use os.path.expanduser to trigger the mock in the unit test
    config_dir = Path(os.path.expanduser(config_dir))
    config_file = config_dir / "config"

    # Create the directory for the config file. Use makedirs (not mkdir) so
    # missing parents such as ~/.config are created too; the original mkdir
    # raised an uncaught FileNotFoundError in that case.
    try:
        os.makedirs(config_dir, exist_ok=True)
        cache_exists = True
    except PermissionError:
        cache_exists = False

    if cache_exists:
        _create_log_handlers()
    else:
        # Without a writable directory we can only log to the console.
        _create_log_handlers(create_file_handler=False)
        openml_logger.warning(
            "No permission to create OpenML directory at %s! This can result in OpenML-Python "
            "not working properly." % config_dir
        )

    if config is None:
        config = _parse_config(config_file)

        # Values live under the fake section header added by _parse_config.
        def _get(config, key):
            return config.get("FAKE_SECTION", key)

        avoid_duplicate_runs = config.getboolean("FAKE_SECTION", "avoid_duplicate_runs")
    else:

        # A caller-supplied config is a plain mapping.
        def _get(config, key):
            return config.get(key)

        avoid_duplicate_runs = config.get("avoid_duplicate_runs")

    apikey = _get(config, "apikey")
    server = _get(config, "server")
    short_cache_dir = _get(config, "cachedir")
    connection_n_retries = int(_get(config, "connection_n_retries"))
    max_retries = int(_get(config, "max_retries"))
    cache_directory = os.path.expanduser(short_cache_dir)

    # Create the cache directory (including any missing parents).
    try:
        os.makedirs(cache_directory, exist_ok=True)
    except PermissionError:
        openml_logger.warning(
            "No permission to create openml cache directory at %s! This can result in "
            "OpenML-Python not working properly." % cache_directory
        )

    if connection_n_retries > max_retries:
        raise ValueError(
            "A higher number of retries than {} is not allowed to keep the "
            "server load reasonable".format(max_retries)
        )
def _parse_config(config_file: str):
    """ Parse the config file into a RawConfigParser, falling back to defaults.

    A missing or unreadable file is not an error: the parser is returned with
    only the default values in that case.
    """
    parser = configparser.RawConfigParser(defaults=_defaults)

    # The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file.
    # Cheat the ConfigParser module by adding a fake section header
    buffer = StringIO()
    buffer.write("[FAKE_SECTION]\n")
    try:
        with open(config_file) as fh:
            buffer.write(fh.read())
    except FileNotFoundError:
        logger.info("No config file found at %s, using default configuration.", config_file)
    except OSError as e:
        logger.info("Error opening file %s: %s", config_file, e.args[0])
    buffer.seek(0)
    parser.read_file(buffer)
    return parser
def get_config_as_dict():
    """ Return the current module-level configuration as a plain dictionary. """
    return {
        "apikey": apikey,
        "server": server,
        "cachedir": cache_directory,
        "avoid_duplicate_runs": avoid_duplicate_runs,
        "connection_n_retries": connection_n_retries,
        "max_retries": max_retries,
    }
def get_cache_directory():
    """Get the current cache directory.

    The server's host name, with its dot-separated components reversed, is
    appended to the base cache directory so different servers do not share
    cached data (e.g. ``www.openml.org`` -> ``org/openml/www``).

    Returns
    -------
    cachedir : string
        The current cache directory.
    """
    netloc = urlparse(server).netloc
    reversed_host = os.sep.join(reversed(netloc.split(".")))
    return os.path.join(cache_directory, reversed_host)
def set_cache_directory(cachedir):
    """Set module-wide cache directory.

    All subsequent downloads (datasets, tasks, ...) are stored below this path.

    Parameters
    ----------
    cachedir : string
        Path to use as cache directory.

    See also
    --------
    get_cache_directory
    """
    global cache_directory
    cache_directory = cachedir
# Expose the example-configuration switchers as module-level functions.
start_using_configuration_for_example = (
    ConfigurationForExamples.start_using_configuration_for_example
)
stop_using_configuration_for_example = ConfigurationForExamples.stop_using_configuration_for_example

# Public API of this module.
__all__ = [
    "get_cache_directory",
    "set_cache_directory",
    "start_using_configuration_for_example",
    "stop_using_configuration_for_example",
    "get_config_as_dict",
]

# Read the config file and initialise all module-level settings on import.
_setup()