diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index e50d67710..773dda6f2 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -29,10 +29,7 @@ jobs: python-version: 3.8 - name: Install dependencies run: | - pip install -e .[docs,examples,examples_unix] - # dependency "fanova" does not work with numpy 1.24 or later - # https://github.com/automl/fanova/issues/108 - pip install numpy==1.23.5 + pip install -e .[docs,examples] - name: Make docs run: | cd doc @@ -64,4 +61,4 @@ jobs: git config --global user.email 'not@mail.com' git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }} git commit -am "$last_commit" - git push + git diff --quiet @{u} HEAD || git push diff --git a/.github/workflows/release_docker.yaml b/.github/workflows/release_docker.yaml index c8f8c59f8..fc629a4e4 100644 --- a/.github/workflows/release_docker.yaml +++ b/.github/workflows/release_docker.yaml @@ -8,9 +8,6 @@ on: - 'docker' tags: - 'v*' - pull_request: - branches: - - 'develop' concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} @@ -47,7 +44,7 @@ jobs: - name: Build and push id: docker_build - uses: docker/build-push-action@v5 + uses: docker/build-push-action@v6 with: context: ./docker/ tags: ${{ steps.meta_dockerhub.outputs.tags }} @@ -57,7 +54,7 @@ jobs: - name: Update repo description if: ${{ startsWith(github.ref, 'refs/tags/v') }} - uses: peter-evans/dockerhub-description@v3 + uses: peter-evans/dockerhub-description@v4 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6a0408137..f2543bc53 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -118,7 +118,7 @@ jobs: fi - name: Upload coverage if: matrix.code-cov && always() - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v4 with: files: coverage.xml token: ${{ secrets.CODECOV_TOKEN }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5f13625a0..95e2a5239 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,20 +7,20 @@ files: | )/.*\.py$ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.14 + rev: v0.7.3 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix, --no-cache] - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.8.0 + rev: v1.13.0 hooks: - id: mypy additional_dependencies: - types-requests - types-python-dateutil - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.27.3 + rev: 0.29.4 hooks: - id: check-github-workflows files: '^github/workflows/.*\.ya?ml$' @@ -28,7 +28,7 @@ repos: - id: check-dependabot files: '^\.github/dependabot\.ya?ml$' - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v5.0.0 hooks: - id: check-added-large-files files: ".*" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c2b4be187..cc8633f84 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -141,7 +141,7 @@ following rules before you submit a pull request: - If your pull request addresses an issue, please use the pull request title to describe the issue and mention the issue number in the pull request description. This will make sure a link back to the original issue is - created. + created. Make sure the title is descriptive enough to understand what the pull request does! 
- An incomplete contribution -- where you expect to do more work before receiving a full review -- should be submitted as a `draft`. These may be useful @@ -174,8 +174,6 @@ following rules before you submit a pull request: For the Bug-fixes case, at the time of the PR, this tests should fail for the code base in develop and pass for the PR code. - - Add your changes to the changelog in the file doc/progress.rst. - - If any source file is being added to the repository, please add the BSD 3-Clause license to it. @@ -201,17 +199,12 @@ Make sure your code has good unittest **coverage** (at least 80%). Pre-commit is used for various style checking and code formatting. Before each commit, it will automatically run: - - [black](https://black.readthedocs.io/en/stable/) a code formatter. + - [ruff](https://docs.astral.sh/ruff/) a code formatter and linter. This will automatically format your code. Make sure to take a second look after any formatting takes place, if the resulting code is very bloated, consider a (small) refactor. - *note*: If Black reformats your code, the commit will automatically be aborted. - Make sure to add the formatted files (back) to your commit after checking them. - [mypy](https://mypy.readthedocs.io/en/stable/) a static type checker. In particular, make sure each function you work on has type hints. - - [flake8](https://flake8.pycqa.org/en/latest/index.html) style guide enforcement. - Almost all of the black-formatted code should automatically pass this check, - but make sure to make adjustments if it does fail. If you want to run the pre-commit tests without doing a commit, run: ```bash @@ -224,23 +217,6 @@ $ pre-commit run --all-files Make sure to do this at least once before your first commit to check your setup works. Executing a specific unit test can be done by specifying the module, test case, and test. -To obtain a hierarchical list of all tests, run - -```bash -$ pytest --collect-only - - - - - - - - - - - -``` - You may then run a specific module, test case, or unit test respectively: ```bash $ pytest tests/test_datasets/test_dataset.py @@ -271,7 +247,7 @@ information. For building the documentation, you will need to install a few additional dependencies: ```bash -$ pip install -e .[docs] +$ pip install -e .[examples,docs] ``` When dependencies are installed, run ```bash diff --git a/README.md b/README.md index f13038faa..0bad7ac66 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,76 @@ -# OpenML-Python - -[![All Contributors](https://img.shields.io/badge/all_contributors-2-orange.svg?style=flat-square)](#contributors-) - -A python interface for [OpenML](http://openml.org), an online platform for open science collaboration in machine learning. -It can be used to download or upload OpenML data such as datasets and machine learning experiment results. -## General +
-* [Documentation](https://openml.github.io/openml-python). -* [Contribution guidelines](https://github.com/openml/openml-python/blob/develop/CONTRIBUTING.md). +
+
    + + OpenML Logo +

    OpenML-Python

    + Python Logo +
    +
+
+## The Python API for a World of Data and More :dizzy: + +[![Latest Release](https://img.shields.io/github/v/release/openml/openml-python)](https://github.com/openml/openml-python/releases) +[![Python Versions](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue)](https://pypi.org/project/openml/) +[![Downloads](https://static.pepy.tech/badge/openml)](https://pepy.tech/project/openml) [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) + + +[Installation](https://openml.github.io/openml-python/main/#how-to-get-openml-for-python) | [Documentation](https://openml.github.io/openml-python) | [Contribution guidelines](https://github.com/openml/openml-python/blob/develop/CONTRIBUTING.md) +
+ +OpenML-Python provides an easy-to-use and straightforward Python interface for [OpenML](http://openml.org), an online platform for open science collaboration in machine learning. +It can download or upload data from OpenML, such as datasets and machine learning experiment results. + +## :joystick: Minimal Example -## Citing OpenML-Python +Use the following code to get the [credit-g](https://www.openml.org/search?type=data&sort=runs&status=active&id=31) [dataset](https://docs.openml.org/concepts/data/): + +```python +import openml + +dataset = openml.datasets.get_dataset("credit-g") # or by ID get_dataset(31) +X, y, categorical_indicator, attribute_names = dataset.get_data(target="class") +``` -If you use OpenML-Python in a scientific publication, we would appreciate a reference to the -following paper: +Get a [task](https://docs.openml.org/concepts/tasks/) for [supervised classification on credit-g](https://www.openml.org/search?type=task&id=31&source_data.data_id=31): + +```python +import openml + +task = openml.tasks.get_task(31) +dataset = task.get_dataset() +X, y, categorical_indicator, attribute_names = dataset.get_data(target=task.target_name) +# get splits for the first fold of 10-fold cross-validation +train_indices, test_indices = task.get_train_test_split_indices(fold=0) +``` + +Use an [OpenML benchmarking suite](https://docs.openml.org/concepts/benchmarking/) to get a curated list of machine-learning tasks: +```python +import openml + +suite = openml.study.get_suite("amlb-classification-all") # Get a curated list of tasks for classification +for task_id in suite.tasks: + task = openml.tasks.get_task(task_id) +``` + +## :magic_wand: Installation + +OpenML-Python is supported on Python 3.8 - 3.13 and is available on Linux, MacOS, and Windows. + +You can install OpenML-Python with: + +```bash +pip install openml +``` + +## :page_facing_up: Citing OpenML-Python + +If you use OpenML-Python in a scientific publication, we would appreciate a reference to the following paper: [Matthias Feurer, Jan N. van Rijn, Arlind Kadra, Pieter Gijsbers, Neeratyoy Mallik, Sahithya Ravi, Andreas Mรผller, Joaquin Vanschoren, Frank Hutter
**OpenML-Python: an extensible Python API for OpenML**
@@ -35,23 +89,3 @@ Bibtex entry: url = {http://jmlr.org/papers/v22/19-920.html} } ``` - -## Contributors ✨ - -Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)): - - - - - - - - -

a-moadel

📖 💡

Neeratyoy Mallik

💻 📖 💡
- - - - - -This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome! diff --git a/doc/index.rst b/doc/index.rst index a3b13c9e8..4ab56f5c3 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -71,7 +71,7 @@ Further information * `OpenML documentation `_ * `OpenML client APIs `_ -* `OpenML developer guide `_ +* `OpenML developer guide `_ * `Contact information `_ * `Citation request `_ * `OpenML blog `_ diff --git a/doc/progress.rst b/doc/progress.rst index 6496db7a8..3bf7c05aa 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -2,12 +2,12 @@ .. _progress: -========= -Changelog -========= +============================================= +Changelog (discontinued after version 0.15.0) +============================================= -next -~~~~~~ +See GitHub releases for the latest changes. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0.15.0 ~~~~~~ diff --git a/doc/usage.rst b/doc/usage.rst index 8c713b586..f6476407e 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -118,7 +118,7 @@ this should be repeated several times. Also, the task defines a target metric for which a flow should be optimized. Below you can find our tutorial regarding tasks and if you want to know more -you can read the `OpenML guide `_: +you can read the `OpenML guide `_: * :ref:`sphx_glr_examples_30_extended_tasks_tutorial.py` diff --git a/examples/20_basic/simple_datasets_tutorial.py b/examples/20_basic/simple_datasets_tutorial.py index 35b325fd9..b90d53660 100644 --- a/examples/20_basic/simple_datasets_tutorial.py +++ b/examples/20_basic/simple_datasets_tutorial.py @@ -27,7 +27,7 @@ # ================== # Iris dataset https://www.openml.org/d/61 -dataset = openml.datasets.get_dataset(61) +dataset = openml.datasets.get_dataset(dataset_id="iris", version=1) # Print a summary print( diff --git a/examples/20_basic/simple_flows_and_runs_tutorial.py b/examples/20_basic/simple_flows_and_runs_tutorial.py index 0176328b6..eec6d7e8b 100644 --- a/examples/20_basic/simple_flows_and_runs_tutorial.py +++ b/examples/20_basic/simple_flows_and_runs_tutorial.py @@ -20,8 +20,8 @@ # Train a machine learning model # ============================== -# NOTE: We are using dataset 20 from the test server: https://test.openml.org/d/20 -dataset = openml.datasets.get_dataset(20) +# NOTE: We are using dataset "diabetes" from the test server: https://test.openml.org/d/20 +dataset = openml.datasets.get_dataset(dataset_id="diabetes", version=1) X, y, categorical_indicator, attribute_names = dataset.get_data( target=dataset.default_target_attribute ) diff --git a/examples/20_basic/simple_suites_tutorial.py b/examples/20_basic/simple_suites_tutorial.py index 92dfb3c04..3daf7b992 100644 --- a/examples/20_basic/simple_suites_tutorial.py +++ b/examples/20_basic/simple_suites_tutorial.py @@ -39,7 +39,9 @@ # Downloading benchmark suites # ============================ -suite = openml.study.get_suite(99) +# OpenML Benchmarking Suites and the OpenML-CC18 +# https://www.openml.org/s/99 +suite = openml.study.get_suite("OpenML-CC18") print(suite) #################################################################################################### diff --git a/examples/30_extended/configure_logging.py b/examples/30_extended/configure_logging.py index 3d33f1546..3878b0436 100644 --- a/examples/30_extended/configure_logging.py +++ b/examples/30_extended/configure_logging.py @@ -24,7 +24,7 @@ import openml -openml.datasets.get_dataset("iris") +openml.datasets.get_dataset("iris", 
version=1) # With default configuration, the above example will show no output to console. # However, in your cache directory you should find a file named 'openml_python.log', @@ -39,7 +39,7 @@ openml.config.set_console_log_level(logging.DEBUG) openml.config.set_file_log_level(logging.WARNING) -openml.datasets.get_dataset("iris") +openml.datasets.get_dataset("iris", version=1) # Now the log level that was previously written to file should also be shown in the console. # The message is now no longer written to file as the `file_log` was set to level `WARNING`. diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py index 764cb8f36..606455dd8 100644 --- a/examples/30_extended/datasets_tutorial.py +++ b/examples/30_extended/datasets_tutorial.py @@ -51,7 +51,7 @@ # ================= # This is done based on the dataset ID. -dataset = openml.datasets.get_dataset(1471) +dataset = openml.datasets.get_dataset(dataset_id="eeg-eye-state", version=1) # Print a summary print( @@ -87,8 +87,7 @@ # Starting from 0.15, not downloading data will be the default behavior instead. # The data will be downloading automatically when you try to access it through # openml objects, e.g., using `dataset.features`. -dataset = openml.datasets.get_dataset(1471, download_data=False) - +dataset = openml.datasets.get_dataset(dataset_id="eeg-eye-state", version=1, download_data=False) ############################################################################ # Exercise 2 # ********** diff --git a/examples/30_extended/flows_and_runs_tutorial.py b/examples/30_extended/flows_and_runs_tutorial.py index 38b0d23cf..b7c000101 100644 --- a/examples/30_extended/flows_and_runs_tutorial.py +++ b/examples/30_extended/flows_and_runs_tutorial.py @@ -25,7 +25,7 @@ # Train a scikit-learn model on the data manually. # NOTE: We are using dataset 68 from the test server: https://test.openml.org/d/68 -dataset = openml.datasets.get_dataset(68) +dataset = openml.datasets.get_dataset(dataset_id="eeg-eye-state", version=1) X, y, categorical_indicator, attribute_names = dataset.get_data( target=dataset.default_target_attribute ) @@ -36,7 +36,7 @@ # You can also ask for meta-data to automatically preprocess the data. # # * e.g. 
categorical features -> do feature encoding -dataset = openml.datasets.get_dataset(17) +dataset = openml.datasets.get_dataset(dataset_id="credit-g", version=1) X, y, categorical_indicator, attribute_names = dataset.get_data( target=dataset.default_target_attribute ) @@ -101,7 +101,7 @@ [ ( "categorical", - preprocessing.OneHotEncoder(sparse=False, handle_unknown="ignore"), + preprocessing.OneHotEncoder(handle_unknown="ignore"), cat, # returns the categorical feature indices ), ( @@ -145,7 +145,7 @@ [ ( "categorical", - preprocessing.OneHotEncoder(sparse=False, handle_unknown="ignore"), + preprocessing.OneHotEncoder(handle_unknown="ignore"), categorical_feature_indices, ), ( diff --git a/examples/30_extended/run_setup_tutorial.py b/examples/30_extended/run_setup_tutorial.py index a2bc3a4df..477e49fa6 100644 --- a/examples/30_extended/run_setup_tutorial.py +++ b/examples/30_extended/run_setup_tutorial.py @@ -58,7 +58,7 @@ cat_imp = make_pipeline( - OneHotEncoder(handle_unknown="ignore", sparse=False), + OneHotEncoder(handle_unknown="ignore"), TruncatedSVD(), ) cont_imp = SimpleImputer(strategy="median") diff --git a/examples/30_extended/study_tutorial.py b/examples/30_extended/study_tutorial.py index d5bfcd88a..8715dfb4a 100644 --- a/examples/30_extended/study_tutorial.py +++ b/examples/30_extended/study_tutorial.py @@ -79,7 +79,8 @@ tasks = [115, 259, 307] # To verify -suite = openml.study.get_suite(1) +# https://test.openml.org/api/v1/study/1 +suite = openml.study.get_suite("OpenML100") print(all([t_id in suite.tasks for t_id in tasks])) run_ids = [] diff --git a/examples/30_extended/suites_tutorial.py b/examples/30_extended/suites_tutorial.py index ff9902356..935d4c529 100644 --- a/examples/30_extended/suites_tutorial.py +++ b/examples/30_extended/suites_tutorial.py @@ -37,7 +37,8 @@ ############################################################################ # This is done based on the dataset ID. -suite = openml.study.get_suite(99) +# https://www.openml.org/api/v1/study/99 +suite = openml.study.get_suite("OpenML-CC18") print(suite) ############################################################################ diff --git a/examples/40_paper/2015_neurips_feurer_example.py b/examples/40_paper/2015_neurips_feurer_example.py index 3960c3852..ae59c9ced 100644 --- a/examples/40_paper/2015_neurips_feurer_example.py +++ b/examples/40_paper/2015_neurips_feurer_example.py @@ -49,14 +49,14 @@ # this does not allow reproducibility (unclear splitting). Please do not use datasets but the # respective tasks as basis for a paper and publish task IDS. This example is only given to # showcase the use of OpenML-Python for a published paper and as a warning on how not to do it. -# Please check the `OpenML documentation of tasks `_ if you +# Please check the `OpenML documentation of tasks `_ if you # want to learn more about them. #################################################################################################### # This lists both active and inactive tasks (because of ``status='all'``). Unfortunately, # this is necessary as some of the datasets contain issues found after the publication and became # deactivated, which also deactivated the tasks on them. More information on active or inactive -# datasets can be found in the `online docs `_. +# datasets can be found in the `online docs `_. 
tasks = openml.tasks.list_tasks( task_type=openml.tasks.TaskType.SUPERVISED_CLASSIFICATION, status="all", diff --git a/examples/40_paper/2018_kdd_rijn_example.py b/examples/40_paper/2018_kdd_rijn_example.py index d3ce59f35..6522013e3 100644 --- a/examples/40_paper/2018_kdd_rijn_example.py +++ b/examples/40_paper/2018_kdd_rijn_example.py @@ -4,8 +4,10 @@ A tutorial on how to reproduce the paper *Hyperparameter Importance Across Datasets*. -This is a Unix-only tutorial, as the requirements can not be satisfied on a Windows machine (Untested on other -systems). +Example Deprecation Warning! +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This example is not supported anymore by the OpenML-Python developers. The example is kept for reference purposes but not tested anymore. Publication ~~~~~~~~~~~ @@ -14,6 +16,16 @@ | Jan N. van Rijn and Frank Hutter | In *Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining*, 2018 | Available at https://dl.acm.org/doi/10.1145/3219819.3220058 + +Requirements +~~~~~~~~~~~~ + +This is a Unix-only tutorial, as the requirements can not be satisfied on a Windows machine (Untested on other +systems). + +The following Python packages are required: + +pip install openml[examples,docs] fanova ConfigSpace<1.0 """ # License: BSD 3-Clause @@ -26,148 +38,151 @@ ) exit() -import json -import fanova -import matplotlib.pyplot as plt -import pandas as pd -import seaborn as sns - -import openml - - -############################################################################## -# With the advent of automated machine learning, automated hyperparameter -# optimization methods are by now routinely used in data mining. However, this -# progress is not yet matched by equal progress on automatic analyses that -# yield information beyond performance-optimizing hyperparameter settings. -# In this example, we aim to answer the following two questions: Given an -# algorithm, what are generally its most important hyperparameters? -# -# This work is carried out on the OpenML-100 benchmark suite, which can be -# obtained by ``openml.study.get_suite('OpenML100')``. In this example, we -# conduct the experiment on the Support Vector Machine (``flow_id=7707``) -# with specific kernel (we will perform a post-process filter operation for -# this). We should set some other experimental parameters (number of results -# per task, evaluation measure and the number of trees of the internal -# functional Anova) before the fun can begin. -# -# Note that we simplify the example in several ways: -# -# 1) We only consider numerical hyperparameters -# 2) We consider all hyperparameters that are numerical (in reality, some -# hyperparameters might be inactive (e.g., ``degree``) or irrelevant -# (e.g., ``random_state``) -# 3) We assume all hyperparameters to be on uniform scale -# -# Any difference in conclusion between the actual paper and the presented -# results is most likely due to one of these simplifications. For example, -# the hyperparameter C looks rather insignificant, whereas it is quite -# important when it is put on a log-scale. All these simplifications can be -# addressed by defining a ConfigSpace. 
For a more elaborated example that uses -# this, please see: -# https://github.com/janvanrijn/openml-pimp/blob/d0a14f3eb480f2a90008889f00041bdccc7b9265/examples/plot/plot_fanova_aggregates.py # noqa F401 - -suite = openml.study.get_suite("OpenML100") -flow_id = 7707 -parameter_filters = {"sklearn.svm.classes.SVC(17)_kernel": "sigmoid"} -evaluation_measure = "predictive_accuracy" -limit_per_task = 500 -limit_nr_tasks = 15 -n_trees = 16 - -fanova_results = [] -# we will obtain all results from OpenML per task. Practice has shown that this places the bottleneck on the -# communication with OpenML, and for iterated experimenting it is better to cache the results in a local file. -for idx, task_id in enumerate(suite.tasks): - if limit_nr_tasks is not None and idx >= limit_nr_tasks: - continue - print( - "Starting with task %d (%d/%d)" - % (task_id, idx + 1, len(suite.tasks) if limit_nr_tasks is None else limit_nr_tasks) - ) - # note that we explicitly only include tasks from the benchmark suite that was specified (as per the for-loop) - evals = openml.evaluations.list_evaluations_setups( - evaluation_measure, - flows=[flow_id], - tasks=[task_id], - size=limit_per_task, - output_format="dataframe", - ) - - performance_column = "value" - # make a DataFrame consisting of all hyperparameters (which is a dict in setup['parameters']) and the performance - # value (in setup['value']). The following line looks a bit complicated, but combines 2 tasks: a) combine - # hyperparameters and performance data in a single dict, b) cast hyperparameter values to the appropriate format - # Note that the ``json.loads(...)`` requires the content to be in JSON format, which is only the case for - # scikit-learn setups (and even there some legacy setups might violate this requirement). It will work for the - # setups that belong to the flows embedded in this example though. - try: - setups_evals = pd.DataFrame( - [ - dict( - **{name: json.loads(value) for name, value in setup["parameters"].items()}, - **{performance_column: setup[performance_column]} - ) - for _, setup in evals.iterrows() - ] +# DEPRECATED EXAMPLE -- Avoid running this code in our CI/CD pipeline +print("This example is deprecated, remove the `if False` in this code to use it manually.") +if False: + import json + import fanova + import matplotlib.pyplot as plt + import pandas as pd + import seaborn as sns + + import openml + + + ############################################################################## + # With the advent of automated machine learning, automated hyperparameter + # optimization methods are by now routinely used in data mining. However, this + # progress is not yet matched by equal progress on automatic analyses that + # yield information beyond performance-optimizing hyperparameter settings. + # In this example, we aim to answer the following two questions: Given an + # algorithm, what are generally its most important hyperparameters? + # + # This work is carried out on the OpenML-100 benchmark suite, which can be + # obtained by ``openml.study.get_suite('OpenML100')``. In this example, we + # conduct the experiment on the Support Vector Machine (``flow_id=7707``) + # with specific kernel (we will perform a post-process filter operation for + # this). We should set some other experimental parameters (number of results + # per task, evaluation measure and the number of trees of the internal + # functional Anova) before the fun can begin. 
+ # + # Note that we simplify the example in several ways: + # + # 1) We only consider numerical hyperparameters + # 2) We consider all hyperparameters that are numerical (in reality, some + # hyperparameters might be inactive (e.g., ``degree``) or irrelevant + # (e.g., ``random_state``) + # 3) We assume all hyperparameters to be on uniform scale + # + # Any difference in conclusion between the actual paper and the presented + # results is most likely due to one of these simplifications. For example, + # the hyperparameter C looks rather insignificant, whereas it is quite + # important when it is put on a log-scale. All these simplifications can be + # addressed by defining a ConfigSpace. For a more elaborated example that uses + # this, please see: + # https://github.com/janvanrijn/openml-pimp/blob/d0a14f3eb480f2a90008889f00041bdccc7b9265/examples/plot/plot_fanova_aggregates.py # noqa F401 + + suite = openml.study.get_suite("OpenML100") + flow_id = 7707 + parameter_filters = {"sklearn.svm.classes.SVC(17)_kernel": "sigmoid"} + evaluation_measure = "predictive_accuracy" + limit_per_task = 500 + limit_nr_tasks = 15 + n_trees = 16 + + fanova_results = [] + # we will obtain all results from OpenML per task. Practice has shown that this places the bottleneck on the + # communication with OpenML, and for iterated experimenting it is better to cache the results in a local file. + for idx, task_id in enumerate(suite.tasks): + if limit_nr_tasks is not None and idx >= limit_nr_tasks: + continue + print( + "Starting with task %d (%d/%d)" + % (task_id, idx + 1, len(suite.tasks) if limit_nr_tasks is None else limit_nr_tasks) ) - except json.decoder.JSONDecodeError as e: - print("Task %d error: %s" % (task_id, e)) - continue - # apply our filters, to have only the setups that comply to the hyperparameters we want - for filter_key, filter_value in parameter_filters.items(): - setups_evals = setups_evals[setups_evals[filter_key] == filter_value] - # in this simplified example, we only display numerical and float hyperparameters. For categorical hyperparameters, - # the fanova library needs to be informed by using a configspace object. - setups_evals = setups_evals.select_dtypes(include=["int64", "float64"]) - # drop rows with unique values. These are by definition not an interesting hyperparameter, e.g., ``axis``, - # ``verbose``. - setups_evals = setups_evals[ - [ - c - for c in list(setups_evals) - if len(setups_evals[c].unique()) > 1 or c == performance_column - ] - ] - # We are done with processing ``setups_evals``. Note that we still might have some irrelevant hyperparameters, e.g., - # ``random_state``. We have dropped some relevant hyperparameters, i.e., several categoricals. 
Let's check it out: - - # determine x values to pass to fanova library - parameter_names = [ - pname for pname in setups_evals.columns.to_numpy() if pname != performance_column - ] - evaluator = fanova.fanova.fANOVA( - X=setups_evals[parameter_names].to_numpy(), - Y=setups_evals[performance_column].to_numpy(), - n_trees=n_trees, - ) - for idx, pname in enumerate(parameter_names): + # note that we explicitly only include tasks from the benchmark suite that was specified (as per the for-loop) + evals = openml.evaluations.list_evaluations_setups( + evaluation_measure, + flows=[flow_id], + tasks=[task_id], + size=limit_per_task, + output_format="dataframe", + ) + + performance_column = "value" + # make a DataFrame consisting of all hyperparameters (which is a dict in setup['parameters']) and the performance + # value (in setup['value']). The following line looks a bit complicated, but combines 2 tasks: a) combine + # hyperparameters and performance data in a single dict, b) cast hyperparameter values to the appropriate format + # Note that the ``json.loads(...)`` requires the content to be in JSON format, which is only the case for + # scikit-learn setups (and even there some legacy setups might violate this requirement). It will work for the + # setups that belong to the flows embedded in this example though. try: - fanova_results.append( - { - "hyperparameter": pname.split(".")[-1], - "fanova": evaluator.quantify_importance([idx])[(idx,)]["individual importance"], - } + setups_evals = pd.DataFrame( + [ + dict( + **{name: json.loads(value) for name, value in setup["parameters"].items()}, + **{performance_column: setup[performance_column]} + ) + for _, setup in evals.iterrows() + ] ) - except RuntimeError as e: - # functional ANOVA sometimes crashes with a RuntimeError, e.g., on tasks where the performance is constant - # for all configurations (there is no variance). We will skip these tasks (like the authors did in the - # paper). + except json.decoder.JSONDecodeError as e: print("Task %d error: %s" % (task_id, e)) continue + # apply our filters, to have only the setups that comply to the hyperparameters we want + for filter_key, filter_value in parameter_filters.items(): + setups_evals = setups_evals[setups_evals[filter_key] == filter_value] + # in this simplified example, we only display numerical and float hyperparameters. For categorical hyperparameters, + # the fanova library needs to be informed by using a configspace object. + setups_evals = setups_evals.select_dtypes(include=["int64", "float64"]) + # drop rows with unique values. These are by definition not an interesting hyperparameter, e.g., ``axis``, + # ``verbose``. + setups_evals = setups_evals[ + [ + c + for c in list(setups_evals) + if len(setups_evals[c].unique()) > 1 or c == performance_column + ] + ] + # We are done with processing ``setups_evals``. Note that we still might have some irrelevant hyperparameters, e.g., + # ``random_state``. We have dropped some relevant hyperparameters, i.e., several categoricals. Let's check it out: -# transform ``fanova_results`` from a list of dicts into a DataFrame -fanova_results = pd.DataFrame(fanova_results) - -############################################################################## -# make the boxplot of the variance contribution. Obviously, we can also use -# this data to make the Nemenyi plot, but this relies on the rather complex -# ``Orange`` dependency (``pip install Orange3``). 
For the complete example, -# the reader is referred to the more elaborate script (referred to earlier) -fig, ax = plt.subplots() -sns.boxplot(x="hyperparameter", y="fanova", data=fanova_results, ax=ax) -ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right") -ax.set_ylabel("Variance Contribution") -ax.set_xlabel(None) -plt.tight_layout() -plt.show() + # determine x values to pass to fanova library + parameter_names = [ + pname for pname in setups_evals.columns.to_numpy() if pname != performance_column + ] + evaluator = fanova.fanova.fANOVA( + X=setups_evals[parameter_names].to_numpy(), + Y=setups_evals[performance_column].to_numpy(), + n_trees=n_trees, + ) + for idx, pname in enumerate(parameter_names): + try: + fanova_results.append( + { + "hyperparameter": pname.split(".")[-1], + "fanova": evaluator.quantify_importance([idx])[(idx,)]["individual importance"], + } + ) + except RuntimeError as e: + # functional ANOVA sometimes crashes with a RuntimeError, e.g., on tasks where the performance is constant + # for all configurations (there is no variance). We will skip these tasks (like the authors did in the + # paper). + print("Task %d error: %s" % (task_id, e)) + continue + + # transform ``fanova_results`` from a list of dicts into a DataFrame + fanova_results = pd.DataFrame(fanova_results) + + ############################################################################## + # make the boxplot of the variance contribution. Obviously, we can also use + # this data to make the Nemenyi plot, but this relies on the rather complex + # ``Orange`` dependency (``pip install Orange3``). For the complete example, + # the reader is referred to the more elaborate script (referred to earlier) + fig, ax = plt.subplots() + sns.boxplot(x="hyperparameter", y="fanova", data=fanova_results, ax=ax) + ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right") + ax.set_ylabel("Variance Contribution") + ax.set_xlabel(None) + plt.tight_layout() + plt.show() diff --git a/openml/__version__.py b/openml/__version__.py index 6632a85f4..392bf4b37 100644 --- a/openml/__version__.py +++ b/openml/__version__.py @@ -5,4 +5,4 @@ # The following line *must* be the last in the module, exactly as formatted: from __future__ import annotations -__version__ = "0.15.0" +__version__ = "0.15.1" diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 4f673186e..3509f18e7 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -24,6 +24,7 @@ from .__version__ import __version__ from .exceptions import ( OpenMLHashException, + OpenMLNotAuthorizedError, OpenMLServerError, OpenMLServerException, OpenMLServerNoResult, @@ -36,6 +37,8 @@ FILE_ELEMENTS_TYPE = Dict[str, Union[str, Tuple[str, str]]] DATABASE_CONNECTION_ERRCODE = 107 +API_TOKEN_HELP_LINK = "https://openml.github.io/openml-python/main/examples/20_basic/introduction_tutorial.html#authentication" # noqa: S105 + def _robot_delay(n: int) -> float: wait = (1 / (1 + math.exp(-(n * 0.5 - 4)))) * 60 @@ -208,6 +211,8 @@ def _download_minio_bucket(source: str, destination: str | Path) -> None: for file_object in client.list_objects(bucket, prefix=prefix, recursive=True): if file_object.object_name is None: raise ValueError(f"Object name is None for object {file_object!r}") + if file_object.etag is None: + raise ValueError(f"Object etag is None for object {file_object!r}") marker = destination / file_object.etag if marker.exists(): @@ -351,7 +356,7 @@ def __is_checksum_equal(downloaded_file_binary: bytes, md5_checksum: str | None return md5_checksum == 
md5_checksum_download -def _send_request( # noqa: C901 +def _send_request( # noqa: C901, PLR0912 request_method: str, url: str, data: DATA_TYPE, @@ -387,18 +392,15 @@ # -- Check if encoding is not UTF-8 perhaps if __is_checksum_equal(response.content, md5_checksum): raise OpenMLHashException( - "Checksum of downloaded file is unequal to the expected checksum {}" - "because the text encoding is not UTF-8 when downloading {}. " - "There might be a sever-sided issue with the file, " - "see: https://github.com/openml/openml-python/issues/1180.".format( - md5_checksum, - url, - ), + f"Checksum of downloaded file is unequal to the expected checksum " + f"{md5_checksum} because the text encoding is not UTF-8 when " + f"downloading {url}. There might be a server-side issue with the file, " + "see: https://github.com/openml/openml-python/issues/1180.", ) raise OpenMLHashException( - "Checksum of downloaded file is unequal to the expected checksum {} " - "when downloading {}.".format(md5_checksum, url), + f"Checksum of downloaded file is unequal to the expected checksum " + f"{md5_checksum} when downloading {url}.", ) return response @@ -457,26 +459,33 @@ def __parse_server_exception( url: str, file_elements: FILE_ELEMENTS_TYPE | None, ) -> OpenMLServerError: - if response.status_code == 414: + if response.status_code == requests.codes.URI_TOO_LONG: raise OpenMLServerError(f"URI too long! ({url})") + # OpenML has a sophisticated error system where information about failures is provided + # in the response body itself. + # First, we need to parse it out. try: server_exception = xmltodict.parse(response.text) except xml.parsers.expat.ExpatError as e: raise e - except Exception as e: # noqa: BLE001 - # OpenML has a sophisticated error system - # where information about failures is provided. try to parse this + except Exception as e: + # If we failed to parse it out, then something has gone wrong in the body sent back + # from the server and there is little extra information we can capture. raise OpenMLServerError( f"Unexpected server error when calling {url}. Please contact the developers!\n" f"Status code: {response.status_code}\n{response.text}", ) from e + # Now we can parse out the specific error codes that we return. These + # are in addition to the typical HTTP error codes, but encode more + # specific information.
You can find these codes here: + # https://github.com/openml/OpenML/blob/develop/openml_OS/views/pages/api_new/v1/xml/pre.php server_error = server_exception["oml:error"] code = int(server_error["oml:code"]) message = server_error["oml:message"] additional_information = server_error.get("oml:additional_information") - if code in [372, 512, 500, 482, 542, 674]: + if code in [111, 372, 512, 500, 482, 542, 674]: if additional_information: full_message = f"{message} - {additional_information}" else: @@ -484,10 +493,9 @@ def __parse_server_exception( # 512 for runs, 372 for datasets, 500 for flows # 482 for tasks, 542 for evaluations, 674 for setups - return OpenMLServerNoResult( - code=code, - message=full_message, - ) + # 111 for dataset descriptions + return OpenMLServerNoResult(code=code, message=full_message, url=url) + # 163: failure to validate flow XML (https://www.openml.org/api_docs#!/flow/post_flow) if code in [163] and file_elements is not None and "description" in file_elements: # file_elements['description'] is the XML file description of the flow @@ -498,4 +506,21 @@ def __parse_server_exception( ) else: full_message = f"{message} - {additional_information}" + + if code in [ + 102, # flow/exists post + 137, # dataset post + 350, # dataset/42 delete + 310, # flow/ post + 320, # flow/42 delete + 400, # run/42 delete + 460, # task/42 delete + ]: + msg = ( + f"The API call {url} requires authentication via an API key.\nPlease configure " + "OpenML-Python to use your API as described in this example:" + "\nhttps://openml.github.io/openml-python/main/examples/20_basic/introduction_tutorial.html#authentication" + ) + return OpenMLNotAuthorizedError(message=msg) + return OpenMLServerException(code=code, message=full_message, url=url) diff --git a/openml/cli.py b/openml/cli.py index 5732442d0..d0a46e498 100644 --- a/openml/cli.py +++ b/openml/cli.py @@ -1,4 +1,5 @@ -""""Command Line Interface for `openml` to configure its settings.""" +"""Command Line Interface for `openml` to configure its settings.""" + from __future__ import annotations import argparse diff --git a/openml/config.py b/openml/config.py index 6a37537dc..d838b070a 100644 --- a/openml/config.py +++ b/openml/config.py @@ -8,10 +8,12 @@ import logging.handlers import os import platform +import shutil import warnings +from contextlib import contextmanager from io import StringIO from pathlib import Path -from typing import Any, cast +from typing import Any, Iterator, cast from typing_extensions import Literal, TypedDict from urllib.parse import urlparse @@ -20,6 +22,9 @@ console_handler: logging.StreamHandler | None = None file_handler: logging.handlers.RotatingFileHandler | None = None +OPENML_CACHE_DIR_ENV_VAR = "OPENML_CACHE_DIR" +OPENML_SKIP_PARQUET_ENV_VAR = "OPENML_SKIP_PARQUET" + class _Config(TypedDict): apikey: str @@ -101,14 +106,50 @@ def set_file_log_level(file_output_level: int) -> None: # Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards) _user_path = Path("~").expanduser().absolute() + + +def _resolve_default_cache_dir() -> Path: + user_defined_cache_dir = os.environ.get(OPENML_CACHE_DIR_ENV_VAR) + if user_defined_cache_dir is not None: + return Path(user_defined_cache_dir) + + if platform.system().lower() != "linux": + return _user_path / ".openml" + + xdg_cache_home = os.environ.get("XDG_CACHE_HOME") + if xdg_cache_home is None: + return Path("~", ".cache", "openml") + + # This is the proper XDG_CACHE_HOME directory, but + # we unfortunately had a problem where we used 
XDG_CACHE_HOME/org, + # we check heuristically if this old directory still exists and issue + # a warning if it does. There's too much data to move to do this for the user. + + # The new cache directory exists + cache_dir = Path(xdg_cache_home) / "openml" + if cache_dir.exists(): + return cache_dir + + # The old cache directory *does not* exist + heuristic_dir_for_backwards_compat = Path(xdg_cache_home) / "org" / "openml" + if not heuristic_dir_for_backwards_compat.exists(): + return cache_dir + + root_dir_to_delete = Path(xdg_cache_home) / "org" + openml_logger.warning( + "An old cache directory was found at '%s'. This directory is no longer used by " + "OpenML-Python. To silence this warning you would need to delete the old cache " + "directory. The cached files will then be located in '%s'.", + root_dir_to_delete, + cache_dir, + ) + return Path(xdg_cache_home) + + _defaults: _Config = { "apikey": "", "server": "https://www.openml.org/api/v1/xml", - "cachedir": ( - Path(os.environ.get("XDG_CACHE_HOME", _user_path / ".cache" / "openml")) - if platform.system() == "Linux" - else _user_path / ".openml" - ), + "cachedir": _resolve_default_cache_dir(), "avoid_duplicate_runs": True, "retry_policy": "human", "connection_n_retries": 5, @@ -135,11 +176,11 @@ def get_server_base_url() -> str: apikey: str = _defaults["apikey"] show_progress: bool = _defaults["show_progress"] # The current cache directory (without the server name) -_root_cache_directory = Path(_defaults["cachedir"]) +_root_cache_directory: Path = Path(_defaults["cachedir"]) avoid_duplicate_runs = _defaults["avoid_duplicate_runs"] -retry_policy = _defaults["retry_policy"] -connection_n_retries = _defaults["connection_n_retries"] +retry_policy: Literal["human", "robot"] = _defaults["retry_policy"] +connection_n_retries: int = _defaults["connection_n_retries"] def set_retry_policy(value: Literal["human", "robot"], n_retries: int | None = None) -> None: @@ -218,11 +259,66 @@ def stop_using_configuration_for_example(cls) -> None: cls._start_last_called = False +def _handle_xdg_config_home_backwards_compatibility( + xdg_home: str, +) -> Path: + # NOTE(eddiebergman): A previous bug resulted in the config + # file being located at `${XDG_CONFIG_HOME}/config` instead + # of `${XDG_CONFIG_HOME}/openml/config`. To maintain backwards + # compatibility, where users may already have had a configuration, + # we copy it over and issue a warning until it's deleted. + # As a heuristic to ensure that it's "our" config file, we try to parse it first. + config_dir = Path(xdg_home) / "openml" + + backwards_compat_config_file = Path(xdg_home) / "config" + if not backwards_compat_config_file.exists(): + return config_dir + + # If it errors, that's a good sign it's not ours and we can + # safely ignore it, jumping out of this block. This is a heuristic + try: + _parse_config(backwards_compat_config_file) + except Exception: # noqa: BLE001 + return config_dir + + # Looks like it's ours, let's try to copy it to the correct place + correct_config_location = config_dir / "config" + try: + # We copy and return the new copied location + shutil.copy(backwards_compat_config_file, correct_config_location) + openml_logger.warning( + "An openml configuration file was found at the old location " + f"at {backwards_compat_config_file}. We have copied it to the new " + f"location at {correct_config_location}. 
" + "\nTo silence this warning please verify that the configuration file " + f"at {correct_config_location} is correct and delete the file at " + f"{backwards_compat_config_file}." + ) + return config_dir + except Exception as e: # noqa: BLE001 + # We failed to copy and its ours, return the old one. + openml_logger.warning( + "While attempting to perform a backwards compatible fix, we " + f"failed to copy the openml config file at " + f"{backwards_compat_config_file}' to {correct_config_location}" + f"\n{type(e)}: {e}", + "\n\nTo silence this warning, please copy the file " + "to the new location and delete the old file at " + f"{backwards_compat_config_file}.", + ) + return backwards_compat_config_file + + def determine_config_file_path() -> Path: - if platform.system() == "Linux": - config_dir = Path(os.environ.get("XDG_CONFIG_HOME", Path("~") / ".config" / "openml")) + if platform.system().lower() == "linux": + xdg_home = os.environ.get("XDG_CONFIG_HOME") + if xdg_home is not None: + config_dir = _handle_xdg_config_home_backwards_compatibility(xdg_home) + else: + config_dir = Path("~", ".config", "openml") else: config_dir = Path("~") / ".openml" + # Still use os.path.expanduser to trigger the mock in the unit test config_dir = Path(config_dir).expanduser().resolve() return config_dir / "config" @@ -251,7 +347,10 @@ def _setup(config: _Config | None = None) -> None: if not config_dir.exists(): config_dir.mkdir(exist_ok=True, parents=True) except PermissionError: - pass + openml_logger.warning( + f"No permission to create OpenML directory at {config_dir}!" + " This can result in OpenML-Python not working properly." + ) if config is None: config = _parse_config(config_file) @@ -260,36 +359,29 @@ def _setup(config: _Config | None = None) -> None: apikey = config["apikey"] server = config["server"] show_progress = config["show_progress"] - short_cache_dir = Path(config["cachedir"]) n_retries = int(config["connection_n_retries"]) set_retry_policy(config["retry_policy"], n_retries) + user_defined_cache_dir = os.environ.get(OPENML_CACHE_DIR_ENV_VAR) + if user_defined_cache_dir is not None: + short_cache_dir = Path(user_defined_cache_dir) + else: + short_cache_dir = Path(config["cachedir"]) _root_cache_directory = short_cache_dir.expanduser().resolve() try: cache_exists = _root_cache_directory.exists() - except PermissionError: - cache_exists = False - - # create the cache subdirectory - try: - if not _root_cache_directory.exists(): + # create the cache subdirectory + if not cache_exists: _root_cache_directory.mkdir(exist_ok=True, parents=True) + _create_log_handlers() except PermissionError: openml_logger.warning( - "No permission to create openml cache directory at %s! This can result in " - "OpenML-Python not working properly." % _root_cache_directory, + f"No permission to create OpenML directory at {_root_cache_directory}!" + " This can result in OpenML-Python not working properly." ) - - if cache_exists: - _create_log_handlers() - else: _create_log_handlers(create_file_handler=False) - openml_logger.warning( - "No permission to create OpenML directory at %s! This can result in OpenML-Python " - "not working properly." 
% config_dir, - ) def set_field_in_config_file(field: str, value: Any) -> None: @@ -407,6 +499,18 @@ def set_root_cache_directory(root_cache_directory: str | Path) -> None: stop_using_configuration_for_example = ConfigurationForExamples.stop_using_configuration_for_example +@contextmanager +def overwrite_config_context(config: dict[str, Any]) -> Iterator[_Config]: + """A context manager to temporarily override variables in the configuration.""" + existing_config = get_config_as_dict() + merged_config = {**existing_config, **config} + + _setup(merged_config) # type: ignore + yield merged_config # type: ignore + + _setup(existing_config) + + __all__ = [ "get_cache_directory", "set_root_cache_directory", diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index 30febcba5..5190ac522 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ -3,6 +3,7 @@ import gzip import logging +import os import pickle import re import warnings @@ -17,6 +18,7 @@ import xmltodict from openml.base import OpenMLBase +from openml.config import OPENML_SKIP_PARQUET_ENV_VAR from openml.exceptions import PyOpenMLError from .data_feature import OpenMLDataFeature @@ -156,14 +158,14 @@ def find_invalid_characters(string: str, pattern: str) -> str: ) if dataset_id is None: - pattern = "^[\x00-\x7F]*$" + pattern = "^[\x00-\x7f]*$" if description and not re.match(pattern, description): # not basiclatin (XSD complains) invalid_characters = find_invalid_characters(description, pattern) raise ValueError( f"Invalid symbols {invalid_characters} in description: {description}", ) - pattern = "^[\x00-\x7F]*$" + pattern = "^[\x00-\x7f]*$" if citation and not re.match(pattern, citation): # not basiclatin (XSD complains) invalid_characters = find_invalid_characters(citation, pattern) @@ -329,13 +331,26 @@ def __eq__(self, other: Any) -> bool: "version", "upload_date", "url", + "_parquet_url", "dataset", "data_file", + "format", + "cache_format", + } + + cache_fields = { + "_dataset", + "data_file", + "data_pickle_file", + "data_feather_file", + "feather_attribute_file", + "parquet_file", } # check that common keys and values are identical - self_keys = set(self.__dict__.keys()) - server_fields - other_keys = set(other.__dict__.keys()) - server_fields + ignore_fields = server_fields | cache_fields + self_keys = set(self.__dict__.keys()) - ignore_fields + other_keys = set(other.__dict__.keys()) - ignore_fields return self_keys == other_keys and all( self.__dict__[key] == other.__dict__[key] for key in self_keys ) @@ -345,8 +360,10 @@ def _download_data(self) -> None: # import required here to avoid circular import. 
from .functions import _get_dataset_arff, _get_dataset_parquet - if self._parquet_url is not None: - self.parquet_file = str(_get_dataset_parquet(self)) + skip_parquet = os.environ.get(OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" + if self._parquet_url is not None and not skip_parquet: + parquet_file = _get_dataset_parquet(self) + self.parquet_file = None if parquet_file is None else str(parquet_file) if self.parquet_file is None: self.data_file = str(_get_dataset_arff(self)) @@ -574,7 +591,7 @@ def _parse_data_from_file(self, data_file: Path) -> tuple[list[str], list[bool], def _parse_data_from_pq(self, data_file: Path) -> tuple[list[str], list[bool], pd.DataFrame]: try: data = pd.read_parquet(data_file) - except Exception as e: # noqa: BLE001 + except Exception as e: raise Exception(f"File: {data_file}") from e categorical = [data[c].dtype.name == "category" for c in data.columns] attribute_names = list(data.columns) @@ -816,7 +833,7 @@ def get_data( # noqa: C901, PLR0912, PLR0915 to_exclude.extend(self.ignore_attribute) if len(to_exclude) > 0: - logger.info("Going to remove the following attributes: %s" % to_exclude) + logger.info(f"Going to remove the following attributes: {to_exclude}") keep = np.array([column not in to_exclude for column in attribute_names]) data = data.loc[:, keep] if isinstance(data, pd.DataFrame) else data[:, keep] @@ -1077,7 +1094,9 @@ def _read_features(features_file: Path) -> dict[int, OpenMLDataFeature]: def _parse_features_xml(features_xml_string: str) -> dict[int, OpenMLDataFeature]: - xml_dict = xmltodict.parse(features_xml_string, force_list=("oml:feature", "oml:nominal_value")) + xml_dict = xmltodict.parse( + features_xml_string, force_list=("oml:feature", "oml:nominal_value"), strip_whitespace=False + ) features_xml = xml_dict["oml:data_features"] features: dict[int, OpenMLDataFeature] = {} diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 410867b01..3f3c709f9 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -3,9 +3,11 @@ from __future__ import annotations import logging +import os import warnings from collections import OrderedDict from pathlib import Path +from pyexpat import ExpatError from typing import TYPE_CHECKING, Any, overload from typing_extensions import Literal @@ -15,11 +17,11 @@ import pandas as pd import urllib3 import xmltodict -from pyexpat import ExpatError from scipy.sparse import coo_matrix import openml._api_calls import openml.utils +from openml.config import OPENML_SKIP_PARQUET_ENV_VAR from openml.exceptions import ( OpenMLHashException, OpenMLPrivateDatasetError, @@ -85,8 +87,7 @@ def list_datasets( *, output_format: Literal["dataframe"], **kwargs: Any, -) -> pd.DataFrame: - ... +) -> pd.DataFrame: ... @overload @@ -98,8 +99,7 @@ def list_datasets( tag: str | None, output_format: Literal["dataframe"], **kwargs: Any, -) -> pd.DataFrame: - ... +) -> pd.DataFrame: ... @overload @@ -111,8 +111,7 @@ def list_datasets( tag: str | None = ..., output_format: Literal["dict"] = "dict", **kwargs: Any, -) -> pd.DataFrame: - ... +) -> pd.DataFrame: ... def list_datasets( @@ -207,8 +206,7 @@ def _list_datasets( data_id: list | None = ..., output_format: Literal["dict"] = "dict", **kwargs: Any, -) -> dict: - ... +) -> dict: ... @overload @@ -216,8 +214,7 @@ def _list_datasets( data_id: list | None = ..., output_format: Literal["dataframe"] = "dataframe", **kwargs: Any, -) -> pd.DataFrame: - ... +) -> pd.DataFrame: ... 
def _list_datasets( @@ -256,18 +253,16 @@ def _list_datasets( for operator, value in kwargs.items(): api_call += f"/{operator}/{value}" if data_id is not None: - api_call += "/data_id/%s" % ",".join([str(int(i)) for i in data_id]) + api_call += "/data_id/{}".format(",".join([str(int(i)) for i in data_id])) return __list_datasets(api_call=api_call, output_format=output_format) @overload -def __list_datasets(api_call: str, output_format: Literal["dict"] = "dict") -> dict: - ... +def __list_datasets(api_call: str, output_format: Literal["dict"] = "dict") -> dict: ... @overload -def __list_datasets(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame: - ... +def __list_datasets(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame: ... def __list_datasets( @@ -484,7 +479,7 @@ def get_dataset( # noqa: C901, PLR0912 Parameters ---------- dataset_id : int or str - Dataset ID of the dataset to download + The ID or name of the dataset to download. download_data : bool (default=False) If True, also download the data file. Beware that some datasets are large and it might make the operation noticeably slower. Metadata is also still retrieved. @@ -567,7 +562,10 @@ def get_dataset( # noqa: C901, PLR0912 if download_qualities: qualities_file = _get_dataset_qualities_file(did_cache_dir, dataset_id) - if "oml:parquet_url" in description and download_data: + parquet_file = None + skip_parquet = os.environ.get(OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" + download_parquet = "oml:parquet_url" in description and not skip_parquet + if download_parquet and (download_data or download_all_files): try: parquet_file = _get_dataset_parquet( description, @@ -575,12 +573,11 @@ def get_dataset( # noqa: C901, PLR0912 ) except urllib3.exceptions.MaxRetryError: parquet_file = None - else: - parquet_file = None arff_file = None if parquet_file is None and download_data: - logger.warning("Failed to download parquet, fallback on ARFF.") + if download_parquet: + logger.warning("Failed to download parquet, fallback on ARFF.") arff_file = _get_dataset_arff(description) remove_dataset_cache = False @@ -785,10 +782,8 @@ def create_dataset( # noqa: C901, PLR0912, PLR0915 if not is_row_id_an_attribute: raise ValueError( "'row_id_attribute' should be one of the data attribute. " - " Got '{}' while candidates are {}.".format( - row_id_attribute, - [attr[0] for attr in attributes_], - ), + f" Got '{row_id_attribute}' while candidates are" + f" {[attr[0] for attr in attributes_]}.", ) if isinstance(data, pd.DataFrame): @@ -870,7 +865,7 @@ def status_update(data_id: int, status: Literal["active", "deactivated"]) -> Non Updates the status of a dataset to either 'active' or 'deactivated'. Please see the OpenML API documentation for a description of the status and all legal status transitions: - https://docs.openml.org/#dataset-status + https://docs.openml.org/concepts/data/#dataset-status Parameters ---------- diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py index a854686d1..a39096a58 100644 --- a/openml/evaluations/functions.py +++ b/openml/evaluations/functions.py @@ -32,8 +32,7 @@ def list_evaluations( per_fold: bool | None = ..., sort_order: str | None = ..., output_format: Literal["dict", "object"] = "dict", -) -> dict: - ... +) -> dict: ... @overload @@ -51,8 +50,7 @@ def list_evaluations( per_fold: bool | None = ..., sort_order: str | None = ..., output_format: Literal["dataframe"] = ..., -) -> pd.DataFrame: - ... +) -> pd.DataFrame: ... 
def list_evaluations( @@ -204,24 +202,24 @@ def _list_evaluations( ------- dict of objects, or dataframe """ - api_call = "evaluation/list/function/%s" % function + api_call = f"evaluation/list/function/{function}" if kwargs is not None: for operator, value in kwargs.items(): api_call += f"/{operator}/{value}" if tasks is not None: - api_call += "/task/%s" % ",".join([str(int(i)) for i in tasks]) + api_call += "/task/{}".format(",".join([str(int(i)) for i in tasks])) if setups is not None: - api_call += "/setup/%s" % ",".join([str(int(i)) for i in setups]) + api_call += "/setup/{}".format(",".join([str(int(i)) for i in setups])) if flows is not None: - api_call += "/flow/%s" % ",".join([str(int(i)) for i in flows]) + api_call += "/flow/{}".format(",".join([str(int(i)) for i in flows])) if runs is not None: - api_call += "/run/%s" % ",".join([str(int(i)) for i in runs]) + api_call += "/run/{}".format(",".join([str(int(i)) for i in runs])) if uploaders is not None: - api_call += "/uploader/%s" % ",".join([str(int(i)) for i in uploaders]) + api_call += "/uploader/{}".format(",".join([str(int(i)) for i in uploaders])) if study is not None: api_call += "/study/%d" % study if sort_order is not None: - api_call += "/sort_order/%s" % sort_order + api_call += f"/sort_order/{sort_order}" return __list_evaluations(api_call, output_format=output_format) @@ -236,7 +234,7 @@ def __list_evaluations( # Minimalistic check if the XML is useful if "oml:evaluations" not in evals_dict: raise ValueError( - "Error in return XML, does not contain " '"oml:evaluations": %s' % str(evals_dict), + "Error in return XML, does not contain " f'"oml:evaluations": {evals_dict!s}', ) assert isinstance(evals_dict["oml:evaluations"]["oml:evaluation"], list), type( diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py index 02322196e..2d40d03b8 100644 --- a/openml/extensions/sklearn/extension.py +++ b/openml/extensions/sklearn/extension.py @@ -48,12 +48,27 @@ r"(?P(\d+\.)?(\d+\.)?(\d+)?(dev)?[0-9]*))?$", ) -sctypes = np.sctypes if Version(np.__version__) < Version("2.0") else np.core.sctypes +# NOTE(eddiebergman): This was imported before but became deprecated, +# as a result I just enumerated them manually by copy-ing and pasting, +# recommended solution in Numpy 2.0 guide was to explicitly list them. 
SIMPLE_NUMPY_TYPES = [ - nptype - for type_cat, nptypes in sctypes.items() - for nptype in nptypes # type: ignore - if type_cat != "others" + np.int8, + np.int16, + np.int32, + np.int64, + np.longlong, + np.uint8, + np.uint16, + np.uint32, + np.uint64, + np.ulonglong, + np.float16, + np.float32, + np.float64, + np.longdouble, + np.complex64, + np.complex128, + np.clongdouble, ] SIMPLE_TYPES = (bool, int, float, str, *SIMPLE_NUMPY_TYPES) @@ -312,7 +327,7 @@ def flow_to_model( strict_version=strict_version, ) - def _deserialize_sklearn( # noqa: PLR0915, C901, PLR0913, PLR0912 + def _deserialize_sklearn( # noqa: PLR0915, C901, PLR0912 self, o: Any, components: dict | None = None, @@ -419,7 +434,7 @@ def _deserialize_sklearn( # noqa: PLR0915, C901, PLR0913, PLR0912 strict_version=strict_version, ) else: - raise ValueError("Cannot flow_to_sklearn %s" % serialized_type) + raise ValueError(f"Cannot flow_to_sklearn {serialized_type}") else: rval = OrderedDict( @@ -979,17 +994,17 @@ def flatten_all(list_): # length 2 is for {VotingClassifier.estimators, # Pipeline.steps, FeatureUnion.transformer_list} # length 3 is for ColumnTransformer - msg = "Length of tuple of type {} does not match assumptions".format( - sub_component_type, + raise ValueError( + f"Length of tuple of type {sub_component_type}" + " does not match assumptions" ) - raise ValueError(msg) if isinstance(sub_component, str): if sub_component not in SKLEARN_PIPELINE_STRING_COMPONENTS: msg = ( "Second item of tuple does not match assumptions. " "If string, can be only 'drop' or 'passthrough' but" - "got %s" % sub_component + f"got {sub_component}" ) raise ValueError(msg) elif sub_component is None: @@ -1002,15 +1017,15 @@ def flatten_all(list_): elif not isinstance(sub_component, OpenMLFlow): msg = ( "Second item of tuple does not match assumptions. " - "Expected OpenMLFlow, got %s" % type(sub_component) + f"Expected OpenMLFlow, got {type(sub_component)}" ) raise TypeError(msg) if identifier in reserved_keywords: parent_model = f"{model.__module__}.{model.__class__.__name__}" - msg = "Found element shadowing official " "parameter for {}: {}".format( - parent_model, - identifier, + msg = ( + "Found element shadowing official " + f"parameter for {parent_model}: {identifier}" ) raise PyOpenMLError(msg) @@ -1035,9 +1050,9 @@ def flatten_all(list_): model=None, ) component_reference: OrderedDict[str, str | dict] = OrderedDict() - component_reference[ - "oml-python:serialized_object" - ] = COMPOSITION_STEP_CONSTANT + component_reference["oml-python:serialized_object"] = ( + COMPOSITION_STEP_CONSTANT + ) cr_value: dict[str, Any] = OrderedDict() cr_value["key"] = identifier cr_value["step_name"] = identifier @@ -1218,7 +1233,7 @@ def _check_dependencies( for dependency_string in dependencies_list: match = DEPENDENCIES_PATTERN.match(dependency_string) if not match: - raise ValueError("Cannot parse dependency %s" % dependency_string) + raise ValueError(f"Cannot parse dependency {dependency_string}") dependency_name = match.group("name") operation = match.group("operation") @@ -1237,7 +1252,7 @@ def _check_dependencies( installed_version > required_version or installed_version == required_version ) else: - raise NotImplementedError("operation '%s' is not supported" % operation) + raise NotImplementedError(f"operation '{operation}' is not supported") message = ( "Trying to deserialize a model with dependency " f"{dependency_string} not satisfied." 
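On the `np.sctypes` replacement at the top of this hunk: NumPy 2.0 removed `np.sctypes`, so the scalar types are now listed explicitly rather than collected dynamically. A small sketch (with a deliberately abridged list) of why the explicit tuple supports the same membership checks as before:

```python
import numpy as np

# Abridged stand-in for the explicit enumeration above; np.sctypes is gone in NumPy 2.0.
SIMPLE_NUMPY_TYPES = [np.int8, np.int32, np.int64, np.float32, np.float64, np.complex128]
SIMPLE_TYPES = (bool, int, float, str, *SIMPLE_NUMPY_TYPES)

assert isinstance(np.float64(3.14), SIMPLE_TYPES)   # numpy scalars are covered
assert isinstance(3, SIMPLE_TYPES)                   # plain Python ints are covered
assert not isinstance(np.datetime64("2024-01-01"), SIMPLE_TYPES)  # not a "simple" type
```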
@@ -1363,7 +1378,7 @@ def _serialize_cross_validator(self, o: Any) -> OrderedDict[str, str | dict]: with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always", DeprecationWarning) value = getattr(o, key, None) - if w is not None and len(w) and w[0].category == DeprecationWarning: + if w is not None and len(w) and w[0].category is DeprecationWarning: # if the parameter is deprecated, don't show it continue @@ -1812,9 +1827,9 @@ def _prediction_to_probabilities( # then we need to add a column full of zeros into the probabilities # for class 3 because the rest of the library expects that the # probabilities are ordered the same way as the classes are ordered). - message = "Estimator only predicted for {}/{} classes!".format( - proba_y.shape[1], - len(task.class_labels), + message = ( + f"Estimator only predicted for {proba_y.shape[1]}/{len(task.class_labels)}" + " classes!" ) warnings.warn(message, stacklevel=2) openml.config.logger.warning(message) @@ -2008,9 +2023,8 @@ def is_subcomponent_specification(values): pass else: raise TypeError( - "Subcomponent flow should be of type flow, but is {}".format( - type(subcomponent_flow), - ), + "Subcomponent flow should be of type flow, but is" + f" {type(subcomponent_flow)}", ) current = { @@ -2129,8 +2143,8 @@ def instantiate_model_from_hpo_class( """ if not self._is_hpo_class(model): raise AssertionError( - "Flow model %s is not an instance of sklearn.model_selection._search.BaseSearchCV" - % model, + f"Flow model {model} is not an instance of" + " sklearn.model_selection._search.BaseSearchCV", ) base_estimator = model.estimator base_estimator.set_params(**trace_iteration.get_parameters()) @@ -2197,8 +2211,8 @@ def _obtain_arff_trace( """ if not self._is_hpo_class(model): raise AssertionError( - "Flow model %s is not an instance of sklearn.model_selection._search.BaseSearchCV" - % model, + f"Flow model {model} is not an instance of " + "sklearn.model_selection._search.BaseSearchCV", ) if not hasattr(model, "cv_results_"): raise ValueError("model should contain `cv_results_`") @@ -2235,7 +2249,7 @@ def _obtain_arff_trace( # hyperparameter layer_sizes of MLPClassifier type = "STRING" # noqa: A001 else: - raise TypeError("Unsupported param type in param grid: %s" % key) + raise TypeError(f"Unsupported param type in param grid: {key}") # renamed the attribute param to parameter, as this is a required # OpenML convention - this also guards against name collisions diff --git a/openml/flows/flow.py b/openml/flows/flow.py index 4e437e35c..a3ff50ca1 100644 --- a/openml/flows/flow.py +++ b/openml/flows/flow.py @@ -135,15 +135,13 @@ def __init__( # noqa: PLR0913 keys_parameters_meta_info = set(parameters_meta_info.keys()) if len(keys_parameters.difference(keys_parameters_meta_info)) > 0: raise ValueError( - "Parameter %s only in parameters, but not in " - "parameters_meta_info." - % str(keys_parameters.difference(keys_parameters_meta_info)), + f"Parameter {keys_parameters.difference(keys_parameters_meta_info)!s} only in " + "parameters, but not in parameters_meta_info.", ) if len(keys_parameters_meta_info.difference(keys_parameters)) > 0: raise ValueError( - "Parameter %s only in parameters_meta_info, " - "but not in parameters." 
- % str(keys_parameters_meta_info.difference(keys_parameters)), + f"Parameter {keys_parameters_meta_info.difference(keys_parameters)!s} only in " + " parameters_meta_info, but not in parameters.", ) self.external_version = external_version diff --git a/openml/flows/functions.py b/openml/flows/functions.py index b01e54b44..3d056ac60 100644 --- a/openml/flows/functions.py +++ b/openml/flows/functions.py @@ -140,8 +140,7 @@ def list_flows( tag: str | None = ..., output_format: Literal["dict"] = "dict", **kwargs: Any, -) -> dict: - ... +) -> dict: ... @overload @@ -152,8 +151,7 @@ def list_flows( *, output_format: Literal["dataframe"], **kwargs: Any, -) -> pd.DataFrame: - ... +) -> pd.DataFrame: ... @overload @@ -163,8 +161,7 @@ def list_flows( tag: str | None, output_format: Literal["dataframe"], **kwargs: Any, -) -> pd.DataFrame: - ... +) -> pd.DataFrame: ... def list_flows( @@ -243,18 +240,15 @@ def list_flows( @overload -def _list_flows(output_format: Literal["dict"] = ..., **kwargs: Any) -> dict: - ... +def _list_flows(output_format: Literal["dict"] = ..., **kwargs: Any) -> dict: ... @overload -def _list_flows(*, output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame: - ... +def _list_flows(*, output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame: ... @overload -def _list_flows(output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame: - ... +def _list_flows(output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame: ... def _list_flows( @@ -391,13 +385,11 @@ def get_flow_id( @overload -def __list_flows(api_call: str, output_format: Literal["dict"] = "dict") -> dict: - ... +def __list_flows(api_call: str, output_format: Literal["dict"] = "dict") -> dict: ... @overload -def __list_flows(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame: - ... +def __list_flows(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame: ... def __list_flows( @@ -453,7 +445,7 @@ def _check_flow_for_server_id(flow: OpenMLFlow) -> None: while len(stack) > 0: current = stack.pop() if current.flow_id is None: - raise ValueError("Flow %s has no flow_id!" % current.name) + raise ValueError(f"Flow {current.name} has no flow_id!") for component in current.components.values(): stack.append(component) @@ -492,10 +484,10 @@ def assert_flows_equal( # noqa: C901, PLR0912, PLR0913, PLR0915 Whether to ignore matching of flow descriptions. """ if not isinstance(flow1, OpenMLFlow): - raise TypeError("Argument 1 must be of type OpenMLFlow, but is %s" % type(flow1)) + raise TypeError(f"Argument 1 must be of type OpenMLFlow, but is {type(flow1)}") if not isinstance(flow2, OpenMLFlow): - raise TypeError("Argument 2 must be of type OpenMLFlow, but is %s" % type(flow2)) + raise TypeError(f"Argument 2 must be of type OpenMLFlow, but is {type(flow2)}") # TODO as they are actually now saved during publish, it might be good to # check for the equality of these as well. @@ -522,11 +514,11 @@ def assert_flows_equal( # noqa: C901, PLR0912, PLR0913, PLR0915 for name in set(attr1.keys()).union(attr2.keys()): if name not in attr1: raise ValueError( - "Component %s only available in " "argument2, but not in argument1." % name, + f"Component {name} only available in " "argument2, but not in argument1.", ) if name not in attr2: raise ValueError( - "Component %s only available in " "argument2, but not in argument1." 
% name, + f"Component {name} only available in " "argument2, but not in argument1.", ) assert_flows_equal( attr1[name], @@ -549,9 +541,9 @@ def assert_flows_equal( # noqa: C901, PLR0912, PLR0913, PLR0915 symmetric_difference = params_flow_1 ^ params_flow_2 if len(symmetric_difference) > 0: raise ValueError( - "Flow %s: parameter set of flow " + f"Flow {flow1.name}: parameter set of flow " "differs from the parameters stored " - "on the server." % flow1.name, + "on the server.", ) if ignore_parameter_values_on_older_children: diff --git a/openml/runs/functions.py b/openml/runs/functions.py index f7963297d..b6f950020 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -74,8 +74,7 @@ def run_model_on_task( # noqa: PLR0913 ---------- model : sklearn model A model which has a function fit(X,Y) and predict(X), - all supervised estimators of scikit learn follow this definition of a model - (https://scikit-learn.org/stable/tutorial/statistical_inference/supervised_learning.html) + all supervised estimators of scikit learn follow this definition of a model. task : OpenMLTask or int or str Task to perform or Task id. This may be a model instead if the first argument is an OpenMLTask. @@ -199,16 +198,12 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913 flow : OpenMLFlow A flow wraps a machine learning model together with relevant information. The model has a function fit(X,Y) and predict(X), - all supervised estimators of scikit learn follow this definition of a model - (https://scikit-learn.org/stable/tutorial/statistical_inference/supervised_learning.html) + all supervised estimators of scikit learn follow this definition of a model. task : OpenMLTask Task to perform. This may be an OpenMLFlow instead if the first argument is an OpenMLTask. avoid_duplicate_runs : bool, optional (default=True) If True, the run will throw an error if the setup/task combination is already present on the server. This feature requires an internet connection. - avoid_duplicate_runs : bool, optional (default=True) - If True, the run will throw an error if the setup/task combination is already present on - the server. This feature requires an internet connection. flow_tags : List[str], optional (default=None) A list of tags that the flow should have at creation. seed: int, optional (default=None) @@ -367,7 +362,7 @@ def get_run_trace(run_id: int) -> OpenMLRunTrace: return OpenMLRunTrace.trace_from_xml(trace_xml) -def initialize_model_from_run(run_id: int) -> Any: +def initialize_model_from_run(run_id: int, *, strict_version: bool = True) -> Any: """ Initialized a model based on a run_id (i.e., using the exact same parameter settings) @@ -376,6 +371,8 @@ def initialize_model_from_run(run_id: int) -> Any: ---------- run_id : int The Openml run_id + strict_version: bool (default=True) + See `flow_to_model` strict_version. 
Returns ------- @@ -385,7 +382,7 @@ def initialize_model_from_run(run_id: int) -> Any: # TODO(eddiebergman): I imagine this is None if it's not published, # might need to raise an explicit error for that assert run.setup_id is not None - return initialize_model(run.setup_id) + return initialize_model(setup_id=run.setup_id, strict_version=strict_version) def initialize_model_from_trace( @@ -679,9 +676,9 @@ def _calculate_local_measure( # type: ignore user_defined_measures_per_fold[measure][rep_no][fold_no] = user_defined_measures_fold[ measure ] - user_defined_measures_per_sample[measure][rep_no][fold_no][ - sample_no - ] = user_defined_measures_fold[measure] + user_defined_measures_per_sample[measure][rep_no][fold_no][sample_no] = ( + user_defined_measures_fold[measure] + ) trace: OpenMLRunTrace | None = None if len(traces) > 0: @@ -783,13 +780,9 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 raise NotImplementedError(task.task_type) config.logger.info( - "Going to run model {} on dataset {} for repeat {} fold {} sample {}".format( - str(model), - openml.datasets.get_dataset(task.dataset_id).name, - rep_no, - fold_no, - sample_no, - ), + f"Going to run model {model!s} on " + f"dataset {openml.datasets.get_dataset(task.dataset_id).name} " + f"for repeat {rep_no} fold {fold_no} sample {sample_no}" ) ( pred_y, @@ -865,7 +858,7 @@ def get_run(run_id: int, ignore_cache: bool = False) -> OpenMLRun: # noqa: FBT0 return _create_run_from_xml(run_xml) -def _create_run_from_xml(xml: str, from_server: bool = True) -> OpenMLRun: # noqa: PLR0915, PLR0912, C901, , FBT001, FBT002FBT +def _create_run_from_xml(xml: str, from_server: bool = True) -> OpenMLRun: # noqa: PLR0915, PLR0912, C901, FBT001, FBT002 """Create a run object from xml returned from server. Parameters @@ -978,7 +971,7 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None): # type: ignore else: raise ValueError( 'Could not find keys "value" or ' - '"array_data" in %s' % str(evaluation_dict.keys()), + f'"array_data" in {evaluation_dict.keys()!s}', ) if ( "@repeat" in evaluation_dict @@ -1211,15 +1204,15 @@ def _list_runs( # noqa: PLR0913 for operator, value in kwargs.items(): api_call += f"/{operator}/{value}" if id is not None: - api_call += "/run/%s" % ",".join([str(int(i)) for i in id]) + api_call += "/run/{}".format(",".join([str(int(i)) for i in id])) if task is not None: - api_call += "/task/%s" % ",".join([str(int(i)) for i in task]) + api_call += "/task/{}".format(",".join([str(int(i)) for i in task])) if setup is not None: - api_call += "/setup/%s" % ",".join([str(int(i)) for i in setup]) + api_call += "/setup/{}".format(",".join([str(int(i)) for i in setup])) if flow is not None: - api_call += "/flow/%s" % ",".join([str(int(i)) for i in flow]) + api_call += "/flow/{}".format(",".join([str(int(i)) for i in flow])) if uploader is not None: - api_call += "/uploader/%s" % ",".join([str(int(i)) for i in uploader]) + api_call += "/uploader/{}".format(",".join([str(int(i)) for i in uploader])) if study is not None: api_call += "/study/%d" % study if display_errors: diff --git a/openml/runs/run.py b/openml/runs/run.py index 766f8c97f..945264131 100644 --- a/openml/runs/run.py +++ b/openml/runs/run.py @@ -480,7 +480,7 @@ def _generate_arff_dict(self) -> OrderedDict[str, Any]: ] else: - raise NotImplementedError("Task type %s is not yet supported." 
% str(task.task_type)) + raise NotImplementedError(f"Task type {task.task_type!s} is not yet supported.") return arff_dict diff --git a/openml/runs/trace.py b/openml/runs/trace.py index 3b7d60c2f..bc9e1b5d6 100644 --- a/openml/runs/trace.py +++ b/openml/runs/trace.py @@ -80,8 +80,8 @@ def __post_init__(self) -> None: if self.parameters is not None and not isinstance(self.parameters, dict): raise TypeError( - "argument parameters is not an instance of OrderedDict, but %s" - % str(type(self.parameters)), + f"argument parameters is not an instance of OrderedDict, but" + f" {type(self.parameters)!s}", ) def get_parameters(self) -> dict[str, Any]: @@ -351,7 +351,7 @@ def _trace_from_arff_struct( for required_attribute in REQUIRED_ATTRIBUTES: if required_attribute not in attribute_idx: - raise ValueError("arff misses required attribute: %s" % required_attribute) + raise ValueError(f"arff misses required attribute: {required_attribute}") if "setup_string" in attribute_idx: raise ValueError(error_message) @@ -383,7 +383,7 @@ def _trace_from_arff_struct( else: raise ValueError( 'expected {"true", "false"} value for selected field, ' - "received: %s" % selected_value, + f"received: {selected_value}", ) parameters = { @@ -448,7 +448,7 @@ def trace_from_xml(cls, xml: str | Path | IO) -> OpenMLRunTrace: else: raise ValueError( 'expected {"true", "false"} value for ' - "selected field, received: %s" % selected_value, + f"selected field, received: {selected_value}", ) current = OpenMLTraceIteration( @@ -504,10 +504,8 @@ def merge_traces(cls, traces: list[OpenMLRunTrace]) -> OpenMLRunTrace: if list(param_keys) != list(trace_itr_keys): raise ValueError( "Cannot merge traces because the parameters are not equal: " - "{} vs {}".format( - list(trace_itr.parameters.keys()), - list(iteration.parameters.keys()), - ), + f"{list(trace_itr.parameters.keys())} vs " + f"{list(iteration.parameters.keys())}", ) if key in merged_trace: @@ -521,9 +519,9 @@ def merge_traces(cls, traces: list[OpenMLRunTrace]) -> OpenMLRunTrace: return cls(None, merged_trace) def __repr__(self) -> str: - return "[Run id: {}, {} trace iterations]".format( - -1 if self.run_id is None else self.run_id, - len(self.trace_iterations), + return ( + f"[Run id: {-1 if self.run_id is None else self.run_id}, " + f"{len(self.trace_iterations)} trace iterations]" ) def __iter__(self) -> Iterator[OpenMLTraceIteration]: diff --git a/openml/setups/functions.py b/openml/setups/functions.py index ee0c6d707..877384636 100644 --- a/openml/setups/functions.py +++ b/openml/setups/functions.py @@ -212,7 +212,7 @@ def _list_setups( """ api_call = "setup/list" if setup is not None: - api_call += "/setup/%s" % ",".join([str(int(i)) for i in setup]) + api_call += "/setup/{}".format(",".join([str(int(i)) for i in setup])) if kwargs is not None: for operator, value in kwargs.items(): api_call += f"/{operator}/{value}" @@ -230,13 +230,12 @@ def __list_setups( # Minimalistic check if the XML is useful if "oml:setups" not in setups_dict: raise ValueError( - 'Error in return XML, does not contain "oml:setups":' " %s" % str(setups_dict), + 'Error in return XML, does not contain "oml:setups":' f" {setups_dict!s}", ) if "@xmlns:oml" not in setups_dict["oml:setups"]: raise ValueError( - "Error in return XML, does not contain " - '"oml:setups"/@xmlns:oml: %s' % str(setups_dict), + "Error in return XML, does not contain " f'"oml:setups"/@xmlns:oml: {setups_dict!s}', ) if setups_dict["oml:setups"]["@xmlns:oml"] != openml_uri: @@ -266,7 +265,7 @@ def __list_setups( return setups 
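The runs hunk above and the setups hunk that follows thread a new keyword-only `strict_version` flag from `initialize_model_from_run` through `initialize_model` and into `flow_to_model`. A hedged usage sketch; the run and setup ids are placeholders:

```python
import openml

# Rebuild the model behind an existing run while tolerating dependency-version
# differences between the stored flow and the local environment.
model = openml.runs.initialize_model_from_run(run_id=12345, strict_version=False)

# The same flag is available one level down, starting from a setup id.
model = openml.setups.initialize_model(setup_id=67890, strict_version=False)
```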
-def initialize_model(setup_id: int) -> Any: +def initialize_model(setup_id: int, *, strict_version: bool = True) -> Any: """ Initialized a model based on a setup_id (i.e., using the exact same parameter settings) @@ -275,6 +274,8 @@ def initialize_model(setup_id: int) -> Any: ---------- setup_id : int The Openml setup_id + strict_version: bool (default=True) + See `flow_to_model` strict_version. Returns ------- @@ -295,7 +296,7 @@ def initialize_model(setup_id: int) -> Any: subflow = flow subflow.parameters[hyperparameter.parameter_name] = hyperparameter.value - return flow.extension.flow_to_model(flow) + return flow.extension.flow_to_model(flow, strict_version=strict_version) def _to_dict( @@ -364,7 +365,7 @@ def _create_setup_from_xml( else: raise ValueError( "Expected None, list or dict, received " - "something else: %s" % str(type(xml_parameters)), + f"something else: {type(xml_parameters)!s}", ) if _output_format in ["dataframe", "dict"]: diff --git a/openml/study/functions.py b/openml/study/functions.py index 9d726d286..7fdc6f636 100644 --- a/openml/study/functions.py +++ b/openml/study/functions.py @@ -90,7 +90,7 @@ def _get_study(id_: int | str, entity_type: str) -> BaseStudy: ) result_dict = xmltodict.parse(xml_string, force_list=force_list_tags)["oml:study"] study_id = int(result_dict["oml:id"]) - alias = result_dict["oml:alias"] if "oml:alias" in result_dict else None + alias = result_dict.get("oml:alias", None) main_entity_type = result_dict["oml:main_entity_type"] if entity_type != main_entity_type: @@ -99,9 +99,7 @@ def _get_study(id_: int | str, entity_type: str) -> BaseStudy: f", expected '{entity_type}'" ) - benchmark_suite = ( - result_dict["oml:benchmark_suite"] if "oml:benchmark_suite" in result_dict else None - ) + benchmark_suite = result_dict.get("oml:benchmark_suite", None) name = result_dict["oml:name"] description = result_dict["oml:description"] status = result_dict["oml:status"] @@ -300,7 +298,7 @@ def update_study_status(study_id: int, status: str) -> None: """ legal_status = {"active", "deactivated"} if status not in legal_status: - raise ValueError("Illegal status value. " "Legal values: %s" % legal_status) + raise ValueError("Illegal status value. " f"Legal values: {legal_status}") data = {"study_id": study_id, "status": status} # type: openml._api_calls.DATA_TYPE result_xml = openml._api_calls._perform_api_call("study/status/update", "post", data=data) result = xmltodict.parse(result_xml) @@ -442,8 +440,7 @@ def list_suites( status: str | None = ..., uploader: list[int] | None = ..., output_format: Literal["dict"] = "dict", -) -> dict: - ... +) -> dict: ... @overload @@ -453,8 +450,7 @@ def list_suites( status: str | None = ..., uploader: list[int] | None = ..., output_format: Literal["dataframe"] = "dataframe", -) -> pd.DataFrame: - ... +) -> pd.DataFrame: ... def list_suites( @@ -538,8 +534,7 @@ def list_studies( uploader: list[str] | None = ..., benchmark_suite: int | None = ..., output_format: Literal["dict"] = "dict", -) -> dict: - ... +) -> dict: ... @overload @@ -550,8 +545,7 @@ def list_studies( uploader: list[str] | None = ..., benchmark_suite: int | None = ..., output_format: Literal["dataframe"] = "dataframe", -) -> pd.DataFrame: - ... +) -> pd.DataFrame: ... def list_studies( @@ -637,13 +631,11 @@ def list_studies( @overload -def _list_studies(output_format: Literal["dict"] = "dict", **kwargs: Any) -> dict: - ... +def _list_studies(output_format: Literal["dict"] = "dict", **kwargs: Any) -> dict: ... 
@overload -def _list_studies(output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame: - ... +def _list_studies(output_format: Literal["dataframe"], **kwargs: Any) -> pd.DataFrame: ... def _list_studies( @@ -674,13 +666,11 @@ def _list_studies( @overload -def __list_studies(api_call: str, output_format: Literal["dict"] = "dict") -> dict: - ... +def __list_studies(api_call: str, output_format: Literal["dict"] = "dict") -> dict: ... @overload -def __list_studies(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame: - ... +def __list_studies(api_call: str, output_format: Literal["dataframe"]) -> pd.DataFrame: ... def __list_studies( diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index 9fd2e4be1..54030422d 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -98,8 +98,9 @@ def _get_estimation_procedure_list() -> list[dict[str, Any]]: raise ValueError( "Error in return XML, value of " "oml:estimationprocedures/@xmlns:oml is not " - "http://openml.org/openml, but %s" - % str(procs_dict["oml:estimationprocedures"]["@xmlns:oml"]), + "http://openml.org/openml, but {}".format( + str(procs_dict["oml:estimationprocedures"]["@xmlns:oml"]) + ), ) procs: list[dict[str, Any]] = [] @@ -276,7 +277,7 @@ def __list_tasks( # noqa: PLR0912, C901 raise ValueError( "Error in return XML, value of " '"oml:runs"/@xmlns:oml is not ' - '"http://openml.org/openml": %s' % str(tasks_dict), + f'"http://openml.org/openml": {tasks_dict!s}', ) assert isinstance(tasks_dict["oml:tasks"]["oml:task"], list), type(tasks_dict["oml:tasks"]) @@ -527,7 +528,7 @@ def _create_task_from_xml(xml: str) -> OpenMLTask: TaskType.LEARNING_CURVE: OpenMLLearningCurveTask, }.get(task_type) if cls is None: - raise NotImplementedError("Task type %s not supported." % common_kwargs["task_type"]) + raise NotImplementedError("Task type {} not supported.".format(common_kwargs["task_type"])) return cls(**common_kwargs) # type: ignore diff --git a/openml/tasks/split.py b/openml/tasks/split.py index 81105f1fd..ac538496e 100644 --- a/openml/tasks/split.py +++ b/openml/tasks/split.py @@ -177,9 +177,9 @@ def get(self, repeat: int = 0, fold: int = 0, sample: int = 0) -> tuple[np.ndarr If the specified repeat, fold, or sample is not known. """ if repeat not in self.split: - raise ValueError("Repeat %s not known" % str(repeat)) + raise ValueError(f"Repeat {repeat!s} not known") if fold not in self.split[repeat]: - raise ValueError("Fold %s not known" % str(fold)) + raise ValueError(f"Fold {fold!s} not known") if sample not in self.split[repeat][fold]: - raise ValueError("Sample %s not known" % str(sample)) + raise ValueError(f"Sample {sample!s} not known") return self.split[repeat][fold][sample] diff --git a/openml/tasks/task.py b/openml/tasks/task.py index 064b834ba..e7d19bdce 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -207,7 +207,7 @@ def _to_dict(self) -> dict[str, dict[str, int | str | list[dict[str, Any]]]]: {"@name": "source_data", "#text": str(self.dataset_id)}, {"@name": "estimation_procedure", "#text": str(self.estimation_procedure_id)}, ] - if self.evaluation_measure is not None: # + if self.evaluation_measure is not None: oml_input.append({"@name": "evaluation_measures", "#text": self.evaluation_measure}) return { @@ -283,8 +283,7 @@ def get_X_and_y( ) -> tuple[ np.ndarray | scipy.sparse.spmatrix, np.ndarray | None, - ]: - ... + ]: ... 
@overload def get_X_and_y( @@ -292,8 +291,7 @@ def get_X_and_y( ) -> tuple[ pd.DataFrame, pd.Series | pd.DataFrame | None, - ]: - ... + ]: ... # TODO(eddiebergman): Do all OpenMLSupervisedTask have a `y`? def get_X_and_y( @@ -542,12 +540,10 @@ def __init__( # noqa: PLR0913 def get_X( self, dataset_format: Literal["array"] = "array", - ) -> np.ndarray | scipy.sparse.spmatrix: - ... + ) -> np.ndarray | scipy.sparse.spmatrix: ... @overload - def get_X(self, dataset_format: Literal["dataframe"]) -> pd.DataFrame: - ... + def get_X(self, dataset_format: Literal["dataframe"]) -> pd.DataFrame: ... def get_X( self, diff --git a/openml/testing.py b/openml/testing.py index 529a304d4..9016ff6a9 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -182,7 +182,7 @@ def _get_sentinel(self, sentinel: str | None = None) -> str: md5.update(str(time.time()).encode("utf-8")) md5.update(str(os.getpid()).encode("utf-8")) sentinel = md5.hexdigest()[:10] - sentinel = "TEST%s" % sentinel + sentinel = f"TEST{sentinel}" return sentinel def _add_sentinel_to_flow_name( diff --git a/openml/utils.py b/openml/utils.py index a03610512..82859fd40 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -35,8 +35,7 @@ def extract_xml_tags( node: Mapping[str, Any], *, allow_none: Literal[True] = ..., -) -> Any | None: - ... +) -> Any | None: ... @overload @@ -45,8 +44,7 @@ def extract_xml_tags( node: Mapping[str, Any], *, allow_none: Literal[False], -) -> Any: - ... +) -> Any: ... def extract_xml_tags( @@ -198,7 +196,7 @@ def _delete_entity(entity_type: str, entity_id: int) -> bool: "user", } if entity_type not in legal_entities: - raise ValueError("Can't delete a %s" % entity_type) + raise ValueError(f"Can't delete a {entity_type}") url_suffix = "%s/%d" % (entity_type, entity_id) try: @@ -236,7 +234,7 @@ def _delete_entity(entity_type: str, entity_id: int) -> bool: " please open an issue at: https://github.com/openml/openml/issues/new" ), ) from e - raise + raise e @overload @@ -245,8 +243,7 @@ def _list_all( list_output_format: Literal["dict"] = ..., *args: P.args, **filters: P.kwargs, -) -> dict: - ... +) -> dict: ... @overload @@ -255,8 +252,7 @@ def _list_all( list_output_format: Literal["object"], *args: P.args, **filters: P.kwargs, -) -> dict: - ... +) -> dict: ... @overload @@ -265,8 +261,7 @@ def _list_all( list_output_format: Literal["dataframe"], *args: P.args, **filters: P.kwargs, -) -> pd.DataFrame: - ... +) -> pd.DataFrame: ... def _list_all( # noqa: C901, PLR0912 @@ -376,7 +371,7 @@ def _create_cache_directory(key: str) -> Path: try: cache_dir.mkdir(exist_ok=True, parents=True) - except Exception as e: # noqa: BLE001 + except Exception as e: raise openml.exceptions.OpenMLCacheException( f"Cannot create cache directory {cache_dir}." ) from e @@ -412,7 +407,7 @@ def _create_cache_directory_for_id(key: str, id_: int) -> Path: """ cache_dir = _get_cache_dir_for_id(key, id_, create=True) if cache_dir.exists() and not cache_dir.is_dir(): - raise ValueError("%s cache dir exists but is not a directory!" 
% key) + raise ValueError(f"{key} cache dir exists but is not a directory!") cache_dir.mkdir(exist_ok=True, parents=True) return cache_dir diff --git a/pyproject.toml b/pyproject.toml index ffb1eb001..83f0793f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,7 +86,6 @@ examples=[ "ipykernel", "seaborn", ] -examples_unix=["fanova"] docs=[ "sphinx>=3", "sphinx-gallery", @@ -127,12 +126,79 @@ markers = [ # https://github.com/charliermarsh/ruff [tool.ruff] -target-version = "py37" +target-version = "py38" line-length = 100 -show-source = true +output-format = "grouped" src = ["openml", "tests", "examples"] unsafe-fixes = true +exclude = [ + # TODO(eddiebergman): Tests should be re-enabled after the refactor + "tests", + # + ".bzr", + ".direnv", + ".eggs", + ".git", + ".hg", + ".mypy_cache", + ".nox", + ".pants.d", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "venv", + "docs", +] + +# Exclude a variety of commonly ignored directories. +[tool.ruff.lint.per-file-ignores] +"tests/*.py" = [ + "D100", # Undocumented public module + "D101", # Missing docstring in public class + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "S101", # Use of assert + "ANN201", # Missing return type annotation for public function + "FBT001", # Positional boolean argument + "PLR2004",# No use of magic numbers + "PD901", # X is a bad variable name. (pandas) + "TCH", # https://docs.astral.sh/ruff/rules/#flake8-type-checking-tch + "N803", # Argument name {name} should be lowercase +] +"openml/cli.py" = [ + "T201", # print found + "T203", # pprint found +] +"openml/__version__.py" = [ + "D100", # Undocumented public module +] +"__init__.py" = [ + "I002", # Missing required import (i.e. from __future__ import annotations) +] +"examples/*.py" = [ + "D101", # Missing docstring in public class + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D415", # First line should end with a . or ? or ! + "INP001", # File is part of an implicit namespace package, add an __init__.py + "I002", # Missing required import (i.e. from __future__ import annotations) + "E741", # Ambigiuous variable name + "T201", # print found + "T203", # pprint found + "ERA001", # found commeneted out code + "E402", # Module level import not at top of cell + "E501", # Line too long +] + +[tool.ruff.lint] # Allow unused variables when underscore-prefixed. dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" @@ -212,74 +278,9 @@ ignore = [ "N802", # Public function name should be lower case (i.e. get_X()) ] -exclude = [ - # TODO(eddiebergman): Tests should be re-enabled after the refactor - "tests", - # - ".bzr", - ".direnv", - ".eggs", - ".git", - ".hg", - ".mypy_cache", - ".nox", - ".pants.d", - ".ruff_cache", - ".svn", - ".tox", - ".venv", - "__pypackages__", - "_build", - "buck-out", - "build", - "dist", - "node_modules", - "venv", - "docs", -] - -# Exclude a variety of commonly ignored directories. -[tool.ruff.per-file-ignores] -"tests/*.py" = [ - "D100", # Undocumented public module - "D101", # Missing docstring in public class - "D102", # Missing docstring in public method - "D103", # Missing docstring in public function - "S101", # Use of assert - "ANN201", # Missing return type annotation for public function - "FBT001", # Positional boolean argument - "PLR2004",# No use of magic numbers - "PD901", # X is a bad variable name. 
(pandas) - "TCH", # https://docs.astral.sh/ruff/rules/#flake8-type-checking-tch - "N803", # Argument name {name} should be lowercase -] -"openml/cli.py" = [ - "T201", # print found - "T203", # pprint found -] -"openml/__version__.py" = [ - "D100", # Undocumented public module -] -"__init__.py" = [ - "I002", # Missing required import (i.e. from __future__ import annotations) -] -"examples/*.py" = [ - "D101", # Missing docstring in public class - "D102", # Missing docstring in public method - "D103", # Missing docstring in public function - "D415", # First line should end with a . or ? or ! - "INP001", # File is part of an implicit namespace package, add an __init__.py - "I002", # Missing required import (i.e. from __future__ import annotations) - "E741", # Ambigiuous variable name - "T201", # print found - "T203", # pprint found - "ERA001", # found commeneted out code - "E402", # Module level import not at top of cell - "E501", # Line too long -] -[tool.ruff.isort] +[tool.ruff.lint.isort] known-first-party = ["openml"] no-lines-before = ["future"] required-imports = ["from __future__ import annotations"] @@ -287,11 +288,11 @@ combine-as-imports = true extra-standard-library = ["typing_extensions"] force-wrap-aliases = true -[tool.ruff.pydocstyle] +[tool.ruff.lint.pydocstyle] convention = "numpy" [tool.mypy] -python_version = "3.7" +python_version = "3.8" packages = ["openml", "tests"] show_error_codes = true diff --git a/tests/conftest.py b/tests/conftest.py index 62fe3c7e8..79ee2bbd3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -23,8 +23,10 @@ # License: BSD 3-Clause from __future__ import annotations +from collections.abc import Iterator import logging import os +import shutil from pathlib import Path import pytest @@ -164,6 +166,15 @@ def pytest_sessionfinish() -> None: # Local file deletion new_file_list = read_file_list() compare_delete_files(file_list, new_file_list) + + # Delete any test dirs that remain + # In edge cases due to a mixture of pytest parametrization and oslo concurrency, + # some file lock are created after leaving the test. This removes these files! 
+ test_files_dir=Path(__file__).parent.parent / "openml" + for f in test_files_dir.glob("tests.*"): + if f.is_dir(): + shutil.rmtree(f) + logger.info("Local files deleted") logger.info(f"{worker} is killed") @@ -185,55 +196,90 @@ def pytest_addoption(parser): def _expected_static_cache_state(root_dir: Path) -> list[Path]: _c_root_dir = root_dir / "org" / "openml" / "test" res_paths = [root_dir, _c_root_dir] - + for _d in ["datasets", "tasks", "runs", "setups"]: res_paths.append(_c_root_dir / _d) - for _id in ["-1","2"]: + for _id in ["-1", "2"]: tmp_p = _c_root_dir / "datasets" / _id - res_paths.extend([ - tmp_p / "dataset.arff", - tmp_p / "features.xml", - tmp_p / "qualities.xml", - tmp_p / "description.xml", - ]) + res_paths.extend( + [ + tmp_p / "dataset.arff", + tmp_p / "features.xml", + tmp_p / "qualities.xml", + tmp_p / "description.xml", + ] + ) res_paths.append(_c_root_dir / "datasets" / "30" / "dataset_30.pq") res_paths.append(_c_root_dir / "runs" / "1" / "description.xml") res_paths.append(_c_root_dir / "setups" / "1" / "description.xml") - + for _id in ["1", "3", "1882"]: tmp_p = _c_root_dir / "tasks" / _id - res_paths.extend([ - tmp_p / "datasplits.arff", - tmp_p / "task.xml", - ]) - + res_paths.extend( + [ + tmp_p / "datasplits.arff", + tmp_p / "task.xml", + ] + ) + return res_paths def assert_static_test_cache_correct(root_dir: Path) -> None: for p in _expected_static_cache_state(root_dir): - assert p.exists(), f"Expected path {p} does not exist" - + assert p.exists(), f"Expected path {p} exists" + @pytest.fixture(scope="class") def long_version(request): request.cls.long_version = request.config.getoption("--long") -@pytest.fixture() +@pytest.fixture(scope="session") def test_files_directory() -> Path: return Path(__file__).parent / "files" -@pytest.fixture() +@pytest.fixture(scope="session") def test_api_key() -> str: return "c0c42819af31e706efe1f4b88c23c6c1" -@pytest.fixture(autouse=True) -def verify_cache_state(test_files_directory) -> None: +@pytest.fixture(autouse=True, scope="function") +def verify_cache_state(test_files_directory) -> Iterator[None]: assert_static_test_cache_correct(test_files_directory) yield assert_static_test_cache_correct(test_files_directory) + + +@pytest.fixture(autouse=True, scope="session") +def as_robot() -> Iterator[None]: + policy = openml.config.retry_policy + n_retries = openml.config.connection_n_retries + openml.config.set_retry_policy("robot", n_retries=20) + yield + openml.config.set_retry_policy(policy, n_retries) + + +@pytest.fixture(autouse=True, scope="session") +def with_test_server(): + openml.config.start_using_configuration_for_example() + yield + openml.config.stop_using_configuration_for_example() + + +@pytest.fixture(autouse=True) +def with_test_cache(test_files_directory, request): + if not test_files_directory.exists(): + raise ValueError( + f"Cannot find test cache dir, expected it to be {test_files_directory!s}!", + ) + _root_cache_directory = openml.config._root_cache_directory + tmp_cache = test_files_directory / request.node.name + openml.config.set_root_cache_directory(tmp_cache) + yield + openml.config.set_root_cache_directory(_root_cache_directory) + if tmp_cache.exists(): + shutil.rmtree(tmp_cache) diff --git a/tests/files/misc/features_with_whitespaces.xml b/tests/files/misc/features_with_whitespaces.xml new file mode 100644 index 000000000..2b542d167 --- /dev/null +++ b/tests/files/misc/features_with_whitespaces.xml @@ -0,0 +1,22 @@ + + + 0 + V1 + numeric + false + false + false + 0 + + + 1 + V42 + nominal + - 
50000. + 50000+. + false + false + false + 0 + + diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index 80da9c842..4598b8985 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -309,6 +309,10 @@ def test_lazy_loading_metadata(self): assert _dataset.features == _compare_dataset.features assert _dataset.qualities == _compare_dataset.qualities + def test_equality_comparison(self): + self.assertEqual(self.iris, self.iris) + self.assertNotEqual(self.iris, self.titanic) + self.assertNotEqual(self.titanic, 'Wrong_object') class OpenMLDatasetTestOnTestServer(TestBase): def setUp(self): diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 47e97496d..a15100070 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -43,6 +43,7 @@ OpenMLNotAuthorizedError, OpenMLPrivateDatasetError, OpenMLServerException, + OpenMLServerNoResult, ) from openml.tasks import TaskType, create_task from openml.testing import TestBase, create_request_response @@ -274,9 +275,7 @@ def test_get_dataset_cannot_access_private_data(self): @pytest.mark.skip("Need to find dataset name of private dataset") def test_dataset_by_name_cannot_access_private_data(self): openml.config.server = self.production_server - self.assertRaises( - OpenMLPrivateDatasetError, openml.datasets.get_dataset, "NAME_GOES_HERE" - ) + self.assertRaises(OpenMLPrivateDatasetError, openml.datasets.get_dataset, "NAME_GOES_HERE") def test_get_dataset_lazy_all_functions(self): """Test that all expected functionality is available without downloading the dataset.""" @@ -285,9 +284,7 @@ def test_get_dataset_lazy_all_functions(self): def ensure_absence_of_real_data(): assert not os.path.exists( - os.path.join( - openml.config.get_cache_directory(), "datasets", "1", "dataset.arff" - ) + os.path.join(openml.config.get_cache_directory(), "datasets", "1", "dataset.arff") ) tag = "test_lazy_tag_%d" % random.randint(1, 1000000) @@ -509,12 +506,8 @@ def test_deletion_of_cache_dir(self): @mock.patch("openml.datasets.functions._get_dataset_description") def test_deletion_of_cache_dir_faulty_download(self, patch): patch.side_effect = Exception("Boom!") - self.assertRaisesRegex( - Exception, "Boom!", openml.datasets.get_dataset, dataset_id=1 - ) - datasets_cache_dir = os.path.join( - self.workdir, "org", "openml", "test", "datasets" - ) + self.assertRaisesRegex(Exception, "Boom!", openml.datasets.get_dataset, dataset_id=1) + datasets_cache_dir = os.path.join(self.workdir, "org", "openml", "test", "datasets") assert len(os.listdir(datasets_cache_dir)) == 0 def test_publish_dataset(self): @@ -555,9 +548,7 @@ def test__retrieve_class_labels(self): # Test workaround for string-typed class labels custom_ds = openml.datasets.get_dataset(2) custom_ds.features[31].data_type = "string" - labels = custom_ds.retrieve_class_labels( - target_name=custom_ds.features[31].name - ) + labels = custom_ds.retrieve_class_labels(target_name=custom_ds.features[31].name) assert labels == ["COIL", "SHEET"] def test_upload_dataset_with_url(self): @@ -600,9 +591,7 @@ def test_data_status(self): ) dataset.publish() TestBase._mark_entity_for_removal("data", dataset.id) - TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], dataset.id) - ) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id)) did = dataset.id # admin key for test server (only 
adminds can activate datasets. @@ -678,8 +667,7 @@ def test_attributes_arff_from_df_unknown_dtype(self): for arr, dt in zip(data, dtype): df = pd.DataFrame(arr) err_msg = ( - f"The dtype '{dt}' of the column '0' is not currently " - "supported by liac-arff" + f"The dtype '{dt}' of the column '0' is not currently " "supported by liac-arff" ) with pytest.raises(ValueError, match=err_msg): attributes_arff_from_df(df) @@ -710,16 +698,12 @@ def test_create_dataset_numpy(self): dataset.publish() TestBase._mark_entity_for_removal("data", dataset.id) - TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], dataset.id) - ) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id)) assert ( _get_online_dataset_arff(dataset.id) == dataset._dataset ), "Uploaded arff does not match original one" - assert ( - _get_online_dataset_format(dataset.id) == "arff" - ), "Wrong format for dataset" + assert _get_online_dataset_format(dataset.id) == "arff", "Wrong format for dataset" def test_create_dataset_list(self): data = [ @@ -769,15 +753,11 @@ def test_create_dataset_list(self): dataset.publish() TestBase._mark_entity_for_removal("data", dataset.id) - TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], dataset.id) - ) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id)) assert ( _get_online_dataset_arff(dataset.id) == dataset._dataset ), "Uploaded ARFF does not match original one" - assert ( - _get_online_dataset_format(dataset.id) == "arff" - ), "Wrong format for dataset" + assert _get_online_dataset_format(dataset.id) == "arff", "Wrong format for dataset" def test_create_dataset_sparse(self): # test the scipy.sparse.coo_matrix @@ -974,9 +954,7 @@ def test_create_dataset_pandas(self): ) dataset.publish() TestBase._mark_entity_for_removal("data", dataset.id) - TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], dataset.id) - ) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id)) assert ( _get_online_dataset_arff(dataset.id) == dataset._dataset ), "Uploaded ARFF does not match original one" @@ -991,9 +969,7 @@ def test_create_dataset_pandas(self): column_names = ["input1", "input2", "y"] df = pd.DataFrame.sparse.from_spmatrix(sparse_data, columns=column_names) # meta-information - description = ( - "Synthetic dataset created from a Pandas DataFrame with Sparse columns" - ) + description = "Synthetic dataset created from a Pandas DataFrame with Sparse columns" dataset = openml.datasets.functions.create_dataset( name=name, description=description, @@ -1014,15 +990,11 @@ def test_create_dataset_pandas(self): ) dataset.publish() TestBase._mark_entity_for_removal("data", dataset.id) - TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], dataset.id) - ) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id)) assert ( _get_online_dataset_arff(dataset.id) == dataset._dataset ), "Uploaded ARFF does not match original one" - assert ( - _get_online_dataset_format(dataset.id) == "sparse_arff" - ), "Wrong format for dataset" + assert _get_online_dataset_format(dataset.id) == "sparse_arff", "Wrong format for dataset" # Check that we can overwrite the attributes data = [["a"], ["b"], ["c"], ["d"], ["e"]] @@ -1050,13 +1022,9 @@ def test_create_dataset_pandas(self): ) dataset.publish() TestBase._mark_entity_for_removal("data", dataset.id) - TestBase.logger.info( - "collected 
from {}: {}".format(__file__.split("/")[-1], dataset.id) - ) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id)) downloaded_data = _get_online_dataset_arff(dataset.id) - assert ( - downloaded_data == dataset._dataset - ), "Uploaded ARFF does not match original one" + assert downloaded_data == dataset._dataset, "Uploaded ARFF does not match original one" assert "@ATTRIBUTE rnd_str {a, b, c, d, e, f, g}" in downloaded_data def test_ignore_attributes_dataset(self): @@ -1217,9 +1185,7 @@ def test_publish_fetch_ignore_attribute(self): # publish dataset dataset.publish() TestBase._mark_entity_for_removal("data", dataset.id) - TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], dataset.id) - ) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id)) # test if publish was successful assert isinstance(dataset.id, int) @@ -1403,9 +1369,7 @@ def test_get_dataset_cache_format_feather(self): cache_dir = openml.config.get_cache_directory() cache_dir_for_id = os.path.join(cache_dir, "datasets", "128") feather_file = os.path.join(cache_dir_for_id, "dataset.feather") - pickle_file = os.path.join( - cache_dir_for_id, "dataset.feather.attributes.pkl.py3" - ) + pickle_file = os.path.join(cache_dir_for_id, "dataset.feather.attributes.pkl.py3") data = pd.read_feather(feather_file) assert os.path.isfile(feather_file), "Feather file is missing" assert os.path.isfile(pickle_file), "Attributes pickle file is missing" @@ -1450,9 +1414,7 @@ def test_data_edit_critical_field(self): # for this, we need to first clone a dataset to do changes did = fork_dataset(1) self._wait_for_dataset_being_processed(did) - result = edit_dataset( - did, default_target_attribute="shape", ignore_attribute="oil" - ) + result = edit_dataset(did, default_target_attribute="shape", ignore_attribute="oil") assert did == result n_tries = 10 @@ -1460,9 +1422,7 @@ def test_data_edit_critical_field(self): for i in range(n_tries): edited_dataset = openml.datasets.get_dataset(did) try: - assert ( - edited_dataset.default_target_attribute == "shape" - ), edited_dataset + assert edited_dataset.default_target_attribute == "shape", edited_dataset assert edited_dataset.ignore_attribute == ["oil"], edited_dataset break except AssertionError as e: @@ -1471,9 +1431,7 @@ def test_data_edit_critical_field(self): time.sleep(10) # Delete the cache dir to get the newer version of the dataset shutil.rmtree( - os.path.join( - self.workdir, "org", "openml", "test", "datasets", str(did) - ), + os.path.join(self.workdir, "org", "openml", "test", "datasets", str(did)), ) def test_data_edit_requires_field(self): @@ -1564,9 +1522,7 @@ def test_list_datasets_with_high_size_parameter(self): openml.config.server = self.production_server datasets_a = openml.datasets.list_datasets(output_format="dataframe") - datasets_b = openml.datasets.list_datasets( - output_format="dataframe", size=np.inf - ) + datasets_b = openml.datasets.list_datasets(output_format="dataframe", size=np.inf) # Reverting to test server openml.config.server = self.test_server @@ -1646,9 +1602,7 @@ def test_invalid_attribute_validations( (None, None, ["outlook", "windy"]), ], ) -def test_valid_attribute_validations( - default_target_attribute, row_id_attribute, ignore_attribute -): +def test_valid_attribute_validations(default_target_attribute, row_id_attribute, ignore_attribute): data = [ ["a", "sunny", 85.0, 85.0, "FALSE", "no"], ["b", "sunny", 80.0, 90.0, "TRUE", "no"], @@ -1749,10 +1703,7 @@ def 
test_delete_dataset(self): def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_key): openml.config.start_using_configuration_for_example() content_file = ( - test_files_directory - / "mock_responses" - / "datasets" - / "data_delete_not_owned.xml" + test_files_directory / "mock_responses" / "datasets" / "data_delete_not_owned.xml" ) mock_delete.return_value = create_request_response( status_code=412, @@ -1774,10 +1725,7 @@ def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_ke def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key): openml.config.start_using_configuration_for_example() content_file = ( - test_files_directory - / "mock_responses" - / "datasets" - / "data_delete_has_tasks.xml" + test_files_directory / "mock_responses" / "datasets" / "data_delete_has_tasks.xml" ) mock_delete.return_value = create_request_response( status_code=412, @@ -1799,10 +1747,7 @@ def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key): openml.config.start_using_configuration_for_example() content_file = ( - test_files_directory - / "mock_responses" - / "datasets" - / "data_delete_successful.xml" + test_files_directory / "mock_responses" / "datasets" / "data_delete_successful.xml" ) mock_delete.return_value = create_request_response( status_code=200, @@ -1821,10 +1766,7 @@ def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key) def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key): openml.config.start_using_configuration_for_example() content_file = ( - test_files_directory - / "mock_responses" - / "datasets" - / "data_delete_not_exist.xml" + test_files_directory / "mock_responses" / "datasets" / "data_delete_not_exist.xml" ) mock_delete.return_value = create_request_response( status_code=412, @@ -1861,9 +1803,7 @@ def test_list_datasets(all_datasets: pd.DataFrame): def test_list_datasets_by_tag(all_datasets: pd.DataFrame): - tag_datasets = openml.datasets.list_datasets( - tag="study_14", output_format="dataframe" - ) + tag_datasets = openml.datasets.list_datasets(tag="study_14", output_format="dataframe") assert 0 < len(tag_datasets) < len(all_datasets) _assert_datasets_have_id_and_valid_status(tag_datasets) @@ -2001,15 +1941,22 @@ def test_get_dataset_lazy_behavior( with_features=with_features, with_data=with_data, ) - assert ( - dataset.features - ), "Features should be downloaded on-demand if not during get_dataset" - assert ( - dataset.qualities - ), "Qualities should be downloaded on-demand if not during get_dataset" - assert ( - dataset.get_data() - ), "Data should be downloaded on-demand if not during get_dataset" + assert dataset.features, "Features should be downloaded on-demand if not during get_dataset" + assert dataset.qualities, "Qualities should be downloaded on-demand if not during get_dataset" + assert dataset.get_data(), "Data should be downloaded on-demand if not during get_dataset" _assert_datasets_retrieved_successfully( [1], with_qualities=True, with_features=True, with_data=True ) + + +def test_get_dataset_with_invalid_id() -> None: + INVALID_ID = 123819023109238 # Well, at some point this will probably be valid... 
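The new regression test just below (`test_read_features_from_xml_with_whitespace`) pairs with the `strip_whitespace=False` argument added to `_parse_features_xml` earlier in this diff: both protect nominal values whose labels begin or end with spaces. A small illustration of the xmltodict behaviour being worked around (element names shortened for brevity):

```python
import xmltodict

xml = "<f><v> - 50000.</v></f>"

stripped = xmltodict.parse(xml)                           # default: strip_whitespace=True
preserved = xmltodict.parse(xml, strip_whitespace=False)

print(repr(stripped["f"]["v"]))   # '- 50000.'  -> leading space silently dropped
print(repr(preserved["f"]["v"]))  # ' - 50000.' -> label preserved exactly as stored
```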
+ with pytest.raises(OpenMLServerNoResult, match="Unknown dataset") as e: + openml.datasets.get_dataset(INVALID_ID) + assert e.value.code == 111 + +def test_read_features_from_xml_with_whitespace() -> None: + from openml.datasets.dataset import _read_features + features_file = Path(__file__).parent.parent / "files" / "misc" / "features_with_whitespaces.xml" + dict = _read_features(features_file) + assert dict[1].nominal_values == [" - 50000.", " 50000+."] diff --git a/tests/test_evaluations/test_evaluations_example.py b/tests/test_evaluations/test_evaluations_example.py index bf5b03f3f..a0980f5f9 100644 --- a/tests/test_evaluations/test_evaluations_example.py +++ b/tests/test_evaluations/test_evaluations_example.py @@ -3,35 +3,47 @@ import unittest +from openml.config import overwrite_config_context + class TestEvaluationsExample(unittest.TestCase): def test_example_python_paper(self): # Example script which will appear in the upcoming OpenML-Python paper # This test ensures that the example will keep running! - - import matplotlib.pyplot as plt - import numpy as np - - import openml - - df = openml.evaluations.list_evaluations_setups( - "predictive_accuracy", - flows=[8353], - tasks=[6], - output_format="dataframe", - parameters_in_separate_columns=True, - ) # Choose an SVM flow, for example 8353, and a task. - - hp_names = ["sklearn.svm.classes.SVC(16)_C", "sklearn.svm.classes.SVC(16)_gamma"] - df[hp_names] = df[hp_names].astype(float).apply(np.log) - C, gamma, score = df[hp_names[0]], df[hp_names[1]], df["value"] - - cntr = plt.tricontourf(C, gamma, score, levels=12, cmap="RdBu_r") - plt.colorbar(cntr, label="accuracy") - plt.xlim((min(C), max(C))) - plt.ylim((min(gamma), max(gamma))) - plt.xlabel("C (log10)", size=16) - plt.ylabel("gamma (log10)", size=16) - plt.title("SVM performance landscape", size=20) - - plt.tight_layout() + with overwrite_config_context( + { + "server": "https://www.openml.org/api/v1/xml", + "apikey": None, + } + ): + import matplotlib.pyplot as plt + import numpy as np + + import openml + + df = openml.evaluations.list_evaluations_setups( + "predictive_accuracy", + flows=[8353], + tasks=[6], + output_format="dataframe", + parameters_in_separate_columns=True, + ) # Choose an SVM flow, for example 8353, and a task. + + assert len(df) > 0, ( + "No evaluation found for flow 8353 on task 6, could " + "be that this task is not available on the test server." 
+            )
+
+            hp_names = ["sklearn.svm.classes.SVC(16)_C", "sklearn.svm.classes.SVC(16)_gamma"]
+            df[hp_names] = df[hp_names].astype(float).apply(np.log)
+            C, gamma, score = df[hp_names[0]], df[hp_names[1]], df["value"]
+
+            cntr = plt.tricontourf(C, gamma, score, levels=12, cmap="RdBu_r")
+            plt.colorbar(cntr, label="accuracy")
+            plt.xlim((min(C), max(C)))
+            plt.ylim((min(gamma), max(gamma)))
+            plt.xlabel("C (log10)", size=16)
+            plt.ylabel("gamma (log10)", size=16)
+            plt.title("SVM performance landscape", size=20)
+
+            plt.tight_layout()
diff --git a/tests/test_openml/test_api_calls.py b/tests/test_openml/test_api_calls.py
index c6df73e0a..51123b0d8 100644
--- a/tests/test_openml/test_api_calls.py
+++ b/tests/test_openml/test_api_calls.py
@@ -9,8 +9,9 @@
import pytest

import openml
+from openml.config import ConfigurationForExamples
import openml.testing
-from openml._api_calls import _download_minio_bucket
+from openml._api_calls import _download_minio_bucket, API_TOKEN_HELP_LINK


class TestConfig(openml.testing.TestBase):
@@ -36,8 +37,12 @@ def test_retry_on_database_error(self, Session_class_mock, _):
        assert Session_class_mock.return_value.__enter__.return_value.get.call_count == 20

+
class FakeObject(NamedTuple):
    object_name: str
+    etag: str
+    """We use the etag of a Minio object as the name of a marker file that records we already downloaded it."""
+

class FakeMinio:
    def __init__(self, objects: Iterable[FakeObject] | None = None):
@@ -60,7 +65,7 @@ def test_download_all_files_observes_cache(mock_minio, tmp_path: Path) -> None:
    some_url = f"https://not.real.com/bucket/{some_object_path}"
    mock_minio.return_value = FakeMinio(
        objects=[
-            FakeObject(some_object_path),
+            FakeObject(object_name=some_object_path, etag=str(hash(some_object_path))),
        ],
    )
@@ -71,3 +76,50 @@ def test_download_all_files_observes_cache(mock_minio, tmp_path: Path) -> None:
    time_modified = (tmp_path / some_filename).stat().st_mtime

    assert time_created == time_modified
+
+
+@mock.patch.object(minio, "Minio")
+def test_download_minio_failure(mock_minio, tmp_path: Path) -> None:
+    some_prefix, some_filename = "some/prefix", "dataset.arff"
+    some_object_path = f"{some_prefix}/{some_filename}"
+    some_url = f"https://not.real.com/bucket/{some_object_path}"
+    mock_minio.return_value = FakeMinio(
+        objects=[
+            FakeObject(object_name=None, etag="tmp"),
+        ],
+    )
+
+    with pytest.raises(ValueError):
+        _download_minio_bucket(source=some_url, destination=tmp_path)
+
+    mock_minio.return_value = FakeMinio(
+        objects=[
+            FakeObject(object_name="tmp", etag=None),
+        ],
+    )
+
+    with pytest.raises(ValueError):
+        _download_minio_bucket(source=some_url, destination=tmp_path)
+
+
+@pytest.mark.parametrize(
+    "endpoint, method",
+    [
+        # https://github.com/openml/OpenML/blob/develop/openml_OS/views/pages/api_new/v1/xml/pre.php
+        ("flow/exists", "post"),  # 102
+        ("dataset", "post"),  # 137
+        ("dataset/42", "delete"),  # 350
+        # ("flow/owned", "post"),  # 310 - Couldn't find what would trigger this
+        ("flow/42", "delete"),  # 320
+        ("run/42", "delete"),  # 400
+        ("task/42", "delete"),  # 460
+    ],
+)
+def test_authentication_endpoints_requiring_api_key_show_relevant_help_link(
+    endpoint: str,
+    method: str,
+) -> None:
+    # We need to temporarily disable the API key to test the error message
+    with openml.config.overwrite_config_context({"apikey": None}):
+        with pytest.raises(openml.exceptions.OpenMLNotAuthorizedError, match=API_TOKEN_HELP_LINK):
+            openml._api_calls._perform_api_call(call=endpoint, request_method=method, data=None)
diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py
index a92cd0cfd..f9ab5eb9f 100644
--- a/tests/test_openml/test_config.py
+++ b/tests/test_openml/test_config.py
@@ -1,11 +1,14 @@
# License: BSD 3-Clause
from __future__ import annotations

+from contextlib import contextmanager
import os
import tempfile
import unittest.mock
from copy import copy
+from typing import Any, Iterator
from pathlib import Path
+import platform

import pytest

@@ -13,10 +16,32 @@
import openml.testing


+@contextmanager
+def safe_environ_patcher(key: str, value: Any) -> Iterator[None]:
+    """Context manager to temporarily set an environment variable.
+
+    The previous value is restored even if the code inside the context raises.
+    """
+    _prev = os.environ.get(key)
+    os.environ[key] = value
+    try:
+        yield
+    except Exception as e:
+        raise e
+    finally:
+        os.environ.pop(key)
+        if _prev is not None:
+            os.environ[key] = _prev
+
+
class TestConfig(openml.testing.TestBase):
    @unittest.mock.patch("openml.config.openml_logger.warning")
    @unittest.mock.patch("openml.config._create_log_handlers")
    @unittest.skipIf(os.name == "nt", "https://github.com/openml/openml-python/issues/1033")
+    @unittest.skipIf(
+        platform.uname().release.endswith(("-Microsoft", "microsoft-standard-WSL2")),
+        "WSL does not support chmod as we would need here, see https://github.com/microsoft/WSL/issues/81",
+    )
    def test_non_writable_home(self, log_handler_mock, warnings_mock):
        with tempfile.TemporaryDirectory(dir=self.workdir) as td:
            os.chmod(td, 0o444)
@@ -24,20 +49,28 @@ def test_non_writable_home(self, log_handler_mock, warnings_mock):
            _dd["cachedir"] = Path(td) / "something-else"
            openml.config._setup(_dd)

-        assert warnings_mock.call_count == 2
+        assert warnings_mock.call_count == 1
        assert log_handler_mock.call_count == 1
        assert not log_handler_mock.call_args_list[0][1]["create_file_handler"]
        assert openml.config._root_cache_directory == Path(td) / "something-else"

-    @unittest.mock.patch("os.path.expanduser")
-    def test_XDG_directories_do_not_exist(self, expanduser_mock):
+    @unittest.skipIf(platform.system() != "Linux", "XDG only exists for Linux systems.")
+    def test_XDG_directories_do_not_exist(self):
        with tempfile.TemporaryDirectory(dir=self.workdir) as td:
+            # Use a fake XDG_CONFIG_HOME that does not exist yet
+            path = Path(td) / "fake_xdg_cache_home"
+            with safe_environ_patcher("XDG_CONFIG_HOME", str(path)):
+                expected_config_dir = path / "openml"
+                expected_determined_config_file_path = expected_config_dir / "config"

-            def side_effect(path_):
-                return os.path.join(td, str(path_).replace("~/", ""))
+                # Ensure that it correctly determines the path to the config file
+                determined_config_file_path = openml.config.determine_config_file_path()
+                assert determined_config_file_path == expected_determined_config_file_path

-            expanduser_mock.side_effect = side_effect
-            openml.config._setup()
+                # Ensure that setup will create the config folder as the configuration
+                # will be written to that location.
+                openml.config._setup()
+                assert expected_config_dir.exists()

    def test_get_config_as_dict(self):
        """Checks if the current configuration is returned accurately as a dict."""
@@ -121,7 +154,7 @@ def test_example_configuration_start_twice(self):


def test_configuration_file_not_overwritten_on_load():
-    """ Regression test for #1337 """
+    """Regression test for #1337"""
    config_file_content = "apikey = abcd"
    with tempfile.TemporaryDirectory() as tmpdir:
        config_file_path = Path(tmpdir) / "config"
@@ -136,12 +169,22 @@ def test_configuration_file_not_overwritten_on_load():
    assert config_file_content == new_file_content
    assert "abcd" == read_config["apikey"]

+
def test_configuration_loads_booleans(tmp_path):
    config_file_content = "avoid_duplicate_runs=true\nshow_progress=false"
-    with (tmp_path/"config").open("w") as config_file:
+    with (tmp_path / "config").open("w") as config_file:
        config_file.write(config_file_content)
    read_config = openml.config._parse_config(tmp_path)

    # Explicit test to avoid truthy/falsy modes of other types
    assert True == read_config["avoid_duplicate_runs"]
    assert False == read_config["show_progress"]
+
+
+def test_openml_cache_dir_env_var(tmp_path: Path) -> None:
+    expected_path = tmp_path / "test-cache"
+
+    with safe_environ_patcher("OPENML_CACHE_DIR", str(expected_path)):
+        openml.config._setup()
+        assert openml.config._root_cache_directory == expected_path
+        assert openml.config.get_cache_directory() == str(expected_path / "org" / "openml" / "www")
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 40a778d8b..2bd9ee0ed 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -119,7 +119,6 @@ def _wait_for_processed_run(self, run_id, max_waiting_time_seconds):
        # time.time() works in seconds
        start_time = time.time()
        while time.time() - start_time < max_waiting_time_seconds:
-
            try:
                openml.runs.get_run_trace(run_id)
            except openml.exceptions.OpenMLServerException:
@@ -131,7 +130,9 @@ def _wait_for_processed_run(self, run_id, max_waiting_time_seconds):
                time.sleep(10)
                continue

-            assert len(run.evaluations) > 0, "Expect not-None evaluations to always contain elements."
+            assert (
+                len(run.evaluations) > 0
+            ), "Expect not-None evaluations to always contain elements."
            return

        raise RuntimeError(
@@ -557,7 +558,7 @@ def determine_grid_size(param_grid):
            fold_evaluations=run.fold_evaluations,
            num_repeats=1,
            num_folds=num_folds,
-            task_type=task_type
+            task_type=task_type,
        )

        # Check if run string and print representation do not run into an error
@@ -796,7 +797,9 @@ def test_run_and_upload_knn_pipeline(self, warnings_mock):

    @pytest.mark.sklearn()
    def test_run_and_upload_gridsearch(self):
-        estimator_name = "base_estimator" if Version(sklearn.__version__) < Version("1.4") else "estimator"
+        estimator_name = (
+            "base_estimator" if Version(sklearn.__version__) < Version("1.4") else "estimator"
+        )
        gridsearch = GridSearchCV(
            BaggingClassifier(**{estimator_name: SVC()}),
            {f"{estimator_name}__C": [0.01, 0.1, 10], f"{estimator_name}__gamma": [0.01, 0.1, 10]},
@@ -1826,7 +1829,9 @@ def test_joblib_backends(self, parallel_mock):
        num_instances = x.shape[0]
        line_length = 6 + len(task.class_labels)

-        backend_choice = "loky" if Version(joblib.__version__) > Version("0.11") else "multiprocessing"
+        backend_choice = (
+            "loky" if Version(joblib.__version__) > Version("0.11") else "multiprocessing"
+        )
        for n_jobs, backend, call_count in [
            (1, backend_choice, 10),
            (2, backend_choice, 10),
@@ -1877,20 +1882,39 @@ def test_joblib_backends(self, parallel_mock):
        reason="SimpleImputer doesn't handle mixed type DataFrame as input",
    )
    def test_delete_run(self):
-        rs = 1
+        rs = np.random.randint(1, 2**31 - 1)
        clf = sklearn.pipeline.Pipeline(
-            steps=[("imputer", SimpleImputer()), ("estimator", DecisionTreeClassifier())],
+            steps=[
+                (f"test_server_imputer_{rs}", SimpleImputer()),
+                ("estimator", DecisionTreeClassifier()),
+            ],
        )
        task = openml.tasks.get_task(32)  # diabetes; crossvalidation

-        run = openml.runs.run_model_on_task(model=clf, task=task, seed=rs)
+        run = openml.runs.run_model_on_task(
+            model=clf, task=task, seed=rs, avoid_duplicate_runs=False
+        )
        run.publish()
+
+        with pytest.raises(openml.exceptions.OpenMLRunsExistError):
+            openml.runs.run_model_on_task(model=clf, task=task, seed=rs, avoid_duplicate_runs=True)
+
        TestBase._mark_entity_for_removal("run", run.run_id)
        TestBase.logger.info(f"collected from test_run_functions: {run.run_id}")

        _run_id = run.run_id
        assert delete_run(_run_id)

+    @unittest.skipIf(
+        Version(sklearn.__version__) < Version("0.20"),
+        reason="SimpleImputer doesn't handle mixed type DataFrame as input",
+    )
+    def test_initialize_model_from_run_nonstrict(self):
+        # We cannot guarantee that a run created with an older version exists on the server.
+        # Thus, we simply test with a run that we know exists, even if it does not strictly
+        # require loose version matching. This exercises the OpenML code paths, but not the
+        # model initialization itself, which we do not want to guarantee anyhow.
+        _ = openml.runs.initialize_model_from_run(run_id=1, strict_version=False)
+

@mock.patch.object(requests.Session, "delete")
def test_delete_run_not_owned(mock_delete, test_files_directory, test_api_key):
diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py
index 9e357f6aa..259cb98b4 100644
--- a/tests/test_setups/test_setup_functions.py
+++ b/tests/test_setups/test_setup_functions.py
@@ -115,6 +115,7 @@ def test_existing_setup_exists_3(self):
            ),
        )

+    @pytest.mark.production()
    def test_get_setup(self):
        # no setups in default test server
        openml.config.server = "https://www.openml.org/api/v1/xml/"
diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py
index cae947917..d900671b7 100644
--- a/tests/test_utils/test_utils.py
+++ b/tests/test_utils/test_utils.py
@@ -8,37 +8,6 @@
from openml.testing import _check_dataset


-@pytest.fixture(autouse=True)
-def as_robot():
-    policy = openml.config.retry_policy
-    n_retries = openml.config.connection_n_retries
-    openml.config.set_retry_policy("robot", n_retries=20)
-    yield
-    openml.config.set_retry_policy(policy, n_retries)
-
-
-@pytest.fixture(autouse=True)
-def with_test_server():
-    openml.config.start_using_configuration_for_example()
-    yield
-    openml.config.stop_using_configuration_for_example()
-
-
-@pytest.fixture(autouse=True)
-def with_test_cache(test_files_directory, request):
-    if not test_files_directory.exists():
-        raise ValueError(
-            f"Cannot find test cache dir, expected it to be {test_files_directory!s}!",
-        )
-    _root_cache_directory = openml.config._root_cache_directory
-    tmp_cache = test_files_directory / request.node.name
-    openml.config.set_root_cache_directory(tmp_cache)
-    yield
-    openml.config.set_root_cache_directory(_root_cache_directory)
-    if tmp_cache.exists():
-        shutil.rmtree(tmp_cache)
-
-
@pytest.fixture()
def min_number_tasks_on_test_server() -> int:
    """After a reset at least 1068 tasks are on the test server"""