Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
7a6f845
refactor: Remove "array" format
eddiebergman Oct 16, 2024
6296f37
refactor: Explicitly name parameter to listing functions
eddiebergman Oct 16, 2024
a31dafc
fix: Don't double call list
eddiebergman Oct 17, 2024
3bd924c
update...
eddiebergman Nov 4, 2024
e98c70a
attempted to fix merge conflicts for examples
SubhadityaMukherjee Apr 1, 2025
2c82122
rename target_names back to target
SubhadityaMukherjee Apr 1, 2025
ce57881
fix: resolve kdd_rijn example to be without dataframe parameter and r…
LennartPurucker Jun 16, 2025
ddda711
fix: ensure dtypes as in original code
LennartPurucker Jun 16, 2025
063a8e6
fix: remove incorrect parsing of sparse pandas
LennartPurucker Jun 16, 2025
2e6c4c7
fix: make sklearn tests work with pandas
LennartPurucker Jun 16, 2025
12dedb0
fix: fix listing calls and test for utils
LennartPurucker Jun 16, 2025
4aae48b
Merge remote-tracking branch 'upstream/develop' into refactor-default…
LennartPurucker Jun 16, 2025
6517f6a
fix/maint: update and fix tests for new dataframe default
LennartPurucker Jun 16, 2025
466022e
fix/maint: resolve tests that used old default format
LennartPurucker Jun 16, 2025
bd120f5
fix: remove OrdinalEncoder
LennartPurucker Jun 16, 2025
de597b5
fix: update test to new assert with onehot
LennartPurucker Jun 16, 2025
32e6fbf
fix/maint: update examples
LennartPurucker Jun 16, 2025
bae06ca
fix: example revert
LennartPurucker Jun 16, 2025
78b1888
fix: add impute for tests to work with older sklearn version
LennartPurucker Jun 16, 2025
22b6b52
fix: make examples work
LennartPurucker Jun 16, 2025
232b37c
Update openml/utils.py
LennartPurucker Jun 17, 2025
f14fce6
Update openml/utils.py
LennartPurucker Jun 17, 2025
7fb5eb2
Update openml/setups/setup.py
LennartPurucker Jun 17, 2025
f45530f
Update openml/setups/setup.py
LennartPurucker Jun 17, 2025
7fb31ce
remove comment we do not understand
LennartPurucker Jun 17, 2025
9c2800e
Merge remote-tracking branch 'upstream/refactor-default-dataframe' in…
LennartPurucker Jun 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix/maint: resolve tests that used old default format
  • Loading branch information
LennartPurucker committed Jun 16, 2025
commit 466022e38a67f9d7978faf293a03311b5bb127a6
29 changes: 21 additions & 8 deletions openml/extensions/sklearn/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -1144,7 +1144,7 @@ def _get_fn_arguments_with_defaults(self, fn_name: Callable) -> tuple[dict, set]
optional_params[param] = default_val
return optional_params, required_params

def _deserialize_model(
def _deserialize_model( # noqa: C901
self,
flow: OpenMLFlow,
keep_defaults: bool, # noqa: FBT001
Expand Down Expand Up @@ -1219,6 +1219,20 @@ def _deserialize_model(
if param not in components:
del parameter_dict[param]

if not strict_version:
# Ignore incompatible parameters
allowed_parameter = list(inspect.signature(model_class.__init__).parameters)
for p in list(parameter_dict.keys()):
if p not in allowed_parameter:
warnings.warn(
f"While deserializing in a non-strict way, parameter {p} is not "
f"allowed for {model_class.__name__} likely due to a version mismatch. "
"We ignore the parameter.",
UserWarning,
stacklevel=2,
)
del parameter_dict[p]

return model_class(**parameter_dict)

def _check_dependencies(
Expand Down Expand Up @@ -1254,8 +1268,7 @@ def _check_dependencies(
else:
raise NotImplementedError(f"operation '{operation}' is not supported")
message = (
"Trying to deserialize a model with dependency "
f"{dependency_string} not satisfied."
f"Trying to deserialize a model with dependency {dependency_string} not satisfied."
)
if not check:
if strict_version:
Expand Down Expand Up @@ -1497,7 +1510,7 @@ def _prevent_optimize_n_jobs(self, model):
)
if len(n_jobs_vals) > 0:
raise PyOpenMLError(
"openml-python should not be used to " "optimize the n_jobs parameter.",
"openml-python should not be used to optimize the n_jobs parameter.",
)

################################################################################################
Expand Down Expand Up @@ -1555,7 +1568,7 @@ def _seed_current_object(current_value):

if current_value is not None:
raise ValueError(
"Models should be seeded with int or None (this should never " "happen). ",
"Models should be seeded with int or None (this should never happen). ",
)

return True
Expand Down Expand Up @@ -1780,10 +1793,10 @@ def _prediction_to_probabilities(
# to handle the case when dataset is numpy and categories are encoded
# however the class labels stored in task are still categories
if isinstance(y_train, np.ndarray) and isinstance(
cast(List, task.class_labels)[0],
cast("List", task.class_labels)[0],
str,
):
model_classes = [cast(List[str], task.class_labels)[i] for i in model_classes]
model_classes = [cast("List[str]", task.class_labels)[i] for i in model_classes]

modelpredict_start_cputime = time.process_time()
modelpredict_start_walltime = time.time()
Expand Down Expand Up @@ -2006,7 +2019,7 @@ def is_subcomponent_specification(values):
# (mixed)). OpenML replaces the subcomponent by an
# OpenMLFlow object.
if len(subcomponent) < 2 or len(subcomponent) > 3:
raise ValueError("Component reference should be " "size {2,3}. ")
raise ValueError("Component reference should be size {2,3}. ")

subcomponent_identifier = subcomponent[0]
subcomponent_flow = subcomponent[1]
Expand Down
7 changes: 6 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
# License: BSD 3-Clause
from __future__ import annotations

import multiprocessing

multiprocessing.set_start_method("spawn", force=True)

from collections.abc import Iterator
import logging
import os
Expand All @@ -33,6 +37,7 @@
import openml
from openml.testing import TestBase


# creating logger for unit test file deletion status
logger = logging.getLogger("unit_tests")
logger.setLevel(logging.DEBUG)
Expand Down Expand Up @@ -170,7 +175,7 @@ def pytest_sessionfinish() -> None:
# Delete any test dirs that remain
# In edge cases due to a mixture of pytest parametrization and oslo concurrency,
# some file lock are created after leaving the test. This removes these files!
test_files_dir=Path(__file__).parent.parent / "openml"
test_files_dir = Path(__file__).parent.parent / "openml"
for f in test_files_dir.glob("tests.*"):
if f.is_dir():
shutil.rmtree(f)
Expand Down
40 changes: 29 additions & 11 deletions tests/test_runs/test_run_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def _remove_random_state(flow):
task = openml.tasks.get_task(task_id)

X, y = task.get_X_and_y()
assert np.count_nonzero(np.isnan(X)) == n_missing_vals
assert X.isna().sum().sum() == n_missing_vals
run = openml.runs.run_flow_on_task(
flow=flow,
task=task,
Expand Down Expand Up @@ -401,7 +401,7 @@ def _check_sample_evaluations(

@pytest.mark.sklearn()
def test_run_regression_on_classif_task(self):
task_id = 115 # diabetes; crossvalidation
task_id = 259 # collins; crossvalidation; has numeric targets

clf = LinearRegression()
task = openml.tasks.get_task(task_id)
Expand Down Expand Up @@ -1758,7 +1758,26 @@ def test__run_task_get_arffcontent_2(self, parallel_mock):
num_instances = x.shape[0]
line_length = 6 + len(task.class_labels)
loss = "log" if Version(sklearn.__version__) < Version("1.3") else "log_loss"
clf = SGDClassifier(loss=loss, random_state=1)
clf = sklearn.pipeline.Pipeline(
[
(
"cat_handling",
ColumnTransformer(
transformers=[
(
"cat",
OrdinalEncoder(
handle_unknown="use_encoded_value", unknown_value=-1
),
x.select_dtypes(include=["object", "category"]).columns,
)
],
remainder="passthrough",
),
),
("clf", SGDClassifier(loss=loss, random_state=1)),
]
)
n_jobs = 2
backend = "loky" if Version(joblib.__version__) > Version("0.11") else "multiprocessing"
with parallel_backend(backend, n_jobs=n_jobs):
Expand All @@ -1767,7 +1786,6 @@ def test__run_task_get_arffcontent_2(self, parallel_mock):
model=clf,
task=task,
add_local_measures=True,
# dataset_format="array", # "dataframe" would require handling of categoricals
n_jobs=n_jobs,
)
# This unit test will fail if joblib is unable to distribute successfully since the
Expand All @@ -1784,16 +1802,16 @@ def test__run_task_get_arffcontent_2(self, parallel_mock):
assert len(res[2]) == 7
assert len(res[3]) == 7
expected_scores = [
0.965625,
0.94375,
0.946875,
0.953125,
0.95625,
0.959375,
0.96875,
0.96875,
0.96875,
0.965625,
0.9435736677115988,
0.9467084639498433,
0.9749216300940439,
0.9655172413793104,
0.9373040752351097,
0.9561128526645768,
0.9467084639498433
]
scores = [v for k, v in res[2]["predictive_accuracy"][0].items()]
np.testing.assert_array_almost_equal(
Expand Down
13 changes: 7 additions & 6 deletions tests/test_study/test_study_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,20 +183,21 @@ def test_publish_study(self):
self.assertSetEqual(set(study_downloaded.tasks), set(fixt_task_ids))

# test whether the list run function also handles study data fine
run_ids = openml.runs.list_runs(study=study.id)
self.assertSetEqual(set(run_ids), set(study_downloaded.runs))
run_ids = openml.runs.list_runs(study=study.id) # returns DF
self.assertSetEqual(set(run_ids["run_id"]), set(study_downloaded.runs))

# test whether the list evaluation function also handles study data fine
run_ids = openml.evaluations.list_evaluations(
run_ids = openml.evaluations.list_evaluations( # returns list of objects
"predictive_accuracy",
size=None,
study=study.id,
output_format="object", # making the default explicit
)
self.assertSetEqual(set(run_ids), set(study_downloaded.runs))

# attach more runs, since we fetch 11 here, at least one is non-overlapping
run_list_additional = openml.runs.list_runs(size=11, offset=10)
run_list_additional = set(run_list_additional) - set(run_ids)
run_list_additional = set(run_list_additional["run_id"]) - set(run_ids)
openml.study.attach_to_study(study.id, list(run_list_additional))
study_downloaded = openml.study.get_study(study.id)
# verify again
Expand Down Expand Up @@ -227,7 +228,7 @@ def test_study_attach_illegal(self):
benchmark_suite=None,
name="study with illegal runs",
description="none",
run_ids=list(run_list.keys()),
run_ids=list(run_list["run_id"]),
)
study.publish()
TestBase._mark_entity_for_removal("study", study.id)
Expand All @@ -246,7 +247,7 @@ def test_study_attach_illegal(self):
match="Problem attaching entities.",
):
# some runs already attached
openml.study.attach_to_study(study.id, list(run_list_more.keys()))
openml.study.attach_to_study(study.id, list(run_list_more["run_id"]))
study_downloaded = openml.study.get_study(study.id)
self.assertListEqual(study_original.runs, study_downloaded.runs)

Expand Down
Loading