fix/maint: update and fix tests for new dataframe default

openml · LennartPurucker · Jun 17, 2025 · Oct 16, 2024 · Oct 16, 2024 · Oct 17, 2024
commit 6517f6abef063b2ec0190f5074b35a4a2d664926
diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -61,27 +61,31 @@ def __init__(self, boolean, integer, floating_point_value):
     def fit(self, X, y):
         pass
 
+
+def _cat_col_selector(X):
+    return X.select_dtypes(include=["object", "category"]).columns
+
+
 def _get_sklearn_preprocessing():
-    from sklearn.compose import ColumnTransformer, make_column_selector
+    from sklearn.compose import ColumnTransformer
     from sklearn.preprocessing import OrdinalEncoder
 
     return [
-            (
-                "cat_handling",
-                ColumnTransformer(
-                    transformers=[
-                        (
-                            "cat",
-                            OrdinalEncoder(
-                                handle_unknown="use_encoded_value", unknown_value=np.nan
-                            ),
-                            make_column_selector(dtype_include=["object", "category"]),
-                        )
-                    ],
-                    remainder="passthrough",
-                ),
+        (
+            "cat_handling",
+            ColumnTransformer(
+                transformers=[
+                    (
+                        "cat",
+                        OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=np.nan),
+                        _cat_col_selector,
+                    )
+                ],
+                remainder="passthrough",
             ),
-            ("imp", SimpleImputer())]
+        ),
+        ("imp", SimpleImputer()),
+    ]
 
 
 class TestSklearnExtensionFlowFunctions(TestBase):
@@ -1904,7 +1908,10 @@ def test_run_model_on_fold_classification_2(self):
 
         pipeline = sklearn.model_selection.GridSearchCV(
             sklearn.pipeline.Pipeline(
-                steps=[*_get_sklearn_preprocessing(), ("clf", sklearn.tree.DecisionTreeClassifier())],
+                steps=[
+                    *_get_sklearn_preprocessing(),
+                    ("clf", sklearn.tree.DecisionTreeClassifier()),
+                ],
             ),
             {"clf__max_depth": [1, 2]},
         )

diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py
@@ -204,17 +204,42 @@ def test_to_from_filesystem_no_model(self):
         with self.assertRaises(ValueError, msg="Could not find model.pkl"):
             openml.runs.OpenMLRun.from_filesystem(cache_path)
 
+    @staticmethod
+    def _cat_col_selector(X):
+        return X.select_dtypes(include=["object", "category"]).columns
+
     @staticmethod
     def _get_models_tasks_for_tests():
+        from sklearn.compose import ColumnTransformer
+        from sklearn.preprocessing import OrdinalEncoder
+
+        basic_preprocessing = [
+            (
+                "cat_handling",
+                ColumnTransformer(
+                    transformers=[
+                        (
+                            "cat",
+                            OrdinalEncoder(
+                                handle_unknown="use_encoded_value", unknown_value=np.nan
+                            ),
+                            TestRun._cat_col_selector,
+                        )
+                    ],
+                    remainder="passthrough",
+                ),
+            ),
+            ("imp", SimpleImputer()),
+        ]
         model_clf = Pipeline(
             [
-                ("imputer", SimpleImputer(strategy="mean")),
+                *basic_preprocessing,
                 ("classifier", DummyClassifier(strategy="prior")),
             ],
         )
         model_reg = Pipeline(
             [
-                ("imputer", SimpleImputer(strategy="mean")),
+                *basic_preprocessing,
                 (
                     "regressor",
                     # LR because dummy does not produce enough float-like values
@@ -263,9 +288,8 @@ def assert_run_prediction_data(task, run, model):
 
             assert_method = np.testing.assert_array_almost_equal
             if task.task_type == "Supervised Classification":
-                y_pred = np.take(task.class_labels, y_pred)
-                y_test = np.take(task.class_labels, y_test)
                 assert_method = np.testing.assert_array_equal
+            y_test = y_test.values
 
             # Assert correctness
             assert_method(y_pred, saved_y_pred)

diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
@@ -26,9 +26,10 @@
 from sklearn.model_selection._search import BaseSearchCV
 from sklearn.naive_bayes import GaussianNB
 from sklearn.pipeline import Pipeline, make_pipeline
-from sklearn.preprocessing import OneHotEncoder, StandardScaler
+from sklearn.preprocessing import OneHotEncoder, StandardScaler, OrdinalEncoder
 from sklearn.svm import SVC
 from sklearn.tree import DecisionTreeClassifier
+from sklearn.compose import ColumnTransformer
 
 import openml
 import openml._api_calls
@@ -130,9 +131,9 @@ def _wait_for_processed_run(self, run_id, max_waiting_time_seconds):
                 time.sleep(10)
                 continue
 
-            assert (
-                len(run.evaluations) > 0
-            ), "Expect not-None evaluations to always contain elements."
+            assert len(run.evaluations) > 0, (
+                "Expect not-None evaluations to always contain elements."
+            )
             return
 
         raise RuntimeError(
@@ -306,7 +307,7 @@ def _remove_random_state(flow):
             flow_server = self.extension.model_to_flow(clf_server)
 
             if flow.class_name not in classes_without_random_state:
-                error_msg = "Flow class %s (id=%d) does not have a random " "state parameter" % (
+                error_msg = "Flow class %s (id=%d) does not have a random state parameter" % (
                     flow.class_name,
                     flow.flow_id,
                 )
@@ -479,7 +480,7 @@ def determine_grid_size(param_grid):
                     grid_iterations += determine_grid_size(sub_grid)
                 return grid_iterations
             else:
-                raise TypeError("Param Grid should be of type list " "(GridSearch only) or dict")
+                raise TypeError("Param Grid should be of type list (GridSearch only) or dict")
 
         run = self._perform_run(
             task_id,
@@ -1286,7 +1287,7 @@ def test_run_with_illegal_flow_id_1(self):
         flow_new = self.extension.model_to_flow(clf)
 
         flow_new.flow_id = -1
-        expected_message_regex = "Local flow_id does not match server flow_id: " "'-1' vs '[0-9]+'"
+        expected_message_regex = "Local flow_id does not match server flow_id: '-1' vs '[0-9]+'"
         with pytest.raises(openml.exceptions.PyOpenMLError, match=expected_message_regex):
             openml.runs.run_flow_on_task(
                 task=task,
@@ -1326,7 +1327,7 @@ def test_run_with_illegal_flow_id_1_after_load(self):
         run.to_filesystem(cache_path)
         loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path)
 
-        expected_message_regex = "Local flow_id does not match server flow_id: " "'-1' vs '[0-9]+'"
+        expected_message_regex = "Local flow_id does not match server flow_id: '-1' vs '[0-9]+'"
         self.assertRaisesRegex(
             openml.exceptions.PyOpenMLError,
             expected_message_regex,
@@ -1827,14 +1828,33 @@ def test_joblib_backends(self, parallel_mock):
             (1, "sequential", 40),
         ]:
             clf = sklearn.model_selection.RandomizedSearchCV(
-                estimator=sklearn.ensemble.RandomForestClassifier(n_estimators=5),
+                estimator=sklearn.pipeline.Pipeline(
+                    [
+                        (
+                            "cat_handling",
+                            ColumnTransformer(
+                                transformers=[
+                                    (
+                                        "cat",
+                                        OrdinalEncoder(
+                                            handle_unknown="use_encoded_value", unknown_value=-1
+                                        ),
+                                        x.select_dtypes(include=["object", "category"]).columns,
+                                    )
+                                ],
+                                remainder="passthrough",
+                            ),
+                        ),
+                        ("clf", sklearn.ensemble.RandomForestClassifier(n_estimators=5)),
+                    ]
+                ),
                 param_distributions={
-                    "max_depth": [3, None],
-                    "max_features": [1, 2, 3, 4],
-                    "min_samples_split": [2, 3, 4, 5, 6, 7, 8, 9, 10],
-                    "min_samples_leaf": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
-                    "bootstrap": [True, False],
-                    "criterion": ["gini", "entropy"],
+                    "clf__max_depth": [3, None],
+                    "clf__max_features": [1, 2, 3, 4],
+                    "clf__min_samples_split": [2, 3, 4, 5, 6, 7, 8, 9, 10],
+                    "clf__min_samples_leaf": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+                    "clf__bootstrap": [True, False],
+                    "clf__criterion": ["gini", "entropy"],
                 },
                 random_state=1,
                 cv=sklearn.model_selection.StratifiedKFold(
@@ -1851,7 +1871,6 @@ def test_joblib_backends(self, parallel_mock):
                     model=clf,
                     task=task,
                     add_local_measures=True,
-                    # dataset_format="array",  # "dataframe" would require handling of categoricals
                     n_jobs=n_jobs,
                 )
             assert type(res[0]) == list

diff --git a/tests/test_tasks/test_classification_task.py b/tests/test_tasks/test_classification_task.py
@@ -1,7 +1,7 @@
 # License: BSD 3-Clause
 from __future__ import annotations
 
-import numpy as np
+import pandas as pd
 
 from openml.tasks import TaskType, get_task
 
@@ -20,10 +20,10 @@ def setUp(self, n_levels: int = 1):
     def test_get_X_and_Y(self):
         X, Y = super().test_get_X_and_Y()
         assert X.shape == (768, 8)
-        assert isinstance(X, np.ndarray)
+        assert isinstance(X, pd.DataFrame)
         assert Y.shape == (768,)
-        assert isinstance(Y, np.ndarray)
-        assert Y.dtype == int
+        assert isinstance(Y, pd.Series)
+        assert isinstance(Y.dtype, pd.CategoricalDtype)  # non-deprecated dtype check
 
     def test_download_task(self):
         task = super().test_download_task()

diff --git a/tests/test_tasks/test_learning_curve_task.py b/tests/test_tasks/test_learning_curve_task.py
@@ -1,7 +1,7 @@
 # License: BSD 3-Clause
 from __future__ import annotations
 
-import numpy as np
+import pandas as pd
 
 from openml.tasks import TaskType, get_task
 
@@ -20,10 +20,10 @@ def setUp(self, n_levels: int = 1):
     def test_get_X_and_Y(self):
         X, Y = super().test_get_X_and_Y()
         assert X.shape == (768, 8)
-        assert isinstance(X, np.ndarray)
+        assert isinstance(X, pd.DataFrame)
         assert Y.shape == (768,)
-        assert isinstance(Y, np.ndarray)
-        assert Y.dtype == int
+        assert isinstance(Y, pd.Series)
+        assert isinstance(Y.dtype, pd.CategoricalDtype)  # non-deprecated dtype check
 
     def test_download_task(self):
         task = super().test_download_task()

diff --git a/tests/test_tasks/test_regression_task.py b/tests/test_tasks/test_regression_task.py
@@ -3,7 +3,7 @@
 
 import ast
 
-import numpy as np
+import pandas as pd
 
 import openml
 from openml.exceptions import OpenMLServerException
@@ -51,10 +51,10 @@ def setUp(self, n_levels: int = 1):
     def test_get_X_and_Y(self):
         X, Y = super().test_get_X_and_Y()
         assert X.shape == (194, 32)
-        assert isinstance(X, np.ndarray)
+        assert isinstance(X, pd.DataFrame)
         assert Y.shape == (194,)
-        assert isinstance(Y, np.ndarray)
-        assert Y.dtype == float
+        assert isinstance(Y, pd.Series)
+        assert pd.api.types.is_numeric_dtype(Y)
 
     def test_download_task(self):
         task = super().test_download_task()

diff --git a/tests/test_tasks/test_supervised_task.py b/tests/test_tasks/test_supervised_task.py
@@ -3,7 +3,7 @@
 
 import unittest
 
-import numpy as np
+import pandas as pd
 
 from openml.tasks import get_task
 
@@ -27,7 +27,7 @@ def setUpClass(cls):
     def setUp(self, n_levels: int = 1):
         super().setUp()
 
-    def test_get_X_and_Y(self) -> tuple[np.ndarray, np.ndarray]:
+    def test_get_X_and_Y(self) -> tuple[pd.DataFrame, pd.Series]:
         task = get_task(self.task_id)
         X, Y = task.get_X_and_y()
         return X, Y