[ENH] V1 → V2 API Migration - estimation procedures (#1604)

EmanAbdelhaleem · satvshr · geetu040 · web-flow · commit e653ef6a8564 · 2026-03-26T09:53:51.000+01:00
Fixes #1622 Depends on #1576 Related to: #1575 #### Details This PR implements the `EstimationProcedures` resource and refactors the existing estimation-procedure listing functions to use it. --------- Co-authored-by: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Co-authored-by: geetu040 <raoarmaghanshakir040@gmail.com> Co-authored-by: Franz Király <fkiraly@gcos.ai> Co-authored-by: Simon Blanke <simon.blanke@yahoo.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Matthias Feurer <lists@matthiasfeurer.de> Co-authored-by: Pieter Gijsbers <p.gijsbers@tue.nl>
diff --git a/openml/_api/resources/base/resources.py b/openml/_api/resources/base/resources.py
@@ -10,6 +10,7 @@
 from .base import ResourceAPI
 
 if TYPE_CHECKING:
+    from openml.estimation_procedures import OpenMLEstimationProcedure
     from openml.evaluations import OpenMLEvaluation
     from openml.flows.flow import OpenMLFlow
     from openml.setups.setup import OpenMLSetup
@@ -41,6 +42,9 @@ class EstimationProcedureAPI(ResourceAPI):
 
     resource_type: ResourceType = ResourceType.ESTIMATION_PROCEDURE
 
+    @abstractmethod
+    def list(self) -> list[OpenMLEstimationProcedure]: ...
+
 
 class EvaluationAPI(ResourceAPI):
     """Abstract API interface for evaluation resources."""
diff --git a/openml/_api/resources/estimation_procedure.py b/openml/_api/resources/estimation_procedure.py
@@ -1,11 +1,84 @@
 from __future__ import annotations
 
+import warnings
+
+import xmltodict
+
+from openml.estimation_procedures.estimation_procedure import OpenMLEstimationProcedure
+from openml.tasks.task import TaskType
+
 from .base import EstimationProcedureAPI, ResourceV1API, ResourceV2API
 
 
 class EstimationProcedureV1API(ResourceV1API, EstimationProcedureAPI):
-    """Version 1 API implementation for estimation procedure resources."""
+    """V1 API implementation for estimation procedures.
+
+    Fetches estimation procedures from the v1 XML API endpoint.
+    """
+
+    def list(self) -> list[OpenMLEstimationProcedure]:
+        """Return a list of all estimation procedures which are on OpenML.
+
+        Returns
+        -------
+        procedures : list of OpenMLEstimationProcedure
+            A list of all estimation procedures. Every procedure is represented by
+            an OpenMLEstimationProcedure object carrying the following information:
+            id, task type id, name, type.
+        """
+        path = "estimationprocedure/list"
+        response = self._http.get(path)
+        xml_content = response.text
+
+        procs_dict = xmltodict.parse(xml_content)
+
+        # Minimalistic check if the XML is useful
+        if "oml:estimationprocedures" not in procs_dict:
+            raise ValueError("Error in return XML, does not contain tag oml:estimationprocedures.")
+
+        if "@xmlns:oml" not in procs_dict["oml:estimationprocedures"]:
+            raise ValueError(
+                "Error in return XML, does not contain tag "
+                "@xmlns:oml as a child of oml:estimationprocedures.",
+            )
+
+        if procs_dict["oml:estimationprocedures"]["@xmlns:oml"] != "http://openml.org/openml":
+            raise ValueError(
+                "Error in return XML, value of "
+                "oml:estimationprocedures/@xmlns:oml is not "
+                "http://openml.org/openml, but {}".format(
+                    str(procs_dict["oml:estimationprocedures"]["@xmlns:oml"])
+                ),
+            )
+
+        procs: list[OpenMLEstimationProcedure] = []
+        for proc_ in procs_dict["oml:estimationprocedures"]["oml:estimationprocedure"]:
+            task_type_int = int(proc_["oml:ttid"])
+            try:
+                task_type_id = TaskType(task_type_int)
+                procs.append(
+                    OpenMLEstimationProcedure(
+                        id=int(proc_["oml:id"]),
+                        task_type_id=task_type_id,
+                        name=proc_["oml:name"],
+                        type=proc_["oml:type"],
+                    )
+                )
+            except ValueError as e:
+                warnings.warn(
+                    f"Could not create task type id for {task_type_int} due to error {e}",
+                    RuntimeWarning,
+                    stacklevel=2,
+                )
+
+        return procs
 
 
 class EstimationProcedureV2API(ResourceV2API, EstimationProcedureAPI):
-    """Version 2 API implementation for estimation procedure resources."""
+    """V2 API implementation for estimation procedures.
+
+    Listing estimation procedures is not supported through the v2 API.
+    """
+
+    def list(self) -> list[OpenMLEstimationProcedure]:
+        self._not_supported(method="list")
diff --git a/openml/estimation_procedures/__init__.py b/openml/estimation_procedures/__init__.py
@@ -0,0 +1,5 @@
+# License: BSD 3-Clause
+
+from .estimation_procedure import OpenMLEstimationProcedure
+
+__all__ = ["OpenMLEstimationProcedure"]
diff --git a/openml/estimation_procedures/estimation_procedure.py b/openml/estimation_procedures/estimation_procedure.py
@@ -0,0 +1,50 @@
+# License: BSD 3-Clause
+from __future__ import annotations
+
+from dataclasses import asdict, dataclass
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from openml.tasks import TaskType
+
+
+@dataclass
+class OpenMLEstimationProcedure:
+    """
+    Contains the meta-information about a single estimation procedure,
+    as returned by the estimationprocedure/list API call.
+
+    Parameters
+    ----------
+    id : int
+        ID of estimation procedure
+    task_type_id : TaskType
+        Associated task type
+    name : str
+        Name of estimation procedure
+    type : str
+        Type of estimation procedure
+    """
+
+    id: int
+    task_type_id: TaskType
+    name: str
+    type: str
+
+    def _to_dict(self) -> dict:
+        return asdict(self)
+
+    def __repr__(self) -> str:
+        header = "OpenML Estimation Procedure"
+        header = f"{header}\n{'=' * len(header)}\n"
+
+        fields = {
+            "ID": self.id,
+            "Name": self.name,
+            "Type": self.type,
+            "Task Type": self.task_type_id,
+        }
+        longest_field_name_length = max(len(name) for name in fields)
+        field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}"
+        body = "\n".join(field_line_format.format(name, value) for name, value in fields.items())
+        return header + body
diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py
@@ -9,7 +9,6 @@
 
 import numpy as np
 import pandas as pd
-import xmltodict
 
 import openml
 import openml._api_calls
@@ -167,24 +166,8 @@ def list_estimation_procedures() -> list[str]:
     -------
     list
     """
-    api_call = "estimationprocedure/list"
-    xml_string = openml._api_calls._perform_api_call(api_call, "get")
-    api_results = xmltodict.parse(xml_string)
-
-    # Minimalistic check if the XML is useful
-    if "oml:estimationprocedures" not in api_results:
-        raise ValueError('Error in return XML, does not contain "oml:estimationprocedures"')
-
-    if "oml:estimationprocedure" not in api_results["oml:estimationprocedures"]:
-        raise ValueError('Error in return XML, does not contain "oml:estimationprocedure"')
-
-    if not isinstance(api_results["oml:estimationprocedures"]["oml:estimationprocedure"], list):
-        raise TypeError('Error in return XML, does not contain "oml:estimationprocedure" as a list')
-
-    return [
-        prod["oml:name"]
-        for prod in api_results["oml:estimationprocedures"]["oml:estimationprocedure"]
-    ]
+    result = openml._backend.estimation_procedure.list()
+    return [i.name for i in result]
 
 
 def list_evaluations_setups(
diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
@@ -80,50 +80,8 @@ def _get_estimation_procedure_list() -> list[dict[str, Any]]:
         a dictionary containing the following information: id, task type id,
         name, type, repeats, folds, stratified.
     """
-    url_suffix = "estimationprocedure/list"
-    xml_string = openml._api_calls._perform_api_call(url_suffix, "get")
-
-    procs_dict = xmltodict.parse(xml_string)
-    # Minimalistic check if the XML is useful
-    if "oml:estimationprocedures" not in procs_dict:
-        raise ValueError("Error in return XML, does not contain tag oml:estimationprocedures.")
-
-    if "@xmlns:oml" not in procs_dict["oml:estimationprocedures"]:
-        raise ValueError(
-            "Error in return XML, does not contain tag "
-            "@xmlns:oml as a child of oml:estimationprocedures.",
-        )
-
-    if procs_dict["oml:estimationprocedures"]["@xmlns:oml"] != "http://openml.org/openml":
-        raise ValueError(
-            "Error in return XML, value of "
-            "oml:estimationprocedures/@xmlns:oml is not "
-            "http://openml.org/openml, but {}".format(
-                str(procs_dict["oml:estimationprocedures"]["@xmlns:oml"])
-            ),
-        )
-
-    procs: list[dict[str, Any]] = []
-    for proc_ in procs_dict["oml:estimationprocedures"]["oml:estimationprocedure"]:
-        task_type_int = int(proc_["oml:ttid"])
-        try:
-            task_type_id = TaskType(task_type_int)
-            procs.append(
-                {
-                    "id": int(proc_["oml:id"]),
-                    "task_type_id": task_type_id,
-                    "name": proc_["oml:name"],
-                    "type": proc_["oml:type"],
-                },
-            )
-        except ValueError as e:
-            warnings.warn(
-                f"Could not create task type id for {task_type_int} due to error {e}",
-                RuntimeWarning,
-                stacklevel=2,
-            )
-
-    return procs
+    result = openml._backend.estimation_procedure.list()
+    return [i._to_dict() for i in result]
 
 
 def list_tasks(  # noqa: PLR0913
diff --git a/tests/test_api/test_estimation_procedure.py b/tests/test_api/test_estimation_procedure.py
@@ -0,0 +1,32 @@
+# License: BSD 3-Clause  
+from __future__ import annotations  
+  
+import pytest    
+from openml._api import EstimationProcedureV1API, EstimationProcedureV2API
+from openml.exceptions import OpenMLNotSupportedError
+from openml.estimation_procedures import OpenMLEstimationProcedure
+
+
+@pytest.fixture
+def estimation_procedure_v1(http_client_v1, minio_client) -> EstimationProcedureV1API:
+    return EstimationProcedureV1API(http=http_client_v1, minio=minio_client)
+
+
+@pytest.fixture
+def estimation_procedure_v2(http_client_v2, minio_client) -> EstimationProcedureV2API:
+    return EstimationProcedureV2API(http=http_client_v2, minio=minio_client)
+
+
+@pytest.mark.test_server()
+def test_v1_list(estimation_procedure_v1):
+    details = estimation_procedure_v1.list()
+    
+    assert isinstance(details, list)
+    assert len(details) > 0
+    assert all(isinstance(d, OpenMLEstimationProcedure) for d in details)
+
+
+@pytest.mark.test_server()
+def test_v2_list(estimation_procedure_v2):
+    with pytest.raises(OpenMLNotSupportedError):
+        estimation_procedure_v2.list()