Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Fix get_flow() with reinstantiate=True for flows without extensions
- Modify get_flow() to gracefully handle flows without registered extensions
  when reinstantiate=True is passed
- Log a warning instead of raising ValueError when no extension is available
- Update docstring to document the new behavior
- Add tests for OpenML-native flows and flows without extensions
- Fixes issue #1626

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
  • Loading branch information
ssudhiravinesh and qwencoder committed Mar 7, 2026
commit 1c04aedb91af051c42e46454d71adb544df0c05a
43 changes: 33 additions & 10 deletions openml/flows/functions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# License: BSD 3-Clause
from __future__ import annotations

import logging
import os
import re
from collections import OrderedDict
Expand Down Expand Up @@ -83,9 +84,11 @@ def get_flow(flow_id: int, reinstantiate: bool = False, strict_version: bool = T
flow_id : int
The OpenML flow id.
reinstantiate : bool, optional (default=False)
If True, convert the flow description into a concrete model instance
using the flow's extension (e.g., sklearn). If conversion fails and
``strict_version`` is True, an exception will be raised.
If True, attempt to convert the flow description into a concrete model
instance using the flow's extension (e.g., sklearn). If no extension is
registered for the flow type (e.g., for OpenML-native evaluation functions
or flows from uninstalled extensions), a warning is logged and the flow
is returned without instantiating the model.
strict_version : bool, optional (default=True)
When ``reinstantiate`` is True, whether to enforce exact version
requirements for the extension/model. If False, a new flow may
Expand All @@ -95,7 +98,7 @@ def get_flow(flow_id: int, reinstantiate: bool = False, strict_version: bool = T
-------
OpenMLFlow
The flow object with metadata; ``model`` may be populated when
``reinstantiate=True``.
``reinstantiate=True`` and an extension is available.

Raises
------
Expand All @@ -108,6 +111,8 @@ def get_flow(flow_id: int, reinstantiate: bool = False, strict_version: bool = T
------------
- Writes to ``openml.config.cache_directory/flows/{flow_id}/flow.xml``
when the flow is downloaded from the server.
- Logs a warning if ``reinstantiate=True`` but no extension is available
for the flow type.

Preconditions
-------------
Expand All @@ -124,17 +129,35 @@ def get_flow(flow_id: int, reinstantiate: bool = False, strict_version: bool = T
--------
>>> import openml
>>> flow = openml.flows.get_flow(5) # doctest: +SKIP
>>> # Reinstantiate a sklearn flow (requires openml-sklearn extension)
>>> flow_with_model = openml.flows.get_flow(5, reinstantiate=True) # doctest: +SKIP
>>> # For flows without extensions, a warning is logged and flow.model remains None
>>> flow_no_model = openml.flows.get_flow(1, reinstantiate=True) # doctest: +SKIP
"""
flow_id = int(flow_id)
flow = _get_flow_description(flow_id)

if reinstantiate:
flow.model = flow.extension.flow_to_model(flow, strict_version=strict_version)
if not strict_version:
# check if we need to return a new flow b/c of version mismatch
new_flow = flow.extension.model_to_flow(flow.model)
if new_flow.dependencies != flow.dependencies:
return new_flow
# Try to get an extension that can handle this flow
extension = flow._extension
if extension is None:
extension = openml.extensions.get_extension_by_flow(flow, raise_if_no_extension=False)

if extension is not None:
flow.model = extension.flow_to_model(flow, strict_version=strict_version)
if not strict_version:
# check if we need to return a new flow b/c of version mismatch
new_flow = extension.model_to_flow(flow.model)
if new_flow.dependencies != flow.dependencies:
return new_flow
else:
# No extension available for this flow type, log a warning
logger = logging.getLogger(__name__)
logger.warning(
f"Cannot reinstantiate flow {flow_id} ({flow.name}): "
f"no extension registered that can handle this flow type. "
f"Returning flow without model instance."
)
return flow


Expand Down
41 changes: 33 additions & 8 deletions tests/test_flows/test_flow_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,14 +324,39 @@ def test_get_flow_reinstantiate_model(self):

@pytest.mark.test_server()
def test_get_flow_reinstantiate_model_no_extension(self):
    """Reinstantiating a flow with no registered extension warns, not raises.

    Flow 10 is a WEKA flow (``weka.SMO``) for which no extension is
    registered. ``get_flow(..., reinstantiate=True)`` is expected to log a
    warning on the ``openml.flows.functions`` logger and return the flow
    with ``model`` left as ``None`` (it previously raised ``ValueError``).
    """
    with self.assertLogs("openml.flows.functions", level="WARNING") as cm:
        flow = openml.flows.get_flow(flow_id=10, reinstantiate=True)

    # A single record must both name this flow and give the reason. The
    # trailing " (" keeps the id from matching as a prefix of a longer id.
    assert any(
        "Cannot reinstantiate flow 10 (" in line and "no extension registered" in line
        for line in cm.output
    )

    # The flow metadata is still returned; only the model is missing.
    assert flow is not None
    assert flow.flow_id == 10
    assert flow.model is None

@pytest.mark.test_server()
def test_get_flow_reinstantiate_openml_native_flow(self):
    """Regression test for issue #1626.

    Flow 1 is used here as a flow that has no registered extension
    (NOTE(review): the original description calls it OpenML-native,
    e.g. weka.ZeroR -- confirm against the test server). With
    ``reinstantiate=True`` the call should log a warning and return the
    flow without a model instead of raising.
    """
    with self.assertLogs("openml.flows.functions", level="WARNING") as cm:
        flow = openml.flows.get_flow(flow_id=1, reinstantiate=True)

    # Match "flow 1 (" rather than "flow 1": the bare prefix would also be
    # satisfied by a warning about flow 10, 11, 100, ... Both fragments
    # must appear in the same record.
    assert any(
        "Cannot reinstantiate flow 1 (" in line and "no extension registered" in line
        for line in cm.output
    )

    # The flow metadata is still returned; only the model is missing.
    assert flow is not None
    assert flow.flow_id == 1
    assert flow.model is None

@pytest.mark.sklearn()
@unittest.skipIf(
Expand Down