Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
4956a51
add test and fix for switch of ground truth and predictions
LennartPurucker Feb 20, 2023
fc642c1
undo import optimization
LennartPurucker Feb 20, 2023
2da1109
fix bug with model passing to function
LennartPurucker Feb 20, 2023
0583668
fix order in other tests
LennartPurucker Feb 20, 2023
1fe8bc9
Merge branch 'openml:develop' into develop
LennartPurucker Feb 20, 2023
14cbd04
update progress.rst
LennartPurucker Feb 21, 2023
ceb1d53
new unit test for run consistency and bug fixed
LennartPurucker Feb 21, 2023
37500a7
clarify new assert
LennartPurucker Feb 21, 2023
921cf10
Merge pull request #1 from LennartPurucker/develop_ext
LennartPurucker Feb 22, 2023
3e97992
Merge branch 'openml:develop' into develop
LennartPurucker Feb 22, 2023
9f47b91
minor loop refactor
LennartPurucker Feb 22, 2023
14d4299
Merge remote-tracking branch 'origin/develop' into develop
LennartPurucker Feb 22, 2023
8686317
refactor default to None
LennartPurucker Feb 22, 2023
8adb0bd
directly test prediction data equal
LennartPurucker Feb 23, 2023
04ca611
Update tests/test_runs/test_run.py
LennartPurucker Feb 23, 2023
f996c0a
Merge branch 'develop' into develop
LennartPurucker Feb 23, 2023
3dac7a7
Mark sklearn tests (#1202)
PGijsbers Feb 23, 2023
1bf8c0e
add test and fix for switch of ground truth and predictions
LennartPurucker Feb 20, 2023
74e9c38
undo import optimization
LennartPurucker Feb 20, 2023
794cce8
Merge branch 'develop' of https://github.com/openml/openml-python int…
LennartPurucker Feb 23, 2023
b4c2030
fix mask error resulting from rebase
LennartPurucker Feb 23, 2023
3c5ff3e
make dummy classifier strategy consistent to avoid problems as a resu…
LennartPurucker Feb 24, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
add test and fix for switch of ground truth and predictions
  • Loading branch information
LennartPurucker committed Feb 20, 2023
commit 4956a51c4e40a7e6fccd7539f7747164aca24070
37 changes: 20 additions & 17 deletions openml/runs/functions.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,31 @@
# License: BSD 3-Clause

from collections import OrderedDict
import io
import itertools
import os
import time
from typing import Any, List, Dict, Optional, Set, Tuple, Union, TYPE_CHECKING # noqa F401
import warnings
from collections import OrderedDict
from typing import Any, List, Dict, Optional, Set, Tuple, Union, TYPE_CHECKING # noqa F401

import sklearn.metrics
import xmltodict
import numpy as np
import pandas as pd
import sklearn.metrics
import xmltodict
from joblib.parallel import Parallel, delayed

import openml
import openml.utils
import openml._api_calls
import openml.utils
from openml import config
from openml.exceptions import PyOpenMLError
from openml.extensions import get_extension_by_model
from openml import config
from openml.flows.flow import _copy_server_fields
from .run import OpenMLRun
from .trace import OpenMLRunTrace
from ..exceptions import OpenMLCacheException, OpenMLServerException, OpenMLRunsExistError
from ..flows import get_flow, flow_exists, OpenMLFlow
from ..setups import setup_exists, initialize_model
from ..exceptions import OpenMLCacheException, OpenMLServerException, OpenMLRunsExistError
from ..tasks import (
OpenMLTask,
OpenMLClassificationTask,
Expand All @@ -32,8 +34,6 @@
OpenMLSupervisedTask,
OpenMLLearningCurveTask,
)
from .run import OpenMLRun
from .trace import OpenMLRunTrace
from ..tasks import TaskType, get_task

# Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles
Expand Down Expand Up @@ -155,7 +155,6 @@ def run_flow_on_task(
dataset_format: str = "dataframe",
n_jobs: Optional[int] = None,
) -> OpenMLRun:

"""Run the model provided by the flow on the dataset defined by task.

Takes the flow and repeat information into account.
Expand Down Expand Up @@ -515,13 +514,13 @@ def _calculate_local_measure(sklearn_fn, openml_name):
else pred_y[i]
)
if isinstance(test_y, pd.Series):
test_prediction = (
truth = (
task.class_labels[test_y.iloc[i]]
if isinstance(test_y.iloc[i], int)
else test_y.iloc[i]
)
else:
test_prediction = (
truth = (
task.class_labels[test_y[i]]
if isinstance(test_y[i], (int, np.integer))
else test_y[i]
Expand All @@ -535,7 +534,7 @@ def _calculate_local_measure(sklearn_fn, openml_name):
sample=sample_no,
index=tst_idx,
prediction=prediction,
truth=test_prediction,
truth=truth,
proba=dict(zip(task.class_labels, pred_prob)),
)
else:
Expand All @@ -552,14 +551,14 @@ def _calculate_local_measure(sklearn_fn, openml_name):
elif isinstance(task, OpenMLRegressionTask):

for i, _ in enumerate(test_indices):
test_prediction = test_y.iloc[i] if isinstance(test_y, pd.Series) else test_y[i]
truth = test_y.iloc[i] if isinstance(test_y, pd.Series) else test_y[i]
arff_line = format_prediction(
task=task,
repeat=rep_no,
fold=fold_no,
index=test_indices[i],
prediction=pred_y[i],
truth=test_prediction,
truth=truth,
)

arff_datacontent.append(arff_line)
Expand Down Expand Up @@ -1186,6 +1185,10 @@ def format_prediction(
-------
A list with elements for the prediction results of a run.

The returned order of the elements is (if available):
[repeat, fold, sample, index, prediction, truth, *probabilities]

This order follows the R Client API.
"""
if isinstance(task, OpenMLClassificationTask):
if proba is None:
Expand All @@ -1200,8 +1203,8 @@ def format_prediction(
else:
sample = 0
probabilities = [proba[c] for c in task.class_labels]
return [repeat, fold, sample, index, *probabilities, truth, prediction]
return [repeat, fold, sample, index, prediction, truth, *probabilities]
elif isinstance(task, OpenMLRegressionTask):
return [repeat, fold, index, truth, prediction]
return [repeat, fold, index, prediction, truth]
else:
raise NotImplementedError(f"Formatting for {type(task)} is not supported.")
10 changes: 6 additions & 4 deletions openml/runs/run.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# License: BSD 3-Clause

from collections import OrderedDict
import os
import pickle
import time
from collections import OrderedDict
from typing import Any, IO, TextIO, List, Union, Tuple, Optional, Dict # noqa F401
import os

import arff
import numpy as np
Expand Down Expand Up @@ -304,6 +304,8 @@ def _generate_arff_dict(self) -> "OrderedDict[str, Any]":

Assumes that the run has been executed.

The order of the attributes follows the order defined by the Client API for R.

Returns
-------
arf_dict : dict
Expand Down Expand Up @@ -337,11 +339,11 @@ def _generate_arff_dict(self) -> "OrderedDict[str, Any]":
if class_labels is not None:
arff_dict["attributes"] = (
arff_dict["attributes"]
+ [("prediction", class_labels), ("correct", class_labels)]
+ [
("confidence." + class_labels[i], "NUMERIC")
for i in range(len(class_labels))
]
+ [("prediction", class_labels), ("correct", class_labels)]
)
else:
raise ValueError("The task has no class labels")
Expand All @@ -362,7 +364,7 @@ def _generate_arff_dict(self) -> "OrderedDict[str, Any]":
]
prediction_and_true = [("prediction", class_labels), ("correct", class_labels)]
arff_dict["attributes"] = (
arff_dict["attributes"] + prediction_confidences + prediction_and_true
arff_dict["attributes"] + prediction_and_true + prediction_confidences
)
else:
raise ValueError("The task has no class labels")
Expand Down
128 changes: 93 additions & 35 deletions tests/test_runs/test_run.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
# License: BSD 3-Clause

import numpy as np
import random
import os
import random
from time import time

import numpy as np
import pytest
import xmltodict
from sklearn.dummy import DummyClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier

from openml import OpenMLRun
from openml.testing import TestBase, SimpleImputer
import openml
import openml.extensions.sklearn

import pytest
from openml import OpenMLRun
from openml.testing import TestBase, SimpleImputer


class TestRun(TestBase):
Expand Down Expand Up @@ -189,47 +189,105 @@ def test_to_from_filesystem_no_model(self):
with self.assertRaises(ValueError, msg="Could not find model.pkl"):
openml.runs.OpenMLRun.from_filesystem(cache_path)

@staticmethod
def assert_run_prediction_data(task, run):
# -- Get y_pred and y_true as it should be stored in the run
fold_map = np.full(int(task.get_dataset().qualities["NumberOfInstances"]), -1)
s_d = task.get_split_dimensions()
if (s_d[0] > 1) or (s_d[2] > 1):
raise ValueError("Test does not support this task type's split dimensions.")

for fold_id in range(s_d[1]):
_, test_indices = task.get_train_test_split_indices(repeat=0, fold=fold_id, sample=0)
fold_map[test_indices] = fold_id

X, y = task.get_X_and_y()

# Check correctness of y_true and y_pred in run
for fold_id in range(s_d[1]):
# Get data for fold
Comment thread
LennartPurucker marked this conversation as resolved.
test_indices = np.where(fold_map == fold_id)[0]
train_mask = np.full(len(fold_map), True)
train_mask[test_indices] = False
X_train = X[train_mask]
y_train = y[train_mask]
X_test = X[test_indices]
y_test = y[test_indices]
y_pred = LinearRegression().fit(X_train, y_train).predict(X_test)

# Get stored data for fold
saved_fold_data = run.predictions[run.predictions["fold"] == fold_id].sort_values(
by="row_id"
)
saved_y_pred = saved_fold_data["prediction"].values
gt_key = "truth" if "truth" in list(saved_fold_data) else "correct"
saved_y_test = saved_fold_data[gt_key].values

assert_method = np.testing.assert_array_almost_equal
if task.task_type == "Supervised Classification":
y_pred = np.take(task.class_labels, y_pred)
y_test = np.take(task.class_labels, y_test)
assert_method = np.testing.assert_array_equal

# Assert correctness
assert_method(y_pred, saved_y_pred)
assert_method(y_test, saved_y_test)

def test_publish_with_local_loaded_flow(self):
"""
Publish a run tied to a local flow after it has first been saved to
and loaded from disk.
"""
extension = openml.extensions.sklearn.SklearnExtension()

model = Pipeline(
model_clf = Pipeline(
[("imputer", SimpleImputer(strategy="mean")), ("classifier", DummyClassifier())]
)
task = openml.tasks.get_task(119) # diabetes; crossvalidation

# Make sure the flow does not exist on the server yet.
flow = extension.model_to_flow(model)
self._add_sentinel_to_flow_name(flow)
self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version))

run = openml.runs.run_flow_on_task(
flow=flow,
task=task,
add_local_measures=False,
avoid_duplicate_runs=False,
upload_flow=False,
model_reg = Pipeline(
[
("imputer", SimpleImputer(strategy="mean")),
(
"regressor",
# LR because dummy does not produce enough float-like values
LinearRegression(),
),
]
)

# Make sure that the flow has not been uploaded as requested.
self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version))
task_clf = openml.tasks.get_task(119) # diabetes; hold out validation
task_reg = openml.tasks.get_task(733) # quake; crossvalidation

for model, task in [(model_clf, task_clf), (model_reg, task_reg)]:
# Make sure the flow does not exist on the server yet.
flow = extension.model_to_flow(model)
self._add_sentinel_to_flow_name(flow)
self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version))

run = openml.runs.run_flow_on_task(
flow=flow,
task=task,
add_local_measures=False,
avoid_duplicate_runs=False,
upload_flow=False,
)

cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128)))
run.to_filesystem(cache_path)
# obtain run from filesystem
loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path)
loaded_run.publish()
TestBase._mark_entity_for_removal("run", loaded_run.run_id)
TestBase.logger.info(
"collected from {}: {}".format(__file__.split("/")[-1], loaded_run.run_id)
)
# Make sure that the flow has not been uploaded as requested.
self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version))
self.assert_run_prediction_data(task, run)

cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128)))
run.to_filesystem(cache_path)
# obtain run from filesystem
loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path)
loaded_run.publish()
TestBase._mark_entity_for_removal("run", loaded_run.run_id)
TestBase.logger.info(
"collected from {}: {}".format(__file__.split("/")[-1], loaded_run.run_id)
)

# make sure the flow is published as part of publishing the run.
self.assertTrue(openml.flows.flow_exists(flow.name, flow.external_version))
openml.runs.get_run(loaded_run.run_id)
# make sure the flow is published as part of publishing the run.
self.assertTrue(openml.flows.flow_exists(flow.name, flow.external_version))
openml.runs.get_run(loaded_run.run_id)

def test_run_setup_string_included_in_xml(self):
SETUP_STRING = "setup-string"
Expand Down