Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Unit test fixing + removing redundant parameter
  • Loading branch information
Neeratyoy committed Mar 4, 2021
commit 909d560fe82fe3b48b750bf8135ae0f8aae74c66
17 changes: 7 additions & 10 deletions openml/runs/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,6 @@ def run_flow_on_task(

# execute the run
res = _run_task_get_arffcontent(
flow=flow,
model=flow.model,
task=task,
extension=flow.extension,
Expand Down Expand Up @@ -432,7 +431,6 @@ def run_exists(task_id: int, setup_id: int) -> Set[int]:


def _run_task_get_arffcontent(
flow: OpenMLFlow,
model: Any,
task: OpenMLTask,
extension: "Extension",
Expand Down Expand Up @@ -476,7 +474,6 @@ def _run_task_get_arffcontent(
job_rvals = Parallel(verbose=0, n_jobs=n_jobs)(
delayed(_run_task_get_arffcontent_parallel_helper)(
extension=extension,
flow=flow,
fold_no=fold_no,
model=model,
rep_no=rep_no,
Expand Down Expand Up @@ -613,7 +610,6 @@ def _calculate_local_measure(sklearn_fn, openml_name):

def _run_task_get_arffcontent_parallel_helper(
extension: "Extension",
flow: OpenMLFlow,
fold_no: int,
model: Any,
rep_no: int,
Expand Down Expand Up @@ -661,12 +657,13 @@ def _run_task_get_arffcontent_parallel_helper(
else:
raise NotImplementedError(task.task_type)
config.logger.info(
"Going to execute flow '%s' on task %d for repeat %d fold %d sample %d.",
str(model),
task.task_id,
rep_no,
fold_no,
sample_no,
"Going to run model {} on dataset {} for repeat {} fold {} sample {}".format(
str(model),
openml.datasets.get_dataset(task.dataset_id).name,
rep_no,
fold_no,
sample_no,
)
)
pred_y, proba_y, user_defined_measures_fold, trace, = extension._run_model_on_fold(
model=model,
Expand Down
13 changes: 2 additions & 11 deletions tests/test_runs/test_run_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import time
import sys
import ast
import pytest
import unittest.mock

import numpy as np
Expand Down Expand Up @@ -1188,13 +1189,10 @@ def test__run_task_get_arffcontent(self):
num_folds = 10
num_repeats = 1

flow = unittest.mock.Mock()
flow.name = "dummy"
clf = make_pipeline(
OneHotEncoder(handle_unknown="ignore"), SGDClassifier(loss="log", random_state=1)
)
res = openml.runs.functions._run_task_get_arffcontent(
flow=flow,
extension=self.extension,
model=clf,
task=task,
Expand Down Expand Up @@ -1405,8 +1403,6 @@ def test_run_on_dataset_with_missing_labels_dataframe(self):
# Check that _run_task_get_arffcontent works when one of the class
# labels is only declared in the arff file, but is not present in the
# actual data
flow = unittest.mock.Mock()
flow.name = "dummy"
task = openml.tasks.get_task(2) # anneal; crossvalidation

from sklearn.compose import ColumnTransformer
Expand All @@ -1421,7 +1417,6 @@ def test_run_on_dataset_with_missing_labels_dataframe(self):
) # build a sklearn classifier

data_content, _, _, _ = _run_task_get_arffcontent(
flow=flow,
model=model,
task=task,
extension=self.extension,
Expand All @@ -1443,8 +1438,6 @@ def test_run_on_dataset_with_missing_labels_array(self):
# Check that _run_task_get_arffcontent works when one of the class
# labels is only declared in the arff file, but is not present in the
# actual data
flow = unittest.mock.Mock()
flow.name = "dummy"
task = openml.tasks.get_task(2) # anneal; crossvalidation
# task_id=2 on test server has 38 columns with 6 numeric columns
cont_idx = [3, 4, 8, 32, 33, 34]
Expand All @@ -1466,7 +1459,6 @@ def test_run_on_dataset_with_missing_labels_array(self):
) # build a sklearn classifier

data_content, _, _, _ = _run_task_get_arffcontent(
flow=flow,
model=model,
task=task,
extension=self.extension,
Expand Down Expand Up @@ -1594,7 +1586,6 @@ def test__run_task_get_arffcontent_2(self, parallel_mock):
backend = "loky" if LooseVersion(joblib.__version__) > "0.11" else "multiprocessing"
with parallel_backend(backend, n_jobs=n_jobs):
res = openml.runs.functions._run_task_get_arffcontent(
flow=None,
extension=self.extension,
model=clf,
task=task,
Expand Down Expand Up @@ -1630,6 +1621,7 @@ def test__run_task_get_arffcontent_2(self, parallel_mock):
scores = [v for k, v in res[2]["predictive_accuracy"][0].items()]
self.assertSequenceEqual(scores, expected_scores, seq_type=list)

@pytest.mark.flaky() # appears to fail stochastically on test server
@unittest.skipIf(
LooseVersion(sklearn.__version__) < "0.21",
reason="couldn't perform local tests successfully w/o bloating RAM",
Expand Down Expand Up @@ -1670,7 +1662,6 @@ def test_joblib_backends(self, parallel_mock):
)
with parallel_backend(backend, n_jobs=n_jobs):
res = openml.runs.functions._run_task_get_arffcontent(
flow=None,
extension=self.extension,
model=clf,
task=task,
Expand Down