Merged

Changes from 1 commit

Commits (36)
24faeb7
bump to 0.11.1dev to continue developing (#971)
mfeurer Oct 25, 2020
e84cdf9
update home page example to numerical dataset (pendigits) (#976)
a-moadel Oct 26, 2020
07e87ad
Speed up tests (#977)
PGijsbers Oct 29, 2020
4923e5b
Additional fixes to PR 777 (#967)
Neeratyoy Oct 29, 2020
f2af798
Improving the performance of check_datasets_active (#980)
ArlindKadra Oct 29, 2020
756e747
Add CI through Github Actions (#975)
PGijsbers Oct 29, 2020
3132dac
add validation for ignore_attributes and default_target_attribute at …
a-moadel Oct 29, 2020
6afc880
Updated the way 'image features' are stored, updated old unit tests, …
ArlindKadra Oct 29, 2020
5b6de8a
Retry on database error to reduce number of test failures (#984)
mfeurer Oct 30, 2020
63ec0ae
Transition other Travis jobs to Github Actions (#988)
PGijsbers Nov 2, 2020
9a3a6dd
update progress file (#991)
a-moadel Nov 2, 2020
81cc423
docs: add a-moadel as a contributor (#992)
allcontributors[bot] Nov 2, 2020
51eaff6
docs: add Neeratyoy as a contributor (#998)
allcontributors[bot] Nov 2, 2020
a629562
Improve unit tests (#985)
mfeurer Nov 3, 2020
accde88
Warning if fitted sklearn model being used (#989)
Neeratyoy Nov 3, 2020
560e952
Cache dataset features and qualities as pickle (#979)
mfeurer Nov 3, 2020
5d5a48e
Update string formatting (#1001)
PGijsbers Nov 17, 2020
16799ad
Specify encoding for README file (#1004)
PGijsbers Nov 18, 2020
fba6aab
Making some unit tests work (#1000)
Neeratyoy Dec 24, 2020
e074c14
Refactor data loading/storing (#1018)
PGijsbers Jan 19, 2021
ab793a6
Adding helper functions to support ColumnTransformer (#982)
Neeratyoy Jan 28, 2021
47cda65
Rework local openml directory (#987)
mfeurer Feb 10, 2021
80ae046
Feature/give possibility to not download the dataset qualities (#1017)
a-moadel Feb 11, 2021
d2945ba
Adding sklearn 0.24 support (#1016)
Neeratyoy Feb 11, 2021
3c680c1
improve path detection (#1021)
mfeurer Feb 12, 2021
7553281
Removing flaky decorator for study unit test (#1024)
Neeratyoy Feb 16, 2021
ff7a251
Adding sklearn min. dependencies for all versions (#1022)
Neeratyoy Feb 18, 2021
4ff66ed
Parallel evaluation of tasks (#1020)
Neeratyoy Feb 18, 2021
38f9bf0
Parquet Support (#1029)
PGijsbers Mar 4, 2021
6c609b8
API for topics (#1023)
sahithyaravi Mar 9, 2021
4aec00a
Remove nan-likes from category header (#1037)
PGijsbers Mar 12, 2021
f94672e
Measuring runtimes (#1031)
Neeratyoy Mar 12, 2021
bd8ae14
Fix 1013: Store run `setup_string` (#1015)
PGijsbers Mar 25, 2021
11e6235
Fix #1033: skip two unit tests on Windows (#1040)
mfeurer Mar 26, 2021
d9037e7
bump version for new release (#1041)
mfeurer Mar 29, 2021
5511fa0
fix loky/concurrency issue (#1042)
mfeurer Mar 30, 2021
Speed up tests (#977)
* Cache _list_all; we don't need the latest list

The test does not require the list of flows to be up to date, so a single
cached version will do fine (this call otherwise takes ~40 seconds).
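
The mechanism (visible in the tests/test_flows diff below) wraps the listing call in functools.lru_cache and patches it in for the duration of one test. A minimal sketch of the idea; the test body here is hypothetical:

    import functools
    from unittest.mock import patch

    import openml.utils

    # Memoize the expensive listing call; every invocation inside the
    # patched block reuses one server response instead of re-fetching.
    cached_list_all = functools.lru_cache()(openml.utils._list_all)

    def test_with_stale_flow_list():
        with patch("openml.utils._list_all", cached_list_all):
            ...  # anything that calls _list_all now hits the cache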

* Reduce the number of verified runs

Downloading a run takes a non-negligible amount of time (est. 300 ms on
my current setup). It is unnecessary to compare against all >=100 runs;
a handful should do fine (perhaps even just one would).

* Increase the batch size to avoid more than 2 pages

The old batch size required over 40 pages to be loaded in some tests,
which increased the workload unnecessarily. These changes preserve the
pagination tests while lowering the number of round trips required.

* Mark test_get_run_trace as skip

Since it is already covered by test_run_and_upload_randomsearch.

* Filter on dataset id serverside

Speeds this up ~25x and reduces network traffic.
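
This is the one-line change to check_datasets_active in the openml/datasets/functions.py diff below. A sketch of the before/after; the example ids are hypothetical:

    import openml

    dataset_ids = [2, 128, 999]

    # Before: fetch the entire dataset listing, then filter client-side.
    all_datasets = openml.datasets.list_datasets(status="all")
    subset = {did: all_datasets[did] for did in dataset_ids if did in all_datasets}

    # After: let the server filter on id (~25x faster, far less traffic).
    subset = openml.datasets.list_datasets(status="all", data_id=dataset_ids)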

* Reduce the amount of pages loaded

Loading a page takes ~600 ms. I don't think testing with 3 pages is any
worse than testing with 10. This is also an ideal candidate for a test
that could be split up into (1) testing that the URL is generated
correctly, (2) testing that a pre-cached result is parsed correctly, and
(3) testing that the URL gives the expected response (the actual
integration test).

* Simplify model tested in swapped parameter test

If the test is that swapped parameters work, we don't need a complicated pipeline or dataset.

* Add a cli flag to toggle short/long scenarios

Some tests support both, e.g. by checking only a few runs instead of all runs.
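
The flag is registered in tests/conftest.py and consumed by the COVERAGE job via PYTEST_ARGS (both diffs below). Locally, opting in would look like this (hypothetical invocation from the repository root):

    pytest tests/          # default: short scenarios
    pytest --long tests/   # run the long scenarios where supported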

* Skip time measurement on any Windows machine

* Invoke the --long versions on the COVERAGE job

* Add long/short versions for some long tests

* Check the trace can be retrieved individually

To cover for the skipping of test_get_run_trace

* Remove old test

* Use patch to isolate list_all caching to one test

* Fix decorator call
PGijsbers authored Oct 29, 2020
commit 07e87add438cd36008442a3aaecfbea25fc7e10b
2 changes: 1 addition & 1 deletion ci_scripts/test.sh
@@ -19,7 +19,7 @@ run_tests() {
     cd $TEST_DIR

     if [[ "$COVERAGE" == "true" ]]; then
-        PYTEST_ARGS='--cov=openml'
+        PYTEST_ARGS='--cov=openml --long'
     else
         PYTEST_ARGS=''
     fi
2 changes: 1 addition & 1 deletion openml/datasets/functions.py
@@ -347,7 +347,7 @@ def check_datasets_active(dataset_ids: List[int]) -> Dict[int, bool]:
     dict
         A dictionary with items {did: bool}
     """
-    dataset_list = list_datasets(status="all")
+    dataset_list = list_datasets(status="all", data_id=dataset_ids)
     active = {}

     for did in dataset_ids:
15 changes: 15 additions & 0 deletions tests/conftest.py
@@ -25,6 +25,7 @@
 import os
 import logging
 from typing import List
+import pytest

 import openml
 from openml.testing import TestBase
@@ -182,3 +183,17 @@ def pytest_sessionfinish() -> None:
         logger.info("Local files deleted")

     logger.info("{} is killed".format(worker))
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--long",
+        action="store_true",
+        default=False,
+        help="Run the long version of tests which support both short and long scenarios.",
+    )
+
+
+@pytest.fixture(scope="class")
+def long_version(request):
+    request.cls.long_version = request.config.getoption("--long")
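
Because the fixture is class-scoped and sets request.cls.long_version, unittest-style classes must opt in with @pytest.mark.usefixtures, as the test diffs below do. A minimal consumer, with a hypothetical test class:

    import pytest
    from openml.testing import TestBase

    @pytest.mark.usefixtures("long_version")
    class TestExample(TestBase):
        def test_scaled_work(self):
            # long_version was attached to the class by the fixture.
            n_checks = 100 if self.long_version else 1
            ...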
6 changes: 6 additions & 0 deletions tests/test_evaluations/test_evaluation_functions.py
@@ -1,10 +1,12 @@
 # License: BSD 3-Clause
+import pytest

 import openml
 import openml.evaluations
 from openml.testing import TestBase


+@pytest.mark.usefixtures("long_version")
 class TestEvaluationFunctions(TestBase):
     _multiprocess_can_split_ = True

@@ -27,6 +29,10 @@ def _check_list_evaluation_setups(self, **kwargs):

         # Check if output and order of list_evaluations is preserved
         self.assertSequenceEqual(evals_setups["run_id"].tolist(), evals["run_id"].tolist())
+
+        if not self.long_version:
+            evals_setups = evals_setups.head(1)
+
         # Check if the hyper-parameter column is as accurate and flow_id
         for index, row in evals_setups.iterrows():
             params = openml.runs.get_run(row["run_id"]).parameter_settings
45 changes: 28 additions & 17 deletions tests/test_flows/test_flow_functions.py
@@ -2,18 +2,22 @@

 from collections import OrderedDict
 import copy
+import functools
 import unittest
+from unittest.mock import patch

 from distutils.version import LooseVersion
 import sklearn
 from sklearn import ensemble
 import pandas as pd
+import pytest

 import openml
 from openml.testing import TestBase
 import openml.extensions.sklearn


+@pytest.mark.usefixtures("long_version")
 class TestFlowFunctions(TestBase):
     _multiprocess_can_split_ = True
@@ -334,20 +338,27 @@ def test_get_flow_reinstantiate_model_wrong_version(self):
         assert "0.19.1" not in flow.dependencies

     def test_get_flow_id(self):
-        clf = sklearn.tree.DecisionTreeClassifier()
-        flow = openml.extensions.get_extension_by_model(clf).model_to_flow(clf).publish()
-
-        self.assertEqual(openml.flows.get_flow_id(model=clf, exact_version=True), flow.flow_id)
-        flow_ids = openml.flows.get_flow_id(model=clf, exact_version=False)
-        self.assertIn(flow.flow_id, flow_ids)
-        self.assertGreater(len(flow_ids), 2)
-
-        # Check that the output of get_flow_id is identical if only the name is given, no matter
-        # whether exact_version is set to True or False.
-        flow_ids_exact_version_True = openml.flows.get_flow_id(name=flow.name, exact_version=True)
-        flow_ids_exact_version_False = openml.flows.get_flow_id(
-            name=flow.name, exact_version=False,
-        )
-        self.assertEqual(flow_ids_exact_version_True, flow_ids_exact_version_False)
-        self.assertIn(flow.flow_id, flow_ids_exact_version_True)
-        self.assertGreater(len(flow_ids_exact_version_True), 2)
+        if self.long_version:
+            list_all = openml.utils._list_all
+        else:
+            list_all = functools.lru_cache()(openml.utils._list_all)
+        with patch("openml.utils._list_all", list_all):
+            clf = sklearn.tree.DecisionTreeClassifier()
+            flow = openml.extensions.get_extension_by_model(clf).model_to_flow(clf).publish()
+
+            self.assertEqual(openml.flows.get_flow_id(model=clf, exact_version=True), flow.flow_id)
+            flow_ids = openml.flows.get_flow_id(model=clf, exact_version=False)
+            self.assertIn(flow.flow_id, flow_ids)
+            self.assertGreater(len(flow_ids), 2)
+
+            # Check that the output of get_flow_id is identical if only the name is given, no matter
+            # whether exact_version is set to True or False.
+            flow_ids_exact_version_True = openml.flows.get_flow_id(
+                name=flow.name, exact_version=True
+            )
+            flow_ids_exact_version_False = openml.flows.get_flow_id(
+                name=flow.name, exact_version=False,
+            )
+            self.assertEqual(flow_ids_exact_version_True, flow_ids_exact_version_False)
+            self.assertIn(flow.flow_id, flow_ids_exact_version_True)
+            self.assertGreater(len(flow_ids_exact_version_True), 2)
81 changes: 7 additions & 74 deletions tests/test_runs/test_run_functions.py
@@ -10,7 +10,6 @@
 import unittest.mock

 import numpy as np
-import pytest

 import openml
 import openml.exceptions
@@ -335,7 +334,7 @@ def _check_sample_evaluations(
             for sample in range(num_sample_entrees):
                 evaluation = sample_evaluations[measure][rep][fold][sample]
                 self.assertIsInstance(evaluation, float)
-                if not os.environ.get("CI_WINDOWS"):
+                if not (os.environ.get("CI_WINDOWS") or os.name == "nt"):
                     # Either Appveyor is much faster than Travis
                     # and/or measurements are not as accurate.
                     # Either way, windows seems to get an eval-time
@@ -682,6 +681,8 @@ def test_run_and_upload_randomsearch(self):
             flow_expected_rsv="12172",
         )
         self.assertEqual(len(run.trace.trace_iterations), 5)
+        trace = openml.runs.get_run_trace(run.run_id)
+        self.assertEqual(len(trace.trace_iterations), 5)

     def test_run_and_upload_maskedarrays(self):
         # This testcase is important for 2 reasons:
@@ -828,31 +829,12 @@ def _test_local_evaluations(self, run):
             self.assertGreaterEqual(alt_scores[idx], 0)
             self.assertLessEqual(alt_scores[idx], 1)

-    @unittest.skipIf(
-        LooseVersion(sklearn.__version__) < "0.20",
-        reason="SimpleImputer doesn't handle mixed type DataFrame as input",
-    )
     def test_local_run_swapped_parameter_order_model(self):
+        clf = DecisionTreeClassifier()
+        australian_task = 595
+        task = openml.tasks.get_task(australian_task)

-        # construct sci-kit learn classifier
-        clf = Pipeline(
-            steps=[
-                (
-                    "imputer",
-                    make_pipeline(
-                        SimpleImputer(strategy="most_frequent"),
-                        OneHotEncoder(handle_unknown="ignore"),
-                    ),
-                ),
-                # random forest doesn't take categoricals
-                ("estimator", RandomForestClassifier()),
-            ]
-        )
-
-        # download task
-        task = openml.tasks.get_task(7)
-
-        # invoke OpenML run
         # task and clf are purposely in the old order
         run = openml.runs.run_model_on_task(
             task, clf, avoid_duplicate_runs=False, upload_flow=False,
         )
@@ -950,55 +932,6 @@ def test_initialize_model_from_run(self):
         self.assertEqual(flowS.components["Imputer"].parameters["strategy"], '"most_frequent"')
         self.assertEqual(flowS.components["VarianceThreshold"].parameters["threshold"], "0.05")

-    @pytest.mark.flaky()
-    def test_get_run_trace(self):
-        # get_run_trace is already tested implicitly in test_run_and_publish
-        # this test is a bit additional.
-        num_iterations = 10
-        num_folds = 1
-        task_id = 119
-
-        task = openml.tasks.get_task(task_id)
-
-        # IMPORTANT! Do not sentinel this flow. is faster if we don't wait
-        # on openml server
-        clf = RandomizedSearchCV(
-            RandomForestClassifier(random_state=42, n_estimators=5),
-            {
-                "max_depth": [3, None],
-                "max_features": [1, 2, 3, 4],
-                "bootstrap": [True, False],
-                "criterion": ["gini", "entropy"],
-            },
-            num_iterations,
-            random_state=42,
-            cv=3,
-        )
-
-        # [SPEED] make unit test faster by exploiting run information
-        # from the past
-        try:
-            # in case the run did not exists yet
-            run = openml.runs.run_model_on_task(model=clf, task=task, avoid_duplicate_runs=True,)
-
-            self.assertEqual(
-                len(run.trace.trace_iterations), num_iterations * num_folds,
-            )
-            run = run.publish()
-            TestBase._mark_entity_for_removal("run", run.run_id)
-            TestBase.logger.info("collected from test_run_functions: {}".format(run.run_id))
-            self._wait_for_processed_run(run.run_id, 400)
-            run_id = run.run_id
-        except openml.exceptions.OpenMLRunsExistError as e:
-            # The only error we expect, should fail otherwise.
-            run_ids = [int(run_id) for run_id in e.run_ids]
-            self.assertGreater(len(run_ids), 0)
-            run_id = random.choice(list(run_ids))
-
-        # now the actual unit test ...
-        run_trace = openml.runs.get_run_trace(run_id)
-        self.assertEqual(len(run_trace.trace_iterations), num_iterations * num_folds)
-
     @unittest.skipIf(
         LooseVersion(sklearn.__version__) < "0.20",
         reason="SimpleImputer doesn't handle mixed type DataFrame as input",
2 changes: 1 addition & 1 deletion tests/test_tasks/test_task_functions.py
@@ -110,7 +110,7 @@ def test_list_tasks_paginate(self):
             self._check_task(tasks[tid])

     def test_list_tasks_per_type_paginate(self):
-        size = 10
+        size = 40
         max = 100
         task_types = [
             TaskType.SUPERVISED_CLASSIFICATION,
9 changes: 4 additions & 5 deletions tests/test_utils/test_utils.py
@@ -11,7 +11,6 @@

 class OpenMLTaskTest(TestBase):
     _multiprocess_can_split_ = True
-    _batch_size = 25

     def mocked_perform_api_call(call, request_method):
         # TODO: JvR: Why is this not a staticmethod?
@@ -33,7 +32,7 @@ def test_list_all_few_results_available(self, _perform_api_call):

     def test_list_all_for_datasets(self):
         required_size = 127  # default test server reset value
-        datasets = openml.datasets.list_datasets(batch_size=self._batch_size, size=required_size)
+        datasets = openml.datasets.list_datasets(batch_size=100, size=required_size)

         self.assertEqual(len(datasets), required_size)
         for did in datasets:
@@ -53,13 +52,13 @@ def test_list_datasets_with_high_size_parameter(self):

     def test_list_all_for_tasks(self):
         required_size = 1068  # default test server reset value
-        tasks = openml.tasks.list_tasks(batch_size=self._batch_size, size=required_size)
+        tasks = openml.tasks.list_tasks(batch_size=1000, size=required_size)

         self.assertEqual(len(tasks), required_size)

     def test_list_all_for_flows(self):
         required_size = 15  # default test server reset value
-        flows = openml.flows.list_flows(batch_size=self._batch_size, size=required_size)
+        flows = openml.flows.list_flows(batch_size=25, size=required_size)

         self.assertEqual(len(flows), required_size)
@@ -73,7 +72,7 @@ def test_list_all_for_setups(self):

     def test_list_all_for_runs(self):
         required_size = 21
-        runs = openml.runs.list_runs(batch_size=self._batch_size, size=required_size)
+        runs = openml.runs.list_runs(batch_size=25, size=required_size)

         # might not be on test server after reset, please rerun test at least once if fails
         self.assertEqual(len(runs), required_size)