Refining unit tests

Neeratyoy · Neeratyoy · commit 17e3916b8fbf · 2021-02-16T18:39:50.000+01:00
diff --git a/openml/testing.py b/openml/testing.py
@@ -110,8 +110,6 @@ def setUp(self, n_levels: int = 1):
         # Increase the number of retries to avoid spurious server failures
         self.connection_n_retries = openml.config.connection_n_retries
         openml.config.connection_n_retries = 10
-        # Number of processes to parallelize any evaluation made by a unit test
-        self.n_jobs = 2
 
     def tearDown(self):
         os.chdir(self.cwd)
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
@@ -1581,7 +1581,7 @@ def test_format_prediction_task_regression(self):
         LooseVersion(sklearn.__version__) < "0.21",
         reason="couldn't perform local tests successfully w/o bloating RAM",
     )
-    @unittest.mock.patch("joblib.parallel_backend")
+    @unittest.mock.patch("openml.extensions.sklearn.SklearnExtension._run_model_on_fold")
     def test__run_task_get_arffcontent_2(self, parallel_mock):
         """ Tests if a run executed in parallel is collated correctly. """
         task = openml.tasks.get_task(7)  # Supervised Classification on kr-vs-kp
@@ -1591,16 +1591,22 @@ def test__run_task_get_arffcontent_2(self, parallel_mock):
         flow = unittest.mock.Mock()
         flow.name = "dummy"
         clf = SGDClassifier(loss="log", random_state=1)
-        with parallel_backend("loky", n_jobs=self.n_jobs):
+        n_jobs = 2
+        with parallel_backend("loky", n_jobs=n_jobs):
             res = openml.runs.functions._run_task_get_arffcontent(
                 flow=flow,
                 extension=self.extension,
                 model=clf,
                 task=task,
                 add_local_measures=True,
                 dataset_format="array",  # "dataframe" would require handling of categoricals
-                n_jobs=self.n_jobs,
+                n_jobs=n_jobs,
             )
+        # This unit test will fail if joblib is unable to distribute successfully since the
+        # function _run_model_on_fold is being mocked out. However, for a new spawned worker, it
+        # is not and the mock call_count should remain 0 while the subsequent check of actual
+        # results should also hold, only on successful distribution of tasks to workers.
+        self.assertEqual(parallel_mock.call_count, 0)
         self.assertIsInstance(res[0], list)
         self.assertEqual(len(res[0]), num_instances)
         self.assertEqual(len(res[0][0]), line_length)
@@ -1620,13 +1626,13 @@ def test__run_task_get_arffcontent_2(self, parallel_mock):
         ]
         scores = [v for k, v in res[2]["predictive_accuracy"][0].items()]
         self.assertSequenceEqual(scores, expected_scores, seq_type=list)
-        self.assertEqual(parallel_mock.call_count, 0)
 
     @unittest.skipIf(
         LooseVersion(sklearn.__version__) < "0.21",
         reason="couldn't perform local tests successfully w/o bloating RAM",
     )
-    def test_joblib_backends(self):
+    @unittest.mock.patch("openml.extensions.sklearn.SklearnExtension._prevent_optimize_n_jobs")
+    def test_joblib_backends(self, parallel_mock):
         """ Tests evaluation of a run using various joblib backends and n_jobs. """
         task = openml.tasks.get_task(7)  # Supervised Classification on kr-vs-kp
         x, y = task.get_X_and_y(dataset_format="dataframe")
@@ -1635,13 +1641,13 @@ def test_joblib_backends(self):
         flow = unittest.mock.Mock()
         flow.name = "dummy"
 
-        for n_jobs, backend, len_time_stats in [
-            (1, "loky", 7),
-            (2, "loky", 4),
-            (-1, "loky", 1),
-            (1, "threading", 7),
-            (-1, "threading", 1),
-            (1, "sequential", 7),
+        for n_jobs, backend, len_time_stats, call_count in [
+            (1, "loky", 7, 10),
+            (2, "loky", 4, 10),
+            (-1, "loky", 1, 10),
+            (1, "threading", 7, 20),
+            (-1, "threading", 1, 30),
+            (1, "sequential", 7, 40),
         ]:
             clf = sklearn.model_selection.RandomizedSearchCV(
                 estimator=sklearn.ensemble.RandomForestClassifier(n_estimators=5),
@@ -1679,3 +1685,4 @@ def test_joblib_backends(self):
             self.assertEqual(len(res[3]), len_time_stats)
             self.assertEqual(len(res[2]["predictive_accuracy"][0]), 10)
             self.assertEqual(len(res[3]["predictive_accuracy"][0]), 10)
+            self.assertEqual(parallel_mock.call_count, call_count)