@@ -1581,7 +1581,7 @@ def test_format_prediction_task_regression(self):
15811581 LooseVersion (sklearn .__version__ ) < "0.21" ,
15821582 reason = "couldn't perform local tests successfully w/o bloating RAM" ,
15831583 )
1584- @unittest .mock .patch ("joblib.parallel_backend " )
1584+ @unittest .mock .patch ("openml.extensions.sklearn.SklearnExtension._run_model_on_fold " )
15851585 def test__run_task_get_arffcontent_2 (self , parallel_mock ):
15861586 """ Tests if a run executed in parallel is collated correctly. """
15871587 task = openml .tasks .get_task (7 ) # Supervised Classification on kr-vs-kp
@@ -1591,16 +1591,22 @@ def test__run_task_get_arffcontent_2(self, parallel_mock):
15911591 flow = unittest .mock .Mock ()
15921592 flow .name = "dummy"
15931593 clf = SGDClassifier (loss = "log" , random_state = 1 )
1594- with parallel_backend ("loky" , n_jobs = self .n_jobs ):
1594+ n_jobs = 2
1595+ with parallel_backend ("loky" , n_jobs = n_jobs ):
15951596 res = openml .runs .functions ._run_task_get_arffcontent (
15961597 flow = flow ,
15971598 extension = self .extension ,
15981599 model = clf ,
15991600 task = task ,
16001601 add_local_measures = True ,
16011602 dataset_format = "array" , # "dataframe" would require handling of categoricals
1602- n_jobs = self . n_jobs ,
1603+ n_jobs = n_jobs ,
16031604 )
1605+ # _run_model_on_fold is mocked out in this process, but not in a newly spawned worker.
1606+ # If joblib distributes the tasks to workers successfully, the mock is never called, so
1607+ # its call_count stays 0 and the checks on the actual results below also hold.
1608+ # If joblib is unable to distribute the tasks, the mock is used instead and this test fails.
1609+ self .assertEqual (parallel_mock .call_count , 0 )
16041610 self .assertIsInstance (res [0 ], list )
16051611 self .assertEqual (len (res [0 ]), num_instances )
16061612 self .assertEqual (len (res [0 ][0 ]), line_length )
@@ -1620,13 +1626,13 @@ def test__run_task_get_arffcontent_2(self, parallel_mock):
16201626 ]
16211627 scores = [v for k , v in res [2 ]["predictive_accuracy" ][0 ].items ()]
16221628 self .assertSequenceEqual (scores , expected_scores , seq_type = list )
1623- self .assertEqual (parallel_mock .call_count , 0 )
16241629
16251630 @unittest .skipIf (
16261631 LooseVersion (sklearn .__version__ ) < "0.21" ,
16271632 reason = "couldn't perform local tests successfully w/o bloating RAM" ,
16281633 )
1629- def test_joblib_backends (self ):
1634+ @unittest .mock .patch ("openml.extensions.sklearn.SklearnExtension._prevent_optimize_n_jobs" )
1635+ def test_joblib_backends (self , parallel_mock ):
16301636 """ Tests evaluation of a run using various joblib backends and n_jobs. """
16311637 task = openml .tasks .get_task (7 ) # Supervised Classification on kr-vs-kp
16321638 x , y = task .get_X_and_y (dataset_format = "dataframe" )
@@ -1635,13 +1641,13 @@ def test_joblib_backends(self):
16351641 flow = unittest .mock .Mock ()
16361642 flow .name = "dummy"
16371643
1638- for n_jobs , backend , len_time_stats in [
1639- (1 , "loky" , 7 ),
1640- (2 , "loky" , 4 ),
1641- (- 1 , "loky" , 1 ),
1642- (1 , "threading" , 7 ),
1643- (- 1 , "threading" , 1 ),
1644- (1 , "sequential" , 7 ),
1644+ for n_jobs , backend , len_time_stats , call_count in [
1645+ (1 , "loky" , 7 , 10 ),
1646+ (2 , "loky" , 4 , 10 ),
1647+ (- 1 , "loky" , 1 , 10 ),
1648+ (1 , "threading" , 7 , 20 ),
1649+ (- 1 , "threading" , 1 , 30 ),
1650+ (1 , "sequential" , 7 , 40 ),
16451651 ]:
16461652 clf = sklearn .model_selection .RandomizedSearchCV (
16471653 estimator = sklearn .ensemble .RandomForestClassifier (n_estimators = 5 ),
@@ -1679,3 +1685,4 @@ def test_joblib_backends(self):
16791685 self .assertEqual (len (res [3 ]), len_time_stats )
16801686 self .assertEqual (len (res [2 ]["predictive_accuracy" ][0 ]), 10 )
16811687 self .assertEqual (len (res [3 ]["predictive_accuracy" ][0 ]), 10 )
1688+ self .assertEqual (parallel_mock .call_count , call_count )
0 commit comments