sklearn model fit check

openml · PGijsbers · Nov 3, 2020 · Oct 28, 2020 · Oct 30, 2020 · Oct 30, 2020
commit d224845a327e9e5e754f8e3b7a9c2ffc5c7fbe8f
diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py
@@ -1652,6 +1652,22 @@ def _prediction_to_probabilities(y: np.ndarray, model_classes: List[Any]) -> pd.
 
         user_defined_measures = OrderedDict()  # type: 'OrderedDict[str, float]'
 
+        try:
+            # check if model is fitted
+            # 'predict' internally calls sklearn.utils.validation.check_is_fitted for every
+            # model-specific attribute it expects, thus offering a more robust check than
+            # a generic simplified call of check_is_fitted(model_copy)
+            from sklearn.exceptions import NotFittedError
+
+            model_copy.predict(X_train)
+            warnings.warn(
+                "The model is already fitted!"
+                " This might cause inconsistency in comparison of results."
+            )
+        except NotFittedError:
+            # model is not fitted, as is required
+            pass
+
         try:
             # for measuring runtime. Only available since Python 3.3
             modelfit_start_cputime = time.process_time()