[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Mar 29, 2026
commit 0776097bc5481227a79a9d06f4c7458d5149c147
32 changes: 16 additions & 16 deletions machine_learning/decision_tree.py
@@ -4,8 +4,8 @@
Output: The decision tree maps a real number input to a real number output.
"""

import numpy as np
from collections import Counter

Check failure on line 8 (GitHub Actions / ruff): machine_learning/decision_tree.py:7:1: I001 Import block is un-sorted or un-formatted. help: Organize imports

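For reference, an import block that satisfies I001 (stdlib before third-party, in isort order) would presumably be:

from collections import Counter

import numpy as np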

class DecisionTree:
@@ -18,7 +18,7 @@
self.prediction = None
self.task = task
self.criterion = criterion

def mean_squared_error(self, labels, prediction):
"""
mean_squared_error:
@@ -51,23 +51,23 @@
would be incorrectly classified.
Formula: Gini = 1 - sum(p_i^2)
where p_i is the probability of class i.

Lower Gini value indicates better purity (best split).
"""
classes, counts = np.unique(y, return_counts=True)

Check failure on line 57 (GitHub Actions / ruff): machine_learning/decision_tree.py:57:9: RUF059 Unpacked variable `classes` is never used. help: Prefix it with an underscore or any other dummy variable pattern
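The RUF059 fix is just a rename of the unused unpacked variable, e.g.:

_classes, counts = np.unique(y, return_counts=True)

The same rename applies to the identical failure in entropy below.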
prob = counts / counts.sum()
- return 1 - np.sum(prob ** 2)
+ return 1 - np.sum(prob**2)

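As a sanity check on the Gini formula documented above: for balanced labels [0, 0, 1, 1] each class has probability 0.5, so Gini = 1 - (0.25 + 0.25) = 0.5. A minimal stand-alone sketch (illustrative, not part of this diff):

# Illustrative check of the Gini computation above (not part of the diff)
import numpy as np

y = np.array([0, 0, 1, 1])
_classes, counts = np.unique(y, return_counts=True)
prob = counts / counts.sum()  # array([0.5, 0.5])
print(1 - np.sum(prob**2))  # 0.5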
def entropy(self, y):
"""
Computes the entropy (impurity) of a set of labels.
Entropy measures the randomness or disorder in the data.
Formula: Entropy = - sum(p_i * log2(p_i))
where p_i is the probability of class i.

Lower entropy means higher purity.
"""
classes, counts = np.unique(y, return_counts=True)

Check failure on line 70 (GitHub Actions / ruff): machine_learning/decision_tree.py:70:9: RUF059 Unpacked variable `classes` is never used. help: Prefix it with an underscore or any other dummy variable pattern
prob = counts / counts.sum()
return -np.sum(prob * np.log2(prob + 1e-9))

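The same kind of check works for entropy: a 50/50 split gives -(0.5 * log2(0.5) + 0.5 * log2(0.5)) = 1.0 bit, and the 1e-9 term only guards against log2(0) when a probability is zero. An illustrative sketch, not part of this diff:

# Illustrative check of the entropy computation above (not part of the diff)
import numpy as np

y = np.array([0, 0, 1, 1])
_classes, counts = np.unique(y, return_counts=True)
prob = counts / counts.sum()
print(-np.sum(prob * np.log2(prob + 1e-9)))  # ~= 1.0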
@@ -76,8 +76,8 @@
Computes the information gain from splitting a dataset.
Information gain represents the reduction in impurity
after a dataset is split into left and right subsets.
Formula: IG = Impurity(parent) - [weighted impurity(left) + weighted impurity(right)]

Check failure on line 79 (GitHub Actions / ruff): machine_learning/decision_tree.py:79:89: E501 Line too long (93 > 88)

Higher information gain indicates a better split.
"""
if self.criterion == "gini":
@@ -90,9 +90,7 @@
weight_l = len(left) / len(parent)
weight_r = len(right) / len(parent)

- return func(parent) - (
-     weight_l * func(left) + weight_r * func(right)
- )
+ return func(parent) - (weight_l * func(left) + weight_r * func(right))

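The weighted-impurity formula can be verified by hand on a perfect split: parent [0, 0, 1, 1] has Gini 0.5 and both children are pure, so IG = 0.5 - (0.5 * 0 + 0.5 * 0) = 0.5. A stand-alone sketch mirroring the method above (the gini helper is a local stand-in, not part of the diff):

# Stand-alone sketch of the information-gain computation (illustrative)
import numpy as np


def gini(y):
    _classes, counts = np.unique(y, return_counts=True)
    prob = counts / counts.sum()
    return 1 - np.sum(prob**2)


parent = np.array([0, 0, 1, 1])
left, right = parent[:2], parent[2:]  # a perfect split
weight_l = len(left) / len(parent)
weight_r = len(right) / len(parent)
print(gini(parent) - (weight_l * gini(left) + weight_r * gini(right)))  # 0.5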
def most_common_label(self, y):
return Counter(y).most_common(1)[0][0]
@@ -150,17 +148,17 @@
return

best_split = 0

"""
loop over all possible splits for the decision tree. find the best split.
if no split exists that is less than 2 * error for the entire array
then the data set is not split and the average for the entire array is used as
the predictor
"""
if self.task == "regression":
best_score = float("inf")
else:
best_score = -float("inf")

Check failure on line 161 (GitHub Actions / ruff): machine_learning/decision_tree.py:158:9: SIM108 Use ternary operator `best_score = float("inf") if self.task == "regression" else -float("inf")` instead of `if`-`else`-block. help: Replace `if`-`else`-block with `best_score = float("inf") if self.task == "regression" else -float("inf")`

for i in range(len(x)):
if len(x[:i]) < self.min_leaf_size:
@@ -180,7 +178,7 @@
best_score = score
best_split = i

- else:
+ else:
gain = self.information_gain(y, left_y, right_y)

if gain > best_score:
@@ -211,8 +209,8 @@
self.left.train(left_x, left_y)
self.right.train(right_x, right_y)

else:
if self.task == "regression":

Check failure on line 213 (GitHub Actions / ruff): machine_learning/decision_tree.py:212:9: PLR5501 Use `elif` instead of `else` then `if`, to reduce indentation. help: Convert to `elif`
self.prediction = np.mean(y)
else:
self.prediction = self.most_common_label(y)
@@ -234,7 +232,7 @@

raise ValueError("Decision tree not yet trained")


class TestDecisionTree:
"""Decision Tres test class"""

@@ -252,7 +250,7 @@

return float(squared_error_sum / labels.size)


def main():
"""
In this demonstration we're generating a sample data set from the sin function in
@@ -270,15 +268,17 @@
x_cls = np.array([1, 2, 3, 4, 5, 6])
y_cls = np.array([0, 0, 0, 1, 1, 1])

- clf = DecisionTree(depth=3, min_leaf_size=1, task="classification", criterion="gini")
+ clf = DecisionTree(
+     depth=3, min_leaf_size=1, task="classification", criterion="gini"
+ )
clf.train(x_cls, y_cls)

print("Classification prediction (2):", clf.predict(2))
print("Classification prediction (5):", clf.predict(5))
print("Classification prediction (2):", clf.predict(2))
print("Classification prediction (5):", clf.predict(5))


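For comparison with the classification demo, a regression run of the same class might look like the sketch below; the arange/sin data and the depth/min_leaf_size values are assumptions standing in for the sin-based demo the docstring mentions (those lines are collapsed in this view):

# Hypothetical regression usage; parameter values are illustrative assumptions.
x_reg = np.arange(-1.0, 1.0, 0.005)
y_reg = np.sin(x_reg)

reg = DecisionTree(depth=10, min_leaf_size=10, task="regression", criterion="gini")
reg.train(x_reg, y_reg)
print("Regression prediction (0.5):", reg.predict(0.5))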
if __name__ == "__main__":
main()
import doctest

- doctest.testmod(name="mean_squared_error", verbose=True)
+ doctest.testmod(name="mean_squared_error", verbose=True)