gh-146306: Specialize float/float true division in tier 2 optimizer

Add inplace float true division ops that the tier 2 optimizer emits when at least one operand is a known float: _BINARY_OP_TRUEDIV_FLOAT_INPLACE (unique LHS) and _BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT (unique RHS). The optimizer inserts _GUARD_TOS_FLOAT / _GUARD_NOS_FLOAT for operands not yet known to be float, enabling specialization in expressions like `(a + b) / c`. Also marks the result of all NB_TRUE_DIVIDE operations as unique float in the abstract interpreter, enabling downstream inplace ops even for generic `a / b` (the `+=` can reuse the division result). Speeds up chained division patterns by ~2.3x and simple `total += a/b` by ~1.5x. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
python · Fidget-Spinner · Apr 14, 2026 · Mar 24, 2026 · Mar 24, 2026 · Mar 25, 2026
commit c846269e1038a4d3cb9a4b88754db9a1cfa0b711
diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h
diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h
diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
@@ -3237,6 +3237,76 @@ def testfunc(args):
         uops = get_opnames(ex)
         self.assertNotIn("_UNARY_NEGATIVE_FLOAT_INPLACE", uops)
 
+    def test_float_truediv_inplace_unique_lhs(self):
+        # (a + b) yields a fresh, uniquely referenced float on the left of /; the division should reuse it
+        def testfunc(args):
+            a, b, c, n = args
+            total = 0.0
+            for _ in range(n):
+                total += (a + b) / c  # unique LHS divided by c
+            return total
+
+        res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, 4.0, TIER2_THRESHOLD))
+        self.assertAlmostEqual(res, TIER2_THRESHOLD * 1.25)  # (2.0 + 3.0) / 4.0 == 1.25 per iteration
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+        self.assertIn("_BINARY_OP_TRUEDIV_FLOAT_INPLACE", uops)  # the unique-LHS inplace variant
+
+    def test_float_truediv_inplace_unique_rhs(self):
+        # (a + b) yields a fresh, uniquely referenced float on the right of /; the division should reuse it
+        def testfunc(args):
+            a, b, c, n = args
+            total = 0.0
+            for _ in range(n):
+                total += c / (a + b)  # c divided by the unique RHS
+            return total
+
+        res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, 4.0, TIER2_THRESHOLD))
+        self.assertAlmostEqual(res, TIER2_THRESHOLD * 0.8)  # 4.0 / (2.0 + 3.0) == 0.8 per iteration
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+        self.assertIn("_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT", uops)  # the unique-RHS inplace variant
+
+    def test_float_truediv_type_propagation(self):
+        # (a / b) + (c / d): the inner divisions stay generic _BINARY_OP, but
+        # type propagation marks their results as unique floats, so the + is
+        # specialized to an inplace add and the += reuses that unique result
+        def testfunc(args):
+            a, b, c, d, n = args
+            total = 0.0
+            for _ in range(n):
+                total += (a / b) + (c / d)
+            return total
+
+        res, ex = self._run_with_optimizer(testfunc, (10.0, 3.0, 4.0, 5.0, TIER2_THRESHOLD))
+        expected = TIER2_THRESHOLD * (10.0 / 3.0 + 4.0 / 5.0)  # per-iteration term times the loop count
+        self.assertAlmostEqual(res, expected)  # the accumulated sum may differ from the product by rounding
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+        # The + between the two division results should use inplace
+        # (the a / b result on its left is unique thanks to type propagation)
+        self.assertIn("_BINARY_OP_ADD_FLOAT_INPLACE", uops)
+        # The += should use the _RIGHT inplace variant (the + result on its right is unique)
+        self.assertIn("_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT", uops)
+
+    def test_float_truediv_unique_result_enables_inplace_add(self):
+        # a / b: the generic division result is marked as a unique float
+        # by type propagation, so total += (a / b) can use an inplace add
+        def testfunc(args):
+            a, b, n = args
+            total = 0.0
+            for _ in range(n):
+                total += a / b  # the division result is the right operand of the add
+            return total
+
+        res, ex = self._run_with_optimizer(testfunc, (10.0, 3.0, TIER2_THRESHOLD))
+        expected = TIER2_THRESHOLD * (10.0 / 3.0)  # per-iteration term times the loop count
+        self.assertAlmostEqual(res, expected)  # the accumulated sum may differ from the product by rounding
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+        # The += uses the _RIGHT inplace variant because the division result on its right is unique
+        self.assertIn("_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT", uops)
+
     def test_load_attr_instance_value(self):
         def testfunc(n):
             class C():

@@ -838,6 +838,28 @@ dummy_func(
             INPUTS_DEAD();
         }
 
+        tier2 op(_BINARY_OP_TRUEDIV_FLOAT_INPLACE, (left, right -- res, l, r)) {
+            FLOAT_INPLACE_DIVOP(left, right, left);  // left / right stored into left's float; declares _divop_err
+            if (_divop_err) {  // zero divisor: ZeroDivisionError is already set by the macro
+                ERROR_NO_POP();
+            }
+            res = left;  // the overwritten left object is the result
+            l = PyStackRef_NULL;  // left was handed over to res, so l carries nothing
+            r = right;  // right is passed through unchanged
+            INPUTS_DEAD();
+        }
+
+        tier2 op(_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT, (left, right -- res, l, r)) {
+            FLOAT_INPLACE_DIVOP(left, right, right);  // left / right stored into right's float; declares _divop_err
+            if (_divop_err) {  // zero divisor: ZeroDivisionError is already set by the macro
+                ERROR_NO_POP();
+            }
+            res = right;  // the overwritten right object is the result
+            l = left;  // left is passed through unchanged
+            r = PyStackRef_NULL;  // right was handed over to res, so r carries nothing
+            INPUTS_DEAD();
+        }
+
         pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res, l, r)) {
             PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
             PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);

@@ -562,3 +562,27 @@ gen_try_set_executing(PyGenObject *gen)
         ((PyFloatObject *)PyStackRef_AsPyObjectBorrow(TARGET))           \
             ->ob_fval = _dres;                                           \
     } while (0)
+
+// Inplace float true division into TARGET (left or right). Sets _divop_err to 1 on zero division.
+// Expands to a declaration plus a statement; caller must check _divop_err and call ERROR_NO_POP() if set.
+#define FLOAT_INPLACE_DIVOP(left, right, TARGET)                         \
+    int _divop_err = 0;  /* read by the caller after expansion */        \
+    do {                                                                 \
+        PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);            \
+        PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);          \
+        assert(PyFloat_CheckExact(left_o));                              \
+        assert(PyFloat_CheckExact(right_o));                             \
+        assert(_PyObject_IsUniquelyReferenced(                           \
+            PyStackRef_AsPyObjectBorrow(TARGET)));                       \
+        STAT_INC(BINARY_OP, hit);  /* counted before the zero check */   \
+        double _divisor = ((PyFloatObject *)right_o)->ob_fval;           \
+        if (_divisor == 0.0) {                                           \
+            PyErr_SetString(PyExc_ZeroDivisionError,                     \
+                            "float division by zero");                   \
+            _divop_err = 1;                                              \
+            break;  /* TARGET is left unmodified */                      \
+        }                                                                \
+        double _dres = ((PyFloatObject *)left_o)->ob_fval / _divisor;    \
+        ((PyFloatObject *)PyStackRef_AsPyObjectBorrow(TARGET))           \
+            ->ob_fval = _dres;  /* overwrite in place */                 \
+    } while (0)
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h