Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Use sym_new_null for consumed operand slots in inplace float ops
The inplace ops set l or r to PyStackRef_NULL at runtime, so the
optimizer should model this as sym_new_null(ctx) rather than
PyJitRef_Borrow(). Both produce _POP_TOP_NOP but sym_new_null
correctly matches the runtime semantics.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
  • Loading branch information
eendebakpt and claude committed Mar 24, 2026
commit c8f860f521a0e9392a97c0d03cacb7c5135e8857
28 changes: 14 additions & 14 deletions Include/internal/pycore_uop_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 12 additions & 16 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -785,13 +785,12 @@ dummy_func(
macro(BINARY_OP_SUBTRACT_FLOAT) =
_GUARD_TOS_FLOAT + _GUARD_NOS_FLOAT + unused/5 + _BINARY_OP_SUBTRACT_FLOAT + _POP_TOP_FLOAT + _POP_TOP_FLOAT;

// Inplace float ops: mutate the uniquely-referenced left operand
// Inplace float ops: mutate the uniquely-referenced operand
// instead of allocating a new float. Tier 2 only.
// The optimizer sets l to null so the following _POP_TOP_FLOAT
// becomes _POP_TOP_NOP.
// Note: read into a local double and write back to avoid compound
// assignment (+=) on ob_fval, which generates problematic JIT
// stencils on i686-pc-windows-msvc.

tier2 op(_BINARY_OP_ADD_FLOAT_INPLACE, (left, right -- res, l, r)) {
Copy link
Copy Markdown
Member

@markshannon markshannon Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This op and its variants all share a lot of common code.
Could you factor out the code into a macro to perform the inplace operation?
Like:

tier2 op(_BINARY_OP_ADD_FLOAT_INPLACE, (left, right -- res, l, r)) {
    res = FLOAT_INPLACE_OP(left, +, right);
    l = PyStackRef_NULL;
    r = right;
    INPUTS_DEAD();
}

tier2 op(_BINARY_OP_MULTIPLY_FLOAT_INPLACE_RIGHT, (left, right -- res, l, r)) {
    res = FLOAT_INPLACE_OP(right, *, left);
    l = left;
    r = PyStackRef_NULL;
    INPUTS_DEAD();
}

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Normal C macros are not allowed in the bytecodes.c opcodes (they are not expanded). I have not yet found a way to refactor this nicely.

We could add a new macro to the DSL (like INPUTS_DEAD) for this, but it feels a bit odd to add something to the DSL for this particular case.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> Normal C macros are not allowed in the bytecodes.c opcodes

They are. You just need to define them in ceval_macros.h. Would you like me to do this, or would you like to do it?

PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
Expand All @@ -801,12 +800,10 @@ dummy_func(
STAT_INC(BINARY_OP, hit);
double dres = ((PyFloatObject *)left_o)->ob_fval + ((PyFloatObject *)right_o)->ob_fval;
((PyFloatObject *)left_o)->ob_fval = dres;
// Transfer ownership of left to res.
// Original left is now dead.
res = left;
INPUTS_DEAD();
l = PyStackRef_NULL;
r = right;
Comment thread
eendebakpt marked this conversation as resolved.
INPUTS_DEAD();
}

tier2 op(_BINARY_OP_SUBTRACT_FLOAT_INPLACE, (left, right -- res, l, r)) {
Expand All @@ -819,9 +816,9 @@ dummy_func(
double dres = ((PyFloatObject *)left_o)->ob_fval - ((PyFloatObject *)right_o)->ob_fval;
((PyFloatObject *)left_o)->ob_fval = dres;
res = left;
INPUTS_DEAD();
l = PyStackRef_NULL;
r = right;
INPUTS_DEAD();
}

tier2 op(_BINARY_OP_MULTIPLY_FLOAT_INPLACE, (left, right -- res, l, r)) {
Expand All @@ -834,12 +831,11 @@ dummy_func(
double dres = ((PyFloatObject *)left_o)->ob_fval * ((PyFloatObject *)right_o)->ob_fval;
((PyFloatObject *)left_o)->ob_fval = dres;
res = left;
INPUTS_DEAD();
l = PyStackRef_NULL;
r = right;
INPUTS_DEAD();
}

// Inplace RIGHT variants: mutate the uniquely-referenced right operand.
tier2 op(_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT, (left, right -- res, l, r)) {
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
Expand All @@ -850,39 +846,39 @@ dummy_func(
double dres = ((PyFloatObject *)left_o)->ob_fval + ((PyFloatObject *)right_o)->ob_fval;
((PyFloatObject *)right_o)->ob_fval = dres;
res = right;
INPUTS_DEAD();
l = left;
r = PyStackRef_NULL;
INPUTS_DEAD();
}

tier2 op(_BINARY_OP_MULTIPLY_FLOAT_INPLACE_RIGHT, (left, right -- res, l, r)) {
tier2 op(_BINARY_OP_SUBTRACT_FLOAT_INPLACE_RIGHT, (left, right -- res, l, r)) {
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
assert(PyFloat_CheckExact(left_o));
assert(PyFloat_CheckExact(right_o));
assert(_PyObject_IsUniquelyReferenced(right_o));
STAT_INC(BINARY_OP, hit);
double dres = ((PyFloatObject *)left_o)->ob_fval * ((PyFloatObject *)right_o)->ob_fval;
double dres = ((PyFloatObject *)left_o)->ob_fval - ((PyFloatObject *)right_o)->ob_fval;
((PyFloatObject *)right_o)->ob_fval = dres;
res = right;
INPUTS_DEAD();
l = left;
r = PyStackRef_NULL;
INPUTS_DEAD();
}

tier2 op(_BINARY_OP_SUBTRACT_FLOAT_INPLACE_RIGHT, (left, right -- res, l, r)) {
tier2 op(_BINARY_OP_MULTIPLY_FLOAT_INPLACE_RIGHT, (left, right -- res, l, r)) {
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
assert(PyFloat_CheckExact(left_o));
assert(PyFloat_CheckExact(right_o));
assert(_PyObject_IsUniquelyReferenced(right_o));
STAT_INC(BINARY_OP, hit);
double dres = ((PyFloatObject *)left_o)->ob_fval - ((PyFloatObject *)right_o)->ob_fval;
double dres = ((PyFloatObject *)left_o)->ob_fval * ((PyFloatObject *)right_o)->ob_fval;
((PyFloatObject *)right_o)->ob_fval = dres;
res = right;
INPUTS_DEAD();
l = left;
r = PyStackRef_NULL;
INPUTS_DEAD();
}

pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res, l, r)) {
Expand Down
24 changes: 12 additions & 12 deletions Python/executor_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions Python/optimizer_bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -327,13 +327,13 @@ dummy_func(void) {
op(_BINARY_OP_ADD_FLOAT, (left, right -- res, l, r)) {
if (PyJitRef_IsUnique(left)) {
ADD_OP(_BINARY_OP_ADD_FLOAT_INPLACE, 0, 0);
l = PyJitRef_Borrow(left);
l = sym_new_null(ctx);
r = right;
}
else if (PyJitRef_IsUnique(right)) {
ADD_OP(_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT, 0, 0);
l = left;
r = PyJitRef_Borrow(right);
r = sym_new_null(ctx);
}
else {
l = left;
Expand All @@ -345,13 +345,13 @@ dummy_func(void) {
op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res, l, r)) {
if (PyJitRef_IsUnique(left)) {
ADD_OP(_BINARY_OP_SUBTRACT_FLOAT_INPLACE, 0, 0);
l = PyJitRef_Borrow(left);
l = sym_new_null(ctx);
r = right;
}
else if (PyJitRef_IsUnique(right)) {
ADD_OP(_BINARY_OP_SUBTRACT_FLOAT_INPLACE_RIGHT, 0, 0);
l = left;
r = PyJitRef_Borrow(right);
r = sym_new_null(ctx);
}
else {
l = left;
Expand All @@ -363,13 +363,13 @@ dummy_func(void) {
op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res, l, r)) {
if (PyJitRef_IsUnique(left)) {
ADD_OP(_BINARY_OP_MULTIPLY_FLOAT_INPLACE, 0, 0);
l = PyJitRef_Borrow(left);
l = sym_new_null(ctx);
r = right;
}
else if (PyJitRef_IsUnique(right)) {
ADD_OP(_BINARY_OP_MULTIPLY_FLOAT_INPLACE_RIGHT, 0, 0);
l = left;
r = PyJitRef_Borrow(right);
r = sym_new_null(ctx);
}
else {
l = left;
Expand Down
16 changes: 8 additions & 8 deletions Python/optimizer_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.