Skip to content
Closed
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions Include/internal/pycore_long.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@ static inline PyObject* _PyLong_GetZero(void)
static inline PyObject* _PyLong_GetOne(void)
{ return (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS+1]; }

PyObject *_PyLong_Add(PyLongObject *left, PyLongObject *right);
PyObject *_PyLong_Multiply(PyLongObject *left, PyLongObject *right);
PyObject *_PyLong_Subtract(PyLongObject *left, PyLongObject *right);

/* Used by Python/mystrtoul.c, _PyBytes_FromHex(),
_PyBytes_DecodeEscape(), etc. */
PyAPI_DATA(unsigned char) _PyLong_DigitValue[256];
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Improve the performance of specialized :class:`int` and :class:`float`
operations by mutating the left operand in-place when it is safe to do so.
40 changes: 10 additions & 30 deletions Objects/longobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -3139,9 +3139,10 @@ x_sub(PyLongObject *a, PyLongObject *b)
return maybe_small_long(long_normalize(z));
}

PyObject *
_PyLong_Add(PyLongObject *a, PyLongObject *b)
static PyObject *
long_add(PyLongObject *a, PyLongObject *b)
{
CHECK_BINOP(a, b);
if (IS_MEDIUM_VALUE(a) && IS_MEDIUM_VALUE(b)) {
return _PyLong_FromSTwoDigits(medium_value(a) + medium_value(b));
}
Expand Down Expand Up @@ -3172,20 +3173,14 @@ _PyLong_Add(PyLongObject *a, PyLongObject *b)
}

static PyObject *
long_add(PyLongObject *a, PyLongObject *b)
long_sub(PyLongObject *a, PyLongObject *b)
{
CHECK_BINOP(a, b);
return _PyLong_Add(a, b);
}

PyObject *
_PyLong_Subtract(PyLongObject *a, PyLongObject *b)
{
PyLongObject *z;

if (IS_MEDIUM_VALUE(a) && IS_MEDIUM_VALUE(b)) {
return _PyLong_FromSTwoDigits(medium_value(a) - medium_value(b));
}

PyLongObject *z;
if (Py_SIZE(a) < 0) {
if (Py_SIZE(b) < 0) {
z = x_sub(b, a);
Expand All @@ -3207,13 +3202,6 @@ _PyLong_Subtract(PyLongObject *a, PyLongObject *b)
return (PyObject *)z;
}

static PyObject *
long_sub(PyLongObject *a, PyLongObject *b)
{
CHECK_BINOP(a, b);
return _PyLong_Subtract(a, b);
}

/* Grade school multiplication, ignoring the signs.
* Returns the absolute value of the product, or NULL if error.
*/
Expand Down Expand Up @@ -3631,18 +3619,17 @@ k_lopsided_mul(PyLongObject *a, PyLongObject *b)
return NULL;
}

PyObject *
_PyLong_Multiply(PyLongObject *a, PyLongObject *b)
static PyObject *
long_mul(PyLongObject *a, PyLongObject *b)
{
PyLongObject *z;

CHECK_BINOP(a, b);
/* fast path for single-digit multiplication */
if (IS_MEDIUM_VALUE(a) && IS_MEDIUM_VALUE(b)) {
stwodigits v = medium_value(a) * medium_value(b);
return _PyLong_FromSTwoDigits(v);
}

z = k_mul(a, b);
PyLongObject *z = k_mul(a, b);
/* Negate if exactly one of the inputs is negative. */
if (((Py_SIZE(a) ^ Py_SIZE(b)) < 0) && z) {
_PyLong_Negate(&z);
Expand All @@ -3652,13 +3639,6 @@ _PyLong_Multiply(PyLongObject *a, PyLongObject *b)
return (PyObject *)z;
}

static PyObject *
long_mul(PyLongObject *a, PyLongObject *b)
{
CHECK_BINOP(a, b);
return _PyLong_Multiply(a, b);
}

/* Fast modulo division for single-digit longs. */
static PyObject *
fast_mod(PyLongObject *a, PyLongObject *b)
Expand Down
165 changes: 76 additions & 89 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -857,6 +857,76 @@ static const binaryfunc binary_ops[] = {
[NB_INPLACE_XOR] = PyNumber_InPlaceXor,
};

/* Shared body for the specialized int BINARY_OP instructions.
 *
 * OP is the C binary operator token (the handlers in this diff pass +, -
 * and *) that is pasted between the two operand values.
 *
 * Operands: lhs is read from SECOND() and rhs from TOP().  The macro
 * deopts to BINARY_OP unless both are exact ints whose Py_SIZE has an
 * absolute value of at most 1, so each operand is read from ob_digit[0]
 * and signed by Py_SIZE.  The result is computed as a stwodigits value
 * before any reference is released.
 *
 * After the result is computed, rhs is released and the stack is shrunk by
 * one, so lhs becomes the new TOP().  The result is then produced by one
 * of three routes:
 *
 *   1. If the result lies in [-_PY_NSMALLNEGINTS, _PY_NSMALLPOSINTS), lhs
 *      is released and the matching _PyLong_SMALL_INTS entry is
 *      INCREF'ed and stored at TOP().
 *   2. Otherwise, if |result| < PyLong_BASE and Py_REFCNT(lhs) equals
 *      inplace + 1, lhs is overwritten in place (digit and size).  No
 *      SET_TOP is needed because lhs already sits at TOP().
 *   3. Otherwise lhs is released and a new object is created with
 *      PyLong_FromLongLong(); a NULL result jumps to the error label
 *      after being stored at TOP().
 *
 * "inplace" is true when the cache entry's adaptive.index is nonzero and
 * the local variable at slot (index - 1) is lhs itself; an index of 0
 * disables that check.  NOTE(review): the refcount test presumably means
 * "the only owners of lhs are the stack slot and, when inplace, that
 * local" -- confirm against the specializer that fills adaptive.index.
 *
 * Every path ends in DISPATCH() or "goto error", so control never falls
 * out of the bottom of the macro.
 */
#define BINARY_OP_FAST_INT(OP) \
do { \
PyObject *lhs = SECOND(); \
PyObject *rhs = TOP(); \
DEOPT_IF(!PyLong_CheckExact(lhs), BINARY_OP); \
DEOPT_IF(!PyLong_CheckExact(rhs), BINARY_OP); \
DEOPT_IF(1 < Py_ABS(Py_SIZE(lhs)), BINARY_OP); \
DEOPT_IF(1 < Py_ABS(Py_SIZE(rhs)), BINARY_OP); \
STAT_INC(BINARY_OP, hit); \
PyLongObject *lhs_long = (PyLongObject *)lhs; \
PyLongObject *rhs_long = (PyLongObject *)rhs; \
stwodigits l = Py_SIZE(lhs) * (sdigit)lhs_long->ob_digit[0]; \
stwodigits r = Py_SIZE(rhs) * (sdigit)rhs_long->ob_digit[0]; \
stwodigits i = l OP r; \
Py_DECREF(rhs); \
STACK_SHRINK(1); \
if (-_PY_NSMALLNEGINTS <= i && i < _PY_NSMALLPOSINTS) { \
Py_DECREF(lhs); \
PyLongObject *res = &_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + i]; \
Py_INCREF(res); \
SET_TOP((PyObject *)res); \
DISPATCH(); \
} \
uint16_t maybe_store = GET_CACHE()->adaptive.index; \
bool inplace = maybe_store && GETLOCAL(maybe_store - 1) == lhs; \
if (Py_ABS(i) < PyLong_BASE && Py_REFCNT(lhs) == inplace + 1) { \
/* If this assert fails, it's probably one of two things: */ \
/* - If lhs lives in _PyLong_SMALL_INTS, its refcount is wrong. */ \
/* - Whatever created lhs should have used _PyLong_SMALL_INTS, */ \
/* but didn't. Examples of this can be found in bpo-46361. */ \
assert(l < -_PY_NSMALLNEGINTS || _PY_NSMALLPOSINTS <= l); \
lhs_long->ob_digit[0] = (digit)Py_ABS(i); \
Py_SET_SIZE(lhs, i < 0 ? -1 : 1); \
DISPATCH(); \
} \
Py_DECREF(lhs); \
PyObject *res = PyLong_FromLongLong(i); \
SET_TOP(res); \
if (res == NULL) { \
goto error; \
} \
DISPATCH(); \
} while (0)

/* Shared body for the specialized float BINARY_OP instructions.
 *
 * OP is the C binary operator token (the handlers in this diff pass +, -
 * and *) that is pasted between the two operand values.
 *
 * Operands: lhs is read from SECOND() and rhs from TOP().  The macro
 * deopts to BINARY_OP unless both are exact floats.  Both doubles are
 * read and the result computed before any reference is released.
 *
 * After the result is computed, rhs is released and the stack is shrunk by
 * one, so lhs becomes the new TOP().  Then:
 *
 *   - If Py_REFCNT(lhs) equals inplace + 1, the result is written straight
 *     into lhs.  This assigns through PyFloat_AS_DOUBLE(lhs), so it relies
 *     on that macro expanding to an lvalue.  No SET_TOP is needed because
 *     lhs already sits at TOP().
 *   - Otherwise lhs is released and a new object is created with
 *     PyFloat_FromDouble(); a NULL result jumps to the error label after
 *     being stored at TOP().
 *
 * "inplace" is true when the cache entry's adaptive.index is nonzero and
 * the local variable at slot (index - 1) is lhs itself; an index of 0
 * disables that check.  NOTE(review): the refcount test presumably means
 * "the only owners of lhs are the stack slot and, when inplace, that
 * local" -- confirm against the specializer that fills adaptive.index.
 *
 * Every path ends in DISPATCH() or "goto error", so control never falls
 * out of the bottom of the macro.
 */
#define BINARY_OP_FAST_FLOAT(OP) \
do { \
PyObject *lhs = SECOND(); \
PyObject *rhs = TOP(); \
DEOPT_IF(!PyFloat_CheckExact(lhs), BINARY_OP); \
DEOPT_IF(!PyFloat_CheckExact(rhs), BINARY_OP); \
STAT_INC(BINARY_OP, hit); \
double l = PyFloat_AS_DOUBLE(lhs); \
double r = PyFloat_AS_DOUBLE(rhs); \
double d = l OP r; \
Py_DECREF(rhs); \
STACK_SHRINK(1); \
uint16_t maybe_store = GET_CACHE()->adaptive.index; \
bool inplace = maybe_store && GETLOCAL(maybe_store - 1) == lhs; \
if (Py_REFCNT(lhs) == inplace + 1) { \
PyFloat_AS_DOUBLE(lhs) = d; \
DISPATCH(); \
} \
Py_DECREF(lhs); \
PyObject *res = PyFloat_FromDouble(d); \
SET_TOP(res); \
if (res == NULL) { \
goto error; \
} \
DISPATCH(); \
} while (0)

// PEP 634: Structural Pattern Matching

Expand Down Expand Up @@ -2048,74 +2118,19 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
}

TARGET(BINARY_OP_MULTIPLY_INT) {
PyObject *left = SECOND();
PyObject *right = TOP();
DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP);
DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP);
STAT_INC(BINARY_OP, hit);
PyObject *prod = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right);
SET_SECOND(prod);
Py_DECREF(right);
Py_DECREF(left);
STACK_SHRINK(1);
if (prod == NULL) {
goto error;
}
DISPATCH();
BINARY_OP_FAST_INT(*);
}

TARGET(BINARY_OP_MULTIPLY_FLOAT) {
PyObject *left = SECOND();
PyObject *right = TOP();
DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP);
DEOPT_IF(!PyFloat_CheckExact(right), BINARY_OP);
STAT_INC(BINARY_OP, hit);
double dprod = ((PyFloatObject *)left)->ob_fval *
((PyFloatObject *)right)->ob_fval;
PyObject *prod = PyFloat_FromDouble(dprod);
SET_SECOND(prod);
Py_DECREF(right);
Py_DECREF(left);
STACK_SHRINK(1);
if (prod == NULL) {
goto error;
}
DISPATCH();
BINARY_OP_FAST_FLOAT(*);
}

TARGET(BINARY_OP_SUBTRACT_INT) {
PyObject *left = SECOND();
PyObject *right = TOP();
DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP);
DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP);
STAT_INC(BINARY_OP, hit);
PyObject *sub = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right);
SET_SECOND(sub);
Py_DECREF(right);
Py_DECREF(left);
STACK_SHRINK(1);
if (sub == NULL) {
goto error;
}
DISPATCH();
BINARY_OP_FAST_INT(-);
}

TARGET(BINARY_OP_SUBTRACT_FLOAT) {
PyObject *left = SECOND();
PyObject *right = TOP();
DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP);
DEOPT_IF(!PyFloat_CheckExact(right), BINARY_OP);
STAT_INC(BINARY_OP, hit);
double dsub = ((PyFloatObject *)left)->ob_fval - ((PyFloatObject *)right)->ob_fval;
PyObject *sub = PyFloat_FromDouble(dsub);
SET_SECOND(sub);
Py_DECREF(right);
Py_DECREF(left);
STACK_SHRINK(1);
if (sub == NULL) {
goto error;
}
DISPATCH();
BINARY_OP_FAST_FLOAT(-);
}

TARGET(BINARY_OP_ADD_UNICODE) {
Expand Down Expand Up @@ -2164,39 +2179,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
}

TARGET(BINARY_OP_ADD_FLOAT) {
PyObject *left = SECOND();
PyObject *right = TOP();
DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP);
DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP);
STAT_INC(BINARY_OP, hit);
double dsum = ((PyFloatObject *)left)->ob_fval +
((PyFloatObject *)right)->ob_fval;
PyObject *sum = PyFloat_FromDouble(dsum);
SET_SECOND(sum);
Py_DECREF(right);
Py_DECREF(left);
STACK_SHRINK(1);
if (sum == NULL) {
goto error;
}
DISPATCH();
BINARY_OP_FAST_FLOAT(+);
}

TARGET(BINARY_OP_ADD_INT) {
PyObject *left = SECOND();
PyObject *right = TOP();
DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP);
DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP);
STAT_INC(BINARY_OP, hit);
PyObject *sum = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right);
SET_SECOND(sum);
Py_DECREF(right);
Py_DECREF(left);
STACK_SHRINK(1);
if (sum == NULL) {
goto error;
}
DISPATCH();
BINARY_OP_FAST_INT(+);
}

TARGET(BINARY_SUBSCR) {
Expand Down
42 changes: 33 additions & 9 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -1621,14 +1621,22 @@ _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
_Py_OPARG(*instr));
goto success;
}
if (PyLong_CheckExact(lhs)) {
if (PyLong_CheckExact(lhs) &&
Py_ABS(Py_SIZE(lhs)) < 2 && Py_ABS(Py_SIZE(rhs)) < 2)
{
int next_opcode = _Py_OPCODE(instr[1]);
Comment thread
brandtbucher marked this conversation as resolved.
Outdated
if (next_opcode == STORE_FAST ||
next_opcode == STORE_FAST__LOAD_FAST)
{
adaptive->index = _Py_OPARG(instr[1]) + 1;
}
*instr = _Py_MAKECODEUNIT(BINARY_OP_ADD_INT, _Py_OPARG(*instr));
goto success;
goto success_set_index;
}
if (PyFloat_CheckExact(lhs)) {
*instr = _Py_MAKECODEUNIT(BINARY_OP_ADD_FLOAT,
_Py_OPARG(*instr));
goto success;
goto success_set_index;
}
break;
case NB_MULTIPLY:
Expand All @@ -1637,15 +1645,17 @@ _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
SPECIALIZATION_FAIL(BINARY_OP, SPEC_FAIL_DIFFERENT_TYPES);
goto failure;
}
if (PyLong_CheckExact(lhs)) {
if (PyLong_CheckExact(lhs) &&
Py_ABS(Py_SIZE(lhs)) < 2 && Py_ABS(Py_SIZE(rhs)) < 2)
{
*instr = _Py_MAKECODEUNIT(BINARY_OP_MULTIPLY_INT,
_Py_OPARG(*instr));
goto success;
goto success_set_index;
}
if (PyFloat_CheckExact(lhs)) {
*instr = _Py_MAKECODEUNIT(BINARY_OP_MULTIPLY_FLOAT,
_Py_OPARG(*instr));
goto success;
goto success_set_index;
}
break;
case NB_SUBTRACT:
Expand All @@ -1654,15 +1664,17 @@ _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
SPECIALIZATION_FAIL(BINARY_OP, SPEC_FAIL_DIFFERENT_TYPES);
goto failure;
}
if (PyLong_CheckExact(lhs)) {
if (PyLong_CheckExact(lhs) &&
Py_ABS(Py_SIZE(lhs)) < 2 && Py_ABS(Py_SIZE(rhs)) < 2)
{
*instr = _Py_MAKECODEUNIT(BINARY_OP_SUBTRACT_INT,
_Py_OPARG(*instr));
goto success;
goto success_set_index;
}
if (PyFloat_CheckExact(lhs)) {
*instr = _Py_MAKECODEUNIT(BINARY_OP_SUBTRACT_FLOAT,
_Py_OPARG(*instr));
goto success;
goto success_set_index;
}
break;
default:
Expand All @@ -1676,6 +1688,18 @@ _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
STAT_INC(BINARY_OP, failure);
cache_backoff(adaptive);
return;
success_set_index:
; // The technology just isn't there yet...
int next_opcode = _Py_OPCODE(instr[1]);
int next_oparg = _Py_OPARG(instr[1]);
if ((next_opcode == STORE_FAST || next_opcode == STORE_FAST__LOAD_FAST) &&
next_oparg < UINT16_MAX)
{
adaptive->index = next_oparg + 1;
}
else {
adaptive->index = 0;
}
success:
STAT_INC(BINARY_OP, success);
adaptive->counter = initial_counter_value();
Expand Down