Merged
Changes from 1 commit (81 commits in total)

Commits
d716faa
Experiment with borrowing load_fast
mpage Feb 5, 2025
3736923
Checkpoint poc
mpage Feb 5, 2025
7a14254
Fix pyframe copy
mpage Feb 6, 2025
b1607aa
Strengthen refs when frame is copied
mpage Feb 6, 2025
e765735
Cleanup
mpage Feb 6, 2025
291ace9
Consider all instructions when computing mutations
mpage Feb 6, 2025
17d6dd6
Add a super instruction
mpage Feb 7, 2025
0a74052
Don't optimize during quickening
mpage Feb 12, 2025
afbfd88
Use abstract interpretation
mpage Feb 11, 2025
696c630
Fix test_generators
mpage Feb 12, 2025
483ac7a
Optimize returns
mpage Feb 13, 2025
259d5db
Remove unused arg
mpage Feb 13, 2025
aeafa98
Make sure we convert borrowed refs on frame
mpage Feb 15, 2025
85f9a64
Don't test with malformed bytecode
mpage Feb 15, 2025
b6ab2f7
Make sure we convert borrowed refs to func/code when copying generato…
mpage Feb 15, 2025
fd1ad3d
Add support for disassembling LOAD_FAST_BORROW_LOAD_FAST_BORROW
mpage Feb 15, 2025
eee2195
Make sure exc_obj is always defined
mpage Feb 15, 2025
d75ec9a
Make sure we store new stackrefs for frame executable/funcobj
mpage Feb 19, 2025
66f5351
Remove refcount check
mpage Feb 19, 2025
7ef6a0b
Don't hardcode initial refcount in refcount tests
mpage Feb 19, 2025
2af2bbc
Remove invalid bytecode from `test_peepholer`
mpage Feb 19, 2025
bf19b7d
Fix invalid bytecode in `test_peepholer.DirectCfgOptimizerTests.test_…
mpage Feb 19, 2025
a9bca03
Fix tests that checked for `LOAD_FAST` instructions that are now opti…
mpage Feb 20, 2025
293c317
Update disassembly in test_dis to match new bytecode
mpage Feb 20, 2025
a12ccd9
Fix refleak in _BINARY_OP_INPLACE_ADD_UNICODE
mpage Feb 21, 2025
1ef26c5
Create new references to fast locals overwritten via f_locals
mpage Feb 21, 2025
1eb9226
Implement two missing opcodes in the static analysis
mpage Feb 21, 2025
7291c49
Use g_block_list when resetting stack depth
mpage Feb 21, 2025
90bf8df
Avoid reallocating state for each basic block
mpage Feb 21, 2025
9bfa922
Generators
mpage Feb 21, 2025
bf6222b
Move optimize after all other passes have run
mpage Feb 24, 2025
dd97d0c
Don't promote borrowed references in STORE_FAST
mpage Feb 24, 2025
6680709
Track reasons for not being able to optimize instructions
mpage Feb 24, 2025
6568fd9
Rename PyStackRef_DupDeferred
mpage Feb 24, 2025
6fde7b0
Rename _PyStackRef_StealIfUnborrowed
mpage Feb 25, 2025
de13810
Avoid extra copies in take_ownership
mpage Feb 25, 2025
fdeae7d
Make the default build work
mpage Feb 25, 2025
1aed281
Add docs for new opcodes
mpage Feb 25, 2025
c332912
Fix flag array size computation
mpage Feb 26, 2025
76a75a7
Add a high level comment explaining our approach
mpage Feb 26, 2025
dd6426f
Bump magic number after merge (was bumped on main)
mpage Feb 28, 2025
bf68eb9
Add more tests
mpage Feb 28, 2025
474a587
Update commented out assertion
mpage Feb 28, 2025
4b3aacf
Add NEWS entry
mpage Feb 28, 2025
e692037
Remove debug print
mpage Feb 28, 2025
2ecfe08
Merge branch 'main' into load-fast-borrow-absinterp
mpage Feb 28, 2025
f98d91d
Fix doctest
mpage Feb 28, 2025
725dc8e
Fix JIT tests
mpage Mar 1, 2025
03d35b2
Fix missed doctest
mpage Mar 1, 2025
39ff3f0
Fix narrowing
mpage Mar 1, 2025
f012a9f
Formatting
mpage Mar 1, 2025
b4b7f73
Merge branch 'main' into load-fast-borrow-absinterp
mpage Mar 13, 2025
8ea82b5
Implement PyStackRef_{Is,Make}HeapSafe
mpage Mar 13, 2025
9bec9f5
Add missing error handling
mpage Mar 13, 2025
902ae84
Simplify frees
mpage Mar 13, 2025
b0ea38f
Get rid of `PyStackRef_IsBorrowed`
mpage Mar 13, 2025
1f5cfcd
Use PyStackRef_Borrow as the new API
mpage Mar 14, 2025
d1e8e45
Make the default build work
mpage Mar 14, 2025
00c95cc
Merge branch 'main' into load-fast-borrow-absinterp
mpage Mar 17, 2025
cc01a30
Regen frozenmain
mpage Mar 17, 2025
6c5faab
Add a workaround for failing tests rather than change marshal.c
mpage Mar 17, 2025
32bd0c6
Update dis.rst to reflect support for LOAD_FAST_BORROW in the default…
mpage Mar 17, 2025
fdb8a82
Exclude immortal objects when keeping overwritten locals alive
mpage Mar 17, 2025
a962017
Use a tuple to store overwritten fast locals
mpage Mar 17, 2025
6c2f07d
Fix off-by-one error
mpage Mar 17, 2025
85b0b00
Merge branch 'main' into load-fast-borrow-absinterp
mpage Mar 20, 2025
5ff2dea
Fix post-merge issues
mpage Mar 20, 2025
0c1e67f
English is hard
mpage Mar 20, 2025
ae2ec65
Improve readability of test cases
mpage Mar 20, 2025
03c474e
Elaborate in the blurb
mpage Mar 20, 2025
60665c9
Remove parameter to calculate stackdepth
mpage Mar 20, 2025
ac8940b
Update comment
mpage Mar 20, 2025
f12573f
Test optimize_load_fast as part of OptimizeCfg
mpage Mar 21, 2025
44f7ffc
Remove test with invalid bytecode
mpage Mar 21, 2025
818e94e
Add helper macro for pushing refs
mpage Mar 21, 2025
c30e1e9
Handle opcodes that leave at least one input on the stack
mpage Mar 22, 2025
112cee6
Merge branch 'main' into load-fast-borrow-absinterp
mpage Mar 23, 2025
80fc5aa
Avoid having stackref only visible from the c stack
mpage Mar 23, 2025
492cce1
Merge branch 'main' into load-fast-borrow-absinterp
mpage Mar 24, 2025
2e38f0d
Merge branch 'main' into load-fast-borrow-absinterp
mpage Mar 31, 2025
2c55722
Merge branch 'main' into load-fast-borrow-absinterp
mpage Apr 1, 2025
Use abstract interpretation
mpage committed Feb 28, 2025
commit afbfd886099d3d033c12376785889642bb9ea5d6
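
For context (illustrative, not part of the committed diff): the pass added below walks each basic block and tracks, for every value on the evaluation stack, which instruction produced it and which local it came from. A LOAD_FAST whose pushed reference is consumed before the local can be rebound (and which is not returned) can safely borrow the reference already held in the fast-locals array instead of creating a new strong reference. A minimal sketch of the intended effect, assuming an interpreter built with this change; the exact disassembly is illustrative:

import dis

def add(a, b):
    return a + b

dis.dis(add)
# Illustrative output on such a build: the two loads are emitted as a
# single borrowing superinstruction, e.g.
#   LOAD_FAST_BORROW_LOAD_FAST_BORROW  1 (a, b)
#   BINARY_OP                          0 (+)
#   RETURN_VALUE
# Borrowing is safe here because the references pushed by the loads are
# consumed by BINARY_OP before either local could be overwritten.
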
293 changes: 293 additions & 0 deletions Python/flowgraph.c
@@ -2415,6 +2415,298 @@ insert_superinstructions(cfg_builder *g)
return res;
}

typedef struct {
// Index of instruction that produced the reference or -1.
int instr;

// The local to which the reference refers or -1.
int local;
} ref;

#define NOT_LOCAL -1

#define DUMMY_REF (ref){-1, NOT_LOCAL}

typedef struct {
ref *refs;
Py_ssize_t size;
Py_ssize_t capacity;
} ref_stack;

static bool
ref_stack_has_refs_from_instr(ref_stack *stack, int instr)
{
for (Py_ssize_t i = 0; i < stack->size; i++) {
if (stack->refs[i].instr == instr) {
return true;
}
}
return false;
}

static int
ref_stack_push(ref_stack *stack, ref r)
{
if (stack->size == stack->capacity) {
Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2);
ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap);
Review comment (Member): We have a utility for growing arrays in Include/internal/pycore_c_array.h (_Py_c_array_t).
Reply (mpage, Contributor Author): I don't think the code ends up being clearer if we use that here.

if (refs == NULL) {
PyErr_NoMemory();
return -1;
}
stack->refs = refs;
stack->capacity = new_cap;
}
stack->refs[stack->size] = r;
stack->size++;
return 0;
}

static ref
ref_stack_pop(ref_stack *stack)
{
assert(stack->size > 0);
stack->size--;
ref r = stack->refs[stack->size];
return r;
}

static void
ref_stack_swap_top(ref_stack *stack, Py_ssize_t off)
{
Py_ssize_t idx = stack->size - off;
assert(idx >= 0 && idx < stack->size);
ref tmp = stack->refs[idx];
stack->refs[idx] = stack->refs[stack->size - 1];
stack->refs[stack->size - 1] = tmp;
}

static ref
ref_stack_at(ref_stack *stack, Py_ssize_t idx)
{
assert(idx >= 0 && idx < stack->size);
return stack->refs[idx];
}

static void
ref_stack_clear(ref_stack *stack)
{
stack->size = 0;
}

static void
ref_stack_fini(ref_stack *stack)
{
if (stack->refs != NULL) {
PyMem_Free(stack->refs);
}
stack->refs = NULL;
stack->capacity = 0;
stack->size = 0;
}

static void
kill_local(bool *has_killed_refs, Py_ssize_t size, ref_stack *refs, int local)
{
for (Py_ssize_t i = 0; i < refs->size; i++) {
ref r = ref_stack_at(refs, i);
if (r.local == local) {
assert(r.instr >= 0);
has_killed_refs[r.instr] = true;
}
}
}

static void
load_fast_push_block(basicblock ***sp, basicblock *target, int start_depth)
{
assert(!target->b_visited || (target->b_startdepth == start_depth));
if (!target->b_visited) {
assert(target->b_startdepth == -1);
target->b_startdepth = start_depth;
target->b_visited = 1;
*(*sp)++ = target;
}
}

static int
optimize_load_fast(cfg_builder *g)
{
int status;
ref_stack refs = {0};
bool *has_killed_refs = NULL;
basicblock *entryblock = g->g_entryblock;
for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
b->b_startdepth = -1;
}
basicblock **blocks = make_cfg_traversal_stack(entryblock);
if (blocks == NULL) {
status = ERROR;
goto done;
}
basicblock **sp = blocks;
*sp = entryblock;
sp++;
entryblock->b_startdepth = 0;
entryblock->b_visited = 1;

while (sp != blocks) {
basicblock *block = *--sp;
assert(block->b_startdepth > -1);

// Reset state that tracks which instructions produce references to
// locals that are on the stack while the local is overwritten.
int size = sizeof(*has_killed_refs) * block->b_iused;
bool *p = PyMem_Realloc(has_killed_refs, size);
if (p == NULL) {
PyErr_NoMemory();
status = ERROR;
goto done;
}
else {
has_killed_refs = p;
}
memset(has_killed_refs, 0, size);

// Reset the stack of refs. We don't track references on the stack
// across basic blocks, but the bytecode will expect their
// presence. Add dummy references as necessary.
ref_stack_clear(&refs);
for (int i = 0; i < block->b_startdepth; i++) {
ref_stack_push(&refs, DUMMY_REF);
}

for (int i = 0; i < block->b_iused; i++) {
cfg_instr *instr = &block->b_instr[i];
int opcode = instr->i_opcode;
int oparg = instr->i_oparg;
assert(opcode != EXTENDED_ARG);
switch (opcode) {
case COPY: {
Py_ssize_t idx = refs.size - oparg;
ref r = ref_stack_at(&refs, idx);
if (ref_stack_push(&refs, r) < 0) {
status = ERROR;
goto done;
}
break;
}

case LOAD_FAST: {
if (ref_stack_push(&refs, (ref){i, oparg}) < 0) {
status = ERROR;
goto done;
}
break;
}

case LOAD_FAST_LOAD_FAST: {
if (ref_stack_push(&refs, (ref){i, oparg >> 4}) < 0) {
status = ERROR;
goto done;
}
if (ref_stack_push(&refs, (ref){i, oparg & 15}) < 0) {
status = ERROR;
goto done;
}
break;
}

case RETURN_VALUE: {
// We need to return a new reference so there is no point
// optimizing the instruction that produced the returned
// reference.
ref r = ref_stack_pop(&refs);
if (r.local != NOT_LOCAL) {
assert(r.instr >= 0);
has_killed_refs[r.instr] = true;
}
break;
}

case STORE_FAST: {
kill_local(has_killed_refs, block->b_iused, &refs, oparg);
ref_stack_pop(&refs);
break;
}

case STORE_FAST_STORE_FAST: {
kill_local(has_killed_refs, block->b_iused, &refs, oparg >> 4);
kill_local(has_killed_refs, block->b_iused, &refs, oparg & 15);
ref_stack_pop(&refs);
ref_stack_pop(&refs);
break;
}

case SWAP: {
ref_stack_swap_top(&refs, oparg);
break;
}

default: {
int num_popped = _PyOpcode_num_popped(opcode, oparg);
int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
if (HAS_TARGET(instr->i_opcode)) {
load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed);
}
if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) {
// Block push opcodes only affect the stack when jumping
// to the target.
for (int j = 0; j < num_popped; j++) {
ref_stack_pop(&refs);
}
for (int j = 0; j < num_pushed; j++) {
if (ref_stack_push(&refs, (ref){i, NOT_LOCAL}) < 0) {
status = ERROR;
goto done;
}
}
}
break;
}
}
}

// Optimize instructions
for (int i = 0; i < block->b_iused; i++) {
if (!has_killed_refs[i] && !ref_stack_has_refs_from_instr(&refs, i)) {
cfg_instr *instr = &block->b_instr[i];
switch (instr->i_opcode) {
case LOAD_FAST:
instr->i_opcode = LOAD_FAST_BORROW;
break;
case LOAD_FAST_LOAD_FAST:
instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
break;
default:
break;
}
}
}

// Push fallthrough block
cfg_instr *term = basicblock_last_instr(block);
if (term != NULL && block->b_next != NULL &&
!(IS_UNCONDITIONAL_JUMP_OPCODE(term->i_opcode) ||
IS_SCOPE_EXIT_OPCODE(term->i_opcode))) {
assert(BB_HAS_FALLTHROUGH(block));
load_fast_push_block(&sp, block->b_next, refs.size);
}
}

status = SUCCESS;

done:
ref_stack_fini(&refs);
if (has_killed_refs != NULL) {
PyMem_Free(has_killed_refs);
}
if (blocks != NULL) {
PyMem_Free(blocks);
}
return status;
}

// helper functions for add_checks_for_loads_of_unknown_variables
static inline void
maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
@@ -3028,6 +3320,7 @@ _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
add_checks_for_loads_of_uninitialized_variables(
g->g_entryblock, nlocals, nparams));
RETURN_IF_ERROR(insert_superinstructions(g));
RETURN_IF_ERROR(optimize_load_fast(g));

RETURN_IF_ERROR(push_cold_blocks_to_end(g));
RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
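
Conversely, a short sketch of a case the pass must skip, following the kill_local/STORE_FAST handling above (assuming the same build; bytecode details are illustrative):

import dis

def f(x):
    # The reference pushed by loading `x` is still on the evaluation stack
    # when the walrus expression rebinds `x` (a STORE_FAST), so the analysis
    # marks the producing LOAD_FAST as killed and leaves it unconverted.
    return (x, (x := None))[0]

dis.dis(f)
# Illustrative expectation: the load of `x` remains a plain LOAD_FAST rather
# than LOAD_FAST_BORROW, because a borrowed stack value could be left dangling
# once `x` is rebound and the fast-local slot drops its reference.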