Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
resolve_line_numbers as part of cfg-opt
  • Loading branch information
iritkatriel committed Apr 1, 2023
commit d01b033de21764f142d8bb869696bdfc05f87d0d
18 changes: 18 additions & 0 deletions Include/internal/pycore_compile.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,24 @@ extern int _PyAST_Optimize(
struct _arena *arena,
_PyASTOptimizeState *state);


/* A single instruction as stored in a _PyCompile_InstructionSequence. */
typedef struct {
int i_opcode; /* opcode of the instruction */
int i_oparg; /* argument (operand) of the instruction */
_PyCompilerSrcLocation i_loc; /* source location attached to the instruction */
} _PyCompilerInstruction;

/* A growable, linear sequence of instructions together with a map from
 * label ids to positions in that sequence.
 */
typedef struct {
_PyCompilerInstruction *s_instrs; /* instruction array; the last one in use is s_instrs[s_used-1] */
int s_allocated; /* presumably the allocated capacity of s_instrs -- TODO confirm */
int s_used; /* number of entries of s_instrs currently in use */

int *s_labelmap; /* label id --> instr offset */
int s_labelmap_size; /* presumably the allocated size of s_labelmap -- TODO confirm */
int s_next_free_label; /* next free label id */
} _PyCompile_InstructionSequence;


/* Utility for a number of growing arrays used in the compiler */
int _PyCompile_EnsureArrayLargeEnough(
int idx,
Expand Down
3 changes: 1 addition & 2 deletions Include/internal/pycore_flowgraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,9 @@ void _PyCfgBuilder_Fini(_PyCfgBuilder *g);

_PyCfgInstruction* _PyCfg_BasicblockLastInstr(const _PyCfgBasicblock *b);
int _PyCfg_OptimizeCodeUnit(_PyCfgBuilder *g, PyObject *consts, PyObject *const_cache,
int code_flags, int nlocals, int nparams);
int code_flags, int nlocals, int nparams, int firstlineno);
Comment thread
iritkatriel marked this conversation as resolved.
Outdated
int _PyCfg_Stackdepth(_PyCfgBasicblock *entryblock, int code_flags);
void _PyCfg_ConvertExceptionHandlersToNops(_PyCfgBasicblock *entryblock);
int _PyCfg_ResolveLineNumbers(_PyCfgBuilder *g, int firstlineno);
int _PyCfg_ResolveJumps(_PyCfgBuilder *g);
int _PyCfg_InstrSize(_PyCfgInstruction *instruction);

Expand Down
100 changes: 63 additions & 37 deletions Python/compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,21 +198,8 @@ enum {
COMPILER_SCOPE_COMPREHENSION,
};

/* A single instruction of an instruction sequence. */
typedef struct {
int i_opcode; /* opcode of the instruction */
int i_oparg; /* argument (operand) of the instruction */
location i_loc; /* source location attached to the instruction */
} instruction;

/* A growable, linear sequence of instructions together with a map from
 * label ids to positions in that sequence.
 */
typedef struct instr_sequence_ {
instruction *s_instrs; /* instruction array; the last one in use is s_instrs[s_used-1] */
int s_allocated; /* presumably the allocated capacity of s_instrs -- TODO confirm */
int s_used; /* number of entries of s_instrs currently in use */

int *s_labelmap; /* label id --> instr offset */
int s_labelmap_size; /* presumably the allocated size of s_labelmap -- TODO confirm */
int s_next_free_label; /* next free label id */
} instr_sequence;
/* Short file-local names for the instruction types declared in
 * Include/internal/pycore_compile.h.
 */
typedef _PyCompilerInstruction instruction;
typedef _PyCompile_InstructionSequence instr_sequence;

#define INITIAL_INSTR_SEQUENCE_SIZE 100
#define INITIAL_INSTR_SEQUENCE_LABELS_MAP_SIZE 10
Expand Down Expand Up @@ -7389,11 +7376,15 @@ add_return_at_end(struct compiler *c, int addNone)
return SUCCESS;
}

static int cfg_to_instr_sequence(cfg_builder *g, instr_sequence *seq);

static PyCodeObject *
assemble_code_unit(struct compiler_unit *u, PyObject *const_cache,
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this name still correct? Was it ever?

It seems to do optimization and assembly.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There used to be just assemble(C) which takes the compiler and does all the optimization and assembly. I split out assemble_code_unit (which takes just the c->u and not the whole c) and does most of what used to be in assemble(). So the name was always incorrect (assemble() called the optimizer). I think the reason it was done like this is that assemble() gets called from a number of places in the compiler (lambda, class, module, etc). So it was convenient to have one function that does opt+assemble and returns a code object.

We can rename assemble() and assemble_code_unit(). Any suggestions what to call it? something like instr_sequence_to_code_object()?

int code_flags, PyObject *filename)
{
instr_sequence optimized_instrs;
memset(&optimized_instrs, 0, sizeof(instr_sequence));

PyCodeObject *co = NULL;
PyObject *consts = consts_dict_keys_inorder(u->u_consts);
if (consts == NULL) {
Expand All @@ -7405,24 +7396,17 @@ assemble_code_unit(struct compiler_unit *u, PyObject *const_cache,
}
int nparams = (int)PyList_GET_SIZE(u->u_ste->ste_varnames);
int nlocals = (int)PyDict_GET_SIZE(u->u_varnames);
if (_PyCfg_OptimizeCodeUnit(&g, consts, const_cache, code_flags, nlocals, nparams) < 0) {
assert(u->u_firstlineno);
if (_PyCfg_OptimizeCodeUnit(&g, consts, const_cache, code_flags, nlocals,
nparams, u->u_firstlineno) < 0) {
goto error;
}

/** Assembly **/
/* Set firstlineno if it wasn't explicitly set. */
if (!u->u_firstlineno) {
if (g.g_entryblock->b_instr && g.g_entryblock->b_instr->i_loc.lineno) {
u->u_firstlineno = g.g_entryblock->b_instr->i_loc.lineno;
}
else {
u->u_firstlineno = 1;
}
}
if (_PyCfg_ResolveLineNumbers(&g, u->u_firstlineno) < 0) {
if (cfg_to_instr_sequence(&g, &optimized_instrs) < 0) {
goto error;
}

/** Assembly **/
int nlocalsplus = prepare_localsplus(u, &g, code_flags);
if (nlocalsplus < 0) {
goto error;
Expand All @@ -7441,6 +7425,10 @@ assemble_code_unit(struct compiler_unit *u, PyObject *const_cache,
if (_PyCfg_ResolveJumps(&g) < 0) {
goto error;
}
if (cfg_to_instr_sequence(&g, &optimized_instrs) < 0) {
goto error;
}


/* Can't modify the bytecode after computing jump offsets. */

Expand All @@ -7454,6 +7442,7 @@ assemble_code_unit(struct compiler_unit *u, PyObject *const_cache,

error:
Py_XDECREF(consts);
instr_sequence_fini(&optimized_instrs);
_PyCfgBuilder_Fini(&g);
return co;
}
Expand All @@ -7477,6 +7466,29 @@ assemble(struct compiler *c, int addNone)
return assemble_code_unit(u, const_cache, code_flags, filename);
}

/* Flatten the CFG `g` into the linear instruction sequence `seq`.
 *
 * Pass 1 relabels every basic block with the running count of the
 * instructions held by the blocks that precede it in b_next order.
 * Pass 2 walks the blocks in the same order, registers each block's
 * label with the sequence and appends the block's instructions. For an
 * opcode that has a target, the argument emitted is the label id of the
 * target block instead of the instruction's own i_oparg.
 *
 * Returns SUCCESS; failures of the sequence helpers are propagated
 * through RETURN_IF_ERROR.
 */
static int
cfg_to_instr_sequence(cfg_builder *g, instr_sequence *seq)
{
    basicblock *block;
    int next_label_id = 0;

    /* Pass 1: assign label ids. */
    for (block = g->g_entryblock; block != NULL; block = block->b_next) {
        jump_target_label label = {next_label_id};
        block->b_label = label;
        next_label_id += block->b_iused;
    }

    /* Pass 2: emit labels and instructions. */
    for (block = g->g_entryblock; block != NULL; block = block->b_next) {
        RETURN_IF_ERROR(instr_sequence_use_label(seq, block->b_label.id));
        for (int idx = 0; idx < block->b_iused; idx++) {
            cfg_instr *instr = &block->b_instr[idx];
            int oparg = instr->i_oparg;
            if (HAS_TARGET(instr->i_opcode)) {
                /* Jumps refer to their destination by label id. */
                oparg = instr->i_target->b_label.id;
            }
            RETURN_IF_ERROR(
                instr_sequence_addop(seq, instr->i_opcode, oparg,
                                     instr->i_loc));
        }
    }
    return SUCCESS;
}


/* Access to compiler optimizations for unit tests.
*
* _PyCompile_CodeGen takes an AST, applies code-gen and
Expand All @@ -7492,7 +7504,7 @@ assemble(struct compiler *c, int addNone)
*/

static int
instructions_to_cfg(PyObject *instructions, cfg_builder *g)
instructions_to_instr_sequence(PyObject *instructions, instr_sequence *seq)
{
assert(PyList_Check(instructions));

Expand Down Expand Up @@ -7526,8 +7538,7 @@ instructions_to_cfg(PyObject *instructions, cfg_builder *g)

for (int i = 0; i < num_insts; i++) {
if (is_target[i]) {
jump_target_label lbl = {i};
RETURN_IF_ERROR(_PyCfgBuilder_UseLabel(g, lbl));
RETURN_IF_ERROR(instr_sequence_use_label(seq, i));
}
PyObject *item = PyList_GET_ITEM(instructions, i);
if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 6) {
Expand Down Expand Up @@ -7565,11 +7576,10 @@ instructions_to_cfg(PyObject *instructions, cfg_builder *g)
if (PyErr_Occurred()) {
goto error;
}
RETURN_IF_ERROR(_PyCfgBuilder_Addop(g, opcode, oparg, loc));
RETURN_IF_ERROR(instr_sequence_addop(seq, opcode, oparg, loc));
}
cfg_instr *last = _PyCfg_BasicblockLastInstr(g->g_curblock);
if (last && !IS_TERMINATOR_OPCODE(last->i_opcode)) {
RETURN_IF_ERROR(_PyCfgBuilder_Addop(g, RETURN_VALUE, 0, NO_LOCATION));
if (seq->s_used && !IS_TERMINATOR_OPCODE(seq->s_instrs[seq->s_used-1].i_opcode)) {
RETURN_IF_ERROR(instr_sequence_addop(seq, RETURN_VALUE, 0, NO_LOCATION));
}
PyMem_Free(is_target);
return SUCCESS;
Expand All @@ -7578,8 +7588,23 @@ instructions_to_cfg(PyObject *instructions, cfg_builder *g)
return ERROR;
}

/* Build the CFG `g` from `instructions`, a Python list of instruction
 * tuples.
 *
 * The list is first converted into a temporary instr_sequence, which is
 * then converted into the CFG. The temporary sequence is finalized on
 * every path: the first step can append instructions before it fails,
 * so returning early without instr_sequence_fini() would leak them.
 *
 * Returns SUCCESS, or ERROR if either conversion step fails.
 */
static int
instructions_to_cfg(PyObject *instructions, cfg_builder *g)
{
    int res = ERROR;
    instr_sequence seq;
    memset(&seq, 0, sizeof(instr_sequence));

    if (instructions_to_instr_sequence(instructions, &seq) < 0) {
        goto done;
    }
    if (instr_sequence_to_cfg(&seq, g) < 0) {
        goto done;
    }
    res = SUCCESS;

done:
    /* Single exit: release the temporary sequence on success and failure. */
    instr_sequence_fini(&seq);
    return res;
}

static PyObject *
instr_sequence_to_instructions(instr_sequence *seq) {
instr_sequence_to_instructions(instr_sequence *seq)
{
PyObject *instructions = PyList_New(0);
if (instructions == NULL) {
return NULL;
Expand Down Expand Up @@ -7709,8 +7734,9 @@ _PyCompile_OptimizeCfg(PyObject *instructions, PyObject *consts)
if (instructions_to_cfg(instructions, &g) < 0) {
goto error;
}
int code_flags = 0, nlocals = 0, nparams = 0;
if (_PyCfg_OptimizeCodeUnit(&g, consts, const_cache, code_flags, nlocals, nparams) < 0) {
int code_flags = 0, nlocals = 0, nparams = 0, firstlineno = 1;
if (_PyCfg_OptimizeCodeUnit(&g, consts, const_cache, code_flags, nlocals,
nparams, firstlineno) < 0) {
goto error;
}
res = cfg_to_instructions(&g);
Expand Down
7 changes: 4 additions & 3 deletions Python/flowgraph.c
Original file line number Diff line number Diff line change
Expand Up @@ -1980,7 +1980,7 @@ push_cold_blocks_to_end(cfg_builder *g, int code_flags) {

int
_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
int code_flags, int nlocals, int nparams)
int code_flags, int nlocals, int nparams, int firstlineno)
{
assert(cfg_builder_check(g));
/** Preprocessing **/
Expand All @@ -1997,6 +1997,7 @@ _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
g->g_entryblock, nlocals, nparams));

RETURN_IF_ERROR(push_cold_blocks_to_end(g, code_flags));
RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
return SUCCESS;
}

Expand Down Expand Up @@ -2149,8 +2150,8 @@ guarantee_lineno_for_exits(basicblock *entryblock, int firstlineno) {
}
}

int
_PyCfg_ResolveLineNumbers(cfg_builder *g, int firstlineno)
static int
resolve_line_numbers(cfg_builder *g, int firstlineno)
{
RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
propagate_line_numbers(g->g_entryblock);
Expand Down