Skip to content

Commit 9b7f583

Browse files
committed
py: Reorganise bytecode layout so it's more structured, easier to edit.
1 parent f882d53 commit 9b7f583

File tree

7 files changed

+130
-123
lines changed

7 files changed

+130
-123
lines changed

py/bc.c

Lines changed: 22 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -84,21 +84,25 @@ STATIC void dump_args(const mp_obj_t *a, mp_uint_t sz) {
8484

8585
// On entry code_state should be allocated somewhere (stack/heap) and
8686
// contain the following valid entries:
87-
// - code_state->code_info should be the offset in bytes from the start of
88-
// the bytecode chunk to the start of the code-info within the bytecode
8987
// - code_state->ip should contain the offset in bytes from the start of
90-
// the bytecode chunk to the start of the prelude within the bytecode
88+
// the bytecode chunk to just after n_state and n_exc_stack
9189
// - code_state->n_state should be set to the state size (locals plus stack)
9290
void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args) {
9391
// This function is pretty complicated. Its main aim is to be efficient in speed and RAM
9492
// usage for the common case of positional only args.
9593
mp_obj_fun_bc_t *self = self_in;
9694
mp_uint_t n_state = code_state->n_state;
9795

96+
// ip comes in as an offset into bytecode, so turn it into a true pointer
97+
code_state->ip = self->bytecode + (mp_uint_t)code_state->ip;
98+
9899
#if MICROPY_STACKLESS
99100
code_state->prev = NULL;
100101
#endif
101-
code_state->code_info = self->bytecode + (mp_uint_t)code_state->code_info;
102+
103+
// align ip
104+
code_state->ip = MP_ALIGN(code_state->ip, sizeof(mp_uint_t));
105+
102106
code_state->sp = &code_state->state[0] - 1;
103107
code_state->exc_sp = (mp_exc_stack_t*)(code_state->state + n_state) - 1;
104108

@@ -156,13 +160,8 @@ void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t
156160
*var_pos_kw_args = dict;
157161
}
158162

159-
// get pointer to arg_names array at start of bytecode prelude
160-
const mp_obj_t *arg_names;
161-
{
162-
const byte *code_info = code_state->code_info;
163-
mp_uint_t code_info_size = mp_decode_uint(&code_info);
164-
arg_names = (const mp_obj_t*)(code_state->code_info + code_info_size);
165-
}
163+
// get pointer to arg_names array
164+
const mp_obj_t *arg_names = (const mp_obj_t*)code_state->ip;
166165

167166
for (mp_uint_t i = 0; i < n_kw; i++) {
168167
mp_obj_t wanted_arg_name = kwargs[2 * i];
@@ -235,8 +234,19 @@ continue2:;
235234
}
236235
}
237236

237+
// get the ip and skip argument names
238+
const byte *ip = code_state->ip;
239+
ip += (self->n_pos_args + self->n_kwonly_args) * sizeof(mp_uint_t);
240+
241+
// store pointer to code_info and jump over it
242+
{
243+
code_state->code_info = ip;
244+
const byte *ip2 = ip;
245+
mp_uint_t code_info_size = mp_decode_uint(&ip2);
246+
ip += code_info_size;
247+
}
248+
238249
// bytecode prelude: initialise closed over variables
239-
const byte *ip = self->bytecode + (mp_uint_t)code_state->ip;
240250
mp_uint_t local_num;
241251
while ((local_num = *ip++) != 255) {
242252
code_state->state[n_state - 1 - local_num] =

py/bc.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,30 @@
2929
#include "py/runtime.h"
3030
#include "py/obj.h"
3131

32+
// bytecode layout:
33+
//
34+
// n_state : var uint
35+
// n_exc_stack : var uint
36+
//
37+
// <word alignment padding>
38+
//
39+
// argname0 : obj (qstr)
40+
// ... : obj (qstr)
41+
// argnameN : obj (qstr) N = num_pos_args + num_kwonly_args
42+
//
43+
// code_info_size : var uint | code_info_size counts bytes in this chunk
44+
// simple_name : var qstr |
45+
// source_file : var qstr |
46+
// <line number info> |
47+
// <word alignment padding> |
48+
//
49+
// num_cells : byte number of locals that are cells
50+
// local_num0 : byte
51+
// ... : byte
52+
// local_numN : byte N = num_cells
53+
//
54+
// <bytecode>
55+
3256
// Exception stack entry
3357
typedef struct _mp_exc_stack {
3458
const byte *handler;

py/emitbc.c

Lines changed: 45 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -111,14 +111,25 @@ STATIC void emit_align_code_info_to_machine_word(emit_t *emit) {
111111
emit->code_info_offset = (emit->code_info_offset + sizeof(mp_uint_t) - 1) & (~(sizeof(mp_uint_t) - 1));
112112
}
113113

114-
STATIC void emit_write_code_info_uint(emit_t *emit, mp_uint_t val) {
114+
STATIC void emit_write_code_info_byte(emit_t* emit, byte val) {
115+
*emit_get_cur_to_write_code_info(emit, 1) = val;
116+
}
117+
118+
STATIC void emit_write_code_info_uint(emit_t* emit, mp_uint_t val) {
115119
emit_write_uint(emit, emit_get_cur_to_write_code_info, val);
116120
}
117121

118122
STATIC void emit_write_code_info_qstr(emit_t *emit, qstr qst) {
119123
emit_write_uint(emit, emit_get_cur_to_write_code_info, qst);
120124
}
121125

126+
STATIC void emit_write_code_info_prealigned_ptr(emit_t* emit, void *ptr) {
127+
mp_uint_t *c = (mp_uint_t*)emit_get_cur_to_write_code_info(emit, sizeof(mp_uint_t));
128+
// Verify that c is already uint-aligned
129+
assert(c == MP_ALIGN(c, sizeof(mp_uint_t)));
130+
*c = (mp_uint_t)ptr;
131+
}
132+
122133
#if MICROPY_ENABLE_SOURCE_LINE
123134
STATIC void emit_write_code_info_bytes_lines(emit_t *emit, mp_uint_t bytes_to_skip, mp_uint_t lines_to_skip) {
124135
assert(bytes_to_skip > 0 || lines_to_skip > 0);
@@ -167,11 +178,7 @@ STATIC void emit_write_bytecode_byte(emit_t *emit, byte b1) {
167178
c[0] = b1;
168179
}
169180

170-
STATIC void emit_write_bytecode_uint(emit_t *emit, mp_uint_t val) {
171-
emit_write_uint(emit, emit_get_cur_to_write_bytecode, val);
172-
}
173-
174-
STATIC void emit_write_bytecode_byte_byte(emit_t *emit, byte b1, byte b2) {
181+
STATIC void emit_write_bytecode_byte_byte(emit_t* emit, byte b1, byte b2) {
175182
assert((b2 & (~0xff)) == 0);
176183
byte *c = emit_get_cur_to_write_bytecode(emit, 2);
177184
c[0] = b1;
@@ -210,13 +217,6 @@ STATIC void emit_write_bytecode_byte_uint(emit_t *emit, byte b, mp_uint_t val) {
210217
emit_write_uint(emit, emit_get_cur_to_write_bytecode, val);
211218
}
212219

213-
STATIC void emit_write_bytecode_prealigned_ptr(emit_t *emit, void *ptr) {
214-
mp_uint_t *c = (mp_uint_t*)emit_get_cur_to_write_bytecode(emit, sizeof(mp_uint_t));
215-
// Verify that c is already uint-aligned
216-
assert(c == MP_ALIGN(c, sizeof(mp_uint_t)));
217-
*c = (mp_uint_t)ptr;
218-
}
219-
220220
// aligns the pointer so it is friendly to GC
221221
STATIC void emit_write_bytecode_byte_ptr(emit_t *emit, byte b, void *ptr) {
222222
emit_write_bytecode_byte(emit, b);
@@ -227,15 +227,7 @@ STATIC void emit_write_bytecode_byte_ptr(emit_t *emit, byte b, void *ptr) {
227227
*c = (mp_uint_t)ptr;
228228
}
229229

230-
/* currently unused
231-
STATIC void emit_write_bytecode_byte_uint_uint(emit_t *emit, byte b, mp_uint_t num1, mp_uint_t num2) {
232-
emit_write_bytecode_byte(emit, b);
233-
emit_write_bytecode_byte_uint(emit, num1);
234-
emit_write_bytecode_byte_uint(emit, num2);
235-
}
236-
*/
237-
238-
STATIC void emit_write_bytecode_byte_qstr(emit_t *emit, byte b, qstr qst) {
230+
STATIC void emit_write_bytecode_byte_qstr(emit_t* emit, byte b, qstr qst) {
239231
emit_write_bytecode_byte_uint(emit, b, qst);
240232
}
241233

@@ -289,19 +281,26 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) {
289281
emit->bytecode_offset = 0;
290282
emit->code_info_offset = 0;
291283

292-
// Write code info size as compressed uint. If we are not in the final pass
293-
// then space for this uint is reserved in emit_bc_end_pass.
294-
if (pass == MP_PASS_EMIT) {
295-
emit_write_code_info_uint(emit, emit->code_info_size);
284+
// Write local state size and exception stack size.
285+
{
286+
mp_uint_t n_state = scope->num_locals + scope->stack_size;
287+
if (n_state == 0) {
288+
// Need at least 1 entry in the state, in the case an exception is
289+
// propagated through this function, the exception is returned in
290+
// the highest slot in the state (fastn[0], see vm.c).
291+
n_state = 1;
292+
}
293+
emit_write_code_info_uint(emit, n_state);
294+
emit_write_code_info_uint(emit, scope->exc_stack_size);
296295
}
297296

298-
// write the name and source file of this function
299-
emit_write_code_info_qstr(emit, scope->simple_name);
300-
emit_write_code_info_qstr(emit, scope->source_file);
297+
// Align code-info so that following pointers are aligned on a machine word.
298+
emit_align_code_info_to_machine_word(emit);
301299

302-
// bytecode prelude: argument names (needed to resolve positional args passed as keywords)
303-
// we store them as full word-sized objects for efficient access in mp_setup_code_state
304-
// this is the start of the prelude and is guaranteed to be aligned on a word boundary
300+
// Write argument names (needed to resolve positional args passed as
301+
// keywords). We store them as full word-sized objects for efficient access
302+
// in mp_setup_code_state. This is the start of the prelude and is guaranteed
303+
// to be aligned on a word boundary.
305304
{
306305
// For a given argument position (indexed by i) we need to find the
307306
// corresponding id_info which is a parameter, as it has the correct
@@ -322,23 +321,23 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) {
322321
break;
323322
}
324323
}
325-
emit_write_bytecode_prealigned_ptr(emit, MP_OBJ_NEW_QSTR(qst));
324+
emit_write_code_info_prealigned_ptr(emit, MP_OBJ_NEW_QSTR(qst));
326325
}
327326
}
328327

329-
// bytecode prelude: local state size and exception stack size
330-
{
331-
mp_uint_t n_state = scope->num_locals + scope->stack_size;
332-
if (n_state == 0) {
333-
// Need at least 1 entry in the state, in the case an exception is
334-
// propagated through this function, the exception is returned in
335-
// the highest slot in the state (fastn[0], see vm.c).
336-
n_state = 1;
337-
}
338-
emit_write_bytecode_uint(emit, n_state);
339-
emit_write_bytecode_uint(emit, scope->exc_stack_size);
328+
// Write size of the rest of the code info. We don't know how big this
329+
// variable uint will be on the MP_PASS_CODE_SIZE pass so we reserve 2 bytes
330+
// for it and hope that is enough! TODO assert this or something.
331+
if (pass == MP_PASS_EMIT) {
332+
emit_write_code_info_uint(emit, emit->code_info_size - emit->code_info_offset);
333+
} else {
334+
emit_get_cur_to_write_code_info(emit, 2);
340335
}
341336

337+
// Write the name and source file of this function.
338+
emit_write_code_info_qstr(emit, scope->simple_name);
339+
emit_write_code_info_qstr(emit, scope->source_file);
340+
342341
// bytecode prelude: initialise closed over variables
343342
for (int i = 0; i < scope->id_info_len; i++) {
344343
id_info_t *id = &scope->id_info[i];
@@ -360,25 +359,10 @@ void mp_emit_bc_end_pass(emit_t *emit) {
360359
mp_printf(&mp_plat_print, "ERROR: stack size not back to zero; got %d\n", emit->stack_size);
361360
}
362361

363-
*emit_get_cur_to_write_code_info(emit, 1) = 0; // end of line number info
362+
emit_write_code_info_byte(emit, 0); // end of line number info
364363

365364
if (emit->pass == MP_PASS_CODE_SIZE) {
366-
// Need to make sure we have enough room in the code-info block to write
367-
// the size of the code-info block. Since the size is written as a
368-
// compressed uint, we don't know its size until we write it! Thus, we
369-
// take the biggest possible value it could be and write that here.
370-
// Then there will be enough room to write the value, and any leftover
371-
// space will be absorbed in the alignment at the end of the code-info
372-
// block.
373-
mp_uint_t max_code_info_size =
374-
emit->code_info_offset // current code-info size
375-
+ BYTES_FOR_INT // maximum space for compressed uint
376-
+ BYTES_PER_WORD - 1; // maximum space for alignment padding
377-
emit_write_code_info_uint(emit, max_code_info_size);
378-
379-
// Align code-info so that following bytecode is aligned on a machine word.
380-
// We don't need to write anything here, it's just dead space between the
381-
// code-info block and the bytecode block that follows it.
365+
// so bytecode is aligned
382366
emit_align_code_info_to_machine_word(emit);
383367

384368
// calculate size of total code-info + bytecode, in bytes

py/emitnative.c

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -566,8 +566,6 @@ struct _emit_t {
566566
stack_info_t *stack_info;
567567
vtype_kind_t saved_stack_vtype;
568568

569-
int code_info_size;
570-
int code_info_offset;
571569
int prelude_offset;
572570
int n_state;
573571
int stack_start;
@@ -774,10 +772,6 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop
774772
ASM_MOV_REG_REG(emit->as, REG_ARG_2, REG_ARG_1);
775773
#endif
776774

777-
// set code_state.code_info (offset from start of this function to code_info data)
778-
// XXX this encoding may change size
779-
ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->code_info_offset, offsetof(mp_code_state, code_info) / sizeof(mp_uint_t), REG_ARG_1);
780-
781775
// set code_state.ip (offset from start of this function to prelude info)
782776
// XXX this encoding may change size
783777
ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->prelude_offset, offsetof(mp_code_state, ip) / sizeof(mp_uint_t), REG_ARG_1);
@@ -829,11 +823,10 @@ STATIC void emit_native_end_pass(emit_t *emit) {
829823
}
830824

831825
if (!emit->do_viper_types) {
832-
// write dummy code info (for mp_setup_code_state to parse) and arg names
833-
emit->code_info_offset = ASM_GET_CODE_POS(emit->as);
834-
ASM_DATA(emit->as, 1, emit->code_info_size);
826+
emit->prelude_offset = ASM_GET_CODE_POS(emit->as);
835827
ASM_ALIGN(emit->as, ASM_WORD_SIZE);
836-
emit->code_info_size = ASM_GET_CODE_POS(emit->as) - emit->code_info_offset;
828+
829+
// write argument names as qstr objects
837830
// see comment in corresponding part of emitbc.c about the logic here
838831
for (int i = 0; i < emit->scope->num_pos_args + emit->scope->num_kwonly_args; i++) {
839832
qstr qst = MP_QSTR__star_;
@@ -847,8 +840,10 @@ STATIC void emit_native_end_pass(emit_t *emit) {
847840
ASM_DATA(emit->as, ASM_WORD_SIZE, (mp_uint_t)MP_OBJ_NEW_QSTR(qst));
848841
}
849842

843+
// write dummy code info (for mp_setup_code_state to parse)
844+
ASM_DATA(emit->as, 1, 1);
845+
850846
// bytecode prelude: initialise closed over variables
851-
emit->prelude_offset = ASM_GET_CODE_POS(emit->as);
852847
for (int i = 0; i < emit->scope->id_info_len; i++) {
853848
id_info_t *id = &emit->scope->id_info[i];
854849
if (id->kind == ID_INFO_KIND_CELL) {

py/objfun.c

Lines changed: 13 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,13 @@ qstr mp_obj_fun_get_name(mp_const_obj_t fun_in) {
121121
return MP_QSTR_;
122122
}
123123
#endif
124-
const byte *code_info = fun->bytecode;
125-
return mp_obj_code_get_name(code_info);
124+
125+
const byte *bc = fun->bytecode;
126+
mp_decode_uint(&bc); // skip n_state
127+
mp_decode_uint(&bc); // skip n_exc_stack
128+
bc = MP_ALIGN(bc, sizeof(mp_uint_t)); // align
129+
bc += (fun->n_pos_args + fun->n_kwonly_args) * sizeof(mp_uint_t); // skip arg names
130+
return mp_obj_code_get_name(bc);
126131
}
127132

128133
#if MICROPY_CPYTHON_COMPAT
@@ -158,13 +163,8 @@ mp_code_state *mp_obj_fun_bc_prepare_codestate(mp_obj_t self_in, mp_uint_t n_arg
158163
MP_STACK_CHECK();
159164
mp_obj_fun_bc_t *self = self_in;
160165

161-
// skip code-info block
162-
const byte *code_info = self->bytecode;
163-
mp_uint_t code_info_size = mp_decode_uint(&code_info);
164-
const byte *ip = self->bytecode + code_info_size;
165-
166-
// bytecode prelude: skip arg names
167-
ip += (self->n_pos_args + self->n_kwonly_args) * sizeof(mp_obj_t);
166+
// get start of bytecode
167+
const byte *ip = self->bytecode;
168168

169169
// bytecode prelude: state size and exception stack size
170170
mp_uint_t n_state = mp_decode_uint(&ip);
@@ -178,9 +178,8 @@ mp_code_state *mp_obj_fun_bc_prepare_codestate(mp_obj_t self_in, mp_uint_t n_arg
178178
return NULL;
179179
}
180180

181+
code_state->ip = (byte*)(ip - self->bytecode); // offset to after n_state/n_exc_stack
181182
code_state->n_state = n_state;
182-
code_state->code_info = 0; // offset to code-info
183-
code_state->ip = (byte*)(ip - self->bytecode); // offset to prelude
184183
mp_setup_code_state(code_state, self_in, n_args, n_kw, args);
185184

186185
// execute the byte code with the correct globals context
@@ -202,13 +201,8 @@ STATIC mp_obj_t fun_bc_call(mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw,
202201
mp_obj_fun_bc_t *self = self_in;
203202
DEBUG_printf("Func n_def_args: %d\n", self->n_def_args);
204203

205-
// skip code-info block
206-
const byte *code_info = self->bytecode;
207-
mp_uint_t code_info_size = mp_decode_uint(&code_info);
208-
const byte *ip = self->bytecode + code_info_size;
209-
210-
// bytecode prelude: skip arg names
211-
ip += (self->n_pos_args + self->n_kwonly_args) * sizeof(mp_obj_t);
204+
// get start of bytecode
205+
const byte *ip = self->bytecode;
212206

213207
// bytecode prelude: state size and exception stack size
214208
mp_uint_t n_state = mp_decode_uint(&ip);
@@ -229,9 +223,8 @@ STATIC mp_obj_t fun_bc_call(mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw,
229223
state_size = 0; // indicate that we allocated using alloca
230224
}
231225

226+
code_state->ip = (byte*)(ip - self->bytecode); // offset to after n_state/n_exc_stack
232227
code_state->n_state = n_state;
233-
code_state->code_info = 0; // offset to code-info
234-
code_state->ip = (byte*)(ip - self->bytecode); // offset to prelude
235228
mp_setup_code_state(code_state, self_in, n_args, n_kw, args);
236229

237230
// execute the byte code with the correct globals context

0 commit comments

Comments
 (0)