Skip to content

Commit 71a3d6e

Browse files
committed
py: Reduce size of mp_code_state_t structure.
Instead of caching data that is constant (code_info, const_table and n_state), store just a pointer to the underlying function object from which this data can be derived. This helps reduce stack usage for the case when the mp_code_state_t structure is stored on the stack, as well as heap usage when it's stored on the heap. The downside is that the VM becomes a little more complex because it now needs to derive the data from the underlying function object. But this doesn't impact the performance by much (if at all) because most of the decoding of data is done outside the main opcode loop. Measurements using pystone show that little to no performance is lost. This patch also fixes a nasty bug whereby the bytecode can be reclaimed by the GC during execution. With this patch there is always a pointer to the function object held by the VM during execution, since it's stored in the mp_code_state_t structure.
1 parent eeff0c3 commit 71a3d6e

6 files changed

Lines changed: 62 additions & 59 deletions

File tree

py/bc.c

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -86,25 +86,26 @@ STATIC void dump_args(const mp_obj_t *a, size_t sz) {
8686

8787
// On entry code_state should be allocated somewhere (stack/heap) and
8888
// contain the following valid entries:
89-
// - code_state->ip should contain the offset in bytes from the start of
90-
// the bytecode chunk to just after n_state and n_exc_stack
91-
// - code_state->n_state should be set to the state size (locals plus stack)
92-
void mp_setup_code_state(mp_code_state_t *code_state, mp_obj_fun_bc_t *self, size_t n_args, size_t n_kw, const mp_obj_t *args) {
89+
// - code_state->fun_bc should contain a pointer to the function object
90+
// - code_state->ip should contain the offset in bytes from the pointer
91+
// code_state->fun_bc->bytecode to the entry n_state (0 for bytecode, non-zero for native)
92+
void mp_setup_code_state(mp_code_state_t *code_state, size_t n_args, size_t n_kw, const mp_obj_t *args) {
9393
// This function is pretty complicated. Its main aim is to be efficient in speed and RAM
9494
// usage for the common case of positional only args.
95-
size_t n_state = code_state->n_state;
95+
96+
// get the function object that we want to set up (could be bytecode or native code)
97+
mp_obj_fun_bc_t *self = code_state->fun_bc;
9698

9799
// ip comes in as an offset into bytecode, so turn it into a true pointer
98100
code_state->ip = self->bytecode + (size_t)code_state->ip;
99101

100-
// store pointer to constant table
101-
code_state->const_table = self->const_table;
102-
103102
#if MICROPY_STACKLESS
104103
code_state->prev = NULL;
105104
#endif
106105

107106
// get params
107+
size_t n_state = mp_decode_uint(&code_state->ip);
108+
mp_decode_uint(&code_state->ip); // skip n_exc_stack
108109
size_t scope_flags = *code_state->ip++;
109110
size_t n_pos_args = *code_state->ip++;
110111
size_t n_kwonly_args = *code_state->ip++;
@@ -168,7 +169,7 @@ void mp_setup_code_state(mp_code_state_t *code_state, mp_obj_fun_bc_t *self, siz
168169
}
169170

170171
// get pointer to arg_names array
171-
const mp_obj_t *arg_names = (const mp_obj_t*)code_state->const_table;
172+
const mp_obj_t *arg_names = (const mp_obj_t*)self->const_table;
172173

173174
for (size_t i = 0; i < n_kw; i++) {
174175
// the keys in kwargs are expected to be qstr objects
@@ -244,9 +245,8 @@ continue2:;
244245
// get the ip and skip argument names
245246
const byte *ip = code_state->ip;
246247

247-
// store pointer to code_info and jump over it
248+
// jump over code info (source file and line-number mapping)
248249
{
249-
code_state->code_info = ip;
250250
const byte *ip2 = ip;
251251
size_t code_info_size = mp_decode_uint(&ip2);
252252
ip += code_info_size;

py/bc.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
#include "py/runtime.h"
3030
#include "py/obj.h"
31+
#include "py/objfun.h"
3132

3233
// bytecode layout:
3334
//
@@ -70,17 +71,19 @@ typedef struct _mp_exc_stack_t {
7071
} mp_exc_stack_t;
7172

7273
typedef struct _mp_code_state_t {
73-
const byte *code_info;
74+
// The fun_bc entry points to the underlying function object that is being executed.
75+
// It is needed to access the start of bytecode and the const_table.
76+
// It is also needed to prevent the GC from reclaiming the bytecode during execution,
77+
// because the ip pointer below will always point to the interior of the bytecode.
78+
mp_obj_fun_bc_t *fun_bc;
7479
const byte *ip;
75-
const mp_uint_t *const_table;
7680
mp_obj_t *sp;
7781
// bit 0 is saved currently_in_except_block value
7882
mp_exc_stack_t *exc_sp;
7983
mp_obj_dict_t *old_globals;
8084
#if MICROPY_STACKLESS
8185
struct _mp_code_state_t *prev;
8286
#endif
83-
size_t n_state;
8487
// Variable-length
8588
mp_obj_t state[0];
8689
// Variable-length, never accessed by name, only as (void*)(state + n_state)
@@ -91,8 +94,7 @@ mp_uint_t mp_decode_uint(const byte **ptr);
9194

9295
mp_vm_return_kind_t mp_execute_bytecode(mp_code_state_t *code_state, volatile mp_obj_t inject_exc);
9396
mp_code_state_t *mp_obj_fun_bc_prepare_codestate(mp_obj_t func, size_t n_args, size_t n_kw, const mp_obj_t *args);
94-
struct _mp_obj_fun_bc_t;
95-
void mp_setup_code_state(mp_code_state_t *code_state, struct _mp_obj_fun_bc_t *self, size_t n_args, size_t n_kw, const mp_obj_t *args);
97+
void mp_setup_code_state(mp_code_state_t *code_state, size_t n_args, size_t n_kw, const mp_obj_t *args);
9698
void mp_bytecode_print(const void *descr, const byte *code, mp_uint_t len, const mp_uint_t *const_table);
9799
void mp_bytecode_print2(const byte *code, size_t len, const mp_uint_t *const_table);
98100
const byte *mp_bytecode_print_str(const byte *ip);

py/emitnative.c

Lines changed: 12 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -407,43 +407,29 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop
407407
#endif
408408

409409
// prepare incoming arguments for call to mp_setup_code_state
410+
410411
#if N_X86
411-
asm_x86_mov_arg_to_r32(emit->as, 0, REG_ARG_2);
412-
asm_x86_mov_arg_to_r32(emit->as, 1, REG_ARG_3);
413-
asm_x86_mov_arg_to_r32(emit->as, 2, REG_ARG_4);
414-
asm_x86_mov_arg_to_r32(emit->as, 3, REG_ARG_5);
415-
#else
416-
#if N_THUMB
417-
ASM_MOV_REG_REG(emit->as, ASM_THUMB_REG_R4, REG_ARG_4);
418-
#elif N_ARM
419-
ASM_MOV_REG_REG(emit->as, ASM_ARM_REG_R4, REG_ARG_4);
420-
#else
421-
ASM_MOV_REG_REG(emit->as, REG_ARG_5, REG_ARG_4);
422-
#endif
423-
ASM_MOV_REG_REG(emit->as, REG_ARG_4, REG_ARG_3);
424-
ASM_MOV_REG_REG(emit->as, REG_ARG_3, REG_ARG_2);
425-
ASM_MOV_REG_REG(emit->as, REG_ARG_2, REG_ARG_1);
412+
asm_x86_mov_arg_to_r32(emit->as, 0, REG_ARG_1);
413+
asm_x86_mov_arg_to_r32(emit->as, 1, REG_ARG_2);
414+
asm_x86_mov_arg_to_r32(emit->as, 2, REG_ARG_3);
415+
asm_x86_mov_arg_to_r32(emit->as, 3, REG_ARG_4);
426416
#endif
427417

418+
// set code_state.fun_bc
419+
ASM_MOV_REG_TO_LOCAL(emit->as, REG_ARG_1, offsetof(mp_code_state_t, fun_bc) / sizeof(uintptr_t));
420+
428421
// set code_state.ip (offset from start of this function to prelude info)
429422
// XXX this encoding may change size
430-
ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->prelude_offset, offsetof(mp_code_state_t, ip) / sizeof(mp_uint_t), REG_ARG_1);
431-
432-
// set code_state.n_state
433-
ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->n_state, offsetof(mp_code_state_t, n_state) / sizeof(mp_uint_t), REG_ARG_1);
423+
ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->prelude_offset, offsetof(mp_code_state_t, ip) / sizeof(uintptr_t), REG_ARG_1);
434424

435425
// put address of code_state into first arg
436426
ASM_MOV_LOCAL_ADDR_TO_REG(emit->as, 0, REG_ARG_1);
437427

438428
// call mp_setup_code_state to prepare code_state structure
439429
#if N_THUMB
440-
asm_thumb_op16(emit->as, 0xb400 | (1 << ASM_THUMB_REG_R4)); // push 5th arg
441430
asm_thumb_bl_ind(emit->as, mp_fun_table[MP_F_SETUP_CODE_STATE], MP_F_SETUP_CODE_STATE, ASM_THUMB_REG_R4);
442-
asm_thumb_op16(emit->as, 0xbc00 | (1 << REG_RET)); // pop dummy (was 5th arg)
443431
#elif N_ARM
444-
asm_arm_push(emit->as, 1 << ASM_ARM_REG_R4); // push 5th arg
445432
asm_arm_bl_ind(emit->as, mp_fun_table[MP_F_SETUP_CODE_STATE], MP_F_SETUP_CODE_STATE, ASM_ARM_REG_R4);
446-
asm_arm_pop(emit->as, 1 << REG_RET); // pop dummy (was 5th arg)
447433
#else
448434
ASM_CALL_IND(emit->as, mp_fun_table[MP_F_SETUP_CODE_STATE], MP_F_SETUP_CODE_STATE);
449435
#endif
@@ -477,6 +463,9 @@ STATIC void emit_native_end_pass(emit_t *emit) {
477463

478464
if (!emit->do_viper_types) {
479465
emit->prelude_offset = mp_asm_base_get_code_pos(&emit->as->base);
466+
mp_asm_base_data(&emit->as->base, 1, 0x80 | ((emit->n_state >> 7) & 0x7f));
467+
mp_asm_base_data(&emit->as->base, 1, emit->n_state & 0x7f);
468+
mp_asm_base_data(&emit->as->base, 1, 0); // n_exc_stack
480469
mp_asm_base_data(&emit->as->base, 1, emit->scope->scope_flags);
481470
mp_asm_base_data(&emit->as->base, 1, emit->scope->num_pos_args);
482471
mp_asm_base_data(&emit->as->base, 1, emit->scope->num_kwonly_args);

py/objfun.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -220,9 +220,9 @@ mp_code_state_t *mp_obj_fun_bc_prepare_codestate(mp_obj_t self_in, size_t n_args
220220
return NULL;
221221
}
222222

223-
code_state->ip = (byte*)(ip - self->bytecode); // offset to after n_state/n_exc_stack
224-
code_state->n_state = n_state;
225-
mp_setup_code_state(code_state, self, n_args, n_kw, args);
223+
code_state->fun_bc = self;
224+
code_state->ip = 0;
225+
mp_setup_code_state(code_state, n_args, n_kw, args);
226226

227227
// execute the byte code with the correct globals context
228228
code_state->old_globals = mp_globals_get();
@@ -265,9 +265,9 @@ STATIC mp_obj_t fun_bc_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const
265265
state_size = 0; // indicate that we allocated using alloca
266266
}
267267

268-
code_state->ip = (byte*)(ip - self->bytecode); // offset to after n_state/n_exc_stack
269-
code_state->n_state = n_state;
270-
mp_setup_code_state(code_state, self, n_args, n_kw, args);
268+
code_state->fun_bc = self;
269+
code_state->ip = 0;
270+
mp_setup_code_state(code_state, n_args, n_kw, args);
271271

272272
// execute the byte code with the correct globals context
273273
code_state->old_globals = mp_globals_get();

py/objgenerator.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,9 @@ STATIC mp_obj_t gen_wrap_call(mp_obj_t self_in, size_t n_args, size_t n_kw, cons
6767
o->base.type = &mp_type_gen_instance;
6868

6969
o->globals = self_fun->globals;
70-
o->code_state.n_state = n_state;
71-
o->code_state.ip = (byte*)(ip - self_fun->bytecode); // offset to prelude
72-
mp_setup_code_state(&o->code_state, self_fun, n_args, n_kw, args);
70+
o->code_state.fun_bc = self_fun;
71+
o->code_state.ip = 0;
72+
mp_setup_code_state(&o->code_state, n_args, n_kw, args);
7373
return MP_OBJ_FROM_PTR(o);
7474
}
7575

@@ -92,7 +92,7 @@ mp_obj_t mp_obj_new_gen_wrap(mp_obj_t fun) {
9292
STATIC void gen_instance_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) {
9393
(void)kind;
9494
mp_obj_gen_instance_t *self = MP_OBJ_TO_PTR(self_in);
95-
mp_printf(print, "<generator object '%q' at %p>", mp_obj_code_get_name(self->code_state.code_info), self);
95+
mp_printf(print, "<generator object '%q' at %p>", mp_obj_fun_get_name(MP_OBJ_FROM_PTR(self->code_state.fun_bc)), self);
9696
}
9797

9898
mp_vm_return_kind_t mp_obj_gen_resume(mp_obj_t self_in, mp_obj_t send_value, mp_obj_t throw_value, mp_obj_t *ret_val) {
@@ -134,10 +134,13 @@ mp_vm_return_kind_t mp_obj_gen_resume(mp_obj_t self_in, mp_obj_t send_value, mp_
134134
}
135135
break;
136136

137-
case MP_VM_RETURN_EXCEPTION:
137+
case MP_VM_RETURN_EXCEPTION: {
138+
const byte *bc = self->code_state.fun_bc->bytecode;
139+
size_t n_state = mp_decode_uint(&bc);
138140
self->code_state.ip = 0;
139-
*ret_val = self->code_state.state[self->code_state.n_state - 1];
141+
*ret_val = self->code_state.state[n_state - 1];
140142
break;
143+
}
141144
}
142145

143146
return ret_kind;

py/vm.c

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,10 @@ typedef enum {
7373
ip += 2;
7474
#define DECODE_PTR \
7575
DECODE_UINT; \
76-
void *ptr = (void*)(uintptr_t)code_state->const_table[unum]
76+
void *ptr = (void*)(uintptr_t)code_state->fun_bc->const_table[unum]
7777
#define DECODE_OBJ \
7878
DECODE_UINT; \
79-
mp_obj_t obj = (mp_obj_t)code_state->const_table[unum]
79+
mp_obj_t obj = (mp_obj_t)code_state->fun_bc->const_table[unum]
8080

8181
#else
8282

@@ -162,8 +162,10 @@ mp_vm_return_kind_t mp_execute_bytecode(mp_code_state_t *code_state, volatile mp
162162
run_code_state: ;
163163
#endif
164164
// Pointers which are constant for particular invocation of mp_execute_bytecode()
165-
mp_obj_t * /*const*/ fastn = &code_state->state[code_state->n_state - 1];
166-
mp_exc_stack_t * /*const*/ exc_stack = (mp_exc_stack_t*)(code_state->state + code_state->n_state);
165+
const byte *temp_bc = code_state->fun_bc->bytecode;
166+
size_t n_state = mp_decode_uint(&temp_bc);
167+
mp_obj_t * /*const*/ fastn = &code_state->state[n_state - 1];
168+
mp_exc_stack_t * /*const*/ exc_stack = (mp_exc_stack_t*)(code_state->state + n_state);
167169

168170
// variables that are visible to the exception handler (declared volatile)
169171
volatile bool currently_in_except_block = MP_TAGPTR_TAG0(code_state->exc_sp); // 0 or 1, to detect nested exceptions
@@ -1327,8 +1329,16 @@ unwind_jump:;
13271329
// But consider how to handle nested exceptions.
13281330
// TODO need a better way of not adding traceback to constant objects (right now, just GeneratorExit_obj and MemoryError_obj)
13291331
if (nlr.ret_val != &mp_const_GeneratorExit_obj && nlr.ret_val != &mp_const_MemoryError_obj) {
1330-
const byte *ip = code_state->code_info;
1332+
const byte *ip = code_state->fun_bc->bytecode;
1333+
mp_decode_uint(&ip); // skip n_state
1334+
mp_decode_uint(&ip); // skip n_exc_stack
1335+
ip++; // skip scope_flags
1336+
ip++; // skip n_pos_args
1337+
ip++; // skip n_kwonly_args
1338+
ip++; // skip n_def_pos_args
1339+
size_t bc = code_state->ip - ip;
13311340
size_t code_info_size = mp_decode_uint(&ip);
1341+
bc -= code_info_size;
13321342
#if MICROPY_PERSISTENT_CODE
13331343
qstr block_name = ip[0] | (ip[1] << 8);
13341344
qstr source_file = ip[2] | (ip[3] << 8);
@@ -1337,7 +1347,6 @@ unwind_jump:;
13371347
qstr block_name = mp_decode_uint(&ip);
13381348
qstr source_file = mp_decode_uint(&ip);
13391349
#endif
1340-
size_t bc = code_state->ip - code_state->code_info - code_info_size;
13411350
size_t source_line = 1;
13421351
size_t c;
13431352
while ((c = *ip)) {
@@ -1393,8 +1402,8 @@ unwind_jump:;
13931402
} else if (code_state->prev != NULL) {
13941403
mp_globals_set(code_state->old_globals);
13951404
code_state = code_state->prev;
1396-
fastn = &code_state->state[code_state->n_state - 1];
1397-
exc_stack = (mp_exc_stack_t*)(code_state->state + code_state->n_state);
1405+
fastn = &code_state->state[n_state - 1];
1406+
exc_stack = (mp_exc_stack_t*)(code_state->state + n_state);
13981407
// variables that are visible to the exception handler (declared volatile)
13991408
currently_in_except_block = MP_TAGPTR_TAG0(code_state->exc_sp); // 0 or 1, to detect nested exceptions
14001409
exc_sp = MP_TAGPTR_PTR(code_state->exc_sp); // stack grows up, exc_sp points to top of stack

0 commit comments

Comments
 (0)