Skip to content

Commit c8e9c0d

Browse files
committed
py: Add MICROPY_PERSISTENT_CODE so code can persist beyond the runtime.
Main changes when MICROPY_PERSISTENT_CODE is enabled are:
- qstrs are encoded as 2-byte fixed width in the bytecode
- all pointers are removed from bytecode and put in const_table (this includes const objects and raw code pointers)

Ultimately this option will enable persistence for not just bytecode but also native code.
1 parent 713ea18 commit c8e9c0d

6 files changed

Lines changed: 140 additions & 21 deletions

File tree

py/emitbc.c

Lines changed: 80 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,14 @@ struct _emit_t {
5656
mp_uint_t bytecode_offset;
5757
mp_uint_t bytecode_size;
5858
byte *code_base; // stores both byte code and code info
59+
60+
#if MICROPY_PERSISTENT_CODE
61+
uint16_t ct_cur_obj;
62+
uint16_t ct_num_obj;
63+
uint16_t ct_cur_raw_code;
64+
#endif
5965
mp_uint_t *const_table;
66+
6067
// Accessed as mp_uint_t, so must be aligned as such
6168
byte dummy_data[DUMMY_DATA_SIZE];
6269
};
@@ -108,10 +115,6 @@ STATIC byte *emit_get_cur_to_write_code_info(emit_t *emit, int num_bytes_to_writ
108115
}
109116
}
110117

111-
STATIC void emit_align_code_info_to_machine_word(emit_t *emit) {
112-
emit->code_info_offset = (emit->code_info_offset + sizeof(mp_uint_t) - 1) & (~(sizeof(mp_uint_t) - 1));
113-
}
114-
115118
STATIC void emit_write_code_info_byte(emit_t* emit, byte val) {
116119
*emit_get_cur_to_write_code_info(emit, 1) = val;
117120
}
@@ -121,7 +124,14 @@ STATIC void emit_write_code_info_uint(emit_t* emit, mp_uint_t val) {
121124
}
122125

123126
STATIC void emit_write_code_info_qstr(emit_t *emit, qstr qst) {
127+
#if MICROPY_PERSISTENT_CODE
128+
assert((qst >> 16) == 0);
129+
byte *c = emit_get_cur_to_write_code_info(emit, 2);
130+
c[0] = qst;
131+
c[1] = qst >> 8;
132+
#else
124133
emit_write_uint(emit, emit_get_cur_to_write_code_info, qst);
134+
#endif
125135
}
126136

127137
#if MICROPY_ENABLE_SOURCE_LINE
@@ -163,10 +173,6 @@ STATIC byte *emit_get_cur_to_write_bytecode(emit_t *emit, int num_bytes_to_write
163173
}
164174
}
165175

166-
STATIC void emit_align_bytecode_to_machine_word(emit_t *emit) {
167-
emit->bytecode_offset = (emit->bytecode_offset + sizeof(mp_uint_t) - 1) & (~(sizeof(mp_uint_t) - 1));
168-
}
169-
170176
STATIC void emit_write_bytecode_byte(emit_t *emit, byte b1) {
171177
byte *c = emit_get_cur_to_write_bytecode(emit, 1);
172178
c[0] = b1;
@@ -211,18 +217,55 @@ STATIC void emit_write_bytecode_byte_uint(emit_t *emit, byte b, mp_uint_t val) {
211217
emit_write_uint(emit, emit_get_cur_to_write_bytecode, val);
212218
}
213219

214-
// aligns the pointer so it is friendly to GC
220+
#if MICROPY_PERSISTENT_CODE
221+
STATIC void emit_write_bytecode_byte_const(emit_t *emit, byte b, mp_uint_t n, mp_uint_t c) {
222+
if (emit->pass == MP_PASS_EMIT) {
223+
emit->const_table[n] = c;
224+
}
225+
emit_write_bytecode_byte_uint(emit, b, n);
226+
}
227+
#else
215228
STATIC void emit_write_bytecode_byte_ptr(emit_t *emit, byte b, void *ptr) {
229+
// aligns the pointer so it is friendly to GC
216230
emit_write_bytecode_byte(emit, b);
217-
emit_align_bytecode_to_machine_word(emit);
231+
emit->bytecode_offset = (mp_uint_t)MP_ALIGN(emit->bytecode_offset, sizeof(mp_uint_t));
218232
mp_uint_t *c = (mp_uint_t*)emit_get_cur_to_write_bytecode(emit, sizeof(mp_uint_t));
219233
// Verify that c is already uint-aligned
220234
assert(c == MP_ALIGN(c, sizeof(mp_uint_t)));
221235
*c = (mp_uint_t)ptr;
222236
}
237+
#endif
223238

224239
STATIC void emit_write_bytecode_byte_qstr(emit_t* emit, byte b, qstr qst) {
240+
#if MICROPY_PERSISTENT_CODE
241+
assert((qst >> 16) == 0);
242+
byte *c = emit_get_cur_to_write_bytecode(emit, 3);
243+
c[0] = b;
244+
c[1] = qst;
245+
c[2] = qst >> 8;
246+
#else
225247
emit_write_bytecode_byte_uint(emit, b, qst);
248+
#endif
249+
}
250+
251+
STATIC void emit_write_bytecode_byte_obj(emit_t *emit, byte b, void *ptr) {
252+
#if MICROPY_PERSISTENT_CODE
253+
emit_write_bytecode_byte_const(emit, b,
254+
emit->scope->num_pos_args + emit->scope->num_kwonly_args
255+
+ emit->ct_cur_obj++, (mp_uint_t)ptr);
256+
#else
257+
emit_write_bytecode_byte_ptr(emit, b, ptr);
258+
#endif
259+
}
260+
261+
STATIC void emit_write_bytecode_byte_raw_code(emit_t *emit, byte b, mp_raw_code_t *rc) {
262+
#if MICROPY_PERSISTENT_CODE
263+
emit_write_bytecode_byte_const(emit, b,
264+
emit->scope->num_pos_args + emit->scope->num_kwonly_args
265+
+ emit->ct_num_obj + emit->ct_cur_raw_code++, (mp_uint_t)rc);
266+
#else
267+
emit_write_bytecode_byte_ptr(emit, b, rc);
268+
#endif
226269
}
227270

228271
// unsigned labels are relative to ip following this instruction, stored as 16 bits
@@ -318,6 +361,11 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) {
318361
}
319362
emit_write_bytecode_byte(emit, 255); // end of list sentinel
320363

364+
#if MICROPY_PERSISTENT_CODE
365+
emit->ct_cur_obj = 0;
366+
emit->ct_cur_raw_code = 0;
367+
#endif
368+
321369
if (pass == MP_PASS_EMIT) {
322370
// Write argument names (needed to resolve positional args passed as
323371
// keywords). We store them as full word-sized objects for efficient access
@@ -360,16 +408,30 @@ void mp_emit_bc_end_pass(emit_t *emit) {
360408

361409
emit_write_code_info_byte(emit, 0); // end of line number info
362410

411+
#if MICROPY_PERSISTENT_CODE
412+
assert(emit->pass <= MP_PASS_STACK_SIZE || (emit->ct_num_obj == emit->ct_cur_obj));
413+
emit->ct_num_obj = emit->ct_cur_obj;
414+
#endif
415+
363416
if (emit->pass == MP_PASS_CODE_SIZE) {
417+
#if !MICROPY_PERSISTENT_CODE
364418
// so bytecode is aligned
365-
emit_align_code_info_to_machine_word(emit);
419+
emit->code_info_offset = (mp_uint_t)MP_ALIGN(emit->code_info_offset, sizeof(mp_uint_t));
420+
#endif
366421

367422
// calculate size of total code-info + bytecode, in bytes
368423
emit->code_info_size = emit->code_info_offset;
369424
emit->bytecode_size = emit->bytecode_offset;
370425
emit->code_base = m_new0(byte, emit->code_info_size + emit->bytecode_size);
371426

372-
emit->const_table = m_new0(mp_uint_t, emit->scope->num_pos_args + emit->scope->num_kwonly_args);
427+
#if MICROPY_PERSISTENT_CODE
428+
emit->const_table = m_new0(mp_uint_t,
429+
emit->scope->num_pos_args + emit->scope->num_kwonly_args
430+
+ emit->ct_cur_obj + emit->ct_cur_raw_code);
431+
#else
432+
emit->const_table = m_new0(mp_uint_t,
433+
emit->scope->num_pos_args + emit->scope->num_kwonly_args);
434+
#endif
373435

374436
} else if (emit->pass == MP_PASS_EMIT) {
375437
mp_emit_glue_assign_bytecode(emit->scope->raw_code, emit->code_base,
@@ -457,7 +519,7 @@ void mp_emit_bc_load_const_tok(emit_t *emit, mp_token_kind_t tok) {
457519
case MP_TOKEN_KW_NONE: emit_write_bytecode_byte(emit, MP_BC_LOAD_CONST_NONE); break;
458520
case MP_TOKEN_KW_TRUE: emit_write_bytecode_byte(emit, MP_BC_LOAD_CONST_TRUE); break;
459521
no_other_choice:
460-
case MP_TOKEN_ELLIPSIS: emit_write_bytecode_byte_ptr(emit, MP_BC_LOAD_CONST_OBJ, (void*)&mp_const_ellipsis_obj); break;
522+
case MP_TOKEN_ELLIPSIS: emit_write_bytecode_byte_obj(emit, MP_BC_LOAD_CONST_OBJ, (void*)&mp_const_ellipsis_obj); break;
461523
default: assert(0); goto no_other_choice; // to help flow control analysis
462524
}
463525
}
@@ -478,7 +540,7 @@ void mp_emit_bc_load_const_str(emit_t *emit, qstr qst) {
478540

479541
void mp_emit_bc_load_const_obj(emit_t *emit, void *obj) {
480542
emit_bc_pre(emit, 1);
481-
emit_write_bytecode_byte_ptr(emit, MP_BC_LOAD_CONST_OBJ, obj);
543+
emit_write_bytecode_byte_obj(emit, MP_BC_LOAD_CONST_OBJ, obj);
482544
}
483545

484546
void mp_emit_bc_load_null(emit_t *emit) {
@@ -821,22 +883,22 @@ void mp_emit_bc_unpack_ex(emit_t *emit, mp_uint_t n_left, mp_uint_t n_right) {
821883
void mp_emit_bc_make_function(emit_t *emit, scope_t *scope, mp_uint_t n_pos_defaults, mp_uint_t n_kw_defaults) {
822884
if (n_pos_defaults == 0 && n_kw_defaults == 0) {
823885
emit_bc_pre(emit, 1);
824-
emit_write_bytecode_byte_ptr(emit, MP_BC_MAKE_FUNCTION, scope->raw_code);
886+
emit_write_bytecode_byte_raw_code(emit, MP_BC_MAKE_FUNCTION, scope->raw_code);
825887
} else {
826888
emit_bc_pre(emit, -1);
827-
emit_write_bytecode_byte_ptr(emit, MP_BC_MAKE_FUNCTION_DEFARGS, scope->raw_code);
889+
emit_write_bytecode_byte_raw_code(emit, MP_BC_MAKE_FUNCTION_DEFARGS, scope->raw_code);
828890
}
829891
}
830892

831893
void mp_emit_bc_make_closure(emit_t *emit, scope_t *scope, mp_uint_t n_closed_over, mp_uint_t n_pos_defaults, mp_uint_t n_kw_defaults) {
832894
if (n_pos_defaults == 0 && n_kw_defaults == 0) {
833895
emit_bc_pre(emit, -n_closed_over + 1);
834-
emit_write_bytecode_byte_ptr(emit, MP_BC_MAKE_CLOSURE, scope->raw_code);
896+
emit_write_bytecode_byte_raw_code(emit, MP_BC_MAKE_CLOSURE, scope->raw_code);
835897
emit_write_bytecode_byte(emit, n_closed_over);
836898
} else {
837899
assert(n_closed_over <= 255);
838900
emit_bc_pre(emit, -2 - n_closed_over + 1);
839-
emit_write_bytecode_byte_ptr(emit, MP_BC_MAKE_CLOSURE_DEFARGS, scope->raw_code);
901+
emit_write_bytecode_byte_raw_code(emit, MP_BC_MAKE_CLOSURE_DEFARGS, scope->raw_code);
840902
emit_write_bytecode_byte(emit, n_closed_over);
841903
}
842904
}

py/emitnative.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -830,10 +830,16 @@ STATIC void emit_native_end_pass(emit_t *emit) {
830830
ASM_DATA(emit->as, 1, emit->scope->num_kwonly_args);
831831
ASM_DATA(emit->as, 1, emit->scope->num_def_pos_args);
832832

833-
// write code info (just contains block name and source file)
833+
// write code info
834+
#if MICROPY_PERSISTENT_CODE
834835
ASM_DATA(emit->as, 1, 5);
835-
ASM_DATA(emit->as, 2, emit->scope->simple_name);
836-
ASM_DATA(emit->as, 2, emit->scope->source_file);
836+
ASM_DATA(emit->as, 1, emit->scope->simple_name);
837+
ASM_DATA(emit->as, 1, emit->scope->simple_name >> 8);
838+
ASM_DATA(emit->as, 1, emit->scope->source_file);
839+
ASM_DATA(emit->as, 1, emit->scope->source_file >> 8);
840+
#else
841+
ASM_DATA(emit->as, 1, 1);
842+
#endif
837843

838844
// bytecode prelude: initialise closed over variables
839845
for (int i = 0; i < emit->scope->id_info_len; i++) {

py/mpconfig.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,11 @@
192192
/*****************************************************************************/
193193
/* Micro Python emitters */
194194

195+
// Whether generated code can persist independently of the VM/runtime instance
196+
#ifndef MICROPY_PERSISTENT_CODE
197+
#define MICROPY_PERSISTENT_CODE (0)
198+
#endif
199+
195200
// Whether to emit x64 native code
196201
#ifndef MICROPY_EMIT_X64
197202
#define MICROPY_EMIT_X64 (0)

py/objfun.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,11 @@ const mp_obj_type_t mp_type_fun_builtin = {
106106

107107
qstr mp_obj_code_get_name(const byte *code_info) {
108108
mp_decode_uint(&code_info); // skip code_info_size entry
109+
#if MICROPY_PERSISTENT_CODE
110+
return code_info[0] | (code_info[1] << 8);
111+
#else
109112
return mp_decode_uint(&code_info);
113+
#endif
110114
}
111115

112116
#if MICROPY_EMIT_NATIVE

py/showbc.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,18 @@
4040
}
4141
#define DECODE_ULABEL do { unum = (ip[0] | (ip[1] << 8)); ip += 2; } while (0)
4242
#define DECODE_SLABEL do { unum = (ip[0] | (ip[1] << 8)) - 0x8000; ip += 2; } while (0)
43+
44+
#if MICROPY_PERSISTENT_CODE
45+
46+
#define DECODE_QSTR \
47+
qst = ip[0] | ip[1] << 8; \
48+
ip += 2;
49+
#define DECODE_PTR \
50+
DECODE_UINT; \
51+
unum = mp_showbc_const_table[unum]
52+
53+
#else
54+
4355
#define DECODE_QSTR { \
4456
qst = 0; \
4557
do { \
@@ -52,10 +64,14 @@
5264
ip += sizeof(mp_uint_t); \
5365
} while (0)
5466

67+
#endif
68+
5569
const byte *mp_showbc_code_start;
70+
const mp_uint_t *mp_showbc_const_table;
5671

5772
void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const mp_uint_t *const_table) {
5873
mp_showbc_code_start = ip;
74+
mp_showbc_const_table = const_table;
5975

6076
// get bytecode parameters
6177
mp_uint_t n_state = mp_decode_uint(&ip);
@@ -69,8 +85,13 @@ void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const m
6985
mp_uint_t code_info_size = mp_decode_uint(&code_info);
7086
ip += code_info_size;
7187

88+
#if MICROPY_PERSISTENT_CODE
89+
qstr block_name = code_info[0] | (code_info[1] << 8);
90+
qstr source_file = code_info[2] | (code_info[3] << 8);
91+
#else
7292
qstr block_name = mp_decode_uint(&code_info);
7393
qstr source_file = mp_decode_uint(&code_info);
94+
#endif
7495
printf("File %s, code block '%s' (descriptor: %p, bytecode @%p " UINT_FMT " bytes)\n",
7596
qstr_str(source_file), qstr_str(block_name), descr, mp_showbc_code_start, len);
7697

py/vm.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,18 @@ typedef enum {
6565
} while ((*ip++ & 0x80) != 0)
6666
#define DECODE_ULABEL mp_uint_t ulab = (ip[0] | (ip[1] << 8)); ip += 2
6767
#define DECODE_SLABEL mp_uint_t slab = (ip[0] | (ip[1] << 8)) - 0x8000; ip += 2
68+
69+
#if MICROPY_PERSISTENT_CODE
70+
71+
#define DECODE_QSTR \
72+
qstr qst = ip[0] | ip[1] << 8; \
73+
ip += 2;
74+
#define DECODE_PTR \
75+
DECODE_UINT; \
76+
void *ptr = (void*)code_state->const_table[unum]
77+
78+
#else
79+
6880
#define DECODE_QSTR qstr qst = 0; \
6981
do { \
7082
qst = (qst << 7) + (*ip & 0x7f); \
@@ -73,6 +85,9 @@ typedef enum {
7385
ip = (byte*)(((mp_uint_t)ip + sizeof(mp_uint_t) - 1) & (~(sizeof(mp_uint_t) - 1))); /* align ip */ \
7486
void *ptr = (void*)*(mp_uint_t*)ip; \
7587
ip += sizeof(mp_uint_t)
88+
89+
#endif
90+
7691
#define PUSH(val) *++sp = (val)
7792
#define POP() (*sp--)
7893
#define TOP() (*sp)
@@ -1280,8 +1295,14 @@ unwind_jump:;
12801295
if (mp_obj_is_exception_instance(nlr.ret_val) && nlr.ret_val != &mp_const_GeneratorExit_obj && nlr.ret_val != &mp_const_MemoryError_obj) {
12811296
const byte *ip = code_state->code_info;
12821297
mp_uint_t code_info_size = mp_decode_uint(&ip);
1298+
#if MICROPY_PERSISTENT_CODE
1299+
qstr block_name = ip[0] | (ip[1] << 8);
1300+
qstr source_file = ip[2] | (ip[3] << 8);
1301+
ip += 4;
1302+
#else
12831303
qstr block_name = mp_decode_uint(&ip);
12841304
qstr source_file = mp_decode_uint(&ip);
1305+
#endif
12851306
mp_uint_t bc = code_state->ip - code_state->code_info - code_info_size;
12861307
mp_uint_t source_line = 1;
12871308
mp_uint_t c;

0 commit comments

Comments
 (0)