Skip to content

Commit c8c0fd4

Browse files
committed
py: Rework and compress second part of bytecode prelude.
This patch compresses the second part of the bytecode prelude, which contains the source file name, function name, source-line-number mapping and cell closure information. This part of the prelude now begins with a single variable-length unsigned integer which encodes 2 numbers, being the byte-size of the following 2 sections in the header: the "source info section" and the "closure section". After decoding this variable-length unsigned integer it's possible to skip over one or both of these sections very easily. This scheme saves about 2 bytes for most functions compared to the original format: one in the case that there are no closure cells, and one because padding was eliminated.
1 parent b5ebfad commit c8c0fd4

14 files changed

Lines changed: 162 additions & 119 deletions

File tree

py/bc.c

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -269,19 +269,25 @@ continue2:;
269269
}
270270
}
271271

272-
// get the ip and skip argument names
272+
// read the size part of the prelude
273273
const byte *ip = code_state->ip;
274+
MP_BC_PRELUDE_SIZE_DECODE(ip);
274275

275276
// jump over code info (source file and line-number mapping)
276-
ip += mp_decode_uint_value(ip);
277+
ip += n_info;
277278

278279
// bytecode prelude: initialise closed over variables
279-
size_t local_num;
280-
while ((local_num = *ip++) != 255) {
280+
for (; n_cell; --n_cell) {
281+
size_t local_num = *ip++;
281282
code_state->state[n_state - 1 - local_num] =
282283
mp_obj_new_cell(code_state->state[n_state - 1 - local_num]);
283284
}
284285

286+
#if !MICROPY_PERSISTENT_CODE
287+
// so bytecode is aligned
288+
ip = MP_ALIGN(ip, sizeof(mp_uint_t));
289+
#endif
290+
285291
// now that we skipped over the prelude, set the ip for the VM
286292
code_state->ip = ip;
287293

py/bc.h

Lines changed: 53 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -42,17 +42,25 @@
4242
// K = n_kwonly_args number of keyword-only arguments this function takes
4343
// D = n_def_pos_args number of default positional arguments
4444
//
45-
// code_info_size : var uint | code_info_size counts bytes in this chunk
46-
// simple_name : var qstr |
47-
// source_file : var qstr |
48-
// <line number info> |
49-
// <word alignment padding> | only needed if bytecode contains pointers
45+
// prelude size : var uint
46+
// contains two values interleaved bit-wise as: xIIIIIIC repeated
47+
// x = extension another byte follows
48+
// I = n_info number of bytes in source info section
49+
// C = n_cells number of bytes/cells in closure section
50+
//
51+
// source info section:
52+
// simple_name : var qstr
53+
// source_file : var qstr
54+
// <line number info>
55+
//
56+
// closure section:
57+
// local_num0 : byte
58+
// ... : byte
59+
// local_numN : byte N = n_cells-1
60+
//
61+
// <word alignment padding> only needed if bytecode contains pointers
5062
//
51-
// local_num0 : byte |
52-
// ... : byte |
53-
// local_numN : byte | N = num_cells
54-
// 255 : byte | end of list sentinel
55-
// <bytecode> |
63+
// <bytecode>
5664
//
5765
//
5866
// constant table layout:
@@ -122,6 +130,41 @@ do { \
122130
size_t n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args; \
123131
MP_BC_PRELUDE_SIG_DECODE_INTO(ip, n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args)
124132

133+
#define MP_BC_PRELUDE_SIZE_ENCODE(I, C, out_byte, out_env) \
134+
do { \
135+
/* Encode bit-wise as: xIIIIIIC */ \
136+
uint8_t z = 0; \
137+
do { \
138+
z = (I & 0x3f) << 1 | (C & 1); \
139+
C >>= 1; \
140+
I >>= 6; \
141+
if (C | I) { \
142+
z |= 0x80; \
143+
} \
144+
out_byte(out_env, z); \
145+
} while (C | I); \
146+
} while (0)
147+
148+
#define MP_BC_PRELUDE_SIZE_DECODE_INTO(ip, I, C) \
149+
do { \
150+
uint8_t z; \
151+
C = 0; \
152+
I = 0; \
153+
for (unsigned n = 0;; ++n) { \
154+
z = *(ip)++; \
155+
/* xIIIIIIC */ \
156+
C |= (z & 1) << n; \
157+
I |= ((z & 0x7e) >> 1) << (6 * n); \
158+
if (!(z & 0x80)) { \
159+
break; \
160+
} \
161+
} \
162+
} while (0)
163+
164+
#define MP_BC_PRELUDE_SIZE_DECODE(ip) \
165+
size_t n_info, n_cell; \
166+
MP_BC_PRELUDE_SIZE_DECODE_INTO(ip, n_info, n_cell)
167+
125168
// Sentinel value for mp_code_state_t.exc_sp_idx
126169
#define MP_CODE_STATE_EXC_SP_IDX_SENTINEL ((uint16_t)-1)
127170

@@ -139,7 +182,6 @@ typedef struct _mp_bytecode_prelude_t {
139182
qstr qstr_block_name;
140183
qstr qstr_source_file;
141184
const byte *line_info;
142-
const byte *locals;
143185
const byte *opcodes;
144186
} mp_bytecode_prelude_t;
145187

py/emitbc.c

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ struct _emit_t {
6464
size_t bytecode_size;
6565
byte *code_base; // stores both byte code and code info
6666

67+
size_t n_info;
68+
size_t n_cell;
69+
6770
#if MICROPY_PERSISTENT_CODE
6871
uint16_t ct_cur_obj;
6972
uint16_t ct_num_obj;
@@ -123,10 +126,6 @@ STATIC void emit_write_code_info_byte(emit_t* emit, byte val) {
123126
*emit_get_cur_to_write_code_info(emit, 1) = val;
124127
}
125128

126-
STATIC void emit_write_code_info_uint(emit_t* emit, mp_uint_t val) {
127-
emit_write_uint(emit, emit_get_cur_to_write_code_info, val);
128-
}
129-
130129
STATIC void emit_write_code_info_qstr(emit_t *emit, qstr qst) {
131130
#if MICROPY_PERSISTENT_CODE
132131
assert((qst >> 16) == 0);
@@ -346,29 +345,17 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) {
346345
MP_BC_PRELUDE_SIG_ENCODE(n_state, n_exc_stack, scope, emit_write_code_info_byte, emit);
347346
}
348347

349-
// Write size of the rest of the code info. We don't know how big this
350-
// variable uint will be on the MP_PASS_CODE_SIZE pass so we reserve 2 bytes
351-
// for it and hope that is enough! TODO assert this or something.
352-
if (pass == MP_PASS_EMIT) {
353-
emit_write_code_info_uint(emit, emit->code_info_size - emit->code_info_offset);
354-
} else {
355-
emit_get_cur_to_write_code_info(emit, 2);
348+
// Write number of cells and size of the source code info
349+
if (pass >= MP_PASS_CODE_SIZE) {
350+
MP_BC_PRELUDE_SIZE_ENCODE(emit->n_info, emit->n_cell, emit_write_code_info_byte, emit);
356351
}
357352

353+
emit->n_info = emit->code_info_offset;
354+
358355
// Write the name and source file of this function.
359356
emit_write_code_info_qstr(emit, scope->simple_name);
360357
emit_write_code_info_qstr(emit, scope->source_file);
361358

362-
// bytecode prelude: initialise closed over variables
363-
for (int i = 0; i < scope->id_info_len; i++) {
364-
id_info_t *id = &scope->id_info[i];
365-
if (id->kind == ID_INFO_KIND_CELL) {
366-
assert(id->local_num < 255);
367-
emit_write_bytecode_raw_byte(emit, id->local_num); // write the local which should be converted to a cell
368-
}
369-
}
370-
emit_write_bytecode_raw_byte(emit, 255); // end of list sentinel
371-
372359
#if MICROPY_PERSISTENT_CODE
373360
emit->ct_cur_obj = 0;
374361
emit->ct_cur_raw_code = 0;
@@ -414,6 +401,20 @@ void mp_emit_bc_end_pass(emit_t *emit) {
414401

415402
emit_write_code_info_byte(emit, 0); // end of line number info
416403

404+
// Calculate size of source code info section
405+
emit->n_info = emit->code_info_offset - emit->n_info;
406+
407+
// Emit closure section of prelude
408+
emit->n_cell = 0;
409+
for (size_t i = 0; i < emit->scope->id_info_len; ++i) {
410+
id_info_t *id = &emit->scope->id_info[i];
411+
if (id->kind == ID_INFO_KIND_CELL) {
412+
assert(id->local_num <= 255);
413+
emit_write_code_info_byte(emit, id->local_num); // write the local which should be converted to a cell
414+
++emit->n_cell;
415+
}
416+
}
417+
417418
#if MICROPY_PERSISTENT_CODE
418419
assert(emit->pass <= MP_PASS_STACK_SIZE || (emit->ct_num_obj == emit->ct_cur_obj));
419420
emit->ct_num_obj = emit->ct_cur_obj;

py/emitnative.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ struct _emit_t {
208208
uint16_t code_state_start;
209209
uint16_t stack_start;
210210
int stack_size;
211+
uint16_t n_cell;
211212

212213
uint16_t const_table_cur_obj;
213214
uint16_t const_table_num_obj;
@@ -587,9 +588,14 @@ STATIC void emit_native_end_pass(emit_t *emit) {
587588
size_t n_exc_stack = 0; // exc-stack not needed for native code
588589
MP_BC_PRELUDE_SIG_ENCODE(n_state, n_exc_stack, emit->scope, emit_native_write_code_info_byte, emit);
589590

590-
// write code info
591591
#if MICROPY_PERSISTENT_CODE
592-
mp_asm_base_data(&emit->as->base, 1, 5);
592+
size_t n_info = 4;
593+
#else
594+
size_t n_info = 1;
595+
#endif
596+
MP_BC_PRELUDE_SIZE_ENCODE(n_info, emit->n_cell, emit_native_write_code_info_byte, emit);
597+
598+
#if MICROPY_PERSISTENT_CODE
593599
mp_asm_base_data(&emit->as->base, 1, emit->scope->simple_name);
594600
mp_asm_base_data(&emit->as->base, 1, emit->scope->simple_name >> 8);
595601
mp_asm_base_data(&emit->as->base, 1, emit->scope->source_file);
@@ -599,14 +605,15 @@ STATIC void emit_native_end_pass(emit_t *emit) {
599605
#endif
600606

601607
// bytecode prelude: initialise closed over variables
608+
size_t cell_start = mp_asm_base_get_code_pos(&emit->as->base);
602609
for (int i = 0; i < emit->scope->id_info_len; i++) {
603610
id_info_t *id = &emit->scope->id_info[i];
604611
if (id->kind == ID_INFO_KIND_CELL) {
605-
assert(id->local_num < 255);
612+
assert(id->local_num <= 255);
606613
mp_asm_base_data(&emit->as->base, 1, id->local_num); // write the local which should be converted to a cell
607614
}
608615
}
609-
mp_asm_base_data(&emit->as->base, 1, 255); // end of list sentinel
616+
emit->n_cell = mp_asm_base_get_code_pos(&emit->as->base) - cell_start;
610617
}
611618

612619
ASM_END_PASS(emit->as);

py/objfun.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ const mp_obj_type_t mp_type_fun_builtin_var = {
139139
/* byte code functions */
140140

141141
qstr mp_obj_code_get_name(const byte *code_info) {
142-
code_info = mp_decode_uint_skip(code_info); // skip code_info_size entry
142+
MP_BC_PRELUDE_SIZE_DECODE(code_info);
143143
#if MICROPY_PERSISTENT_CODE
144144
return code_info[0] | (code_info[1] << 8);
145145
#else

py/persistentcode.c

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -167,11 +167,10 @@ STATIC void extract_prelude(const byte **ip, const byte **ip2, bytecode_prelude_
167167
prelude->n_pos_args = n_pos_args;
168168
prelude->n_kwonly_args = n_kwonly_args;
169169
prelude->n_def_pos_args = n_def_pos_args;
170+
MP_BC_PRELUDE_SIZE_DECODE(*ip);
170171
*ip2 = *ip;
171-
prelude->code_info_size = mp_decode_uint(ip2);
172-
*ip += prelude->code_info_size;
173-
while (*(*ip)++ != 255) {
174-
}
172+
*ip += n_info;
173+
*ip += n_cell;
175174
}
176175

177176
#endif // MICROPY_PERSISTENT_CODE_LOAD || MICROPY_PERSISTENT_CODE_SAVE
@@ -286,12 +285,9 @@ STATIC void load_prelude(mp_reader_t *reader, byte **ip, byte **ip2, bytecode_pr
286285
byte *ip_read = *ip;
287286
read_uint(reader, &ip_read); // read in n_state/etc (is effectively a var-uint)
288287
byte *ip_read_save = ip_read;
289-
size_t code_info_size = read_uint(reader, &ip_read); // read in code_info_size
290-
code_info_size -= ip_read - ip_read_save; // subtract bytes taken by code_info_size itself
291-
read_bytes(reader, ip_read, code_info_size); // read remaining code info
292-
ip_read += code_info_size;
293-
while ((*ip_read++ = read_byte(reader)) != 255) {
294-
}
288+
read_uint(reader, &ip_read); // read in n_info/n_cell (is effectively a var-uint)
289+
MP_BC_PRELUDE_SIZE_DECODE(ip_read_save);
290+
read_bytes(reader, ip_read, n_info + n_cell); // read remaining code info
295291

296292
// Entire prelude has been read into *ip, now decode and extract values from it
297293
extract_prelude((const byte**)ip, (const byte**)ip2, prelude);

py/profile.c

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434

3535
STATIC uint mp_prof_bytecode_lineno(const mp_raw_code_t *rc, size_t bc) {
3636
const mp_bytecode_prelude_t *prelude = &rc->prelude;
37-
return mp_bytecode_get_source_line(prelude->line_info, bc + prelude->opcodes - prelude->locals);
37+
return mp_bytecode_get_source_line(prelude->line_info, bc);
3838
}
3939

4040
void mp_prof_extract_prelude(const byte *bytecode, mp_bytecode_prelude_t *prelude) {
@@ -48,22 +48,15 @@ void mp_prof_extract_prelude(const byte *bytecode, mp_bytecode_prelude_t *prelud
4848
prelude->n_kwonly_args = n_kwonly_args;
4949
prelude->n_def_pos_args = n_def_pos_args;
5050

51-
const byte *code_info = ip;
52-
size_t code_info_size = mp_decode_uint(&ip);
51+
MP_BC_PRELUDE_SIZE_DECODE(ip);
52+
53+
prelude->line_info = ip + 4;
54+
prelude->opcodes = ip + n_info + n_cell;
5355

5456
qstr block_name = ip[0] | (ip[1] << 8);
5557
qstr source_file = ip[2] | (ip[3] << 8);
56-
ip += 4;
5758
prelude->qstr_block_name = block_name;
5859
prelude->qstr_source_file = source_file;
59-
60-
prelude->line_info = ip;
61-
prelude->locals = code_info + code_info_size;
62-
63-
ip = prelude->locals;
64-
while (*ip++ != 255) {
65-
}
66-
prelude->opcodes = ip;
6760
}
6861

6962
/******************************************************************************/

py/showbc.c

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,8 @@ void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const m
8585

8686
// Decode prelude
8787
MP_BC_PRELUDE_SIG_DECODE(ip);
88-
88+
MP_BC_PRELUDE_SIZE_DECODE(ip);
8989
const byte *code_info = ip;
90-
mp_uint_t code_info_size = mp_decode_uint(&code_info);
91-
ip += code_info_size;
9290

9391
#if MICROPY_PERSISTENT_CODE
9492
qstr block_name = code_info[0] | (code_info[1] << 8);
@@ -102,7 +100,9 @@ void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const m
102100
qstr_str(source_file), qstr_str(block_name), descr, mp_showbc_code_start, len);
103101

104102
// raw bytecode dump
105-
printf("Raw bytecode (code_info_size=" UINT_FMT ", bytecode_size=" UINT_FMT "):\n", code_info_size, len - code_info_size);
103+
size_t prelude_size = ip - mp_showbc_code_start + n_info + n_cell;
104+
printf("Raw bytecode (code_info_size=" UINT_FMT ", bytecode_size=" UINT_FMT "):\n",
105+
prelude_size, len - prelude_size);
106106
for (mp_uint_t i = 0; i < len; i++) {
107107
if (i > 0 && i % 16 == 0) {
108108
printf("\n");
@@ -121,21 +121,18 @@ void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const m
121121
printf("(N_STATE %u)\n", (unsigned)n_state);
122122
printf("(N_EXC_STACK %u)\n", (unsigned)n_exc_stack);
123123

124-
// for printing line number info
125-
const byte *bytecode_start = ip;
124+
// skip over code_info
125+
ip += n_info;
126126

127127
// bytecode prelude: initialise closed over variables
128-
{
129-
uint local_num;
130-
while ((local_num = *ip++) != 255) {
131-
printf("(INIT_CELL %u)\n", local_num);
132-
}
133-
len -= ip - mp_showbc_code_start;
128+
for (size_t i = 0; i < n_cell; ++i) {
129+
uint local_num = *ip++;
130+
printf("(INIT_CELL %u)\n", local_num);
134131
}
135132

136133
// print out line number info
137134
{
138-
mp_int_t bc = bytecode_start - ip;
135+
mp_int_t bc = 0;
139136
mp_uint_t source_line = 1;
140137
printf(" bc=" INT_FMT " line=" UINT_FMT "\n", bc, source_line);
141138
for (const byte* ci = code_info; *ci;) {
@@ -153,7 +150,7 @@ void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const m
153150
printf(" bc=" INT_FMT " line=" UINT_FMT "\n", bc, source_line);
154151
}
155152
}
156-
mp_bytecode_print2(ip, len - 0, const_table);
153+
mp_bytecode_print2(ip, len - prelude_size, const_table);
157154
}
158155

159156
const byte *mp_bytecode_print_str(const byte *ip) {

py/vm.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1441,10 +1441,13 @@ unwind_jump:;
14411441
&& *code_state->ip != MP_BC_RAISE_LAST) {
14421442
const byte *ip = code_state->fun_bc->bytecode;
14431443
MP_BC_PRELUDE_SIG_DECODE(ip);
1444-
size_t bc = code_state->ip - ip;
1445-
size_t code_info_size = mp_decode_uint_value(ip);
1446-
ip = mp_decode_uint_skip(ip); // skip code_info_size
1447-
bc -= code_info_size;
1444+
MP_BC_PRELUDE_SIZE_DECODE(ip);
1445+
const byte *bytecode_start = ip + n_info + n_cell;
1446+
#if !MICROPY_PERSISTENT_CODE
1447+
// so bytecode is aligned
1448+
bytecode_start = MP_ALIGN(bytecode_start, sizeof(mp_uint_t));
1449+
#endif
1450+
size_t bc = code_state->ip - bytecode_start;
14481451
#if MICROPY_PERSISTENT_CODE
14491452
qstr block_name = ip[0] | (ip[1] << 8);
14501453
qstr source_file = ip[2] | (ip[3] << 8);

tests/cmdline/cmd_parsetree.py.exp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ Raw bytecode (code_info_size=\\d\+, bytecode_size=\\d\+):
3636
arg names:
3737
(N_STATE 5)
3838
(N_EXC_STACK 0)
39-
bc=-1 line=1
39+
bc=0 line=1
4040
bc=0 line=4
4141
bc=9 line=5
4242
bc=12 line=6

0 commit comments

Comments
 (0)