Skip to content

Commit 58e0f4a

Browse files
committed
py: Allocate parse nodes in chunks to reduce fragmentation and RAM use.
With this patch parse nodes are allocated sequentially in chunks. This reduces fragmentation of the heap and prevents waste at the end of individually allocated parse nodes. Saves roughly 20% of RAM during parse stage.
1 parent e5635f4 commit 58e0f4a

12 files changed

Lines changed: 123 additions & 57 deletions

File tree

bare-arm/main.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ void do_str(const char *src, mp_parse_input_kind_t input_kind) {
1616
nlr_buf_t nlr;
1717
if (nlr_push(&nlr) == 0) {
1818
qstr source_name = lex->source_name;
19-
mp_parse_node_t pn = mp_parse(lex, input_kind);
20-
mp_obj_t module_fun = mp_compile(pn, source_name, MP_EMIT_OPT_NONE, true);
19+
mp_parse_tree_t parse_tree = mp_parse(lex, input_kind);
20+
mp_obj_t module_fun = mp_compile(&parse_tree, source_name, MP_EMIT_OPT_NONE, true);
2121
mp_call_function_0(module_fun);
2222
nlr_pop();
2323
} else {

minimal/main.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ void do_str(const char *src, mp_parse_input_kind_t input_kind) {
1919
nlr_buf_t nlr;
2020
if (nlr_push(&nlr) == 0) {
2121
qstr source_name = lex->source_name;
22-
mp_parse_node_t pn = mp_parse(lex, input_kind);
23-
mp_obj_t module_fun = mp_compile(pn, source_name, MP_EMIT_OPT_NONE, true);
22+
mp_parse_tree_t parse_tree = mp_parse(lex, input_kind);
23+
mp_obj_t module_fun = mp_compile(&parse_tree, source_name, MP_EMIT_OPT_NONE, true);
2424
mp_call_function_0(module_fun);
2525
nlr_pop();
2626
} else {

py/compile.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3317,7 +3317,7 @@ STATIC void scope_compute_things(scope_t *scope) {
33173317
}
33183318
}
33193319

3320-
mp_obj_t mp_compile(mp_parse_node_t pn, qstr source_file, uint emit_opt, bool is_repl) {
3320+
mp_obj_t mp_compile(mp_parse_tree_t *parse_tree, qstr source_file, uint emit_opt, bool is_repl) {
33213321
// put compiler state on the stack, it's relatively small
33223322
compiler_t comp_state = {0};
33233323
compiler_t *comp = &comp_state;
@@ -3326,7 +3326,7 @@ mp_obj_t mp_compile(mp_parse_node_t pn, qstr source_file, uint emit_opt, bool is
33263326
comp->is_repl = is_repl;
33273327

33283328
// create the module scope
3329-
scope_t *module_scope = scope_new_and_link(comp, SCOPE_MODULE, pn, emit_opt);
3329+
scope_t *module_scope = scope_new_and_link(comp, SCOPE_MODULE, parse_tree->root, emit_opt);
33303330

33313331
// optimise constants (scope must be set for error messages to work)
33323332
comp->scope_cur = module_scope;
@@ -3485,7 +3485,7 @@ mp_obj_t mp_compile(mp_parse_node_t pn, qstr source_file, uint emit_opt, bool is
34853485
#endif
34863486

34873487
// free the parse tree
3488-
mp_parse_node_free(module_scope->pn);
3488+
mp_parse_tree_clear(parse_tree);
34893489

34903490
// free the scopes
34913491
mp_raw_code_t *outer_raw_code = module_scope->raw_code;

py/compile.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ enum {
4040
};
4141

4242
// the compiler will raise an exception if an error occurred
43-
// the compiler will free the parse tree (pn) before it returns
44-
mp_obj_t mp_compile(mp_parse_node_t pn, qstr source_file, uint emit_opt, bool is_repl);
43+
// the compiler will clear the parse tree before it returns
44+
mp_obj_t mp_compile(mp_parse_tree_t *parse_tree, qstr source_file, uint emit_opt, bool is_repl);
4545

4646
// this is implemented in runtime.c
4747
mp_obj_t mp_parse_compile_execute(mp_lexer_t *lex, mp_parse_input_kind_t parse_input_kind, mp_obj_dict_t *globals, mp_obj_dict_t *locals);

py/mpconfig.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,12 @@
118118
#define MICROPY_ALLOC_PARSE_INTERN_STRING_LEN (10)
119119
#endif
120120

121+
// Number of bytes to allocate initially when creating new chunks to store
122+
// parse nodes. Small leads to fragmentation, large leads to excess use.
123+
#ifndef MICROPY_ALLOC_PARSE_CHUNK_INIT
124+
#define MICROPY_ALLOC_PARSE_CHUNK_INIT (128)
125+
#endif
126+
121127
// Initial amount for ids in a scope
122128
#ifndef MICROPY_ALLOC_SCOPE_ID_INIT
123129
#define MICROPY_ALLOC_SCOPE_ID_INIT (4)

py/parse.c

Lines changed: 90 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,15 @@ typedef struct _rule_stack_t {
112112
mp_uint_t arg_i; // this dictates the maximum nodes in a "list" of things
113113
} rule_stack_t;
114114

115+
typedef struct _mp_parse_chunk_t {
116+
mp_uint_t alloc;
117+
union {
118+
mp_uint_t used;
119+
struct _mp_parse_chunk_t *next;
120+
} union_;
121+
byte data[];
122+
} mp_parse_chunk_t;
123+
115124
typedef struct _parser_t {
116125
bool had_memory_error;
117126

@@ -124,12 +133,56 @@ typedef struct _parser_t {
124133
mp_parse_node_t *result_stack;
125134

126135
mp_lexer_t *lexer;
136+
137+
mp_parse_tree_t tree;
138+
mp_parse_chunk_t *cur_chunk;
127139
} parser_t;
128140

129141
STATIC inline void memory_error(parser_t *parser) {
130142
parser->had_memory_error = true;
131143
}
132144

145+
STATIC void *parser_alloc(parser_t *parser, size_t num_bytes) {
146+
// use a custom memory allocator to store parse nodes sequentially in large chunks
147+
148+
mp_parse_chunk_t *chunk = parser->cur_chunk;
149+
150+
if (chunk != NULL && chunk->union_.used + num_bytes > chunk->alloc) {
151+
// not enough room at end of previously allocated chunk so try to grow
152+
mp_parse_chunk_t *new_data = (mp_parse_chunk_t*)m_renew_maybe(byte, chunk,
153+
sizeof(mp_parse_chunk_t) + chunk->alloc,
154+
sizeof(mp_parse_chunk_t) + chunk->alloc + num_bytes, false);
155+
if (new_data == NULL) {
156+
// could not grow existing memory; shrink it to fit previous
157+
(void)m_renew(byte, chunk, sizeof(mp_parse_chunk_t) + chunk->alloc,
158+
sizeof(mp_parse_chunk_t) + chunk->union_.used);
159+
chunk->alloc = chunk->union_.used;
160+
chunk->union_.next = parser->tree.chunk;
161+
parser->tree.chunk = chunk;
162+
chunk = NULL;
163+
} else {
164+
// could grow existing memory
165+
chunk->alloc += num_bytes;
166+
}
167+
}
168+
169+
if (chunk == NULL) {
170+
// no previous chunk, allocate a new chunk
171+
size_t alloc = MICROPY_ALLOC_PARSE_CHUNK_INIT;
172+
if (alloc < num_bytes) {
173+
alloc = num_bytes;
174+
}
175+
chunk = (mp_parse_chunk_t*)m_new(byte, sizeof(mp_parse_chunk_t) + alloc);
176+
chunk->alloc = alloc;
177+
chunk->union_.used = 0;
178+
parser->cur_chunk = chunk;
179+
}
180+
181+
byte *ret = chunk->data + chunk->union_.used;
182+
chunk->union_.used += num_bytes;
183+
return ret;
184+
}
185+
133186
STATIC void push_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule, mp_uint_t arg_i) {
134187
if (parser->had_memory_error) {
135188
return;
@@ -171,31 +224,6 @@ mp_parse_node_t mp_parse_node_new_leaf(mp_int_t kind, mp_int_t arg) {
171224
return (mp_parse_node_t)(kind | (arg << 4));
172225
}
173226

174-
void mp_parse_node_free(mp_parse_node_t pn) {
175-
if (MP_PARSE_NODE_IS_STRUCT(pn)) {
176-
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
177-
mp_uint_t n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
178-
mp_uint_t rule_id = MP_PARSE_NODE_STRUCT_KIND(pns);
179-
if (rule_id == RULE_string || rule_id == RULE_bytes) {
180-
m_del(char, (char*)pns->nodes[0], (mp_uint_t)pns->nodes[1]);
181-
} else if (rule_id == RULE_const_object) {
182-
// don't free the const object since it's probably used by the compiled code
183-
} else {
184-
bool adjust = ADD_BLANK_NODE(rules[rule_id]);
185-
if (adjust) {
186-
n--;
187-
}
188-
for (mp_uint_t i = 0; i < n; i++) {
189-
mp_parse_node_free(pns->nodes[i]);
190-
}
191-
if (adjust) {
192-
n++;
193-
}
194-
}
195-
m_del_var(mp_parse_node_struct_t, mp_parse_node_t, n, pns);
196-
}
197-
}
198-
199227
int mp_parse_node_extract_list(mp_parse_node_t *pn, mp_uint_t pn_kind, mp_parse_node_t **nodes) {
200228
if (MP_PARSE_NODE_IS_NULL(*pn)) {
201229
*nodes = NULL;
@@ -305,7 +333,7 @@ STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
305333
}
306334

307335
STATIC mp_parse_node_t make_node_string_bytes(parser_t *parser, mp_uint_t src_line, mp_uint_t rule_kind, const char *str, mp_uint_t len) {
308-
mp_parse_node_struct_t *pn = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, 2);
336+
mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t) * 2);
309337
if (pn == NULL) {
310338
memory_error(parser);
311339
return MP_PARSE_NODE_NULL;
@@ -320,7 +348,7 @@ STATIC mp_parse_node_t make_node_string_bytes(parser_t *parser, mp_uint_t src_li
320348
}
321349

322350
STATIC mp_parse_node_t make_node_const_object(parser_t *parser, mp_uint_t src_line, mp_obj_t obj) {
323-
mp_parse_node_struct_t *pn = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, 1);
351+
mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t));
324352
if (pn == NULL) {
325353
memory_error(parser);
326354
return MP_PARSE_NODE_NULL;
@@ -371,7 +399,7 @@ STATIC void push_result_token(parser_t *parser) {
371399
}
372400

373401
STATIC void push_result_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule, mp_uint_t num_args) {
374-
mp_parse_node_struct_t *pn = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, num_args);
402+
mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t) * num_args);
375403
if (pn == NULL) {
376404
memory_error(parser);
377405
return;
@@ -384,7 +412,7 @@ STATIC void push_result_rule(parser_t *parser, mp_uint_t src_line, const rule_t
384412
push_result_node(parser, (mp_parse_node_t)pn);
385413
}
386414

387-
mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
415+
mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
388416

389417
// initialise parser and allocate memory for its stacks
390418

@@ -402,6 +430,9 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
402430

403431
parser.lexer = lex;
404432

433+
parser.tree.chunk = NULL;
434+
parser.cur_chunk = NULL;
435+
405436
// check if we could allocate the stacks
406437
if (parser.rule_stack == NULL || parser.result_stack == NULL) {
407438
goto memory_error;
@@ -554,7 +585,13 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
554585
mp_parse_node_t p = peek_result(&parser, 1);
555586
if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_string)) {
556587
pop_result(&parser); // MP_PARSE_NODE_NULL
557-
mp_parse_node_free(pop_result(&parser)); // RULE_string
588+
mp_parse_node_t pn = pop_result(&parser); // possibly RULE_string
589+
if (MP_PARSE_NODE_IS_STRUCT(pn)) {
590+
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
591+
if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) {
592+
m_del(char, (char*)pns->nodes[0], (mp_uint_t)pns->nodes[1]);
593+
}
594+
}
558595
push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0);
559596
break;
560597
}
@@ -703,15 +740,24 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
703740
}
704741
}
705742

743+
// truncate final chunk and link into chain of chunks
744+
if (parser.cur_chunk != NULL) {
745+
(void)m_renew(byte, parser.cur_chunk,
746+
sizeof(mp_parse_chunk_t) + parser.cur_chunk->alloc,
747+
sizeof(mp_parse_chunk_t) + parser.cur_chunk->union_.used);
748+
parser.cur_chunk->alloc = parser.cur_chunk->union_.used;
749+
parser.cur_chunk->union_.next = parser.tree.chunk;
750+
parser.tree.chunk = parser.cur_chunk;
751+
}
752+
706753
mp_obj_t exc;
707-
mp_parse_node_t result;
708754

709755
// check if we had a memory error
710756
if (parser.had_memory_error) {
711757
memory_error:
712758
exc = mp_obj_new_exception_msg(&mp_type_MemoryError,
713759
"parser could not allocate enough memory");
714-
result = MP_PARSE_NODE_NULL;
760+
parser.tree.root = MP_PARSE_NODE_NULL;
715761
goto finished;
716762
}
717763

@@ -733,7 +779,7 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
733779
// get the root parse node that we created
734780
assert(parser.result_stack_top == 1);
735781
exc = MP_OBJ_NULL;
736-
result = parser.result_stack[0];
782+
parser.tree.root = parser.result_stack[0];
737783

738784
finished:
739785
// free the memory that we don't need anymore
@@ -750,7 +796,7 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
750796
nlr_raise(exc);
751797
} else {
752798
mp_lexer_free(lex);
753-
return result;
799+
return parser.tree;
754800
}
755801

756802
syntax_error:
@@ -771,6 +817,15 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
771817
#endif
772818
#endif
773819
}
774-
result = MP_PARSE_NODE_NULL;
820+
parser.tree.root = MP_PARSE_NODE_NULL;
775821
goto finished;
776822
}
823+
824+
void mp_parse_tree_clear(mp_parse_tree_t *tree) {
825+
mp_parse_chunk_t *chunk = tree->chunk;
826+
while (chunk != NULL) {
827+
mp_parse_chunk_t *next = chunk->union_.next;
828+
m_del(byte, chunk, sizeof(mp_parse_chunk_t) + chunk->alloc);
829+
chunk = next;
830+
}
831+
}

py/parse.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,6 @@ typedef struct _mp_parse_node_struct_t {
7676
#define MP_PARSE_NODE_STRUCT_NUM_NODES(pns) ((pns)->kind_num_nodes >> 8)
7777

7878
mp_parse_node_t mp_parse_node_new_leaf(mp_int_t kind, mp_int_t arg);
79-
void mp_parse_node_free(mp_parse_node_t pn);
8079
int mp_parse_node_extract_list(mp_parse_node_t *pn, mp_uint_t pn_kind, mp_parse_node_t **nodes);
8180
void mp_parse_node_print(mp_parse_node_t pn, mp_uint_t indent);
8281

@@ -86,8 +85,14 @@ typedef enum {
8685
MP_PARSE_EVAL_INPUT,
8786
} mp_parse_input_kind_t;
8887

88+
typedef struct _mp_parse_t {
89+
mp_parse_node_t root;
90+
struct _mp_parse_chunk_t *chunk;
91+
} mp_parse_tree_t;
92+
8993
// the parser will raise an exception if an error occurred
9094
// the parser will free the lexer before it returns
91-
mp_parse_node_t mp_parse(struct _mp_lexer_t *lex, mp_parse_input_kind_t input_kind);
95+
mp_parse_tree_t mp_parse(struct _mp_lexer_t *lex, mp_parse_input_kind_t input_kind);
96+
void mp_parse_tree_clear(mp_parse_tree_t *tree);
9297

9398
#endif // __MICROPY_INCLUDED_PY_PARSE_H__

py/runtime.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1308,8 +1308,8 @@ mp_obj_t mp_parse_compile_execute(mp_lexer_t *lex, mp_parse_input_kind_t parse_i
13081308
nlr_buf_t nlr;
13091309
if (nlr_push(&nlr) == 0) {
13101310
qstr source_name = lex->source_name;
1311-
mp_parse_node_t pn = mp_parse(lex, parse_input_kind);
1312-
mp_obj_t module_fun = mp_compile(pn, source_name, MP_EMIT_OPT_NONE, false);
1311+
mp_parse_tree_t parse_tree = mp_parse(lex, parse_input_kind);
1312+
mp_obj_t module_fun = mp_compile(&parse_tree, source_name, MP_EMIT_OPT_NONE, false);
13131313

13141314
mp_obj_t ret;
13151315
if (MICROPY_PY_BUILTINS_COMPILE && globals == NULL) {

qemu-arm/main.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ void do_str(const char *src, mp_parse_input_kind_t input_kind) {
2121
nlr_buf_t nlr;
2222
if (nlr_push(&nlr) == 0) {
2323
qstr source_name = lex->source_name;
24-
mp_parse_node_t pn = mp_parse(lex, input_kind);
25-
mp_obj_t module_fun = mp_compile(pn, source_name, MP_EMIT_OPT_NONE, true);
24+
mp_parse_tree_t parse_tree = mp_parse(lex, input_kind);
25+
mp_obj_t module_fun = mp_compile(&parse_tree, source_name, MP_EMIT_OPT_NONE, true);
2626
mp_call_function_0(module_fun);
2727
nlr_pop();
2828
} else {

qemu-arm/test_main.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ inline void do_str(const char *src) {
2525
nlr_buf_t nlr;
2626
if (nlr_push(&nlr) == 0) {
2727
qstr source_name = lex->source_name;
28-
mp_parse_node_t pn = mp_parse(lex, MP_PARSE_FILE_INPUT);
29-
mp_obj_t module_fun = mp_compile(pn, source_name, MP_EMIT_OPT_NONE, true);
28+
mp_parse_tree_t parse_tree = mp_parse(lex, MP_PARSE_FILE_INPUT);
29+
mp_obj_t module_fun = mp_compile(&parse_tree, source_name, MP_EMIT_OPT_NONE, true);
3030
mp_call_function_0(module_fun);
3131
nlr_pop();
3232
} else {

0 commit comments

Comments
 (0)