Skip to content

Commit ade9a05

Browse files
committed
py: Improve allocation policy of qstr data.
Previous to this patch all interned strings lived in their own malloc'd chunk. On average this wastes N/2 bytes per interned string, where N is the number-of-bytes for a quanta of the memory allocator (16 bytes on 32 bit archs). With this patch interned strings are concatenated into the same malloc'd chunk when possible. Such chunks are enlarged inplace when possible, and shrunk to fit when a new chunk is needed. RAM savings with this patch are highly varied, but should always show an improvement (unless only 3 or 4 strings are interned). New version typically uses about 70% of previous memory for the qstr data, and can lead to savings of around 10% of total memory footprint of a running script. Costs about 120 bytes code size on Thumb2 archs (depends on how many calls to gc_realloc are made).
1 parent c48740e commit ade9a05

9 files changed

Lines changed: 75 additions & 14 deletions

File tree

py/gc.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,7 @@ void *gc_realloc(void *ptr, mp_uint_t n_bytes) {
522522

523523
#else // Alternative gc_realloc impl
524524

525-
void *gc_realloc(void *ptr_in, mp_uint_t n_bytes) {
525+
void *gc_realloc(void *ptr_in, mp_uint_t n_bytes, bool allow_move) {
526526
if (MP_STATE_MEM(gc_lock_depth) > 0) {
527527
return NULL;
528528
}
@@ -624,6 +624,11 @@ void *gc_realloc(void *ptr_in, mp_uint_t n_bytes) {
624624
return ptr_in;
625625
}
626626

627+
if (!allow_move) {
628+
// not allowed to move memory block so return failure
629+
return NULL;
630+
}
631+
627632
// can't resize inplace; try to find a new contiguous chain
628633
void *ptr_out = gc_alloc(n_bytes,
629634
#if MICROPY_ENABLE_FINALISER

py/gc.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ void gc_collect_end(void);
4848
void *gc_alloc(mp_uint_t n_bytes, bool has_finaliser);
4949
void gc_free(void *ptr);
5050
mp_uint_t gc_nbytes(const void *ptr);
51-
void *gc_realloc(void *ptr, mp_uint_t n_bytes);
51+
void *gc_realloc(void *ptr, mp_uint_t n_bytes, bool allow_move);
5252

5353
typedef struct _gc_info_t {
5454
mp_uint_t total;

py/malloc.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,10 @@
5656
#define malloc(b) gc_alloc((b), false)
5757
#define malloc_with_finaliser(b) gc_alloc((b), true)
5858
#define free gc_free
59-
#define realloc gc_realloc
59+
#define realloc(ptr, n) gc_realloc(ptr, n, true)
60+
#define realloc_ext(ptr, n, mv) gc_realloc(ptr, n, mv)
61+
#else
62+
#define realloc_ext(ptr, n, mv) realloc(ptr, n)
6063
#endif // MICROPY_ENABLE_GC
6164

6265
void *m_malloc(size_t num_bytes) {
@@ -134,11 +137,11 @@ void *m_realloc(void *ptr, size_t new_num_bytes) {
134137
}
135138

136139
#if MICROPY_MALLOC_USES_ALLOCATED_SIZE
137-
void *m_realloc_maybe(void *ptr, size_t old_num_bytes, size_t new_num_bytes) {
140+
void *m_realloc_maybe(void *ptr, size_t old_num_bytes, size_t new_num_bytes, bool allow_move) {
138141
#else
139-
void *m_realloc_maybe(void *ptr, size_t new_num_bytes) {
142+
void *m_realloc_maybe(void *ptr, size_t new_num_bytes, bool allow_move) {
140143
#endif
141-
void *new_ptr = realloc(ptr, new_num_bytes);
144+
void *new_ptr = realloc_ext(ptr, new_num_bytes, allow_move);
142145
#if MICROPY_MEM_STATS
143146
// At first thought, "Total bytes allocated" should only grow,
144147
// after all, it's *total*. But consider for example 2K block

py/misc.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,12 @@ typedef unsigned int uint;
6363
#endif
6464
#if MICROPY_MALLOC_USES_ALLOCATED_SIZE
6565
#define m_renew(type, ptr, old_num, new_num) ((type*)(m_realloc((ptr), sizeof(type) * (old_num), sizeof(type) * (new_num))))
66-
#define m_renew_maybe(type, ptr, old_num, new_num) ((type*)(m_realloc_maybe((ptr), sizeof(type) * (old_num), sizeof(type) * (new_num))))
66+
#define m_renew_maybe(type, ptr, old_num, new_num, allow_move) ((type*)(m_realloc_maybe((ptr), sizeof(type) * (old_num), sizeof(type) * (new_num), (allow_move))))
6767
#define m_del(type, ptr, num) m_free(ptr, sizeof(type) * (num))
6868
#define m_del_var(obj_type, var_type, var_num, ptr) (m_free(ptr, sizeof(obj_type) + sizeof(var_type) * (var_num)))
6969
#else
7070
#define m_renew(type, ptr, old_num, new_num) ((type*)(m_realloc((ptr), sizeof(type) * (new_num))))
71-
#define m_renew_maybe(type, ptr, old_num, new_num) ((type*)(m_realloc_maybe((ptr), sizeof(type) * (new_num))))
71+
#define m_renew_maybe(type, ptr, old_num, new_num, allow_move) ((type*)(m_realloc_maybe((ptr), sizeof(type) * (new_num), (allow_move))))
7272
#define m_del(type, ptr, num) ((void)(num), m_free(ptr))
7373
#define m_del_var(obj_type, var_type, var_num, ptr) ((void)(var_num), m_free(ptr))
7474
#endif
@@ -80,11 +80,11 @@ void *m_malloc_with_finaliser(size_t num_bytes);
8080
void *m_malloc0(size_t num_bytes);
8181
#if MICROPY_MALLOC_USES_ALLOCATED_SIZE
8282
void *m_realloc(void *ptr, size_t old_num_bytes, size_t new_num_bytes);
83-
void *m_realloc_maybe(void *ptr, size_t old_num_bytes, size_t new_num_bytes);
83+
void *m_realloc_maybe(void *ptr, size_t old_num_bytes, size_t new_num_bytes, bool allow_move);
8484
void m_free(void *ptr, size_t num_bytes);
8585
#else
8686
void *m_realloc(void *ptr, size_t new_num_bytes);
87-
void *m_realloc_maybe(void *ptr, size_t new_num_bytes);
87+
void *m_realloc_maybe(void *ptr, size_t new_num_bytes, bool allow_move);
8888
void m_free(void *ptr);
8989
#endif
9090
void *m_malloc_fail(size_t num_bytes);

py/mpconfig.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,14 @@
7575
#define MICROPY_ALLOC_GC_STACK_SIZE (64)
7676
#endif
7777

78+
// Number of bytes to allocate initially when creating new chunks to store
79+
// interned string data. Smaller numbers lead to more chunks being needed
80+
// and more wastage at the end of the chunk. Larger numbers lead to wasted
81+
// space at the end when no more strings need interning.
82+
#ifndef MICROPY_ALLOC_QSTR_CHUNK_INIT
83+
#define MICROPY_ALLOC_QSTR_CHUNK_INIT (128)
84+
#endif
85+
7886
// Initial amount for lexer indentation level
7987
#ifndef MICROPY_ALLOC_LEXER_INDENT_INIT
8088
#define MICROPY_ALLOC_LEXER_INDENT_INIT (10)

py/mpstate.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,12 @@ typedef struct _mp_state_vm_t {
131131
// END ROOT POINTER SECTION
132132
////////////////////////////////////////////////////////////
133133

134+
// pointer and sizes to store interned string data
135+
// (qstr_last_chunk can be root pointer but is also stored in qstr pool)
136+
byte *qstr_last_chunk;
137+
mp_uint_t qstr_last_alloc;
138+
mp_uint_t qstr_last_used;
139+
134140
// Stack top at the start of program
135141
// Note: this entry is used to locate the end of the root pointer section.
136142
char *stack_top;

py/objexcept.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ void mp_obj_exception_add_traceback(mp_obj_t self_in, qstr file, mp_uint_t line,
444444
self->traceback_len = 0;
445445
} else if (self->traceback_len + 3 > self->traceback_alloc) {
446446
// be conservative with growing traceback data
447-
mp_uint_t *tb_data = m_renew_maybe(mp_uint_t, self->traceback_data, self->traceback_alloc, self->traceback_alloc + 3);
447+
mp_uint_t *tb_data = m_renew_maybe(mp_uint_t, self->traceback_data, self->traceback_alloc, self->traceback_alloc + 3, true);
448448
if (tb_data == NULL) {
449449
return;
450450
}

py/parse.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ STATIC void push_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule,
135135
return;
136136
}
137137
if (parser->rule_stack_top >= parser->rule_stack_alloc) {
138-
rule_stack_t *rs = m_renew_maybe(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC);
138+
rule_stack_t *rs = m_renew_maybe(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC, true);
139139
if (rs == NULL) {
140140
memory_error(parser);
141141
return;
@@ -293,7 +293,7 @@ STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
293293
return;
294294
}
295295
if (parser->result_stack_top >= parser->result_stack_alloc) {
296-
mp_parse_node_t *stack = m_renew_maybe(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC);
296+
mp_parse_node_t *stack = m_renew_maybe(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC, true);
297297
if (stack == NULL) {
298298
memory_error(parser);
299299
return;

py/qstr.c

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ STATIC const qstr_pool_t const_pool = {
9292

9393
void qstr_init(void) {
9494
MP_STATE_VM(last_pool) = (qstr_pool_t*)&const_pool; // we won't modify the const_pool since it has no allocated room left
95+
MP_STATE_VM(qstr_last_chunk) = NULL;
9596
}
9697

9798
STATIC const byte *find_qstr(qstr q) {
@@ -152,8 +153,46 @@ qstr qstr_from_strn(const char *str, mp_uint_t len) {
152153
assert(len < (1 << (8 * MICROPY_QSTR_BYTES_IN_LEN)));
153154
qstr q = qstr_find_strn(str, len);
154155
if (q == 0) {
156+
// qstr does not exist in interned pool so need to add it
157+
158+
// compute number of bytes needed to intern this string
159+
mp_uint_t n_bytes = 2 + MICROPY_QSTR_BYTES_IN_LEN + len + 1;
160+
161+
if (MP_STATE_VM(qstr_last_chunk) != NULL && MP_STATE_VM(qstr_last_used) + n_bytes > MP_STATE_VM(qstr_last_alloc)) {
162+
// not enough room at end of previously interned string so try to grow
163+
byte *new_p = m_renew_maybe(byte, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_alloc) + n_bytes, false);
164+
if (new_p == NULL) {
165+
// could not grow existing memory; shrink it to fit previous
166+
(void)m_renew(byte, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_used));
167+
MP_STATE_VM(qstr_last_chunk) = NULL;
168+
} else {
169+
// could grow existing memory
170+
MP_STATE_VM(qstr_last_alloc) += n_bytes;
171+
}
172+
}
173+
174+
if (MP_STATE_VM(qstr_last_chunk) == NULL) {
175+
// no existing memory for the interned string so allocate a new chunk
176+
mp_uint_t al = n_bytes;
177+
if (al < MICROPY_ALLOC_QSTR_CHUNK_INIT) {
178+
al = MICROPY_ALLOC_QSTR_CHUNK_INIT;
179+
}
180+
MP_STATE_VM(qstr_last_chunk) = m_new_maybe(byte, al);
181+
if (MP_STATE_VM(qstr_last_chunk) == NULL) {
182+
// failed to allocate a large chunk so try with exact size
183+
MP_STATE_VM(qstr_last_chunk) = m_new(byte, n_bytes);
184+
al = n_bytes;
185+
}
186+
MP_STATE_VM(qstr_last_alloc) = al;
187+
MP_STATE_VM(qstr_last_used) = 0;
188+
}
189+
190+
// allocate memory from the chunk for this new interned string's data
191+
byte *q_ptr = MP_STATE_VM(qstr_last_chunk) + MP_STATE_VM(qstr_last_used);
192+
MP_STATE_VM(qstr_last_used) += n_bytes;
193+
194+
// store the interned strings' data
155195
mp_uint_t hash = qstr_compute_hash((const byte*)str, len);
156-
byte *q_ptr = m_new(byte, 2 + MICROPY_QSTR_BYTES_IN_LEN + len + 1);
157196
q_ptr[0] = hash;
158197
q_ptr[1] = hash >> 8;
159198
Q_SET_LENGTH(q_ptr, len);

0 commit comments

Comments
 (0)