Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ports/rp2/mpconfigport.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@

// Optimisations
#define MICROPY_OPT_COMPUTED_GOTO (1)
// Allow pure-data allocations to be tagged so the GC mark phase skips
// scanning them for pointers (the default for this option is 0).
#define MICROPY_GC_NO_SCAN (1)

// Python internal features
#define MICROPY_TRACKED_ALLOC (MICROPY_SSL_MBEDTLS || MICROPY_BLUETOOTH_BTSTACK)
Expand Down
53 changes: 51 additions & 2 deletions py/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,16 @@
#define WTB_CLEAR(area, block) do { area->gc_weakref_table_start[(block) / BLOCKS_PER_WTB] &= (~(1 << ((block) & 7))); } while (0)
#endif

#if MICROPY_GC_NO_SCAN
// NTB = no-scan table byte: one bit per heap block, eight blocks per byte.
// A set bit on a head block tells the GC mark phase not to scan that
// allocation for pointers, because it holds pure data (for example a
// bytearray/array buffer).
#define BLOCKS_PER_NTB (8)
// Table byte holding the bit for `block`, and that bit's mask within the byte.
#define NTB_BYTE(area, block) (area->gc_no_scan_table_start[(block) / BLOCKS_PER_NTB])
#define NTB_MASK(block) (1 << ((block) & (BLOCKS_PER_NTB - 1)))
// Query, set and clear the no-scan bit of `block`; NTB_GET yields 0 or 1.
#define NTB_GET(area, block) ((NTB_BYTE(area, block) & NTB_MASK(block)) != 0)
#define NTB_SET(area, block) do { NTB_BYTE(area, block) |= NTB_MASK(block); } while (0)
#define NTB_CLEAR(area, block) do { NTB_BYTE(area, block) &= ~NTB_MASK(block); } while (0)
#endif

#if MICROPY_PY_THREAD && !MICROPY_PY_THREAD_GIL
#define GC_MUTEX_INIT() mp_thread_recursive_mutex_init(&MP_STATE_MEM(gc_mutex))
#define GC_ENTER() mp_thread_recursive_mutex_lock(&MP_STATE_MEM(gc_mutex), 1)
Expand Down Expand Up @@ -151,7 +161,7 @@ static void gc_setup_area(mp_state_mem_area_t *area, void *start, void *end) {
// P = A * BLOCKS_PER_ATB * BYTES_PER_BLOCK
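// => T = A * (1 + BLOCKS_PER_ATB / BLOCKS_PER_FTB + BLOCKS_PER_ATB / BLOCKS_PER_WTB + BLOCKS_PER_ATB / BLOCKS_PER_NTB + BLOCKS_PER_ATB * BYTES_PER_BLOCK)
// (the BLOCKS_PER_NTB term is the no-scan table; it is only present when MICROPY_GC_NO_SCAN is enabled)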
size_t total_byte_len = (byte *)end - (byte *)start;
#if MICROPY_ENABLE_FINALISER || MICROPY_PY_WEAKREF
#if MICROPY_ENABLE_FINALISER || MICROPY_PY_WEAKREF || MICROPY_GC_NO_SCAN
area->gc_alloc_table_byte_len = (total_byte_len - ALLOC_TABLE_GAP_BYTE)
* MP_BITS_PER_BYTE
/ (
Expand All @@ -162,6 +172,9 @@ static void gc_setup_area(mp_state_mem_area_t *area, void *start, void *end) {
#if MICROPY_PY_WEAKREF
+ MP_BITS_PER_BYTE * BLOCKS_PER_ATB / BLOCKS_PER_WTB
#endif
#if MICROPY_GC_NO_SCAN
+ MP_BITS_PER_BYTE * BLOCKS_PER_ATB / BLOCKS_PER_NTB
#endif
+ MP_BITS_PER_BYTE * BLOCKS_PER_ATB * BYTES_PER_BLOCK
);
#else
Expand All @@ -183,6 +196,11 @@ static void gc_setup_area(mp_state_mem_area_t *area, void *start, void *end) {
area->gc_weakref_table_start = next_table;
next_table += gc_weakref_table_byte_len;
#endif
#if MICROPY_GC_NO_SCAN
size_t gc_no_scan_table_byte_len = (area->gc_alloc_table_byte_len * BLOCKS_PER_ATB + BLOCKS_PER_NTB - 1) / BLOCKS_PER_NTB;
area->gc_no_scan_table_start = next_table;
next_table += gc_no_scan_table_byte_len;
#endif

// Allocate the GC pool of heap blocks.
size_t gc_pool_block_len = area->gc_alloc_table_byte_len * BLOCKS_PER_ATB;
Expand All @@ -199,6 +217,9 @@ static void gc_setup_area(mp_state_mem_area_t *area, void *start, void *end) {
#if MICROPY_PY_WEAKREF
+ gc_weakref_table_byte_len
#endif
#if MICROPY_GC_NO_SCAN
+ gc_no_scan_table_byte_len
#endif
);

area->gc_last_free_atb_index = 0;
Expand Down Expand Up @@ -511,7 +532,15 @@ static void gc_mark_subtree(size_t block)
#endif

// work out number of consecutive blocks in the chain starting with this one
// A block tagged no-scan holds pure data with no child pointers, so we
// skip walking its chain here: leaving n_blocks == 0 makes the scan loop
// below a no-op. This avoids reading the allocation table for every block
// of a large data buffer (e.g. a multi-MB bytearray, especially in slow
// PSRAM) just to mark it.
size_t n_blocks = 0;
#if MICROPY_GC_NO_SCAN
if (!NTB_GET(area, block))
#endif
do {
n_blocks += 1;
} while (ATB_GET_KIND(area, block + n_blocks) == AT_TAIL);
Expand Down Expand Up @@ -971,6 +1000,17 @@ void *gc_alloc(size_t n_bytes, unsigned int alloc_flags) {
// mark first block as used head
ATB_FREE_TO_HEAD(area, start_block);

#if MICROPY_GC_NO_SCAN
// Tag (or untag) the head block so the mark phase knows whether to scan it.
// Done under the lock we already hold; always written so a reused block
// never inherits a stale no-scan bit.
if (alloc_flags & GC_ALLOC_FLAG_NO_SCAN) {
NTB_SET(area, start_block);
} else {
NTB_CLEAR(area, start_block);
}
#endif

// mark rest of blocks as used tail
// TODO for a run of many blocks can make this more efficient
for (size_t bl = start_block + 1; bl <= end_block; bl++) {
Expand Down Expand Up @@ -1013,6 +1053,7 @@ void *gc_alloc(size_t n_bytes, unsigned int alloc_flags) {
(void)has_finaliser;
#endif


#if EXTENSIVE_HEAP_PROFILING
gc_dump_alloc_table(&mp_plat_print);
#endif
Expand Down Expand Up @@ -1260,6 +1301,14 @@ void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) {
bool ftb_state = false;
#endif

unsigned int realloc_flags = ftb_state ? GC_ALLOC_FLAG_HAS_FINALISER : 0;
#if MICROPY_GC_NO_SCAN
// Preserve the no-scan tag if the block being moved was pure data.
if (NTB_GET(area, block)) {
realloc_flags |= GC_ALLOC_FLAG_NO_SCAN;
}
#endif

GC_EXIT();

if (!allow_move) {
Expand All @@ -1268,7 +1317,7 @@ void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) {
}

// can't resize inplace; try to find a new contiguous chain
void *ptr_out = gc_alloc(n_bytes, ftb_state);
void *ptr_out = gc_alloc(n_bytes, realloc_flags);

// check that the alloc succeeded
if (ptr_out == NULL) {
Expand Down
3 changes: 3 additions & 0 deletions py/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ void gc_weakref_sweep(void);

// Bit flags for the alloc_flags argument of gc_alloc(); they may be OR-ed.
enum {
    // The allocation has a finaliser.
    GC_ALLOC_FLAG_HAS_FINALISER = 1 << 0,
    // The allocation holds no heap pointers, so the GC mark phase may skip
    // scanning its contents (see MICROPY_GC_NO_SCAN). Only valid for
    // pure-data buffers.
    GC_ALLOC_FLAG_NO_SCAN = 1 << 1,
};

void *gc_alloc(size_t n_bytes, unsigned int alloc_flags);
Expand Down
17 changes: 17 additions & 0 deletions py/malloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
#undef realloc
#define malloc(b) gc_alloc((b), 0)
#define malloc_with_finaliser(b) gc_alloc((b), GC_ALLOC_FLAG_HAS_FINALISER)
#define malloc_no_scan(b) gc_alloc((b), GC_ALLOC_FLAG_NO_SCAN)
#define free gc_free
#define realloc(ptr, n) gc_realloc(ptr, n, true)
#define realloc_ext(ptr, n, mv) gc_realloc(ptr, n, mv)
Expand Down Expand Up @@ -123,6 +124,22 @@ void *m_malloc_with_finaliser(size_t num_bytes) {
}
#endif

#if MICROPY_GC_NO_SCAN
// Allocate num_bytes through malloc_no_scan(), i.e. memory that the caller
// promises will hold pure data and no heap pointers.
// Returns the new buffer. m_malloc_fail() is invoked when the allocator
// returns NULL for a non-zero request; a NULL result for a zero-byte request
// is passed back to the caller as-is.
void *m_malloc_no_scan(size_t num_bytes) {
    void *buf = malloc_no_scan(num_bytes);
    if (num_bytes != 0 && buf == NULL) {
        // A NULL result only counts as a failure for a non-empty request.
        m_malloc_fail(num_bytes);
    }
    #if MICROPY_MEM_STATS
    // Account for the new allocation before refreshing the peak figure.
    MP_STATE_MEM(current_bytes_allocated) += num_bytes;
    MP_STATE_MEM(total_bytes_allocated) += num_bytes;
    UPDATE_PEAK();
    #endif
    DEBUG_printf("malloc(no-scan) %d : %p\n", num_bytes, buf);
    return buf;
}
#endif

void *m_malloc0(size_t num_bytes) {
void *ptr = m_malloc(num_bytes);
// If this config is set then the GC clears all memory, so we don't need to.
Expand Down
9 changes: 9 additions & 0 deletions py/misc.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,15 @@ typedef unsigned int uint;

#define m_new(type, num) ((type *)(m_malloc(sizeof(type) * (num))))
#define m_new_maybe(type, num) ((type *)(m_malloc_maybe(sizeof(type) * (num))))
// m_new_no_scan(type, num): allocate room for `num` items of `type`, as m_new
// does, but with the buffer tagged so the GC won't scan it for pointers.
// ONLY use it for buffers that are guaranteed to hold pure data (no mp_obj_t /
// heap pointers), e.g. byte buffers and string payloads.
#if !MICROPY_GC_NO_SCAN
// No-scan tagging is compiled out: fall back to a plain m_new allocation.
#define m_new_no_scan(type, num) m_new(type, num)
#else
void *m_malloc_no_scan(size_t num_bytes);
#define m_new_no_scan(type, num) ((type *)(m_malloc_no_scan(sizeof(type) * (num))))
#endif
#define m_new0(type, num) ((type *)(m_malloc0(sizeof(type) * (num))))
#define m_new_obj(type) (m_new(type, 1))
#define m_new_obj_maybe(type) (m_new_maybe(type, 1))
Expand Down
8 changes: 8 additions & 0 deletions py/mpconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,14 @@ typedef uint64_t mp_uint_t;
#define MICROPY_OPT_MAP_LOOKUP_CACHE_SIZE (128)
#endif

// Whether pure-data allocations (e.g. bytearray/array buffers) can be tagged
// so that the GC mark phase skips scanning their contents for pointers.
// Costs one bit per heap block for the no-scan table; saves scanning large
// data buffers (framebuffers etc.) on every collection.
#if !defined(MICROPY_GC_NO_SCAN)
#define MICROPY_GC_NO_SCAN (0)
#endif

// Whether to use fast versions of bitwise operations (and, or, xor) when the
// arguments are both positive. Increases Thumb2 code size by about 250 bytes.
#ifndef MICROPY_OPT_MPZ_BITWISE
Expand Down
3 changes: 3 additions & 0 deletions py/mpstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ typedef struct _mp_state_mem_area_t {
#if MICROPY_PY_WEAKREF
byte *gc_weakref_table_start;
#endif
#if MICROPY_GC_NO_SCAN
byte *gc_no_scan_table_start;
#endif
byte *gc_pool_start;
byte *gc_pool_end;

Expand Down
4 changes: 3 additions & 1 deletion py/objarray.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,9 @@ static mp_obj_array_t *array_new(char typecode, size_t n) {
o->typecode = typecode;
o->free = 0;
o->len = n;
o->items = m_new(byte, typecode_size * o->len);
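// Tag the item buffer no-scan to keep it out of the GC mark scan.
// NOTE(review): this is only safe for numeric typecodes. MicroPython's 'O'
// (object) and 'P' (pointer) typecode extensions store heap references in
// o->items, and a no-scan buffer would hide them from the GC mark phase.
// Confirm, and fall back to m_new() for those typecodes.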
o->items = m_new_no_scan(byte, typecode_size * o->len);
return o;
}
#endif
Expand Down
3 changes: 2 additions & 1 deletion py/objstr.c
Original file line number Diff line number Diff line change
Expand Up @@ -2236,7 +2236,8 @@ mp_obj_t mp_obj_new_str_copy(const mp_obj_type_t *type, const byte *data, size_t
o->len = len;
if (data) {
o->hash = qstr_compute_hash(data, len);
byte *p = m_new(byte, len + 1);
// str/bytes payload is pure data (no heap pointers): tag it no-scan.
byte *p = m_new_no_scan(byte, len + 1);
o->data = p;
memcpy(p, data, len * sizeof(byte));
p[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
Expand Down
5 changes: 4 additions & 1 deletion py/vstr.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@ void vstr_init(vstr_t *vstr, size_t alloc) {
}
vstr->alloc = alloc;
vstr->len = 0;
vstr->buf = m_new(char, vstr->alloc);
// vstr holds raw bytes only (string/repr/JSON building), never heap
// pointers, so tag it no-scan to keep it out of the GC mark scan. Growth
// via m_renew preserves the tag (gc_realloc carries it across a move).
vstr->buf = m_new_no_scan(char, vstr->alloc);
vstr->fixed_buf = false;
}

Expand Down
Loading