From b44a55a7e7ae7265929be02df85717c856c60266 Mon Sep 17 00:00:00 2001 From: Phil Howard Date: Mon, 22 Jun 2026 12:14:15 +0100 Subject: [PATCH 1/3] py/gc: Add no-scan tag to skip GC marking of pure-data buffers. The mark phase conservatively scans every word of every reachable block for pointers, so a large bytearray/array buffer is scanned in full on every collection despite holding no pointers. Add an optional per-block "no-scan table" (NTB, 1 bit/block, like the finaliser/weakref tables) and a GC_ALLOC_FLAG_NO_SCAN; tagged head blocks are marked but their contents are not scanned. A no-scan block has no child pointers, so the mark phase also skips the chain-walk for it (n_blocks left 0) and avoids re-reading the allocation table for every block of the buffer just to mark it - this matters for large buffers in slow PSRAM. The tag is written on every allocation (so a reused block never inherits a stale bit) and preserved across realloc moves. Exposed as m_new_no_scan() / m_malloc_no_scan(), which alias plain m_new()/gc_alloc() when disabled, and gated behind MICROPY_GC_NO_SCAN (default off). This commit adds the mechanism only; callers are converted separately. Signed-off-by: Phil Howard --- py/gc.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++-- py/gc.h | 3 +++ py/malloc.c | 17 +++++++++++++++++ py/misc.h | 9 +++++++++ py/mpconfig.h | 8 ++++++++ py/mpstate.h | 3 +++ 6 files changed, 91 insertions(+), 2 deletions(-) diff --git a/py/gc.c b/py/gc.c index c1a19da3efadb..6807f67131d21 100644 --- a/py/gc.c +++ b/py/gc.c @@ -119,6 +119,16 @@ #define WTB_CLEAR(area, block) do { area->gc_weakref_table_start[(block) / BLOCKS_PER_WTB] &= (~(1 << ((block) & 7))); } while (0) #endif +#if MICROPY_GC_NO_SCAN +// NTB = no-scan table byte +// if set on a head block, the GC mark phase does not scan its contents for +// pointers (the block holds pure data, e.g. a bytearray/array buffer). 
+#define BLOCKS_PER_NTB (8) +#define NTB_GET(area, block) ((area->gc_no_scan_table_start[(block) / BLOCKS_PER_NTB] >> ((block) & 7)) & 1) +#define NTB_SET(area, block) do { area->gc_no_scan_table_start[(block) / BLOCKS_PER_NTB] |= (1 << ((block) & 7)); } while (0) +#define NTB_CLEAR(area, block) do { area->gc_no_scan_table_start[(block) / BLOCKS_PER_NTB] &= (~(1 << ((block) & 7))); } while (0) +#endif + #if MICROPY_PY_THREAD && !MICROPY_PY_THREAD_GIL #define GC_MUTEX_INIT() mp_thread_recursive_mutex_init(&MP_STATE_MEM(gc_mutex)) #define GC_ENTER() mp_thread_recursive_mutex_lock(&MP_STATE_MEM(gc_mutex), 1) @@ -151,7 +161,7 @@ static void gc_setup_area(mp_state_mem_area_t *area, void *start, void *end) { // P = A * BLOCKS_PER_ATB * BYTES_PER_BLOCK // => T = A * (1 + BLOCKS_PER_ATB / BLOCKS_PER_FTB + BLOCKS_PER_ATB / BLOCKS_PER_WTB + BLOCKS_PER_ATB * BYTES_PER_BLOCK) size_t total_byte_len = (byte *)end - (byte *)start; - #if MICROPY_ENABLE_FINALISER || MICROPY_PY_WEAKREF + #if MICROPY_ENABLE_FINALISER || MICROPY_PY_WEAKREF || MICROPY_GC_NO_SCAN area->gc_alloc_table_byte_len = (total_byte_len - ALLOC_TABLE_GAP_BYTE) * MP_BITS_PER_BYTE / ( @@ -162,6 +172,9 @@ static void gc_setup_area(mp_state_mem_area_t *area, void *start, void *end) { #if MICROPY_PY_WEAKREF + MP_BITS_PER_BYTE * BLOCKS_PER_ATB / BLOCKS_PER_WTB #endif + #if MICROPY_GC_NO_SCAN + + MP_BITS_PER_BYTE * BLOCKS_PER_ATB / BLOCKS_PER_NTB + #endif + MP_BITS_PER_BYTE * BLOCKS_PER_ATB * BYTES_PER_BLOCK ); #else @@ -183,6 +196,11 @@ static void gc_setup_area(mp_state_mem_area_t *area, void *start, void *end) { area->gc_weakref_table_start = next_table; next_table += gc_weakref_table_byte_len; #endif + #if MICROPY_GC_NO_SCAN + size_t gc_no_scan_table_byte_len = (area->gc_alloc_table_byte_len * BLOCKS_PER_ATB + BLOCKS_PER_NTB - 1) / BLOCKS_PER_NTB; + area->gc_no_scan_table_start = next_table; + next_table += gc_no_scan_table_byte_len; + #endif // Allocate the GC pool of heap blocks. 
size_t gc_pool_block_len = area->gc_alloc_table_byte_len * BLOCKS_PER_ATB; @@ -199,6 +217,9 @@ static void gc_setup_area(mp_state_mem_area_t *area, void *start, void *end) { #if MICROPY_PY_WEAKREF + gc_weakref_table_byte_len #endif + #if MICROPY_GC_NO_SCAN + + gc_no_scan_table_byte_len + #endif ); area->gc_last_free_atb_index = 0; @@ -511,7 +532,15 @@ static void gc_mark_subtree(size_t block) #endif // work out number of consecutive blocks in the chain starting with this one + // A block tagged no-scan holds pure data with no child pointers, so we + // skip walking its chain here: leaving n_blocks == 0 makes the scan loop + // below a no-op. This avoids reading the allocation table for every block + // of a large data buffer (e.g. a multi-MB bytearray, especially in slow + // PSRAM) just to mark it. size_t n_blocks = 0; + #if MICROPY_GC_NO_SCAN + if (!NTB_GET(area, block)) + #endif do { n_blocks += 1; } while (ATB_GET_KIND(area, block + n_blocks) == AT_TAIL); @@ -971,6 +1000,17 @@ void *gc_alloc(size_t n_bytes, unsigned int alloc_flags) { // mark first block as used head ATB_FREE_TO_HEAD(area, start_block); + #if MICROPY_GC_NO_SCAN + // Tag (or untag) the head block so the mark phase knows whether to scan it. + // Done under the lock we already hold; always written so a reused block + // never inherits a stale no-scan bit. + if (alloc_flags & GC_ALLOC_FLAG_NO_SCAN) { + NTB_SET(area, start_block); + } else { + NTB_CLEAR(area, start_block); + } + #endif + // mark rest of blocks as used tail // TODO for a run of many blocks can make this more efficient for (size_t bl = start_block + 1; bl <= end_block; bl++) { @@ -1013,6 +1053,7 @@ void *gc_alloc(size_t n_bytes, unsigned int alloc_flags) { (void)has_finaliser; #endif + #if EXTENSIVE_HEAP_PROFILING gc_dump_alloc_table(&mp_plat_print); #endif @@ -1260,6 +1301,14 @@ void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) { bool ftb_state = false; #endif + unsigned int realloc_flags = ftb_state ? 
GC_ALLOC_FLAG_HAS_FINALISER : 0; + #if MICROPY_GC_NO_SCAN + // Preserve the no-scan tag if the block being moved was pure data. + if (NTB_GET(area, block)) { + realloc_flags |= GC_ALLOC_FLAG_NO_SCAN; + } + #endif + GC_EXIT(); if (!allow_move) { @@ -1268,7 +1317,7 @@ void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) { } // can't resize inplace; try to find a new contiguous chain - void *ptr_out = gc_alloc(n_bytes, ftb_state); + void *ptr_out = gc_alloc(n_bytes, realloc_flags); // check that the alloc succeeded if (ptr_out == NULL) { diff --git a/py/gc.h b/py/gc.h index ca73685d9474d..50f1b746c382c 100644 --- a/py/gc.h +++ b/py/gc.h @@ -65,6 +65,9 @@ void gc_weakref_sweep(void); enum { GC_ALLOC_FLAG_HAS_FINALISER = 1, + // Block contains no heap pointers, so the GC mark phase can skip scanning + // its contents (see MICROPY_GC_NO_SCAN). Only valid for pure-data buffers. + GC_ALLOC_FLAG_NO_SCAN = 2, }; void *gc_alloc(size_t n_bytes, unsigned int alloc_flags); diff --git a/py/malloc.c b/py/malloc.c index 959034e7cb190..38c10eb570576 100644 --- a/py/malloc.c +++ b/py/malloc.c @@ -58,6 +58,7 @@ #undef realloc #define malloc(b) gc_alloc((b), 0) #define malloc_with_finaliser(b) gc_alloc((b), GC_ALLOC_FLAG_HAS_FINALISER) +#define malloc_no_scan(b) gc_alloc((b), GC_ALLOC_FLAG_NO_SCAN) #define free gc_free #define realloc(ptr, n) gc_realloc(ptr, n, true) #define realloc_ext(ptr, n, mv) gc_realloc(ptr, n, mv) @@ -123,6 +124,22 @@ void *m_malloc_with_finaliser(size_t num_bytes) { } #endif +#if MICROPY_GC_NO_SCAN +void *m_malloc_no_scan(size_t num_bytes) { + void *ptr = malloc_no_scan(num_bytes); + if (ptr == NULL && num_bytes != 0) { + m_malloc_fail(num_bytes); + } + #if MICROPY_MEM_STATS + MP_STATE_MEM(total_bytes_allocated) += num_bytes; + MP_STATE_MEM(current_bytes_allocated) += num_bytes; + UPDATE_PEAK(); + #endif + DEBUG_printf("malloc(no-scan) %d : %p\n", num_bytes, ptr); + return ptr; +} +#endif + void *m_malloc0(size_t num_bytes) { void *ptr = 
m_malloc(num_bytes); // If this config is set then the GC clears all memory, so we don't need to. diff --git a/py/misc.h b/py/misc.h index 2fe0f11796b3e..d0921f48794c9 100644 --- a/py/misc.h +++ b/py/misc.h @@ -100,6 +100,15 @@ typedef unsigned int uint; #define m_new(type, num) ((type *)(m_malloc(sizeof(type) * (num)))) #define m_new_maybe(type, num) ((type *)(m_malloc_maybe(sizeof(type) * (num)))) +// m_new_no_scan: like m_new, but the buffer is tagged so the GC won't scan it +// for pointers. ONLY use for buffers that are guaranteed to hold pure data +// (no mp_obj_t / heap pointers), e.g. bytearray/array item storage. +#if MICROPY_GC_NO_SCAN +void *m_malloc_no_scan(size_t num_bytes); +#define m_new_no_scan(type, num) ((type *)(m_malloc_no_scan(sizeof(type) * (num)))) +#else +#define m_new_no_scan(type, num) m_new(type, num) +#endif #define m_new0(type, num) ((type *)(m_malloc0(sizeof(type) * (num)))) #define m_new_obj(type) (m_new(type, 1)) #define m_new_obj_maybe(type) (m_new_maybe(type, 1)) diff --git a/py/mpconfig.h b/py/mpconfig.h index 6bd179e3b8351..b33cd97bf1f67 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -700,6 +700,14 @@ typedef uint64_t mp_uint_t; #define MICROPY_OPT_MAP_LOOKUP_CACHE_SIZE (128) #endif +// Allow pure-data allocations (e.g. bytearray/array buffers) to be tagged so +// the GC mark phase skips scanning their contents for pointers. Costs one bit +// per heap block for the no-scan table; saves scanning large data buffers +// (framebuffers etc.) on every collection. +#ifndef MICROPY_GC_NO_SCAN +#define MICROPY_GC_NO_SCAN (0) +#endif + // Whether to use fast versions of bitwise operations (and, or, xor) when the // arguments are both positive. Increases Thumb2 code size by about 250 bytes. 
#ifndef MICROPY_OPT_MPZ_BITWISE diff --git a/py/mpstate.h b/py/mpstate.h index 7662813b72ad4..2dbbc2e573820 100644 --- a/py/mpstate.h +++ b/py/mpstate.h @@ -113,6 +113,9 @@ typedef struct _mp_state_mem_area_t { #if MICROPY_PY_WEAKREF byte *gc_weakref_table_start; #endif + #if MICROPY_GC_NO_SCAN + byte *gc_no_scan_table_start; + #endif byte *gc_pool_start; byte *gc_pool_end; From 8fb1ea18aa1ed8736fe04ffc386e3160961b1a4d Mon Sep 17 00:00:00 2001 From: Phil Howard Date: Mon, 22 Jun 2026 12:14:15 +0100 Subject: [PATCH 2/3] py: Allocate pure-data buffers as no-scan. Tag the buffers that only ever hold raw data (never heap pointers) with m_new_no_scan(), so the GC mark phase skips scanning them once MICROPY_GC_NO_SCAN is enabled (a no-op otherwise): py/objarray.c: array/bytearray item storage. py/objstr.c: str/bytes payloads. py/vstr.c: the vstr builder, growth via gc_realloc preserves the tag. Signed-off-by: Phil Howard --- py/objarray.c | 4 +++- py/objstr.c | 3 ++- py/vstr.c | 5 ++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/py/objarray.c b/py/objarray.c index 75bc671c168cc..4e5b0bc847bdc 100644 --- a/py/objarray.c +++ b/py/objarray.c @@ -107,7 +107,9 @@ static mp_obj_array_t *array_new(char typecode, size_t n) { o->typecode = typecode; o->free = 0; o->len = n; - o->items = m_new(byte, typecode_size * o->len); + // array/bytearray items are always pure numeric data (no heap pointers), + // so tag the buffer no-scan to keep it out of the GC mark scan. + o->items = m_new_no_scan(byte, typecode_size * o->len); return o; } #endif diff --git a/py/objstr.c b/py/objstr.c index 06afb91fc7f73..6321ea5d1c104 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -2236,7 +2236,8 @@ mp_obj_t mp_obj_new_str_copy(const mp_obj_type_t *type, const byte *data, size_t o->len = len; if (data) { o->hash = qstr_compute_hash(data, len); - byte *p = m_new(byte, len + 1); + // str/bytes payload is pure data (no heap pointers): tag it no-scan. 
+ byte *p = m_new_no_scan(byte, len + 1); o->data = p; memcpy(p, data, len * sizeof(byte)); p[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings diff --git a/py/vstr.c b/py/vstr.c index faf216d657ae8..3e064961837d3 100644 --- a/py/vstr.c +++ b/py/vstr.c @@ -44,7 +44,10 @@ void vstr_init(vstr_t *vstr, size_t alloc) { } vstr->alloc = alloc; vstr->len = 0; - vstr->buf = m_new(char, vstr->alloc); + // vstr holds raw bytes only (string/repr/JSON building), never heap + // pointers, so tag it no-scan to keep it out of the GC mark scan. Growth + // via m_renew preserves the tag (gc_realloc carries it across a move). + vstr->buf = m_new_no_scan(char, vstr->alloc); vstr->fixed_buf = false; } From 1fb95327d8bfcba290fa1c378a418709f47face9 Mon Sep 17 00:00:00 2001 From: Phil Howard Date: Mon, 22 Jun 2026 12:14:15 +0100 Subject: [PATCH 3/3] rp2: Enable no-scan GC. For CI, build tests only. Signed-off-by: Phil Howard --- ports/rp2/mpconfigport.h | 1 + 1 file changed, 1 insertion(+) diff --git a/ports/rp2/mpconfigport.h b/ports/rp2/mpconfigport.h index 0bfaf6098ad4c..755caea06df34 100644 --- a/ports/rp2/mpconfigport.h +++ b/ports/rp2/mpconfigport.h @@ -112,6 +112,7 @@ // Optimisations #define MICROPY_OPT_COMPUTED_GOTO (1) +#define MICROPY_GC_NO_SCAN (1) // Python internal features #define MICROPY_TRACKED_ALLOC (MICROPY_SSL_MBEDTLS || MICROPY_BLUETOOTH_BTSTACK)