From e3bf14104592955ad47c52f73c23052354ccbb56 Mon Sep 17 00:00:00 2001 From: Alessandro Gatti Date: Thu, 4 Jun 2026 15:04:05 +0200 Subject: [PATCH 1/4] py/dynruntime.mk: Let natmods be built with Clang. This commit modifies the build rules for native modules in order to remove the dependence on GCC for creating native MPY files. Whilst the Unix port of MicroPython can be built with Clang by overriding the `CC` variable, natmods require a bit more work. A GCC build produces a compiler that is tailored for a single architecture, but Clang takes the opposite approach, so a single binary may target more than one architecture. Architecture selection is, by definition, not compatible between those two compilers. These changes attempt to make things easier to handle when using Clang. Native modules can now be built with something like this: make CC=clang ARCH=<arch> \ CFLAGS_EXTRA='--target=<target-triple>' \ LDFLAGS_EXTRA='--target=<target-triple>' So, for example, when building an x86 native module the command line will look something like this: make CC=clang ARCH=x86 \ CFLAGS_EXTRA='--target=i686-unknown-linux-gnu' \ LDFLAGS_EXTRA='--target=i686-unknown-linux-gnu' Clang and GCC, however, have different tolerances for deviations from the chosen C standard. Whilst GCC doesn't really mind whether a typedef is defined multiple times as long as it is defined to the same type, Clang does raise a warning which is then interpreted as an error. Unfortunately #ifdef/#ifndef does not work with typedefs, and the way native modules are built meant that `py/mpconfig.h` would first include the native module's generated configuration file and then proceed with the rest of the configuration. However, both files attempt to provide aliases for both `mp_int_t` and `mp_uint_t`, and that doesn't really work. Thus, the only sane way to work around it is to rely on the presence of a definition that indicates that `mp_int_t` and `mp_uint_t` are already there to begin with, letting builds proceed on both GCC and Clang. 
Signed-off-by: Alessandro Gatti --- py/dynruntime.mk | 26 +++++++++++++++++++------- py/mpconfig.h | 4 ++++ tools/mpy_ld.py | 3 ++- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/py/dynruntime.mk b/py/dynruntime.mk index 3902acbd0b32a..e544c7c0c8c40 100644 --- a/py/dynruntime.mk +++ b/py/dynruntime.mk @@ -2,6 +2,10 @@ # MPY_DIR must be set to the top of the MicroPython source tree BUILD ?= build +# GNU Make predefines CC (as `cc`), so `CC ?= gcc` would never take effect. +ifeq ($(origin CC),default) +CC = gcc +endif ECHO = @echo RM = /bin/rm @@ -104,13 +108,15 @@ MICROPY_FLOAT_IMPL ?= float else ifeq ($(ARCH),rv32imc) # rv32imc +ifeq ($(firstword $(shell $(CC) --version)),gcc) CROSS = riscv64-unknown-elf- +endif CFLAGS_ARCH += -march=rv32imac -mabi=ilp32 -mno-relax # If Picolibc is available then select it explicitly. Ubuntu 24.04 ships its # bare metal RISC-V toolchain with Picolibc rather than Newlib, and the default # is "nosys" so a value must be provided. To avoid having per-distro # workarounds, always select Picolibc if available. -PICOLIBC_SPECS := $(shell $(CROSS)gcc --print-file-name=picolibc.specs) +PICOLIBC_SPECS := $(shell $(CROSS)$(CC) --print-file-name=picolibc.specs) ifneq ($(PICOLIBC_SPECS),picolibc.specs) CFLAGS_ARCH += -specs=$(PICOLIBC_SPECS) USE_PICOLIBC := 1 @@ -123,13 +129,15 @@ MICROPY_FLOAT_IMPL ?= none else ifeq ($(ARCH),rv64imc) # rv64imc +ifeq ($(firstword $(shell $(CC) --version)),gcc) CROSS = riscv64-unknown-elf- +endif CFLAGS_ARCH += -march=rv64imac -mabi=lp64 -mno-relax # If Picolibc is available then select it explicitly. Ubuntu 24.04 ships its # bare metal RISC-V toolchain with Picolibc rather than Newlib, and the default # is "nosys" so a value must be provided. To avoid having per-distro # workarounds, always select Picolibc if available. 
-PICOLIBC_SPECS := $(shell $(CROSS)gcc --print-file-name=picolibc.specs) +PICOLIBC_SPECS := $(shell $(CROSS)$(CC) --print-file-name=picolibc.specs) ifneq ($(PICOLIBC_SPECS),picolibc.specs) CFLAGS_ARCH += -specs=$(PICOLIBC_SPECS) USE_PICOLIBC := 1 @@ -143,7 +151,11 @@ else $(error architecture '$(ARCH)' not supported) endif -ifneq ($(findstring -musl,$(shell $(CROSS)gcc -dumpmachine)),) +ifeq ($(findstring clang,$(shell $(CC) --version)),clang) +CROSS = +endif + +ifneq ($(findstring -musl,$(shell $(CROSS)$(CC) -dumpmachine)),) USE_MUSL := 1 endif @@ -175,8 +187,8 @@ LIBM_NAME := libc.a else LIBM_NAME := libm.a endif -LIBGCC_PATH := $(realpath $(shell $(CROSS)gcc $(CFLAGS) --print-libgcc-file-name)) -LIBM_PATH := $(realpath $(shell $(CROSS)gcc $(CFLAGS) --print-file-name=$(LIBM_NAME))) +LIBGCC_PATH := $(realpath $(shell $(CROSS)$(CC) $(CFLAGS) --print-libgcc-file-name)) +LIBM_PATH := $(realpath $(shell $(CROSS)$(CC) $(CFLAGS) --print-file-name=$(LIBM_NAME))) ifeq ($(USE_PICOLIBC),1) ifeq ($(LIBM_PATH),) # The CROSS toolchain prefix usually ends with a dash, but that may not be @@ -224,12 +236,12 @@ $(CONFIG_H): $(SRC) # Build .o from .c source files $(BUILD)/%.o: %.c $(CONFIG_H) Makefile $(ECHO) "CC $<" - $(Q)$(CROSS)gcc $(CFLAGS) -o $@ -c $< + $(Q)$(CROSS)$(CC) $(CFLAGS) -o $@ -c $< # Build .o from .S source files $(BUILD)/%.o: %.S $(CONFIG_H) Makefile $(ECHO) "AS $<" - $(Q)$(CROSS)gcc $(CFLAGS) -o $@ -c $< + $(Q)$(CROSS)$(CC) $(CFLAGS) -o $@ -c $< # Build .mpy from .py source files $(BUILD)/%.mpy: %.py diff --git a/py/mpconfig.h b/py/mpconfig.h index 1574243e8ea99..277ab9d7a4a2b 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -192,14 +192,18 @@ #endif #if MP_INT_TYPE == MP_INT_TYPE_INTPTR +#if !defined(MP_INT_TYPES_DEFINED) typedef intptr_t mp_int_t; typedef uintptr_t mp_uint_t; +#endif #define MP_INT_MAX INTPTR_MAX #define MP_INT_MIN INTPTR_MIN #define MP_UINT_MAX INTPTR_UMAX #elif MP_INT_TYPE == MP_INT_TYPE_INT64 +#if !defined(MP_INT_TYPES_DEFINED) typedef 
int64_t mp_int_t; typedef uint64_t mp_uint_t; +#endif #define MP_INT_MAX INT64_MAX #define MP_INT_MIN INT64_MIN #define MP_UINT_MAX INT64_UMAX diff --git a/tools/mpy_ld.py b/tools/mpy_ld.py index 20d665ff8849f..7cbe521aba67c 100755 --- a/tools/mpy_ld.py +++ b/tools/mpy_ld.py @@ -1509,7 +1509,8 @@ def do_preprocess(args): "#include <stdint.h>\n" "typedef uintptr_t mp_uint_t;\n" "typedef intptr_t mp_int_t;\n" - "typedef uintptr_t mp_off_t;", + "typedef uintptr_t mp_off_t;\n" + "#define MP_INT_TYPES_DEFINED", file=f, ) for i, q in enumerate(static_qstrs): From 2915d8400ca9c7e519acf2fbfe36d75bcf08ef08 Mon Sep 17 00:00:00 2001 From: Alessandro Gatti Date: Thu, 4 Jun 2026 16:52:05 +0200 Subject: [PATCH 2/4] examples/natmod/deflate: Fix build with Clang. This commit fixes building the `deflate` module using Clang rather than using GCC. The Clang standard library (libc) implementation of `memset` depends on functions that have a non-empty data section, which is not currently supported. Therefore we provide our own `memset` implementation that is good enough to let linking succeed. Signed-off-by: Alessandro Gatti --- examples/natmod/deflate/deflate.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/examples/natmod/deflate/deflate.c b/examples/natmod/deflate/deflate.c index 9de7e101a7689..56ce8c8385e92 100644 --- a/examples/natmod/deflate/deflate.c +++ b/examples/natmod/deflate/deflate.c @@ -68,3 +68,20 @@ mp_obj_t mpy_init(mp_obj_fun_bc_t *self, size_t n_args, size_t n_kw, mp_obj_t *a MP_DYNRUNTIME_INIT_EXIT } + +// On x86 and x64 (at least) Clang brings in its own memset() implementation, +// which will check at runtime which CPU features are available and then pick +// the fastest implementation depending on the running environment. +// +// This unfortunately includes a series of dependencies that do have a +// non-empty data section, which is currently not supported. Therefore if we +// detect Clang we provide our own naïve memset implementation. 
+ +#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) +__attribute__((weak)) void *memset(void *pointer, int character, size_t length) { + for (char *cursor = pointer; length > 0; --length) { + *cursor++ = (char)character; + } + return pointer; +} +#endif From 12551ff3b045e5f4d320aec344dfcd60117a5f02 Mon Sep 17 00:00:00 2001 From: Alessandro Gatti Date: Thu, 4 Jun 2026 16:54:26 +0200 Subject: [PATCH 3/4] examples/natmod/framebuf: Fix build with Clang. This commit fixes building the `framebuf` module using Clang rather than using GCC. The Clang standard library (libc) implementation of `memset` depends on functions that have a non-empty data section, which is not currently supported. Therefore we provide our own `memset` implementation that is good enough to let linking succeed. Signed-off-by: Alessandro Gatti --- examples/natmod/framebuf/framebuf.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/examples/natmod/framebuf/framebuf.c b/examples/natmod/framebuf/framebuf.c index 5fd7c6be3a456..c1f414ead23d7 100644 --- a/examples/natmod/framebuf/framebuf.c +++ b/examples/natmod/framebuf/framebuf.c @@ -52,3 +52,20 @@ mp_obj_t mpy_init(mp_obj_fun_bc_t *self, size_t n_args, size_t n_kw, mp_obj_t *a MP_DYNRUNTIME_INIT_EXIT } + +// Clang, at least when targeting x86 and x64, pulls in a memset() of its own +// that probes the CPU at runtime and dispatches to the fastest variant the +// running machine supports. +// +// That implementation drags in dependencies with a non-empty data section, +// which is not currently supported. So when building with Clang for those +// targets a simple byte-by-byte memset is supplied instead. 
+ +#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) +__attribute__((weak)) void *memset(void *pointer, int character, size_t length) { + for (char *cursor = pointer; length > 0; --length) { + *cursor++ = (char)character; + } + return pointer; +} +#endif From 6adecfb52732b37ea29b146e7f51b058962f68cd Mon Sep 17 00:00:00 2001 From: Alessandro Gatti Date: Thu, 4 Jun 2026 16:55:00 +0200 Subject: [PATCH 4/4] examples/natmod/re: Fix build with Clang. This commit fixes building the `re` module using Clang rather than using GCC. The Clang standard library (libc) implementation of `memset` depends on functions that have a non-empty data section, which is not currently supported. Therefore we provide our own `memset` implementation that is good enough to let linking succeed. Signed-off-by: Alessandro Gatti --- examples/natmod/re/re.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/examples/natmod/re/re.c b/examples/natmod/re/re.c index c0279ee7e816d..f9b9fd1de382d 100644 --- a/examples/natmod/re/re.c +++ b/examples/natmod/re/re.c @@ -89,3 +89,20 @@ mp_obj_t mpy_init(mp_obj_fun_bc_t *self, size_t n_args, size_t n_kw, mp_obj_t *a MP_DYNRUNTIME_INIT_EXIT } + +// Clang, at least when targeting x86 and x64, pulls in a memset() of its own +// that probes the CPU at runtime and dispatches to the fastest variant the +// running machine supports. +// +// That implementation drags in dependencies with a non-empty data section, +// which is not currently supported. So when building with Clang for those +// targets a simple byte-by-byte memset is supplied instead. + +#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) +__attribute__((weak)) void *memset(void *pointer, int character, size_t length) { + for (char *cursor = pointer; length > 0; --length) { + *cursor++ = (char)character; + } + return pointer; +} +#endif