# Patch summary (free text ahead of the first "diff --git" header).
#
# Purpose: split the single multi-architecture perf trampoline source
# (Python/asm_trampoline.S) into one assembly file per architecture and make
# configure pick the matching object file through PERF_TRAMPOLINE_OBJ.
#
# Makefile.pre.in
#   - Replaces the Python/asm_trampoline.o rule with per-architecture rules for
#     asm_trampoline_x86_64.o, asm_trampoline_aarch64.o and
#     asm_trampoline_riscv64.o; each compiles the matching .S file with
#     $(CC) -c $(PY_CORE_CFLAGS).
#   - Adds Python/asm_trampoline_universal2.o, produced by "lipo -create" from
#     the aarch64 and x86_64 objects.
#
# Python/asm_trampoline_aarch64.h -> Python/asm_trampoline_aarch64.S
#   - Drops the include guard and adds the trampoline body plus its start/end
#     labels under the __aarch64__ && __AARCH64EL__ && !__ILP32__ guard.
#
# Python/asm_trampoline_riscv64.S (new file)
#   - Holds the RISC-V instruction sequence that is removed from the old
#     shared file, together with its start/end labels.
#
# Python/asm_trampoline.S -> Python/asm_trampoline_x86_64.S
#   - Removes the aarch64 and RISC-V bodies and the #include of the aarch64
#     header; the whole file body is now wrapped in #ifdef __x86_64__.
#   - Removes the comment that showed the C equivalent of the trampoline.
#
# configure / configure.ac
#   - PERF_TRAMPOLINE_OBJ starts empty and is set per PLATFORM_TRIPLET.
#   - On darwin: the universal2 object is chosen when enable_universalsdk is
#     non-empty and UNIVERSAL_ARCHS is "universal2"; otherwise the choice
#     follows host_cpu, and an unrecognised host_cpu sets perf_trampoline=no.
#
# NOTE(review): no configure branch visible in this patch selects
#   Python/asm_trampoline_riscv64.o - confirm whether that is intentional.
# NOTE(review): the universal2 inputs are built with the full
#   $(PY_CORE_CFLAGS); if those flags carry several -arch options the inputs
#   may already be fat objects with overlapping architectures - verify that
#   "lipo -create" accepts them.
# NOTE(review): line breaks and indentation appear to be collapsed in this copy
#   of the patch; presumably they must be restored before it can be applied.
diff --git a/Makefile.pre.in b/Makefile.pre.in index 9435bf534fb512..765c72290e46ee 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -3111,9 +3111,18 @@ config.status: $(srcdir)/configure .PRECIOUS: config.status $(BUILDPYTHON) Makefile Makefile.pre -Python/asm_trampoline.o: $(srcdir)/Python/asm_trampoline.S +Python/asm_trampoline_x86_64.o: $(srcdir)/Python/asm_trampoline_x86_64.S $(CC) -c $(PY_CORE_CFLAGS) -o $@ $< +Python/asm_trampoline_aarch64.o: $(srcdir)/Python/asm_trampoline_aarch64.S + $(CC) -c $(PY_CORE_CFLAGS) -o $@ $< + +Python/asm_trampoline_riscv64.o: $(srcdir)/Python/asm_trampoline_riscv64.S + $(CC) -c $(PY_CORE_CFLAGS) -o $@ $< + +Python/asm_trampoline_universal2.o: Python/asm_trampoline_aarch64.o Python/asm_trampoline_x86_64.o + lipo -create -output $@ Python/asm_trampoline_aarch64.o Python/asm_trampoline_x86_64.o + Python/emscripten_trampoline_inner.wasm: $(srcdir)/Python/emscripten_trampoline_inner.c # emcc has a path that ends with emsdk/upstream/emscripten/emcc, we're looking for emsdk/upstream/bin/clang. 
$$(dirname $$(dirname $(CC)))/bin/clang -o $@ $< -mgc -O2 -Wl,--no-entry -Wl,--import-table -Wl,--import-memory -target wasm32-unknown-unknown -nostdlib diff --git a/Python/asm_trampoline_aarch64.h b/Python/asm_trampoline_aarch64.S similarity index 76% rename from Python/asm_trampoline_aarch64.h rename to Python/asm_trampoline_aarch64.S index bc83aa460b6860..b3aeb728de200c 100644 --- a/Python/asm_trampoline_aarch64.h +++ b/Python/asm_trampoline_aarch64.S @@ -1,6 +1,3 @@ -#ifndef ASM_TRAMPOLINE_AARCH_64_H_ -#define ASM_TRAMPOLINE_AARCH_64_H_ - /* * References: * - https://developer.arm.com/documentation/101028/0012/5--Feature-test-macros @@ -38,6 +35,31 @@ #define GNU_PROPERTY_AARCH64_GCS 0 #endif + .text +#if defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__) +#if defined(__APPLE__) + .globl __Py_trampoline_func_start +__Py_trampoline_func_start: +#else + .globl _Py_trampoline_func_start +_Py_trampoline_func_start: +#endif + SIGN_LR + stp x29, x30, [sp, -16]! + mov x29, sp + blr x3 + ldp x29, x30, [sp], 16 + VERIFY_LR + ret +#if defined(__APPLE__) + .globl __Py_trampoline_func_end +__Py_trampoline_func_end: +#else + .globl _Py_trampoline_func_end +_Py_trampoline_func_end: + .section .note.GNU-stack,"",@progbits +#endif + /* Add the BTI, PAC and GCS support to GNU Notes section */ #if GNU_PROPERTY_AARCH64_BTI != 0 || GNU_PROPERTY_AARCH64_POINTER_AUTH != 0 || GNU_PROPERTY_AARCH64_GCS != 0 .pushsection .note.gnu.property, "a"; /* Start a new allocatable section */ @@ -52,5 +74,4 @@ .long 0; /* padding for 8 byte alignment */ .popsection; /* end the section */ #endif - -#endif +#endif // __aarch64__ && __AARCH64EL__ && !__ILP32__ diff --git a/Python/asm_trampoline_riscv64.S b/Python/asm_trampoline_riscv64.S new file mode 100644 index 00000000000000..6125ba95373f95 --- /dev/null +++ b/Python/asm_trampoline_riscv64.S @@ -0,0 +1,12 @@ + .text + .globl _Py_trampoline_func_start +_Py_trampoline_func_start: + addi sp,sp,-16 + sd ra,8(sp) + jalr a3 + ld 
ra,8(sp) + addi sp,sp,16 + jr ra + .globl _Py_trampoline_func_end +_Py_trampoline_func_end: + .section .note.GNU-stack,"",@progbits diff --git a/Python/asm_trampoline.S b/Python/asm_trampoline_x86_64.S similarity index 51% rename from Python/asm_trampoline.S rename to Python/asm_trampoline_x86_64.S index 9f3ca909ab7d85..0e6b11589eafc8 100644 --- a/Python/asm_trampoline.S +++ b/Python/asm_trampoline_x86_64.S @@ -1,24 +1,12 @@ -#include "asm_trampoline_aarch64.h" - .text +#ifdef __x86_64__ #if defined(__APPLE__) .globl __Py_trampoline_func_start -#else - .globl _Py_trampoline_func_start -#endif -# The following assembly is equivalent to: -# PyObject * -# trampoline(PyThreadState *ts, _PyInterpreterFrame *f, -# int throwflag, py_evaluator evaluator) -# { -# return evaluator(ts, f, throwflag); -# } -#if defined(__APPLE__) __Py_trampoline_func_start: #else + .globl _Py_trampoline_func_start _Py_trampoline_func_start: #endif -#ifdef __x86_64__ #if defined(__CET__) && (__CET__ & 1) endbr64 #endif @@ -27,26 +15,6 @@ _Py_trampoline_func_start: call *%rcx pop %rbp ret -#endif // __x86_64__ -#if defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__) - // ARM64 little endian, 64bit ABI - // generate with aarch64-linux-gnu-gcc 12.1 - SIGN_LR - stp x29, x30, [sp, -16]! 
- mov x29, sp - blr x3 - ldp x29, x30, [sp], 16 - VERIFY_LR - ret -#endif -#ifdef __riscv - addi sp,sp,-16 - sd ra,8(sp) - jalr a3 - ld ra,8(sp) - addi sp,sp,16 - jr ra -#endif #if defined(__APPLE__) .globl __Py_trampoline_func_end __Py_trampoline_func_end: @@ -56,7 +24,7 @@ _Py_trampoline_func_end: .section .note.GNU-stack,"",@progbits #endif # Note for indicating the assembly code supports CET -#if defined(__x86_64__) && defined(__CET__) && (__CET__ & 1) +#if defined(__CET__) && (__CET__ & 1) .section .note.gnu.property,"a" .align 8 .long 1f - 0f @@ -73,4 +41,5 @@ _Py_trampoline_func_end: 3: .align 8 4: +#endif #endif // __x86_64__ diff --git a/configure b/configure index 63b41117957cab..97a6f79996c4ef 100755 --- a/configure +++ b/configure @@ -14356,17 +14356,35 @@ printf "%s\n" "$SHLIBS" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking perf trampoline" >&5 printf %s "checking perf trampoline... " >&6; } +PERF_TRAMPOLINE_OBJ="" case $PLATFORM_TRIPLET in #( x86_64-linux-gnu) : - perf_trampoline=yes ;; #( + perf_trampoline=yes + PERF_TRAMPOLINE_OBJ=Python/asm_trampoline_x86_64.o ;; #( aarch64-linux-gnu) : - perf_trampoline=yes ;; #( + perf_trampoline=yes + PERF_TRAMPOLINE_OBJ=Python/asm_trampoline_aarch64.o ;; #( darwin) : case $MACOSX_DEPLOYMENT_TARGET in #( 10.[0-9]|10.1[0-1]) : perf_trampoline=no ;; #( *) : perf_trampoline=yes + if test "${enable_universalsdk}" && test "$UNIVERSAL_ARCHS" = "universal2"; then + PERF_TRAMPOLINE_OBJ=Python/asm_trampoline_universal2.o + else + case "$host_cpu" in + x86_64) + PERF_TRAMPOLINE_OBJ=Python/asm_trampoline_x86_64.o + ;; + aarch64|arm64) + PERF_TRAMPOLINE_OBJ=Python/asm_trampoline_aarch64.o + ;; + *) + perf_trampoline=no + ;; + esac + fi ;; esac ;; #( *) : @@ -14382,7 +14400,6 @@ then : printf "%s\n" "#define PY_HAVE_PERF_TRAMPOLINE 1" >>confdefs.h - PERF_TRAMPOLINE_OBJ=Python/asm_trampoline.o fi diff --git a/configure.ac b/configure.ac index 6df5d1bee31c67..d3c84d233032d1 100644 --- a/configure.ac +++ 
b/configure.ac @@ -3818,12 +3818,30 @@ AC_MSG_RESULT([$SHLIBS]) dnl perf trampoline is Linux and macOS specific and requires an arch-specific dnl trampoline in assembly. AC_MSG_CHECKING([perf trampoline]) +PERF_TRAMPOLINE_OBJ="" AS_CASE([$PLATFORM_TRIPLET], - [x86_64-linux-gnu], [perf_trampoline=yes], - [aarch64-linux-gnu], [perf_trampoline=yes], + [x86_64-linux-gnu], [perf_trampoline=yes + PERF_TRAMPOLINE_OBJ=Python/asm_trampoline_x86_64.o], + [aarch64-linux-gnu], [perf_trampoline=yes + PERF_TRAMPOLINE_OBJ=Python/asm_trampoline_aarch64.o], [darwin], [AS_CASE([$MACOSX_DEPLOYMENT_TARGET], [[10.[0-9]|10.1[0-1]]], [perf_trampoline=no], - [perf_trampoline=yes] + [perf_trampoline=yes + if test "${enable_universalsdk}" && test "$UNIVERSAL_ARCHS" = "universal2"; then + PERF_TRAMPOLINE_OBJ=Python/asm_trampoline_universal2.o + else + case "$host_cpu" in + x86_64) + PERF_TRAMPOLINE_OBJ=Python/asm_trampoline_x86_64.o + ;; + aarch64|arm64) + PERF_TRAMPOLINE_OBJ=Python/asm_trampoline_aarch64.o + ;; + *) + perf_trampoline=no + ;; + esac + fi] )], [perf_trampoline=no] ) @@ -3831,7 +3849,6 @@ AC_MSG_RESULT([$perf_trampoline]) AS_VAR_IF([perf_trampoline], [yes], [ AC_DEFINE([PY_HAVE_PERF_TRAMPOLINE], [1], [Define to 1 if you have the perf trampoline.]) - PERF_TRAMPOLINE_OBJ=Python/asm_trampoline.o ]) AC_SUBST([PERF_TRAMPOLINE_OBJ])