From 7942eab9d0024ce8b4a591edbc5b745f0246ce46 Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Fri, 7 Jun 2024 12:54:36 +0100 Subject: [PATCH 1/5] gh-119726: JIT: re-use trampolines on AArch64 When emitting AArch64 trampolines at the end of every data stencil, re-use existent ones fot the same symbol. Fix the disassebly to reflect the "bl" instruction without the relocation. --- Tools/jit/_stencils.py | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 6e046df3026ae9..211a9784b1a03f 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -4,6 +4,7 @@ import enum import sys import typing +import re import _schema @@ -181,6 +182,7 @@ class Stencil: body: bytearray = dataclasses.field(default_factory=bytearray, init=False) holes: list[Hole] = dataclasses.field(default_factory=list, init=False) disassembly: list[str] = dataclasses.field(default_factory=list, init=False) + aarch64_trampolines: dict = dataclasses.field(default_factory=dict, init=False) def pad(self, alignment: int) -> None: """Pad the stencil to the given alignment.""" @@ -189,14 +191,39 @@ def pad(self, alignment: int) -> None: self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}") self.body.extend([0] * padding) - def emit_aarch64_trampoline(self, hole: Hole) -> None: + def emit_aarch64_trampoline(self, hole: Hole, alignment: int) -> None: """Even with the large code model, AArch64 Linux insists on 28-bit jumps.""" - base = len(self.body) + reuse_trampoline = hole.symbol in self.aarch64_trampolines + if reuse_trampoline: + # Re-use the base address of the previously created trampoline + base = self.aarch64_trampolines[hole.symbol] + else: + self.pad(alignment) + base = len(self.body) where = slice(hole.offset, hole.offset + 4) instruction = int.from_bytes(self.body[where], sys.byteorder) instruction &= 0xFC000000 instruction |= ((base - hole.offset) >> 2) & 0x03FFFFFF self.body[where] = instruction.to_bytes(4, sys.byteorder) + + # Fix the disassembly for the branch to call the trampoline + bl_instruction = f"{hole.offset:x}: {instruction:x} bl {hole.offset:#x} <{hole.symbol}> // trampoline" + self.disassembly = [ + bl_instruction if line.startswith(f"{hole.offset:x}:") else line + for line in self.disassembly + ] + + # Remove the relocation once the bl instruction has been fixed + relocation_regex = re.compile(rf"{hole.offset:016x}:\s+{hole.kind}\s+_?{hole.symbol}") + self.disassembly = [ + line2 for line2 in self.disassembly if not relocation_regex.match(line2) + ] + + if reuse_trampoline: + # There is no need to emit a new trampoline. + return + + # Emit a new trampoline only if is not already present in the Stencil self.disassembly += [ f"{base + 4 * 0:x}: d2800008 mov x8, #0x0", f"{base + 4 * 0:016x}: R_AARCH64_MOVW_UABS_G0_NC {hole.symbol}", @@ -225,6 +252,7 @@ def emit_aarch64_trampoline(self, hole: Hole) -> None: ] ): self.holes.append(hole.replace(offset=base + 4 * i, kind=kind)) + self.aarch64_trampolines.update({hole.symbol: base}) def remove_jump(self, *, alignment: int = 1) -> None: """Remove a zero-length continuation jump, if it exists.""" @@ -300,8 +328,7 @@ def process_relocations(self, *, alignment: int = 1) -> None: in {"R_AARCH64_CALL26", "R_AARCH64_JUMP26", "ARM64_RELOC_BRANCH26"} and hole.value is HoleValue.ZERO ): - self.code.pad(alignment) - self.code.emit_aarch64_trampoline(hole) + self.code.emit_aarch64_trampoline(hole, alignment) self.code.holes.remove(hole) self.code.remove_jump(alignment=alignment) self.code.pad(alignment) From fc9d5348ce43c568b403b6ca166de5284ed71439 Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Fri, 7 Jun 2024 23:32:35 +0100 Subject: [PATCH 2/5] Fix formatting and typing --- Tools/jit/_stencils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 211a9784b1a03f..990847c0c5cad6 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -182,7 +182,9 @@ class Stencil: body: bytearray = dataclasses.field(default_factory=bytearray, init=False) holes: list[Hole] = dataclasses.field(default_factory=list, init=False) disassembly: list[str] = dataclasses.field(default_factory=list, init=False) - aarch64_trampolines: dict = dataclasses.field(default_factory=dict, init=False) + aarch64_trampolines: dict[str | None, int] = dataclasses.field( + default_factory=dict, init=False + ) def pad(self, alignment: int) -> None: """Pad the stencil to the given alignment.""" @@ -214,7 +216,9 @@ def emit_aarch64_trampoline(self, hole: Hole, alignment: int) -> None: ] # Remove the relocation once the bl instruction has been fixed - relocation_regex = re.compile(rf"{hole.offset:016x}:\s+{hole.kind}\s+_?{hole.symbol}") + relocation_regex = re.compile( + rf"{hole.offset:016x}:\s+{hole.kind}\s+_?{hole.symbol}" + ) self.disassembly = [ line2 for line2 in self.disassembly if not relocation_regex.match(line2) ] From 7a3ab2034b06302d55e4026b6303dcfd7a941387 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 7 Jun 2024 22:54:17 +0000 Subject: [PATCH 3/5] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2024-06-07-22-54-15.gh-issue-119726.D9EE-o.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-07-22-54-15.gh-issue-119726.D9EE-o.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-07-22-54-15.gh-issue-119726.D9EE-o.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-07-22-54-15.gh-issue-119726.D9EE-o.rst new file mode 100644 index 00000000000000..595d8dda25fe1b --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-07-22-54-15.gh-issue-119726.D9EE-o.rst @@ -0,0 +1 @@ +JIT: Re-use trampolines on AArch64 when creating stencils. Patch by Diego Russo From 68cf3445f3b769bdba8160ddd7d7be6fc6ae9736 Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Mon, 10 Jun 2024 13:41:30 +0100 Subject: [PATCH 4/5] Address PR comments. --- Tools/jit/_stencils.py | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 990847c0c5cad6..e7bba86c37a7a1 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -2,9 +2,9 @@ import dataclasses import enum +import re import sys import typing -import re import _schema @@ -182,7 +182,7 @@ class Stencil: body: bytearray = dataclasses.field(default_factory=bytearray, init=False) holes: list[Hole] = dataclasses.field(default_factory=list, init=False) disassembly: list[str] = dataclasses.field(default_factory=list, init=False) - aarch64_trampolines: dict[str | None, int] = dataclasses.field( + trampolines: dict[str | None, int] = dataclasses.field( default_factory=dict, init=False ) @@ -195,10 +195,10 @@ def pad(self, alignment: int) -> None: def emit_aarch64_trampoline(self, hole: Hole, alignment: int) -> None: """Even with the large code model, AArch64 Linux insists on 28-bit jumps.""" - reuse_trampoline = hole.symbol in self.aarch64_trampolines + reuse_trampoline = hole.symbol in self.trampolines if reuse_trampoline: # Re-use the base address of the previously created trampoline - base = self.aarch64_trampolines[hole.symbol] + base = self.trampolines[hole.symbol] else: self.pad(alignment) base = len(self.body) @@ -208,26 +208,9 @@ def emit_aarch64_trampoline(self, hole: Hole, alignment: int) -> None: instruction |= ((base - hole.offset) >> 2) & 0x03FFFFFF self.body[where] = instruction.to_bytes(4, sys.byteorder) - # Fix the disassembly for the branch to call the trampoline - bl_instruction = f"{hole.offset:x}: {instruction:x} bl {hole.offset:#x} <{hole.symbol}> // trampoline" - self.disassembly = [ - bl_instruction if line.startswith(f"{hole.offset:x}:") else line - for line in self.disassembly - ] - - # Remove the relocation once the bl instruction has been fixed - relocation_regex = re.compile( - rf"{hole.offset:016x}:\s+{hole.kind}\s+_?{hole.symbol}" - ) - self.disassembly = [ - line2 for line2 in self.disassembly if not relocation_regex.match(line2) - ] - if reuse_trampoline: - # There is no need to emit a new trampoline. return - # Emit a new trampoline only if is not already present in the Stencil self.disassembly += [ f"{base + 4 * 0:x}: d2800008 mov x8, #0x0", f"{base + 4 * 0:016x}: R_AARCH64_MOVW_UABS_G0_NC {hole.symbol}", @@ -256,7 +239,7 @@ def emit_aarch64_trampoline(self, hole: Hole, alignment: int) -> None: ] ): self.holes.append(hole.replace(offset=base + 4 * i, kind=kind)) - self.aarch64_trampolines.update({hole.symbol: base}) + self.trampolines[hole.symbol] = base def remove_jump(self, *, alignment: int = 1) -> None: """Remove a zero-length continuation jump, if it exists.""" From 69e16e01300d63f9a3ed547f99e3b9ceb7626059 Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Wed, 12 Jun 2024 09:42:50 +0100 Subject: [PATCH 5/5] Address further comments. --- Tools/jit/_stencils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index e7bba86c37a7a1..c7c5ca1590d601 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -2,7 +2,6 @@ import dataclasses import enum -import re import sys import typing @@ -182,9 +181,7 @@ class Stencil: body: bytearray = dataclasses.field(default_factory=bytearray, init=False) holes: list[Hole] = dataclasses.field(default_factory=list, init=False) disassembly: list[str] = dataclasses.field(default_factory=list, init=False) - trampolines: dict[str | None, int] = dataclasses.field( - default_factory=dict, init=False - ) + trampolines: dict[str, int] = dataclasses.field(default_factory=dict, init=False) def pad(self, alignment: int) -> None: """Pad the stencil to the given alignment.""" @@ -195,6 +192,7 @@ def pad(self, alignment: int) -> None: def emit_aarch64_trampoline(self, hole: Hole, alignment: int) -> None: """Even with the large code model, AArch64 Linux insists on 28-bit jumps.""" + assert hole.symbol is not None reuse_trampoline = hole.symbol in self.trampolines if reuse_trampoline: # Re-use the base address of the previously created trampoline