Skip to content

Commit dc51270

Browse files
apalostrini
authored and committed
arm: io.h: Fix io accessors for KVM
commit 2e2c2a5 ("arm: qemu: override flash accessors to use virtualizable instructions") explains why we can't have instructions with multiple output registers when running under QEMU + KVM and the instruction leads to an exception to the hypervisor. USB XHCI is such a case (MMIO) where a ldr w1, [x0], #4 is emitted for xhci_start() which works fine with QEMU but crashes for QEMU + KVM. These instructions cannot be emulated by KVM as they do not produce syndrome information data that KVM can use to infer the destination register, the faulting address, whether it was a load or store, or if it's a 32 or 64 bit general-purpose register. As a result an external abort is injected from QEMU, via ext_dabt_pending to KVM and we end up throwing an exception that looks like U-Boot 2025.07-rc4 (Jun 10 2025 - 12:00:15 +0000) [...] Register 8001040 NbrPorts 8 Starting the controller "Synchronous Abort" handler, esr 0x96000010, far 0x10100040 elr: 000000000005b1c8 lr : 000000000005b1ac (reloc) elr: 00000000476fc1c8 lr : 00000000476fc1ac x0 : 0000000010100040 x1 : 0000000000000001 x2 : 0000000000000000 x3 : 0000000000003e80 x4 : 0000000000000000 x5 : 00000000477a5694 x6 : 0000000000000038 x7 : 000000004666f360 x8 : 0000000000000000 x9 : 00000000ffffffd8 x10: 000000000000000d x11: 0000000000000006 x12: 0000000046560a78 x13: 0000000046560dd0 x14: 00000000ffffffff x15: 000000004666eed2 x16: 00000000476ee2f0 x17: 0000000000000000 x18: 0000000046660dd0 x19: 000000004666f480 x20: 0000000000000000 x21: 0000000010100040 x22: 0000000010100000 x23: 0000000000000000 x24: 0000000000000000 x25: 0000000000000000 x26: 0000000000000000 x27: 0000000000000000 x28: 0000000000000000 x29: 000000004666f360 Code: d5033fbf aa1503e0 5287d003 52800002 (b8004401) Resetting CPU ... There are two problems making this the default. 
- It will emit ldr + add or str + add instead of ldr/str (post increment) in some cases - Some platforms that depend on TPL/SPL grow in size enough so that the binary doesn't fit anymore. So let's add proper I/O accessors and a Kconfig option to turn it off by default apart from our QEMU builds. Reported-by: Mikko Rapeli <mikko.rapeli@linaro.org> Tested-by: Mikko Rapeli <mikko.rapeli@linaro.org> Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
1 parent b56c063 commit dc51270

2 files changed

Lines changed: 124 additions & 40 deletions

File tree

arch/arm/Kconfig

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,18 @@ config LNX_KRNL_IMG_TEXT_OFFSET_BASE
108108
The value subtracted from CONFIG_TEXT_BASE to calculate the
109109
TEXT_OFFSET value written to the Linux kernel image header.
110110

config KVM_VIRT_INS
	bool "Emit virtualizable instructions"
	help
	  Instructions in the ARM ISA that have multiple output registers
	  can't be used if the instruction leads to an exception to the
	  hypervisor. These instructions cannot be emulated by KVM because
	  they do not produce syndrome information data that KVM can use to
	  infer the destination register, the faulting address, whether it
	  was a load or store, or if it's a 32 or 64 bit general-purpose
	  register, amongst other things. Use this to produce virtualizable
	  instructions if you plan to run U-Boot with KVM.
111123
config NVIC
112124
bool
113125

arch/arm/include/asm/io.h

Lines changed: 112 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -20,72 +20,147 @@ static inline void sync(void)
2020
{
2121
}
2222

/* "w" register prefix selects 32-bit views of the GP registers on arm64. */
#ifdef CONFIG_ARM64
#define __W	"w"
#else
#define __W
#endif

/* Thumb builds need the low-register constraint for ldr/str encodings. */
#if CONFIG_IS_ENABLED(SYS_THUMB_BUILD)
#define __R "l"
#define __RM "=l"
#else
#define __R "r"
#define __RM "=r"
#endif

#ifdef CONFIG_KVM_VIRT_INS
/*
 * Emit plain single-register load/store instructions only. Instructions
 * with writeback (e.g. post-increment addressing) do not provide syndrome
 * information on a fault to the hypervisor, so KVM cannot emulate them
 * for MMIO accesses — see the commit log for the resulting abort.
 *
 * The __raw_writeX/__raw_readX below should be converted to static inline
 * functions. However doing so produces a lot of compilation warnings when
 * called with a raw address. Convert these once the callers have been fixed.
 */
#define __raw_writeb(val, addr)				\
	do {						\
		asm volatile("strb %" __W "0, [%1]"	\
			     :				\
			     : __R ((u8)(val)), __R (addr)); \
	} while (0)

#define __raw_readb(addr)				\
	({						\
		u32 __val;				\
		asm volatile("ldrb %" __W "0, [%1]"	\
			     : __RM (__val)		\
			     : __R (addr));		\
		__val;					\
	})

#define __raw_writew(val, addr)				\
	do {						\
		asm volatile("strh %" __W "0, [%1]"	\
			     :				\
			     : __R ((u16)(val)), __R (addr)); \
	} while (0)

#define __raw_readw(addr)				\
	({						\
		u32 __val;				\
		asm volatile("ldrh %" __W "0, [%1]"	\
			     : __RM (__val)		\
			     : __R (addr));		\
		__val;					\
	})

#define __raw_writel(val, addr)				\
	do {						\
		asm volatile("str %" __W "0, [%1]"	\
			     :				\
			     : __R ((u32)(val)), __R (addr)); \
	} while (0)

#define __raw_readl(addr)				\
	({						\
		u32 __val;				\
		asm volatile("ldr %" __W "0, [%1]"	\
			     : __RM (__val)		\
			     : __R (addr));		\
		__val;					\
	})

/* 64-bit accesses use the full register, so no __W prefix here. */
#define __raw_writeq(val, addr)				\
	do {						\
		asm volatile("str %0, [%1]"		\
			     :				\
			     : __R ((u64)(val)), __R (addr)); \
	} while (0)

#define __raw_readq(addr)				\
	({						\
		u64 __val;				\
		asm volatile("ldr %0, [%1]"		\
			     : __RM (__val)		\
			     : __R (addr));		\
		__val;					\
	})
#else
/* Generic virtual read/write. */
#define __raw_readb(a)			(*(volatile unsigned char *)(a))
#define __raw_readw(a)			(*(volatile unsigned short *)(a))
#define __raw_readl(a)			(*(volatile unsigned int *)(a))
#define __raw_readq(a)			(*(volatile unsigned long long *)(a))

#define __raw_writeb(v, a)		(*(volatile unsigned char *)(a) = (v))
#define __raw_writew(v, a)		(*(volatile unsigned short *)(a) = (v))
#define __raw_writel(v, a)		(*(volatile unsigned int *)(a) = (v))
#define __raw_writeq(v, a)		(*(volatile unsigned long long *)(a) = (v))
#endif
33118

34119
/* Write @bytelen bytes from @data to the fixed I/O address @addr. */
static inline void __raw_writesb(unsigned long addr, const void *data,
				 int bytelen)
{
	const uint8_t *p = data;

	while (bytelen--)
		__raw_writeb(*p++, addr);
}
41126

42127
/* Write @wordlen 16-bit words from @data to the fixed I/O address @addr. */
static inline void __raw_writesw(unsigned long addr, const void *data,
				 int wordlen)
{
	const uint16_t *p = data;

	while (wordlen--)
		__raw_writew(*p++, addr);
}
49134

50135
/* Write @longlen 32-bit words from @data to the fixed I/O address @addr. */
static inline void __raw_writesl(unsigned long addr, const void *data,
				 int longlen)
{
	const uint32_t *p = data;

	while (longlen--)
		__raw_writel(*p++, addr);
}
57142

58143
/* Read @bytelen bytes from the fixed I/O address @addr into @data. */
static inline void __raw_readsb(unsigned long addr, void *data, int bytelen)
{
	uint8_t *p = data;

	while (bytelen--)
		*p++ = __raw_readb(addr);
}
64149

65150
/* Read @wordlen 16-bit words from the fixed I/O address @addr into @data. */
static inline void __raw_readsw(unsigned long addr, void *data, int wordlen)
{
	uint16_t *p = data;

	while (wordlen--)
		*p++ = __raw_readw(addr);
}
71156

72157
/* Read @longlen 32-bit words from the fixed I/O address @addr into @data. */
static inline void __raw_readsl(unsigned long addr, void *data, int longlen)
{
	uint32_t *p = data;

	while (longlen--)
		*p++ = __raw_readl(addr);
}
78163

79-
#define __raw_writeb(v,a) __arch_putb(v,a)
80-
#define __raw_writew(v,a) __arch_putw(v,a)
81-
#define __raw_writel(v,a) __arch_putl(v,a)
82-
#define __raw_writeq(v,a) __arch_putq(v,a)
83-
84-
#define __raw_readb(a) __arch_getb(a)
85-
#define __raw_readw(a) __arch_getw(a)
86-
#define __raw_readl(a) __arch_getl(a)
87-
#define __raw_readq(a) __arch_getq(a)
88-
89164
/*
90165
* TODO: The kernel offers some more advanced versions of barriers, it might
91166
* have some advantages to use them instead of the simple one here.
@@ -98,15 +173,15 @@ static inline void __raw_readsl(unsigned long addr, void *data, int longlen)
98173

#define smp_processor_id()	0

/*
 * Ordered accessors: a barrier paired with the relaxed access. The write
 * forms evaluate to the value written so they can be used in expressions.
 */
#define writeb(v, c)	({ u8  __v = v; __iowmb(); writeb_relaxed(__v, c); __v; })
#define writew(v, c)	({ u16 __v = v; __iowmb(); writew_relaxed(__v, c); __v; })
#define writel(v, c)	({ u32 __v = v; __iowmb(); writel_relaxed(__v, c); __v; })
#define writeq(v, c)	({ u64 __v = v; __iowmb(); writeq_relaxed(__v, c); __v; })

#define readb(c)	({ u8  __v = readb_relaxed(c); __iormb(); __v; })
#define readw(c)	({ u16 __v = readw_relaxed(c); __iormb(); __v; })
#define readl(c)	({ u32 __v = readl_relaxed(c); __iormb(); __v; })
#define readq(c)	({ u64 __v = readq_relaxed(c); __iormb(); __v; })
110185

111186
/*
112187
* Relaxed I/O memory access primitives. These follow the Device memory
@@ -121,13 +196,10 @@ static inline void __raw_readsl(unsigned long addr, void *data, int longlen)
121196
#define readq_relaxed(c) ({ u64 __r = le64_to_cpu((__force __le64) \
122197
__raw_readq(c)); __r; })
123198

/*
 * Relaxed writes: convert to little-endian as required and store with no
 * ordering barrier. No (void) cast here — the KVM_VIRT_INS variants of
 * __raw_writeX are statements, not expressions.
 */
#define writeb_relaxed(v, c)	__raw_writeb((v), (c))
#define writew_relaxed(v, c)	__raw_writew((__force u16)cpu_to_le16(v), (c))
#define writel_relaxed(v, c)	__raw_writel((__force u32)cpu_to_le32(v), (c))
#define writeq_relaxed(v, c)	__raw_writeq((__force u64)cpu_to_le64(v), (c))
131203

132204
/*
133205
* The compiler seems to be incapable of optimising constants

0 commit comments

Comments
 (0)