| 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
| 2 | |
| 3 | #ifndef _ASM_X86_NOSPEC_BRANCH_H_ |
| 4 | #define _ASM_X86_NOSPEC_BRANCH_H_ |
| 5 | |
| 6 | #include <linux/static_key.h> |
| 7 | #include <linux/objtool.h> |
| 8 | #include <linux/linkage.h> |
| 9 | |
| 10 | #include <asm/alternative.h> |
| 11 | #include <asm/cpufeatures.h> |
| 12 | #include <asm/msr-index.h> |
| 13 | #include <asm/unwind_hints.h> |
| 14 | #include <asm/percpu.h> |
| 15 | |
| 16 | /* |
| 17 | * Call depth tracking for Intel SKL CPUs to address the RSB underflow |
| 18 | * issue in software. |
| 19 | * |
| 20 |  * The tracking does not use a counter. It uses arithmetic shift |
| 21 | * right on call entry and logical shift left on return. |
| 22 | * |
| 23 | * The depth tracking variable is initialized to 0x8000.... when the call |
| 24 | * depth is zero. The arithmetic shift right sign extends the MSB and |
| 25 | * saturates after the 12th call. The shift count is 5 for both directions |
| 26 | * so the tracking covers 12 nested calls. |
| 27 | * |
| 28 | * Call |
| 29 | * 0: 0x8000000000000000 0x0000000000000000 |
| 30 | * 1: 0xfc00000000000000 0xf000000000000000 |
| 31 | * ... |
| 32 | * 11: 0xfffffffffffffff8 0xfffffffffffffc00 |
| 33 | * 12: 0xffffffffffffffff 0xffffffffffffffe0 |
| 34 | * |
| 35 | * After a return buffer fill the depth is credited 12 calls before the |
| 36 | * next stuffing has to take place. |
| 37 | * |
| 38 |  * There is an inaccuracy for situations like this: |
| 39 | * |
| 40 | * 10 calls |
| 41 | * 5 returns |
| 42 | * 3 calls |
| 43 | * 4 returns |
| 44 | * 3 calls |
| 45 | * .... |
| 46 | * |
| 47 | * The shift count might cause this to be off by one in either direction, |
| 48 | * but there is still a cushion vs. the RSB depth. The algorithm does not |
| 49 |  * claim to be perfect; it can be speculated around by the CPU, but it is |
| 50 |  * considered to obfuscate the problem enough to make exploitation |
| 51 |  * extremely difficult. |
| 52 | */ |
| 53 | #define RET_DEPTH_SHIFT 5 |
| 54 | #define RSB_RET_STUFF_LOOPS 16 |
| 55 | #define RET_DEPTH_INIT 0x8000000000000000ULL |
| 56 | #define RET_DEPTH_INIT_FROM_CALL 0xfc00000000000000ULL |
| 57 | #define RET_DEPTH_CREDIT 0xffffffffffffffffULL |
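|  | |
|  | /* |
|  |  * Illustration only (no kernel code spells this out in C): the per-call |
|  |  * and per-return updates performed by the thunks correspond to |
|  |  * |
|  |  *	depth = (u64)((s64)depth >> RET_DEPTH_SHIFT);	// call entry (sarq) |
|  |  *	depth <<= RET_DEPTH_SHIFT;			// return (shlq) |
|  |  * |
|  |  * with depth starting at RET_DEPTH_INIT for a call depth of zero and |
|  |  * being set to RET_DEPTH_CREDIT after an RSB stuffing sequence. |
|  |  */ |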
| 58 | |
| 59 | #ifdef CONFIG_CALL_THUNKS_DEBUG |
| 60 | # define CALL_THUNKS_DEBUG_INC_CALLS \ |
| 61 | incq PER_CPU_VAR(__x86_call_count); |
| 62 | # define CALL_THUNKS_DEBUG_INC_RETS \ |
| 63 | incq PER_CPU_VAR(__x86_ret_count); |
| 64 | # define CALL_THUNKS_DEBUG_INC_STUFFS \ |
| 65 | incq PER_CPU_VAR(__x86_stuffs_count); |
| 66 | # define CALL_THUNKS_DEBUG_INC_CTXSW \ |
| 67 | incq PER_CPU_VAR(__x86_ctxsw_count); |
| 68 | #else |
| 69 | # define CALL_THUNKS_DEBUG_INC_CALLS |
| 70 | # define CALL_THUNKS_DEBUG_INC_RETS |
| 71 | # define CALL_THUNKS_DEBUG_INC_STUFFS |
| 72 | # define CALL_THUNKS_DEBUG_INC_CTXSW |
| 73 | #endif |
| 74 | |
| 75 | #if defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS) |
| 76 | |
| 77 | #include <asm/asm-offsets.h> |
| 78 | |
| 79 | #define CREDIT_CALL_DEPTH \ |
| 80 | movq $-1, PER_CPU_VAR(__x86_call_depth); |
| 81 | |
| 82 | #define RESET_CALL_DEPTH \ |
| 83 | xor %eax, %eax; \ |
| 84 | bts $63, %rax; \ |
| 85 | movq %rax, PER_CPU_VAR(__x86_call_depth); |
| 86 | |
| 87 | #define RESET_CALL_DEPTH_FROM_CALL \ |
| 88 | movb $0xfc, %al; \ |
| 89 | shl $56, %rax; \ |
| 90 | movq %rax, PER_CPU_VAR(__x86_call_depth); \ |
| 91 | CALL_THUNKS_DEBUG_INC_CALLS |
| 92 | |
| 93 | #define INCREMENT_CALL_DEPTH \ |
| 94 | sarq $5, PER_CPU_VAR(__x86_call_depth); \ |
| 95 | CALL_THUNKS_DEBUG_INC_CALLS |
| 96 | |
| 97 | #else |
| 98 | #define CREDIT_CALL_DEPTH |
| 99 | #define RESET_CALL_DEPTH |
| 100 | #define RESET_CALL_DEPTH_FROM_CALL |
| 101 | #define INCREMENT_CALL_DEPTH |
| 102 | #endif |
| 103 | |
| 104 | /* |
| 105 | * Fill the CPU return stack buffer. |
| 106 | * |
| 107 | * Each entry in the RSB, if used for a speculative 'ret', contains an |
| 108 | * infinite 'pause; lfence; jmp' loop to capture speculative execution. |
| 109 | * |
| 110 | * This is required in various cases for retpoline and IBRS-based |
| 111 | * mitigations for the Spectre variant 2 vulnerability. Sometimes to |
| 112 | * eliminate potentially bogus entries from the RSB, and sometimes |
| 113 | * purely to ensure that it doesn't get empty, which on some CPUs would |
| 114 | * allow predictions from other (unwanted!) sources to be used. |
| 115 | * |
| 116 | * We define a CPP macro such that it can be used from both .S files and |
| 117 | * inline assembly. It's possible to do a .macro and then include that |
| 118 | * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. |
| 119 | */ |
| 120 | |
| 121 | #define RETPOLINE_THUNK_SIZE 32 |
| 122 | #define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ |
| 123 | |
| 124 | /* |
| 125 | * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN. |
| 126 | */ |
| 127 | #define __FILL_RETURN_SLOT \ |
| 128 | ANNOTATE_INTRA_FUNCTION_CALL; \ |
| 129 | call 772f; \ |
| 130 | int3; \ |
| 131 | 772: |
| 132 | |
| 133 | /* |
| 134 | * Stuff the entire RSB. |
| 135 | * |
| 136 | * Google experimented with loop-unrolling and this turned out to be |
| 137 | * the optimal version - two calls, each with their own speculation |
| 138 | * trap should their return address end up getting used, in a loop. |
| 139 | */ |
| 140 | #ifdef CONFIG_X86_64 |
| 141 | #define __FILL_RETURN_BUFFER(reg, nr) \ |
| 142 | mov $(nr/2), reg; \ |
| 143 | 771: \ |
| 144 | __FILL_RETURN_SLOT \ |
| 145 | __FILL_RETURN_SLOT \ |
| 146 | add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \ |
| 147 | dec reg; \ |
| 148 | jnz 771b; \ |
| 149 | /* barrier for jnz misprediction */ \ |
| 150 | lfence; \ |
| 151 | CREDIT_CALL_DEPTH \ |
| 152 | CALL_THUNKS_DEBUG_INC_CTXSW |
| 153 | #else |
| 154 | /* |
| 155 |  * i386 doesn't unconditionally have LFENCE, so it can't use the looping |
| 156 |  * variant above and instead stuffs the slots with an unrolled .rept. |
| 157 | */ |
| 158 | #define __FILL_RETURN_BUFFER(reg, nr) \ |
| 159 | .rept nr; \ |
| 160 | __FILL_RETURN_SLOT; \ |
| 161 | .endr; \ |
| 162 | add $(BITS_PER_LONG/8) * nr, %_ASM_SP; |
| 163 | #endif |
| 164 | |
| 165 | /* |
| 166 | * Stuff a single RSB slot. |
| 167 | * |
| 168 | * To mitigate Post-Barrier RSB speculation, one CALL instruction must be |
| 169 | * forced to retire before letting a RET instruction execute. |
| 170 | * |
| 171 | * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed |
| 172 | * before this point. |
| 173 | */ |
| 174 | #define __FILL_ONE_RETURN \ |
| 175 | __FILL_RETURN_SLOT \ |
| 176 | add $(BITS_PER_LONG/8), %_ASM_SP; \ |
| 177 | lfence; |
| 178 | |
| 179 | #ifdef __ASSEMBLER__ |
| 180 | |
| 181 | /* |
| 182 | * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions |
| 183 | * vs RETBleed validation. |
| 184 | */ |
| 185 | #define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE |
| 186 | |
| 187 | /* |
| 188 | * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should |
| 189 | * eventually turn into its own annotation. |
| 190 | */ |
| 191 | .macro VALIDATE_UNRET_END |
| 192 | #if defined(CONFIG_NOINSTR_VALIDATION) && \ |
| 193 | (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) |
| 194 | ANNOTATE_RETPOLINE_SAFE |
| 195 | nop |
| 196 | #endif |
| 197 | .endm |
| 198 | |
| 199 | /* |
| 200 | * Emits a conditional CS prefix that is compatible with |
| 201 | * -mindirect-branch-cs-prefix. |
| 202 | */ |
| 203 | .macro __CS_PREFIX reg:req |
| 204 | .irp rs,r8,r9,r10,r11,r12,r13,r14,r15 |
| 205 | .ifc \reg,\rs |
| 206 | .byte 0x2e |
| 207 | .endif |
| 208 | .endr |
| 209 | .endm |
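|  | |
|  | /* |
|  |  * Illustration (not a real call site): |
|  |  * |
|  |  *	__CS_PREFIX rax		// emits nothing, rax is not in the .irp list |
|  |  *	__CS_PREFIX r11		// emits the single 0x2e (CS override) byte |
|  |  */ |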
| 210 | |
| 211 | /* |
| 212 | * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple |
| 213 | * indirect jmp/call which may be susceptible to the Spectre variant 2 |
| 214 | * attack. |
| 215 | * |
| 216 | * NOTE: these do not take kCFI into account and are thus not comparable to C |
| 217 | * indirect calls, take care when using. The target of these should be an ENDBR |
| 218 | * instruction irrespective of kCFI. |
| 219 | */ |
| 220 | .macro JMP_NOSPEC reg:req |
| 221 | #ifdef CONFIG_MITIGATION_RETPOLINE |
| 222 | __CS_PREFIX \reg |
| 223 | jmp __x86_indirect_thunk_\reg |
| 224 | #else |
| 225 | jmp *%\reg |
| 226 | int3 |
| 227 | #endif |
| 228 | .endm |
| 229 | |
| 230 | .macro CALL_NOSPEC reg:req |
| 231 | #ifdef CONFIG_MITIGATION_RETPOLINE |
| 232 | __CS_PREFIX \reg |
| 233 | call __x86_indirect_thunk_\reg |
| 234 | #else |
| 235 | call *%\reg |
| 236 | #endif |
| 237 | .endm |
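|  | |
|  | /* |
|  |  * Illustrative usage from assembly (register choice and symbol are made |
|  |  * up; as noted above, the target should be an ENDBR instruction): |
|  |  * |
|  |  *	movq	func_ptr(%rip), %r11 |
|  |  *	CALL_NOSPEC r11		// "call __x86_indirect_thunk_r11" with |
|  |  *				// retpolines, "call *%r11" otherwise |
|  |  */ |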
| 238 | |
| 239 | /* |
| 240 | * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP |
| 241 | * monstrosity above, manually. |
| 242 | */ |
| 243 | .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS) |
| 244 | ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \ |
| 245 | __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \ |
| 246 | __stringify(nop;nop;__FILL_ONE_RETURN), \ftr2 |
| 247 | |
| 248 | .Lskip_rsb_\@: |
| 249 | .endm |
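|  | |
|  | /* |
|  |  * Typical invocation (illustrative; real sites live in the entry and |
|  |  * context switch code): |
|  |  * |
|  |  *	FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW |
|  |  * |
|  |  * The register is only used as a scratch loop counter and is clobbered. |
|  |  */ |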
| 250 | |
| 251 | /* |
| 252 | * The CALL to srso_alias_untrain_ret() must be patched in directly at |
| 253 |  * the spot where untraining must be done, i.e., srso_alias_untrain_ret() |
| 254 | * must be the target of a CALL instruction instead of indirectly |
| 255 | * jumping to a wrapper which then calls it. Therefore, this macro is |
| 256 | * called outside of __UNTRAIN_RET below, for the time being, before the |
| 257 | * kernel can support nested alternatives with arbitrary nesting. |
| 258 | */ |
| 259 | .macro CALL_UNTRAIN_RET |
| 260 | #if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO) |
| 261 | ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \ |
| 262 | "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS |
| 263 | #endif |
| 264 | .endm |
| 265 | |
| 266 | /* |
| 267 | * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the |
| 268 | * return thunk isn't mapped into the userspace tables (then again, AMD |
| 269 | * typically has NO_MELTDOWN). |
| 270 | * |
| 271 |  * While retbleed_untrain_ret() clobbers nothing but requires a stack, |
| 272 | * write_ibpb() will clobber AX, CX, DX. |
| 273 | * |
| 274 | * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point |
| 275 | * where we have a stack but before any RET instruction. |
| 276 | */ |
| 277 | .macro __UNTRAIN_RET ibpb_feature, call_depth_insns |
| 278 | #if defined(CONFIG_MITIGATION_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY) |
| 279 | VALIDATE_UNRET_END |
| 280 | CALL_UNTRAIN_RET |
| 281 | ALTERNATIVE_2 "", \ |
| 282 | "call write_ibpb", \ibpb_feature, \ |
| 283 | __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH |
| 284 | #endif |
| 285 | .endm |
| 286 | |
| 287 | #define UNTRAIN_RET \ |
| 288 | __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH) |
| 289 | |
| 290 | #define UNTRAIN_RET_VM \ |
| 291 | __UNTRAIN_RET X86_FEATURE_IBPB_ON_VMEXIT, __stringify(RESET_CALL_DEPTH) |
| 292 | |
| 293 | #define UNTRAIN_RET_FROM_CALL \ |
| 294 | __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH_FROM_CALL) |
| 295 | |
| 296 | |
| 297 | .macro CALL_DEPTH_ACCOUNT |
| 298 | #ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING |
| 299 | ALTERNATIVE "", \ |
| 300 | __stringify(INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH |
| 301 | #endif |
| 302 | .endm |
| 303 | |
| 304 | /* |
| 305 | * Macro to execute VERW insns that mitigate transient data sampling |
| 306 | * attacks such as MDS or TSA. On affected systems a microcode update |
| 307 | * overloaded VERW insns to also clear the CPU buffers. VERW clobbers |
| 308 | * CFLAGS.ZF. |
| 309 | * Note: Only the memory operand variant of VERW clears the CPU buffers. |
| 310 | */ |
| 311 | #ifdef CONFIG_X86_64 |
| 312 | #define VERW verw x86_verw_sel(%rip) |
| 313 | #else |
| 314 | /* |
| 315 | * In 32bit mode, the memory operand must be a %cs reference. The data segments |
| 316 | * may not be usable (vm86 mode), and the stack segment may not be flat (ESPFIX32). |
| 317 | */ |
| 318 | #define VERW verw %cs:x86_verw_sel |
| 319 | #endif |
| 320 | |
| 321 | /* |
| 322 | * Provide a stringified VERW macro for simple usage, and a non-stringified |
| 323 | * VERW macro for use in more elaborate sequences, e.g. to encode a conditional |
| 324 | * VERW within an ALTERNATIVE. |
| 325 | */ |
| 326 | #define __CLEAR_CPU_BUFFERS __stringify(VERW) |
| 327 | |
| 328 | /* If necessary, emit VERW on exit-to-userspace to clear CPU buffers. */ |
| 329 | #define CLEAR_CPU_BUFFERS \ |
| 330 | ALTERNATIVE "", __CLEAR_CPU_BUFFERS, X86_FEATURE_CLEAR_CPU_BUF |
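|  | |
|  | /* |
|  |  * Illustrative placement (the real sites are in the entry code): the |
|  |  * macro is meant to sit on the exit-to-userspace path, right before the |
|  |  * instruction that returns to user mode, e.g. |
|  |  * |
|  |  *	CLEAR_CPU_BUFFERS |
|  |  *	iretq |
|  |  */ |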
| 331 | |
| 332 | #ifdef CONFIG_X86_64 |
| 333 | .macro CLEAR_BRANCH_HISTORY |
| 334 | ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP |
| 335 | .endm |
| 336 | |
| 337 | .macro CLEAR_BRANCH_HISTORY_VMEXIT |
| 338 | ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_VMEXIT |
| 339 | .endm |
| 340 | #else |
| 341 | #define CLEAR_BRANCH_HISTORY |
| 342 | #define CLEAR_BRANCH_HISTORY_VMEXIT |
| 343 | #endif |
| 344 | |
| 345 | #else /* __ASSEMBLER__ */ |
| 346 | |
| 347 | #define ITS_THUNK_SIZE 64 |
| 348 | |
| 349 | typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; |
| 350 | typedef u8 its_thunk_t[ITS_THUNK_SIZE]; |
| 351 | extern retpoline_thunk_t __x86_indirect_thunk_array[]; |
| 352 | extern retpoline_thunk_t __x86_indirect_call_thunk_array[]; |
| 353 | extern retpoline_thunk_t __x86_indirect_jump_thunk_array[]; |
| 354 | extern its_thunk_t __x86_indirect_its_thunk_array[]; |
| 355 | |
| 356 | #ifdef CONFIG_MITIGATION_RETHUNK |
| 357 | extern void __x86_return_thunk(void); |
| 358 | #else |
| 359 | static inline void __x86_return_thunk(void) {} |
| 360 | #endif |
| 361 | |
| 362 | #ifdef CONFIG_MITIGATION_UNRET_ENTRY |
| 363 | extern void retbleed_return_thunk(void); |
| 364 | #else |
| 365 | static inline void retbleed_return_thunk(void) {} |
| 366 | #endif |
| 367 | |
| 368 | extern void srso_alias_untrain_ret(void); |
| 369 | |
| 370 | #ifdef CONFIG_MITIGATION_SRSO |
| 371 | extern void srso_return_thunk(void); |
| 372 | extern void srso_alias_return_thunk(void); |
| 373 | #else |
| 374 | static inline void srso_return_thunk(void) {} |
| 375 | static inline void srso_alias_return_thunk(void) {} |
| 376 | #endif |
| 377 | |
| 378 | #ifdef CONFIG_MITIGATION_ITS |
| 379 | extern void its_return_thunk(void); |
| 380 | #else |
| 381 | static inline void its_return_thunk(void) {} |
| 382 | #endif |
| 383 | |
| 387 | |
| 388 | extern void entry_untrain_ret(void); |
| 389 | extern void write_ibpb(void); |
| 390 | |
| 391 | #ifdef CONFIG_X86_64 |
| 392 | extern void clear_bhb_loop(void); |
| 393 | #endif |
| 394 | |
| 395 | extern void (*x86_return_thunk)(void); |
| 396 | |
| 397 | extern void __warn_thunk(void); |
| 398 | |
| 399 | #ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING |
| 400 | extern void call_depth_return_thunk(void); |
| 401 | |
| 402 | #define CALL_DEPTH_ACCOUNT \ |
| 403 | ALTERNATIVE("", \ |
| 404 | __stringify(INCREMENT_CALL_DEPTH), \ |
| 405 | X86_FEATURE_CALL_DEPTH) |
| 406 | |
| 407 | DECLARE_PER_CPU_CACHE_HOT(u64, __x86_call_depth); |
| 408 | |
| 409 | #ifdef CONFIG_CALL_THUNKS_DEBUG |
| 410 | DECLARE_PER_CPU(u64, __x86_call_count); |
| 411 | DECLARE_PER_CPU(u64, __x86_ret_count); |
| 412 | DECLARE_PER_CPU(u64, __x86_stuffs_count); |
| 413 | DECLARE_PER_CPU(u64, __x86_ctxsw_count); |
| 414 | #endif |
| 415 | #else /* !CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ |
| 416 | |
| 417 | static inline void call_depth_return_thunk(void) {} |
| 418 | #define CALL_DEPTH_ACCOUNT "" |
| 419 | |
| 420 | #endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ |
| 421 | |
| 422 | #ifdef CONFIG_MITIGATION_RETPOLINE |
| 423 | |
| 424 | #define GEN(reg) \ |
| 425 | extern retpoline_thunk_t __x86_indirect_thunk_ ## reg; |
| 426 | #include <asm/GEN-for-each-reg.h> |
| 427 | #undef GEN |
| 428 | |
| 429 | #define GEN(reg) \ |
| 430 | extern retpoline_thunk_t __x86_indirect_call_thunk_ ## reg; |
| 431 | #include <asm/GEN-for-each-reg.h> |
| 432 | #undef GEN |
| 433 | |
| 434 | #define GEN(reg) \ |
| 435 | extern retpoline_thunk_t __x86_indirect_jump_thunk_ ## reg; |
| 436 | #include <asm/GEN-for-each-reg.h> |
| 437 | #undef GEN |
| 438 | |
| 439 | #ifdef CONFIG_X86_64 |
| 440 | |
| 441 | /* |
| 442 | * Emits a conditional CS prefix that is compatible with |
| 443 | * -mindirect-branch-cs-prefix. |
| 444 | */ |
| 445 | #define __CS_PREFIX(reg) \ |
| 446 | ".irp rs,r8,r9,r10,r11,r12,r13,r14,r15\n" \ |
| 447 | ".ifc \\rs," reg "\n" \ |
| 448 | ".byte 0x2e\n" \ |
| 449 | ".endif\n" \ |
| 450 | ".endr\n" |
| 451 | |
| 452 | /* |
| 453 |  * Inline asm uses the %V modifier, which is only available in newer GCC; |
| 454 |  * that is ensured when CONFIG_MITIGATION_RETPOLINE is defined. |
| 455 | */ |
| 456 | #define CALL_NOSPEC __CS_PREFIX("%V[thunk_target]") \ |
| 457 | "call __x86_indirect_thunk_%V[thunk_target]\n" |
| 458 | |
| 459 | # define THUNK_TARGET(addr) [thunk_target] "r" (addr) |
| 460 | |
| 461 | #else /* CONFIG_X86_32 */ |
| 462 | /* |
| 463 | * For i386 we use the original ret-equivalent retpoline, because |
| 464 | * otherwise we'll run out of registers. We don't care about CET |
| 465 | * here, anyway. |
| 466 | */ |
| 467 | # define CALL_NOSPEC \ |
| 468 | ALTERNATIVE_2( \ |
| 469 | ANNOTATE_RETPOLINE_SAFE "\n" \ |
| 470 | "call *%[thunk_target]\n", \ |
| 471 | " jmp 904f;\n" \ |
| 472 | " .align 16\n" \ |
| 473 | "901: call 903f;\n" \ |
| 474 | "902: pause;\n" \ |
| 475 | " lfence;\n" \ |
| 476 | " jmp 902b;\n" \ |
| 477 | " .align 16\n" \ |
| 478 | "903: lea 4(%%esp), %%esp;\n" \ |
| 479 | " pushl %[thunk_target];\n" \ |
| 480 | " ret;\n" \ |
| 481 | " .align 16\n" \ |
| 482 | "904: call 901b;\n", \ |
| 483 | X86_FEATURE_RETPOLINE, \ |
| 484 | "lfence;\n" \ |
| 485 | ANNOTATE_RETPOLINE_SAFE "\n" \ |
| 486 | "call *%[thunk_target]\n", \ |
| 487 | X86_FEATURE_RETPOLINE_LFENCE) |
| 488 | |
| 489 | # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) |
| 490 | #endif |
| 491 | #else /* No retpoline for C / inline asm */ |
| 492 | # define CALL_NOSPEC "call *%[thunk_target]\n" |
| 493 | # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) |
| 494 | #endif |
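|  | |
|  | /* |
|  |  * Illustrative use of CALL_NOSPEC/THUNK_TARGET from C (operand names and |
|  |  * extra constraints are made up; real call sites supply their own): |
|  |  * |
|  |  *	asm volatile(CALL_NOSPEC |
|  |  *		     : "=a" (ret) |
|  |  *		     : THUNK_TARGET(fn), "D" (arg) |
|  |  *		     : "memory"); |
|  |  * |
|  |  * THUNK_TARGET() supplies the [thunk_target] input operand with the |
|  |  * constraint appropriate for the architecture. |
|  |  */ |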
| 495 | |
| 496 | /* The Spectre V2 mitigation variants */ |
| 497 | enum spectre_v2_mitigation { |
| 498 | SPECTRE_V2_NONE, |
| 499 | SPECTRE_V2_RETPOLINE, |
| 500 | SPECTRE_V2_LFENCE, |
| 501 | SPECTRE_V2_EIBRS, |
| 502 | SPECTRE_V2_EIBRS_RETPOLINE, |
| 503 | SPECTRE_V2_EIBRS_LFENCE, |
| 504 | SPECTRE_V2_IBRS, |
| 505 | }; |
| 506 | |
| 507 | /* The indirect branch speculation control variants */ |
| 508 | enum spectre_v2_user_mitigation { |
| 509 | SPECTRE_V2_USER_NONE, |
| 510 | SPECTRE_V2_USER_STRICT, |
| 511 | SPECTRE_V2_USER_STRICT_PREFERRED, |
| 512 | SPECTRE_V2_USER_PRCTL, |
| 513 | SPECTRE_V2_USER_SECCOMP, |
| 514 | }; |
| 515 | |
| 516 | /* The Speculative Store Bypass disable variants */ |
| 517 | enum ssb_mitigation { |
| 518 | SPEC_STORE_BYPASS_NONE, |
| 519 | SPEC_STORE_BYPASS_AUTO, |
| 520 | SPEC_STORE_BYPASS_DISABLE, |
| 521 | SPEC_STORE_BYPASS_PRCTL, |
| 522 | SPEC_STORE_BYPASS_SECCOMP, |
| 523 | }; |
| 524 | |
| 525 | static __always_inline |
| 526 | void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) |
| 527 | { |
| 528 | asm volatile(ALTERNATIVE("", "wrmsr", %c[feature]) |
| 529 | : : "c" (msr), |
| 530 | "a" ((u32)val), |
| 531 | "d" ((u32)(val >> 32)), |
| 532 | [feature] "i" (feature) |
| 533 | : "memory" ); |
| 534 | } |
| 535 | |
| 536 | DECLARE_PER_CPU(bool, x86_ibpb_exit_to_user); |
| 537 | |
| 538 | static inline void indirect_branch_prediction_barrier(void) |
| 539 | { |
| 540 | asm_inline volatile(ALTERNATIVE("", "call write_ibpb", X86_FEATURE_IBPB) |
| 541 | : ASM_CALL_CONSTRAINT |
| 542 | :: "rax" , "rcx" , "rdx" , "memory" ); |
| 543 | } |
| 544 | |
| 545 | /* The Intel SPEC CTRL MSR base value cache */ |
| 546 | extern u64 x86_spec_ctrl_base; |
| 547 | DECLARE_PER_CPU(u64, x86_spec_ctrl_current); |
| 548 | extern void update_spec_ctrl_cond(u64 val); |
| 549 | extern u64 spec_ctrl_current(void); |
| 550 | |
| 551 | /* |
| 552 | * With retpoline, we must use IBRS to restrict branch prediction |
| 553 | * before calling into firmware. |
| 554 | * |
| 555 | * (Implemented as CPP macros due to header hell.) |
| 556 | */ |
| 557 | #define firmware_restrict_branch_speculation_start() \ |
| 558 | do { \ |
| 559 | preempt_disable(); \ |
| 560 | alternative_msr_write(MSR_IA32_SPEC_CTRL, \ |
| 561 | spec_ctrl_current() | SPEC_CTRL_IBRS, \ |
| 562 | X86_FEATURE_USE_IBRS_FW); \ |
| 563 | alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, \ |
| 564 | X86_FEATURE_USE_IBPB_FW); \ |
| 565 | } while (0) |
| 566 | |
| 567 | #define firmware_restrict_branch_speculation_end() \ |
| 568 | do { \ |
| 569 | alternative_msr_write(MSR_IA32_SPEC_CTRL, \ |
| 570 | spec_ctrl_current(), \ |
| 571 | X86_FEATURE_USE_IBRS_FW); \ |
| 572 | preempt_enable(); \ |
| 573 | } while (0) |
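|  | |
|  | /* |
|  |  * Illustrative pairing (the real users are the firmware call wrappers, |
|  |  * e.g. for EFI; the callee name is made up): |
|  |  * |
|  |  *	firmware_restrict_branch_speculation_start(); |
|  |  *	status = fw_call(...);		// call into firmware |
|  |  *	firmware_restrict_branch_speculation_end(); |
|  |  * |
|  |  * start() disables preemption and end() re-enables it, so the two must |
|  |  * always be used as a pair around the firmware call. |
|  |  */ |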
| 574 | |
| 575 | DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); |
| 576 | DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); |
| 577 | DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); |
| 578 | |
| 579 | DECLARE_STATIC_KEY_FALSE(switch_vcpu_ibpb); |
| 580 | |
| 581 | DECLARE_STATIC_KEY_FALSE(cpu_buf_idle_clear); |
| 582 | |
| 583 | DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); |
| 584 | |
| 585 | extern u16 x86_verw_sel; |
| 586 | |
| 587 | #include <asm/segment.h> |
| 588 | |
| 589 | /** |
| 590 | * x86_clear_cpu_buffers - Buffer clearing support for different x86 CPU vulns |
| 591 | * |
| 592 | * This uses the otherwise unused and obsolete VERW instruction in |
| 593 | * combination with microcode which triggers a CPU buffer flush when the |
| 594 | * instruction is executed. |
| 595 | */ |
| 596 | static __always_inline void x86_clear_cpu_buffers(void) |
| 597 | { |
| 598 | static const u16 ds = __KERNEL_DS; |
| 599 | |
| 600 | /* |
| 601 | * Has to be the memory-operand variant because only that |
| 602 | * guarantees the CPU buffer flush functionality according to |
| 603 | * documentation. The register-operand variant does not. |
| 604 | * Works with any segment selector, but a valid writable |
| 605 | * data segment is the fastest variant. |
| 606 | * |
| 607 | * "cc" clobber is required because VERW modifies ZF. |
| 608 | */ |
| 609 | asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc" ); |
| 610 | } |
| 611 | |
| 612 | /** |
| 613 | * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS |
| 614 | * and TSA vulnerabilities. |
| 615 | * |
| 616 |  * Clear CPU buffers if the corresponding static key is enabled. |
| 617 | */ |
| 618 | static __always_inline void x86_idle_clear_cpu_buffers(void) |
| 619 | { |
| 620 | if (static_branch_likely(&cpu_buf_idle_clear)) |
| 621 | x86_clear_cpu_buffers(); |
| 622 | } |
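|  | |
|  | /* |
|  |  * Illustrative caller (the real ones are the idle entry helpers): |
|  |  * |
|  |  *	x86_idle_clear_cpu_buffers(); |
|  |  *	// ... then execute MWAIT/HLT to enter the idle state |
|  |  */ |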
| 623 | |
| 624 | #endif /* __ASSEMBLER__ */ |
| 625 | |
| 626 | #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ |
| 627 | |