author		Linus Torvalds <torvalds@linux-foundation.org>	2018-02-26 12:34:21 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-02-26 12:34:21 -0500
commit		85a2d939c05965ab9e849735436a3c8d3538dc75 (patch)
tree		ba5436dc3c687dc84b22536824b68ab413fc7c4a
parent		d4858aaf6bd8a90e2dacc0dfec2077e334dcedbf (diff)
parent		946fbbc13dce68902f64515b610eeb2a6c3d7a64 (diff)
Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Thomas Gleixner:
 "Yet another pile of melted spectrum related changes:

  - sanitize the array_index_nospec protection mechanism: Remove the
    overengineered array_index_nospec_mask_check() magic and allow
    const-qualified types as index to avoid temporary storage in a
    non-const local variable.

  - make the microcode loader more robust by properly propagating error
    codes. Provide information about new feature bits after microcode
    was updated so administrators can act upon it.

  - optimizations of the entry ASM code which reduce code footprint and
    make the code simpler and faster.

  - fix the {pmd,pud}_{set,clear}_flags() implementations to work
    properly on paravirt kernels by removing the address translation
    operations.

  - revert the harmful vmexit_fill_RSB() optimization

  - use IBRS around firmware calls

  - teach objtool about retpolines and add annotations for indirect
    jumps and calls.

  - explicitly disable jumplabel patching in __init code and handle
    patching failures properly instead of silently ignoring them.

  - remove indirect paravirt calls for writing the speculation control
    MSR, as these calls obviously provide the very attack vector they
    are trying to mitigate.

  - a few small fixes which address build issues with recent compiler
    and assembler versions"

* 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (38 commits)
  KVM/VMX: Optimize vmx_vcpu_run() and svm_vcpu_run() by marking the RDMSR path as unlikely()
  KVM/x86: Remove indirect MSR op calls from SPEC_CTRL
  objtool, retpolines: Integrate objtool with retpoline support more closely
  x86/entry/64: Simplify ENCODE_FRAME_POINTER
  extable: Make init_kernel_text() global
  jump_label: Warn on failed jump_label patching attempt
  jump_label: Explicitly disable jump labels in __init code
  x86/entry/64: Open-code switch_to_thread_stack()
  x86/entry/64: Move ASM_CLAC to interrupt_entry()
  x86/entry/64: Remove 'interrupt' macro
  x86/entry/64: Move the switch_to_thread_stack() call to interrupt_entry()
  x86/entry/64: Move ENTER_IRQ_STACK from interrupt macro to interrupt_entry
  x86/entry/64: Move PUSH_AND_CLEAR_REGS from interrupt macro to helper function
  x86/speculation: Move firmware_restrict_branch_speculation_*() from C to CPP
  objtool: Add module specific retpoline rules
  objtool: Add retpoline validation
  objtool: Use existing global variables for options
  x86/mm/sme, objtool: Annotate indirect call in sme_encrypt_execute()
  x86/boot, objtool: Annotate indirect jump in secondary_startup_64()
  x86/paravirt, objtool: Annotate indirect calls
  ...
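The "use IBRS around firmware calls" item boils down to bracketing any runtime firmware entry with the two helpers this merge adds to nospec-branch.h (see the apm.h and efi.h hunks below). A minimal sketch of the usage pattern, assuming a hypothetical firmware wrapper; only the two firmware_restrict_branch_speculation_*() helpers come from this merge:

	#include <asm/nospec-branch.h>

	/* Hypothetical driver-side wrapper around a runtime firmware call. */
	static int call_vendor_firmware(struct fw_request *req)
	{
		int ret;

		/*
		 * Enable IBRS (and disable preemption) so the firmware's
		 * indirect branches cannot be steered by attacker-trained
		 * predictor state, then drop it again on return.
		 */
		firmware_restrict_branch_speculation_start();
		ret = vendor_fw_call(req);	/* hypothetical firmware entry point */
		firmware_restrict_branch_speculation_end();

		return ret;
	}

The same start/end pairing is what the EFI and APM paths gain in this series.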
-rw-r--r--	Makefile	5
-rw-r--r--	arch/x86/Kconfig	1
-rw-r--r--	arch/x86/Makefile	7
-rw-r--r--	arch/x86/entry/calling.h	34
-rw-r--r--	arch/x86/entry/entry_32.S	3
-rw-r--r--	arch/x86/entry/entry_64.S	153
-rw-r--r--	arch/x86/entry/entry_64_compat.S	71
-rw-r--r--	arch/x86/include/asm/apm.h	6
-rw-r--r--	arch/x86/include/asm/asm-prototypes.h	3
-rw-r--r--	arch/x86/include/asm/cpufeatures.h	1
-rw-r--r--	arch/x86/include/asm/efi.h	17
-rw-r--r--	arch/x86/include/asm/microcode.h	9
-rw-r--r--	arch/x86/include/asm/mmu_context.h	1
-rw-r--r--	arch/x86/include/asm/nospec-branch.h	138
-rw-r--r--	arch/x86/include/asm/paravirt.h	17
-rw-r--r--	arch/x86/include/asm/paravirt_types.h	5
-rw-r--r--	arch/x86/include/asm/pgtable.h	8
-rw-r--r--	arch/x86/include/asm/pgtable_types.h	10
-rw-r--r--	arch/x86/include/asm/processor.h	1
-rw-r--r--	arch/x86/include/asm/refcount.h	4
-rw-r--r--	arch/x86/include/asm/rmwcc.h	16
-rw-r--r--	arch/x86/kernel/apic/io_apic.c	2
-rw-r--r--	arch/x86/kernel/cpu/bugs.c	12
-rw-r--r--	arch/x86/kernel/cpu/common.c	30
-rw-r--r--	arch/x86/kernel/cpu/microcode/amd.c	10
-rw-r--r--	arch/x86/kernel/cpu/microcode/core.c	39
-rw-r--r--	arch/x86/kernel/cpu/microcode/intel.c	10
-rw-r--r--	arch/x86/kernel/head_64.S	2
-rw-r--r--	arch/x86/kernel/unwind_orc.c	3
-rw-r--r--	arch/x86/kvm/svm.c	9
-rw-r--r--	arch/x86/kvm/vmx.c	9
-rw-r--r--	arch/x86/lib/Makefile	1
-rw-r--r--	arch/x86/lib/retpoline.S	56
-rw-r--r--	arch/x86/mm/fault.c	4
-rw-r--r--	arch/x86/mm/mem_encrypt_boot.S	2
-rw-r--r--	arch/x86/realmode/rm/trampoline_64.S	2
-rw-r--r--	include/linux/compiler-clang.h	5
-rw-r--r--	include/linux/compiler-gcc.h	4
-rw-r--r--	include/linux/init.h	8
-rw-r--r--	include/linux/jump_label.h	3
-rw-r--r--	include/linux/kernel.h	1
-rw-r--r--	include/linux/nospec.h	26
-rw-r--r--	include/linux/nospec.h	26
-rw-r--r--	init/main.c	2
-rw-r--r--	kernel/extable.c	2
-rw-r--r--	kernel/jump_label.c	27
-rw-r--r--	scripts/Makefile.build	8
-rw-r--r--	tools/objtool/builtin-check.c	6
-rw-r--r--	tools/objtool/builtin-orc.c	6
-rw-r--r--	tools/objtool/builtin.h	5
-rw-r--r--	tools/objtool/check.c	100
-rw-r--r--	tools/objtool/check.h	3
51 files changed, 604 insertions, 303 deletions
diff --git a/Makefile b/Makefile
index 659a7780aeb3..fb94072fd80d 100644
--- a/Makefile
+++ b/Makefile
@@ -489,6 +489,11 @@ KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
 KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
 endif
 
+RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register
+RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk
+RETPOLINE_CFLAGS := $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG)))
+export RETPOLINE_CFLAGS
+
 ifeq ($(config-targets),1)
 # ===========================================================================
 # *config targets only - make sure prerequisites are updated, and descend
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c1236b187824..eb7f43f23521 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -430,6 +430,7 @@ config GOLDFISH
 config RETPOLINE
 	bool "Avoid speculative indirect branches in kernel"
 	default y
+	select STACK_VALIDATION if HAVE_STACK_VALIDATION
 	help
 	  Compile kernel with the retpoline compiler options to guard against
 	  kernel-to-user data leaks by avoiding speculative indirect
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index fad55160dcb9..498c1b812300 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -232,10 +232,9 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
 # Avoid indirect branches in kernel to deal with Spectre
 ifdef CONFIG_RETPOLINE
-    RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
-    ifneq ($(RETPOLINE_CFLAGS),)
-        KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
-    endif
+ifneq ($(RETPOLINE_CFLAGS),)
+    KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+endif
 endif
 
 archscripts: scripts_basic
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index dce7092ab24a..be63330c5511 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -97,7 +97,7 @@ For 32-bit we have the following conventions - kernel is built with
 
 #define SIZEOF_PTREGS	21*8
 
-.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
 	/*
 	 * Push registers and sanitize registers of values that a
 	 * speculation attack might otherwise want to exploit. The
@@ -105,32 +105,41 @@ For 32-bit we have the following conventions - kernel is built with
 	 * could be put to use in a speculative execution gadget.
 	 * Interleave XOR with PUSH for better uop scheduling:
 	 */
+	.if \save_ret
+	pushq	%rsi		/* pt_regs->si */
+	movq	8(%rsp), %rsi	/* temporarily store the return address in %rsi */
+	movq	%rdi, 8(%rsp)	/* pt_regs->di (overwriting original return address) */
+	.else
 	pushq	%rdi		/* pt_regs->di */
 	pushq	%rsi		/* pt_regs->si */
+	.endif
 	pushq	\rdx		/* pt_regs->dx */
 	pushq	%rcx		/* pt_regs->cx */
 	pushq	\rax		/* pt_regs->ax */
 	pushq	%r8		/* pt_regs->r8 */
-	xorq	%r8, %r8	/* nospec   r8 */
+	xorl	%r8d, %r8d	/* nospec   r8 */
 	pushq	%r9		/* pt_regs->r9 */
-	xorq	%r9, %r9	/* nospec   r9 */
+	xorl	%r9d, %r9d	/* nospec   r9 */
 	pushq	%r10		/* pt_regs->r10 */
-	xorq	%r10, %r10	/* nospec   r10 */
+	xorl	%r10d, %r10d	/* nospec   r10 */
 	pushq	%r11		/* pt_regs->r11 */
-	xorq	%r11, %r11	/* nospec   r11*/
+	xorl	%r11d, %r11d	/* nospec   r11*/
 	pushq	%rbx		/* pt_regs->rbx */
 	xorl	%ebx, %ebx	/* nospec    rbx*/
 	pushq	%rbp		/* pt_regs->rbp */
 	xorl	%ebp, %ebp	/* nospec    rbp*/
 	pushq	%r12		/* pt_regs->r12 */
-	xorq	%r12, %r12	/* nospec   r12*/
+	xorl	%r12d, %r12d	/* nospec   r12*/
 	pushq	%r13		/* pt_regs->r13 */
-	xorq	%r13, %r13	/* nospec   r13*/
+	xorl	%r13d, %r13d	/* nospec   r13*/
 	pushq	%r14		/* pt_regs->r14 */
-	xorq	%r14, %r14	/* nospec   r14*/
+	xorl	%r14d, %r14d	/* nospec   r14*/
 	pushq	%r15		/* pt_regs->r15 */
-	xorq	%r15, %r15	/* nospec   r15*/
+	xorl	%r15d, %r15d	/* nospec   r15*/
 	UNWIND_HINT_REGS
+	.if \save_ret
+	pushq	%rsi		/* return address on top of stack */
+	.endif
 .endm
 
 .macro POP_REGS pop_rdi=1 skip_r11rcx=0
@@ -172,12 +181,7 @@ For 32-bit we have the following conventions - kernel is built with
 	 */
 .macro ENCODE_FRAME_POINTER ptregs_offset=0
 #ifdef CONFIG_FRAME_POINTER
-	.if \ptregs_offset
-		leaq \ptregs_offset(%rsp), %rbp
-	.else
-		mov %rsp, %rbp
-	.endif
-	orq	$0x1, %rbp
+	leaq 1+\ptregs_offset(%rsp), %rbp
 #endif
 .endm
 
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 16c2c022540d..6ad064c8cf35 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -252,8 +252,7 @@ ENTRY(__switch_to_asm)
 	 * exist, overwrite the RSB with entries which capture
 	 * speculative execution to prevent attack.
 	 */
-	/* Clobbers %ebx */
-	FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+	FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
 	/* restore callee-saved registers */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 8971bd64d515..d5c7f18f79ac 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -364,8 +364,7 @@ ENTRY(__switch_to_asm)
364 * exist, overwrite the RSB with entries which capture 364 * exist, overwrite the RSB with entries which capture
365 * speculative execution to prevent attack. 365 * speculative execution to prevent attack.
366 */ 366 */
367 /* Clobbers %rbx */ 367 FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
368 FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
369#endif 368#endif
370 369
371 /* restore callee-saved registers */ 370 /* restore callee-saved registers */
@@ -449,9 +448,19 @@ END(irq_entries_start)
449 * 448 *
450 * The invariant is that, if irq_count != -1, then the IRQ stack is in use. 449 * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
451 */ 450 */
452.macro ENTER_IRQ_STACK regs=1 old_rsp 451.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0
453 DEBUG_ENTRY_ASSERT_IRQS_OFF 452 DEBUG_ENTRY_ASSERT_IRQS_OFF
453
454 .if \save_ret
455 /*
456 * If save_ret is set, the original stack contains one additional
457 * entry -- the return address. Therefore, move the address one
458 * entry below %rsp to \old_rsp.
459 */
460 leaq 8(%rsp), \old_rsp
461 .else
454 movq %rsp, \old_rsp 462 movq %rsp, \old_rsp
463 .endif
455 464
456 .if \regs 465 .if \regs
457 UNWIND_HINT_REGS base=\old_rsp 466 UNWIND_HINT_REGS base=\old_rsp
@@ -497,6 +506,15 @@ END(irq_entries_start)
497 .if \regs 506 .if \regs
498 UNWIND_HINT_REGS indirect=1 507 UNWIND_HINT_REGS indirect=1
499 .endif 508 .endif
509
510 .if \save_ret
511 /*
512 * Push the return address to the stack. This return address can
513 * be found at the "real" original RSP, which was offset by 8 at
514 * the beginning of this macro.
515 */
516 pushq -8(\old_rsp)
517 .endif
500.endm 518.endm
501 519
502/* 520/*
@@ -520,27 +538,65 @@ END(irq_entries_start)
520.endm 538.endm
521 539
522/* 540/*
523 * Interrupt entry/exit. 541 * Interrupt entry helper function.
524 *
525 * Interrupt entry points save only callee clobbered registers in fast path.
526 * 542 *
527 * Entry runs with interrupts off. 543 * Entry runs with interrupts off. Stack layout at entry:
544 * +----------------------------------------------------+
545 * | regs->ss |
546 * | regs->rsp |
547 * | regs->eflags |
548 * | regs->cs |
549 * | regs->ip |
550 * +----------------------------------------------------+
551 * | regs->orig_ax = ~(interrupt number) |
552 * +----------------------------------------------------+
553 * | return address |
554 * +----------------------------------------------------+
528 */ 555 */
529 556ENTRY(interrupt_entry)
530/* 0(%rsp): ~(interrupt number) */ 557 UNWIND_HINT_FUNC
531 .macro interrupt func 558 ASM_CLAC
532 cld 559 cld
533 560
534 testb $3, CS-ORIG_RAX(%rsp) 561 testb $3, CS-ORIG_RAX+8(%rsp)
535 jz 1f 562 jz 1f
536 SWAPGS 563 SWAPGS
537 call switch_to_thread_stack 564
565 /*
566 * Switch to the thread stack. The IRET frame and orig_ax are
567 * on the stack, as well as the return address. RDI..R12 are
568 * not (yet) on the stack and space has not (yet) been
569 * allocated for them.
570 */
571 pushq %rdi
572
573 /* Need to switch before accessing the thread stack. */
574 SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
575 movq %rsp, %rdi
576 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
577
578 /*
579 * We have RDI, return address, and orig_ax on the stack on
580 * top of the IRET frame. That means offset=24
581 */
582 UNWIND_HINT_IRET_REGS base=%rdi offset=24
583
584 pushq 7*8(%rdi) /* regs->ss */
585 pushq 6*8(%rdi) /* regs->rsp */
586 pushq 5*8(%rdi) /* regs->eflags */
587 pushq 4*8(%rdi) /* regs->cs */
588 pushq 3*8(%rdi) /* regs->ip */
589 pushq 2*8(%rdi) /* regs->orig_ax */
590 pushq 8(%rdi) /* return address */
591 UNWIND_HINT_FUNC
592
593 movq (%rdi), %rdi
5381: 5941:
539 595
540 PUSH_AND_CLEAR_REGS 596 PUSH_AND_CLEAR_REGS save_ret=1
541 ENCODE_FRAME_POINTER 597 ENCODE_FRAME_POINTER 8
542 598
543 testb $3, CS(%rsp) 599 testb $3, CS+8(%rsp)
544 jz 1f 600 jz 1f
545 601
546 /* 602 /*
@@ -548,7 +604,7 @@ END(irq_entries_start)
548 * 604 *
549 * We need to tell lockdep that IRQs are off. We can't do this until 605 * We need to tell lockdep that IRQs are off. We can't do this until
550 * we fix gsbase, and we should do it before enter_from_user_mode 606 * we fix gsbase, and we should do it before enter_from_user_mode
551 * (which can take locks). Since TRACE_IRQS_OFF idempotent, 607 * (which can take locks). Since TRACE_IRQS_OFF is idempotent,
552 * the simplest way to handle it is to just call it twice if 608 * the simplest way to handle it is to just call it twice if
553 * we enter from user mode. There's no reason to optimize this since 609 * we enter from user mode. There's no reason to optimize this since
554 * TRACE_IRQS_OFF is a no-op if lockdep is off. 610 * TRACE_IRQS_OFF is a no-op if lockdep is off.
@@ -558,12 +614,15 @@ END(irq_entries_start)
558 CALL_enter_from_user_mode 614 CALL_enter_from_user_mode
559 615
5601: 6161:
561 ENTER_IRQ_STACK old_rsp=%rdi 617 ENTER_IRQ_STACK old_rsp=%rdi save_ret=1
562 /* We entered an interrupt context - irqs are off: */ 618 /* We entered an interrupt context - irqs are off: */
563 TRACE_IRQS_OFF 619 TRACE_IRQS_OFF
564 620
565 call \func /* rdi points to pt_regs */ 621 ret
566 .endm 622END(interrupt_entry)
623
624
625/* Interrupt entry/exit. */
567 626
568 /* 627 /*
569 * The interrupt stubs push (~vector+0x80) onto the stack and 628 * The interrupt stubs push (~vector+0x80) onto the stack and
@@ -571,9 +630,10 @@ END(irq_entries_start)
571 */ 630 */
572 .p2align CONFIG_X86_L1_CACHE_SHIFT 631 .p2align CONFIG_X86_L1_CACHE_SHIFT
573common_interrupt: 632common_interrupt:
574 ASM_CLAC
575 addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ 633 addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
576 interrupt do_IRQ 634 call interrupt_entry
635 UNWIND_HINT_REGS indirect=1
636 call do_IRQ /* rdi points to pt_regs */
577 /* 0(%rsp): old RSP */ 637 /* 0(%rsp): old RSP */
578ret_from_intr: 638ret_from_intr:
579 DISABLE_INTERRUPTS(CLBR_ANY) 639 DISABLE_INTERRUPTS(CLBR_ANY)
@@ -766,10 +826,11 @@ END(common_interrupt)
766.macro apicinterrupt3 num sym do_sym 826.macro apicinterrupt3 num sym do_sym
767ENTRY(\sym) 827ENTRY(\sym)
768 UNWIND_HINT_IRET_REGS 828 UNWIND_HINT_IRET_REGS
769 ASM_CLAC
770 pushq $~(\num) 829 pushq $~(\num)
771.Lcommon_\sym: 830.Lcommon_\sym:
772 interrupt \do_sym 831 call interrupt_entry
832 UNWIND_HINT_REGS indirect=1
833 call \do_sym /* rdi points to pt_regs */
773 jmp ret_from_intr 834 jmp ret_from_intr
774END(\sym) 835END(\sym)
775.endm 836.endm
@@ -832,34 +893,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
832 */ 893 */
833#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) 894#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
834 895
835/*
836 * Switch to the thread stack. This is called with the IRET frame and
837 * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
838 * space has not been allocated for them.)
839 */
840ENTRY(switch_to_thread_stack)
841 UNWIND_HINT_FUNC
842
843 pushq %rdi
844 /* Need to switch before accessing the thread stack. */
845 SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
846 movq %rsp, %rdi
847 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
848 UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
849
850 pushq 7*8(%rdi) /* regs->ss */
851 pushq 6*8(%rdi) /* regs->rsp */
852 pushq 5*8(%rdi) /* regs->eflags */
853 pushq 4*8(%rdi) /* regs->cs */
854 pushq 3*8(%rdi) /* regs->ip */
855 pushq 2*8(%rdi) /* regs->orig_ax */
856 pushq 8(%rdi) /* return address */
857 UNWIND_HINT_FUNC
858
859 movq (%rdi), %rdi
860 ret
861END(switch_to_thread_stack)
862
863.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 896.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
864ENTRY(\sym) 897ENTRY(\sym)
865 UNWIND_HINT_IRET_REGS offset=\has_error_code*8 898 UNWIND_HINT_IRET_REGS offset=\has_error_code*8
@@ -875,12 +908,8 @@ ENTRY(\sym)
875 pushq $-1 /* ORIG_RAX: no syscall to restart */ 908 pushq $-1 /* ORIG_RAX: no syscall to restart */
876 .endif 909 .endif
877 910
878 /* Save all registers in pt_regs */
879 PUSH_AND_CLEAR_REGS
880 ENCODE_FRAME_POINTER
881
882 .if \paranoid < 2 911 .if \paranoid < 2
883 testb $3, CS(%rsp) /* If coming from userspace, switch stacks */ 912 testb $3, CS-ORIG_RAX(%rsp) /* If coming from userspace, switch stacks */
884 jnz .Lfrom_usermode_switch_stack_\@ 913 jnz .Lfrom_usermode_switch_stack_\@
885 .endif 914 .endif
886 915
@@ -1130,13 +1159,15 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1
1130#endif 1159#endif
1131 1160
1132/* 1161/*
1133 * Switch gs if needed. 1162 * Save all registers in pt_regs, and switch gs if needed.
1134 * Use slow, but surefire "are we in kernel?" check. 1163 * Use slow, but surefire "are we in kernel?" check.
1135 * Return: ebx=0: need swapgs on exit, ebx=1: otherwise 1164 * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
1136 */ 1165 */
1137ENTRY(paranoid_entry) 1166ENTRY(paranoid_entry)
1138 UNWIND_HINT_FUNC 1167 UNWIND_HINT_FUNC
1139 cld 1168 cld
1169 PUSH_AND_CLEAR_REGS save_ret=1
1170 ENCODE_FRAME_POINTER 8
1140 movl $1, %ebx 1171 movl $1, %ebx
1141 movl $MSR_GS_BASE, %ecx 1172 movl $MSR_GS_BASE, %ecx
1142 rdmsr 1173 rdmsr
@@ -1181,12 +1212,14 @@ ENTRY(paranoid_exit)
1181END(paranoid_exit) 1212END(paranoid_exit)
1182 1213
1183/* 1214/*
1184 * Switch gs if needed. 1215 * Save all registers in pt_regs, and switch GS if needed.
1185 * Return: EBX=0: came from user mode; EBX=1: otherwise 1216 * Return: EBX=0: came from user mode; EBX=1: otherwise
1186 */ 1217 */
1187ENTRY(error_entry) 1218ENTRY(error_entry)
1188 UNWIND_HINT_REGS offset=8 1219 UNWIND_HINT_FUNC
1189 cld 1220 cld
1221 PUSH_AND_CLEAR_REGS save_ret=1
1222 ENCODE_FRAME_POINTER 8
1190 testb $3, CS+8(%rsp) 1223 testb $3, CS+8(%rsp)
1191 jz .Lerror_kernelspace 1224 jz .Lerror_kernelspace
1192 1225
@@ -1577,8 +1610,6 @@ end_repeat_nmi:
1577 * frame to point back to repeat_nmi. 1610 * frame to point back to repeat_nmi.
1578 */ 1611 */
1579 pushq $-1 /* ORIG_RAX: no syscall to restart */ 1612 pushq $-1 /* ORIG_RAX: no syscall to restart */
1580 PUSH_AND_CLEAR_REGS
1581 ENCODE_FRAME_POINTER
1582 1613
1583 /* 1614 /*
1584 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit 1615 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index fd65e016e413..e811dd9c5e99 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -85,25 +85,25 @@ ENTRY(entry_SYSENTER_compat)
 	pushq	%rcx			/* pt_regs->cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
 	pushq	$0			/* pt_regs->r8  = 0 */
-	xorq	%r8, %r8		/* nospec   r8 */
+	xorl	%r8d, %r8d		/* nospec   r8 */
 	pushq	$0			/* pt_regs->r9  = 0 */
-	xorq	%r9, %r9		/* nospec   r9 */
+	xorl	%r9d, %r9d		/* nospec   r9 */
 	pushq	$0			/* pt_regs->r10 = 0 */
-	xorq	%r10, %r10		/* nospec   r10 */
+	xorl	%r10d, %r10d		/* nospec   r10 */
 	pushq	$0			/* pt_regs->r11 = 0 */
-	xorq	%r11, %r11		/* nospec   r11 */
+	xorl	%r11d, %r11d		/* nospec   r11 */
 	pushq	%rbx			/* pt_regs->rbx */
 	xorl	%ebx, %ebx		/* nospec   rbx */
 	pushq	%rbp			/* pt_regs->rbp (will be overwritten) */
 	xorl	%ebp, %ebp		/* nospec   rbp */
 	pushq	$0			/* pt_regs->r12 = 0 */
-	xorq	%r12, %r12		/* nospec   r12 */
+	xorl	%r12d, %r12d		/* nospec   r12 */
 	pushq	$0			/* pt_regs->r13 = 0 */
-	xorq	%r13, %r13		/* nospec   r13 */
+	xorl	%r13d, %r13d		/* nospec   r13 */
 	pushq	$0			/* pt_regs->r14 = 0 */
-	xorq	%r14, %r14		/* nospec   r14 */
+	xorl	%r14d, %r14d		/* nospec   r14 */
 	pushq	$0			/* pt_regs->r15 = 0 */
-	xorq	%r15, %r15		/* nospec   r15 */
+	xorl	%r15d, %r15d		/* nospec   r15 */
 	cld
 
 	/*
@@ -224,25 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
 	pushq	%rbp			/* pt_regs->cx (stashed in bp) */
 	pushq	$-ENOSYS		/* pt_regs->ax */
 	pushq	$0			/* pt_regs->r8  = 0 */
-	xorq	%r8, %r8		/* nospec   r8 */
+	xorl	%r8d, %r8d		/* nospec   r8 */
 	pushq	$0			/* pt_regs->r9  = 0 */
-	xorq	%r9, %r9		/* nospec   r9 */
+	xorl	%r9d, %r9d		/* nospec   r9 */
 	pushq	$0			/* pt_regs->r10 = 0 */
-	xorq	%r10, %r10		/* nospec   r10 */
+	xorl	%r10d, %r10d		/* nospec   r10 */
 	pushq	$0			/* pt_regs->r11 = 0 */
-	xorq	%r11, %r11		/* nospec   r11 */
+	xorl	%r11d, %r11d		/* nospec   r11 */
 	pushq	%rbx			/* pt_regs->rbx */
 	xorl	%ebx, %ebx		/* nospec   rbx */
 	pushq	%rbp			/* pt_regs->rbp (will be overwritten) */
 	xorl	%ebp, %ebp		/* nospec   rbp */
 	pushq	$0			/* pt_regs->r12 = 0 */
-	xorq	%r12, %r12		/* nospec   r12 */
+	xorl	%r12d, %r12d		/* nospec   r12 */
 	pushq	$0			/* pt_regs->r13 = 0 */
-	xorq	%r13, %r13		/* nospec   r13 */
+	xorl	%r13d, %r13d		/* nospec   r13 */
 	pushq	$0			/* pt_regs->r14 = 0 */
-	xorq	%r14, %r14		/* nospec   r14 */
+	xorl	%r14d, %r14d		/* nospec   r14 */
 	pushq	$0			/* pt_regs->r15 = 0 */
-	xorq	%r15, %r15		/* nospec   r15 */
+	xorl	%r15d, %r15d		/* nospec   r15 */
 
 	/*
 	 * User mode is traced as though IRQs are on, and SYSENTER
@@ -298,9 +298,9 @@ sysret32_from_system_call:
 	 */
 	SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
 
-	xorq	%r8, %r8
-	xorq	%r9, %r9
-	xorq	%r10, %r10
+	xorl	%r8d, %r8d
+	xorl	%r9d, %r9d
+	xorl	%r10d, %r10d
 	swapgs
 	sysretl
 END(entry_SYSCALL_compat)
@@ -347,10 +347,23 @@ ENTRY(entry_INT80_compat)
 	 */
 	movl	%eax, %eax
 
+	/* switch to thread stack expects orig_ax and rdi to be pushed */
 	pushq	%rax			/* pt_regs->orig_ax */
+	pushq	%rdi			/* pt_regs->di */
+
+	/* Need to switch before accessing the thread stack. */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+	movq	%rsp, %rdi
+	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+
+	pushq	6*8(%rdi)		/* regs->ss */
+	pushq	5*8(%rdi)		/* regs->rsp */
+	pushq	4*8(%rdi)		/* regs->eflags */
+	pushq	3*8(%rdi)		/* regs->cs */
+	pushq	2*8(%rdi)		/* regs->ip */
+	pushq	1*8(%rdi)		/* regs->orig_ax */
 
-	/* switch to thread stack expects orig_ax to be pushed */
-	call	switch_to_thread_stack
+	movq	(%rdi), %rdi		/* restore %rdi */
 
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
@@ -358,25 +371,25 @@
 	pushq	%rcx			/* pt_regs->cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
 	pushq	$0			/* pt_regs->r8  = 0 */
-	xorq	%r8, %r8		/* nospec   r8 */
+	xorl	%r8d, %r8d		/* nospec   r8 */
 	pushq	$0			/* pt_regs->r9  = 0 */
-	xorq	%r9, %r9		/* nospec   r9 */
+	xorl	%r9d, %r9d		/* nospec   r9 */
 	pushq	$0			/* pt_regs->r10 = 0 */
-	xorq	%r10, %r10		/* nospec   r10 */
+	xorl	%r10d, %r10d		/* nospec   r10 */
 	pushq	$0			/* pt_regs->r11 = 0 */
-	xorq	%r11, %r11		/* nospec   r11 */
+	xorl	%r11d, %r11d		/* nospec   r11 */
 	pushq	%rbx			/* pt_regs->rbx */
 	xorl	%ebx, %ebx		/* nospec   rbx */
 	pushq	%rbp			/* pt_regs->rbp */
 	xorl	%ebp, %ebp		/* nospec   rbp */
 	pushq	%r12			/* pt_regs->r12 */
-	xorq	%r12, %r12		/* nospec   r12 */
+	xorl	%r12d, %r12d		/* nospec   r12 */
 	pushq	%r13			/* pt_regs->r13 */
-	xorq	%r13, %r13		/* nospec   r13 */
+	xorl	%r13d, %r13d		/* nospec   r13 */
 	pushq	%r14			/* pt_regs->r14 */
-	xorq	%r14, %r14		/* nospec   r14 */
+	xorl	%r14d, %r14d		/* nospec   r14 */
 	pushq	%r15			/* pt_regs->r15 */
-	xorq	%r15, %r15		/* nospec   r15 */
+	xorl	%r15d, %r15d		/* nospec   r15 */
 	cld
 
 	/*
diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h
index 4d4015ddcf26..c356098b6fb9 100644
--- a/arch/x86/include/asm/apm.h
+++ b/arch/x86/include/asm/apm.h
@@ -7,6 +7,8 @@
 #ifndef _ASM_X86_MACH_DEFAULT_APM_H
 #define _ASM_X86_MACH_DEFAULT_APM_H
 
+#include <asm/nospec-branch.h>
+
 #ifdef APM_ZERO_SEGS
 #	define APM_DO_ZERO_SEGS \
 		"pushl %%ds\n\t" \
@@ -32,6 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
 	 * N.B. We do NOT need a cld after the BIOS call
 	 * because we always save and restore the flags.
 	 */
+	firmware_restrict_branch_speculation_start();
 	__asm__ __volatile__(APM_DO_ZERO_SEGS
 		"pushl %%edi\n\t"
 		"pushl %%ebp\n\t"
@@ -44,6 +47,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
 		  "=S" (*esi)
 		: "a" (func), "b" (ebx_in), "c" (ecx_in)
 		: "memory", "cc");
+	firmware_restrict_branch_speculation_end();
 }
 
 static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
@@ -56,6 +60,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
 	 * N.B. We do NOT need a cld after the BIOS call
 	 * because we always save and restore the flags.
 	 */
+	firmware_restrict_branch_speculation_start();
 	__asm__ __volatile__(APM_DO_ZERO_SEGS
 		"pushl %%edi\n\t"
 		"pushl %%ebp\n\t"
@@ -68,6 +73,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
 		  "=S" (si)
 		: "a" (func), "b" (ebx_in), "c" (ecx_in)
 		: "memory", "cc");
+	firmware_restrict_branch_speculation_end();
 	return error;
 }
 
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 4d111616524b..1908214b9125 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -38,7 +38,4 @@ INDIRECT_THUNK(dx)
 INDIRECT_THUNK(si)
 INDIRECT_THUNK(di)
 INDIRECT_THUNK(bp)
-asmlinkage void __fill_rsb(void);
-asmlinkage void __clear_rsb(void);
-
 #endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 0dfe4d3f74e2..f41079da38c5 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -213,6 +213,7 @@
 #define X86_FEATURE_SEV			( 7*32+20) /* AMD Secure Encrypted Virtualization */
 
 #define X86_FEATURE_USE_IBPB		( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
+#define X86_FEATURE_USE_IBRS_FW		( 7*32+22) /* "" Use IBRS during runtime firmware calls */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 85f6ccb80b91..a399c1ebf6f0 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -6,6 +6,7 @@
 #include <asm/pgtable.h>
 #include <asm/processor-flags.h>
 #include <asm/tlb.h>
+#include <asm/nospec-branch.h>
 
 /*
  * We map the EFI regions needed for runtime services non-contiguously,
@@ -36,8 +37,18 @@
 
 extern asmlinkage unsigned long efi_call_phys(void *, ...);
 
-#define arch_efi_call_virt_setup()	kernel_fpu_begin()
-#define arch_efi_call_virt_teardown()	kernel_fpu_end()
+#define arch_efi_call_virt_setup()					\
+({									\
+	kernel_fpu_begin();						\
+	firmware_restrict_branch_speculation_start();			\
+})
+
+#define arch_efi_call_virt_teardown()					\
+({									\
+	firmware_restrict_branch_speculation_end();			\
+	kernel_fpu_end();						\
+})
+
 
 /*
  * Wrap all the virtual calls in a way that forces the parameters on the stack.
@@ -73,6 +84,7 @@ struct efi_scratch {
 	efi_sync_low_kernel_mappings();					\
 	preempt_disable();						\
 	__kernel_fpu_begin();						\
+	firmware_restrict_branch_speculation_start();			\
 									\
 	if (efi_scratch.use_pgd) {					\
 		efi_scratch.prev_cr3 = __read_cr3();			\
@@ -91,6 +103,7 @@ struct efi_scratch {
 		__flush_tlb_all();					\
 	}								\
 									\
+	firmware_restrict_branch_speculation_end();			\
 	__kernel_fpu_end();						\
 	preempt_enable();						\
 })
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 55520cec8b27..7fb1047d61c7 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -37,7 +37,12 @@ struct cpu_signature {
 
 struct device;
 
-enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND };
+enum ucode_state {
+	UCODE_OK	= 0,
+	UCODE_UPDATED,
+	UCODE_NFOUND,
+	UCODE_ERROR,
+};
 
 struct microcode_ops {
 	enum ucode_state (*request_microcode_user) (int cpu,
@@ -54,7 +59,7 @@ struct microcode_ops {
 	 * are being called.
 	 * See also the "Synchronization" section in microcode_core.c.
 	 */
-	int (*apply_microcode) (int cpu);
+	enum ucode_state (*apply_microcode) (int cpu);
 	int (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
 };
 
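With apply_microcode() now returning enum ucode_state, callers can distinguish "nothing to do" from "microcode was actually updated" and only recheck feature bits in the latter case. A hedged sketch of how a late-load path might consume the new return value (the wrapper below is illustrative and not part of this diff; locking is elided, and only enum ucode_state, struct microcode_ops, the UCODE_* values and microcode_check() are taken from this series):

	static void reload_cpu_microcode(int cpu, struct microcode_ops *ops)
	{
		enum ucode_state ret = ops->apply_microcode(cpu);

		switch (ret) {
		case UCODE_UPDATED:
			/* New microcode applied: re-read CPUID feature bits. */
			microcode_check();
			break;
		case UCODE_OK:
		case UCODE_NFOUND:
			/* Already current, or no matching patch: nothing to do. */
			break;
		case UCODE_ERROR:
			pr_err("microcode: update failed on CPU%d\n", cpu);
			break;
		}
	}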
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index c931b88982a0..1de72ce514cd 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -74,6 +74,7 @@ static inline void *ldt_slot_va(int slot)
 	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
 #else
 	BUG();
+	return (void *)fix_to_virt(FIX_HOLE);
 #endif
 }
 
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 81a1be326571..d0dabeae0505 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -8,6 +8,50 @@
 #include <asm/cpufeatures.h>
 #include <asm/msr-index.h>
 
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS		16	/* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version — two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp)	\
+	mov	$(nr/2), reg;			\
+771:						\
+	call	772f;				\
+773:	/* speculation trap */			\
+	pause;					\
+	lfence;					\
+	jmp	773b;				\
+772:						\
+	call	774f;				\
+775:	/* speculation trap */			\
+	pause;					\
+	lfence;					\
+	jmp	775b;				\
+774:						\
+	dec	reg;				\
+	jnz	771b;				\
+	add	$(BITS_PER_LONG/8) * nr, sp;
+
 #ifdef __ASSEMBLY__
 
 /*
@@ -24,6 +68,18 @@
 .endm
 
 /*
+ * This should be used immediately before an indirect jump/call. It tells
+ * objtool the subsequent indirect jump/call is vouched safe for retpoline
+ * builds.
+ */
+.macro ANNOTATE_RETPOLINE_SAFE
+	.Lannotate_\@:
+	.pushsection .discard.retpoline_safe
+	_ASM_PTR .Lannotate_\@
+	.popsection
+.endm
+
+/*
  * These are the bare retpoline primitives for indirect jmp and call.
  * Do not use these directly; they only exist to make the ALTERNATIVE
  * invocation below less ugly.
@@ -59,9 +115,9 @@
 .macro JMP_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
 	ANNOTATE_NOSPEC_ALTERNATIVE
-	ALTERNATIVE_2 __stringify(jmp *\reg), \
+	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \
 		__stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
-		__stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
 #else
 	jmp	*\reg
 #endif
@@ -70,18 +126,25 @@
 .macro CALL_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
 	ANNOTATE_NOSPEC_ALTERNATIVE
-	ALTERNATIVE_2 __stringify(call *\reg), \
+	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \
 		__stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
-		__stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
+		__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD
 #else
 	call	*\reg
 #endif
 .endm
 
-/* This clobbers the BX register */
-.macro FILL_RETURN_BUFFER nr:req ftr:req
+ /*
+  * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+  * monstrosity above, manually.
+  */
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
 #ifdef CONFIG_RETPOLINE
-	ALTERNATIVE "", "call __clear_rsb", \ftr
+	ANNOTATE_NOSPEC_ALTERNATIVE
+	ALTERNATIVE "jmp .Lskip_rsb_\@", \
+		__stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
+		\ftr
+.Lskip_rsb_\@:
 #endif
 .endm
 
@@ -93,6 +156,12 @@
 	".long 999b - .\n\t"					\
 	".popsection\n\t"
 
+#define ANNOTATE_RETPOLINE_SAFE					\
+	"999:\n\t"						\
+	".pushsection .discard.retpoline_safe\n\t"		\
+	_ASM_PTR " 999b\n\t"					\
+	".popsection\n\t"
+
 #if defined(CONFIG_X86_64) && defined(RETPOLINE)
 
 /*
@@ -102,6 +171,7 @@
 # define CALL_NOSPEC						\
 	ANNOTATE_NOSPEC_ALTERNATIVE				\
 	ALTERNATIVE(						\
+	ANNOTATE_RETPOLINE_SAFE					\
 	"call *%[thunk_target]\n",				\
 	"call __x86_indirect_thunk_%V[thunk_target]\n",		\
 	X86_FEATURE_RETPOLINE)
@@ -156,26 +226,54 @@ extern char __indirect_thunk_end[];
 static inline void vmexit_fill_RSB(void)
 {
 #ifdef CONFIG_RETPOLINE
-	alternative_input("",
-			  "call __fill_rsb",
-			  X86_FEATURE_RETPOLINE,
-			  ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory"));
+	unsigned long loops;
+
+	asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+		      ALTERNATIVE("jmp 910f",
+				  __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+				  X86_FEATURE_RETPOLINE)
+		      "910:"
+		      : "=r" (loops), ASM_CALL_CONSTRAINT
+		      : : "memory" );
 #endif
 }
 
+#define alternative_msr_write(_msr, _val, _feature)		\
+	asm volatile(ALTERNATIVE("",				\
+				 "movl %[msr], %%ecx\n\t"	\
+				 "movl %[val], %%eax\n\t"	\
+				 "movl $0, %%edx\n\t"		\
+				 "wrmsr",			\
+				 _feature)			\
+		     : : [msr] "i" (_msr), [val] "i" (_val)	\
+		     : "eax", "ecx", "edx", "memory")
+
 static inline void indirect_branch_prediction_barrier(void)
 {
-	asm volatile(ALTERNATIVE("",
-				 "movl %[msr], %%ecx\n\t"
-				 "movl %[val], %%eax\n\t"
-				 "movl $0, %%edx\n\t"
-				 "wrmsr",
-				 X86_FEATURE_USE_IBPB)
-		     : : [msr] "i" (MSR_IA32_PRED_CMD),
-			 [val] "i" (PRED_CMD_IBPB)
-		     : "eax", "ecx", "edx", "memory");
+	alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
+			      X86_FEATURE_USE_IBPB);
 }
 
+/*
+ * With retpoline, we must use IBRS to restrict branch prediction
+ * before calling into firmware.
+ *
+ * (Implemented as CPP macros due to header hell.)
+ */
+#define firmware_restrict_branch_speculation_start()			\
+do {									\
+	preempt_disable();						\
+	alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS,	\
+			      X86_FEATURE_USE_IBRS_FW);			\
+} while (0)
+
+#define firmware_restrict_branch_speculation_end()			\
+do {									\
+	alternative_msr_write(MSR_IA32_SPEC_CTRL, 0,			\
+			      X86_FEATURE_USE_IBRS_FW);			\
+	preempt_enable();						\
+} while (0)
+
 #endif /* __ASSEMBLY__ */
 
 /*
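For C code, the CALL_NOSPEC alternative above is meant to be used inside an asm statement so the indirect call is either emitted directly or routed through the __x86_indirect_thunk_* retpoline. A rough usage sketch, under stated assumptions: the wrapper function is hypothetical, THUNK_TARGET() is the companion operand macro from this header (not shown in the hunks above), and the clobber list is a conservative guess covering the caller-saved GPRs, with FP/vector state handling elided:

	static inline unsigned long indirect_call_nospec(unsigned long (*fn)(void))
	{
		unsigned long ret;

		/* The call goes through a retpoline thunk on CONFIG_RETPOLINE builds. */
		asm volatile(CALL_NOSPEC
			     : "=a" (ret), ASM_CALL_CONSTRAINT
			     : THUNK_TARGET(fn)
			     : "memory", "cc", "rcx", "rdx", "rsi", "rdi",
			       "r8", "r9", "r10", "r11");
		return ret;
	}

On non-retpoline configurations the alternative degrades to a plain "call *%[thunk_target]", so the same wrapper works either way.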
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 554841fab717..c83a2f418cea 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -7,6 +7,7 @@
 #ifdef CONFIG_PARAVIRT
 #include <asm/pgtable_types.h>
 #include <asm/asm.h>
+#include <asm/nospec-branch.h>
 
 #include <asm/paravirt_types.h>
 
@@ -879,23 +880,27 @@ extern void default_banner(void);
 
 #define INTERRUPT_RETURN						\
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE,	\
-		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
+		  ANNOTATE_RETPOLINE_SAFE;				\
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);)
 
 #define DISABLE_INTERRUPTS(clobbers)					\
 	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
 		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\
+		  ANNOTATE_RETPOLINE_SAFE;				\
 		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);	\
 		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
 
 #define ENABLE_INTERRUPTS(clobbers)					\
 	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,	\
 		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\
+		  ANNOTATE_RETPOLINE_SAFE;				\
 		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);	\
 		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
 
 #ifdef CONFIG_X86_32
 #define GET_CR0_INTO_EAX				\
 	push %ecx; push %edx;				\
+	ANNOTATE_RETPOLINE_SAFE;			\
 	call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0);	\
 	pop %edx; pop %ecx
 #else	/* !CONFIG_X86_32 */
@@ -917,21 +922,25 @@ extern void default_banner(void);
  */
 #define SWAPGS								\
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
-		  call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs)		\
+		  ANNOTATE_RETPOLINE_SAFE;				\
+		  call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs);		\
 		 )
 
 #define GET_CR2_INTO_RAX				\
-	call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
+	ANNOTATE_RETPOLINE_SAFE;				\
+	call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2);
 
 #define USERGS_SYSRET64						\
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
 		  CLBR_NONE,						\
-		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
+		  ANNOTATE_RETPOLINE_SAFE;				\
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);)
 
 #ifdef CONFIG_DEBUG_ENTRY
 #define SAVE_FLAGS(clobbers)                                        \
 	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
 		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);        \
+		  ANNOTATE_RETPOLINE_SAFE;			    \
 		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl);    \
 		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
 #endif
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index f624f1f10316..180bc0bff0fb 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -43,6 +43,7 @@
 #include <asm/desc_defs.h>
 #include <asm/kmap_types.h>
 #include <asm/pgtable_types.h>
+#include <asm/nospec-branch.h>
 
 struct page;
 struct thread_struct;
@@ -392,7 +393,9 @@ int paravirt_disable_iospace(void);
  * offset into the paravirt_patch_template structure, and can therefore be
  * freely converted back into a structure offset.
  */
-#define PARAVIRT_CALL	"call *%c[paravirt_opptr];"
+#define PARAVIRT_CALL					\
+	ANNOTATE_RETPOLINE_SAFE				\
+	"call *%c[paravirt_opptr];"
 
 /*
  * These macros are intended to wrap calls through one of the paravirt
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 63c2552b6b65..b444d83cfc95 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -350,14 +350,14 @@ static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
 {
 	pmdval_t v = native_pmd_val(pmd);
 
-	return __pmd(v | set);
+	return native_make_pmd(v | set);
 }
 
 static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
 {
 	pmdval_t v = native_pmd_val(pmd);
 
-	return __pmd(v & ~clear);
+	return native_make_pmd(v & ~clear);
 }
 
 static inline pmd_t pmd_mkold(pmd_t pmd)
@@ -409,14 +409,14 @@ static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
 {
 	pudval_t v = native_pud_val(pud);
 
-	return __pud(v | set);
+	return native_make_pud(v | set);
 }
 
 static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
 {
 	pudval_t v = native_pud_val(pud);
 
-	return __pud(v & ~clear);
+	return native_make_pud(v & ~clear);
 }
 
 static inline pud_t pud_mkold(pud_t pud)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 3696398a9475..246f15b4e64c 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -323,6 +323,11 @@ static inline pudval_t native_pud_val(pud_t pud)
 #else
 #include <asm-generic/pgtable-nopud.h>
 
+static inline pud_t native_make_pud(pudval_t val)
+{
+	return (pud_t) { .p4d.pgd = native_make_pgd(val) };
+}
+
 static inline pudval_t native_pud_val(pud_t pud)
 {
 	return native_pgd_val(pud.p4d.pgd);
@@ -344,6 +349,11 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
 #else
 #include <asm-generic/pgtable-nopmd.h>
 
+static inline pmd_t native_make_pmd(pmdval_t val)
+{
+	return (pmd_t) { .pud.p4d.pgd = native_make_pgd(val) };
+}
+
 static inline pmdval_t native_pmd_val(pmd_t pmd)
 {
 	return native_pgd_val(pmd.pud.p4d.pgd);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 1bd9ed87606f..b0ccd4847a58 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -977,4 +977,5 @@ bool xen_set_default_idle(void);
 
 void stop_this_cpu(void *dummy);
 void df_debug(struct pt_regs *regs, long error_code);
+void microcode_check(void);
 #endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
index 4e44250e7d0d..d65171120e90 100644
--- a/arch/x86/include/asm/refcount.h
+++ b/arch/x86/include/asm/refcount.h
@@ -67,13 +67,13 @@ static __always_inline __must_check
 bool refcount_sub_and_test(unsigned int i, refcount_t *r)
 {
 	GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO,
-				  r->refs.counter, "er", i, "%0", e);
+				  r->refs.counter, "er", i, "%0", e, "cx");
 }
 
 static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
 {
 	GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO,
-				 r->refs.counter, "%0", e);
+				 r->refs.counter, "%0", e, "cx");
 }
 
 static __always_inline __must_check
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index f91c365e57c3..4914a3e7c803 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -2,8 +2,7 @@
 #ifndef _ASM_X86_RMWcc
 #define _ASM_X86_RMWcc
 
-#define __CLOBBERS_MEM		"memory"
-#define __CLOBBERS_MEM_CC_CX	"memory", "cc", "cx"
+#define __CLOBBERS_MEM(clb...)	"memory", ## clb
 
 #if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO)
 
@@ -40,18 +39,19 @@ do { \
 #endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
 
 #define GEN_UNARY_RMWcc(op, var, arg0, cc)				\
-	__GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM)
+	__GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM())
 
-#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc)	\
+#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc, clobbers...)\
 	__GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc,			\
-		    __CLOBBERS_MEM_CC_CX)
+		    __CLOBBERS_MEM(clobbers))
 
 #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)			\
 	__GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc,		\
-		    __CLOBBERS_MEM, vcon (val))
+		    __CLOBBERS_MEM(), vcon (val))
 
-#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc) \
+#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc, \
+				  clobbers...)				\
 	__GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc,	\
-		    __CLOBBERS_MEM_CC_CX, vcon (val))
+		    __CLOBBERS_MEM(clobbers), vcon (val))
 
 #endif /* _ASM_X86_RMWcc */
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8ad2e410974f..7c5538769f7e 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1603,7 +1603,7 @@ static void __init delay_with_tsc(void)
 	do {
 		rep_nop();
 		now = rdtsc();
-	} while ((now - start) < 40000000000UL / HZ &&
+	} while ((now - start) < 40000000000ULL / HZ &&
 		time_before_eq(jiffies, end));
 }
 
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d71c8b54b696..bfca937bdcc3 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -300,6 +300,15 @@ retpoline_auto:
300 setup_force_cpu_cap(X86_FEATURE_USE_IBPB); 300 setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
301 pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); 301 pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
302 } 302 }
303
304 /*
305 * Retpoline means the kernel is safe because it has no indirect
306 * branches. But firmware isn't, so use IBRS to protect that.
307 */
308 if (boot_cpu_has(X86_FEATURE_IBRS)) {
309 setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
310 pr_info("Enabling Restricted Speculation for firmware calls\n");
311 }
303} 312}
304 313
305#undef pr_fmt 314#undef pr_fmt
@@ -326,8 +335,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
326 if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) 335 if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
327 return sprintf(buf, "Not affected\n"); 336 return sprintf(buf, "Not affected\n");
328 337
329 return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], 338 return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
330 boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", 339 boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
340 boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
331 spectre_v2_module_string()); 341 spectre_v2_module_string());
332} 342}
333#endif 343#endif
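
Retpolines only cover indirect branches the kernel itself compiles; calls into platform firmware execute code the kernel has no control over, which is what the new X86_FEATURE_USE_IBRS_FW capability is for. A hedged sketch of the intended call pattern, using the firmware_restrict_branch_speculation_start()/_end() helpers (assumed here to live in <asm/nospec-branch.h>; the firmware call itself is a made-up stand-in):

#include <asm/nospec-branch.h>

/* Purely illustrative stand-in for an EFI/APM/firmware service call. */
static long do_firmware_call(void)
{
	return 0;
}

static long guarded_firmware_call(void)
{
	long ret;

	firmware_restrict_branch_speculation_start();	/* sets IBRS when USE_IBRS_FW is set */
	ret = do_firmware_call();
	firmware_restrict_branch_speculation_end();	/* clears IBRS again */

	return ret;
}

The sysfs hunk above mirrors this: cpu_show_spectre_v2() now appends ", IBRS_FW" so administrators can see that firmware calls are being bracketed.
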
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 824aee0117bb..348cf4821240 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1749,3 +1749,33 @@ static int __init init_cpu_syscore(void)
1749 return 0; 1749 return 0;
1750} 1750}
1751core_initcall(init_cpu_syscore); 1751core_initcall(init_cpu_syscore);
1752
1753/*
1754 * The microcode loader calls this upon late microcode load to recheck features,
1755 * only when microcode has been updated. Caller holds microcode_mutex and CPU
1756 * hotplug lock.
1757 */
1758void microcode_check(void)
1759{
1760 struct cpuinfo_x86 info;
1761
1762 perf_check_microcode();
1763
1764 /* Reload CPUID max function as it might've changed. */
1765 info.cpuid_level = cpuid_eax(0);
1766
1767 /*
1768 * Copy all capability leafs to pick up the synthetic ones so that
1769 * memcmp() below doesn't fail on that. The ones coming from CPUID will
1770 * get overwritten in get_cpu_cap().
1771 */
1772 memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability));
1773
1774 get_cpu_cap(&info);
1775
1776 if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)))
1777 return;
1778
1779 pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n");
1780 pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
1781}
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 330b8462d426..a998e1a7d46f 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -498,7 +498,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
498 return patch_size; 498 return patch_size;
499} 499}
500 500
501static int apply_microcode_amd(int cpu) 501static enum ucode_state apply_microcode_amd(int cpu)
502{ 502{
503 struct cpuinfo_x86 *c = &cpu_data(cpu); 503 struct cpuinfo_x86 *c = &cpu_data(cpu);
504 struct microcode_amd *mc_amd; 504 struct microcode_amd *mc_amd;
@@ -512,7 +512,7 @@ static int apply_microcode_amd(int cpu)
512 512
513 p = find_patch(cpu); 513 p = find_patch(cpu);
514 if (!p) 514 if (!p)
515 return 0; 515 return UCODE_NFOUND;
516 516
517 mc_amd = p->data; 517 mc_amd = p->data;
518 uci->mc = p->data; 518 uci->mc = p->data;
@@ -523,13 +523,13 @@ static int apply_microcode_amd(int cpu)
523 if (rev >= mc_amd->hdr.patch_id) { 523 if (rev >= mc_amd->hdr.patch_id) {
524 c->microcode = rev; 524 c->microcode = rev;
525 uci->cpu_sig.rev = rev; 525 uci->cpu_sig.rev = rev;
526 return 0; 526 return UCODE_OK;
527 } 527 }
528 528
529 if (__apply_microcode_amd(mc_amd)) { 529 if (__apply_microcode_amd(mc_amd)) {
530 pr_err("CPU%d: update failed for patch_level=0x%08x\n", 530 pr_err("CPU%d: update failed for patch_level=0x%08x\n",
531 cpu, mc_amd->hdr.patch_id); 531 cpu, mc_amd->hdr.patch_id);
532 return -1; 532 return UCODE_ERROR;
533 } 533 }
534 pr_info("CPU%d: new patch_level=0x%08x\n", cpu, 534 pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
535 mc_amd->hdr.patch_id); 535 mc_amd->hdr.patch_id);
@@ -537,7 +537,7 @@ static int apply_microcode_amd(int cpu)
537 uci->cpu_sig.rev = mc_amd->hdr.patch_id; 537 uci->cpu_sig.rev = mc_amd->hdr.patch_id;
538 c->microcode = mc_amd->hdr.patch_id; 538 c->microcode = mc_amd->hdr.patch_id;
539 539
540 return 0; 540 return UCODE_UPDATED;
541} 541}
542 542
543static int install_equiv_cpu_table(const u8 *buf) 543static int install_equiv_cpu_table(const u8 *buf)
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 319dd65f98a2..aa1b9a422f2b 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -374,7 +374,7 @@ static int collect_cpu_info(int cpu)
374} 374}
375 375
376struct apply_microcode_ctx { 376struct apply_microcode_ctx {
377 int err; 377 enum ucode_state err;
378}; 378};
379 379
380static void apply_microcode_local(void *arg) 380static void apply_microcode_local(void *arg)
@@ -489,31 +489,30 @@ static void __exit microcode_dev_exit(void)
489/* fake device for request_firmware */ 489/* fake device for request_firmware */
490static struct platform_device *microcode_pdev; 490static struct platform_device *microcode_pdev;
491 491
492static int reload_for_cpu(int cpu) 492static enum ucode_state reload_for_cpu(int cpu)
493{ 493{
494 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 494 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
495 enum ucode_state ustate; 495 enum ucode_state ustate;
496 int err = 0;
497 496
498 if (!uci->valid) 497 if (!uci->valid)
499 return err; 498 return UCODE_OK;
500 499
501 ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true); 500 ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
502 if (ustate == UCODE_OK) 501 if (ustate != UCODE_OK)
503 apply_microcode_on_target(cpu); 502 return ustate;
504 else 503
505 if (ustate == UCODE_ERROR) 504 return apply_microcode_on_target(cpu);
506 err = -EINVAL;
507 return err;
508} 505}
509 506
510static ssize_t reload_store(struct device *dev, 507static ssize_t reload_store(struct device *dev,
511 struct device_attribute *attr, 508 struct device_attribute *attr,
512 const char *buf, size_t size) 509 const char *buf, size_t size)
513{ 510{
511 enum ucode_state tmp_ret = UCODE_OK;
512 bool do_callback = false;
514 unsigned long val; 513 unsigned long val;
514 ssize_t ret = 0;
515 int cpu; 515 int cpu;
516 ssize_t ret = 0, tmp_ret;
517 516
518 ret = kstrtoul(buf, 0, &val); 517 ret = kstrtoul(buf, 0, &val);
519 if (ret) 518 if (ret)
@@ -526,15 +525,21 @@ static ssize_t reload_store(struct device *dev,
526 mutex_lock(&microcode_mutex); 525 mutex_lock(&microcode_mutex);
527 for_each_online_cpu(cpu) { 526 for_each_online_cpu(cpu) {
528 tmp_ret = reload_for_cpu(cpu); 527 tmp_ret = reload_for_cpu(cpu);
529 if (tmp_ret != 0) 528 if (tmp_ret > UCODE_NFOUND) {
530 pr_warn("Error reloading microcode on CPU %d\n", cpu); 529 pr_warn("Error reloading microcode on CPU %d\n", cpu);
531 530
532 /* save retval of the first encountered reload error */ 531 /* set retval for the first encountered reload error */
533 if (!ret) 532 if (!ret)
534 ret = tmp_ret; 533 ret = -EINVAL;
534 }
535
536 if (tmp_ret == UCODE_UPDATED)
537 do_callback = true;
535 } 538 }
536 if (!ret) 539
537 perf_check_microcode(); 540 if (!ret && do_callback)
541 microcode_check();
542
538 mutex_unlock(&microcode_mutex); 543 mutex_unlock(&microcode_mutex);
539 put_online_cpus(); 544 put_online_cpus();
540 545
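
The error test in reload_store() above ("tmp_ret > UCODE_NFOUND") only works if UCODE_ERROR is the sole value that sorts after UCODE_NFOUND. A sketch of the enum ordering this appears to assume; the authoritative definition lives in the x86 microcode header, so treat this as an approximation:

/*
 * Assumed ordering of enum ucode_state behind "tmp_ret > UCODE_NFOUND":
 * only UCODE_ERROR compares greater than UCODE_NFOUND, while OK/UPDATED/
 * NFOUND are all non-fatal outcomes of a per-CPU reload.
 */
enum ucode_state {
	UCODE_OK	= 0,	/* nothing to do / CPU already current */
	UCODE_UPDATED,		/* new microcode was applied           */
	UCODE_NFOUND,		/* no matching patch for this CPU      */
	UCODE_ERROR,		/* request or apply failed             */
};

With that ordering, a CPU with no cached patch (UCODE_NFOUND) or one already up to date (UCODE_OK) does not set the -EINVAL return, and microcode_check() runs only when at least one CPU actually reported UCODE_UPDATED.
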
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index a15db2b4e0d6..923054a6b760 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -772,7 +772,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
772 return 0; 772 return 0;
773} 773}
774 774
775static int apply_microcode_intel(int cpu) 775static enum ucode_state apply_microcode_intel(int cpu)
776{ 776{
777 struct microcode_intel *mc; 777 struct microcode_intel *mc;
778 struct ucode_cpu_info *uci; 778 struct ucode_cpu_info *uci;
@@ -782,7 +782,7 @@ static int apply_microcode_intel(int cpu)
782 782
783 /* We should bind the task to the CPU */ 783 /* We should bind the task to the CPU */
784 if (WARN_ON(raw_smp_processor_id() != cpu)) 784 if (WARN_ON(raw_smp_processor_id() != cpu))
785 return -1; 785 return UCODE_ERROR;
786 786
787 uci = ucode_cpu_info + cpu; 787 uci = ucode_cpu_info + cpu;
788 mc = uci->mc; 788 mc = uci->mc;
@@ -790,7 +790,7 @@ static int apply_microcode_intel(int cpu)
790 /* Look for a newer patch in our cache: */ 790 /* Look for a newer patch in our cache: */
791 mc = find_patch(uci); 791 mc = find_patch(uci);
792 if (!mc) 792 if (!mc)
793 return 0; 793 return UCODE_NFOUND;
794 } 794 }
795 795
796 /* write microcode via MSR 0x79 */ 796 /* write microcode via MSR 0x79 */
@@ -801,7 +801,7 @@ static int apply_microcode_intel(int cpu)
801 if (rev != mc->hdr.rev) { 801 if (rev != mc->hdr.rev) {
802 pr_err("CPU%d update to revision 0x%x failed\n", 802 pr_err("CPU%d update to revision 0x%x failed\n",
803 cpu, mc->hdr.rev); 803 cpu, mc->hdr.rev);
804 return -1; 804 return UCODE_ERROR;
805 } 805 }
806 806
807 if (rev != prev_rev) { 807 if (rev != prev_rev) {
@@ -818,7 +818,7 @@ static int apply_microcode_intel(int cpu)
818 uci->cpu_sig.rev = rev; 818 uci->cpu_sig.rev = rev;
819 c->microcode = rev; 819 c->microcode = rev;
820 820
821 return 0; 821 return UCODE_UPDATED;
822} 822}
823 823
824static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, 824static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 04a625f0fcda..0f545b3cf926 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -23,6 +23,7 @@
23#include <asm/nops.h> 23#include <asm/nops.h>
24#include "../entry/calling.h" 24#include "../entry/calling.h"
25#include <asm/export.h> 25#include <asm/export.h>
26#include <asm/nospec-branch.h>
26 27
27#ifdef CONFIG_PARAVIRT 28#ifdef CONFIG_PARAVIRT
28#include <asm/asm-offsets.h> 29#include <asm/asm-offsets.h>
@@ -134,6 +135,7 @@ ENTRY(secondary_startup_64)
134 135
135 /* Ensure I am executing from virtual addresses */ 136 /* Ensure I am executing from virtual addresses */
136 movq $1f, %rax 137 movq $1f, %rax
138 ANNOTATE_RETPOLINE_SAFE
137 jmp *%rax 139 jmp *%rax
1381: 1401:
139 UNWIND_HINT_EMPTY 141 UNWIND_HINT_EMPTY
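
ANNOTATE_RETPOLINE_SAFE records the address of the indirect jump that follows it in a .discard section which objtool consumes (see read_retpoline_hints() in the tools/objtool/check.c hunk further down) and the linker then throws away. A hedged sketch of the C-side form of the annotation; the asm-side .macro used here in head_64.S is analogous, and the exact spelling should be checked against <asm/nospec-branch.h>:

/*
 * Sketch: place a local label right before the indirect branch and emit
 * its address into .discard.retpoline_safe so objtool can whitelist it.
 */
#define ANNOTATE_RETPOLINE_SAFE					\
	"999:\n\t"						\
	".pushsection .discard.retpoline_safe\n\t"		\
	_ASM_PTR " 999b\n\t"					\
	".popsection\n\t"
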
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 1f9188f5357c..feb28fee6cea 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -5,7 +5,6 @@
5#include <asm/unwind.h> 5#include <asm/unwind.h>
6#include <asm/orc_types.h> 6#include <asm/orc_types.h>
7#include <asm/orc_lookup.h> 7#include <asm/orc_lookup.h>
8#include <asm/sections.h>
9 8
10#define orc_warn(fmt, ...) \ 9#define orc_warn(fmt, ...) \
11 printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__) 10 printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
@@ -148,7 +147,7 @@ static struct orc_entry *orc_find(unsigned long ip)
148 } 147 }
149 148
150 /* vmlinux .init slow lookup: */ 149 /* vmlinux .init slow lookup: */
151 if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext) 150 if (init_kernel_text(ip))
152 return __orc_find(__start_orc_unwind_ip, __start_orc_unwind, 151 return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
153 __stop_orc_unwind_ip - __start_orc_unwind_ip, ip); 152 __stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
154 153
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 3d8377f75eda..cbd7ab74952e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -49,6 +49,7 @@
49#include <asm/debugreg.h> 49#include <asm/debugreg.h>
50#include <asm/kvm_para.h> 50#include <asm/kvm_para.h>
51#include <asm/irq_remapping.h> 51#include <asm/irq_remapping.h>
52#include <asm/microcode.h>
52#include <asm/nospec-branch.h> 53#include <asm/nospec-branch.h>
53 54
54#include <asm/virtext.h> 55#include <asm/virtext.h>
@@ -5364,7 +5365,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
5364 * being speculatively taken. 5365 * being speculatively taken.
5365 */ 5366 */
5366 if (svm->spec_ctrl) 5367 if (svm->spec_ctrl)
5367 wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); 5368 native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
5368 5369
5369 asm volatile ( 5370 asm volatile (
5370 "push %%" _ASM_BP "; \n\t" 5371 "push %%" _ASM_BP "; \n\t"
@@ -5473,11 +5474,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
5473 * If the L02 MSR bitmap does not intercept the MSR, then we need to 5474 * If the L02 MSR bitmap does not intercept the MSR, then we need to
5474 * save it. 5475 * save it.
5475 */ 5476 */
5476 if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) 5477 if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
5477 rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); 5478 svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
5478 5479
5479 if (svm->spec_ctrl) 5480 if (svm->spec_ctrl)
5480 wrmsrl(MSR_IA32_SPEC_CTRL, 0); 5481 native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
5481 5482
5482 /* Eliminate branch target predictions from guest mode */ 5483 /* Eliminate branch target predictions from guest mode */
5483 vmexit_fill_RSB(); 5484 vmexit_fill_RSB();
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ec14f2319a87..cab6ea1f8be5 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -51,6 +51,7 @@
51#include <asm/apic.h> 51#include <asm/apic.h>
52#include <asm/irq_remapping.h> 52#include <asm/irq_remapping.h>
53#include <asm/mmu_context.h> 53#include <asm/mmu_context.h>
54#include <asm/microcode.h>
54#include <asm/nospec-branch.h> 55#include <asm/nospec-branch.h>
55 56
56#include "trace.h" 57#include "trace.h"
@@ -9453,7 +9454,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
9453 * being speculatively taken. 9454 * being speculatively taken.
9454 */ 9455 */
9455 if (vmx->spec_ctrl) 9456 if (vmx->spec_ctrl)
9456 wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); 9457 native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
9457 9458
9458 vmx->__launched = vmx->loaded_vmcs->launched; 9459 vmx->__launched = vmx->loaded_vmcs->launched;
9459 asm( 9460 asm(
@@ -9588,11 +9589,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
9588 * If the L02 MSR bitmap does not intercept the MSR, then we need to 9589 * If the L02 MSR bitmap does not intercept the MSR, then we need to
9589 * save it. 9590 * save it.
9590 */ 9591 */
9591 if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) 9592 if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
9592 rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); 9593 vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
9593 9594
9594 if (vmx->spec_ctrl) 9595 if (vmx->spec_ctrl)
9595 wrmsrl(MSR_IA32_SPEC_CTRL, 0); 9596 native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
9596 9597
9597 /* Eliminate branch target predictions from guest mode */ 9598 /* Eliminate branch target predictions from guest mode */
9598 vmexit_fill_RSB(); 9599 vmexit_fill_RSB();
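
In the svm.c and vmx.c hunks above, wrmsrl()/rdmsrl() are replaced because on CONFIG_PARAVIRT kernels they dispatch through the paravirt MSR ops, i.e. potentially through exactly the kind of indirect call the SPEC_CTRL write is meant to defend against; native_wrmsrl()/native_read_msr() emit the instruction directly. A rough sketch of what the native write boils down to (simplified, not the literal kernel helper):

/*
 * WRMSR takes the MSR index in ECX and the 64-bit value split across
 * EDX:EAX; no paravirt indirection is involved.  Sketch only.
 */
static inline void sketch_native_wrmsrl(unsigned int msr, unsigned long long val)
{
	asm volatile("wrmsr"
		     : /* no outputs */
		     : "c" (msr),
		       "a" ((unsigned int)val),
		       "d" ((unsigned int)(val >> 32)));
}
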
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 91e9700cc6dc..25a972c61b0a 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -28,7 +28,6 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
28lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o 28lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
29lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o 29lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
30lib-$(CONFIG_RETPOLINE) += retpoline.o 30lib-$(CONFIG_RETPOLINE) += retpoline.o
31OBJECT_FILES_NON_STANDARD_retpoline.o :=y
32 31
33obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o 32obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
34 33
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 480edc3a5e03..c909961e678a 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,7 +7,6 @@
7#include <asm/alternative-asm.h> 7#include <asm/alternative-asm.h>
8#include <asm/export.h> 8#include <asm/export.h>
9#include <asm/nospec-branch.h> 9#include <asm/nospec-branch.h>
10#include <asm/bitsperlong.h>
11 10
12.macro THUNK reg 11.macro THUNK reg
13 .section .text.__x86.indirect_thunk 12 .section .text.__x86.indirect_thunk
@@ -47,58 +46,3 @@ GENERATE_THUNK(r13)
47GENERATE_THUNK(r14) 46GENERATE_THUNK(r14)
48GENERATE_THUNK(r15) 47GENERATE_THUNK(r15)
49#endif 48#endif
50
51/*
52 * Fill the CPU return stack buffer.
53 *
54 * Each entry in the RSB, if used for a speculative 'ret', contains an
55 * infinite 'pause; lfence; jmp' loop to capture speculative execution.
56 *
57 * This is required in various cases for retpoline and IBRS-based
58 * mitigations for the Spectre variant 2 vulnerability. Sometimes to
59 * eliminate potentially bogus entries from the RSB, and sometimes
60 * purely to ensure that it doesn't get empty, which on some CPUs would
61 * allow predictions from other (unwanted!) sources to be used.
62 *
63 * Google experimented with loop-unrolling and this turned out to be
64 * the optimal version - two calls, each with their own speculation
65 * trap should their return address end up getting used, in a loop.
66 */
67.macro STUFF_RSB nr:req sp:req
68 mov $(\nr / 2), %_ASM_BX
69 .align 16
70771:
71 call 772f
72773: /* speculation trap */
73 pause
74 lfence
75 jmp 773b
76 .align 16
77772:
78 call 774f
79775: /* speculation trap */
80 pause
81 lfence
82 jmp 775b
83 .align 16
84774:
85 dec %_ASM_BX
86 jnz 771b
87 add $((BITS_PER_LONG/8) * \nr), \sp
88.endm
89
90#define RSB_FILL_LOOPS 16 /* To avoid underflow */
91
92ENTRY(__fill_rsb)
93 STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
94 ret
95END(__fill_rsb)
96EXPORT_SYMBOL_GPL(__fill_rsb)
97
98#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
99
100ENTRY(__clear_rsb)
101 STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
102 ret
103END(__clear_rsb)
104EXPORT_SYMBOL_GPL(__clear_rsb)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 800de815519c..c88573d90f3e 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1248,10 +1248,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
1248 tsk = current; 1248 tsk = current;
1249 mm = tsk->mm; 1249 mm = tsk->mm;
1250 1250
1251 /*
1252 * Detect and handle instructions that would cause a page fault for
1253 * both a tracked kernel page and a userspace page.
1254 */
1255 prefetchw(&mm->mmap_sem); 1251 prefetchw(&mm->mmap_sem);
1256 1252
1257 if (unlikely(kmmio_fault(regs, address))) 1253 if (unlikely(kmmio_fault(regs, address)))
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 01f682cf77a8..40a6085063d6 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -15,6 +15,7 @@
15#include <asm/page.h> 15#include <asm/page.h>
16#include <asm/processor-flags.h> 16#include <asm/processor-flags.h>
17#include <asm/msr-index.h> 17#include <asm/msr-index.h>
18#include <asm/nospec-branch.h>
18 19
19 .text 20 .text
20 .code64 21 .code64
@@ -59,6 +60,7 @@ ENTRY(sme_encrypt_execute)
59 movq %rax, %r8 /* Workarea encryption routine */ 60 movq %rax, %r8 /* Workarea encryption routine */
60 addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */ 61 addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */
61 62
63 ANNOTATE_RETPOLINE_SAFE
62 call *%rax /* Call the encryption routine */ 64 call *%rax /* Call the encryption routine */
63 65
64 pop %r12 66 pop %r12
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index de53bd15df5a..24bb7598774e 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -102,7 +102,7 @@ ENTRY(startup_32)
102 * don't we'll eventually crash trying to execute encrypted 102 * don't we'll eventually crash trying to execute encrypted
103 * instructions. 103 * instructions.
104 */ 104 */
105 bt $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags 105 btl $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
106 jnc .Ldone 106 jnc .Ldone
107 movl $MSR_K8_SYSCFG, %ecx 107 movl $MSR_K8_SYSCFG, %ecx
108 rdmsr 108 rdmsr
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index d02a4df3f473..d3f264a5b04d 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -27,3 +27,8 @@
27#if __has_feature(address_sanitizer) 27#if __has_feature(address_sanitizer)
28#define __SANITIZE_ADDRESS__ 28#define __SANITIZE_ADDRESS__
29#endif 29#endif
30
31/* Clang doesn't have a way to turn it off per-function, yet. */
32#ifdef __noretpoline
33#undef __noretpoline
34#endif
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 901c1ccb3374..e2c7f4369eff 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -93,6 +93,10 @@
93#define __weak __attribute__((weak)) 93#define __weak __attribute__((weak))
94#define __alias(symbol) __attribute__((alias(#symbol))) 94#define __alias(symbol) __attribute__((alias(#symbol)))
95 95
96#ifdef RETPOLINE
97#define __noretpoline __attribute__((indirect_branch("keep")))
98#endif
99
96/* 100/*
97 * it doesn't make sense on ARM (currently the only user of __naked) 101 * it doesn't make sense on ARM (currently the only user of __naked)
98 * to trace naked functions because then mcount is called without 102 * to trace naked functions because then mcount is called without
diff --git a/include/linux/init.h b/include/linux/init.h
index 506a98151131..bc27cf03c41e 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -6,10 +6,10 @@
6#include <linux/types.h> 6#include <linux/types.h>
7 7
8/* Built-in __init functions needn't be compiled with retpoline */ 8/* Built-in __init functions needn't be compiled with retpoline */
9#if defined(RETPOLINE) && !defined(MODULE) 9#if defined(__noretpoline) && !defined(MODULE)
10#define __noretpoline __attribute__((indirect_branch("keep"))) 10#define __noinitretpoline __noretpoline
11#else 11#else
12#define __noretpoline 12#define __noinitretpoline
13#endif 13#endif
14 14
15/* These macros are used to mark some functions or 15/* These macros are used to mark some functions or
@@ -47,7 +47,7 @@
47 47
48/* These are for everybody (although not all archs will actually 48/* These are for everybody (although not all archs will actually
49 discard it in modules) */ 49 discard it in modules) */
50#define __init __section(.init.text) __cold __latent_entropy __noretpoline 50#define __init __section(.init.text) __cold __latent_entropy __noinitretpoline
51#define __initdata __section(.init.data) 51#define __initdata __section(.init.data)
52#define __initconst __section(.init.rodata) 52#define __initconst __section(.init.rodata)
53#define __exitdata __section(.exit.data) 53#define __exitdata __section(.exit.data)
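
Taken together, the compiler-gcc.h, compiler-clang.h and init.h changes mean __noretpoline only exists when the compiler can honour indirect_branch("keep") (GCC building with RETPOLINE; clang undefines it again), and built-in __init code picks it up through the new __noinitretpoline alias. A condensed paraphrase of the resulting chain, for illustration only, not the literal headers:

#ifdef RETPOLINE				/* GCC retpoline build (compiler-gcc.h) */
# define __noretpoline __attribute__((indirect_branch("keep")))
#endif
/* clang (compiler-clang.h): #undef __noretpoline -- no per-function opt-out yet */

#if defined(__noretpoline) && !defined(MODULE)	/* init.h: built-in code only */
# define __noinitretpoline __noretpoline
#else
# define __noinitretpoline
#endif
/* __init then appends __noinitretpoline, so built-in init code keeps
 * plain indirect branches while module init code stays retpolined. */
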
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index b6a29c126cc4..2168cc6b8b30 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -151,6 +151,7 @@ extern struct jump_entry __start___jump_table[];
151extern struct jump_entry __stop___jump_table[]; 151extern struct jump_entry __stop___jump_table[];
152 152
153extern void jump_label_init(void); 153extern void jump_label_init(void);
154extern void jump_label_invalidate_init(void);
154extern void jump_label_lock(void); 155extern void jump_label_lock(void);
155extern void jump_label_unlock(void); 156extern void jump_label_unlock(void);
156extern void arch_jump_label_transform(struct jump_entry *entry, 157extern void arch_jump_label_transform(struct jump_entry *entry,
@@ -198,6 +199,8 @@ static __always_inline void jump_label_init(void)
198 static_key_initialized = true; 199 static_key_initialized = true;
199} 200}
200 201
202static inline void jump_label_invalidate_init(void) {}
203
201static __always_inline bool static_key_false(struct static_key *key) 204static __always_inline bool static_key_false(struct static_key *key)
202{ 205{
203 if (unlikely(static_key_count(key) > 0)) 206 if (unlikely(static_key_count(key) > 0))
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index ce51455e2adf..3fd291503576 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -472,6 +472,7 @@ extern bool parse_option_str(const char *str, const char *option);
472extern char *next_arg(char *args, char **param, char **val); 472extern char *next_arg(char *args, char **param, char **val);
473 473
474extern int core_kernel_text(unsigned long addr); 474extern int core_kernel_text(unsigned long addr);
475extern int init_kernel_text(unsigned long addr);
475extern int core_kernel_data(unsigned long addr); 476extern int core_kernel_data(unsigned long addr);
476extern int __kernel_text_address(unsigned long addr); 477extern int __kernel_text_address(unsigned long addr);
477extern int kernel_text_address(unsigned long addr); 478extern int kernel_text_address(unsigned long addr);
diff --git a/include/linux/nospec.h b/include/linux/nospec.h
index fbc98e2c8228..e791ebc65c9c 100644
--- a/include/linux/nospec.h
+++ b/include/linux/nospec.h
@@ -5,6 +5,7 @@
5 5
6#ifndef _LINUX_NOSPEC_H 6#ifndef _LINUX_NOSPEC_H
7#define _LINUX_NOSPEC_H 7#define _LINUX_NOSPEC_H
8#include <asm/barrier.h>
8 9
9/** 10/**
10 * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise 11 * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
@@ -30,26 +31,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
30#endif 31#endif
31 32
32/* 33/*
33 * Warn developers about inappropriate array_index_nospec() usage.
34 *
35 * Even if the CPU speculates past the WARN_ONCE branch, the
36 * sign bit of @index is taken into account when generating the
37 * mask.
38 *
39 * This warning is compiled out when the compiler can infer that
40 * @index and @size are less than LONG_MAX.
41 */
42#define array_index_mask_nospec_check(index, size) \
43({ \
44 if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, \
45 "array_index_nospec() limited to range of [0, LONG_MAX]\n")) \
46 _mask = 0; \
47 else \
48 _mask = array_index_mask_nospec(index, size); \
49 _mask; \
50})
51
52/*
53 * array_index_nospec - sanitize an array index after a bounds check 34 * array_index_nospec - sanitize an array index after a bounds check
54 * 35 *
55 * For a code sequence like: 36 * For a code sequence like:
@@ -67,12 +48,11 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
67({ \ 48({ \
68 typeof(index) _i = (index); \ 49 typeof(index) _i = (index); \
69 typeof(size) _s = (size); \ 50 typeof(size) _s = (size); \
70 unsigned long _mask = array_index_mask_nospec_check(_i, _s); \ 51 unsigned long _mask = array_index_mask_nospec(_i, _s); \
71 \ 52 \
72 BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ 53 BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \
73 BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ 54 BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \
74 \ 55 \
75 _i &= _mask; \ 56 (typeof(_i)) (_i & _mask); \
76 _i; \
77}) 57})
78#endif /* _LINUX_NOSPEC_H */ 58#endif /* _LINUX_NOSPEC_H */
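
After the rewrite, array_index_nospec() is a pure expression that evaluates to the masked index cast back to its original type, rather than assigning through the local _i, which is what allows a const-qualified index to be passed. The usage pattern itself is unchanged; a small sketch with made-up names:

#include <linux/kernel.h>	/* ARRAY_SIZE */
#include <linux/errno.h>
#include <linux/nospec.h>

static const long demo_table[16];

long demo_lookup(const unsigned int idx)	/* note: idx may be const now */
{
	if (idx >= ARRAY_SIZE(demo_table))
		return -EINVAL;

	/* clamp the index under speculation before using it */
	return demo_table[array_index_nospec(idx, ARRAY_SIZE(demo_table))];
}
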
diff --git a/init/main.c b/init/main.c
index a8100b954839..969eaf140ef0 100644
--- a/init/main.c
+++ b/init/main.c
@@ -89,6 +89,7 @@
89#include <linux/io.h> 89#include <linux/io.h>
90#include <linux/cache.h> 90#include <linux/cache.h>
91#include <linux/rodata_test.h> 91#include <linux/rodata_test.h>
92#include <linux/jump_label.h>
92 93
93#include <asm/io.h> 94#include <asm/io.h>
94#include <asm/bugs.h> 95#include <asm/bugs.h>
@@ -1000,6 +1001,7 @@ static int __ref kernel_init(void *unused)
1000 /* need to finish all async __init code before freeing the memory */ 1001 /* need to finish all async __init code before freeing the memory */
1001 async_synchronize_full(); 1002 async_synchronize_full();
1002 ftrace_free_init_mem(); 1003 ftrace_free_init_mem();
1004 jump_label_invalidate_init();
1003 free_initmem(); 1005 free_initmem();
1004 mark_readonly(); 1006 mark_readonly();
1005 system_state = SYSTEM_RUNNING; 1007 system_state = SYSTEM_RUNNING;
diff --git a/kernel/extable.c b/kernel/extable.c
index a17fdb63dc3e..6a5b61ebc66c 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -64,7 +64,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
64 return e; 64 return e;
65} 65}
66 66
67static inline int init_kernel_text(unsigned long addr) 67int init_kernel_text(unsigned long addr)
68{ 68{
69 if (addr >= (unsigned long)_sinittext && 69 if (addr >= (unsigned long)_sinittext &&
70 addr < (unsigned long)_einittext) 70 addr < (unsigned long)_einittext)
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index b4517095db6a..52a0a7af8640 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -366,12 +366,15 @@ static void __jump_label_update(struct static_key *key,
366{ 366{
367 for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) { 367 for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
368 /* 368 /*
369 * entry->code set to 0 invalidates module init text sections 369 * An entry->code of 0 indicates an entry which has been
370 * kernel_text_address() verifies we are not in core kernel 370 * disabled because it was in an init text area.
371 * init code, see jump_label_invalidate_module_init().
372 */ 371 */
373 if (entry->code && kernel_text_address(entry->code)) 372 if (entry->code) {
374 arch_jump_label_transform(entry, jump_label_type(entry)); 373 if (kernel_text_address(entry->code))
374 arch_jump_label_transform(entry, jump_label_type(entry));
375 else
376 WARN_ONCE(1, "can't patch jump_label at %pS", (void *)entry->code);
377 }
375 } 378 }
376} 379}
377 380
@@ -417,6 +420,19 @@ void __init jump_label_init(void)
417 cpus_read_unlock(); 420 cpus_read_unlock();
418} 421}
419 422
423/* Disable any jump label entries in __init code */
424void __init jump_label_invalidate_init(void)
425{
426 struct jump_entry *iter_start = __start___jump_table;
427 struct jump_entry *iter_stop = __stop___jump_table;
428 struct jump_entry *iter;
429
430 for (iter = iter_start; iter < iter_stop; iter++) {
431 if (init_kernel_text(iter->code))
432 iter->code = 0;
433 }
434}
435
420#ifdef CONFIG_MODULES 436#ifdef CONFIG_MODULES
421 437
422static enum jump_label_type jump_label_init_type(struct jump_entry *entry) 438static enum jump_label_type jump_label_init_type(struct jump_entry *entry)
@@ -633,6 +649,7 @@ static void jump_label_del_module(struct module *mod)
633 } 649 }
634} 650}
635 651
652/* Disable any jump label entries in module init code */
636static void jump_label_invalidate_module_init(struct module *mod) 653static void jump_label_invalidate_module_init(struct module *mod)
637{ 654{
638 struct jump_entry *iter_start = mod->jump_entries; 655 struct jump_entry *iter_start = mod->jump_entries;
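
The reason init-text entries need invalidating at all: a static key tested from __init code leaves a jump_entry whose code address points into .init.text, and once free_initmem() has run, patching that address would write into freed (and possibly reused) memory. An illustration with made-up names:

#include <linux/init.h>
#include <linux/jump_label.h>
#include <linux/printk.h>

static DEFINE_STATIC_KEY_FALSE(demo_key);

static int __init demo_init(void)
{
	/* this branch's jump_entry ends up in .init.text */
	if (static_branch_unlikely(&demo_key))
		pr_info("demo fast path\n");
	return 0;
}
early_initcall(demo_init);

jump_label_invalidate_init() zeroes entry->code for such entries before free_initmem() (see the init/main.c hunk above), so a later static_branch_enable(&demo_key) simply skips them, and the new WARN_ONCE() reports any remaining entry that points at non-patchable text instead of silently ignoring it.
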
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 47cddf32aeba..4f2b25d43ec9 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -256,6 +256,8 @@ __objtool_obj := $(objtree)/tools/objtool/objtool
256 256
257objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check) 257objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check)
258 258
259objtool_args += $(if $(part-of-module), --module,)
260
259ifndef CONFIG_FRAME_POINTER 261ifndef CONFIG_FRAME_POINTER
260objtool_args += --no-fp 262objtool_args += --no-fp
261endif 263endif
@@ -264,6 +266,12 @@ objtool_args += --no-unreachable
264else 266else
265objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable) 267objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable)
266endif 268endif
269ifdef CONFIG_RETPOLINE
270ifneq ($(RETPOLINE_CFLAGS),)
271 objtool_args += --retpoline
272endif
273endif
274
267 275
268ifdef CONFIG_MODVERSIONS 276ifdef CONFIG_MODVERSIONS
269objtool_o = $(@D)/.tmp_$(@F) 277objtool_o = $(@D)/.tmp_$(@F)
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 57254f5b2779..694abc628e9b 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -29,7 +29,7 @@
29#include "builtin.h" 29#include "builtin.h"
30#include "check.h" 30#include "check.h"
31 31
32bool no_fp, no_unreachable; 32bool no_fp, no_unreachable, retpoline, module;
33 33
34static const char * const check_usage[] = { 34static const char * const check_usage[] = {
35 "objtool check [<options>] file.o", 35 "objtool check [<options>] file.o",
@@ -39,6 +39,8 @@ static const char * const check_usage[] = {
39const struct option check_options[] = { 39const struct option check_options[] = {
40 OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), 40 OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
41 OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), 41 OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
42 OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"),
43 OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
42 OPT_END(), 44 OPT_END(),
43}; 45};
44 46
@@ -53,5 +55,5 @@ int cmd_check(int argc, const char **argv)
53 55
54 objname = argv[0]; 56 objname = argv[0];
55 57
56 return check(objname, no_fp, no_unreachable, false); 58 return check(objname, false);
57} 59}
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
index 91e8e19ff5e0..77ea2b97117d 100644
--- a/tools/objtool/builtin-orc.c
+++ b/tools/objtool/builtin-orc.c
@@ -25,7 +25,6 @@
25 */ 25 */
26 26
27#include <string.h> 27#include <string.h>
28#include <subcmd/parse-options.h>
29#include "builtin.h" 28#include "builtin.h"
30#include "check.h" 29#include "check.h"
31 30
@@ -36,9 +35,6 @@ static const char *orc_usage[] = {
36 NULL, 35 NULL,
37}; 36};
38 37
39extern const struct option check_options[];
40extern bool no_fp, no_unreachable;
41
42int cmd_orc(int argc, const char **argv) 38int cmd_orc(int argc, const char **argv)
43{ 39{
44 const char *objname; 40 const char *objname;
@@ -54,7 +50,7 @@ int cmd_orc(int argc, const char **argv)
54 50
55 objname = argv[0]; 51 objname = argv[0];
56 52
57 return check(objname, no_fp, no_unreachable, true); 53 return check(objname, true);
58 } 54 }
59 55
60 if (!strcmp(argv[0], "dump")) { 56 if (!strcmp(argv[0], "dump")) {
diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h
index dd526067fed5..28ff40e19a14 100644
--- a/tools/objtool/builtin.h
+++ b/tools/objtool/builtin.h
@@ -17,6 +17,11 @@
17#ifndef _BUILTIN_H 17#ifndef _BUILTIN_H
18#define _BUILTIN_H 18#define _BUILTIN_H
19 19
20#include <subcmd/parse-options.h>
21
22extern const struct option check_options[];
23extern bool no_fp, no_unreachable, retpoline, module;
24
20extern int cmd_check(int argc, const char **argv); 25extern int cmd_check(int argc, const char **argv);
21extern int cmd_orc(int argc, const char **argv); 26extern int cmd_orc(int argc, const char **argv);
22 27
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index a8cb69a26576..472e64e95891 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -18,6 +18,7 @@
18#include <string.h> 18#include <string.h>
19#include <stdlib.h> 19#include <stdlib.h>
20 20
21#include "builtin.h"
21#include "check.h" 22#include "check.h"
22#include "elf.h" 23#include "elf.h"
23#include "special.h" 24#include "special.h"
@@ -33,7 +34,6 @@ struct alternative {
33}; 34};
34 35
35const char *objname; 36const char *objname;
36static bool no_fp;
37struct cfi_state initial_func_cfi; 37struct cfi_state initial_func_cfi;
38 38
39struct instruction *find_insn(struct objtool_file *file, 39struct instruction *find_insn(struct objtool_file *file,
@@ -497,6 +497,7 @@ static int add_jump_destinations(struct objtool_file *file)
497 * disguise, so convert them accordingly. 497 * disguise, so convert them accordingly.
498 */ 498 */
499 insn->type = INSN_JUMP_DYNAMIC; 499 insn->type = INSN_JUMP_DYNAMIC;
500 insn->retpoline_safe = true;
500 continue; 501 continue;
501 } else { 502 } else {
502 /* sibling call */ 503 /* sibling call */
@@ -548,7 +549,8 @@ static int add_call_destinations(struct objtool_file *file)
548 if (!insn->call_dest && !insn->ignore) { 549 if (!insn->call_dest && !insn->ignore) {
549 WARN_FUNC("unsupported intra-function call", 550 WARN_FUNC("unsupported intra-function call",
550 insn->sec, insn->offset); 551 insn->sec, insn->offset);
551 WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE."); 552 if (retpoline)
553 WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.");
552 return -1; 554 return -1;
553 } 555 }
554 556
@@ -1108,6 +1110,54 @@ static int read_unwind_hints(struct objtool_file *file)
1108 return 0; 1110 return 0;
1109} 1111}
1110 1112
1113static int read_retpoline_hints(struct objtool_file *file)
1114{
1115 struct section *sec, *relasec;
1116 struct instruction *insn;
1117 struct rela *rela;
1118 int i;
1119
1120 sec = find_section_by_name(file->elf, ".discard.retpoline_safe");
1121 if (!sec)
1122 return 0;
1123
1124 relasec = sec->rela;
1125 if (!relasec) {
1126 WARN("missing .rela.discard.retpoline_safe section");
1127 return -1;
1128 }
1129
1130 if (sec->len % sizeof(unsigned long)) {
1131 WARN("retpoline_safe size mismatch: %d %ld", sec->len, sizeof(unsigned long));
1132 return -1;
1133 }
1134
1135 for (i = 0; i < sec->len / sizeof(unsigned long); i++) {
1136 rela = find_rela_by_dest(sec, i * sizeof(unsigned long));
1137 if (!rela) {
1138 WARN("can't find rela for retpoline_safe[%d]", i);
1139 return -1;
1140 }
1141
1142 insn = find_insn(file, rela->sym->sec, rela->addend);
1143 if (!insn) {
1144 WARN("can't find insn for retpoline_safe[%d]", i);
1145 return -1;
1146 }
1147
1148 if (insn->type != INSN_JUMP_DYNAMIC &&
1149 insn->type != INSN_CALL_DYNAMIC) {
1150 WARN_FUNC("retpoline_safe hint not a indirect jump/call",
1151 insn->sec, insn->offset);
1152 return -1;
1153 }
1154
1155 insn->retpoline_safe = true;
1156 }
1157
1158 return 0;
1159}
1160
1111static int decode_sections(struct objtool_file *file) 1161static int decode_sections(struct objtool_file *file)
1112{ 1162{
1113 int ret; 1163 int ret;
@@ -1146,6 +1196,10 @@ static int decode_sections(struct objtool_file *file)
1146 if (ret) 1196 if (ret)
1147 return ret; 1197 return ret;
1148 1198
1199 ret = read_retpoline_hints(file);
1200 if (ret)
1201 return ret;
1202
1149 return 0; 1203 return 0;
1150} 1204}
1151 1205
@@ -1891,6 +1945,38 @@ static int validate_unwind_hints(struct objtool_file *file)
1891 return warnings; 1945 return warnings;
1892} 1946}
1893 1947
1948static int validate_retpoline(struct objtool_file *file)
1949{
1950 struct instruction *insn;
1951 int warnings = 0;
1952
1953 for_each_insn(file, insn) {
1954 if (insn->type != INSN_JUMP_DYNAMIC &&
1955 insn->type != INSN_CALL_DYNAMIC)
1956 continue;
1957
1958 if (insn->retpoline_safe)
1959 continue;
1960
1961 /*
1962 * .init.text code is ran before userspace and thus doesn't
1963 * strictly need retpolines, except for modules which are
1964 * loaded late, they very much do need retpoline in their
1965 * .init.text
1966 */
1967 if (!strcmp(insn->sec->name, ".init.text") && !module)
1968 continue;
1969
1970 WARN_FUNC("indirect %s found in RETPOLINE build",
1971 insn->sec, insn->offset,
1972 insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
1973
1974 warnings++;
1975 }
1976
1977 return warnings;
1978}
1979
1894static bool is_kasan_insn(struct instruction *insn) 1980static bool is_kasan_insn(struct instruction *insn)
1895{ 1981{
1896 return (insn->type == INSN_CALL && 1982 return (insn->type == INSN_CALL &&
@@ -2022,13 +2108,12 @@ static void cleanup(struct objtool_file *file)
2022 elf_close(file->elf); 2108 elf_close(file->elf);
2023} 2109}
2024 2110
2025int check(const char *_objname, bool _no_fp, bool no_unreachable, bool orc) 2111int check(const char *_objname, bool orc)
2026{ 2112{
2027 struct objtool_file file; 2113 struct objtool_file file;
2028 int ret, warnings = 0; 2114 int ret, warnings = 0;
2029 2115
2030 objname = _objname; 2116 objname = _objname;
2031 no_fp = _no_fp;
2032 2117
2033 file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY); 2118 file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY);
2034 if (!file.elf) 2119 if (!file.elf)
@@ -2052,6 +2137,13 @@ int check(const char *_objname, bool _no_fp, bool no_unreachable, bool orc)
2052 if (list_empty(&file.insn_list)) 2137 if (list_empty(&file.insn_list))
2053 goto out; 2138 goto out;
2054 2139
2140 if (retpoline) {
2141 ret = validate_retpoline(&file);
2142 if (ret < 0)
2143 return ret;
2144 warnings += ret;
2145 }
2146
2055 ret = validate_functions(&file); 2147 ret = validate_functions(&file);
2056 if (ret < 0) 2148 if (ret < 0)
2057 goto out; 2149 goto out;
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index 23a1d065cae1..c6b68fcb926f 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -45,6 +45,7 @@ struct instruction {
45 unsigned char type; 45 unsigned char type;
46 unsigned long immediate; 46 unsigned long immediate;
47 bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts; 47 bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
48 bool retpoline_safe;
48 struct symbol *call_dest; 49 struct symbol *call_dest;
49 struct instruction *jump_dest; 50 struct instruction *jump_dest;
50 struct instruction *first_jump_src; 51 struct instruction *first_jump_src;
@@ -63,7 +64,7 @@ struct objtool_file {
63 bool ignore_unreachables, c_file, hints; 64 bool ignore_unreachables, c_file, hints;
64}; 65};
65 66
66int check(const char *objname, bool no_fp, bool no_unreachable, bool orc); 67int check(const char *objname, bool orc);
67 68
68struct instruction *find_insn(struct objtool_file *file, 69struct instruction *find_insn(struct objtool_file *file,
69 struct section *sec, unsigned long offset); 70 struct section *sec, unsigned long offset);