Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 1
-rw-r--r--  arch/x86/Makefile | 7
-rw-r--r--  arch/x86/boot/compressed/eboot.c | 4
-rw-r--r--  arch/x86/entry/calling.h | 34
-rw-r--r--  arch/x86/entry/entry_32.S | 3
-rw-r--r--  arch/x86/entry/entry_64.S | 153
-rw-r--r--  arch/x86/entry/entry_64_compat.S | 71
-rw-r--r--  arch/x86/include/asm/apm.h | 6
-rw-r--r--  arch/x86/include/asm/asm-prototypes.h | 3
-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 1
-rw-r--r--  arch/x86/include/asm/efi.h | 17
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 6
-rw-r--r--  arch/x86/include/asm/microcode.h | 9
-rw-r--r--  arch/x86/include/asm/mmu_context.h | 1
-rw-r--r--  arch/x86/include/asm/nospec-branch.h | 175
-rw-r--r--  arch/x86/include/asm/paravirt.h | 17
-rw-r--r--  arch/x86/include/asm/paravirt_types.h | 5
-rw-r--r--  arch/x86/include/asm/pgtable.h | 8
-rw-r--r--  arch/x86/include/asm/pgtable_types.h | 10
-rw-r--r--  arch/x86/include/asm/processor.h | 1
-rw-r--r--  arch/x86/include/asm/refcount.h | 4
-rw-r--r--  arch/x86/include/asm/rmwcc.h | 16
-rw-r--r--  arch/x86/include/uapi/asm/hyperv.h | 18
-rw-r--r--  arch/x86/include/uapi/asm/kvm_para.h | 1
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 2
-rw-r--r--  arch/x86/kernel/apic/vector.c | 25
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 12
-rw-r--r--  arch/x86/kernel/cpu/common.c | 30
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 1
-rw-r--r--  arch/x86/kernel/cpu/microcode/amd.c | 10
-rw-r--r--  arch/x86/kernel/cpu/microcode/core.c | 39
-rw-r--r--  arch/x86/kernel/cpu/microcode/intel.c | 10
-rw-r--r--  arch/x86/kernel/head_64.S | 2
-rw-r--r--  arch/x86/kernel/kvm.c | 20
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c | 1
-rw-r--r--  arch/x86/kernel/module.c | 1
-rw-r--r--  arch/x86/kernel/smpboot.c | 1
-rw-r--r--  arch/x86/kernel/unwind_orc.c | 3
-rw-r--r--  arch/x86/kvm/cpuid.c | 3
-rw-r--r--  arch/x86/kvm/lapic.c | 11
-rw-r--r--  arch/x86/kvm/mmu.c | 2
-rw-r--r--  arch/x86/kvm/svm.c | 90
-rw-r--r--  arch/x86/kvm/vmx.c | 26
-rw-r--r--  arch/x86/kvm/x86.c | 107
-rw-r--r--  arch/x86/lib/Makefile | 1
-rw-r--r--  arch/x86/lib/retpoline.S | 56
-rw-r--r--  arch/x86/mm/fault.c | 4
-rw-r--r--  arch/x86/mm/mem_encrypt_boot.S | 2
-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 9
-rw-r--r--  arch/x86/oprofile/nmi_int.c | 2
-rw-r--r--  arch/x86/realmode/rm/trampoline_64.S | 2
-rw-r--r--  arch/x86/tools/relocs.c | 3
-rw-r--r--  arch/x86/xen/enlighten_pv.c | 6
53 files changed, 723 insertions, 329 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c1236b187824..eb7f43f23521 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -430,6 +430,7 @@ config GOLDFISH
 config RETPOLINE
 	bool "Avoid speculative indirect branches in kernel"
 	default y
+	select STACK_VALIDATION if HAVE_STACK_VALIDATION
 	help
 	  Compile kernel with the retpoline compiler options to guard against
 	  kernel-to-user data leaks by avoiding speculative indirect
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index fad55160dcb9..498c1b812300 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -232,10 +232,9 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
 # Avoid indirect branches in kernel to deal with Spectre
 ifdef CONFIG_RETPOLINE
-  RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
-  ifneq ($(RETPOLINE_CFLAGS),)
-    KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
-  endif
+ifneq ($(RETPOLINE_CFLAGS),)
+  KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+endif
 endif
 
 archscripts: scripts_basic
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 353e20c3f114..886a9115af62 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -439,7 +439,7 @@ setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height)
 	struct efi_uga_draw_protocol *uga = NULL, *first_uga;
 	efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
 	unsigned long nr_ugas;
-	u32 *handles = (u32 *)uga_handle;;
+	u32 *handles = (u32 *)uga_handle;
 	efi_status_t status = EFI_INVALID_PARAMETER;
 	int i;
 
@@ -484,7 +484,7 @@ setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height)
 	struct efi_uga_draw_protocol *uga = NULL, *first_uga;
 	efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
 	unsigned long nr_ugas;
-	u64 *handles = (u64 *)uga_handle;;
+	u64 *handles = (u64 *)uga_handle;
 	efi_status_t status = EFI_INVALID_PARAMETER;
 	int i;
 
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index dce7092ab24a..be63330c5511 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -97,7 +97,7 @@ For 32-bit we have the following conventions - kernel is built with
97 97
98#define SIZEOF_PTREGS 21*8 98#define SIZEOF_PTREGS 21*8
99 99
100.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax 100.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
101 /* 101 /*
102 * Push registers and sanitize registers of values that a 102 * Push registers and sanitize registers of values that a
103 * speculation attack might otherwise want to exploit. The 103 * speculation attack might otherwise want to exploit. The
@@ -105,32 +105,41 @@ For 32-bit we have the following conventions - kernel is built with
105 * could be put to use in a speculative execution gadget. 105 * could be put to use in a speculative execution gadget.
106 * Interleave XOR with PUSH for better uop scheduling: 106 * Interleave XOR with PUSH for better uop scheduling:
107 */ 107 */
108 .if \save_ret
109 pushq %rsi /* pt_regs->si */
110 movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */
111 movq %rdi, 8(%rsp) /* pt_regs->di (overwriting original return address) */
112 .else
108 pushq %rdi /* pt_regs->di */ 113 pushq %rdi /* pt_regs->di */
109 pushq %rsi /* pt_regs->si */ 114 pushq %rsi /* pt_regs->si */
115 .endif
110 pushq \rdx /* pt_regs->dx */ 116 pushq \rdx /* pt_regs->dx */
111 pushq %rcx /* pt_regs->cx */ 117 pushq %rcx /* pt_regs->cx */
112 pushq \rax /* pt_regs->ax */ 118 pushq \rax /* pt_regs->ax */
113 pushq %r8 /* pt_regs->r8 */ 119 pushq %r8 /* pt_regs->r8 */
114 xorq %r8, %r8 /* nospec r8 */ 120 xorl %r8d, %r8d /* nospec r8 */
115 pushq %r9 /* pt_regs->r9 */ 121 pushq %r9 /* pt_regs->r9 */
116 xorq %r9, %r9 /* nospec r9 */ 122 xorl %r9d, %r9d /* nospec r9 */
117 pushq %r10 /* pt_regs->r10 */ 123 pushq %r10 /* pt_regs->r10 */
118 xorq %r10, %r10 /* nospec r10 */ 124 xorl %r10d, %r10d /* nospec r10 */
119 pushq %r11 /* pt_regs->r11 */ 125 pushq %r11 /* pt_regs->r11 */
120 xorq %r11, %r11 /* nospec r11*/ 126 xorl %r11d, %r11d /* nospec r11*/
121 pushq %rbx /* pt_regs->rbx */ 127 pushq %rbx /* pt_regs->rbx */
122 xorl %ebx, %ebx /* nospec rbx*/ 128 xorl %ebx, %ebx /* nospec rbx*/
123 pushq %rbp /* pt_regs->rbp */ 129 pushq %rbp /* pt_regs->rbp */
124 xorl %ebp, %ebp /* nospec rbp*/ 130 xorl %ebp, %ebp /* nospec rbp*/
125 pushq %r12 /* pt_regs->r12 */ 131 pushq %r12 /* pt_regs->r12 */
126 xorq %r12, %r12 /* nospec r12*/ 132 xorl %r12d, %r12d /* nospec r12*/
127 pushq %r13 /* pt_regs->r13 */ 133 pushq %r13 /* pt_regs->r13 */
128 xorq %r13, %r13 /* nospec r13*/ 134 xorl %r13d, %r13d /* nospec r13*/
129 pushq %r14 /* pt_regs->r14 */ 135 pushq %r14 /* pt_regs->r14 */
130 xorq %r14, %r14 /* nospec r14*/ 136 xorl %r14d, %r14d /* nospec r14*/
131 pushq %r15 /* pt_regs->r15 */ 137 pushq %r15 /* pt_regs->r15 */
132 xorq %r15, %r15 /* nospec r15*/ 138 xorl %r15d, %r15d /* nospec r15*/
133 UNWIND_HINT_REGS 139 UNWIND_HINT_REGS
140 .if \save_ret
141 pushq %rsi /* return address on top of stack */
142 .endif
134.endm 143.endm
135 144
136.macro POP_REGS pop_rdi=1 skip_r11rcx=0 145.macro POP_REGS pop_rdi=1 skip_r11rcx=0
@@ -172,12 +181,7 @@ For 32-bit we have the following conventions - kernel is built with
172 */ 181 */
173.macro ENCODE_FRAME_POINTER ptregs_offset=0 182.macro ENCODE_FRAME_POINTER ptregs_offset=0
174#ifdef CONFIG_FRAME_POINTER 183#ifdef CONFIG_FRAME_POINTER
175 .if \ptregs_offset 184 leaq 1+\ptregs_offset(%rsp), %rbp
176 leaq \ptregs_offset(%rsp), %rbp
177 .else
178 mov %rsp, %rbp
179 .endif
180 orq $0x1, %rbp
181#endif 185#endif
182.endm 186.endm
183 187
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 16c2c022540d..6ad064c8cf35 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -252,8 +252,7 @@ ENTRY(__switch_to_asm)
 	 * exist, overwrite the RSB with entries which capture
 	 * speculative execution to prevent attack.
 	 */
-	/* Clobbers %ebx */
-	FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+	FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
 	/* restore callee-saved registers */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 8971bd64d515..d5c7f18f79ac 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -364,8 +364,7 @@ ENTRY(__switch_to_asm)
 	 * exist, overwrite the RSB with entries which capture
 	 * speculative execution to prevent attack.
 	 */
-	/* Clobbers %rbx */
-	FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+	FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
 	/* restore callee-saved registers */
@@ -449,9 +448,19 @@ END(irq_entries_start)
449 * 448 *
450 * The invariant is that, if irq_count != -1, then the IRQ stack is in use. 449 * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
451 */ 450 */
452.macro ENTER_IRQ_STACK regs=1 old_rsp 451.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0
453 DEBUG_ENTRY_ASSERT_IRQS_OFF 452 DEBUG_ENTRY_ASSERT_IRQS_OFF
453
454 .if \save_ret
455 /*
456 * If save_ret is set, the original stack contains one additional
457 * entry -- the return address. Therefore, move the address one
458 * entry below %rsp to \old_rsp.
459 */
460 leaq 8(%rsp), \old_rsp
461 .else
454 movq %rsp, \old_rsp 462 movq %rsp, \old_rsp
463 .endif
455 464
456 .if \regs 465 .if \regs
457 UNWIND_HINT_REGS base=\old_rsp 466 UNWIND_HINT_REGS base=\old_rsp
@@ -497,6 +506,15 @@ END(irq_entries_start)
497 .if \regs 506 .if \regs
498 UNWIND_HINT_REGS indirect=1 507 UNWIND_HINT_REGS indirect=1
499 .endif 508 .endif
509
510 .if \save_ret
511 /*
512 * Push the return address to the stack. This return address can
513 * be found at the "real" original RSP, which was offset by 8 at
514 * the beginning of this macro.
515 */
516 pushq -8(\old_rsp)
517 .endif
500.endm 518.endm
501 519
502/* 520/*
@@ -520,27 +538,65 @@ END(irq_entries_start)
520.endm 538.endm
521 539
522/* 540/*
523 * Interrupt entry/exit. 541 * Interrupt entry helper function.
524 *
525 * Interrupt entry points save only callee clobbered registers in fast path.
526 * 542 *
527 * Entry runs with interrupts off. 543 * Entry runs with interrupts off. Stack layout at entry:
544 * +----------------------------------------------------+
545 * | regs->ss |
546 * | regs->rsp |
547 * | regs->eflags |
548 * | regs->cs |
549 * | regs->ip |
550 * +----------------------------------------------------+
551 * | regs->orig_ax = ~(interrupt number) |
552 * +----------------------------------------------------+
553 * | return address |
554 * +----------------------------------------------------+
528 */ 555 */
529 556ENTRY(interrupt_entry)
530/* 0(%rsp): ~(interrupt number) */ 557 UNWIND_HINT_FUNC
531 .macro interrupt func 558 ASM_CLAC
532 cld 559 cld
533 560
534 testb $3, CS-ORIG_RAX(%rsp) 561 testb $3, CS-ORIG_RAX+8(%rsp)
535 jz 1f 562 jz 1f
536 SWAPGS 563 SWAPGS
537 call switch_to_thread_stack 564
565 /*
566 * Switch to the thread stack. The IRET frame and orig_ax are
567 * on the stack, as well as the return address. RDI..R12 are
568 * not (yet) on the stack and space has not (yet) been
569 * allocated for them.
570 */
571 pushq %rdi
572
573 /* Need to switch before accessing the thread stack. */
574 SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
575 movq %rsp, %rdi
576 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
577
578 /*
579 * We have RDI, return address, and orig_ax on the stack on
580 * top of the IRET frame. That means offset=24
581 */
582 UNWIND_HINT_IRET_REGS base=%rdi offset=24
583
584 pushq 7*8(%rdi) /* regs->ss */
585 pushq 6*8(%rdi) /* regs->rsp */
586 pushq 5*8(%rdi) /* regs->eflags */
587 pushq 4*8(%rdi) /* regs->cs */
588 pushq 3*8(%rdi) /* regs->ip */
589 pushq 2*8(%rdi) /* regs->orig_ax */
590 pushq 8(%rdi) /* return address */
591 UNWIND_HINT_FUNC
592
593 movq (%rdi), %rdi
5381: 5941:
539 595
540 PUSH_AND_CLEAR_REGS 596 PUSH_AND_CLEAR_REGS save_ret=1
541 ENCODE_FRAME_POINTER 597 ENCODE_FRAME_POINTER 8
542 598
543 testb $3, CS(%rsp) 599 testb $3, CS+8(%rsp)
544 jz 1f 600 jz 1f
545 601
546 /* 602 /*
@@ -548,7 +604,7 @@ END(irq_entries_start)
548 * 604 *
549 * We need to tell lockdep that IRQs are off. We can't do this until 605 * We need to tell lockdep that IRQs are off. We can't do this until
550 * we fix gsbase, and we should do it before enter_from_user_mode 606 * we fix gsbase, and we should do it before enter_from_user_mode
551 * (which can take locks). Since TRACE_IRQS_OFF idempotent, 607 * (which can take locks). Since TRACE_IRQS_OFF is idempotent,
552 * the simplest way to handle it is to just call it twice if 608 * the simplest way to handle it is to just call it twice if
553 * we enter from user mode. There's no reason to optimize this since 609 * we enter from user mode. There's no reason to optimize this since
554 * TRACE_IRQS_OFF is a no-op if lockdep is off. 610 * TRACE_IRQS_OFF is a no-op if lockdep is off.
@@ -558,12 +614,15 @@ END(irq_entries_start)
558 CALL_enter_from_user_mode 614 CALL_enter_from_user_mode
559 615
5601: 6161:
561 ENTER_IRQ_STACK old_rsp=%rdi 617 ENTER_IRQ_STACK old_rsp=%rdi save_ret=1
562 /* We entered an interrupt context - irqs are off: */ 618 /* We entered an interrupt context - irqs are off: */
563 TRACE_IRQS_OFF 619 TRACE_IRQS_OFF
564 620
565 call \func /* rdi points to pt_regs */ 621 ret
566 .endm 622END(interrupt_entry)
623
624
625/* Interrupt entry/exit. */
567 626
568 /* 627 /*
569 * The interrupt stubs push (~vector+0x80) onto the stack and 628 * The interrupt stubs push (~vector+0x80) onto the stack and
@@ -571,9 +630,10 @@ END(irq_entries_start)
571 */ 630 */
572 .p2align CONFIG_X86_L1_CACHE_SHIFT 631 .p2align CONFIG_X86_L1_CACHE_SHIFT
573common_interrupt: 632common_interrupt:
574 ASM_CLAC
575 addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ 633 addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
576 interrupt do_IRQ 634 call interrupt_entry
635 UNWIND_HINT_REGS indirect=1
636 call do_IRQ /* rdi points to pt_regs */
577 /* 0(%rsp): old RSP */ 637 /* 0(%rsp): old RSP */
578ret_from_intr: 638ret_from_intr:
579 DISABLE_INTERRUPTS(CLBR_ANY) 639 DISABLE_INTERRUPTS(CLBR_ANY)
@@ -766,10 +826,11 @@ END(common_interrupt)
766.macro apicinterrupt3 num sym do_sym 826.macro apicinterrupt3 num sym do_sym
767ENTRY(\sym) 827ENTRY(\sym)
768 UNWIND_HINT_IRET_REGS 828 UNWIND_HINT_IRET_REGS
769 ASM_CLAC
770 pushq $~(\num) 829 pushq $~(\num)
771.Lcommon_\sym: 830.Lcommon_\sym:
772 interrupt \do_sym 831 call interrupt_entry
832 UNWIND_HINT_REGS indirect=1
833 call \do_sym /* rdi points to pt_regs */
773 jmp ret_from_intr 834 jmp ret_from_intr
774END(\sym) 835END(\sym)
775.endm 836.endm
@@ -832,34 +893,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
832 */ 893 */
833#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) 894#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
834 895
835/*
836 * Switch to the thread stack. This is called with the IRET frame and
837 * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
838 * space has not been allocated for them.)
839 */
840ENTRY(switch_to_thread_stack)
841 UNWIND_HINT_FUNC
842
843 pushq %rdi
844 /* Need to switch before accessing the thread stack. */
845 SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
846 movq %rsp, %rdi
847 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
848 UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
849
850 pushq 7*8(%rdi) /* regs->ss */
851 pushq 6*8(%rdi) /* regs->rsp */
852 pushq 5*8(%rdi) /* regs->eflags */
853 pushq 4*8(%rdi) /* regs->cs */
854 pushq 3*8(%rdi) /* regs->ip */
855 pushq 2*8(%rdi) /* regs->orig_ax */
856 pushq 8(%rdi) /* return address */
857 UNWIND_HINT_FUNC
858
859 movq (%rdi), %rdi
860 ret
861END(switch_to_thread_stack)
862
863.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 896.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
864ENTRY(\sym) 897ENTRY(\sym)
865 UNWIND_HINT_IRET_REGS offset=\has_error_code*8 898 UNWIND_HINT_IRET_REGS offset=\has_error_code*8
@@ -875,12 +908,8 @@ ENTRY(\sym)
875 pushq $-1 /* ORIG_RAX: no syscall to restart */ 908 pushq $-1 /* ORIG_RAX: no syscall to restart */
876 .endif 909 .endif
877 910
878 /* Save all registers in pt_regs */
879 PUSH_AND_CLEAR_REGS
880 ENCODE_FRAME_POINTER
881
882 .if \paranoid < 2 911 .if \paranoid < 2
883 testb $3, CS(%rsp) /* If coming from userspace, switch stacks */ 912 testb $3, CS-ORIG_RAX(%rsp) /* If coming from userspace, switch stacks */
884 jnz .Lfrom_usermode_switch_stack_\@ 913 jnz .Lfrom_usermode_switch_stack_\@
885 .endif 914 .endif
886 915
@@ -1130,13 +1159,15 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1
1130#endif 1159#endif
1131 1160
1132/* 1161/*
1133 * Switch gs if needed. 1162 * Save all registers in pt_regs, and switch gs if needed.
1134 * Use slow, but surefire "are we in kernel?" check. 1163 * Use slow, but surefire "are we in kernel?" check.
1135 * Return: ebx=0: need swapgs on exit, ebx=1: otherwise 1164 * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
1136 */ 1165 */
1137ENTRY(paranoid_entry) 1166ENTRY(paranoid_entry)
1138 UNWIND_HINT_FUNC 1167 UNWIND_HINT_FUNC
1139 cld 1168 cld
1169 PUSH_AND_CLEAR_REGS save_ret=1
1170 ENCODE_FRAME_POINTER 8
1140 movl $1, %ebx 1171 movl $1, %ebx
1141 movl $MSR_GS_BASE, %ecx 1172 movl $MSR_GS_BASE, %ecx
1142 rdmsr 1173 rdmsr
@@ -1181,12 +1212,14 @@ ENTRY(paranoid_exit)
1181END(paranoid_exit) 1212END(paranoid_exit)
1182 1213
1183/* 1214/*
1184 * Switch gs if needed. 1215 * Save all registers in pt_regs, and switch GS if needed.
1185 * Return: EBX=0: came from user mode; EBX=1: otherwise 1216 * Return: EBX=0: came from user mode; EBX=1: otherwise
1186 */ 1217 */
1187ENTRY(error_entry) 1218ENTRY(error_entry)
1188 UNWIND_HINT_REGS offset=8 1219 UNWIND_HINT_FUNC
1189 cld 1220 cld
1221 PUSH_AND_CLEAR_REGS save_ret=1
1222 ENCODE_FRAME_POINTER 8
1190 testb $3, CS+8(%rsp) 1223 testb $3, CS+8(%rsp)
1191 jz .Lerror_kernelspace 1224 jz .Lerror_kernelspace
1192 1225
@@ -1577,8 +1610,6 @@ end_repeat_nmi:
1577 * frame to point back to repeat_nmi. 1610 * frame to point back to repeat_nmi.
1578 */ 1611 */
1579 pushq $-1 /* ORIG_RAX: no syscall to restart */ 1612 pushq $-1 /* ORIG_RAX: no syscall to restart */
1580 PUSH_AND_CLEAR_REGS
1581 ENCODE_FRAME_POINTER
1582 1613
1583 /* 1614 /*
1584 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit 1615 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index fd65e016e413..e811dd9c5e99 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -85,25 +85,25 @@ ENTRY(entry_SYSENTER_compat)
85 pushq %rcx /* pt_regs->cx */ 85 pushq %rcx /* pt_regs->cx */
86 pushq $-ENOSYS /* pt_regs->ax */ 86 pushq $-ENOSYS /* pt_regs->ax */
87 pushq $0 /* pt_regs->r8 = 0 */ 87 pushq $0 /* pt_regs->r8 = 0 */
88 xorq %r8, %r8 /* nospec r8 */ 88 xorl %r8d, %r8d /* nospec r8 */
89 pushq $0 /* pt_regs->r9 = 0 */ 89 pushq $0 /* pt_regs->r9 = 0 */
90 xorq %r9, %r9 /* nospec r9 */ 90 xorl %r9d, %r9d /* nospec r9 */
91 pushq $0 /* pt_regs->r10 = 0 */ 91 pushq $0 /* pt_regs->r10 = 0 */
92 xorq %r10, %r10 /* nospec r10 */ 92 xorl %r10d, %r10d /* nospec r10 */
93 pushq $0 /* pt_regs->r11 = 0 */ 93 pushq $0 /* pt_regs->r11 = 0 */
94 xorq %r11, %r11 /* nospec r11 */ 94 xorl %r11d, %r11d /* nospec r11 */
95 pushq %rbx /* pt_regs->rbx */ 95 pushq %rbx /* pt_regs->rbx */
96 xorl %ebx, %ebx /* nospec rbx */ 96 xorl %ebx, %ebx /* nospec rbx */
97 pushq %rbp /* pt_regs->rbp (will be overwritten) */ 97 pushq %rbp /* pt_regs->rbp (will be overwritten) */
98 xorl %ebp, %ebp /* nospec rbp */ 98 xorl %ebp, %ebp /* nospec rbp */
99 pushq $0 /* pt_regs->r12 = 0 */ 99 pushq $0 /* pt_regs->r12 = 0 */
100 xorq %r12, %r12 /* nospec r12 */ 100 xorl %r12d, %r12d /* nospec r12 */
101 pushq $0 /* pt_regs->r13 = 0 */ 101 pushq $0 /* pt_regs->r13 = 0 */
102 xorq %r13, %r13 /* nospec r13 */ 102 xorl %r13d, %r13d /* nospec r13 */
103 pushq $0 /* pt_regs->r14 = 0 */ 103 pushq $0 /* pt_regs->r14 = 0 */
104 xorq %r14, %r14 /* nospec r14 */ 104 xorl %r14d, %r14d /* nospec r14 */
105 pushq $0 /* pt_regs->r15 = 0 */ 105 pushq $0 /* pt_regs->r15 = 0 */
106 xorq %r15, %r15 /* nospec r15 */ 106 xorl %r15d, %r15d /* nospec r15 */
107 cld 107 cld
108 108
109 /* 109 /*
@@ -224,25 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
224 pushq %rbp /* pt_regs->cx (stashed in bp) */ 224 pushq %rbp /* pt_regs->cx (stashed in bp) */
225 pushq $-ENOSYS /* pt_regs->ax */ 225 pushq $-ENOSYS /* pt_regs->ax */
226 pushq $0 /* pt_regs->r8 = 0 */ 226 pushq $0 /* pt_regs->r8 = 0 */
227 xorq %r8, %r8 /* nospec r8 */ 227 xorl %r8d, %r8d /* nospec r8 */
228 pushq $0 /* pt_regs->r9 = 0 */ 228 pushq $0 /* pt_regs->r9 = 0 */
229 xorq %r9, %r9 /* nospec r9 */ 229 xorl %r9d, %r9d /* nospec r9 */
230 pushq $0 /* pt_regs->r10 = 0 */ 230 pushq $0 /* pt_regs->r10 = 0 */
231 xorq %r10, %r10 /* nospec r10 */ 231 xorl %r10d, %r10d /* nospec r10 */
232 pushq $0 /* pt_regs->r11 = 0 */ 232 pushq $0 /* pt_regs->r11 = 0 */
233 xorq %r11, %r11 /* nospec r11 */ 233 xorl %r11d, %r11d /* nospec r11 */
234 pushq %rbx /* pt_regs->rbx */ 234 pushq %rbx /* pt_regs->rbx */
235 xorl %ebx, %ebx /* nospec rbx */ 235 xorl %ebx, %ebx /* nospec rbx */
236 pushq %rbp /* pt_regs->rbp (will be overwritten) */ 236 pushq %rbp /* pt_regs->rbp (will be overwritten) */
237 xorl %ebp, %ebp /* nospec rbp */ 237 xorl %ebp, %ebp /* nospec rbp */
238 pushq $0 /* pt_regs->r12 = 0 */ 238 pushq $0 /* pt_regs->r12 = 0 */
239 xorq %r12, %r12 /* nospec r12 */ 239 xorl %r12d, %r12d /* nospec r12 */
240 pushq $0 /* pt_regs->r13 = 0 */ 240 pushq $0 /* pt_regs->r13 = 0 */
241 xorq %r13, %r13 /* nospec r13 */ 241 xorl %r13d, %r13d /* nospec r13 */
242 pushq $0 /* pt_regs->r14 = 0 */ 242 pushq $0 /* pt_regs->r14 = 0 */
243 xorq %r14, %r14 /* nospec r14 */ 243 xorl %r14d, %r14d /* nospec r14 */
244 pushq $0 /* pt_regs->r15 = 0 */ 244 pushq $0 /* pt_regs->r15 = 0 */
245 xorq %r15, %r15 /* nospec r15 */ 245 xorl %r15d, %r15d /* nospec r15 */
246 246
247 /* 247 /*
248 * User mode is traced as though IRQs are on, and SYSENTER 248 * User mode is traced as though IRQs are on, and SYSENTER
@@ -298,9 +298,9 @@ sysret32_from_system_call:
298 */ 298 */
299 SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9 299 SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
300 300
301 xorq %r8, %r8 301 xorl %r8d, %r8d
302 xorq %r9, %r9 302 xorl %r9d, %r9d
303 xorq %r10, %r10 303 xorl %r10d, %r10d
304 swapgs 304 swapgs
305 sysretl 305 sysretl
306END(entry_SYSCALL_compat) 306END(entry_SYSCALL_compat)
@@ -347,10 +347,23 @@ ENTRY(entry_INT80_compat)
347 */ 347 */
348 movl %eax, %eax 348 movl %eax, %eax
349 349
350 /* switch to thread stack expects orig_ax and rdi to be pushed */
350 pushq %rax /* pt_regs->orig_ax */ 351 pushq %rax /* pt_regs->orig_ax */
352 pushq %rdi /* pt_regs->di */
353
354 /* Need to switch before accessing the thread stack. */
355 SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
356 movq %rsp, %rdi
357 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
358
359 pushq 6*8(%rdi) /* regs->ss */
360 pushq 5*8(%rdi) /* regs->rsp */
361 pushq 4*8(%rdi) /* regs->eflags */
362 pushq 3*8(%rdi) /* regs->cs */
363 pushq 2*8(%rdi) /* regs->ip */
364 pushq 1*8(%rdi) /* regs->orig_ax */
351 365
352 /* switch to thread stack expects orig_ax to be pushed */ 366 movq (%rdi), %rdi /* restore %rdi */
353 call switch_to_thread_stack
354 367
355 pushq %rdi /* pt_regs->di */ 368 pushq %rdi /* pt_regs->di */
356 pushq %rsi /* pt_regs->si */ 369 pushq %rsi /* pt_regs->si */
@@ -358,25 +371,25 @@ ENTRY(entry_INT80_compat)
358 pushq %rcx /* pt_regs->cx */ 371 pushq %rcx /* pt_regs->cx */
359 pushq $-ENOSYS /* pt_regs->ax */ 372 pushq $-ENOSYS /* pt_regs->ax */
360 pushq $0 /* pt_regs->r8 = 0 */ 373 pushq $0 /* pt_regs->r8 = 0 */
361 xorq %r8, %r8 /* nospec r8 */ 374 xorl %r8d, %r8d /* nospec r8 */
362 pushq $0 /* pt_regs->r9 = 0 */ 375 pushq $0 /* pt_regs->r9 = 0 */
363 xorq %r9, %r9 /* nospec r9 */ 376 xorl %r9d, %r9d /* nospec r9 */
364 pushq $0 /* pt_regs->r10 = 0 */ 377 pushq $0 /* pt_regs->r10 = 0 */
365 xorq %r10, %r10 /* nospec r10 */ 378 xorl %r10d, %r10d /* nospec r10 */
366 pushq $0 /* pt_regs->r11 = 0 */ 379 pushq $0 /* pt_regs->r11 = 0 */
367 xorq %r11, %r11 /* nospec r11 */ 380 xorl %r11d, %r11d /* nospec r11 */
368 pushq %rbx /* pt_regs->rbx */ 381 pushq %rbx /* pt_regs->rbx */
369 xorl %ebx, %ebx /* nospec rbx */ 382 xorl %ebx, %ebx /* nospec rbx */
370 pushq %rbp /* pt_regs->rbp */ 383 pushq %rbp /* pt_regs->rbp */
371 xorl %ebp, %ebp /* nospec rbp */ 384 xorl %ebp, %ebp /* nospec rbp */
372 pushq %r12 /* pt_regs->r12 */ 385 pushq %r12 /* pt_regs->r12 */
373 xorq %r12, %r12 /* nospec r12 */ 386 xorl %r12d, %r12d /* nospec r12 */
374 pushq %r13 /* pt_regs->r13 */ 387 pushq %r13 /* pt_regs->r13 */
375 xorq %r13, %r13 /* nospec r13 */ 388 xorl %r13d, %r13d /* nospec r13 */
376 pushq %r14 /* pt_regs->r14 */ 389 pushq %r14 /* pt_regs->r14 */
377 xorq %r14, %r14 /* nospec r14 */ 390 xorl %r14d, %r14d /* nospec r14 */
378 pushq %r15 /* pt_regs->r15 */ 391 pushq %r15 /* pt_regs->r15 */
379 xorq %r15, %r15 /* nospec r15 */ 392 xorl %r15d, %r15d /* nospec r15 */
380 cld 393 cld
381 394
382 /* 395 /*
diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h
index 4d4015ddcf26..c356098b6fb9 100644
--- a/arch/x86/include/asm/apm.h
+++ b/arch/x86/include/asm/apm.h
@@ -7,6 +7,8 @@
7#ifndef _ASM_X86_MACH_DEFAULT_APM_H 7#ifndef _ASM_X86_MACH_DEFAULT_APM_H
8#define _ASM_X86_MACH_DEFAULT_APM_H 8#define _ASM_X86_MACH_DEFAULT_APM_H
9 9
10#include <asm/nospec-branch.h>
11
10#ifdef APM_ZERO_SEGS 12#ifdef APM_ZERO_SEGS
11# define APM_DO_ZERO_SEGS \ 13# define APM_DO_ZERO_SEGS \
12 "pushl %%ds\n\t" \ 14 "pushl %%ds\n\t" \
@@ -32,6 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
32 * N.B. We do NOT need a cld after the BIOS call 34 * N.B. We do NOT need a cld after the BIOS call
33 * because we always save and restore the flags. 35 * because we always save and restore the flags.
34 */ 36 */
37 firmware_restrict_branch_speculation_start();
35 __asm__ __volatile__(APM_DO_ZERO_SEGS 38 __asm__ __volatile__(APM_DO_ZERO_SEGS
36 "pushl %%edi\n\t" 39 "pushl %%edi\n\t"
37 "pushl %%ebp\n\t" 40 "pushl %%ebp\n\t"
@@ -44,6 +47,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
44 "=S" (*esi) 47 "=S" (*esi)
45 : "a" (func), "b" (ebx_in), "c" (ecx_in) 48 : "a" (func), "b" (ebx_in), "c" (ecx_in)
46 : "memory", "cc"); 49 : "memory", "cc");
50 firmware_restrict_branch_speculation_end();
47} 51}
48 52
49static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, 53static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
@@ -56,6 +60,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
56 * N.B. We do NOT need a cld after the BIOS call 60 * N.B. We do NOT need a cld after the BIOS call
57 * because we always save and restore the flags. 61 * because we always save and restore the flags.
58 */ 62 */
63 firmware_restrict_branch_speculation_start();
59 __asm__ __volatile__(APM_DO_ZERO_SEGS 64 __asm__ __volatile__(APM_DO_ZERO_SEGS
60 "pushl %%edi\n\t" 65 "pushl %%edi\n\t"
61 "pushl %%ebp\n\t" 66 "pushl %%ebp\n\t"
@@ -68,6 +73,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
68 "=S" (si) 73 "=S" (si)
69 : "a" (func), "b" (ebx_in), "c" (ecx_in) 74 : "a" (func), "b" (ebx_in), "c" (ecx_in)
70 : "memory", "cc"); 75 : "memory", "cc");
76 firmware_restrict_branch_speculation_end();
71 return error; 77 return error;
72} 78}
73 79
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 4d111616524b..1908214b9125 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -38,7 +38,4 @@ INDIRECT_THUNK(dx)
 INDIRECT_THUNK(si)
 INDIRECT_THUNK(di)
 INDIRECT_THUNK(bp)
-asmlinkage void __fill_rsb(void);
-asmlinkage void __clear_rsb(void);
-
 #endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 0dfe4d3f74e2..f41079da38c5 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -213,6 +213,7 @@
 #define X86_FEATURE_SEV			( 7*32+20) /* AMD Secure Encrypted Virtualization */
 
 #define X86_FEATURE_USE_IBPB		( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
+#define X86_FEATURE_USE_IBRS_FW		( 7*32+22) /* "" Use IBRS during runtime firmware calls */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 85f6ccb80b91..a399c1ebf6f0 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -6,6 +6,7 @@
6#include <asm/pgtable.h> 6#include <asm/pgtable.h>
7#include <asm/processor-flags.h> 7#include <asm/processor-flags.h>
8#include <asm/tlb.h> 8#include <asm/tlb.h>
9#include <asm/nospec-branch.h>
9 10
10/* 11/*
11 * We map the EFI regions needed for runtime services non-contiguously, 12 * We map the EFI regions needed for runtime services non-contiguously,
@@ -36,8 +37,18 @@
36 37
37extern asmlinkage unsigned long efi_call_phys(void *, ...); 38extern asmlinkage unsigned long efi_call_phys(void *, ...);
38 39
39#define arch_efi_call_virt_setup() kernel_fpu_begin() 40#define arch_efi_call_virt_setup() \
40#define arch_efi_call_virt_teardown() kernel_fpu_end() 41({ \
42 kernel_fpu_begin(); \
43 firmware_restrict_branch_speculation_start(); \
44})
45
46#define arch_efi_call_virt_teardown() \
47({ \
48 firmware_restrict_branch_speculation_end(); \
49 kernel_fpu_end(); \
50})
51
41 52
42/* 53/*
43 * Wrap all the virtual calls in a way that forces the parameters on the stack. 54 * Wrap all the virtual calls in a way that forces the parameters on the stack.
@@ -73,6 +84,7 @@ struct efi_scratch {
73 efi_sync_low_kernel_mappings(); \ 84 efi_sync_low_kernel_mappings(); \
74 preempt_disable(); \ 85 preempt_disable(); \
75 __kernel_fpu_begin(); \ 86 __kernel_fpu_begin(); \
87 firmware_restrict_branch_speculation_start(); \
76 \ 88 \
77 if (efi_scratch.use_pgd) { \ 89 if (efi_scratch.use_pgd) { \
78 efi_scratch.prev_cr3 = __read_cr3(); \ 90 efi_scratch.prev_cr3 = __read_cr3(); \
@@ -91,6 +103,7 @@ struct efi_scratch {
91 __flush_tlb_all(); \ 103 __flush_tlb_all(); \
92 } \ 104 } \
93 \ 105 \
106 firmware_restrict_branch_speculation_end(); \
94 __kernel_fpu_end(); \ 107 __kernel_fpu_end(); \
95 preempt_enable(); \ 108 preempt_enable(); \
96}) 109})
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dd6f57a54a26..b605a5b6a30c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -507,6 +507,7 @@ struct kvm_vcpu_arch {
507 u64 smi_count; 507 u64 smi_count;
508 bool tpr_access_reporting; 508 bool tpr_access_reporting;
509 u64 ia32_xss; 509 u64 ia32_xss;
510 u64 microcode_version;
510 511
511 /* 512 /*
512 * Paging state of the vcpu 513 * Paging state of the vcpu
@@ -1095,6 +1096,8 @@ struct kvm_x86_ops {
1095 int (*mem_enc_op)(struct kvm *kvm, void __user *argp); 1096 int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
1096 int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp); 1097 int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
1097 int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp); 1098 int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
1099
1100 int (*get_msr_feature)(struct kvm_msr_entry *entry);
1098}; 1101};
1099 1102
1100struct kvm_arch_async_pf { 1103struct kvm_arch_async_pf {
@@ -1464,7 +1467,4 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
1464#define put_smstate(type, buf, offset, val) \ 1467#define put_smstate(type, buf, offset, val) \
1465 *(type *)((buf) + (offset) - 0x7e00) = val 1468 *(type *)((buf) + (offset) - 0x7e00) = val
1466 1469
1467void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
1468 unsigned long start, unsigned long end);
1469
1470#endif /* _ASM_X86_KVM_HOST_H */ 1470#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 55520cec8b27..7fb1047d61c7 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -37,7 +37,12 @@ struct cpu_signature {
 
 struct device;
 
-enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND };
+enum ucode_state {
+	UCODE_OK	= 0,
+	UCODE_UPDATED,
+	UCODE_NFOUND,
+	UCODE_ERROR,
+};
 
 struct microcode_ops {
 	enum ucode_state (*request_microcode_user) (int cpu,
@@ -54,7 +59,7 @@ struct microcode_ops {
 	 * are being called.
 	 * See also the "Synchronization" section in microcode_core.c.
 	 */
-	int (*apply_microcode) (int cpu);
+	enum ucode_state (*apply_microcode) (int cpu);
 	int (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
 };
 
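
The widened ucode_state enum above lets ->apply_microcode() distinguish "already current" (UCODE_OK) from "a new revision was actually loaded" (UCODE_UPDATED). A rough sketch of a caller, purely illustrative — the helper name and the return-code mapping are assumptions, not code from this series:

#include <linux/errno.h>
#include <asm/microcode.h>

/* Hypothetical caller: not part of this patch. */
static int example_apply_on_cpu(struct microcode_ops *ops, int cpu)
{
	enum ucode_state ret = ops->apply_microcode(cpu);

	switch (ret) {
	case UCODE_UPDATED:	/* a new microcode revision was loaded */
		return 1;
	case UCODE_OK:		/* already up to date; nothing applied */
		return 0;
	case UCODE_NFOUND:	/* no matching patch for this CPU */
	case UCODE_ERROR:	/* the update attempt failed */
	default:
		return -EINVAL;
	}
}
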
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index c931b88982a0..1de72ce514cd 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -74,6 +74,7 @@ static inline void *ldt_slot_va(int slot)
 	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
 #else
 	BUG();
+	return (void *)fix_to_virt(FIX_HOLE);
 #endif
 }
 
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 76b058533e47..d0dabeae0505 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -8,6 +8,50 @@
8#include <asm/cpufeatures.h> 8#include <asm/cpufeatures.h>
9#include <asm/msr-index.h> 9#include <asm/msr-index.h>
10 10
11/*
12 * Fill the CPU return stack buffer.
13 *
14 * Each entry in the RSB, if used for a speculative 'ret', contains an
15 * infinite 'pause; lfence; jmp' loop to capture speculative execution.
16 *
17 * This is required in various cases for retpoline and IBRS-based
18 * mitigations for the Spectre variant 2 vulnerability. Sometimes to
19 * eliminate potentially bogus entries from the RSB, and sometimes
20 * purely to ensure that it doesn't get empty, which on some CPUs would
21 * allow predictions from other (unwanted!) sources to be used.
22 *
23 * We define a CPP macro such that it can be used from both .S files and
24 * inline assembly. It's possible to do a .macro and then include that
25 * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
26 */
27
28#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
29#define RSB_FILL_LOOPS 16 /* To avoid underflow */
30
31/*
32 * Google experimented with loop-unrolling and this turned out to be
33 * the optimal version — two calls, each with their own speculation
34 * trap should their return address end up getting used, in a loop.
35 */
36#define __FILL_RETURN_BUFFER(reg, nr, sp) \
37 mov $(nr/2), reg; \
38771: \
39 call 772f; \
40773: /* speculation trap */ \
41 pause; \
42 lfence; \
43 jmp 773b; \
44772: \
45 call 774f; \
46775: /* speculation trap */ \
47 pause; \
48 lfence; \
49 jmp 775b; \
50774: \
51 dec reg; \
52 jnz 771b; \
53 add $(BITS_PER_LONG/8) * nr, sp;
54
11#ifdef __ASSEMBLY__ 55#ifdef __ASSEMBLY__
12 56
13/* 57/*
@@ -24,6 +68,18 @@
24.endm 68.endm
25 69
26/* 70/*
71 * This should be used immediately before an indirect jump/call. It tells
72 * objtool the subsequent indirect jump/call is vouched safe for retpoline
73 * builds.
74 */
75.macro ANNOTATE_RETPOLINE_SAFE
76 .Lannotate_\@:
77 .pushsection .discard.retpoline_safe
78 _ASM_PTR .Lannotate_\@
79 .popsection
80.endm
81
82/*
27 * These are the bare retpoline primitives for indirect jmp and call. 83 * These are the bare retpoline primitives for indirect jmp and call.
28 * Do not use these directly; they only exist to make the ALTERNATIVE 84 * Do not use these directly; they only exist to make the ALTERNATIVE
29 * invocation below less ugly. 85 * invocation below less ugly.
@@ -59,9 +115,9 @@
59.macro JMP_NOSPEC reg:req 115.macro JMP_NOSPEC reg:req
60#ifdef CONFIG_RETPOLINE 116#ifdef CONFIG_RETPOLINE
61 ANNOTATE_NOSPEC_ALTERNATIVE 117 ANNOTATE_NOSPEC_ALTERNATIVE
62 ALTERNATIVE_2 __stringify(jmp *\reg), \ 118 ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \
63 __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ 119 __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
64 __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD 120 __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
65#else 121#else
66 jmp *\reg 122 jmp *\reg
67#endif 123#endif
@@ -70,18 +126,25 @@
70.macro CALL_NOSPEC reg:req 126.macro CALL_NOSPEC reg:req
71#ifdef CONFIG_RETPOLINE 127#ifdef CONFIG_RETPOLINE
72 ANNOTATE_NOSPEC_ALTERNATIVE 128 ANNOTATE_NOSPEC_ALTERNATIVE
73 ALTERNATIVE_2 __stringify(call *\reg), \ 129 ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \
74 __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ 130 __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
75 __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD 131 __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD
76#else 132#else
77 call *\reg 133 call *\reg
78#endif 134#endif
79.endm 135.endm
80 136
81/* This clobbers the BX register */ 137 /*
82.macro FILL_RETURN_BUFFER nr:req ftr:req 138 * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
139 * monstrosity above, manually.
140 */
141.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
83#ifdef CONFIG_RETPOLINE 142#ifdef CONFIG_RETPOLINE
84 ALTERNATIVE "", "call __clear_rsb", \ftr 143 ANNOTATE_NOSPEC_ALTERNATIVE
144 ALTERNATIVE "jmp .Lskip_rsb_\@", \
145 __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
146 \ftr
147.Lskip_rsb_\@:
85#endif 148#endif
86.endm 149.endm
87 150
@@ -93,6 +156,12 @@
93 ".long 999b - .\n\t" \ 156 ".long 999b - .\n\t" \
94 ".popsection\n\t" 157 ".popsection\n\t"
95 158
159#define ANNOTATE_RETPOLINE_SAFE \
160 "999:\n\t" \
161 ".pushsection .discard.retpoline_safe\n\t" \
162 _ASM_PTR " 999b\n\t" \
163 ".popsection\n\t"
164
96#if defined(CONFIG_X86_64) && defined(RETPOLINE) 165#if defined(CONFIG_X86_64) && defined(RETPOLINE)
97 166
98/* 167/*
@@ -102,6 +171,7 @@
102# define CALL_NOSPEC \ 171# define CALL_NOSPEC \
103 ANNOTATE_NOSPEC_ALTERNATIVE \ 172 ANNOTATE_NOSPEC_ALTERNATIVE \
104 ALTERNATIVE( \ 173 ALTERNATIVE( \
174 ANNOTATE_RETPOLINE_SAFE \
105 "call *%[thunk_target]\n", \ 175 "call *%[thunk_target]\n", \
106 "call __x86_indirect_thunk_%V[thunk_target]\n", \ 176 "call __x86_indirect_thunk_%V[thunk_target]\n", \
107 X86_FEATURE_RETPOLINE) 177 X86_FEATURE_RETPOLINE)
@@ -156,25 +226,90 @@ extern char __indirect_thunk_end[];
156static inline void vmexit_fill_RSB(void) 226static inline void vmexit_fill_RSB(void)
157{ 227{
158#ifdef CONFIG_RETPOLINE 228#ifdef CONFIG_RETPOLINE
159 alternative_input("", 229 unsigned long loops;
160 "call __fill_rsb", 230
161 X86_FEATURE_RETPOLINE, 231 asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
162 ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); 232 ALTERNATIVE("jmp 910f",
233 __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
234 X86_FEATURE_RETPOLINE)
235 "910:"
236 : "=r" (loops), ASM_CALL_CONSTRAINT
237 : : "memory" );
163#endif 238#endif
164} 239}
165 240
241#define alternative_msr_write(_msr, _val, _feature) \
242 asm volatile(ALTERNATIVE("", \
243 "movl %[msr], %%ecx\n\t" \
244 "movl %[val], %%eax\n\t" \
245 "movl $0, %%edx\n\t" \
246 "wrmsr", \
247 _feature) \
248 : : [msr] "i" (_msr), [val] "i" (_val) \
249 : "eax", "ecx", "edx", "memory")
250
166static inline void indirect_branch_prediction_barrier(void) 251static inline void indirect_branch_prediction_barrier(void)
167{ 252{
168 asm volatile(ALTERNATIVE("", 253 alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
169 "movl %[msr], %%ecx\n\t" 254 X86_FEATURE_USE_IBPB);
170 "movl %[val], %%eax\n\t"
171 "movl $0, %%edx\n\t"
172 "wrmsr",
173 X86_FEATURE_USE_IBPB)
174 : : [msr] "i" (MSR_IA32_PRED_CMD),
175 [val] "i" (PRED_CMD_IBPB)
176 : "eax", "ecx", "edx", "memory");
177} 255}
178 256
257/*
258 * With retpoline, we must use IBRS to restrict branch prediction
259 * before calling into firmware.
260 *
261 * (Implemented as CPP macros due to header hell.)
262 */
263#define firmware_restrict_branch_speculation_start() \
264do { \
265 preempt_disable(); \
266 alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \
267 X86_FEATURE_USE_IBRS_FW); \
268} while (0)
269
270#define firmware_restrict_branch_speculation_end() \
271do { \
272 alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \
273 X86_FEATURE_USE_IBRS_FW); \
274 preempt_enable(); \
275} while (0)
276
179#endif /* __ASSEMBLY__ */ 277#endif /* __ASSEMBLY__ */
278
279/*
280 * Below is used in the eBPF JIT compiler and emits the byte sequence
281 * for the following assembly:
282 *
283 * With retpolines configured:
284 *
285 * callq do_rop
286 * spec_trap:
287 * pause
288 * lfence
289 * jmp spec_trap
290 * do_rop:
291 * mov %rax,(%rsp)
292 * retq
293 *
294 * Without retpolines configured:
295 *
296 * jmp *%rax
297 */
298#ifdef CONFIG_RETPOLINE
299# define RETPOLINE_RAX_BPF_JIT_SIZE 17
300# define RETPOLINE_RAX_BPF_JIT() \
301 EMIT1_off32(0xE8, 7); /* callq do_rop */ \
302 /* spec_trap: */ \
303 EMIT2(0xF3, 0x90); /* pause */ \
304 EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
305 EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
306 /* do_rop: */ \
307 EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \
308 EMIT1(0xC3); /* retq */
309#else
310# define RETPOLINE_RAX_BPF_JIT_SIZE 2
311# define RETPOLINE_RAX_BPF_JIT() \
312 EMIT2(0xFF, 0xE0); /* jmp *%rax */
313#endif
314
180#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ 315#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
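
firmware_restrict_branch_speculation_start()/end() are meant to bracket calls into runtime firmware, exactly as the apm.h and efi.h hunks earlier in this series do. A minimal sketch of that pattern, assuming a kernel build context; my_firmware_call is a made-up stand-in, not a real firmware entry point:

#include <asm/nospec-branch.h>

/* Illustrative wrapper only; the callback is hypothetical. */
static void example_call_firmware(void (*my_firmware_call)(void))
{
	/* Sets IBRS (if X86_FEATURE_USE_IBRS_FW) and disables preemption. */
	firmware_restrict_branch_speculation_start();

	my_firmware_call();	/* runs with branch prediction restricted */

	/* Clears IBRS again and re-enables preemption. */
	firmware_restrict_branch_speculation_end();
}
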
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 554841fab717..c83a2f418cea 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -7,6 +7,7 @@
7#ifdef CONFIG_PARAVIRT 7#ifdef CONFIG_PARAVIRT
8#include <asm/pgtable_types.h> 8#include <asm/pgtable_types.h>
9#include <asm/asm.h> 9#include <asm/asm.h>
10#include <asm/nospec-branch.h>
10 11
11#include <asm/paravirt_types.h> 12#include <asm/paravirt_types.h>
12 13
@@ -879,23 +880,27 @@ extern void default_banner(void);
879 880
880#define INTERRUPT_RETURN \ 881#define INTERRUPT_RETURN \
881 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ 882 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \
882 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret)) 883 ANNOTATE_RETPOLINE_SAFE; \
884 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);)
883 885
884#define DISABLE_INTERRUPTS(clobbers) \ 886#define DISABLE_INTERRUPTS(clobbers) \
885 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ 887 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
886 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ 888 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
889 ANNOTATE_RETPOLINE_SAFE; \
887 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ 890 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \
888 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) 891 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
889 892
890#define ENABLE_INTERRUPTS(clobbers) \ 893#define ENABLE_INTERRUPTS(clobbers) \
891 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ 894 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \
892 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ 895 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
896 ANNOTATE_RETPOLINE_SAFE; \
893 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ 897 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
894 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) 898 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
895 899
896#ifdef CONFIG_X86_32 900#ifdef CONFIG_X86_32
897#define GET_CR0_INTO_EAX \ 901#define GET_CR0_INTO_EAX \
898 push %ecx; push %edx; \ 902 push %ecx; push %edx; \
903 ANNOTATE_RETPOLINE_SAFE; \
899 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ 904 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
900 pop %edx; pop %ecx 905 pop %edx; pop %ecx
901#else /* !CONFIG_X86_32 */ 906#else /* !CONFIG_X86_32 */
@@ -917,21 +922,25 @@ extern void default_banner(void);
917 */ 922 */
918#define SWAPGS \ 923#define SWAPGS \
919 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ 924 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
920 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \ 925 ANNOTATE_RETPOLINE_SAFE; \
926 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \
921 ) 927 )
922 928
923#define GET_CR2_INTO_RAX \ 929#define GET_CR2_INTO_RAX \
924 call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2) 930 ANNOTATE_RETPOLINE_SAFE; \
931 call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2);
925 932
926#define USERGS_SYSRET64 \ 933#define USERGS_SYSRET64 \
927 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ 934 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
928 CLBR_NONE, \ 935 CLBR_NONE, \
929 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) 936 ANNOTATE_RETPOLINE_SAFE; \
937 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);)
930 938
931#ifdef CONFIG_DEBUG_ENTRY 939#ifdef CONFIG_DEBUG_ENTRY
932#define SAVE_FLAGS(clobbers) \ 940#define SAVE_FLAGS(clobbers) \
933 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \ 941 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
934 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ 942 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
943 ANNOTATE_RETPOLINE_SAFE; \
935 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \ 944 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \
936 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) 945 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
937#endif 946#endif
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index f624f1f10316..180bc0bff0fb 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -43,6 +43,7 @@
 #include <asm/desc_defs.h>
 #include <asm/kmap_types.h>
 #include <asm/pgtable_types.h>
+#include <asm/nospec-branch.h>
 
 struct page;
 struct thread_struct;
@@ -392,7 +393,9 @@ int paravirt_disable_iospace(void);
  * offset into the paravirt_patch_template structure, and can therefore be
  * freely converted back into a structure offset.
  */
-#define PARAVIRT_CALL	"call *%c[paravirt_opptr];"
+#define PARAVIRT_CALL						\
+	ANNOTATE_RETPOLINE_SAFE					\
+	"call *%c[paravirt_opptr];"
 
 /*
  * These macros are intended to wrap calls through one of the paravirt
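
PARAVIRT_CALL now carries ANNOTATE_RETPOLINE_SAFE so objtool will not flag the patched indirect call site. As a loose illustration of using the same C-string annotation in front of an ordinary indirect call — the function pointer, operand name, and the missing clobber list are all simplifications, not kernel code:

#include <asm/nospec-branch.h>

/* Sketch only: real code must also declare caller-saved register clobbers. */
static void example_indirect_call(void (*fn)(void))
{
	asm volatile(ANNOTATE_RETPOLINE_SAFE
		     "call *%[target];"
		     : : [target] "r" (fn) : "memory");
}
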
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 63c2552b6b65..b444d83cfc95 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -350,14 +350,14 @@ static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
350{ 350{
351 pmdval_t v = native_pmd_val(pmd); 351 pmdval_t v = native_pmd_val(pmd);
352 352
353 return __pmd(v | set); 353 return native_make_pmd(v | set);
354} 354}
355 355
356static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) 356static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
357{ 357{
358 pmdval_t v = native_pmd_val(pmd); 358 pmdval_t v = native_pmd_val(pmd);
359 359
360 return __pmd(v & ~clear); 360 return native_make_pmd(v & ~clear);
361} 361}
362 362
363static inline pmd_t pmd_mkold(pmd_t pmd) 363static inline pmd_t pmd_mkold(pmd_t pmd)
@@ -409,14 +409,14 @@ static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
409{ 409{
410 pudval_t v = native_pud_val(pud); 410 pudval_t v = native_pud_val(pud);
411 411
412 return __pud(v | set); 412 return native_make_pud(v | set);
413} 413}
414 414
415static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) 415static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
416{ 416{
417 pudval_t v = native_pud_val(pud); 417 pudval_t v = native_pud_val(pud);
418 418
419 return __pud(v & ~clear); 419 return native_make_pud(v & ~clear);
420} 420}
421 421
422static inline pud_t pud_mkold(pud_t pud) 422static inline pud_t pud_mkold(pud_t pud)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 3696398a9475..246f15b4e64c 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -323,6 +323,11 @@ static inline pudval_t native_pud_val(pud_t pud)
 #else
 #include <asm-generic/pgtable-nopud.h>
 
+static inline pud_t native_make_pud(pudval_t val)
+{
+	return (pud_t) { .p4d.pgd = native_make_pgd(val) };
+}
+
 static inline pudval_t native_pud_val(pud_t pud)
 {
 	return native_pgd_val(pud.p4d.pgd);
@@ -344,6 +349,11 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
 #else
 #include <asm-generic/pgtable-nopmd.h>
 
+static inline pmd_t native_make_pmd(pmdval_t val)
+{
+	return (pmd_t) { .pud.p4d.pgd = native_make_pgd(val) };
+}
+
 static inline pmdval_t native_pmd_val(pmd_t pmd)
 {
 	return native_pgd_val(pmd.pud.p4d.pgd);
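
native_make_pmd()/native_make_pud() are the inverse of native_pmd_val()/native_pud_val(), which is what pmd_set_flags()/pud_set_flags() in the pgtable.h hunk above rely on — presumably so the rebuilt entry does not go through the paravirt-aware __pmd()/__pud() constructors. A small round-trip sketch (illustrative; the helper is hypothetical and assumes <asm/pgtable_types.h>):

static inline pmd_t example_pmd_mkflag(pmd_t pmd, pmdval_t flag)
{
	pmdval_t v = native_pmd_val(pmd);	/* unwrap to the raw value */

	return native_make_pmd(v | flag);	/* rebuild without paravirt hooks */
}
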
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 1bd9ed87606f..b0ccd4847a58 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -977,4 +977,5 @@ bool xen_set_default_idle(void);
 
 void stop_this_cpu(void *dummy);
 void df_debug(struct pt_regs *regs, long error_code);
+void microcode_check(void);
 #endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
index 4e44250e7d0d..d65171120e90 100644
--- a/arch/x86/include/asm/refcount.h
+++ b/arch/x86/include/asm/refcount.h
@@ -67,13 +67,13 @@ static __always_inline __must_check
 bool refcount_sub_and_test(unsigned int i, refcount_t *r)
 {
 	GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO,
-				  r->refs.counter, "er", i, "%0", e);
+				  r->refs.counter, "er", i, "%0", e, "cx");
 }
 
 static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
 {
 	GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO,
-				 r->refs.counter, "%0", e);
+				 r->refs.counter, "%0", e, "cx");
 }
 
 static __always_inline __must_check
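
The extra "cx" clobber passed above is internal to the RMWcc exception path; the refcount API itself is unchanged for callers. A short usage sketch under that assumption — struct example_obj and example_obj_put() are made up:

#include <linux/refcount.h>
#include <linux/slab.h>

struct example_obj {
	refcount_t refs;
	/* ... payload ... */
};

static void example_obj_put(struct example_obj *obj)
{
	if (refcount_dec_and_test(&obj->refs))	/* true once the count drops to zero */
		kfree(obj);
}
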
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index f91c365e57c3..4914a3e7c803 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -2,8 +2,7 @@
 #ifndef _ASM_X86_RMWcc
 #define _ASM_X86_RMWcc
 
-#define __CLOBBERS_MEM		"memory"
-#define __CLOBBERS_MEM_CC_CX	"memory", "cc", "cx"
+#define __CLOBBERS_MEM(clb...)	"memory", ## clb
 
 #if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO)
 
@@ -40,18 +39,19 @@ do { \
 #endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
 
 #define GEN_UNARY_RMWcc(op, var, arg0, cc)				\
-	__GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM)
+	__GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM())
 
-#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc)		\
+#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc, clobbers...)\
 	__GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc,			\
-		    __CLOBBERS_MEM_CC_CX)
+		    __CLOBBERS_MEM(clobbers))
 
 #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)			\
 	__GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc,		\
-		    __CLOBBERS_MEM, vcon (val))
+		    __CLOBBERS_MEM(), vcon (val))
 
-#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc) \
+#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc,	\
+				  clobbers...)				\
 	__GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc,	\
-		    __CLOBBERS_MEM_CC_CX, vcon (val))
+		    __CLOBBERS_MEM(clobbers), vcon (val))
 
 #endif /* _ASM_X86_RMWcc */
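
With the variadic clobbers parameter, only the *_SUFFIXED_* users that actually need extra registers pass them (as refcount.h above now passes "cx"); everyone else gets just the "memory" clobber from __CLOBBERS_MEM(). A minimal sketch of an unchanged, clobber-free user — the function is hypothetical and intentionally non-atomic, for illustration only:

#include <asm/rmwcc.h>

/* Hypothetical: decrement a plain counter, return true when it reaches zero. */
static inline bool example_dec_and_test(int *counter)
{
	GEN_UNARY_RMWcc("decl", *counter, "%0", e);
}
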
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 197c2e6c7376..099414345865 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -241,24 +241,24 @@
241#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 241#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
242 242
243struct hv_reenlightenment_control { 243struct hv_reenlightenment_control {
244 u64 vector:8; 244 __u64 vector:8;
245 u64 reserved1:8; 245 __u64 reserved1:8;
246 u64 enabled:1; 246 __u64 enabled:1;
247 u64 reserved2:15; 247 __u64 reserved2:15;
248 u64 target_vp:32; 248 __u64 target_vp:32;
249}; 249};
250 250
251#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 251#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
252#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 252#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
253 253
254struct hv_tsc_emulation_control { 254struct hv_tsc_emulation_control {
255 u64 enabled:1; 255 __u64 enabled:1;
256 u64 reserved:63; 256 __u64 reserved:63;
257}; 257};
258 258
259struct hv_tsc_emulation_status { 259struct hv_tsc_emulation_status {
260 u64 inprogress:1; 260 __u64 inprogress:1;
261 u64 reserved:63; 261 __u64 reserved:63;
262}; 262};
263 263
264#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001 264#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001
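
Since asm/hyperv.h sits under include/uapi/, it is exported to userspace, where only the __u64/__u32 fixed-width types from <linux/types.h> exist; the kernel-internal u64 spelling breaks any build that includes the exported header, hence the straight type swap above. A self-contained illustration of the layout the bitfields are expected to keep (the struct is re-declared locally here purely for the size check; it is not the header itself):

    #include <linux/types.h>

    /* Local copy for illustration only -- mirrors the layout of
     * struct hv_reenlightenment_control from the hunk above. */
    struct reenlightenment_control_demo {
        __u64 vector:8;
        __u64 reserved1:8;
        __u64 enabled:1;
        __u64 reserved2:15;
        __u64 target_vp:32;
    };

    _Static_assert(sizeof(struct reenlightenment_control_demo) == 8,
                   "bitfields must pack into one 64-bit MSR value");

    int main(void) { return 0; }
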
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 7a2ade4aa235..6cfa9c8cb7d6 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -26,6 +26,7 @@
26#define KVM_FEATURE_PV_EOI 6 26#define KVM_FEATURE_PV_EOI 6
27#define KVM_FEATURE_PV_UNHALT 7 27#define KVM_FEATURE_PV_UNHALT 7
28#define KVM_FEATURE_PV_TLB_FLUSH 9 28#define KVM_FEATURE_PV_TLB_FLUSH 9
29#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
29 30
30/* The last 8 bits are used to indicate how to interpret the flags field 31/* The last 8 bits are used to indicate how to interpret the flags field
31 * in pvclock structure. If no bits are set, all flags are ignored. 32 * in pvclock structure. If no bits are set, all flags are ignored.
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8ad2e410974f..7c5538769f7e 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1603,7 +1603,7 @@ static void __init delay_with_tsc(void)
1603 do { 1603 do {
1604 rep_nop(); 1604 rep_nop();
1605 now = rdtsc(); 1605 now = rdtsc();
1606 } while ((now - start) < 40000000000UL / HZ && 1606 } while ((now - start) < 40000000000ULL / HZ &&
1607 time_before_eq(jiffies, end)); 1607 time_before_eq(jiffies, end));
1608} 1608}
1609 1609
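
The UL -> ULL change matters on 32-bit x86, where unsigned long is 32 bits wide: 40000000000 does not fit the type its suffix names, which at best draws a compiler warning and leaves the constant's effective type to the dialect in use. The ULL suffix makes the intended 64-bit arithmetic explicit on every configuration. A small demonstration of what squeezing that constant through a 32-bit unsigned long would do (uint32_t stands in for it so the program behaves the same on any host):

    #include <stdio.h>
    #include <stdint.h>
    #include <inttypes.h>

    int main(void)
    {
        unsigned long long full = 40000000000ULL;   /* the delay bound above */
        uint32_t truncated = (uint32_t)full;        /* 32-bit unsigned long view */

        printf("intended value : %llu\n", full);
        printf("truncated value: %" PRIu32 "\n", truncated);
        return 0;
    }
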
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 3cc471beb50b..bb6f7a2148d7 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -134,21 +134,40 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
134{ 134{
135 struct apic_chip_data *apicd = apic_chip_data(irqd); 135 struct apic_chip_data *apicd = apic_chip_data(irqd);
136 struct irq_desc *desc = irq_data_to_desc(irqd); 136 struct irq_desc *desc = irq_data_to_desc(irqd);
137 bool managed = irqd_affinity_is_managed(irqd);
137 138
138 lockdep_assert_held(&vector_lock); 139 lockdep_assert_held(&vector_lock);
139 140
140 trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector, 141 trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector,
141 apicd->cpu); 142 apicd->cpu);
142 143
143 /* Setup the vector move, if required */ 144 /*
144 if (apicd->vector && cpu_online(apicd->cpu)) { 145 * If there is no vector associated or if the associated vector is
146 * the shutdown vector, which is associated to make PCI/MSI
147 * shutdown mode work, then there is nothing to release. Clear out
148 * prev_vector for this and the offlined target case.
149 */
150 apicd->prev_vector = 0;
151 if (!apicd->vector || apicd->vector == MANAGED_IRQ_SHUTDOWN_VECTOR)
152 goto setnew;
153 /*
154 * If the target CPU of the previous vector is online, then mark
155 * the vector as move in progress and store it for cleanup when the
156 * first interrupt on the new vector arrives. If the target CPU is
157 * offline then the regular release mechanism via the cleanup
158 * vector is not possible and the vector can be immediately freed
159 * in the underlying matrix allocator.
160 */
161 if (cpu_online(apicd->cpu)) {
145 apicd->move_in_progress = true; 162 apicd->move_in_progress = true;
146 apicd->prev_vector = apicd->vector; 163 apicd->prev_vector = apicd->vector;
147 apicd->prev_cpu = apicd->cpu; 164 apicd->prev_cpu = apicd->cpu;
148 } else { 165 } else {
149 apicd->prev_vector = 0; 166 irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector,
167 managed);
150 } 168 }
151 169
170setnew:
152 apicd->vector = newvec; 171 apicd->vector = newvec;
153 apicd->cpu = newcpu; 172 apicd->cpu = newcpu;
154 BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec])); 173 BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec]));
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d71c8b54b696..bfca937bdcc3 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -300,6 +300,15 @@ retpoline_auto:
300 setup_force_cpu_cap(X86_FEATURE_USE_IBPB); 300 setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
301 pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); 301 pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
302 } 302 }
303
304 /*
305 * Retpoline means the kernel is safe because it has no indirect
306 * branches. But firmware isn't, so use IBRS to protect that.
307 */
308 if (boot_cpu_has(X86_FEATURE_IBRS)) {
309 setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
310 pr_info("Enabling Restricted Speculation for firmware calls\n");
311 }
303} 312}
304 313
305#undef pr_fmt 314#undef pr_fmt
@@ -326,8 +335,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
326 if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) 335 if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
327 return sprintf(buf, "Not affected\n"); 336 return sprintf(buf, "Not affected\n");
328 337
329 return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], 338 return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
330 boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", 339 boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
340 boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
331 spectre_v2_module_string()); 341 spectre_v2_module_string());
332} 342}
333#endif 343#endif
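
Retpolines only cover indirect branches compiled into the kernel itself; EFI runtime services, APM and other firmware blobs still contain their own indirect branches, so the new X86_FEATURE_USE_IBRS_FW bit tells the firmware-call wrappers (the efi.h/apm.h changes in the diffstat) to raise IBRS in SPEC_CTRL around those calls and drop it again afterwards. A heavily simplified, illustrative-only kernel-context sketch of the idea -- the real code uses ALTERNATIVE patching keyed on the feature bit rather than a runtime branch, and guarded_firmware_call() is an invented name:

    /* Sketch only, not the patch's code. Assumes <asm/msr-index.h> provides
     * MSR_IA32_SPEC_CTRL and SPEC_CTRL_IBRS, as in contemporary kernels. */
    #include <linux/preempt.h>
    #include <asm/msr.h>
    #include <asm/cpufeature.h>

    static void guarded_firmware_call(void (*fw_fn)(void))
    {
        bool guard = boot_cpu_has(X86_FEATURE_USE_IBRS_FW);

        preempt_disable();
        if (guard)
            wrmsrl(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS);  /* restrict prediction */

        fw_fn();                     /* firmware runs with IBRS asserted */

        if (guard)
            wrmsrl(MSR_IA32_SPEC_CTRL, 0);
        preempt_enable();
    }
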
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 824aee0117bb..348cf4821240 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1749,3 +1749,33 @@ static int __init init_cpu_syscore(void)
1749 return 0; 1749 return 0;
1750} 1750}
1751core_initcall(init_cpu_syscore); 1751core_initcall(init_cpu_syscore);
1752
1753/*
1754 * The microcode loader calls this upon late microcode load to recheck features,
1755 * only when microcode has been updated. Caller holds microcode_mutex and CPU
1756 * hotplug lock.
1757 */
1758void microcode_check(void)
1759{
1760 struct cpuinfo_x86 info;
1761
1762 perf_check_microcode();
1763
1764 /* Reload CPUID max function as it might've changed. */
1765 info.cpuid_level = cpuid_eax(0);
1766
1767 /*
1768 * Copy all capability leafs to pick up the synthetic ones so that
1769 * memcmp() below doesn't fail on that. The ones coming from CPUID will
1770 * get overwritten in get_cpu_cap().
1771 */
1772 memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability));
1773
1774 get_cpu_cap(&info);
1775
1776 if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)))
1777 return;
1778
1779 pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n");
1780 pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
1781}
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index bdab7d2f51af..fca759d272a1 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -1804,6 +1804,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
1804 goto out_common_fail; 1804 goto out_common_fail;
1805 } 1805 }
1806 closid = ret; 1806 closid = ret;
1807 ret = 0;
1807 1808
1808 rdtgrp->closid = closid; 1809 rdtgrp->closid = closid;
1809 list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups); 1810 list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 330b8462d426..a998e1a7d46f 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -498,7 +498,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
498 return patch_size; 498 return patch_size;
499} 499}
500 500
501static int apply_microcode_amd(int cpu) 501static enum ucode_state apply_microcode_amd(int cpu)
502{ 502{
503 struct cpuinfo_x86 *c = &cpu_data(cpu); 503 struct cpuinfo_x86 *c = &cpu_data(cpu);
504 struct microcode_amd *mc_amd; 504 struct microcode_amd *mc_amd;
@@ -512,7 +512,7 @@ static int apply_microcode_amd(int cpu)
512 512
513 p = find_patch(cpu); 513 p = find_patch(cpu);
514 if (!p) 514 if (!p)
515 return 0; 515 return UCODE_NFOUND;
516 516
517 mc_amd = p->data; 517 mc_amd = p->data;
518 uci->mc = p->data; 518 uci->mc = p->data;
@@ -523,13 +523,13 @@ static int apply_microcode_amd(int cpu)
523 if (rev >= mc_amd->hdr.patch_id) { 523 if (rev >= mc_amd->hdr.patch_id) {
524 c->microcode = rev; 524 c->microcode = rev;
525 uci->cpu_sig.rev = rev; 525 uci->cpu_sig.rev = rev;
526 return 0; 526 return UCODE_OK;
527 } 527 }
528 528
529 if (__apply_microcode_amd(mc_amd)) { 529 if (__apply_microcode_amd(mc_amd)) {
530 pr_err("CPU%d: update failed for patch_level=0x%08x\n", 530 pr_err("CPU%d: update failed for patch_level=0x%08x\n",
531 cpu, mc_amd->hdr.patch_id); 531 cpu, mc_amd->hdr.patch_id);
532 return -1; 532 return UCODE_ERROR;
533 } 533 }
534 pr_info("CPU%d: new patch_level=0x%08x\n", cpu, 534 pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
535 mc_amd->hdr.patch_id); 535 mc_amd->hdr.patch_id);
@@ -537,7 +537,7 @@ static int apply_microcode_amd(int cpu)
537 uci->cpu_sig.rev = mc_amd->hdr.patch_id; 537 uci->cpu_sig.rev = mc_amd->hdr.patch_id;
538 c->microcode = mc_amd->hdr.patch_id; 538 c->microcode = mc_amd->hdr.patch_id;
539 539
540 return 0; 540 return UCODE_UPDATED;
541} 541}
542 542
543static int install_equiv_cpu_table(const u8 *buf) 543static int install_equiv_cpu_table(const u8 *buf)
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 319dd65f98a2..aa1b9a422f2b 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -374,7 +374,7 @@ static int collect_cpu_info(int cpu)
374} 374}
375 375
376struct apply_microcode_ctx { 376struct apply_microcode_ctx {
377 int err; 377 enum ucode_state err;
378}; 378};
379 379
380static void apply_microcode_local(void *arg) 380static void apply_microcode_local(void *arg)
@@ -489,31 +489,30 @@ static void __exit microcode_dev_exit(void)
489/* fake device for request_firmware */ 489/* fake device for request_firmware */
490static struct platform_device *microcode_pdev; 490static struct platform_device *microcode_pdev;
491 491
492static int reload_for_cpu(int cpu) 492static enum ucode_state reload_for_cpu(int cpu)
493{ 493{
494 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 494 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
495 enum ucode_state ustate; 495 enum ucode_state ustate;
496 int err = 0;
497 496
498 if (!uci->valid) 497 if (!uci->valid)
499 return err; 498 return UCODE_OK;
500 499
501 ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true); 500 ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
502 if (ustate == UCODE_OK) 501 if (ustate != UCODE_OK)
503 apply_microcode_on_target(cpu); 502 return ustate;
504 else 503
505 if (ustate == UCODE_ERROR) 504 return apply_microcode_on_target(cpu);
506 err = -EINVAL;
507 return err;
508} 505}
509 506
510static ssize_t reload_store(struct device *dev, 507static ssize_t reload_store(struct device *dev,
511 struct device_attribute *attr, 508 struct device_attribute *attr,
512 const char *buf, size_t size) 509 const char *buf, size_t size)
513{ 510{
511 enum ucode_state tmp_ret = UCODE_OK;
512 bool do_callback = false;
514 unsigned long val; 513 unsigned long val;
514 ssize_t ret = 0;
515 int cpu; 515 int cpu;
516 ssize_t ret = 0, tmp_ret;
517 516
518 ret = kstrtoul(buf, 0, &val); 517 ret = kstrtoul(buf, 0, &val);
519 if (ret) 518 if (ret)
@@ -526,15 +525,21 @@ static ssize_t reload_store(struct device *dev,
526 mutex_lock(&microcode_mutex); 525 mutex_lock(&microcode_mutex);
527 for_each_online_cpu(cpu) { 526 for_each_online_cpu(cpu) {
528 tmp_ret = reload_for_cpu(cpu); 527 tmp_ret = reload_for_cpu(cpu);
529 if (tmp_ret != 0) 528 if (tmp_ret > UCODE_NFOUND) {
530 pr_warn("Error reloading microcode on CPU %d\n", cpu); 529 pr_warn("Error reloading microcode on CPU %d\n", cpu);
531 530
532 /* save retval of the first encountered reload error */ 531 /* set retval for the first encountered reload error */
533 if (!ret) 532 if (!ret)
534 ret = tmp_ret; 533 ret = -EINVAL;
534 }
535
536 if (tmp_ret == UCODE_UPDATED)
537 do_callback = true;
535 } 538 }
536 if (!ret) 539
537 perf_check_microcode(); 540 if (!ret && do_callback)
541 microcode_check();
542
538 mutex_unlock(&microcode_mutex); 543 mutex_unlock(&microcode_mutex);
539 put_online_cpus(); 544 put_online_cpus();
540 545
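
reload_for_cpu() and the vendor apply_microcode_*() helpers now return enum ucode_state instead of ad-hoc ints, and reload_store() keys off it twice: anything ordered after UCODE_NFOUND is treated as a reload error, and only an actual UCODE_UPDATED result triggers the new microcode_check() recheck of CPUID features. Both tests lean on the enumerator order; a sketch of the ordering this code implies (the real enum lives in asm/microcode.h and is assumed here, not quoted):

    /* Assumed ordering, inferred from the "> UCODE_NFOUND" test above. */
    enum ucode_state {
        UCODE_OK = 0,      /* nothing to do / already current      */
        UCODE_UPDATED,     /* new microcode was applied            */
        UCODE_NFOUND,      /* no matching patch for this CPU       */
        UCODE_ERROR,       /* an update was attempted and failed   */
    };

    /* reload_store()'s decisions, restated: */
    static inline int reload_failed(enum ucode_state s)  { return s > UCODE_NFOUND; }
    static inline int reload_applied(enum ucode_state s) { return s == UCODE_UPDATED; }
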
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index a15db2b4e0d6..923054a6b760 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -772,7 +772,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
772 return 0; 772 return 0;
773} 773}
774 774
775static int apply_microcode_intel(int cpu) 775static enum ucode_state apply_microcode_intel(int cpu)
776{ 776{
777 struct microcode_intel *mc; 777 struct microcode_intel *mc;
778 struct ucode_cpu_info *uci; 778 struct ucode_cpu_info *uci;
@@ -782,7 +782,7 @@ static int apply_microcode_intel(int cpu)
782 782
783 /* We should bind the task to the CPU */ 783 /* We should bind the task to the CPU */
784 if (WARN_ON(raw_smp_processor_id() != cpu)) 784 if (WARN_ON(raw_smp_processor_id() != cpu))
785 return -1; 785 return UCODE_ERROR;
786 786
787 uci = ucode_cpu_info + cpu; 787 uci = ucode_cpu_info + cpu;
788 mc = uci->mc; 788 mc = uci->mc;
@@ -790,7 +790,7 @@ static int apply_microcode_intel(int cpu)
790 /* Look for a newer patch in our cache: */ 790 /* Look for a newer patch in our cache: */
791 mc = find_patch(uci); 791 mc = find_patch(uci);
792 if (!mc) 792 if (!mc)
793 return 0; 793 return UCODE_NFOUND;
794 } 794 }
795 795
796 /* write microcode via MSR 0x79 */ 796 /* write microcode via MSR 0x79 */
@@ -801,7 +801,7 @@ static int apply_microcode_intel(int cpu)
801 if (rev != mc->hdr.rev) { 801 if (rev != mc->hdr.rev) {
802 pr_err("CPU%d update to revision 0x%x failed\n", 802 pr_err("CPU%d update to revision 0x%x failed\n",
803 cpu, mc->hdr.rev); 803 cpu, mc->hdr.rev);
804 return -1; 804 return UCODE_ERROR;
805 } 805 }
806 806
807 if (rev != prev_rev) { 807 if (rev != prev_rev) {
@@ -818,7 +818,7 @@ static int apply_microcode_intel(int cpu)
818 uci->cpu_sig.rev = rev; 818 uci->cpu_sig.rev = rev;
819 c->microcode = rev; 819 c->microcode = rev;
820 820
821 return 0; 821 return UCODE_UPDATED;
822} 822}
823 823
824static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, 824static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 04a625f0fcda..0f545b3cf926 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -23,6 +23,7 @@
23#include <asm/nops.h> 23#include <asm/nops.h>
24#include "../entry/calling.h" 24#include "../entry/calling.h"
25#include <asm/export.h> 25#include <asm/export.h>
26#include <asm/nospec-branch.h>
26 27
27#ifdef CONFIG_PARAVIRT 28#ifdef CONFIG_PARAVIRT
28#include <asm/asm-offsets.h> 29#include <asm/asm-offsets.h>
@@ -134,6 +135,7 @@ ENTRY(secondary_startup_64)
134 135
135 /* Ensure I am executing from virtual addresses */ 136 /* Ensure I am executing from virtual addresses */
136 movq $1f, %rax 137 movq $1f, %rax
138 ANNOTATE_RETPOLINE_SAFE
137 jmp *%rax 139 jmp *%rax
1381: 1401:
139 UNWIND_HINT_EMPTY 141 UNWIND_HINT_EMPTY
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 4e37d1a851a6..bc1a27280c4b 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -49,7 +49,7 @@
49 49
50static int kvmapf = 1; 50static int kvmapf = 1;
51 51
52static int parse_no_kvmapf(char *arg) 52static int __init parse_no_kvmapf(char *arg)
53{ 53{
54 kvmapf = 0; 54 kvmapf = 0;
55 return 0; 55 return 0;
@@ -58,7 +58,7 @@ static int parse_no_kvmapf(char *arg)
58early_param("no-kvmapf", parse_no_kvmapf); 58early_param("no-kvmapf", parse_no_kvmapf);
59 59
60static int steal_acc = 1; 60static int steal_acc = 1;
61static int parse_no_stealacc(char *arg) 61static int __init parse_no_stealacc(char *arg)
62{ 62{
63 steal_acc = 0; 63 steal_acc = 0;
64 return 0; 64 return 0;
@@ -67,7 +67,7 @@ static int parse_no_stealacc(char *arg)
67early_param("no-steal-acc", parse_no_stealacc); 67early_param("no-steal-acc", parse_no_stealacc);
68 68
69static int kvmclock_vsyscall = 1; 69static int kvmclock_vsyscall = 1;
70static int parse_no_kvmclock_vsyscall(char *arg) 70static int __init parse_no_kvmclock_vsyscall(char *arg)
71{ 71{
72 kvmclock_vsyscall = 0; 72 kvmclock_vsyscall = 0;
73 return 0; 73 return 0;
@@ -341,10 +341,10 @@ static void kvm_guest_cpu_init(void)
341#endif 341#endif
342 pa |= KVM_ASYNC_PF_ENABLED; 342 pa |= KVM_ASYNC_PF_ENABLED;
343 343
344 /* Async page fault support for L1 hypervisor is optional */ 344 if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
345 if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN, 345 pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
346 (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0) 346
347 wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); 347 wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
348 __this_cpu_write(apf_reason.enabled, 1); 348 __this_cpu_write(apf_reason.enabled, 1);
349 printk(KERN_INFO"KVM setup async PF for cpu %d\n", 349 printk(KERN_INFO"KVM setup async PF for cpu %d\n",
350 smp_processor_id()); 350 smp_processor_id());
@@ -545,7 +545,8 @@ static void __init kvm_guest_init(void)
545 pv_time_ops.steal_clock = kvm_steal_clock; 545 pv_time_ops.steal_clock = kvm_steal_clock;
546 } 546 }
547 547
548 if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) 548 if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
549 !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
549 pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; 550 pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
550 551
551 if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) 552 if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
@@ -633,7 +634,8 @@ static __init int kvm_setup_pv_tlb_flush(void)
633{ 634{
634 int cpu; 635 int cpu;
635 636
636 if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) { 637 if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
638 !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
637 for_each_possible_cpu(cpu) { 639 for_each_possible_cpu(cpu) {
638 zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), 640 zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
639 GFP_KERNEL, cpu_to_node(cpu)); 641 GFP_KERNEL, cpu_to_node(cpu));
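
Two guest-side fixes here: async page faults are only requested in "delivery as #PF vmexit" mode when the host actually advertises KVM_FEATURE_ASYNC_PF_VMEXIT (so the wrmsr_safe() probing dance goes away), and the paravirtual TLB flush is skipped when steal time is unavailable, since kvm_flush_tlb_others() depends on the steal-time "preempted" flag to know which vCPUs it may skip. KVM paravirt feature bits are plain CPUID bits in leaf 0x40000001 EAX, so the check is easy to reproduce from inside a guest; a hedged userspace sketch (the bit number comes from the kvm_para.h hunk above, the CPUID leaf numbers are assumed from the KVM paravirt interface):

    #include <stdio.h>
    #include <string.h>
    #include <cpuid.h>

    #define KVM_CPUID_SIGNATURE         0x40000000
    #define KVM_CPUID_FEATURES          0x40000001
    #define KVM_FEATURE_ASYNC_PF_VMEXIT 10      /* from the uapi hunk above */

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;
        char sig[13] = { 0 };

        __cpuid(KVM_CPUID_SIGNATURE, eax, ebx, ecx, edx);
        memcpy(sig + 0, &ebx, 4);
        memcpy(sig + 4, &ecx, 4);
        memcpy(sig + 8, &edx, 4);
        if (strcmp(sig, "KVMKVMKVM")) {
            puts("not running as a KVM guest");
            return 1;
        }

        __cpuid(KVM_CPUID_FEATURES, eax, ebx, ecx, edx);
        printf("KVM_FEATURE_ASYNC_PF_VMEXIT: %s\n",
               (eax >> KVM_FEATURE_ASYNC_PF_VMEXIT) & 1 ? "yes" : "no");
        return 0;
    }
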
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 1f790cf9d38f..3b7427aa7d85 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -542,6 +542,7 @@ int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr,
542 goto overflow; 542 goto overflow;
543 break; 543 break;
544 case R_X86_64_PC32: 544 case R_X86_64_PC32:
545 case R_X86_64_PLT32:
545 value -= (u64)address; 546 value -= (u64)address;
546 *(u32 *)location = value; 547 *(u32 *)location = value;
547 break; 548 break;
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index da0c160e5589..f58336af095c 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -191,6 +191,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
191 goto overflow; 191 goto overflow;
192 break; 192 break;
193 case R_X86_64_PC32: 193 case R_X86_64_PC32:
194 case R_X86_64_PLT32:
194 if (*(u32 *)loc != 0) 195 if (*(u32 *)loc != 0)
195 goto invalid_relocation; 196 goto invalid_relocation;
196 val -= (u64)loc; 197 val -= (u64)loc;
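
The R_X86_64_PLT32 additions here, in the kexec relocator above and in tools/relocs.c further down are all the same fix: newer binutils emit R_X86_64_PLT32 rather than R_X86_64_PC32 for calls and jumps to global symbols, and because the kernel image has no PLT the relocation resolves exactly like PC32, i.e. S + A - P stored into a 32-bit field. A standalone restatement of that fixup with an invented helper name:

    #include <stdint.h>
    #include <string.h>

    /* Hypothetical helper mirroring what both relocation types get in a
     * PLT-less image: value = symbol + addend - place, packed into 32 bits. */
    static void apply_pcrel32(uint8_t *loc, uint64_t sym_addr,
                              int64_t addend, uint64_t place)
    {
        int64_t value = (int64_t)(sym_addr + addend) - (int64_t)place;
        int32_t packed = (int32_t)value;   /* callers must verify it fits */

        memcpy(loc, &packed, sizeof(packed));
    }
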
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9eee25d07586..ff99e2b6fc54 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1437,6 +1437,7 @@ static void remove_siblinginfo(int cpu)
1437 cpumask_clear(topology_sibling_cpumask(cpu)); 1437 cpumask_clear(topology_sibling_cpumask(cpu));
1438 cpumask_clear(topology_core_cpumask(cpu)); 1438 cpumask_clear(topology_core_cpumask(cpu));
1439 c->cpu_core_id = 0; 1439 c->cpu_core_id = 0;
1440 c->booted_cores = 0;
1440 cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); 1441 cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
1441 recompute_smt_state(); 1442 recompute_smt_state();
1442} 1443}
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 1f9188f5357c..feb28fee6cea 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -5,7 +5,6 @@
5#include <asm/unwind.h> 5#include <asm/unwind.h>
6#include <asm/orc_types.h> 6#include <asm/orc_types.h>
7#include <asm/orc_lookup.h> 7#include <asm/orc_lookup.h>
8#include <asm/sections.h>
9 8
10#define orc_warn(fmt, ...) \ 9#define orc_warn(fmt, ...) \
11 printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__) 10 printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
@@ -148,7 +147,7 @@ static struct orc_entry *orc_find(unsigned long ip)
148 } 147 }
149 148
150 /* vmlinux .init slow lookup: */ 149 /* vmlinux .init slow lookup: */
151 if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext) 150 if (init_kernel_text(ip))
152 return __orc_find(__start_orc_unwind_ip, __start_orc_unwind, 151 return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
153 __stop_orc_unwind_ip - __start_orc_unwind_ip, ip); 152 __stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
154 153
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index a0c5a69bc7c4..b671fc2d0422 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -607,7 +607,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
607 (1 << KVM_FEATURE_PV_EOI) | 607 (1 << KVM_FEATURE_PV_EOI) |
608 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | 608 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
609 (1 << KVM_FEATURE_PV_UNHALT) | 609 (1 << KVM_FEATURE_PV_UNHALT) |
610 (1 << KVM_FEATURE_PV_TLB_FLUSH); 610 (1 << KVM_FEATURE_PV_TLB_FLUSH) |
611 (1 << KVM_FEATURE_ASYNC_PF_VMEXIT);
611 612
612 if (sched_info_on()) 613 if (sched_info_on())
613 entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); 614 entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 924ac8ce9d50..391dda8d43b7 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2002,14 +2002,13 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
2002 2002
2003void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) 2003void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
2004{ 2004{
2005 struct kvm_lapic *apic; 2005 struct kvm_lapic *apic = vcpu->arch.apic;
2006 int i; 2006 int i;
2007 2007
2008 apic_debug("%s\n", __func__); 2008 if (!apic)
2009 return;
2009 2010
2010 ASSERT(vcpu); 2011 apic_debug("%s\n", __func__);
2011 apic = vcpu->arch.apic;
2012 ASSERT(apic != NULL);
2013 2012
2014 /* Stop the timer in case it's a reset to an active apic */ 2013 /* Stop the timer in case it's a reset to an active apic */
2015 hrtimer_cancel(&apic->lapic_timer.timer); 2014 hrtimer_cancel(&apic->lapic_timer.timer);
@@ -2165,7 +2164,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
2165 */ 2164 */
2166 vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; 2165 vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
2167 static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ 2166 static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
2168 kvm_lapic_reset(vcpu, false);
2169 kvm_iodevice_init(&apic->dev, &apic_mmio_ops); 2167 kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
2170 2168
2171 return 0; 2169 return 0;
@@ -2569,7 +2567,6 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
2569 2567
2570 pe = xchg(&apic->pending_events, 0); 2568 pe = xchg(&apic->pending_events, 0);
2571 if (test_bit(KVM_APIC_INIT, &pe)) { 2569 if (test_bit(KVM_APIC_INIT, &pe)) {
2572 kvm_lapic_reset(vcpu, true);
2573 kvm_vcpu_reset(vcpu, true); 2570 kvm_vcpu_reset(vcpu, true);
2574 if (kvm_vcpu_is_bsp(apic->vcpu)) 2571 if (kvm_vcpu_is_bsp(apic->vcpu))
2575 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 2572 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 46ff304140c7..f551962ac294 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3029,7 +3029,7 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
3029 return RET_PF_RETRY; 3029 return RET_PF_RETRY;
3030 } 3030 }
3031 3031
3032 return -EFAULT; 3032 return RET_PF_EMULATE;
3033} 3033}
3034 3034
3035static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, 3035static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b3e488a74828..be9c839e2c89 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -49,6 +49,7 @@
49#include <asm/debugreg.h> 49#include <asm/debugreg.h>
50#include <asm/kvm_para.h> 50#include <asm/kvm_para.h>
51#include <asm/irq_remapping.h> 51#include <asm/irq_remapping.h>
52#include <asm/microcode.h>
52#include <asm/nospec-branch.h> 53#include <asm/nospec-branch.h>
53 54
54#include <asm/virtext.h> 55#include <asm/virtext.h>
@@ -178,6 +179,8 @@ struct vcpu_svm {
178 uint64_t sysenter_eip; 179 uint64_t sysenter_eip;
179 uint64_t tsc_aux; 180 uint64_t tsc_aux;
180 181
182 u64 msr_decfg;
183
181 u64 next_rip; 184 u64 next_rip;
182 185
183 u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; 186 u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
@@ -300,6 +303,8 @@ module_param(vgif, int, 0444);
300static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); 303static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
301module_param(sev, int, 0444); 304module_param(sev, int, 0444);
302 305
306static u8 rsm_ins_bytes[] = "\x0f\xaa";
307
303static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); 308static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
304static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa); 309static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
305static void svm_complete_interrupts(struct vcpu_svm *svm); 310static void svm_complete_interrupts(struct vcpu_svm *svm);
@@ -1383,6 +1388,7 @@ static void init_vmcb(struct vcpu_svm *svm)
1383 set_intercept(svm, INTERCEPT_SKINIT); 1388 set_intercept(svm, INTERCEPT_SKINIT);
1384 set_intercept(svm, INTERCEPT_WBINVD); 1389 set_intercept(svm, INTERCEPT_WBINVD);
1385 set_intercept(svm, INTERCEPT_XSETBV); 1390 set_intercept(svm, INTERCEPT_XSETBV);
1391 set_intercept(svm, INTERCEPT_RSM);
1386 1392
1387 if (!kvm_mwait_in_guest()) { 1393 if (!kvm_mwait_in_guest()) {
1388 set_intercept(svm, INTERCEPT_MONITOR); 1394 set_intercept(svm, INTERCEPT_MONITOR);
@@ -1902,6 +1908,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
1902 u32 dummy; 1908 u32 dummy;
1903 u32 eax = 1; 1909 u32 eax = 1;
1904 1910
1911 vcpu->arch.microcode_version = 0x01000065;
1905 svm->spec_ctrl = 0; 1912 svm->spec_ctrl = 0;
1906 1913
1907 if (!init_event) { 1914 if (!init_event) {
@@ -3699,6 +3706,12 @@ static int emulate_on_interception(struct vcpu_svm *svm)
3699 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; 3706 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
3700} 3707}
3701 3708
3709static int rsm_interception(struct vcpu_svm *svm)
3710{
3711 return x86_emulate_instruction(&svm->vcpu, 0, 0,
3712 rsm_ins_bytes, 2) == EMULATE_DONE;
3713}
3714
3702static int rdpmc_interception(struct vcpu_svm *svm) 3715static int rdpmc_interception(struct vcpu_svm *svm)
3703{ 3716{
3704 int err; 3717 int err;
@@ -3860,6 +3873,22 @@ static int cr8_write_interception(struct vcpu_svm *svm)
3860 return 0; 3873 return 0;
3861} 3874}
3862 3875
3876static int svm_get_msr_feature(struct kvm_msr_entry *msr)
3877{
3878 msr->data = 0;
3879
3880 switch (msr->index) {
3881 case MSR_F10H_DECFG:
3882 if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
3883 msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
3884 break;
3885 default:
3886 return 1;
3887 }
3888
3889 return 0;
3890}
3891
3863static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) 3892static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3864{ 3893{
3865 struct vcpu_svm *svm = to_svm(vcpu); 3894 struct vcpu_svm *svm = to_svm(vcpu);
@@ -3935,9 +3964,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3935 3964
3936 msr_info->data = svm->spec_ctrl; 3965 msr_info->data = svm->spec_ctrl;
3937 break; 3966 break;
3938 case MSR_IA32_UCODE_REV:
3939 msr_info->data = 0x01000065;
3940 break;
3941 case MSR_F15H_IC_CFG: { 3967 case MSR_F15H_IC_CFG: {
3942 3968
3943 int family, model; 3969 int family, model;
@@ -3955,6 +3981,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3955 msr_info->data = 0x1E; 3981 msr_info->data = 0x1E;
3956 } 3982 }
3957 break; 3983 break;
3984 case MSR_F10H_DECFG:
3985 msr_info->data = svm->msr_decfg;
3986 break;
3958 default: 3987 default:
3959 return kvm_get_msr_common(vcpu, msr_info); 3988 return kvm_get_msr_common(vcpu, msr_info);
3960 } 3989 }
@@ -4133,6 +4162,24 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
4133 case MSR_VM_IGNNE: 4162 case MSR_VM_IGNNE:
4134 vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); 4163 vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
4135 break; 4164 break;
4165 case MSR_F10H_DECFG: {
4166 struct kvm_msr_entry msr_entry;
4167
4168 msr_entry.index = msr->index;
4169 if (svm_get_msr_feature(&msr_entry))
4170 return 1;
4171
4172 /* Check the supported bits */
4173 if (data & ~msr_entry.data)
4174 return 1;
4175
4176 /* Don't allow the guest to change a bit, #GP */
4177 if (!msr->host_initiated && (data ^ msr_entry.data))
4178 return 1;
4179
4180 svm->msr_decfg = data;
4181 break;
4182 }
4136 case MSR_IA32_APICBASE: 4183 case MSR_IA32_APICBASE:
4137 if (kvm_vcpu_apicv_active(vcpu)) 4184 if (kvm_vcpu_apicv_active(vcpu))
4138 avic_update_vapic_bar(to_svm(vcpu), data); 4185 avic_update_vapic_bar(to_svm(vcpu), data);
@@ -4541,7 +4588,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
4541 [SVM_EXIT_MWAIT] = mwait_interception, 4588 [SVM_EXIT_MWAIT] = mwait_interception,
4542 [SVM_EXIT_XSETBV] = xsetbv_interception, 4589 [SVM_EXIT_XSETBV] = xsetbv_interception,
4543 [SVM_EXIT_NPF] = npf_interception, 4590 [SVM_EXIT_NPF] = npf_interception,
4544 [SVM_EXIT_RSM] = emulate_on_interception, 4591 [SVM_EXIT_RSM] = rsm_interception,
4545 [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, 4592 [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
4546 [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception, 4593 [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception,
4547}; 4594};
@@ -5355,7 +5402,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
5355 * being speculatively taken. 5402 * being speculatively taken.
5356 */ 5403 */
5357 if (svm->spec_ctrl) 5404 if (svm->spec_ctrl)
5358 wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); 5405 native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
5359 5406
5360 asm volatile ( 5407 asm volatile (
5361 "push %%" _ASM_BP "; \n\t" 5408 "push %%" _ASM_BP "; \n\t"
@@ -5464,11 +5511,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
5464 * If the L02 MSR bitmap does not intercept the MSR, then we need to 5511 * If the L02 MSR bitmap does not intercept the MSR, then we need to
5465 * save it. 5512 * save it.
5466 */ 5513 */
5467 if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) 5514 if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
5468 rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); 5515 svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
5469 5516
5470 if (svm->spec_ctrl) 5517 if (svm->spec_ctrl)
5471 wrmsrl(MSR_IA32_SPEC_CTRL, 0); 5518 native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
5472 5519
5473 /* Eliminate branch target predictions from guest mode */ 5520 /* Eliminate branch target predictions from guest mode */
5474 vmexit_fill_RSB(); 5521 vmexit_fill_RSB();
@@ -6236,16 +6283,18 @@ e_free:
6236 6283
6237static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) 6284static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
6238{ 6285{
6286 void __user *measure = (void __user *)(uintptr_t)argp->data;
6239 struct kvm_sev_info *sev = &kvm->arch.sev_info; 6287 struct kvm_sev_info *sev = &kvm->arch.sev_info;
6240 struct sev_data_launch_measure *data; 6288 struct sev_data_launch_measure *data;
6241 struct kvm_sev_launch_measure params; 6289 struct kvm_sev_launch_measure params;
6290 void __user *p = NULL;
6242 void *blob = NULL; 6291 void *blob = NULL;
6243 int ret; 6292 int ret;
6244 6293
6245 if (!sev_guest(kvm)) 6294 if (!sev_guest(kvm))
6246 return -ENOTTY; 6295 return -ENOTTY;
6247 6296
6248 if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params))) 6297 if (copy_from_user(&params, measure, sizeof(params)))
6249 return -EFAULT; 6298 return -EFAULT;
6250 6299
6251 data = kzalloc(sizeof(*data), GFP_KERNEL); 6300 data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -6256,17 +6305,13 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
6256 if (!params.len) 6305 if (!params.len)
6257 goto cmd; 6306 goto cmd;
6258 6307
6259 if (params.uaddr) { 6308 p = (void __user *)(uintptr_t)params.uaddr;
6309 if (p) {
6260 if (params.len > SEV_FW_BLOB_MAX_SIZE) { 6310 if (params.len > SEV_FW_BLOB_MAX_SIZE) {
6261 ret = -EINVAL; 6311 ret = -EINVAL;
6262 goto e_free; 6312 goto e_free;
6263 } 6313 }
6264 6314
6265 if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) {
6266 ret = -EFAULT;
6267 goto e_free;
6268 }
6269
6270 ret = -ENOMEM; 6315 ret = -ENOMEM;
6271 blob = kmalloc(params.len, GFP_KERNEL); 6316 blob = kmalloc(params.len, GFP_KERNEL);
6272 if (!blob) 6317 if (!blob)
@@ -6290,13 +6335,13 @@ cmd:
6290 goto e_free_blob; 6335 goto e_free_blob;
6291 6336
6292 if (blob) { 6337 if (blob) {
6293 if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len)) 6338 if (copy_to_user(p, blob, params.len))
6294 ret = -EFAULT; 6339 ret = -EFAULT;
6295 } 6340 }
6296 6341
6297done: 6342done:
6298 params.len = data->len; 6343 params.len = data->len;
6299 if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) 6344 if (copy_to_user(measure, &params, sizeof(params)))
6300 ret = -EFAULT; 6345 ret = -EFAULT;
6301e_free_blob: 6346e_free_blob:
6302 kfree(blob); 6347 kfree(blob);
@@ -6597,7 +6642,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
6597 struct page **pages; 6642 struct page **pages;
6598 void *blob, *hdr; 6643 void *blob, *hdr;
6599 unsigned long n; 6644 unsigned long n;
6600 int ret; 6645 int ret, offset;
6601 6646
6602 if (!sev_guest(kvm)) 6647 if (!sev_guest(kvm))
6603 return -ENOTTY; 6648 return -ENOTTY;
@@ -6623,6 +6668,10 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
6623 if (!data) 6668 if (!data)
6624 goto e_unpin_memory; 6669 goto e_unpin_memory;
6625 6670
6671 offset = params.guest_uaddr & (PAGE_SIZE - 1);
6672 data->guest_address = __sme_page_pa(pages[0]) + offset;
6673 data->guest_len = params.guest_len;
6674
6626 blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len); 6675 blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
6627 if (IS_ERR(blob)) { 6676 if (IS_ERR(blob)) {
6628 ret = PTR_ERR(blob); 6677 ret = PTR_ERR(blob);
@@ -6637,8 +6686,8 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
6637 ret = PTR_ERR(hdr); 6686 ret = PTR_ERR(hdr);
6638 goto e_free_blob; 6687 goto e_free_blob;
6639 } 6688 }
6640 data->trans_address = __psp_pa(blob); 6689 data->hdr_address = __psp_pa(hdr);
6641 data->trans_len = params.trans_len; 6690 data->hdr_len = params.hdr_len;
6642 6691
6643 data->handle = sev->handle; 6692 data->handle = sev->handle;
6644 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error); 6693 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
@@ -6821,6 +6870,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
6821 .vcpu_unblocking = svm_vcpu_unblocking, 6870 .vcpu_unblocking = svm_vcpu_unblocking,
6822 6871
6823 .update_bp_intercept = update_bp_intercept, 6872 .update_bp_intercept = update_bp_intercept,
6873 .get_msr_feature = svm_get_msr_feature,
6824 .get_msr = svm_get_msr, 6874 .get_msr = svm_get_msr,
6825 .set_msr = svm_set_msr, 6875 .set_msr = svm_set_msr,
6826 .get_segment_base = svm_get_segment_base, 6876 .get_segment_base = svm_get_segment_base,
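
The MSR_F10H_DECFG plumbing above is SVM's half of the new MSR-based feature interface (see the x86.c hunks below): svm_get_msr_feature() reports which DECFG bits the host can offer -- currently just LFENCE_SERIALIZE when the CPU has X86_FEATURE_LFENCE_RDTSC -- the write path lets the host seed any subset of those bits, and guest writes that would flip anything take a #GP. The spec_ctrl save/restore in svm_vcpu_run() also switches to native_read_msr()/native_wrmsrl() so that hot path does not go through a paravirt indirect call. The DECFG write policy, restated as a standalone predicate (function and parameter names invented; the logic mirrors the MSR_F10H_DECFG case above):

    #include <stdint.h>
    #include <stdbool.h>

    /* 'host_default' is what svm_get_msr_feature() reports for MSR_F10H_DECFG. */
    static bool decfg_write_allowed(uint64_t host_default, uint64_t data,
                                    bool host_initiated)
    {
        if (data & ~host_default)                      /* unsupported bits */
            return false;
        if (!host_initiated && (data ^ host_default))  /* guest may not change bits */
            return false;
        return true;
    }
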
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3dec126aa302..051dab74e4e9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -51,6 +51,7 @@
51#include <asm/apic.h> 51#include <asm/apic.h>
52#include <asm/irq_remapping.h> 52#include <asm/irq_remapping.h>
53#include <asm/mmu_context.h> 53#include <asm/mmu_context.h>
54#include <asm/microcode.h>
54#include <asm/nospec-branch.h> 55#include <asm/nospec-branch.h>
55 56
56#include "trace.h" 57#include "trace.h"
@@ -3226,6 +3227,11 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
3226 return !(val & ~valid_bits); 3227 return !(val & ~valid_bits);
3227} 3228}
3228 3229
3230static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
3231{
3232 return 1;
3233}
3234
3229/* 3235/*
3230 * Reads an msr value (of 'msr_index') into 'pdata'. 3236 * Reads an msr value (of 'msr_index') into 'pdata'.
3231 * Returns 0 on success, non-0 otherwise. 3237 * Returns 0 on success, non-0 otherwise.
@@ -4485,7 +4491,8 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
4485 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, 4491 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
4486 SECONDARY_EXEC_DESC); 4492 SECONDARY_EXEC_DESC);
4487 hw_cr4 &= ~X86_CR4_UMIP; 4493 hw_cr4 &= ~X86_CR4_UMIP;
4488 } else 4494 } else if (!is_guest_mode(vcpu) ||
4495 !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
4489 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, 4496 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
4490 SECONDARY_EXEC_DESC); 4497 SECONDARY_EXEC_DESC);
4491 4498
@@ -5765,6 +5772,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
5765 vmx->rmode.vm86_active = 0; 5772 vmx->rmode.vm86_active = 0;
5766 vmx->spec_ctrl = 0; 5773 vmx->spec_ctrl = 0;
5767 5774
5775 vcpu->arch.microcode_version = 0x100000000ULL;
5768 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); 5776 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
5769 kvm_set_cr8(vcpu, 0); 5777 kvm_set_cr8(vcpu, 0);
5770 5778
@@ -9452,7 +9460,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
9452 * being speculatively taken. 9460 * being speculatively taken.
9453 */ 9461 */
9454 if (vmx->spec_ctrl) 9462 if (vmx->spec_ctrl)
9455 wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); 9463 native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
9456 9464
9457 vmx->__launched = vmx->loaded_vmcs->launched; 9465 vmx->__launched = vmx->loaded_vmcs->launched;
9458 asm( 9466 asm(
@@ -9587,11 +9595,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
9587 * If the L02 MSR bitmap does not intercept the MSR, then we need to 9595 * If the L02 MSR bitmap does not intercept the MSR, then we need to
9588 * save it. 9596 * save it.
9589 */ 9597 */
9590 if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) 9598 if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
9591 rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); 9599 vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
9592 9600
9593 if (vmx->spec_ctrl) 9601 if (vmx->spec_ctrl)
9594 wrmsrl(MSR_IA32_SPEC_CTRL, 0); 9602 native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
9595 9603
9596 /* Eliminate branch target predictions from guest mode */ 9604 /* Eliminate branch target predictions from guest mode */
9597 vmexit_fill_RSB(); 9605 vmexit_fill_RSB();
@@ -11199,7 +11207,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
11199 if (ret) 11207 if (ret)
11200 return ret; 11208 return ret;
11201 11209
11202 if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) 11210 /*
11211 * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken
11212 * by event injection, halt vcpu.
11213 */
11214 if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
11215 !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK))
11203 return kvm_vcpu_halt(vcpu); 11216 return kvm_vcpu_halt(vcpu);
11204 11217
11205 vmx->nested.nested_run_pending = 1; 11218 vmx->nested.nested_run_pending = 1;
@@ -12290,6 +12303,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
12290 .vcpu_put = vmx_vcpu_put, 12303 .vcpu_put = vmx_vcpu_put,
12291 12304
12292 .update_bp_intercept = update_exception_bitmap, 12305 .update_bp_intercept = update_exception_bitmap,
12306 .get_msr_feature = vmx_get_msr_feature,
12293 .get_msr = vmx_get_msr, 12307 .get_msr = vmx_get_msr,
12294 .set_msr = vmx_set_msr, 12308 .set_msr = vmx_set_msr,
12295 .get_segment_base = vmx_get_segment_base, 12309 .get_segment_base = vmx_get_segment_base,
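
Besides picking up the same native MSR accessors and per-vCPU microcode_version as SVM, the nested-entry hunk fixes a lost-wakeup case: entering an L2 that sits in the HLT activity state used to halt the vCPU unconditionally, even when VM-entry event injection was about to wake it. The new condition, pulled out as a predicate (the two constants are assumed from the Intel SDM encodings, not quoted from the patch):

    #include <stdbool.h>
    #include <stdint.h>

    #define GUEST_ACTIVITY_HLT    1U            /* VMCS activity state "HLT"     */
    #define INTR_INFO_VALID_MASK  0x80000000U   /* bit 31 of VM-entry intr info  */

    /* Halt on nested entry only if L2 is halted and nothing is being injected. */
    static bool halt_on_nested_entry(uint32_t guest_activity_state,
                                     uint32_t vm_entry_intr_info)
    {
        return guest_activity_state == GUEST_ACTIVITY_HLT &&
               !(vm_entry_intr_info & INTR_INFO_VALID_MASK);
    }
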
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c8a0b545ac20..18b5ca7a3197 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1049,6 +1049,45 @@ static u32 emulated_msrs[] = {
1049 1049
1050static unsigned num_emulated_msrs; 1050static unsigned num_emulated_msrs;
1051 1051
1052/*
1053 * List of msr numbers which are used to expose MSR-based features that
1054 * can be used by a hypervisor to validate requested CPU features.
1055 */
1056static u32 msr_based_features[] = {
1057 MSR_F10H_DECFG,
1058 MSR_IA32_UCODE_REV,
1059};
1060
1061static unsigned int num_msr_based_features;
1062
1063static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
1064{
1065 switch (msr->index) {
1066 case MSR_IA32_UCODE_REV:
1067 rdmsrl(msr->index, msr->data);
1068 break;
1069 default:
1070 if (kvm_x86_ops->get_msr_feature(msr))
1071 return 1;
1072 }
1073 return 0;
1074}
1075
1076static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1077{
1078 struct kvm_msr_entry msr;
1079 int r;
1080
1081 msr.index = index;
1082 r = kvm_get_msr_feature(&msr);
1083 if (r)
1084 return r;
1085
1086 *data = msr.data;
1087
1088 return 0;
1089}
1090
1052bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) 1091bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
1053{ 1092{
1054 if (efer & efer_reserved_bits) 1093 if (efer & efer_reserved_bits)
@@ -2222,7 +2261,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2222 2261
2223 switch (msr) { 2262 switch (msr) {
2224 case MSR_AMD64_NB_CFG: 2263 case MSR_AMD64_NB_CFG:
2225 case MSR_IA32_UCODE_REV:
2226 case MSR_IA32_UCODE_WRITE: 2264 case MSR_IA32_UCODE_WRITE:
2227 case MSR_VM_HSAVE_PA: 2265 case MSR_VM_HSAVE_PA:
2228 case MSR_AMD64_PATCH_LOADER: 2266 case MSR_AMD64_PATCH_LOADER:
@@ -2230,6 +2268,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2230 case MSR_AMD64_DC_CFG: 2268 case MSR_AMD64_DC_CFG:
2231 break; 2269 break;
2232 2270
2271 case MSR_IA32_UCODE_REV:
2272 if (msr_info->host_initiated)
2273 vcpu->arch.microcode_version = data;
2274 break;
2233 case MSR_EFER: 2275 case MSR_EFER:
2234 return set_efer(vcpu, data); 2276 return set_efer(vcpu, data);
2235 case MSR_K7_HWCR: 2277 case MSR_K7_HWCR:
@@ -2525,7 +2567,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2525 msr_info->data = 0; 2567 msr_info->data = 0;
2526 break; 2568 break;
2527 case MSR_IA32_UCODE_REV: 2569 case MSR_IA32_UCODE_REV:
2528 msr_info->data = 0x100000000ULL; 2570 msr_info->data = vcpu->arch.microcode_version;
2529 break; 2571 break;
2530 case MSR_MTRRcap: 2572 case MSR_MTRRcap:
2531 case 0x200 ... 0x2ff: 2573 case 0x200 ... 0x2ff:
@@ -2680,13 +2722,11 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
2680 int (*do_msr)(struct kvm_vcpu *vcpu, 2722 int (*do_msr)(struct kvm_vcpu *vcpu,
2681 unsigned index, u64 *data)) 2723 unsigned index, u64 *data))
2682{ 2724{
2683 int i, idx; 2725 int i;
2684 2726
2685 idx = srcu_read_lock(&vcpu->kvm->srcu);
2686 for (i = 0; i < msrs->nmsrs; ++i) 2727 for (i = 0; i < msrs->nmsrs; ++i)
2687 if (do_msr(vcpu, entries[i].index, &entries[i].data)) 2728 if (do_msr(vcpu, entries[i].index, &entries[i].data))
2688 break; 2729 break;
2689 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2690 2730
2691 return i; 2731 return i;
2692} 2732}
@@ -2785,6 +2825,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
2785 case KVM_CAP_SET_BOOT_CPU_ID: 2825 case KVM_CAP_SET_BOOT_CPU_ID:
2786 case KVM_CAP_SPLIT_IRQCHIP: 2826 case KVM_CAP_SPLIT_IRQCHIP:
2787 case KVM_CAP_IMMEDIATE_EXIT: 2827 case KVM_CAP_IMMEDIATE_EXIT:
2828 case KVM_CAP_GET_MSR_FEATURES:
2788 r = 1; 2829 r = 1;
2789 break; 2830 break;
2790 case KVM_CAP_ADJUST_CLOCK: 2831 case KVM_CAP_ADJUST_CLOCK:
@@ -2899,6 +2940,31 @@ long kvm_arch_dev_ioctl(struct file *filp,
2899 goto out; 2940 goto out;
2900 r = 0; 2941 r = 0;
2901 break; 2942 break;
2943 case KVM_GET_MSR_FEATURE_INDEX_LIST: {
2944 struct kvm_msr_list __user *user_msr_list = argp;
2945 struct kvm_msr_list msr_list;
2946 unsigned int n;
2947
2948 r = -EFAULT;
2949 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
2950 goto out;
2951 n = msr_list.nmsrs;
2952 msr_list.nmsrs = num_msr_based_features;
2953 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
2954 goto out;
2955 r = -E2BIG;
2956 if (n < msr_list.nmsrs)
2957 goto out;
2958 r = -EFAULT;
2959 if (copy_to_user(user_msr_list->indices, &msr_based_features,
2960 num_msr_based_features * sizeof(u32)))
2961 goto out;
2962 r = 0;
2963 break;
2964 }
2965 case KVM_GET_MSRS:
2966 r = msr_io(NULL, argp, do_get_msr_feature, 1);
2967 break;
2902 } 2968 }
2903 default: 2969 default:
2904 r = -EINVAL; 2970 r = -EINVAL;
@@ -3636,12 +3702,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
3636 r = 0; 3702 r = 0;
3637 break; 3703 break;
3638 } 3704 }
3639 case KVM_GET_MSRS: 3705 case KVM_GET_MSRS: {
3706 int idx = srcu_read_lock(&vcpu->kvm->srcu);
3640 r = msr_io(vcpu, argp, do_get_msr, 1); 3707 r = msr_io(vcpu, argp, do_get_msr, 1);
3708 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3641 break; 3709 break;
3642 case KVM_SET_MSRS: 3710 }
3711 case KVM_SET_MSRS: {
3712 int idx = srcu_read_lock(&vcpu->kvm->srcu);
3643 r = msr_io(vcpu, argp, do_set_msr, 0); 3713 r = msr_io(vcpu, argp, do_set_msr, 0);
3714 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3644 break; 3715 break;
3716 }
3645 case KVM_TPR_ACCESS_REPORTING: { 3717 case KVM_TPR_ACCESS_REPORTING: {
3646 struct kvm_tpr_access_ctl tac; 3718 struct kvm_tpr_access_ctl tac;
3647 3719
@@ -4464,6 +4536,19 @@ static void kvm_init_msr_list(void)
4464 j++; 4536 j++;
4465 } 4537 }
4466 num_emulated_msrs = j; 4538 num_emulated_msrs = j;
4539
4540 for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) {
4541 struct kvm_msr_entry msr;
4542
4543 msr.index = msr_based_features[i];
4544 if (kvm_get_msr_feature(&msr))
4545 continue;
4546
4547 if (j < i)
4548 msr_based_features[j] = msr_based_features[i];
4549 j++;
4550 }
4551 num_msr_based_features = j;
4467} 4552}
4468 4553
4469static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, 4554static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
@@ -8017,6 +8102,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
8017 8102
8018void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) 8103void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
8019{ 8104{
8105 kvm_lapic_reset(vcpu, init_event);
8106
8020 vcpu->arch.hflags = 0; 8107 vcpu->arch.hflags = 0;
8021 8108
8022 vcpu->arch.smi_pending = 0; 8109 vcpu->arch.smi_pending = 0;
@@ -8460,10 +8547,8 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
8460 return r; 8547 return r;
8461 } 8548 }
8462 8549
8463 if (!size) { 8550 if (!size)
8464 r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE); 8551 vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
8465 WARN_ON(r < 0);
8466 }
8467 8552
8468 return 0; 8553 return 0;
8469} 8554}
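
The x86.c hunks add the userspace-visible half of the feature-MSR interface: KVM_CAP_GET_MSR_FEATURES, a system-scope KVM_GET_MSR_FEATURE_INDEX_LIST ioctl that enumerates msr_based_features[], and KVM_GET_MSRS on the /dev/kvm fd (the vcpu == NULL path through msr_io()) to read the host-supported values; the srcu lock moves out of __msr_io() and into the vcpu ioctl cases because the system-scope path has no vcpu->kvm to lock. A hedged sketch of how a VMM would consume this, with error handling trimmed (ioctl and struct names are from <linux/kvm.h> as extended by this series):

    #include <stdio.h>
    #include <stdlib.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
        int kvm = open("/dev/kvm", O_RDWR);
        if (kvm < 0 || ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_GET_MSR_FEATURES) <= 0)
            return 1;

        /* Probe with nmsrs = 0: the kernel writes back the real count and
         * fails the call with E2BIG, as in the hunk above. */
        struct kvm_msr_list probe = { .nmsrs = 0 };
        ioctl(kvm, KVM_GET_MSR_FEATURE_INDEX_LIST, &probe);

        struct kvm_msr_list *list =
            calloc(1, sizeof(*list) + probe.nmsrs * sizeof(__u32));
        list->nmsrs = probe.nmsrs;
        if (ioctl(kvm, KVM_GET_MSR_FEATURE_INDEX_LIST, list) < 0)
            return 1;

        struct kvm_msrs *msrs =
            calloc(1, sizeof(*msrs) + list->nmsrs * sizeof(struct kvm_msr_entry));
        msrs->nmsrs = list->nmsrs;
        for (__u32 i = 0; i < list->nmsrs; i++)
            msrs->entries[i].index = list->indices[i];

        /* System-scope KVM_GET_MSRS reads the host-supported feature values. */
        if (ioctl(kvm, KVM_GET_MSRS, msrs) < 0)
            return 1;

        for (__u32 i = 0; i < msrs->nmsrs; i++)
            printf("MSR 0x%x = 0x%llx\n", msrs->entries[i].index,
                   (unsigned long long)msrs->entries[i].data);

        close(kvm);
        return 0;
    }
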
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 91e9700cc6dc..25a972c61b0a 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -28,7 +28,6 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
28lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o 28lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
29lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o 29lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
30lib-$(CONFIG_RETPOLINE) += retpoline.o 30lib-$(CONFIG_RETPOLINE) += retpoline.o
31OBJECT_FILES_NON_STANDARD_retpoline.o :=y
32 31
33obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o 32obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
34 33
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 480edc3a5e03..c909961e678a 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,7 +7,6 @@
7#include <asm/alternative-asm.h> 7#include <asm/alternative-asm.h>
8#include <asm/export.h> 8#include <asm/export.h>
9#include <asm/nospec-branch.h> 9#include <asm/nospec-branch.h>
10#include <asm/bitsperlong.h>
11 10
12.macro THUNK reg 11.macro THUNK reg
13 .section .text.__x86.indirect_thunk 12 .section .text.__x86.indirect_thunk
@@ -47,58 +46,3 @@ GENERATE_THUNK(r13)
47GENERATE_THUNK(r14) 46GENERATE_THUNK(r14)
48GENERATE_THUNK(r15) 47GENERATE_THUNK(r15)
49#endif 48#endif
50
51/*
52 * Fill the CPU return stack buffer.
53 *
54 * Each entry in the RSB, if used for a speculative 'ret', contains an
55 * infinite 'pause; lfence; jmp' loop to capture speculative execution.
56 *
57 * This is required in various cases for retpoline and IBRS-based
58 * mitigations for the Spectre variant 2 vulnerability. Sometimes to
59 * eliminate potentially bogus entries from the RSB, and sometimes
60 * purely to ensure that it doesn't get empty, which on some CPUs would
61 * allow predictions from other (unwanted!) sources to be used.
62 *
63 * Google experimented with loop-unrolling and this turned out to be
64 * the optimal version - two calls, each with their own speculation
65 * trap should their return address end up getting used, in a loop.
66 */
67.macro STUFF_RSB nr:req sp:req
68 mov $(\nr / 2), %_ASM_BX
69 .align 16
70771:
71 call 772f
72773: /* speculation trap */
73 pause
74 lfence
75 jmp 773b
76 .align 16
77772:
78 call 774f
79775: /* speculation trap */
80 pause
81 lfence
82 jmp 775b
83 .align 16
84774:
85 dec %_ASM_BX
86 jnz 771b
87 add $((BITS_PER_LONG/8) * \nr), \sp
88.endm
89
90#define RSB_FILL_LOOPS 16 /* To avoid underflow */
91
92ENTRY(__fill_rsb)
93 STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
94 ret
95END(__fill_rsb)
96EXPORT_SYMBOL_GPL(__fill_rsb)
97
98#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
99
100ENTRY(__clear_rsb)
101 STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
102 ret
103END(__clear_rsb)
104EXPORT_SYMBOL_GPL(__clear_rsb)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 800de815519c..c88573d90f3e 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1248,10 +1248,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
1248 tsk = current; 1248 tsk = current;
1249 mm = tsk->mm; 1249 mm = tsk->mm;
1250 1250
1251 /*
1252 * Detect and handle instructions that would cause a page fault for
1253 * both a tracked kernel page and a userspace page.
1254 */
1255 prefetchw(&mm->mmap_sem); 1251 prefetchw(&mm->mmap_sem);
1256 1252
1257 if (unlikely(kmmio_fault(regs, address))) 1253 if (unlikely(kmmio_fault(regs, address)))
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 01f682cf77a8..40a6085063d6 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -15,6 +15,7 @@
15#include <asm/page.h> 15#include <asm/page.h>
16#include <asm/processor-flags.h> 16#include <asm/processor-flags.h>
17#include <asm/msr-index.h> 17#include <asm/msr-index.h>
18#include <asm/nospec-branch.h>
18 19
19 .text 20 .text
20 .code64 21 .code64
@@ -59,6 +60,7 @@ ENTRY(sme_encrypt_execute)
59 movq %rax, %r8 /* Workarea encryption routine */ 60 movq %rax, %r8 /* Workarea encryption routine */
60 addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */ 61 addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */
61 62
63 ANNOTATE_RETPOLINE_SAFE
62 call *%rax /* Call the encryption routine */ 64 call *%rax /* Call the encryption routine */
63 65
64 pop %r12 66 pop %r12
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 4923d92f918d..45e4eb5bcbb2 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -13,6 +13,7 @@
13#include <linux/if_vlan.h> 13#include <linux/if_vlan.h>
14#include <asm/cacheflush.h> 14#include <asm/cacheflush.h>
15#include <asm/set_memory.h> 15#include <asm/set_memory.h>
16#include <asm/nospec-branch.h>
16#include <linux/bpf.h> 17#include <linux/bpf.h>
17 18
18/* 19/*
@@ -290,7 +291,7 @@ static void emit_bpf_tail_call(u8 **pprog)
290 EMIT2(0x89, 0xD2); /* mov edx, edx */ 291 EMIT2(0x89, 0xD2); /* mov edx, edx */
291 EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ 292 EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
292 offsetof(struct bpf_array, map.max_entries)); 293 offsetof(struct bpf_array, map.max_entries));
293#define OFFSET1 43 /* number of bytes to jump */ 294#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */
294 EMIT2(X86_JBE, OFFSET1); /* jbe out */ 295 EMIT2(X86_JBE, OFFSET1); /* jbe out */
295 label1 = cnt; 296 label1 = cnt;
296 297
@@ -299,7 +300,7 @@ static void emit_bpf_tail_call(u8 **pprog)
299 */ 300 */
300 EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ 301 EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */
301 EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ 302 EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
302#define OFFSET2 32 303#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE)
303 EMIT2(X86_JA, OFFSET2); /* ja out */ 304 EMIT2(X86_JA, OFFSET2); /* ja out */
304 label2 = cnt; 305 label2 = cnt;
305 EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ 306 EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
@@ -313,7 +314,7 @@ static void emit_bpf_tail_call(u8 **pprog)
313 * goto out; 314 * goto out;
314 */ 315 */
315 EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ 316 EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */
316#define OFFSET3 10 317#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)
317 EMIT2(X86_JE, OFFSET3); /* je out */ 318 EMIT2(X86_JE, OFFSET3); /* je out */
318 label3 = cnt; 319 label3 = cnt;
319 320
@@ -326,7 +327,7 @@ static void emit_bpf_tail_call(u8 **pprog)
326 * rdi == ctx (1st arg) 327 * rdi == ctx (1st arg)
327 * rax == prog->bpf_func + prologue_size 328 * rax == prog->bpf_func + prologue_size
328 */ 329 */
329 EMIT2(0xFF, 0xE0); /* jmp rax */ 330 RETPOLINE_RAX_BPF_JIT();
330 331
331 /* out: */ 332 /* out: */
332 BUILD_BUG_ON(cnt - label1 != OFFSET1); 333 BUILD_BUG_ON(cnt - label1 != OFFSET1);
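
The BPF JIT's tail-call epilogue used to end in a bare two-byte "jmp *%rax"; RETPOLINE_RAX_BPF_JIT() now emits either that same jump or, under CONFIG_RETPOLINE, a retpoline thunk built around %rax, so every conditional jump that skips past it must grow by the emitted size. That is why the fixed "+2" hidden in OFFSET1/2/3 becomes "+ RETPOLINE_RAX_BPF_JIT_SIZE" (the BUILD_BUG_ONs at the end keep the bookkeeping honest). A small arithmetic check of that bookkeeping -- the 2- and 17-byte sizes are assumptions about the 4.16-era emitters, not quoted from the patch:

    #include <stdio.h>

    int main(void)
    {
        const int jmp_rax_size = 2;     /* "jmp *%rax"                           */
        const int retpoline_size = 17;  /* call + pause/lfence trap + mov + ret  */
        const int base[3] = { 41, 30, 8 };   /* from the OFFSETn hunks above     */

        for (int i = 0; i < 3; i++)
            printf("OFFSET%d: %d -> %d\n", i + 1,
                   base[i] + jmp_rax_size, base[i] + retpoline_size);
        return 0;
    }
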
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 174c59774cc9..a7a7677265b6 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -460,7 +460,7 @@ static int nmi_setup(void)
460 goto fail; 460 goto fail;
461 461
462 for_each_possible_cpu(cpu) { 462 for_each_possible_cpu(cpu) {
463 if (!cpu) 463 if (!IS_ENABLED(CONFIG_SMP) || !cpu)
464 continue; 464 continue;
465 465
466 memcpy(per_cpu(cpu_msrs, cpu).counters, 466 memcpy(per_cpu(cpu_msrs, cpu).counters,
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index de53bd15df5a..24bb7598774e 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -102,7 +102,7 @@ ENTRY(startup_32)
102 * don't we'll eventually crash trying to execute encrypted 102 * don't we'll eventually crash trying to execute encrypted
103 * instructions. 103 * instructions.
104 */ 104 */
105 bt $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags 105 btl $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
106 jnc .Ldone 106 jnc .Ldone
107 movl $MSR_K8_SYSCFG, %ecx 107 movl $MSR_K8_SYSCFG, %ecx
108 rdmsr 108 rdmsr
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 5d73c443e778..220e97841e49 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -770,9 +770,12 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
770 break; 770 break;
771 771
772 case R_X86_64_PC32: 772 case R_X86_64_PC32:
773 case R_X86_64_PLT32:
773 /* 774 /*
774 * PC relative relocations don't need to be adjusted unless 775 * PC relative relocations don't need to be adjusted unless
775 * referencing a percpu symbol. 776 * referencing a percpu symbol.
777 *
778 * NB: R_X86_64_PLT32 can be treated as R_X86_64_PC32.
776 */ 779 */
777 if (is_percpu_sym(sym, symname)) 780 if (is_percpu_sym(sym, symname))
778 add_reloc(&relocs32neg, offset); 781 add_reloc(&relocs32neg, offset);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index c047f42552e1..3c2c2530737e 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1376,8 +1376,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
1376 1376
1377 if (!xen_initial_domain()) { 1377 if (!xen_initial_domain()) {
1378 add_preferred_console("xenboot", 0, NULL); 1378 add_preferred_console("xenboot", 0, NULL);
1379 add_preferred_console("tty", 0, NULL);
1380 add_preferred_console("hvc", 0, NULL);
1381 if (pci_xen) 1379 if (pci_xen)
1382 x86_init.pci.arch_init = pci_xen_init; 1380 x86_init.pci.arch_init = pci_xen_init;
1383 } else { 1381 } else {
@@ -1410,6 +1408,10 @@ asmlinkage __visible void __init xen_start_kernel(void)
1410 1408
1411 xen_boot_params_init_edd(); 1409 xen_boot_params_init_edd();
1412 } 1410 }
1411
1412 add_preferred_console("tty", 0, NULL);
1413 add_preferred_console("hvc", 0, NULL);
1414
1413#ifdef CONFIG_PCI 1415#ifdef CONFIG_PCI
1414 /* PCI BIOS service won't work from a PV guest. */ 1416 /* PCI BIOS service won't work from a PV guest. */
1415 pci_probe &= ~PCI_PROBE_BIOS; 1417 pci_probe &= ~PCI_PROBE_BIOS;