commit:    3c76db70eb70a0fbd40b3e0dec8f69ca344d1ff8
tree:      a9dd314ac18992cdc2c50d4de60535a1b76ee351
parent:    194a9749c73d650c0b1dfdee04fb0bdf0a888ba8
parent:    7958b2246fadf54b7ff820a2a5a2c5ca1554716f
author:    Ingo Molnar <mingo@kernel.org>  2018-03-12 07:10:03 -0400
committer: Ingo Molnar <mingo@kernel.org>  2018-03-12 07:10:03 -0400

Merge branch 'x86/pti' into x86/mm, to pick up dependencies

Signed-off-by: Ingo Molnar <mingo@kernel.org>
 Makefile                                     |   5
 arch/x86/Kconfig                             |  12
 arch/x86/Makefile                            |   7
 arch/x86/entry/calling.h                     |  34
 arch/x86/entry/entry_32.S                    |   3
 arch/x86/entry/entry_64.S                    | 153
 arch/x86/entry/entry_64_compat.S             |  83
 arch/x86/entry/syscalls/syscall_32.tbl       |  38
 arch/x86/entry/vsyscall/vsyscall_64.c        |  16
 arch/x86/ia32/sys_ia32.c                     |  74
 arch/x86/include/asm/apm.h                   |   6
 arch/x86/include/asm/asm-prototypes.h        |   3
 arch/x86/include/asm/cpufeatures.h           |   3
 arch/x86/include/asm/efi.h                   |  17
 arch/x86/include/asm/microcode.h             |   9
 arch/x86/include/asm/mmu_context.h           |   1
 arch/x86/include/asm/nospec-branch.h         | 138
 arch/x86/include/asm/paravirt.h              |  17
 arch/x86/include/asm/paravirt_types.h        |   5
 arch/x86/include/asm/pgtable.h               |   8
 arch/x86/include/asm/pgtable_32.h            |   1
 arch/x86/include/asm/pgtable_64.h            |   1
 arch/x86/include/asm/pgtable_types.h         |  12
 arch/x86/include/asm/processor.h             |   1
 arch/x86/include/asm/refcount.h              |   4
 arch/x86/include/asm/rmwcc.h                 |  16
 arch/x86/include/asm/sections.h              |   1
 arch/x86/include/asm/sys_ia32.h              |  48
 arch/x86/kernel/apic/io_apic.c               |   2
 arch/x86/kernel/cpu/bugs.c                   |  12
 arch/x86/kernel/cpu/common.c                 |  30
 arch/x86/kernel/cpu/intel.c                  |   7
 arch/x86/kernel/cpu/microcode/amd.c          |  10
 arch/x86/kernel/cpu/microcode/core.c         | 161
 arch/x86/kernel/cpu/microcode/intel.c        |  58
 arch/x86/kernel/head_64.S                    |   2
 arch/x86/kernel/ioport.c                     |   2
 arch/x86/kernel/kprobes/core.c               |  10
 arch/x86/kernel/setup.c                      |  17
 arch/x86/kernel/setup_percpu.c               |  17
 arch/x86/kernel/unwind_orc.c                 |   3
 arch/x86/kernel/vmlinux.lds.S                |   2
 arch/x86/kvm/svm.c                           |   9
 arch/x86/kvm/vmx.c                           |   9
 arch/x86/lib/Makefile                        |   1
 arch/x86/lib/retpoline.S                     |  56
 arch/x86/mm/cpu_entry_area.c                 |   6
 arch/x86/mm/fault.c                          |   4
 arch/x86/mm/init_32.c                        |  15
 arch/x86/mm/mem_encrypt_boot.S               |   2
 arch/x86/mm/pti.c                            |   2
 arch/x86/realmode/rm/trampoline_64.S         |   2
 arch/x86/xen/suspend.c                       |  16
 include/linux/compiler-clang.h               |   5
 include/linux/compiler-gcc.h                 |   4
 include/linux/init.h                         |   8
 include/linux/jump_label.h                   |   3
 include/linux/kernel.h                       |   1
 include/linux/nospec.h                       |  26
 init/main.c                                  |   2
 kernel/extable.c                             |   2
 kernel/jump_label.c                          |  27
 scripts/Makefile.build                       |   8
 tools/objtool/builtin-check.c                |   6
 tools/objtool/builtin-orc.c                  |   6
 tools/objtool/builtin.h                      |   5
 tools/objtool/check.c                        |  93
 tools/objtool/check.h                        |   3
 tools/testing/selftests/x86/test_vsyscall.c  |  11
 69 files changed, 910 insertions(+), 471 deletions(-)
diff --git a/Makefile b/Makefile
index 659a7780aeb3..fb94072fd80d 100644
--- a/Makefile
+++ b/Makefile
@@ -489,6 +489,11 @@ KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
 KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
 endif
 
+RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register
+RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk
+RETPOLINE_CFLAGS := $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG)))
+export RETPOLINE_CFLAGS
+
 ifeq ($(config-targets),1)
 # ===========================================================================
 # *config targets only - make sure prerequisites are updated, and descend
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 552b3d0eae36..18233e459bff 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -430,6 +430,7 @@ config GOLDFISH
 config RETPOLINE
 	bool "Avoid speculative indirect branches in kernel"
 	default y
+	select STACK_VALIDATION if HAVE_STACK_VALIDATION
 	help
 	  Compile kernel with the retpoline compiler options to guard against
 	  kernel-to-user data leaks by avoiding speculative indirect
@@ -2315,7 +2316,7 @@ choice
 	  it can be used to assist security vulnerability exploitation.
 
 	  This setting can be changed at boot time via the kernel command
-	  line parameter vsyscall=[native|emulate|none].
+	  line parameter vsyscall=[emulate|none].
 
 	  On a system with recent enough glibc (2.14 or newer) and no
 	  static binaries, you can say None without a performance penalty
@@ -2323,15 +2324,6 @@ choice
 
 	  If unsure, select "Emulate".
 
-	config LEGACY_VSYSCALL_NATIVE
-		bool "Native"
-		help
-		  Actual executable code is located in the fixed vsyscall
-		  address mapping, implementing time() efficiently. Since
-		  this makes the mapping executable, it can be used during
-		  security vulnerability exploitation (traditionally as
-		  ROP gadgets). This configuration is not recommended.
-
 	config LEGACY_VSYSCALL_EMULATE
 		bool "Emulate"
 		help
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index fad55160dcb9..498c1b812300 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -232,10 +232,9 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
 # Avoid indirect branches in kernel to deal with Spectre
 ifdef CONFIG_RETPOLINE
-    RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
-    ifneq ($(RETPOLINE_CFLAGS),)
-        KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
-    endif
+ifneq ($(RETPOLINE_CFLAGS),)
+  KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+endif
 endif
 
 archscripts: scripts_basic
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index dce7092ab24a..be63330c5511 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -97,7 +97,7 @@ For 32-bit we have the following conventions - kernel is built with
 
 #define SIZEOF_PTREGS	21*8
 
-.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
 	/*
 	 * Push registers and sanitize registers of values that a
 	 * speculation attack might otherwise want to exploit. The
@@ -105,32 +105,41 @@ For 32-bit we have the following conventions - kernel is built with
 	 * could be put to use in a speculative execution gadget.
 	 * Interleave XOR with PUSH for better uop scheduling:
 	 */
+	.if \save_ret
+	pushq	%rsi		/* pt_regs->si */
+	movq	8(%rsp), %rsi	/* temporarily store the return address in %rsi */
+	movq	%rdi, 8(%rsp)	/* pt_regs->di (overwriting original return address) */
+	.else
 	pushq	%rdi		/* pt_regs->di */
 	pushq	%rsi		/* pt_regs->si */
+	.endif
 	pushq	\rdx		/* pt_regs->dx */
 	pushq	%rcx		/* pt_regs->cx */
 	pushq	\rax		/* pt_regs->ax */
 	pushq	%r8		/* pt_regs->r8 */
-	xorq	%r8, %r8	/* nospec   r8 */
+	xorl	%r8d, %r8d	/* nospec   r8 */
 	pushq	%r9		/* pt_regs->r9 */
-	xorq	%r9, %r9	/* nospec   r9 */
+	xorl	%r9d, %r9d	/* nospec   r9 */
 	pushq	%r10		/* pt_regs->r10 */
-	xorq	%r10, %r10	/* nospec   r10 */
+	xorl	%r10d, %r10d	/* nospec   r10 */
 	pushq	%r11		/* pt_regs->r11 */
-	xorq	%r11, %r11	/* nospec   r11*/
+	xorl	%r11d, %r11d	/* nospec   r11*/
 	pushq	%rbx		/* pt_regs->rbx */
 	xorl	%ebx, %ebx	/* nospec    rbx*/
 	pushq	%rbp		/* pt_regs->rbp */
 	xorl	%ebp, %ebp	/* nospec    rbp*/
 	pushq	%r12		/* pt_regs->r12 */
-	xorq	%r12, %r12	/* nospec   r12*/
+	xorl	%r12d, %r12d	/* nospec   r12*/
 	pushq	%r13		/* pt_regs->r13 */
-	xorq	%r13, %r13	/* nospec   r13*/
+	xorl	%r13d, %r13d	/* nospec   r13*/
 	pushq	%r14		/* pt_regs->r14 */
-	xorq	%r14, %r14	/* nospec   r14*/
+	xorl	%r14d, %r14d	/* nospec   r14*/
 	pushq	%r15		/* pt_regs->r15 */
-	xorq	%r15, %r15	/* nospec   r15*/
+	xorl	%r15d, %r15d	/* nospec   r15*/
 	UNWIND_HINT_REGS
+	.if \save_ret
+	pushq	%rsi		/* return address on top of stack */
+	.endif
 .endm
 
 .macro POP_REGS pop_rdi=1 skip_r11rcx=0
@@ -172,12 +181,7 @@ For 32-bit we have the following conventions - kernel is built with
 	 */
 .macro ENCODE_FRAME_POINTER ptregs_offset=0
 #ifdef CONFIG_FRAME_POINTER
-	.if \ptregs_offset
-		leaq \ptregs_offset(%rsp), %rbp
-	.else
-		mov %rsp, %rbp
-	.endif
-	orq	$0x1, %rbp
+	leaq 1+\ptregs_offset(%rsp), %rbp
 #endif
 .endm
 
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 16c2c022540d..6ad064c8cf35 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -252,8 +252,7 @@ ENTRY(__switch_to_asm)
 	 * exist, overwrite the RSB with entries which capture
 	 * speculative execution to prevent attack.
 	 */
-	/* Clobbers %ebx */
-	FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+	FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
 	/* restore callee-saved registers */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index c9e55b89f03a..8a78030a82f2 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -369,8 +369,7 @@ ENTRY(__switch_to_asm)
369 * exist, overwrite the RSB with entries which capture 369 * exist, overwrite the RSB with entries which capture
370 * speculative execution to prevent attack. 370 * speculative execution to prevent attack.
371 */ 371 */
372 /* Clobbers %rbx */ 372 FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
373 FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
374#endif 373#endif
375 374
376 /* restore callee-saved registers */ 375 /* restore callee-saved registers */
@@ -454,9 +453,19 @@ END(irq_entries_start)
454 * 453 *
455 * The invariant is that, if irq_count != -1, then the IRQ stack is in use. 454 * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
456 */ 455 */
457.macro ENTER_IRQ_STACK regs=1 old_rsp 456.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0
458 DEBUG_ENTRY_ASSERT_IRQS_OFF 457 DEBUG_ENTRY_ASSERT_IRQS_OFF
458
459 .if \save_ret
460 /*
461 * If save_ret is set, the original stack contains one additional
462 * entry -- the return address. Therefore, move the address one
463 * entry below %rsp to \old_rsp.
464 */
465 leaq 8(%rsp), \old_rsp
466 .else
459 movq %rsp, \old_rsp 467 movq %rsp, \old_rsp
468 .endif
460 469
461 .if \regs 470 .if \regs
462 UNWIND_HINT_REGS base=\old_rsp 471 UNWIND_HINT_REGS base=\old_rsp
@@ -502,6 +511,15 @@ END(irq_entries_start)
502 .if \regs 511 .if \regs
503 UNWIND_HINT_REGS indirect=1 512 UNWIND_HINT_REGS indirect=1
504 .endif 513 .endif
514
515 .if \save_ret
516 /*
517 * Push the return address to the stack. This return address can
518 * be found at the "real" original RSP, which was offset by 8 at
519 * the beginning of this macro.
520 */
521 pushq -8(\old_rsp)
522 .endif
505.endm 523.endm
506 524
507/* 525/*
@@ -525,27 +543,65 @@ END(irq_entries_start)
525.endm 543.endm
526 544
527/* 545/*
528 * Interrupt entry/exit. 546 * Interrupt entry helper function.
529 *
530 * Interrupt entry points save only callee clobbered registers in fast path.
531 * 547 *
532 * Entry runs with interrupts off. 548 * Entry runs with interrupts off. Stack layout at entry:
549 * +----------------------------------------------------+
550 * | regs->ss |
551 * | regs->rsp |
552 * | regs->eflags |
553 * | regs->cs |
554 * | regs->ip |
555 * +----------------------------------------------------+
556 * | regs->orig_ax = ~(interrupt number) |
557 * +----------------------------------------------------+
558 * | return address |
559 * +----------------------------------------------------+
533 */ 560 */
534 561ENTRY(interrupt_entry)
535/* 0(%rsp): ~(interrupt number) */ 562 UNWIND_HINT_FUNC
536 .macro interrupt func 563 ASM_CLAC
537 cld 564 cld
538 565
539 testb $3, CS-ORIG_RAX(%rsp) 566 testb $3, CS-ORIG_RAX+8(%rsp)
540 jz 1f 567 jz 1f
541 SWAPGS 568 SWAPGS
542 call switch_to_thread_stack 569
570 /*
571 * Switch to the thread stack. The IRET frame and orig_ax are
572 * on the stack, as well as the return address. RDI..R12 are
573 * not (yet) on the stack and space has not (yet) been
574 * allocated for them.
575 */
576 pushq %rdi
577
578 /* Need to switch before accessing the thread stack. */
579 SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
580 movq %rsp, %rdi
581 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
582
583 /*
584 * We have RDI, return address, and orig_ax on the stack on
585 * top of the IRET frame. That means offset=24
586 */
587 UNWIND_HINT_IRET_REGS base=%rdi offset=24
588
589 pushq 7*8(%rdi) /* regs->ss */
590 pushq 6*8(%rdi) /* regs->rsp */
591 pushq 5*8(%rdi) /* regs->eflags */
592 pushq 4*8(%rdi) /* regs->cs */
593 pushq 3*8(%rdi) /* regs->ip */
594 pushq 2*8(%rdi) /* regs->orig_ax */
595 pushq 8(%rdi) /* return address */
596 UNWIND_HINT_FUNC
597
598 movq (%rdi), %rdi
5431: 5991:
544 600
545 PUSH_AND_CLEAR_REGS 601 PUSH_AND_CLEAR_REGS save_ret=1
546 ENCODE_FRAME_POINTER 602 ENCODE_FRAME_POINTER 8
547 603
548 testb $3, CS(%rsp) 604 testb $3, CS+8(%rsp)
549 jz 1f 605 jz 1f
550 606
551 /* 607 /*
@@ -553,7 +609,7 @@ END(irq_entries_start)
553 * 609 *
554 * We need to tell lockdep that IRQs are off. We can't do this until 610 * We need to tell lockdep that IRQs are off. We can't do this until
555 * we fix gsbase, and we should do it before enter_from_user_mode 611 * we fix gsbase, and we should do it before enter_from_user_mode
556 * (which can take locks). Since TRACE_IRQS_OFF idempotent, 612 * (which can take locks). Since TRACE_IRQS_OFF is idempotent,
557 * the simplest way to handle it is to just call it twice if 613 * the simplest way to handle it is to just call it twice if
558 * we enter from user mode. There's no reason to optimize this since 614 * we enter from user mode. There's no reason to optimize this since
559 * TRACE_IRQS_OFF is a no-op if lockdep is off. 615 * TRACE_IRQS_OFF is a no-op if lockdep is off.
@@ -563,12 +619,15 @@ END(irq_entries_start)
563 CALL_enter_from_user_mode 619 CALL_enter_from_user_mode
564 620
5651: 6211:
566 ENTER_IRQ_STACK old_rsp=%rdi 622 ENTER_IRQ_STACK old_rsp=%rdi save_ret=1
567 /* We entered an interrupt context - irqs are off: */ 623 /* We entered an interrupt context - irqs are off: */
568 TRACE_IRQS_OFF 624 TRACE_IRQS_OFF
569 625
570 call \func /* rdi points to pt_regs */ 626 ret
571 .endm 627END(interrupt_entry)
628
629
630/* Interrupt entry/exit. */
572 631
573 /* 632 /*
574 * The interrupt stubs push (~vector+0x80) onto the stack and 633 * The interrupt stubs push (~vector+0x80) onto the stack and
@@ -576,9 +635,10 @@ END(irq_entries_start)
576 */ 635 */
577 .p2align CONFIG_X86_L1_CACHE_SHIFT 636 .p2align CONFIG_X86_L1_CACHE_SHIFT
578common_interrupt: 637common_interrupt:
579 ASM_CLAC
580 addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ 638 addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
581 interrupt do_IRQ 639 call interrupt_entry
640 UNWIND_HINT_REGS indirect=1
641 call do_IRQ /* rdi points to pt_regs */
582 /* 0(%rsp): old RSP */ 642 /* 0(%rsp): old RSP */
583ret_from_intr: 643ret_from_intr:
584 DISABLE_INTERRUPTS(CLBR_ANY) 644 DISABLE_INTERRUPTS(CLBR_ANY)
@@ -771,10 +831,11 @@ END(common_interrupt)
771.macro apicinterrupt3 num sym do_sym 831.macro apicinterrupt3 num sym do_sym
772ENTRY(\sym) 832ENTRY(\sym)
773 UNWIND_HINT_IRET_REGS 833 UNWIND_HINT_IRET_REGS
774 ASM_CLAC
775 pushq $~(\num) 834 pushq $~(\num)
776.Lcommon_\sym: 835.Lcommon_\sym:
777 interrupt \do_sym 836 call interrupt_entry
837 UNWIND_HINT_REGS indirect=1
838 call \do_sym /* rdi points to pt_regs */
778 jmp ret_from_intr 839 jmp ret_from_intr
779END(\sym) 840END(\sym)
780.endm 841.endm
@@ -837,34 +898,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
837 */ 898 */
838#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) 899#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
839 900
840/*
841 * Switch to the thread stack. This is called with the IRET frame and
842 * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
843 * space has not been allocated for them.)
844 */
845ENTRY(switch_to_thread_stack)
846 UNWIND_HINT_FUNC
847
848 pushq %rdi
849 /* Need to switch before accessing the thread stack. */
850 SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
851 movq %rsp, %rdi
852 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
853 UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
854
855 pushq 7*8(%rdi) /* regs->ss */
856 pushq 6*8(%rdi) /* regs->rsp */
857 pushq 5*8(%rdi) /* regs->eflags */
858 pushq 4*8(%rdi) /* regs->cs */
859 pushq 3*8(%rdi) /* regs->ip */
860 pushq 2*8(%rdi) /* regs->orig_ax */
861 pushq 8(%rdi) /* return address */
862 UNWIND_HINT_FUNC
863
864 movq (%rdi), %rdi
865 ret
866END(switch_to_thread_stack)
867
868.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 901.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
869ENTRY(\sym) 902ENTRY(\sym)
870 UNWIND_HINT_IRET_REGS offset=\has_error_code*8 903 UNWIND_HINT_IRET_REGS offset=\has_error_code*8
@@ -880,12 +913,8 @@ ENTRY(\sym)
880 pushq $-1 /* ORIG_RAX: no syscall to restart */ 913 pushq $-1 /* ORIG_RAX: no syscall to restart */
881 .endif 914 .endif
882 915
883 /* Save all registers in pt_regs */
884 PUSH_AND_CLEAR_REGS
885 ENCODE_FRAME_POINTER
886
887 .if \paranoid < 2 916 .if \paranoid < 2
888 testb $3, CS(%rsp) /* If coming from userspace, switch stacks */ 917 testb $3, CS-ORIG_RAX(%rsp) /* If coming from userspace, switch stacks */
889 jnz .Lfrom_usermode_switch_stack_\@ 918 jnz .Lfrom_usermode_switch_stack_\@
890 .endif 919 .endif
891 920
@@ -1135,13 +1164,15 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1
1135#endif 1164#endif
1136 1165
1137/* 1166/*
1138 * Switch gs if needed. 1167 * Save all registers in pt_regs, and switch gs if needed.
1139 * Use slow, but surefire "are we in kernel?" check. 1168 * Use slow, but surefire "are we in kernel?" check.
1140 * Return: ebx=0: need swapgs on exit, ebx=1: otherwise 1169 * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
1141 */ 1170 */
1142ENTRY(paranoid_entry) 1171ENTRY(paranoid_entry)
1143 UNWIND_HINT_FUNC 1172 UNWIND_HINT_FUNC
1144 cld 1173 cld
1174 PUSH_AND_CLEAR_REGS save_ret=1
1175 ENCODE_FRAME_POINTER 8
1145 movl $1, %ebx 1176 movl $1, %ebx
1146 movl $MSR_GS_BASE, %ecx 1177 movl $MSR_GS_BASE, %ecx
1147 rdmsr 1178 rdmsr
@@ -1186,12 +1217,14 @@ ENTRY(paranoid_exit)
1186END(paranoid_exit) 1217END(paranoid_exit)
1187 1218
1188/* 1219/*
1189 * Switch gs if needed. 1220 * Save all registers in pt_regs, and switch GS if needed.
1190 * Return: EBX=0: came from user mode; EBX=1: otherwise 1221 * Return: EBX=0: came from user mode; EBX=1: otherwise
1191 */ 1222 */
1192ENTRY(error_entry) 1223ENTRY(error_entry)
1193 UNWIND_HINT_REGS offset=8 1224 UNWIND_HINT_FUNC
1194 cld 1225 cld
1226 PUSH_AND_CLEAR_REGS save_ret=1
1227 ENCODE_FRAME_POINTER 8
1195 testb $3, CS+8(%rsp) 1228 testb $3, CS+8(%rsp)
1196 jz .Lerror_kernelspace 1229 jz .Lerror_kernelspace
1197 1230
@@ -1582,8 +1615,6 @@ end_repeat_nmi:
1582 * frame to point back to repeat_nmi. 1615 * frame to point back to repeat_nmi.
1583 */ 1616 */
1584 pushq $-1 /* ORIG_RAX: no syscall to restart */ 1617 pushq $-1 /* ORIG_RAX: no syscall to restart */
1585 PUSH_AND_CLEAR_REGS
1586 ENCODE_FRAME_POINTER
1587 1618
1588 /* 1619 /*
1589 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit 1620 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index fd65e016e413..08425c42f8b7 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -85,25 +85,25 @@ ENTRY(entry_SYSENTER_compat)
85 pushq %rcx /* pt_regs->cx */ 85 pushq %rcx /* pt_regs->cx */
86 pushq $-ENOSYS /* pt_regs->ax */ 86 pushq $-ENOSYS /* pt_regs->ax */
87 pushq $0 /* pt_regs->r8 = 0 */ 87 pushq $0 /* pt_regs->r8 = 0 */
88 xorq %r8, %r8 /* nospec r8 */ 88 xorl %r8d, %r8d /* nospec r8 */
89 pushq $0 /* pt_regs->r9 = 0 */ 89 pushq $0 /* pt_regs->r9 = 0 */
90 xorq %r9, %r9 /* nospec r9 */ 90 xorl %r9d, %r9d /* nospec r9 */
91 pushq $0 /* pt_regs->r10 = 0 */ 91 pushq $0 /* pt_regs->r10 = 0 */
92 xorq %r10, %r10 /* nospec r10 */ 92 xorl %r10d, %r10d /* nospec r10 */
93 pushq $0 /* pt_regs->r11 = 0 */ 93 pushq $0 /* pt_regs->r11 = 0 */
94 xorq %r11, %r11 /* nospec r11 */ 94 xorl %r11d, %r11d /* nospec r11 */
95 pushq %rbx /* pt_regs->rbx */ 95 pushq %rbx /* pt_regs->rbx */
96 xorl %ebx, %ebx /* nospec rbx */ 96 xorl %ebx, %ebx /* nospec rbx */
97 pushq %rbp /* pt_regs->rbp (will be overwritten) */ 97 pushq %rbp /* pt_regs->rbp (will be overwritten) */
98 xorl %ebp, %ebp /* nospec rbp */ 98 xorl %ebp, %ebp /* nospec rbp */
99 pushq $0 /* pt_regs->r12 = 0 */ 99 pushq $0 /* pt_regs->r12 = 0 */
100 xorq %r12, %r12 /* nospec r12 */ 100 xorl %r12d, %r12d /* nospec r12 */
101 pushq $0 /* pt_regs->r13 = 0 */ 101 pushq $0 /* pt_regs->r13 = 0 */
102 xorq %r13, %r13 /* nospec r13 */ 102 xorl %r13d, %r13d /* nospec r13 */
103 pushq $0 /* pt_regs->r14 = 0 */ 103 pushq $0 /* pt_regs->r14 = 0 */
104 xorq %r14, %r14 /* nospec r14 */ 104 xorl %r14d, %r14d /* nospec r14 */
105 pushq $0 /* pt_regs->r15 = 0 */ 105 pushq $0 /* pt_regs->r15 = 0 */
106 xorq %r15, %r15 /* nospec r15 */ 106 xorl %r15d, %r15d /* nospec r15 */
107 cld 107 cld
108 108
109 /* 109 /*
@@ -224,25 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
224 pushq %rbp /* pt_regs->cx (stashed in bp) */ 224 pushq %rbp /* pt_regs->cx (stashed in bp) */
225 pushq $-ENOSYS /* pt_regs->ax */ 225 pushq $-ENOSYS /* pt_regs->ax */
226 pushq $0 /* pt_regs->r8 = 0 */ 226 pushq $0 /* pt_regs->r8 = 0 */
227 xorq %r8, %r8 /* nospec r8 */ 227 xorl %r8d, %r8d /* nospec r8 */
228 pushq $0 /* pt_regs->r9 = 0 */ 228 pushq $0 /* pt_regs->r9 = 0 */
229 xorq %r9, %r9 /* nospec r9 */ 229 xorl %r9d, %r9d /* nospec r9 */
230 pushq $0 /* pt_regs->r10 = 0 */ 230 pushq $0 /* pt_regs->r10 = 0 */
231 xorq %r10, %r10 /* nospec r10 */ 231 xorl %r10d, %r10d /* nospec r10 */
232 pushq $0 /* pt_regs->r11 = 0 */ 232 pushq $0 /* pt_regs->r11 = 0 */
233 xorq %r11, %r11 /* nospec r11 */ 233 xorl %r11d, %r11d /* nospec r11 */
234 pushq %rbx /* pt_regs->rbx */ 234 pushq %rbx /* pt_regs->rbx */
235 xorl %ebx, %ebx /* nospec rbx */ 235 xorl %ebx, %ebx /* nospec rbx */
236 pushq %rbp /* pt_regs->rbp (will be overwritten) */ 236 pushq %rbp /* pt_regs->rbp (will be overwritten) */
237 xorl %ebp, %ebp /* nospec rbp */ 237 xorl %ebp, %ebp /* nospec rbp */
238 pushq $0 /* pt_regs->r12 = 0 */ 238 pushq $0 /* pt_regs->r12 = 0 */
239 xorq %r12, %r12 /* nospec r12 */ 239 xorl %r12d, %r12d /* nospec r12 */
240 pushq $0 /* pt_regs->r13 = 0 */ 240 pushq $0 /* pt_regs->r13 = 0 */
241 xorq %r13, %r13 /* nospec r13 */ 241 xorl %r13d, %r13d /* nospec r13 */
242 pushq $0 /* pt_regs->r14 = 0 */ 242 pushq $0 /* pt_regs->r14 = 0 */
243 xorq %r14, %r14 /* nospec r14 */ 243 xorl %r14d, %r14d /* nospec r14 */
244 pushq $0 /* pt_regs->r15 = 0 */ 244 pushq $0 /* pt_regs->r15 = 0 */
245 xorq %r15, %r15 /* nospec r15 */ 245 xorl %r15d, %r15d /* nospec r15 */
246 246
247 /* 247 /*
248 * User mode is traced as though IRQs are on, and SYSENTER 248 * User mode is traced as though IRQs are on, and SYSENTER
@@ -298,9 +298,9 @@ sysret32_from_system_call:
298 */ 298 */
299 SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9 299 SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
300 300
301 xorq %r8, %r8 301 xorl %r8d, %r8d
302 xorq %r9, %r9 302 xorl %r9d, %r9d
303 xorq %r10, %r10 303 xorl %r10d, %r10d
304 swapgs 304 swapgs
305 sysretl 305 sysretl
306END(entry_SYSCALL_compat) 306END(entry_SYSCALL_compat)
@@ -347,36 +347,47 @@ ENTRY(entry_INT80_compat)
347 */ 347 */
348 movl %eax, %eax 348 movl %eax, %eax
349 349
350 /* switch to thread stack expects orig_ax and rdi to be pushed */
350 pushq %rax /* pt_regs->orig_ax */ 351 pushq %rax /* pt_regs->orig_ax */
352 pushq %rdi /* pt_regs->di */
351 353
352 /* switch to thread stack expects orig_ax to be pushed */ 354 /* Need to switch before accessing the thread stack. */
353 call switch_to_thread_stack 355 SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
356 movq %rsp, %rdi
357 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
354 358
355 pushq %rdi /* pt_regs->di */ 359 pushq 6*8(%rdi) /* regs->ss */
360 pushq 5*8(%rdi) /* regs->rsp */
361 pushq 4*8(%rdi) /* regs->eflags */
362 pushq 3*8(%rdi) /* regs->cs */
363 pushq 2*8(%rdi) /* regs->ip */
364 pushq 1*8(%rdi) /* regs->orig_ax */
365
366 pushq (%rdi) /* pt_regs->di */
356 pushq %rsi /* pt_regs->si */ 367 pushq %rsi /* pt_regs->si */
357 pushq %rdx /* pt_regs->dx */ 368 pushq %rdx /* pt_regs->dx */
358 pushq %rcx /* pt_regs->cx */ 369 pushq %rcx /* pt_regs->cx */
359 pushq $-ENOSYS /* pt_regs->ax */ 370 pushq $-ENOSYS /* pt_regs->ax */
360 pushq $0 /* pt_regs->r8 = 0 */ 371 pushq $0 /* pt_regs->r8 = 0 */
361 xorq %r8, %r8 /* nospec r8 */ 372 xorl %r8d, %r8d /* nospec r8 */
362 pushq $0 /* pt_regs->r9 = 0 */ 373 pushq $0 /* pt_regs->r9 = 0 */
363 xorq %r9, %r9 /* nospec r9 */ 374 xorl %r9d, %r9d /* nospec r9 */
364 pushq $0 /* pt_regs->r10 = 0 */ 375 pushq $0 /* pt_regs->r10 = 0 */
365 xorq %r10, %r10 /* nospec r10 */ 376 xorl %r10d, %r10d /* nospec r10 */
366 pushq $0 /* pt_regs->r11 = 0 */ 377 pushq $0 /* pt_regs->r11 = 0 */
367 xorq %r11, %r11 /* nospec r11 */ 378 xorl %r11d, %r11d /* nospec r11 */
368 pushq %rbx /* pt_regs->rbx */ 379 pushq %rbx /* pt_regs->rbx */
369 xorl %ebx, %ebx /* nospec rbx */ 380 xorl %ebx, %ebx /* nospec rbx */
370 pushq %rbp /* pt_regs->rbp */ 381 pushq %rbp /* pt_regs->rbp */
371 xorl %ebp, %ebp /* nospec rbp */ 382 xorl %ebp, %ebp /* nospec rbp */
372 pushq %r12 /* pt_regs->r12 */ 383 pushq %r12 /* pt_regs->r12 */
373 xorq %r12, %r12 /* nospec r12 */ 384 xorl %r12d, %r12d /* nospec r12 */
374 pushq %r13 /* pt_regs->r13 */ 385 pushq %r13 /* pt_regs->r13 */
375 xorq %r13, %r13 /* nospec r13 */ 386 xorl %r13d, %r13d /* nospec r13 */
376 pushq %r14 /* pt_regs->r14 */ 387 pushq %r14 /* pt_regs->r14 */
377 xorq %r14, %r14 /* nospec r14 */ 388 xorl %r14d, %r14d /* nospec r14 */
378 pushq %r15 /* pt_regs->r15 */ 389 pushq %r15 /* pt_regs->r15 */
379 xorq %r15, %r15 /* nospec r15 */ 390 xorl %r15d, %r15d /* nospec r15 */
380 cld 391 cld
381 392
382 /* 393 /*
@@ -393,15 +404,3 @@ ENTRY(entry_INT80_compat)
393 TRACE_IRQS_ON 404 TRACE_IRQS_ON
394 jmp swapgs_restore_regs_and_return_to_usermode 405 jmp swapgs_restore_regs_and_return_to_usermode
395END(entry_INT80_compat) 406END(entry_INT80_compat)
396
397ENTRY(stub32_clone)
398 /*
399 * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr).
400 * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val).
401 *
402 * The native 64-bit kernel's sys_clone() implements the latter,
403 * so we need to swap arguments here before calling it:
404 */
405 xchg %r8, %rcx
406 jmp sys_clone
407ENDPROC(stub32_clone)
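
Note on the stub32_clone removal above: the assembly stub's only job was the xchg that reordered arguments so the 32-bit clone ABI (..., int tls_val, int *child_tidptr) lined up with the native 64-bit sys_clone() (..., int *child_tidptr, int tls_val). That reordering now moves into C; the wrapper added in arch/x86/ia32/sys_ia32.c later in this merge amounts to the following (reproduced here only to connect the two hunks):

COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags,
		       unsigned long, newsp, int __user *, parent_tidptr,
		       unsigned long, tls_val, int __user *, child_tidptr)
{
	/* Hand the last two arguments to sys_clone() in 64-bit order. */
	return sys_clone(clone_flags, newsp, parent_tidptr, child_tidptr,
			 tls_val);
}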
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 448ac2161112..2a5e99cff859 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -8,12 +8,12 @@
8# 8#
90 i386 restart_syscall sys_restart_syscall 90 i386 restart_syscall sys_restart_syscall
101 i386 exit sys_exit 101 i386 exit sys_exit
112 i386 fork sys_fork sys_fork 112 i386 fork sys_fork
123 i386 read sys_read 123 i386 read sys_read
134 i386 write sys_write 134 i386 write sys_write
145 i386 open sys_open compat_sys_open 145 i386 open sys_open compat_sys_open
156 i386 close sys_close 156 i386 close sys_close
167 i386 waitpid sys_waitpid sys32_waitpid 167 i386 waitpid sys_waitpid compat_sys_x86_waitpid
178 i386 creat sys_creat 178 i386 creat sys_creat
189 i386 link sys_link 189 i386 link sys_link
1910 i386 unlink sys_unlink 1910 i386 unlink sys_unlink
@@ -78,7 +78,7 @@
7869 i386 ssetmask sys_ssetmask 7869 i386 ssetmask sys_ssetmask
7970 i386 setreuid sys_setreuid16 7970 i386 setreuid sys_setreuid16
8071 i386 setregid sys_setregid16 8071 i386 setregid sys_setregid16
8172 i386 sigsuspend sys_sigsuspend sys_sigsuspend 8172 i386 sigsuspend sys_sigsuspend
8273 i386 sigpending sys_sigpending compat_sys_sigpending 8273 i386 sigpending sys_sigpending compat_sys_sigpending
8374 i386 sethostname sys_sethostname 8374 i386 sethostname sys_sethostname
8475 i386 setrlimit sys_setrlimit compat_sys_setrlimit 8475 i386 setrlimit sys_setrlimit compat_sys_setrlimit
@@ -96,7 +96,7 @@
9687 i386 swapon sys_swapon 9687 i386 swapon sys_swapon
9788 i386 reboot sys_reboot 9788 i386 reboot sys_reboot
9889 i386 readdir sys_old_readdir compat_sys_old_readdir 9889 i386 readdir sys_old_readdir compat_sys_old_readdir
9990 i386 mmap sys_old_mmap sys32_mmap 9990 i386 mmap sys_old_mmap compat_sys_x86_mmap
10091 i386 munmap sys_munmap 10091 i386 munmap sys_munmap
10192 i386 truncate sys_truncate compat_sys_truncate 10192 i386 truncate sys_truncate compat_sys_truncate
10293 i386 ftruncate sys_ftruncate compat_sys_ftruncate 10293 i386 ftruncate sys_ftruncate compat_sys_ftruncate
@@ -126,7 +126,7 @@
126117 i386 ipc sys_ipc compat_sys_ipc 126117 i386 ipc sys_ipc compat_sys_ipc
127118 i386 fsync sys_fsync 127118 i386 fsync sys_fsync
128119 i386 sigreturn sys_sigreturn sys32_sigreturn 128119 i386 sigreturn sys_sigreturn sys32_sigreturn
129120 i386 clone sys_clone stub32_clone 129120 i386 clone sys_clone compat_sys_x86_clone
130121 i386 setdomainname sys_setdomainname 130121 i386 setdomainname sys_setdomainname
131122 i386 uname sys_newuname 131122 i386 uname sys_newuname
132123 i386 modify_ldt sys_modify_ldt 132123 i386 modify_ldt sys_modify_ldt
@@ -186,8 +186,8 @@
186177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait 186177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
187178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 187178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
188179 i386 rt_sigsuspend sys_rt_sigsuspend 188179 i386 rt_sigsuspend sys_rt_sigsuspend
189180 i386 pread64 sys_pread64 sys32_pread 189180 i386 pread64 sys_pread64 compat_sys_x86_pread
190181 i386 pwrite64 sys_pwrite64 sys32_pwrite 190181 i386 pwrite64 sys_pwrite64 compat_sys_x86_pwrite
191182 i386 chown sys_chown16 191182 i386 chown sys_chown16
192183 i386 getcwd sys_getcwd 192183 i386 getcwd sys_getcwd
193184 i386 capget sys_capget 193184 i386 capget sys_capget
@@ -196,14 +196,14 @@
196187 i386 sendfile sys_sendfile compat_sys_sendfile 196187 i386 sendfile sys_sendfile compat_sys_sendfile
197188 i386 getpmsg 197188 i386 getpmsg
198189 i386 putpmsg 198189 i386 putpmsg
199190 i386 vfork sys_vfork sys_vfork 199190 i386 vfork sys_vfork
200191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit 200191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit
201192 i386 mmap2 sys_mmap_pgoff 201192 i386 mmap2 sys_mmap_pgoff
202193 i386 truncate64 sys_truncate64 sys32_truncate64 202193 i386 truncate64 sys_truncate64 compat_sys_x86_truncate64
203194 i386 ftruncate64 sys_ftruncate64 sys32_ftruncate64 203194 i386 ftruncate64 sys_ftruncate64 compat_sys_x86_ftruncate64
204195 i386 stat64 sys_stat64 sys32_stat64 204195 i386 stat64 sys_stat64 compat_sys_x86_stat64
205196 i386 lstat64 sys_lstat64 sys32_lstat64 205196 i386 lstat64 sys_lstat64 compat_sys_x86_lstat64
206197 i386 fstat64 sys_fstat64 sys32_fstat64 206197 i386 fstat64 sys_fstat64 compat_sys_x86_fstat64
207198 i386 lchown32 sys_lchown 207198 i386 lchown32 sys_lchown
208199 i386 getuid32 sys_getuid 208199 i386 getuid32 sys_getuid
209200 i386 getgid32 sys_getgid 209200 i386 getgid32 sys_getgid
@@ -231,7 +231,7 @@
231# 222 is unused 231# 222 is unused
232# 223 is unused 232# 223 is unused
233224 i386 gettid sys_gettid 233224 i386 gettid sys_gettid
234225 i386 readahead sys_readahead sys32_readahead 234225 i386 readahead sys_readahead compat_sys_x86_readahead
235226 i386 setxattr sys_setxattr 235226 i386 setxattr sys_setxattr
236227 i386 lsetxattr sys_lsetxattr 236227 i386 lsetxattr sys_lsetxattr
237228 i386 fsetxattr sys_fsetxattr 237228 i386 fsetxattr sys_fsetxattr
@@ -256,7 +256,7 @@
256247 i386 io_getevents sys_io_getevents compat_sys_io_getevents 256247 i386 io_getevents sys_io_getevents compat_sys_io_getevents
257248 i386 io_submit sys_io_submit compat_sys_io_submit 257248 i386 io_submit sys_io_submit compat_sys_io_submit
258249 i386 io_cancel sys_io_cancel 258249 i386 io_cancel sys_io_cancel
259250 i386 fadvise64 sys_fadvise64 sys32_fadvise64 259250 i386 fadvise64 sys_fadvise64 compat_sys_x86_fadvise64
260# 251 is available for reuse (was briefly sys_set_zone_reclaim) 260# 251 is available for reuse (was briefly sys_set_zone_reclaim)
261252 i386 exit_group sys_exit_group 261252 i386 exit_group sys_exit_group
262253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie 262253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
@@ -278,7 +278,7 @@
278269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 278269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
279270 i386 tgkill sys_tgkill 279270 i386 tgkill sys_tgkill
280271 i386 utimes sys_utimes compat_sys_utimes 280271 i386 utimes sys_utimes compat_sys_utimes
281272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64 281272 i386 fadvise64_64 sys_fadvise64_64 compat_sys_x86_fadvise64_64
282273 i386 vserver 282273 i386 vserver
283274 i386 mbind sys_mbind 283274 i386 mbind sys_mbind
284275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy 284275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
@@ -306,7 +306,7 @@
306297 i386 mknodat sys_mknodat 306297 i386 mknodat sys_mknodat
307298 i386 fchownat sys_fchownat 307298 i386 fchownat sys_fchownat
308299 i386 futimesat sys_futimesat compat_sys_futimesat 308299 i386 futimesat sys_futimesat compat_sys_futimesat
309300 i386 fstatat64 sys_fstatat64 sys32_fstatat 309300 i386 fstatat64 sys_fstatat64 compat_sys_x86_fstatat
310301 i386 unlinkat sys_unlinkat 310301 i386 unlinkat sys_unlinkat
311302 i386 renameat sys_renameat 311302 i386 renameat sys_renameat
312303 i386 linkat sys_linkat 312303 i386 linkat sys_linkat
@@ -320,7 +320,7 @@
320311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list 320311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list
321312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list 321312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list
322313 i386 splice sys_splice 322313 i386 splice sys_splice
323314 i386 sync_file_range sys_sync_file_range sys32_sync_file_range 323314 i386 sync_file_range sys_sync_file_range compat_sys_x86_sync_file_range
324315 i386 tee sys_tee 324315 i386 tee sys_tee
325316 i386 vmsplice sys_vmsplice compat_sys_vmsplice 325316 i386 vmsplice sys_vmsplice compat_sys_vmsplice
326317 i386 move_pages sys_move_pages compat_sys_move_pages 326317 i386 move_pages sys_move_pages compat_sys_move_pages
@@ -330,7 +330,7 @@
330321 i386 signalfd sys_signalfd compat_sys_signalfd 330321 i386 signalfd sys_signalfd compat_sys_signalfd
331322 i386 timerfd_create sys_timerfd_create 331322 i386 timerfd_create sys_timerfd_create
332323 i386 eventfd sys_eventfd 332323 i386 eventfd sys_eventfd
333324 i386 fallocate sys_fallocate sys32_fallocate 333324 i386 fallocate sys_fallocate compat_sys_x86_fallocate
334325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime 334325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime
335326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime 335326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime
336327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4 336327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 577fa8adb785..8560ef68a9d6 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -42,10 +42,8 @@
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
 
-static enum { EMULATE, NATIVE, NONE } vsyscall_mode =
-#if defined(CONFIG_LEGACY_VSYSCALL_NATIVE)
-	NATIVE;
-#elif defined(CONFIG_LEGACY_VSYSCALL_NONE)
+static enum { EMULATE, NONE } vsyscall_mode =
+#ifdef CONFIG_LEGACY_VSYSCALL_NONE
 	NONE;
 #else
 	EMULATE;
@@ -56,8 +54,6 @@ static int __init vsyscall_setup(char *str)
 	if (str) {
 		if (!strcmp("emulate", str))
 			vsyscall_mode = EMULATE;
-		else if (!strcmp("native", str))
-			vsyscall_mode = NATIVE;
 		else if (!strcmp("none", str))
 			vsyscall_mode = NONE;
 		else
@@ -139,10 +135,6 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 
 	WARN_ON_ONCE(address != regs->ip);
 
-	/* This should be unreachable in NATIVE mode. */
-	if (WARN_ON(vsyscall_mode == NATIVE))
-		return false;
-
 	if (vsyscall_mode == NONE) {
 		warn_bad_vsyscall(KERN_INFO, regs,
 				  "vsyscall attempted with vsyscall=none");
@@ -370,9 +362,7 @@ void __init map_vsyscall(void)
 
 	if (vsyscall_mode != NONE) {
 		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
-			     vsyscall_mode == NATIVE
-			     ? PAGE_KERNEL_VSYSCALL
-			     : PAGE_KERNEL_VVAR);
+			     PAGE_KERNEL_VVAR);
 		set_vsyscall_pgtable_user_bits(swapper_pg_dir);
 	}
 
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 96cd33bbfc85..6512498bbef6 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -51,15 +51,14 @@
51#define AA(__x) ((unsigned long)(__x)) 51#define AA(__x) ((unsigned long)(__x))
52 52
53 53
54asmlinkage long sys32_truncate64(const char __user *filename, 54COMPAT_SYSCALL_DEFINE3(x86_truncate64, const char __user *, filename,
55 unsigned long offset_low, 55 unsigned long, offset_low, unsigned long, offset_high)
56 unsigned long offset_high)
57{ 56{
58 return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low); 57 return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low);
59} 58}
60 59
61asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low, 60COMPAT_SYSCALL_DEFINE3(x86_ftruncate64, unsigned int, fd,
62 unsigned long offset_high) 61 unsigned long, offset_low, unsigned long, offset_high)
63{ 62{
64 return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low); 63 return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low);
65} 64}
@@ -96,8 +95,8 @@ static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
96 return 0; 95 return 0;
97} 96}
98 97
99asmlinkage long sys32_stat64(const char __user *filename, 98COMPAT_SYSCALL_DEFINE2(x86_stat64, const char __user *, filename,
100 struct stat64 __user *statbuf) 99 struct stat64 __user *, statbuf)
101{ 100{
102 struct kstat stat; 101 struct kstat stat;
103 int ret = vfs_stat(filename, &stat); 102 int ret = vfs_stat(filename, &stat);
@@ -107,8 +106,8 @@ asmlinkage long sys32_stat64(const char __user *filename,
107 return ret; 106 return ret;
108} 107}
109 108
110asmlinkage long sys32_lstat64(const char __user *filename, 109COMPAT_SYSCALL_DEFINE2(x86_lstat64, const char __user *, filename,
111 struct stat64 __user *statbuf) 110 struct stat64 __user *, statbuf)
112{ 111{
113 struct kstat stat; 112 struct kstat stat;
114 int ret = vfs_lstat(filename, &stat); 113 int ret = vfs_lstat(filename, &stat);
@@ -117,7 +116,8 @@ asmlinkage long sys32_lstat64(const char __user *filename,
117 return ret; 116 return ret;
118} 117}
119 118
120asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf) 119COMPAT_SYSCALL_DEFINE2(x86_fstat64, unsigned int, fd,
120 struct stat64 __user *, statbuf)
121{ 121{
122 struct kstat stat; 122 struct kstat stat;
123 int ret = vfs_fstat(fd, &stat); 123 int ret = vfs_fstat(fd, &stat);
@@ -126,8 +126,9 @@ asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
126 return ret; 126 return ret;
127} 127}
128 128
129asmlinkage long sys32_fstatat(unsigned int dfd, const char __user *filename, 129COMPAT_SYSCALL_DEFINE4(x86_fstatat, unsigned int, dfd,
130 struct stat64 __user *statbuf, int flag) 130 const char __user *, filename,
131 struct stat64 __user *, statbuf, int, flag)
131{ 132{
132 struct kstat stat; 133 struct kstat stat;
133 int error; 134 int error;
@@ -153,7 +154,7 @@ struct mmap_arg_struct32 {
153 unsigned int offset; 154 unsigned int offset;
154}; 155};
155 156
156asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg) 157COMPAT_SYSCALL_DEFINE1(x86_mmap, struct mmap_arg_struct32 __user *, arg)
157{ 158{
158 struct mmap_arg_struct32 a; 159 struct mmap_arg_struct32 a;
159 160
@@ -167,22 +168,22 @@ asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg)
167 a.offset>>PAGE_SHIFT); 168 a.offset>>PAGE_SHIFT);
168} 169}
169 170
170asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr, 171COMPAT_SYSCALL_DEFINE3(x86_waitpid, compat_pid_t, pid, unsigned int __user *,
171 int options) 172 stat_addr, int, options)
172{ 173{
173 return compat_sys_wait4(pid, stat_addr, options, NULL); 174 return compat_sys_wait4(pid, stat_addr, options, NULL);
174} 175}
175 176
176/* warning: next two assume little endian */ 177/* warning: next two assume little endian */
177asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count, 178COMPAT_SYSCALL_DEFINE5(x86_pread, unsigned int, fd, char __user *, ubuf,
178 u32 poslo, u32 poshi) 179 u32, count, u32, poslo, u32, poshi)
179{ 180{
180 return sys_pread64(fd, ubuf, count, 181 return sys_pread64(fd, ubuf, count,
181 ((loff_t)AA(poshi) << 32) | AA(poslo)); 182 ((loff_t)AA(poshi) << 32) | AA(poslo));
182} 183}
183 184
184asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf, 185COMPAT_SYSCALL_DEFINE5(x86_pwrite, unsigned int, fd, const char __user *, ubuf,
185 u32 count, u32 poslo, u32 poshi) 186 u32, count, u32, poslo, u32, poshi)
186{ 187{
187 return sys_pwrite64(fd, ubuf, count, 188 return sys_pwrite64(fd, ubuf, count,
188 ((loff_t)AA(poshi) << 32) | AA(poslo)); 189 ((loff_t)AA(poshi) << 32) | AA(poslo));
@@ -193,8 +194,9 @@ asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf,
193 * Some system calls that need sign extended arguments. This could be 194 * Some system calls that need sign extended arguments. This could be
194 * done by a generic wrapper. 195 * done by a generic wrapper.
195 */ 196 */
196long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, 197COMPAT_SYSCALL_DEFINE6(x86_fadvise64_64, int, fd, __u32, offset_low,
197 __u32 len_low, __u32 len_high, int advice) 198 __u32, offset_high, __u32, len_low, __u32, len_high,
199 int, advice)
198{ 200{
199 return sys_fadvise64_64(fd, 201 return sys_fadvise64_64(fd,
200 (((u64)offset_high)<<32) | offset_low, 202 (((u64)offset_high)<<32) | offset_low,
@@ -202,31 +204,43 @@ long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
202 advice); 204 advice);
203} 205}
204 206
205asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, 207COMPAT_SYSCALL_DEFINE4(x86_readahead, int, fd, unsigned int, off_lo,
206 size_t count) 208 unsigned int, off_hi, size_t, count)
207{ 209{
208 return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count); 210 return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count);
209} 211}
210 212
211asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi, 213COMPAT_SYSCALL_DEFINE6(x86_sync_file_range, int, fd, unsigned int, off_low,
212 unsigned n_low, unsigned n_hi, int flags) 214 unsigned int, off_hi, unsigned int, n_low,
215 unsigned int, n_hi, int, flags)
213{ 216{
214 return sys_sync_file_range(fd, 217 return sys_sync_file_range(fd,
215 ((u64)off_hi << 32) | off_low, 218 ((u64)off_hi << 32) | off_low,
216 ((u64)n_hi << 32) | n_low, flags); 219 ((u64)n_hi << 32) | n_low, flags);
217} 220}
218 221
219asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi, 222COMPAT_SYSCALL_DEFINE5(x86_fadvise64, int, fd, unsigned int, offset_lo,
220 size_t len, int advice) 223 unsigned int, offset_hi, size_t, len, int, advice)
221{ 224{
222 return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo, 225 return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo,
223 len, advice); 226 len, advice);
224} 227}
225 228
226asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo, 229COMPAT_SYSCALL_DEFINE6(x86_fallocate, int, fd, int, mode,
227 unsigned offset_hi, unsigned len_lo, 230 unsigned int, offset_lo, unsigned int, offset_hi,
228 unsigned len_hi) 231 unsigned int, len_lo, unsigned int, len_hi)
229{ 232{
230 return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo, 233 return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,
231 ((u64)len_hi << 32) | len_lo); 234 ((u64)len_hi << 32) | len_lo);
232} 235}
236
237/*
238 * The 32-bit clone ABI is CONFIG_CLONE_BACKWARDS
239 */
240COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags,
241 unsigned long, newsp, int __user *, parent_tidptr,
242 unsigned long, tls_val, int __user *, child_tidptr)
243{
244 return sys_clone(clone_flags, newsp, parent_tidptr, child_tidptr,
245 tls_val);
246}
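
All of these compat wrappers share one idiom: a 64-bit offset or length arrives from 32-bit userspace as two 32-bit halves and is reassembled with a shift-and-or into a signed 64-bit loff_t. A standalone sketch of that recombination (hypothetical values, plain userspace C, not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t offset_low  = 0x89abcdefu;	/* hypothetical low half */
	uint32_t offset_high = 0x01234567u;	/* hypothetical high half */

	/*
	 * Widen the high half to 64 bits *before* shifting; shifting a
	 * 32-bit value left by 32 would be undefined behaviour in C.
	 */
	int64_t offset = ((int64_t)offset_high << 32) | offset_low;

	printf("0x%016llx\n", (unsigned long long)offset);
	return 0;
}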
diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h
index 4d4015ddcf26..c356098b6fb9 100644
--- a/arch/x86/include/asm/apm.h
+++ b/arch/x86/include/asm/apm.h
@@ -7,6 +7,8 @@
 #ifndef _ASM_X86_MACH_DEFAULT_APM_H
 #define _ASM_X86_MACH_DEFAULT_APM_H
 
+#include <asm/nospec-branch.h>
+
 #ifdef APM_ZERO_SEGS
 #	define APM_DO_ZERO_SEGS \
 		"pushl %%ds\n\t" \
@@ -32,6 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
 	 * N.B. We do NOT need a cld after the BIOS call
 	 * because we always save and restore the flags.
 	 */
+	firmware_restrict_branch_speculation_start();
 	__asm__ __volatile__(APM_DO_ZERO_SEGS
 		"pushl %%edi\n\t"
 		"pushl %%ebp\n\t"
@@ -44,6 +47,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
 		  "=S" (*esi)
 		: "a" (func), "b" (ebx_in), "c" (ecx_in)
 		: "memory", "cc");
+	firmware_restrict_branch_speculation_end();
 }
 
 static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
@@ -56,6 +60,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
 	 * N.B. We do NOT need a cld after the BIOS call
 	 * because we always save and restore the flags.
 	 */
+	firmware_restrict_branch_speculation_start();
 	__asm__ __volatile__(APM_DO_ZERO_SEGS
 		"pushl %%edi\n\t"
 		"pushl %%ebp\n\t"
@@ -68,6 +73,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
 		  "=S" (si)
 		: "a" (func), "b" (ebx_in), "c" (ecx_in)
 		: "memory", "cc");
+	firmware_restrict_branch_speculation_end();
 	return error;
 }
 
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 4d111616524b..1908214b9125 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -38,7 +38,4 @@ INDIRECT_THUNK(dx)
 INDIRECT_THUNK(si)
 INDIRECT_THUNK(di)
 INDIRECT_THUNK(bp)
-asmlinkage void __fill_rsb(void);
-asmlinkage void __clear_rsb(void);
-
 #endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 0dfe4d3f74e2..d554c11e01ff 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -213,6 +213,7 @@
 #define X86_FEATURE_SEV			( 7*32+20) /* AMD Secure Encrypted Virtualization */
 
 #define X86_FEATURE_USE_IBPB		( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
+#define X86_FEATURE_USE_IBRS_FW		( 7*32+22) /* "" Use IBRS during runtime firmware calls */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
@@ -315,6 +316,7 @@
 #define X86_FEATURE_VPCLMULQDQ		(16*32+10) /* Carry-Less Multiplication Double Quadword */
 #define X86_FEATURE_AVX512_VNNI		(16*32+11) /* Vector Neural Network Instructions */
 #define X86_FEATURE_AVX512_BITALG	(16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
+#define X86_FEATURE_TME			(16*32+13) /* Intel Total Memory Encryption */
 #define X86_FEATURE_AVX512_VPOPCNTDQ	(16*32+14) /* POPCNT for vectors of DW/QW */
 #define X86_FEATURE_LA57		(16*32+16) /* 5-level page tables */
 #define X86_FEATURE_RDPID		(16*32+22) /* RDPID instruction */
@@ -327,6 +329,7 @@
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
 #define X86_FEATURE_AVX512_4VNNIW	(18*32+ 2) /* AVX-512 Neural Network Instructions */
 #define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
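
Each of these defines packs a capability-word index and a bit position into one number: (w*32 + b) means bit b of capability word w in the per-CPU feature bitmap. A quick standalone check of how the new X86_FEATURE_USE_IBRS_FW value decomposes (illustration only, not kernel code):

#include <stdio.h>

/* Mirrors the definition added above: word 7, bit 22. */
#define X86_FEATURE_USE_IBRS_FW (7 * 32 + 22)

int main(void)
{
	unsigned int feature = X86_FEATURE_USE_IBRS_FW;

	/* The kernel's cpu_has()/boot_cpu_has() helpers index the
	 * x86_capability[] words the same way: word = n/32, bit = n%32. */
	printf("feature %u -> word %u, bit %u\n",
	       feature, feature / 32, feature % 32);
	return 0;
}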
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 85f6ccb80b91..a399c1ebf6f0 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -6,6 +6,7 @@
 #include <asm/pgtable.h>
 #include <asm/processor-flags.h>
 #include <asm/tlb.h>
+#include <asm/nospec-branch.h>
 
 /*
  * We map the EFI regions needed for runtime services non-contiguously,
@@ -36,8 +37,18 @@
 
 extern asmlinkage unsigned long efi_call_phys(void *, ...);
 
-#define arch_efi_call_virt_setup()	kernel_fpu_begin()
-#define arch_efi_call_virt_teardown()	kernel_fpu_end()
+#define arch_efi_call_virt_setup()					\
+({									\
+	kernel_fpu_begin();						\
+	firmware_restrict_branch_speculation_start();			\
+})
+
+#define arch_efi_call_virt_teardown()					\
+({									\
+	firmware_restrict_branch_speculation_end();			\
+	kernel_fpu_end();						\
+})
+
 
 /*
  * Wrap all the virtual calls in a way that forces the parameters on the stack.
@@ -73,6 +84,7 @@ struct efi_scratch {
 	efi_sync_low_kernel_mappings();					\
 	preempt_disable();						\
 	__kernel_fpu_begin();						\
+	firmware_restrict_branch_speculation_start();			\
 									\
 	if (efi_scratch.use_pgd) {					\
 		efi_scratch.prev_cr3 = __read_cr3();			\
@@ -91,6 +103,7 @@ struct efi_scratch {
 		__flush_tlb_all();					\
 	}								\
 									\
+	firmware_restrict_branch_speculation_end();			\
 	__kernel_fpu_end();						\
 	preempt_enable();						\
 })
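
The APM and EFI hunks follow the same pattern: bracket every call into runtime firmware with firmware_restrict_branch_speculation_start()/_end(), so IBRS can cover code the kernel cannot rebuild with retpolines. A hypothetical in-kernel caller, shown only as a sketch of the pattern (the helper and its argument are invented for illustration, not part of this patch):

#include <asm/nospec-branch.h>	/* firmware_restrict_branch_speculation_{start,end}() */

/*
 * Hypothetical wrapper: fw_entry stands in for any pointer into runtime
 * firmware. The bracket keeps the firmware's indirect branches covered
 * (via IBRS, when X86_FEATURE_USE_IBRS_FW is set) while it runs.
 */
static unsigned long guarded_firmware_call(unsigned long (*fw_entry)(unsigned long),
					   unsigned long arg)
{
	unsigned long ret;

	firmware_restrict_branch_speculation_start();
	ret = fw_entry(arg);
	firmware_restrict_branch_speculation_end();

	return ret;
}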
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 55520cec8b27..7fb1047d61c7 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -37,7 +37,12 @@ struct cpu_signature {
 
 struct device;
 
-enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND };
+enum ucode_state {
+	UCODE_OK	= 0,
+	UCODE_UPDATED,
+	UCODE_NFOUND,
+	UCODE_ERROR,
+};
 
 struct microcode_ops {
 	enum ucode_state (*request_microcode_user) (int cpu,
@@ -54,7 +59,7 @@ struct microcode_ops {
54 * are being called. 59 * are being called.
55 * See also the "Synchronization" section in microcode_core.c. 60 * See also the "Synchronization" section in microcode_core.c.
56 */ 61 */
57 int (*apply_microcode) (int cpu); 62 enum ucode_state (*apply_microcode) (int cpu);
58 int (*collect_cpu_info) (int cpu, struct cpu_signature *csig); 63 int (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
59}; 64};
60 65
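With apply_microcode() now returning enum ucode_state, callers can tell "already current" (UCODE_OK) apart from "new revision written" (UCODE_UPDATED), and only treat UCODE_ERROR as a hard failure, as apply_microcode_on_target() does further down. A hedged user-space sketch of that calling convention (the apply function is a stub):

#include <stdio.h>

enum ucode_state {
        UCODE_OK = 0,           /* already at (or above) the cached revision */
        UCODE_UPDATED,          /* a new revision was actually written */
        UCODE_NFOUND,           /* no matching patch in the cache */
        UCODE_ERROR,            /* the update was attempted and failed */
};

/* Stub standing in for microcode_ops->apply_microcode(cpu). */
static enum ucode_state apply_microcode_stub(int cpu)
{
        return (cpu & 1) ? UCODE_OK : UCODE_UPDATED;
}

static int apply_on_cpu(int cpu)
{
        enum ucode_state err = apply_microcode_stub(cpu);

        /* Same policy as apply_microcode_on_target(): only UCODE_ERROR is
         * treated as a hard failure. */
        return (err == UCODE_ERROR) ? 1 : 0;
}

int main(void)
{
        printf("cpu0 -> %d, cpu1 -> %d\n", apply_on_cpu(0), apply_on_cpu(1));
        return 0;
}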
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index c931b88982a0..1de72ce514cd 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -74,6 +74,7 @@ static inline void *ldt_slot_va(int slot)
74 return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); 74 return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
75#else 75#else
76 BUG(); 76 BUG();
77 return (void *)fix_to_virt(FIX_HOLE);
77#endif 78#endif
78} 79}
79 80
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 81a1be326571..d0dabeae0505 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -8,6 +8,50 @@
8#include <asm/cpufeatures.h> 8#include <asm/cpufeatures.h>
9#include <asm/msr-index.h> 9#include <asm/msr-index.h>
10 10
11/*
12 * Fill the CPU return stack buffer.
13 *
14 * Each entry in the RSB, if used for a speculative 'ret', contains an
15 * infinite 'pause; lfence; jmp' loop to capture speculative execution.
16 *
17 * This is required in various cases for retpoline and IBRS-based
18 * mitigations for the Spectre variant 2 vulnerability. Sometimes to
19 * eliminate potentially bogus entries from the RSB, and sometimes
20 * purely to ensure that it doesn't get empty, which on some CPUs would
21 * allow predictions from other (unwanted!) sources to be used.
22 *
23 * We define a CPP macro such that it can be used from both .S files and
24 * inline assembly. It's possible to do a .macro and then include that
25 * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
26 */
27
28#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
29#define RSB_FILL_LOOPS 16 /* To avoid underflow */
30
31/*
32 * Google experimented with loop-unrolling and this turned out to be
 33 * the optimal version - two calls, each with their own speculation
34 * trap should their return address end up getting used, in a loop.
35 */
36#define __FILL_RETURN_BUFFER(reg, nr, sp) \
37 mov $(nr/2), reg; \
38771: \
39 call 772f; \
40773: /* speculation trap */ \
41 pause; \
42 lfence; \
43 jmp 773b; \
44772: \
45 call 774f; \
46775: /* speculation trap */ \
47 pause; \
48 lfence; \
49 jmp 775b; \
50774: \
51 dec reg; \
52 jnz 771b; \
53 add $(BITS_PER_LONG/8) * nr, sp;
54
11#ifdef __ASSEMBLY__ 55#ifdef __ASSEMBLY__
12 56
13/* 57/*
@@ -24,6 +68,18 @@
24.endm 68.endm
25 69
26/* 70/*
71 * This should be used immediately before an indirect jump/call. It tells
72 * objtool the subsequent indirect jump/call is vouched safe for retpoline
73 * builds.
74 */
75.macro ANNOTATE_RETPOLINE_SAFE
76 .Lannotate_\@:
77 .pushsection .discard.retpoline_safe
78 _ASM_PTR .Lannotate_\@
79 .popsection
80.endm
81
82/*
27 * These are the bare retpoline primitives for indirect jmp and call. 83 * These are the bare retpoline primitives for indirect jmp and call.
28 * Do not use these directly; they only exist to make the ALTERNATIVE 84 * Do not use these directly; they only exist to make the ALTERNATIVE
29 * invocation below less ugly. 85 * invocation below less ugly.
@@ -59,9 +115,9 @@
59.macro JMP_NOSPEC reg:req 115.macro JMP_NOSPEC reg:req
60#ifdef CONFIG_RETPOLINE 116#ifdef CONFIG_RETPOLINE
61 ANNOTATE_NOSPEC_ALTERNATIVE 117 ANNOTATE_NOSPEC_ALTERNATIVE
62 ALTERNATIVE_2 __stringify(jmp *\reg), \ 118 ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \
63 __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ 119 __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
64 __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD 120 __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
65#else 121#else
66 jmp *\reg 122 jmp *\reg
67#endif 123#endif
@@ -70,18 +126,25 @@
70.macro CALL_NOSPEC reg:req 126.macro CALL_NOSPEC reg:req
71#ifdef CONFIG_RETPOLINE 127#ifdef CONFIG_RETPOLINE
72 ANNOTATE_NOSPEC_ALTERNATIVE 128 ANNOTATE_NOSPEC_ALTERNATIVE
73 ALTERNATIVE_2 __stringify(call *\reg), \ 129 ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \
74 __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ 130 __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
75 __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD 131 __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD
76#else 132#else
77 call *\reg 133 call *\reg
78#endif 134#endif
79.endm 135.endm
80 136
81/* This clobbers the BX register */ 137 /*
82.macro FILL_RETURN_BUFFER nr:req ftr:req 138 * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
139 * monstrosity above, manually.
140 */
141.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
83#ifdef CONFIG_RETPOLINE 142#ifdef CONFIG_RETPOLINE
84 ALTERNATIVE "", "call __clear_rsb", \ftr 143 ANNOTATE_NOSPEC_ALTERNATIVE
144 ALTERNATIVE "jmp .Lskip_rsb_\@", \
145 __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
146 \ftr
147.Lskip_rsb_\@:
85#endif 148#endif
86.endm 149.endm
87 150
@@ -93,6 +156,12 @@
93 ".long 999b - .\n\t" \ 156 ".long 999b - .\n\t" \
94 ".popsection\n\t" 157 ".popsection\n\t"
95 158
159#define ANNOTATE_RETPOLINE_SAFE \
160 "999:\n\t" \
161 ".pushsection .discard.retpoline_safe\n\t" \
162 _ASM_PTR " 999b\n\t" \
163 ".popsection\n\t"
164
96#if defined(CONFIG_X86_64) && defined(RETPOLINE) 165#if defined(CONFIG_X86_64) && defined(RETPOLINE)
97 166
98/* 167/*
@@ -102,6 +171,7 @@
102# define CALL_NOSPEC \ 171# define CALL_NOSPEC \
103 ANNOTATE_NOSPEC_ALTERNATIVE \ 172 ANNOTATE_NOSPEC_ALTERNATIVE \
104 ALTERNATIVE( \ 173 ALTERNATIVE( \
174 ANNOTATE_RETPOLINE_SAFE \
105 "call *%[thunk_target]\n", \ 175 "call *%[thunk_target]\n", \
106 "call __x86_indirect_thunk_%V[thunk_target]\n", \ 176 "call __x86_indirect_thunk_%V[thunk_target]\n", \
107 X86_FEATURE_RETPOLINE) 177 X86_FEATURE_RETPOLINE)
@@ -156,26 +226,54 @@ extern char __indirect_thunk_end[];
156static inline void vmexit_fill_RSB(void) 226static inline void vmexit_fill_RSB(void)
157{ 227{
158#ifdef CONFIG_RETPOLINE 228#ifdef CONFIG_RETPOLINE
159 alternative_input("", 229 unsigned long loops;
160 "call __fill_rsb", 230
161 X86_FEATURE_RETPOLINE, 231 asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
162 ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); 232 ALTERNATIVE("jmp 910f",
233 __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
234 X86_FEATURE_RETPOLINE)
235 "910:"
236 : "=r" (loops), ASM_CALL_CONSTRAINT
237 : : "memory" );
163#endif 238#endif
164} 239}
165 240
241#define alternative_msr_write(_msr, _val, _feature) \
242 asm volatile(ALTERNATIVE("", \
243 "movl %[msr], %%ecx\n\t" \
244 "movl %[val], %%eax\n\t" \
245 "movl $0, %%edx\n\t" \
246 "wrmsr", \
247 _feature) \
248 : : [msr] "i" (_msr), [val] "i" (_val) \
249 : "eax", "ecx", "edx", "memory")
250
166static inline void indirect_branch_prediction_barrier(void) 251static inline void indirect_branch_prediction_barrier(void)
167{ 252{
168 asm volatile(ALTERNATIVE("", 253 alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
169 "movl %[msr], %%ecx\n\t" 254 X86_FEATURE_USE_IBPB);
170 "movl %[val], %%eax\n\t"
171 "movl $0, %%edx\n\t"
172 "wrmsr",
173 X86_FEATURE_USE_IBPB)
174 : : [msr] "i" (MSR_IA32_PRED_CMD),
175 [val] "i" (PRED_CMD_IBPB)
176 : "eax", "ecx", "edx", "memory");
177} 255}
178 256
257/*
258 * With retpoline, we must use IBRS to restrict branch prediction
259 * before calling into firmware.
260 *
261 * (Implemented as CPP macros due to header hell.)
262 */
263#define firmware_restrict_branch_speculation_start() \
264do { \
265 preempt_disable(); \
266 alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \
267 X86_FEATURE_USE_IBRS_FW); \
268} while (0)
269
270#define firmware_restrict_branch_speculation_end() \
271do { \
272 alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \
273 X86_FEATURE_USE_IBRS_FW); \
274 preempt_enable(); \
275} while (0)
276
179#endif /* __ASSEMBLY__ */ 277#endif /* __ASSEMBLY__ */
180 278
181/* 279/*
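One point worth spelling out about __FILL_RETURN_BUFFER() above: each call in the unrolled loop leaves a return address on the stack that no ret ever consumes, so after nr calls the stack pointer must be moved back by (BITS_PER_LONG/8) * nr bytes, which is exactly what the trailing add does. A trivially runnable sketch of that accounting (user-space C; constants copied from the header, BITS_PER_LONG=64 assumed for x86-64):

#include <stdio.h>

#define BITS_PER_LONG   64      /* assumption: 64-bit build */
#define RSB_CLEAR_LOOPS 32      /* to forcibly overwrite all entries */
#define RSB_FILL_LOOPS  16      /* to avoid underflow */

static void account(const char *what, unsigned int nr)
{
        /* nr/2 loop iterations, two calls each; every call leaves one
         * word-sized return address that the final add drops again. */
        printf("%s: %2u iterations, %3u bytes of stack to drop\n",
               what, nr / 2, nr * (BITS_PER_LONG / 8));
}

int main(void)
{
        account("RSB_CLEAR_LOOPS", RSB_CLEAR_LOOPS);    /* 16 iterations, 256 bytes */
        account("RSB_FILL_LOOPS",  RSB_FILL_LOOPS);     /*  8 iterations, 128 bytes */
        return 0;
}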
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 6d3b921ae43a..9be2bf13825b 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -7,6 +7,7 @@
7#ifdef CONFIG_PARAVIRT 7#ifdef CONFIG_PARAVIRT
8#include <asm/pgtable_types.h> 8#include <asm/pgtable_types.h>
9#include <asm/asm.h> 9#include <asm/asm.h>
10#include <asm/nospec-branch.h>
10 11
11#include <asm/paravirt_types.h> 12#include <asm/paravirt_types.h>
12 13
@@ -884,23 +885,27 @@ extern void default_banner(void);
884 885
885#define INTERRUPT_RETURN \ 886#define INTERRUPT_RETURN \
886 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ 887 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \
887 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret)) 888 ANNOTATE_RETPOLINE_SAFE; \
889 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);)
888 890
889#define DISABLE_INTERRUPTS(clobbers) \ 891#define DISABLE_INTERRUPTS(clobbers) \
890 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ 892 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
891 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ 893 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
894 ANNOTATE_RETPOLINE_SAFE; \
892 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ 895 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \
893 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) 896 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
894 897
895#define ENABLE_INTERRUPTS(clobbers) \ 898#define ENABLE_INTERRUPTS(clobbers) \
896 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ 899 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \
897 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ 900 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
901 ANNOTATE_RETPOLINE_SAFE; \
898 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ 902 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
899 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) 903 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
900 904
901#ifdef CONFIG_X86_32 905#ifdef CONFIG_X86_32
902#define GET_CR0_INTO_EAX \ 906#define GET_CR0_INTO_EAX \
903 push %ecx; push %edx; \ 907 push %ecx; push %edx; \
908 ANNOTATE_RETPOLINE_SAFE; \
904 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ 909 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
905 pop %edx; pop %ecx 910 pop %edx; pop %ecx
906#else /* !CONFIG_X86_32 */ 911#else /* !CONFIG_X86_32 */
@@ -922,21 +927,25 @@ extern void default_banner(void);
922 */ 927 */
923#define SWAPGS \ 928#define SWAPGS \
924 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ 929 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
925 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \ 930 ANNOTATE_RETPOLINE_SAFE; \
931 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \
926 ) 932 )
927 933
928#define GET_CR2_INTO_RAX \ 934#define GET_CR2_INTO_RAX \
929 call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2) 935 ANNOTATE_RETPOLINE_SAFE; \
936 call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2);
930 937
931#define USERGS_SYSRET64 \ 938#define USERGS_SYSRET64 \
932 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ 939 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
933 CLBR_NONE, \ 940 CLBR_NONE, \
934 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) 941 ANNOTATE_RETPOLINE_SAFE; \
942 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);)
935 943
936#ifdef CONFIG_DEBUG_ENTRY 944#ifdef CONFIG_DEBUG_ENTRY
937#define SAVE_FLAGS(clobbers) \ 945#define SAVE_FLAGS(clobbers) \
938 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \ 946 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
939 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ 947 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
948 ANNOTATE_RETPOLINE_SAFE; \
940 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \ 949 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \
941 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) 950 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
942#endif 951#endif
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index f624f1f10316..180bc0bff0fb 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -43,6 +43,7 @@
43#include <asm/desc_defs.h> 43#include <asm/desc_defs.h>
44#include <asm/kmap_types.h> 44#include <asm/kmap_types.h>
45#include <asm/pgtable_types.h> 45#include <asm/pgtable_types.h>
46#include <asm/nospec-branch.h>
46 47
47struct page; 48struct page;
48struct thread_struct; 49struct thread_struct;
@@ -392,7 +393,9 @@ int paravirt_disable_iospace(void);
392 * offset into the paravirt_patch_template structure, and can therefore be 393 * offset into the paravirt_patch_template structure, and can therefore be
393 * freely converted back into a structure offset. 394 * freely converted back into a structure offset.
394 */ 395 */
395#define PARAVIRT_CALL "call *%c[paravirt_opptr];" 396#define PARAVIRT_CALL \
397 ANNOTATE_RETPOLINE_SAFE \
398 "call *%c[paravirt_opptr];"
396 399
397/* 400/*
398 * These macros are intended to wrap calls through one of the paravirt 401 * These macros are intended to wrap calls through one of the paravirt
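ANNOTATE_RETPOLINE_SAFE, now prepended to PARAVIRT_CALL above, works by recording the address of the annotated instruction in a discardable section that objtool reads at build time. The push-an-address-into-a-section mechanism can be shown stand-alone; the section and macro names below are made up for the demo, and the section is kept allocatable and writable so the example links as an ordinary program:

#include <stdio.h>

/* Record the address of the annotation point in a dedicated section, the
 * same way ANNOTATE_RETPOLINE_SAFE feeds .discard.retpoline_safe. */
#define ANNOTATE_DEMO_SITE()                                    \
        asm volatile("999:\n\t"                                 \
                     ".pushsection demo_sites,\"aw\"\n\t"       \
                     ".quad 999b\n\t"                           \
                     ".popsection\n\t")

/* The linker provides these for any section whose name is a valid C
 * identifier, which is why the demo section has no leading dot. */
extern const unsigned long __start_demo_sites[];
extern const unsigned long __stop_demo_sites[];

int main(void)
{
        ANNOTATE_DEMO_SITE();
        ANNOTATE_DEMO_SITE();

        for (const unsigned long *p = __start_demo_sites;
             p < __stop_demo_sites; p++)
                printf("annotated site at %#lx\n", *p);

        return 0;
}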
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index c8baa7f12d1b..89d5c8886c85 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -350,14 +350,14 @@ static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
350{ 350{
351 pmdval_t v = native_pmd_val(pmd); 351 pmdval_t v = native_pmd_val(pmd);
352 352
353 return __pmd(v | set); 353 return native_make_pmd(v | set);
354} 354}
355 355
356static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) 356static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
357{ 357{
358 pmdval_t v = native_pmd_val(pmd); 358 pmdval_t v = native_pmd_val(pmd);
359 359
360 return __pmd(v & ~clear); 360 return native_make_pmd(v & ~clear);
361} 361}
362 362
363static inline pmd_t pmd_mkold(pmd_t pmd) 363static inline pmd_t pmd_mkold(pmd_t pmd)
@@ -409,14 +409,14 @@ static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
409{ 409{
410 pudval_t v = native_pud_val(pud); 410 pudval_t v = native_pud_val(pud);
411 411
412 return __pud(v | set); 412 return native_make_pud(v | set);
413} 413}
414 414
415static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) 415static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
416{ 416{
417 pudval_t v = native_pud_val(pud); 417 pudval_t v = native_pud_val(pud);
418 418
419 return __pud(v & ~clear); 419 return native_make_pud(v & ~clear);
420} 420}
421 421
422static inline pud_t pud_mkold(pud_t pud) 422static inline pud_t pud_mkold(pud_t pud)
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index b838c51d8c78..88a056b01db4 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -32,6 +32,7 @@ extern pmd_t initial_pg_pmd[];
32static inline void pgtable_cache_init(void) { } 32static inline void pgtable_cache_init(void) { }
33static inline void check_pgt_cache(void) { } 33static inline void check_pgt_cache(void) { }
34void paging_init(void); 34void paging_init(void);
35void sync_initial_page_table(void);
35 36
36static inline int pgd_large(pgd_t pgd) { return 0; } 37static inline int pgd_large(pgd_t pgd) { return 0; }
37 38
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 163e01a0631d..877bc27718ae 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -28,6 +28,7 @@ extern pgd_t init_top_pgt[];
28#define swapper_pg_dir init_top_pgt 28#define swapper_pg_dir init_top_pgt
29 29
30extern void paging_init(void); 30extern void paging_init(void);
31static inline void sync_initial_page_table(void) { }
31 32
32#define pte_ERROR(e) \ 33#define pte_ERROR(e) \
33 pr_err("%s:%d: bad pte %p(%016lx)\n", \ 34 pr_err("%s:%d: bad pte %p(%016lx)\n", \
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 3696398a9475..acfe755562a6 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -174,7 +174,6 @@ enum page_cache_mode {
174#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) 174#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
175#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) 175#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
176#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE) 176#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE)
177#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
178#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER) 177#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER)
179#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) 178#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
180#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) 179#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
@@ -206,7 +205,6 @@ enum page_cache_mode {
206#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC) 205#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
207#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC) 206#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
208#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC) 207#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
209#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL | _PAGE_ENC)
210#define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC) 208#define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
211 209
212#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) 210#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO)
@@ -323,6 +321,11 @@ static inline pudval_t native_pud_val(pud_t pud)
323#else 321#else
324#include <asm-generic/pgtable-nopud.h> 322#include <asm-generic/pgtable-nopud.h>
325 323
324static inline pud_t native_make_pud(pudval_t val)
325{
326 return (pud_t) { .p4d.pgd = native_make_pgd(val) };
327}
328
326static inline pudval_t native_pud_val(pud_t pud) 329static inline pudval_t native_pud_val(pud_t pud)
327{ 330{
328 return native_pgd_val(pud.p4d.pgd); 331 return native_pgd_val(pud.p4d.pgd);
@@ -344,6 +347,11 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
344#else 347#else
345#include <asm-generic/pgtable-nopmd.h> 348#include <asm-generic/pgtable-nopmd.h>
346 349
350static inline pmd_t native_make_pmd(pmdval_t val)
351{
352 return (pmd_t) { .pud.p4d.pgd = native_make_pgd(val) };
353}
354
347static inline pmdval_t native_pmd_val(pmd_t pmd) 355static inline pmdval_t native_pmd_val(pmd_t pmd)
348{ 356{
349 return native_pgd_val(pmd.pud.p4d.pgd); 357 return native_pgd_val(pmd.pud.p4d.pgd);
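The new native_make_pmd()/native_make_pud() helpers rebuild the typed wrapper from a raw value by going through the folded levels, and pmd_set_flags() and friends (in the pgtable.h hunk earlier) now use them, presumably so the flag helpers stay in the native representation instead of going through the paravirt-aware __pmd()/__pud() constructors. A simplified stand-alone analogue of that wrapping (types cut down, p4d level omitted; not the real kernel definitions):

#include <stdio.h>

typedef unsigned long pgdval_t;
typedef unsigned long pmdval_t;

/* Simplified stand-ins for the kernel's folded page-table types. */
typedef struct { pgdval_t pgd; } pgd_t;
typedef struct { pgd_t pgd; }    pud_t;  /* pud folded onto pgd */
typedef struct { pud_t pud; }    pmd_t;  /* pmd folded onto pud */

static inline pgd_t make_pgd(pgdval_t val) { return (pgd_t) { val }; }

/* Same shape as native_make_pmd(): rebuild the wrapper from a raw value. */
static inline pmd_t make_pmd(pmdval_t val)
{
        return (pmd_t) { .pud.pgd = make_pgd(val) };
}

static inline pmdval_t pmd_val(pmd_t pmd) { return pmd.pud.pgd.pgd; }

static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
{
        /* Mirrors the fix above: go through the constructor, not a cast. */
        return make_pmd(pmd_val(pmd) | set);
}

int main(void)
{
        pmd_t pmd = make_pmd(0x1000);

        pmd = pmd_set_flags(pmd, 0x3);
        printf("pmd value: %#lx\n", pmd_val(pmd));      /* 0x1003 */
        return 0;
}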
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 1bd9ed87606f..b0ccd4847a58 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -977,4 +977,5 @@ bool xen_set_default_idle(void);
977 977
978void stop_this_cpu(void *dummy); 978void stop_this_cpu(void *dummy);
979void df_debug(struct pt_regs *regs, long error_code); 979void df_debug(struct pt_regs *regs, long error_code);
980void microcode_check(void);
980#endif /* _ASM_X86_PROCESSOR_H */ 981#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
index 4e44250e7d0d..d65171120e90 100644
--- a/arch/x86/include/asm/refcount.h
+++ b/arch/x86/include/asm/refcount.h
@@ -67,13 +67,13 @@ static __always_inline __must_check
67bool refcount_sub_and_test(unsigned int i, refcount_t *r) 67bool refcount_sub_and_test(unsigned int i, refcount_t *r)
68{ 68{
69 GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO, 69 GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO,
70 r->refs.counter, "er", i, "%0", e); 70 r->refs.counter, "er", i, "%0", e, "cx");
71} 71}
72 72
73static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r) 73static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
74{ 74{
75 GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO, 75 GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO,
76 r->refs.counter, "%0", e); 76 r->refs.counter, "%0", e, "cx");
77} 77}
78 78
79static __always_inline __must_check 79static __always_inline __must_check
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index f91c365e57c3..4914a3e7c803 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -2,8 +2,7 @@
2#ifndef _ASM_X86_RMWcc 2#ifndef _ASM_X86_RMWcc
3#define _ASM_X86_RMWcc 3#define _ASM_X86_RMWcc
4 4
5#define __CLOBBERS_MEM "memory" 5#define __CLOBBERS_MEM(clb...) "memory", ## clb
6#define __CLOBBERS_MEM_CC_CX "memory", "cc", "cx"
7 6
8#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO) 7#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO)
9 8
@@ -40,18 +39,19 @@ do { \
40#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ 39#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
41 40
42#define GEN_UNARY_RMWcc(op, var, arg0, cc) \ 41#define GEN_UNARY_RMWcc(op, var, arg0, cc) \
43 __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM) 42 __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM())
44 43
45#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc) \ 44#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc, clobbers...)\
46 __GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc, \ 45 __GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc, \
47 __CLOBBERS_MEM_CC_CX) 46 __CLOBBERS_MEM(clobbers))
48 47
49#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ 48#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \
50 __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc, \ 49 __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc, \
51 __CLOBBERS_MEM, vcon (val)) 50 __CLOBBERS_MEM(), vcon (val))
52 51
53#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc) \ 52#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc, \
53 clobbers...) \
54 __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc, \ 54 __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc, \
55 __CLOBBERS_MEM_CC_CX, vcon (val)) 55 __CLOBBERS_MEM(clobbers), vcon (val))
56 56
57#endif /* _ASM_X86_RMWcc */ 57#endif /* _ASM_X86_RMWcc */
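The rmwcc.h rework turns the clobber list into a variadic macro: __CLOBBERS_MEM() expands to just "memory", while __CLOBBERS_MEM("cc", "cx") appends extra clobbers, with the GNU ", ##" paste swallowing the comma when the argument list is empty. A minimal compile-and-run sketch of the same trick (not the kernel macro itself):

#include <stdio.h>

/* Same pattern as __CLOBBERS_MEM(clb...): the GNU ", ##" extension drops
 * the trailing comma when no extra clobbers are passed. */
#define CLOBBERS(clb...)        "memory", ## clb

static long add_one(long v)
{
        asm volatile("inc %0" : "+r" (v) : : CLOBBERS());
        return v;
}

static long add_one_clobbering_cx(long v)
{
        asm volatile("inc %0" : "+r" (v) : : CLOBBERS("cx"));
        return v;
}

int main(void)
{
        printf("%ld %ld\n", add_one(41), add_one_clobbering_cx(41));
        return 0;
}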
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index d6baf23782bc..5c019d23d06b 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -10,6 +10,7 @@ extern struct exception_table_entry __stop___ex_table[];
10 10
11#if defined(CONFIG_X86_64) 11#if defined(CONFIG_X86_64)
12extern char __end_rodata_hpage_align[]; 12extern char __end_rodata_hpage_align[];
13extern char __entry_trampoline_start[], __entry_trampoline_end[];
13#endif 14#endif
14 15
15#endif /* _ASM_X86_SECTIONS_H */ 16#endif /* _ASM_X86_SECTIONS_H */
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index 82c34ee25a65..906794aa034e 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -20,31 +20,43 @@
20#include <asm/ia32.h> 20#include <asm/ia32.h>
21 21
22/* ia32/sys_ia32.c */ 22/* ia32/sys_ia32.c */
23asmlinkage long sys32_truncate64(const char __user *, unsigned long, unsigned long); 23asmlinkage long compat_sys_x86_truncate64(const char __user *, unsigned long,
24asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long); 24 unsigned long);
25asmlinkage long compat_sys_x86_ftruncate64(unsigned int, unsigned long,
26 unsigned long);
25 27
26asmlinkage long sys32_stat64(const char __user *, struct stat64 __user *); 28asmlinkage long compat_sys_x86_stat64(const char __user *,
27asmlinkage long sys32_lstat64(const char __user *, struct stat64 __user *); 29 struct stat64 __user *);
28asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *); 30asmlinkage long compat_sys_x86_lstat64(const char __user *,
29asmlinkage long sys32_fstatat(unsigned int, const char __user *, 31 struct stat64 __user *);
32asmlinkage long compat_sys_x86_fstat64(unsigned int, struct stat64 __user *);
33asmlinkage long compat_sys_x86_fstatat(unsigned int, const char __user *,
30 struct stat64 __user *, int); 34 struct stat64 __user *, int);
31struct mmap_arg_struct32; 35struct mmap_arg_struct32;
32asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *); 36asmlinkage long compat_sys_x86_mmap(struct mmap_arg_struct32 __user *);
33 37
34asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int); 38asmlinkage long compat_sys_x86_waitpid(compat_pid_t, unsigned int __user *,
39 int);
35 40
36asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); 41asmlinkage long compat_sys_x86_pread(unsigned int, char __user *, u32, u32,
37asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32); 42 u32);
43asmlinkage long compat_sys_x86_pwrite(unsigned int, const char __user *, u32,
44 u32, u32);
38 45
39long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int); 46asmlinkage long compat_sys_x86_fadvise64_64(int, __u32, __u32, __u32, __u32,
40long sys32_vm86_warning(void); 47 int);
41 48
42asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t); 49asmlinkage ssize_t compat_sys_x86_readahead(int, unsigned int, unsigned int,
43asmlinkage long sys32_sync_file_range(int, unsigned, unsigned, 50 size_t);
44 unsigned, unsigned, int); 51asmlinkage long compat_sys_x86_sync_file_range(int, unsigned int, unsigned int,
45asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int); 52 unsigned int, unsigned int,
46asmlinkage long sys32_fallocate(int, int, unsigned, 53 int);
47 unsigned, unsigned, unsigned); 54asmlinkage long compat_sys_x86_fadvise64(int, unsigned int, unsigned int,
55 size_t, int);
56asmlinkage long compat_sys_x86_fallocate(int, int, unsigned int, unsigned int,
57 unsigned int, unsigned int);
58asmlinkage long compat_sys_x86_clone(unsigned long, unsigned long, int __user *,
59 unsigned long, int __user *);
48 60
49/* ia32/ia32_signal.c */ 61/* ia32/ia32_signal.c */
50asmlinkage long sys32_sigreturn(void); 62asmlinkage long sys32_sigreturn(void);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8ad2e410974f..7c5538769f7e 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1603,7 +1603,7 @@ static void __init delay_with_tsc(void)
1603 do { 1603 do {
1604 rep_nop(); 1604 rep_nop();
1605 now = rdtsc(); 1605 now = rdtsc();
1606 } while ((now - start) < 40000000000UL / HZ && 1606 } while ((now - start) < 40000000000ULL / HZ &&
1607 time_before_eq(jiffies, end)); 1607 time_before_eq(jiffies, end));
1608} 1608}
1609 1609
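The UL to ULL change above matters because on an ILP32 build unsigned long is only 32 bits while 40000000000 needs 36; unsigned long long is at least 64 bits everywhere. A quick check (HZ=250 is just an example value):

#include <stdio.h>

int main(void)
{
        /* On an ILP32 target unsigned long is 32 bits; unsigned long long is
         * guaranteed to be at least 64 bits, so ULL is the safe spelling for
         * a constant like 40000000000. */
        printf("unsigned long:      %zu bits\n", sizeof(unsigned long) * 8);
        printf("unsigned long long: %zu bits\n", sizeof(unsigned long long) * 8);

        unsigned long long ticks = 40000000000ULL;
        printf("ticks/HZ at HZ=250: %llu\n", ticks / 250);
        return 0;
}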
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d71c8b54b696..bfca937bdcc3 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -300,6 +300,15 @@ retpoline_auto:
300 setup_force_cpu_cap(X86_FEATURE_USE_IBPB); 300 setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
301 pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); 301 pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
302 } 302 }
303
304 /*
305 * Retpoline means the kernel is safe because it has no indirect
306 * branches. But firmware isn't, so use IBRS to protect that.
307 */
308 if (boot_cpu_has(X86_FEATURE_IBRS)) {
309 setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
310 pr_info("Enabling Restricted Speculation for firmware calls\n");
311 }
303} 312}
304 313
305#undef pr_fmt 314#undef pr_fmt
@@ -326,8 +335,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
326 if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) 335 if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
327 return sprintf(buf, "Not affected\n"); 336 return sprintf(buf, "Not affected\n");
328 337
329 return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], 338 return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
330 boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", 339 boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
340 boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
331 spectre_v2_module_string()); 341 spectre_v2_module_string());
332} 342}
333#endif 343#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 824aee0117bb..348cf4821240 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1749,3 +1749,33 @@ static int __init init_cpu_syscore(void)
1749 return 0; 1749 return 0;
1750} 1750}
1751core_initcall(init_cpu_syscore); 1751core_initcall(init_cpu_syscore);
1752
1753/*
1754 * The microcode loader calls this upon late microcode load to recheck features,
1755 * only when microcode has been updated. Caller holds microcode_mutex and CPU
1756 * hotplug lock.
1757 */
1758void microcode_check(void)
1759{
1760 struct cpuinfo_x86 info;
1761
1762 perf_check_microcode();
1763
1764 /* Reload CPUID max function as it might've changed. */
1765 info.cpuid_level = cpuid_eax(0);
1766
1767 /*
 1768 * Copy all capability leaves to pick up the synthetic ones so that
1769 * memcmp() below doesn't fail on that. The ones coming from CPUID will
1770 * get overwritten in get_cpu_cap().
1771 */
1772 memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability));
1773
1774 get_cpu_cap(&info);
1775
1776 if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)))
1777 return;
1778
1779 pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n");
1780 pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
1781}
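microcode_check() above takes a fresh capability snapshot, seeds it from boot_cpu_data so synthetic bits do not trip the comparison, and warns only when the two differ. The snapshot-and-memcmp pattern in isolation (user-space sketch; the structure and values are made up):

#include <stdio.h>
#include <string.h>

#define NCAPINTS 4      /* illustrative; the kernel's array is larger */

struct cpu_caps {
        unsigned int x86_capability[NCAPINTS];
};

static struct cpu_caps boot_caps = { { 0x1, 0x0, 0xff, 0x0 } };

/* Stand-in for get_cpu_cap(): pretend the reload turned on one more bit. */
static void get_current_caps(struct cpu_caps *c)
{
        *c = boot_caps;
        c->x86_capability[1] |= 0x4;
}

static void caps_check(void)
{
        struct cpu_caps info;

        /* Start from the boot snapshot so pre-set bits don't trip memcmp,
         * then refresh from "hardware". */
        memcpy(&info, &boot_caps, sizeof(info));
        get_current_caps(&info);

        if (!memcmp(&info, &boot_caps, sizeof(info)))
                return;

        puts("capabilities changed after reload; they may not take effect");
}

int main(void)
{
        caps_check();
        return 0;
}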
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d19e903214b4..4aa9fd379390 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -144,6 +144,13 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
144{ 144{
145 int i; 145 int i;
146 146
147 /*
148 * We know that the hypervisor lie to us on the microcode version so
149 * we may as well hope that it is running the correct version.
150 */
151 if (cpu_has(c, X86_FEATURE_HYPERVISOR))
152 return false;
153
147 for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { 154 for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
148 if (c->x86_model == spectre_bad_microcodes[i].model && 155 if (c->x86_model == spectre_bad_microcodes[i].model &&
149 c->x86_stepping == spectre_bad_microcodes[i].stepping) 156 c->x86_stepping == spectre_bad_microcodes[i].stepping)
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 330b8462d426..a998e1a7d46f 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -498,7 +498,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
498 return patch_size; 498 return patch_size;
499} 499}
500 500
501static int apply_microcode_amd(int cpu) 501static enum ucode_state apply_microcode_amd(int cpu)
502{ 502{
503 struct cpuinfo_x86 *c = &cpu_data(cpu); 503 struct cpuinfo_x86 *c = &cpu_data(cpu);
504 struct microcode_amd *mc_amd; 504 struct microcode_amd *mc_amd;
@@ -512,7 +512,7 @@ static int apply_microcode_amd(int cpu)
512 512
513 p = find_patch(cpu); 513 p = find_patch(cpu);
514 if (!p) 514 if (!p)
515 return 0; 515 return UCODE_NFOUND;
516 516
517 mc_amd = p->data; 517 mc_amd = p->data;
518 uci->mc = p->data; 518 uci->mc = p->data;
@@ -523,13 +523,13 @@ static int apply_microcode_amd(int cpu)
523 if (rev >= mc_amd->hdr.patch_id) { 523 if (rev >= mc_amd->hdr.patch_id) {
524 c->microcode = rev; 524 c->microcode = rev;
525 uci->cpu_sig.rev = rev; 525 uci->cpu_sig.rev = rev;
526 return 0; 526 return UCODE_OK;
527 } 527 }
528 528
529 if (__apply_microcode_amd(mc_amd)) { 529 if (__apply_microcode_amd(mc_amd)) {
530 pr_err("CPU%d: update failed for patch_level=0x%08x\n", 530 pr_err("CPU%d: update failed for patch_level=0x%08x\n",
531 cpu, mc_amd->hdr.patch_id); 531 cpu, mc_amd->hdr.patch_id);
532 return -1; 532 return UCODE_ERROR;
533 } 533 }
534 pr_info("CPU%d: new patch_level=0x%08x\n", cpu, 534 pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
535 mc_amd->hdr.patch_id); 535 mc_amd->hdr.patch_id);
@@ -537,7 +537,7 @@ static int apply_microcode_amd(int cpu)
537 uci->cpu_sig.rev = mc_amd->hdr.patch_id; 537 uci->cpu_sig.rev = mc_amd->hdr.patch_id;
538 c->microcode = mc_amd->hdr.patch_id; 538 c->microcode = mc_amd->hdr.patch_id;
539 539
540 return 0; 540 return UCODE_UPDATED;
541} 541}
542 542
543static int install_equiv_cpu_table(const u8 *buf) 543static int install_equiv_cpu_table(const u8 *buf)
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 319dd65f98a2..70ecbc8099c9 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -22,13 +22,16 @@
22#define pr_fmt(fmt) "microcode: " fmt 22#define pr_fmt(fmt) "microcode: " fmt
23 23
24#include <linux/platform_device.h> 24#include <linux/platform_device.h>
25#include <linux/stop_machine.h>
25#include <linux/syscore_ops.h> 26#include <linux/syscore_ops.h>
26#include <linux/miscdevice.h> 27#include <linux/miscdevice.h>
27#include <linux/capability.h> 28#include <linux/capability.h>
28#include <linux/firmware.h> 29#include <linux/firmware.h>
29#include <linux/kernel.h> 30#include <linux/kernel.h>
31#include <linux/delay.h>
30#include <linux/mutex.h> 32#include <linux/mutex.h>
31#include <linux/cpu.h> 33#include <linux/cpu.h>
34#include <linux/nmi.h>
32#include <linux/fs.h> 35#include <linux/fs.h>
33#include <linux/mm.h> 36#include <linux/mm.h>
34 37
@@ -64,6 +67,11 @@ LIST_HEAD(microcode_cache);
64 */ 67 */
65static DEFINE_MUTEX(microcode_mutex); 68static DEFINE_MUTEX(microcode_mutex);
66 69
70/*
71 * Serialize late loading so that CPUs get updated one-by-one.
72 */
73static DEFINE_SPINLOCK(update_lock);
74
67struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; 75struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
68 76
69struct cpu_info_ctx { 77struct cpu_info_ctx {
@@ -373,26 +381,23 @@ static int collect_cpu_info(int cpu)
373 return ret; 381 return ret;
374} 382}
375 383
376struct apply_microcode_ctx {
377 int err;
378};
379
380static void apply_microcode_local(void *arg) 384static void apply_microcode_local(void *arg)
381{ 385{
382 struct apply_microcode_ctx *ctx = arg; 386 enum ucode_state *err = arg;
383 387
384 ctx->err = microcode_ops->apply_microcode(smp_processor_id()); 388 *err = microcode_ops->apply_microcode(smp_processor_id());
385} 389}
386 390
387static int apply_microcode_on_target(int cpu) 391static int apply_microcode_on_target(int cpu)
388{ 392{
389 struct apply_microcode_ctx ctx = { .err = 0 }; 393 enum ucode_state err;
390 int ret; 394 int ret;
391 395
392 ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1); 396 ret = smp_call_function_single(cpu, apply_microcode_local, &err, 1);
393 if (!ret) 397 if (!ret) {
394 ret = ctx.err; 398 if (err == UCODE_ERROR)
395 399 ret = 1;
400 }
396 return ret; 401 return ret;
397} 402}
398 403
@@ -489,31 +494,110 @@ static void __exit microcode_dev_exit(void)
489/* fake device for request_firmware */ 494/* fake device for request_firmware */
490static struct platform_device *microcode_pdev; 495static struct platform_device *microcode_pdev;
491 496
492static int reload_for_cpu(int cpu) 497/*
498 * Late loading dance. Why the heavy-handed stomp_machine effort?
499 *
500 * - HT siblings must be idle and not execute other code while the other sibling
501 * is loading microcode in order to avoid any negative interactions caused by
502 * the loading.
503 *
504 * - In addition, microcode update on the cores must be serialized until this
505 * requirement can be relaxed in the future. Right now, this is conservative
506 * and good.
507 */
508#define SPINUNIT 100 /* 100 nsec */
509
510static int check_online_cpus(void)
493{ 511{
494 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 512 if (num_online_cpus() == num_present_cpus())
495 enum ucode_state ustate; 513 return 0;
496 int err = 0;
497 514
498 if (!uci->valid) 515 pr_err("Not all CPUs online, aborting microcode update.\n");
499 return err;
500 516
501 ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true); 517 return -EINVAL;
502 if (ustate == UCODE_OK) 518}
503 apply_microcode_on_target(cpu); 519
504 else 520static atomic_t late_cpus;
505 if (ustate == UCODE_ERROR) 521
506 err = -EINVAL; 522/*
507 return err; 523 * Returns:
524 * < 0 - on error
525 * 0 - no update done
526 * 1 - microcode was updated
527 */
528static int __reload_late(void *info)
529{
530 unsigned int timeout = NSEC_PER_SEC;
531 int all_cpus = num_online_cpus();
532 int cpu = smp_processor_id();
533 enum ucode_state err;
534 int ret = 0;
535
536 atomic_dec(&late_cpus);
537
538 /*
539 * Wait for all CPUs to arrive. A load will not be attempted unless all
540 * CPUs show up.
 541	 */
542 while (atomic_read(&late_cpus)) {
543 if (timeout < SPINUNIT) {
544 pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n",
545 atomic_read(&late_cpus));
546 return -1;
547 }
548
549 ndelay(SPINUNIT);
550 timeout -= SPINUNIT;
551
552 touch_nmi_watchdog();
553 }
554
555 spin_lock(&update_lock);
556 apply_microcode_local(&err);
557 spin_unlock(&update_lock);
558
559 if (err > UCODE_NFOUND) {
560 pr_warn("Error reloading microcode on CPU %d\n", cpu);
561 ret = -1;
562 } else if (err == UCODE_UPDATED) {
563 ret = 1;
564 }
565
566 atomic_inc(&late_cpus);
567
568 while (atomic_read(&late_cpus) != all_cpus)
569 cpu_relax();
570
571 return ret;
572}
573
574/*
575 * Reload microcode late on all CPUs. Wait for a sec until they
576 * all gather together.
577 */
578static int microcode_reload_late(void)
579{
580 int ret;
581
582 atomic_set(&late_cpus, num_online_cpus());
583
584 ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
585 if (ret < 0)
586 return ret;
587 else if (ret > 0)
588 microcode_check();
589
590 return ret;
508} 591}
509 592
510static ssize_t reload_store(struct device *dev, 593static ssize_t reload_store(struct device *dev,
511 struct device_attribute *attr, 594 struct device_attribute *attr,
512 const char *buf, size_t size) 595 const char *buf, size_t size)
513{ 596{
597 enum ucode_state tmp_ret = UCODE_OK;
598 int bsp = boot_cpu_data.cpu_index;
514 unsigned long val; 599 unsigned long val;
515 int cpu; 600 ssize_t ret = 0;
516 ssize_t ret = 0, tmp_ret;
517 601
518 ret = kstrtoul(buf, 0, &val); 602 ret = kstrtoul(buf, 0, &val);
519 if (ret) 603 if (ret)
@@ -522,23 +606,24 @@ static ssize_t reload_store(struct device *dev,
522 if (val != 1) 606 if (val != 1)
523 return size; 607 return size;
524 608
609 tmp_ret = microcode_ops->request_microcode_fw(bsp, &microcode_pdev->dev, true);
610 if (tmp_ret != UCODE_OK)
611 return size;
612
525 get_online_cpus(); 613 get_online_cpus();
526 mutex_lock(&microcode_mutex);
527 for_each_online_cpu(cpu) {
528 tmp_ret = reload_for_cpu(cpu);
529 if (tmp_ret != 0)
530 pr_warn("Error reloading microcode on CPU %d\n", cpu);
531 614
532 /* save retval of the first encountered reload error */ 615 ret = check_online_cpus();
533 if (!ret) 616 if (ret)
534 ret = tmp_ret; 617 goto put;
535 } 618
536 if (!ret) 619 mutex_lock(&microcode_mutex);
537 perf_check_microcode(); 620 ret = microcode_reload_late();
538 mutex_unlock(&microcode_mutex); 621 mutex_unlock(&microcode_mutex);
622
623put:
539 put_online_cpus(); 624 put_online_cpus();
540 625
541 if (!ret) 626 if (ret >= 0)
542 ret = size; 627 ret = size;
543 628
544 return ret; 629 return ret;
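The rendezvous in __reload_late() above is: every CPU decrements a shared counter, spins until it reaches zero, does its serialized work, then increments the counter and waits until it is back at the full count so nobody returns early. A user-space model of the same barrier shape using pthreads and C11 atomics (timeouts and the NMI watchdog touch omitted; a sketch, not the kernel code):

/* build: cc -pthread rendezvous.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 4

static atomic_int late_cpus = NCPUS;
static pthread_mutex_t update_lock = PTHREAD_MUTEX_INITIALIZER;

static void *reload_late(void *arg)
{
        int cpu = (int)(long)arg;

        /* Arrive: wait until every thread has shown up. */
        atomic_fetch_sub(&late_cpus, 1);
        while (atomic_load(&late_cpus))
                ;

        /* Serialized section, like the spin_lock()ed apply_microcode_local(). */
        pthread_mutex_lock(&update_lock);
        printf("cpu %d: applying update\n", cpu);
        pthread_mutex_unlock(&update_lock);

        /* Depart: wait until everyone is done before returning. */
        atomic_fetch_add(&late_cpus, 1);
        while (atomic_load(&late_cpus) != NCPUS)
                ;

        return NULL;
}

int main(void)
{
        pthread_t tids[NCPUS];

        for (long i = 0; i < NCPUS; i++)
                pthread_create(&tids[i], NULL, reload_late, (void *)i);
        for (int i = 0; i < NCPUS; i++)
                pthread_join(tids[i], NULL);

        return 0;
}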
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index a15db2b4e0d6..2aded9db1d42 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -589,6 +589,23 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
589 if (!mc) 589 if (!mc)
590 return 0; 590 return 0;
591 591
592 /*
593 * Save us the MSR write below - which is a particular expensive
594 * operation - when the other hyperthread has updated the microcode
595 * already.
596 */
597 rev = intel_get_microcode_revision();
598 if (rev >= mc->hdr.rev) {
599 uci->cpu_sig.rev = rev;
600 return UCODE_OK;
601 }
602
603 /*
604 * Writeback and invalidate caches before updating microcode to avoid
605 * internal issues depending on what the microcode is updating.
606 */
607 native_wbinvd();
608
592 /* write microcode via MSR 0x79 */ 609 /* write microcode via MSR 0x79 */
593 native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); 610 native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
594 611
@@ -772,27 +789,44 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
772 return 0; 789 return 0;
773} 790}
774 791
775static int apply_microcode_intel(int cpu) 792static enum ucode_state apply_microcode_intel(int cpu)
776{ 793{
794 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
795 struct cpuinfo_x86 *c = &cpu_data(cpu);
777 struct microcode_intel *mc; 796 struct microcode_intel *mc;
778 struct ucode_cpu_info *uci;
779 struct cpuinfo_x86 *c;
780 static int prev_rev; 797 static int prev_rev;
781 u32 rev; 798 u32 rev;
782 799
783 /* We should bind the task to the CPU */ 800 /* We should bind the task to the CPU */
784 if (WARN_ON(raw_smp_processor_id() != cpu)) 801 if (WARN_ON(raw_smp_processor_id() != cpu))
785 return -1; 802 return UCODE_ERROR;
786 803
787 uci = ucode_cpu_info + cpu; 804 /* Look for a newer patch in our cache: */
788 mc = uci->mc; 805 mc = find_patch(uci);
789 if (!mc) { 806 if (!mc) {
790 /* Look for a newer patch in our cache: */ 807 mc = uci->mc;
791 mc = find_patch(uci);
792 if (!mc) 808 if (!mc)
793 return 0; 809 return UCODE_NFOUND;
794 } 810 }
795 811
812 /*
813 * Save us the MSR write below - which is a particular expensive
814 * operation - when the other hyperthread has updated the microcode
815 * already.
816 */
817 rev = intel_get_microcode_revision();
818 if (rev >= mc->hdr.rev) {
819 uci->cpu_sig.rev = rev;
820 c->microcode = rev;
821 return UCODE_OK;
822 }
823
824 /*
825 * Writeback and invalidate caches before updating microcode to avoid
826 * internal issues depending on what the microcode is updating.
827 */
828 native_wbinvd();
829
796 /* write microcode via MSR 0x79 */ 830 /* write microcode via MSR 0x79 */
797 wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); 831 wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
798 832
@@ -801,7 +835,7 @@ static int apply_microcode_intel(int cpu)
801 if (rev != mc->hdr.rev) { 835 if (rev != mc->hdr.rev) {
802 pr_err("CPU%d update to revision 0x%x failed\n", 836 pr_err("CPU%d update to revision 0x%x failed\n",
803 cpu, mc->hdr.rev); 837 cpu, mc->hdr.rev);
804 return -1; 838 return UCODE_ERROR;
805 } 839 }
806 840
807 if (rev != prev_rev) { 841 if (rev != prev_rev) {
@@ -813,12 +847,10 @@ static int apply_microcode_intel(int cpu)
813 prev_rev = rev; 847 prev_rev = rev;
814 } 848 }
815 849
816 c = &cpu_data(cpu);
817
818 uci->cpu_sig.rev = rev; 850 uci->cpu_sig.rev = rev;
819 c->microcode = rev; 851 c->microcode = rev;
820 852
821 return 0; 853 return UCODE_UPDATED;
822} 854}
823 855
824static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, 856static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 326c63129417..48385c1074a5 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -23,6 +23,7 @@
23#include <asm/nops.h> 23#include <asm/nops.h>
24#include "../entry/calling.h" 24#include "../entry/calling.h"
25#include <asm/export.h> 25#include <asm/export.h>
26#include <asm/nospec-branch.h>
26 27
27#ifdef CONFIG_PARAVIRT 28#ifdef CONFIG_PARAVIRT
28#include <asm/asm-offsets.h> 29#include <asm/asm-offsets.h>
@@ -137,6 +138,7 @@ ENTRY(secondary_startup_64)
137 138
138 /* Ensure I am executing from virtual addresses */ 139 /* Ensure I am executing from virtual addresses */
139 movq $1f, %rax 140 movq $1f, %rax
141 ANNOTATE_RETPOLINE_SAFE
140 jmp *%rax 142 jmp *%rax
1411: 1431:
142 UNWIND_HINT_EMPTY 144 UNWIND_HINT_EMPTY
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 2f723301eb58..38deafebb21b 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -23,7 +23,7 @@
23/* 23/*
24 * this changes the io permissions bitmap in the current task. 24 * this changes the io permissions bitmap in the current task.
25 */ 25 */
26asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) 26SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on)
27{ 27{
28 struct thread_struct *t = &current->thread; 28 struct thread_struct *t = &current->thread;
29 struct tss_struct *tss; 29 struct tss_struct *tss;
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index bd36f3c33cd0..0715f827607c 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1168,10 +1168,18 @@ NOKPROBE_SYMBOL(longjmp_break_handler);
1168 1168
1169bool arch_within_kprobe_blacklist(unsigned long addr) 1169bool arch_within_kprobe_blacklist(unsigned long addr)
1170{ 1170{
1171 bool is_in_entry_trampoline_section = false;
1172
1173#ifdef CONFIG_X86_64
1174 is_in_entry_trampoline_section =
1175 (addr >= (unsigned long)__entry_trampoline_start &&
1176 addr < (unsigned long)__entry_trampoline_end);
1177#endif
1171 return (addr >= (unsigned long)__kprobes_text_start && 1178 return (addr >= (unsigned long)__kprobes_text_start &&
1172 addr < (unsigned long)__kprobes_text_end) || 1179 addr < (unsigned long)__kprobes_text_end) ||
1173 (addr >= (unsigned long)__entry_text_start && 1180 (addr >= (unsigned long)__entry_text_start &&
1174 addr < (unsigned long)__entry_text_end); 1181 addr < (unsigned long)__entry_text_end) ||
1182 is_in_entry_trampoline_section;
1175} 1183}
1176 1184
1177int __init arch_init_kprobes(void) 1185int __init arch_init_kprobes(void)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 399d0f7fa8f1..6285697b6e56 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1203,20 +1203,13 @@ void __init setup_arch(char **cmdline_p)
1203 1203
1204 kasan_init(); 1204 kasan_init();
1205 1205
1206#ifdef CONFIG_X86_32
1207 /* sync back kernel address range */
1208 clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
1209 swapper_pg_dir + KERNEL_PGD_BOUNDARY,
1210 KERNEL_PGD_PTRS);
1211
1212 /* 1206 /*
1213 * sync back low identity map too. It is used for example 1207 * Sync back kernel address range.
1214 * in the 32-bit EFI stub. 1208 *
1209 * FIXME: Can the later sync in setup_cpu_entry_areas() replace
1210 * this call?
1215 */ 1211 */
1216 clone_pgd_range(initial_page_table, 1212 sync_initial_page_table();
1217 swapper_pg_dir + KERNEL_PGD_BOUNDARY,
1218 min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
1219#endif
1220 1213
1221 tboot_probe(); 1214 tboot_probe();
1222 1215
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 497aa766fab3..ea554f812ee1 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -287,24 +287,15 @@ void __init setup_per_cpu_areas(void)
287 /* Setup cpu initialized, callin, callout masks */ 287 /* Setup cpu initialized, callin, callout masks */
288 setup_cpu_local_masks(); 288 setup_cpu_local_masks();
289 289
290#ifdef CONFIG_X86_32
291 /* 290 /*
292 * Sync back kernel address range again. We already did this in 291 * Sync back kernel address range again. We already did this in
293 * setup_arch(), but percpu data also needs to be available in 292 * setup_arch(), but percpu data also needs to be available in
294 * the smpboot asm. We can't reliably pick up percpu mappings 293 * the smpboot asm. We can't reliably pick up percpu mappings
295 * using vmalloc_fault(), because exception dispatch needs 294 * using vmalloc_fault(), because exception dispatch needs
296 * percpu data. 295 * percpu data.
296 *
297 * FIXME: Can the later sync in setup_cpu_entry_areas() replace
298 * this call?
297 */ 299 */
298 clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, 300 sync_initial_page_table();
299 swapper_pg_dir + KERNEL_PGD_BOUNDARY,
300 KERNEL_PGD_PTRS);
301
302 /*
303 * sync back low identity map too. It is used for example
304 * in the 32-bit EFI stub.
305 */
306 clone_pgd_range(initial_page_table,
307 swapper_pg_dir + KERNEL_PGD_BOUNDARY,
308 min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
309#endif
310} 301}
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 1f9188f5357c..feb28fee6cea 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -5,7 +5,6 @@
5#include <asm/unwind.h> 5#include <asm/unwind.h>
6#include <asm/orc_types.h> 6#include <asm/orc_types.h>
7#include <asm/orc_lookup.h> 7#include <asm/orc_lookup.h>
8#include <asm/sections.h>
9 8
10#define orc_warn(fmt, ...) \ 9#define orc_warn(fmt, ...) \
11 printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__) 10 printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
@@ -148,7 +147,7 @@ static struct orc_entry *orc_find(unsigned long ip)
148 } 147 }
149 148
150 /* vmlinux .init slow lookup: */ 149 /* vmlinux .init slow lookup: */
151 if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext) 150 if (init_kernel_text(ip))
152 return __orc_find(__start_orc_unwind_ip, __start_orc_unwind, 151 return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
153 __stop_orc_unwind_ip - __start_orc_unwind_ip, ip); 152 __stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
154 153
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 9b138a06c1a4..b854ebf5851b 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -118,9 +118,11 @@ SECTIONS
118 118
119#ifdef CONFIG_X86_64 119#ifdef CONFIG_X86_64
120 . = ALIGN(PAGE_SIZE); 120 . = ALIGN(PAGE_SIZE);
121 VMLINUX_SYMBOL(__entry_trampoline_start) = .;
121 _entry_trampoline = .; 122 _entry_trampoline = .;
122 *(.entry_trampoline) 123 *(.entry_trampoline)
123 . = ALIGN(PAGE_SIZE); 124 . = ALIGN(PAGE_SIZE);
125 VMLINUX_SYMBOL(__entry_trampoline_end) = .;
124 ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big"); 126 ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
125#endif 127#endif
126 128
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b3e488a74828..24c9521ebc24 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -49,6 +49,7 @@
49#include <asm/debugreg.h> 49#include <asm/debugreg.h>
50#include <asm/kvm_para.h> 50#include <asm/kvm_para.h>
51#include <asm/irq_remapping.h> 51#include <asm/irq_remapping.h>
52#include <asm/microcode.h>
52#include <asm/nospec-branch.h> 53#include <asm/nospec-branch.h>
53 54
54#include <asm/virtext.h> 55#include <asm/virtext.h>
@@ -5355,7 +5356,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
5355 * being speculatively taken. 5356 * being speculatively taken.
5356 */ 5357 */
5357 if (svm->spec_ctrl) 5358 if (svm->spec_ctrl)
5358 wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); 5359 native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
5359 5360
5360 asm volatile ( 5361 asm volatile (
5361 "push %%" _ASM_BP "; \n\t" 5362 "push %%" _ASM_BP "; \n\t"
@@ -5464,11 +5465,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
5464 * If the L02 MSR bitmap does not intercept the MSR, then we need to 5465 * If the L02 MSR bitmap does not intercept the MSR, then we need to
5465 * save it. 5466 * save it.
5466 */ 5467 */
5467 if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) 5468 if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
5468 rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); 5469 svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
5469 5470
5470 if (svm->spec_ctrl) 5471 if (svm->spec_ctrl)
5471 wrmsrl(MSR_IA32_SPEC_CTRL, 0); 5472 native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
5472 5473
5473 /* Eliminate branch target predictions from guest mode */ 5474 /* Eliminate branch target predictions from guest mode */
5474 vmexit_fill_RSB(); 5475 vmexit_fill_RSB();
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3dec126aa302..7f8401d05939 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -51,6 +51,7 @@
51#include <asm/apic.h> 51#include <asm/apic.h>
52#include <asm/irq_remapping.h> 52#include <asm/irq_remapping.h>
53#include <asm/mmu_context.h> 53#include <asm/mmu_context.h>
54#include <asm/microcode.h>
54#include <asm/nospec-branch.h> 55#include <asm/nospec-branch.h>
55 56
56#include "trace.h" 57#include "trace.h"
@@ -9452,7 +9453,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
9452 * being speculatively taken. 9453 * being speculatively taken.
9453 */ 9454 */
9454 if (vmx->spec_ctrl) 9455 if (vmx->spec_ctrl)
9455 wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); 9456 native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
9456 9457
9457 vmx->__launched = vmx->loaded_vmcs->launched; 9458 vmx->__launched = vmx->loaded_vmcs->launched;
9458 asm( 9459 asm(
@@ -9587,11 +9588,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
9587 * If the L02 MSR bitmap does not intercept the MSR, then we need to 9588 * If the L02 MSR bitmap does not intercept the MSR, then we need to
9588 * save it. 9589 * save it.
9589 */ 9590 */
9590 if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) 9591 if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
9591 rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); 9592 vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
9592 9593
9593 if (vmx->spec_ctrl) 9594 if (vmx->spec_ctrl)
9594 wrmsrl(MSR_IA32_SPEC_CTRL, 0); 9595 native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
9595 9596
9596 /* Eliminate branch target predictions from guest mode */ 9597 /* Eliminate branch target predictions from guest mode */
9597 vmexit_fill_RSB(); 9598 vmexit_fill_RSB();
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 91e9700cc6dc..25a972c61b0a 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -28,7 +28,6 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
28lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o 28lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
29lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o 29lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
30lib-$(CONFIG_RETPOLINE) += retpoline.o 30lib-$(CONFIG_RETPOLINE) += retpoline.o
31OBJECT_FILES_NON_STANDARD_retpoline.o :=y
32 31
33obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o 32obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
34 33
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 480edc3a5e03..c909961e678a 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,7 +7,6 @@
7#include <asm/alternative-asm.h> 7#include <asm/alternative-asm.h>
8#include <asm/export.h> 8#include <asm/export.h>
9#include <asm/nospec-branch.h> 9#include <asm/nospec-branch.h>
10#include <asm/bitsperlong.h>
11 10
12.macro THUNK reg 11.macro THUNK reg
13 .section .text.__x86.indirect_thunk 12 .section .text.__x86.indirect_thunk
@@ -47,58 +46,3 @@ GENERATE_THUNK(r13)
47GENERATE_THUNK(r14) 46GENERATE_THUNK(r14)
48GENERATE_THUNK(r15) 47GENERATE_THUNK(r15)
49#endif 48#endif
50
51/*
52 * Fill the CPU return stack buffer.
53 *
54 * Each entry in the RSB, if used for a speculative 'ret', contains an
55 * infinite 'pause; lfence; jmp' loop to capture speculative execution.
56 *
57 * This is required in various cases for retpoline and IBRS-based
58 * mitigations for the Spectre variant 2 vulnerability. Sometimes to
59 * eliminate potentially bogus entries from the RSB, and sometimes
60 * purely to ensure that it doesn't get empty, which on some CPUs would
61 * allow predictions from other (unwanted!) sources to be used.
62 *
63 * Google experimented with loop-unrolling and this turned out to be
64 * the optimal version - two calls, each with their own speculation
65 * trap should their return address end up getting used, in a loop.
66 */
67.macro STUFF_RSB nr:req sp:req
68 mov $(\nr / 2), %_ASM_BX
69 .align 16
70771:
71 call 772f
72773: /* speculation trap */
73 pause
74 lfence
75 jmp 773b
76 .align 16
77772:
78 call 774f
79775: /* speculation trap */
80 pause
81 lfence
82 jmp 775b
83 .align 16
84774:
85 dec %_ASM_BX
86 jnz 771b
87 add $((BITS_PER_LONG/8) * \nr), \sp
88.endm
89
90#define RSB_FILL_LOOPS 16 /* To avoid underflow */
91
92ENTRY(__fill_rsb)
93 STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
94 ret
95END(__fill_rsb)
96EXPORT_SYMBOL_GPL(__fill_rsb)
97
98#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
99
100ENTRY(__clear_rsb)
101 STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
102 ret
103END(__clear_rsb)
104EXPORT_SYMBOL_GPL(__clear_rsb)
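The comment removed above describes the RSB-stuffing technique in detail. Condensed to a single step, and purely as an illustrative sketch of the pattern (the local labels below are not taken from the original macro), the idea is:

	call	1f		/* push a return address; the CPU records it in the RSB */
2:	pause			/* speculation trap: a speculative 'ret' that picks up */
	lfence			/* this RSB entry spins harmlessly here */
	jmp	2b
1:	add	$8, %rsp	/* architectural path: discard the pushed return address (64-bit) */

The deleted STUFF_RSB macro simply repeats this call/trap pair in a loop and adjusts the stack pointer once at the end, as shown in the removed lines above.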
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index b9283cc27622..476d810639a8 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -163,4 +163,10 @@ void __init setup_cpu_entry_areas(void)
163 163
164 for_each_possible_cpu(cpu) 164 for_each_possible_cpu(cpu)
165 setup_cpu_entry_area(cpu); 165 setup_cpu_entry_area(cpu);
166
167 /*
168 * This is the last essential update to swapper_pgdir which needs
169 * to be synchronized to initial_page_table on 32bit.
170 */
171 sync_initial_page_table();
166} 172}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 321b78060e93..e6af2b464c3d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1248,10 +1248,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
1248 tsk = current; 1248 tsk = current;
1249 mm = tsk->mm; 1249 mm = tsk->mm;
1250 1250
1251 /*
1252 * Detect and handle instructions that would cause a page fault for
1253 * both a tracked kernel page and a userspace page.
1254 */
1255 prefetchw(&mm->mmap_sem); 1251 prefetchw(&mm->mmap_sem);
1256 1252
1257 if (unlikely(kmmio_fault(regs, address))) 1253 if (unlikely(kmmio_fault(regs, address)))
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 79cb066f40c0..396e1f0151ac 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -453,6 +453,21 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base)
453} 453}
454#endif /* CONFIG_HIGHMEM */ 454#endif /* CONFIG_HIGHMEM */
455 455
456void __init sync_initial_page_table(void)
457{
458 clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
459 swapper_pg_dir + KERNEL_PGD_BOUNDARY,
460 KERNEL_PGD_PTRS);
461
462 /*
463	 * sync back the low identity map too. It is used, for example,
464 * in the 32-bit EFI stub.
465 */
466 clone_pgd_range(initial_page_table,
467 swapper_pg_dir + KERNEL_PGD_BOUNDARY,
468 min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
469}
470
456void __init native_pagetable_init(void) 471void __init native_pagetable_init(void)
457{ 472{
458 unsigned long pfn, va; 473 unsigned long pfn, va;
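The new sync_initial_page_table() relies on clone_pgd_range() for the actual copying. For context, that helper (defined elsewhere, in arch/x86/include/asm/pgtable.h; reproduced here only as a sketch) is essentially a memcpy() over top-level entries:

static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
{
	/* copy 'count' top-level (pgd) entries from src to dst */
	memcpy(dst, src, count * sizeof(pgd_t));
}

So the two calls above copy swapper_pg_dir's kernel mappings into initial_page_table: once into its kernel half, and once into its low identity-map slots for users such as the 32-bit EFI stub.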
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 01f682cf77a8..40a6085063d6 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -15,6 +15,7 @@
15#include <asm/page.h> 15#include <asm/page.h>
16#include <asm/processor-flags.h> 16#include <asm/processor-flags.h>
17#include <asm/msr-index.h> 17#include <asm/msr-index.h>
18#include <asm/nospec-branch.h>
18 19
19 .text 20 .text
20 .code64 21 .code64
@@ -59,6 +60,7 @@ ENTRY(sme_encrypt_execute)
59 movq %rax, %r8 /* Workarea encryption routine */ 60 movq %rax, %r8 /* Workarea encryption routine */
60 addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */ 61 addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */
61 62
63 ANNOTATE_RETPOLINE_SAFE
62 call *%rax /* Call the encryption routine */ 64 call *%rax /* Call the encryption routine */
63 65
64 pop %r12 66 pop %r12
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index ce38f165489b..631507f0c198 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -332,7 +332,7 @@ static void __init pti_clone_user_shared(void)
332} 332}
333 333
334/* 334/*
335 * Clone the ESPFIX P4D into the user space visinble page table 335 * Clone the ESPFIX P4D into the user space visible page table
336 */ 336 */
337static void __init pti_setup_espfix64(void) 337static void __init pti_setup_espfix64(void)
338{ 338{
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index de53bd15df5a..24bb7598774e 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -102,7 +102,7 @@ ENTRY(startup_32)
102 * don't we'll eventually crash trying to execute encrypted 102 * don't we'll eventually crash trying to execute encrypted
103 * instructions. 103 * instructions.
104 */ 104 */
105 bt $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags 105 btl $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
106 jnc .Ldone 106 jnc .Ldone
107 movl $MSR_K8_SYSCFG, %ecx 107 movl $MSR_K8_SYSCFG, %ecx
108 rdmsr 108 rdmsr
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index d9f96cc5d743..1d83152c761b 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -1,12 +1,15 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include <linux/types.h> 2#include <linux/types.h>
3#include <linux/tick.h> 3#include <linux/tick.h>
4#include <linux/percpu-defs.h>
4 5
5#include <xen/xen.h> 6#include <xen/xen.h>
6#include <xen/interface/xen.h> 7#include <xen/interface/xen.h>
7#include <xen/grant_table.h> 8#include <xen/grant_table.h>
8#include <xen/events.h> 9#include <xen/events.h>
9 10
11#include <asm/cpufeatures.h>
12#include <asm/msr-index.h>
10#include <asm/xen/hypercall.h> 13#include <asm/xen/hypercall.h>
11#include <asm/xen/page.h> 14#include <asm/xen/page.h>
12#include <asm/fixmap.h> 15#include <asm/fixmap.h>
@@ -15,6 +18,8 @@
15#include "mmu.h" 18#include "mmu.h"
16#include "pmu.h" 19#include "pmu.h"
17 20
21static DEFINE_PER_CPU(u64, spec_ctrl);
22
18void xen_arch_pre_suspend(void) 23void xen_arch_pre_suspend(void)
19{ 24{
20 xen_save_time_memory_area(); 25 xen_save_time_memory_area();
@@ -35,6 +40,9 @@ void xen_arch_post_suspend(int cancelled)
35 40
36static void xen_vcpu_notify_restore(void *data) 41static void xen_vcpu_notify_restore(void *data)
37{ 42{
43 if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL))
44 wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl));
45
38 /* Boot processor notified via generic timekeeping_resume() */ 46 /* Boot processor notified via generic timekeeping_resume() */
39 if (smp_processor_id() == 0) 47 if (smp_processor_id() == 0)
40 return; 48 return;
@@ -44,7 +52,15 @@ static void xen_vcpu_notify_restore(void *data)
44 52
45static void xen_vcpu_notify_suspend(void *data) 53static void xen_vcpu_notify_suspend(void *data)
46{ 54{
55 u64 tmp;
56
47 tick_suspend_local(); 57 tick_suspend_local();
58
59 if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
60 rdmsrl(MSR_IA32_SPEC_CTRL, tmp);
61 this_cpu_write(spec_ctrl, tmp);
62 wrmsrl(MSR_IA32_SPEC_CTRL, 0);
63 }
48} 64}
49 65
50void xen_arch_resume(void) 66void xen_arch_resume(void)
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index d02a4df3f473..d3f264a5b04d 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -27,3 +27,8 @@
27#if __has_feature(address_sanitizer) 27#if __has_feature(address_sanitizer)
28#define __SANITIZE_ADDRESS__ 28#define __SANITIZE_ADDRESS__
29#endif 29#endif
30
31/* Clang doesn't have a way to turn it off per-function, yet. */
32#ifdef __noretpoline
33#undef __noretpoline
34#endif
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 901c1ccb3374..e2c7f4369eff 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -93,6 +93,10 @@
93#define __weak __attribute__((weak)) 93#define __weak __attribute__((weak))
94#define __alias(symbol) __attribute__((alias(#symbol))) 94#define __alias(symbol) __attribute__((alias(#symbol)))
95 95
96#ifdef RETPOLINE
97#define __noretpoline __attribute__((indirect_branch("keep")))
98#endif
99
96/* 100/*
97 * it doesn't make sense on ARM (currently the only user of __naked) 101 * it doesn't make sense on ARM (currently the only user of __naked)
98 * to trace naked functions because then mcount is called without 102 * to trace naked functions because then mcount is called without
diff --git a/include/linux/init.h b/include/linux/init.h
index 506a98151131..bc27cf03c41e 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -6,10 +6,10 @@
6#include <linux/types.h> 6#include <linux/types.h>
7 7
8/* Built-in __init functions needn't be compiled with retpoline */ 8/* Built-in __init functions needn't be compiled with retpoline */
9#if defined(RETPOLINE) && !defined(MODULE) 9#if defined(__noretpoline) && !defined(MODULE)
10#define __noretpoline __attribute__((indirect_branch("keep"))) 10#define __noinitretpoline __noretpoline
11#else 11#else
12#define __noretpoline 12#define __noinitretpoline
13#endif 13#endif
14 14
15/* These macros are used to mark some functions or 15/* These macros are used to mark some functions or
@@ -47,7 +47,7 @@
47 47
48/* These are for everybody (although not all archs will actually 48/* These are for everybody (although not all archs will actually
49 discard it in modules) */ 49 discard it in modules) */
50#define __init __section(.init.text) __cold __latent_entropy __noretpoline 50#define __init __section(.init.text) __cold __latent_entropy __noinitretpoline
51#define __initdata __section(.init.data) 51#define __initdata __section(.init.data)
52#define __initconst __section(.init.rodata) 52#define __initconst __section(.init.rodata)
53#define __exitdata __section(.exit.data) 53#define __exitdata __section(.exit.data)
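Taken together with the compiler-gcc.h hunk, the effect is that a built-in __init function is compiled with indirect_branch("keep") on retpoline builds. A hypothetical example (the function and its callback are illustrative, not part of this patch):

/*
 * In a RETPOLINE=y, non-module object, __init now carries
 * __noinitretpoline -> __noretpoline, so the indirect call below is
 * emitted as a plain indirect call rather than a retpoline thunk.
 */
static int __init example_board_init(int (*detect)(void))
{
	return detect();
}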
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index b6a29c126cc4..2168cc6b8b30 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -151,6 +151,7 @@ extern struct jump_entry __start___jump_table[];
151extern struct jump_entry __stop___jump_table[]; 151extern struct jump_entry __stop___jump_table[];
152 152
153extern void jump_label_init(void); 153extern void jump_label_init(void);
154extern void jump_label_invalidate_init(void);
154extern void jump_label_lock(void); 155extern void jump_label_lock(void);
155extern void jump_label_unlock(void); 156extern void jump_label_unlock(void);
156extern void arch_jump_label_transform(struct jump_entry *entry, 157extern void arch_jump_label_transform(struct jump_entry *entry,
@@ -198,6 +199,8 @@ static __always_inline void jump_label_init(void)
198 static_key_initialized = true; 199 static_key_initialized = true;
199} 200}
200 201
202static inline void jump_label_invalidate_init(void) {}
203
201static __always_inline bool static_key_false(struct static_key *key) 204static __always_inline bool static_key_false(struct static_key *key)
202{ 205{
203 if (unlikely(static_key_count(key) > 0)) 206 if (unlikely(static_key_count(key) > 0))
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index ce51455e2adf..3fd291503576 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -472,6 +472,7 @@ extern bool parse_option_str(const char *str, const char *option);
472extern char *next_arg(char *args, char **param, char **val); 472extern char *next_arg(char *args, char **param, char **val);
473 473
474extern int core_kernel_text(unsigned long addr); 474extern int core_kernel_text(unsigned long addr);
475extern int init_kernel_text(unsigned long addr);
475extern int core_kernel_data(unsigned long addr); 476extern int core_kernel_data(unsigned long addr);
476extern int __kernel_text_address(unsigned long addr); 477extern int __kernel_text_address(unsigned long addr);
477extern int kernel_text_address(unsigned long addr); 478extern int kernel_text_address(unsigned long addr);
diff --git a/include/linux/nospec.h b/include/linux/nospec.h
index fbc98e2c8228..e791ebc65c9c 100644
--- a/include/linux/nospec.h
+++ b/include/linux/nospec.h
@@ -5,6 +5,7 @@
5 5
6#ifndef _LINUX_NOSPEC_H 6#ifndef _LINUX_NOSPEC_H
7#define _LINUX_NOSPEC_H 7#define _LINUX_NOSPEC_H
8#include <asm/barrier.h>
8 9
9/** 10/**
10 * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise 11 * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
@@ -30,26 +31,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
30#endif 31#endif
31 32
32/* 33/*
33 * Warn developers about inappropriate array_index_nospec() usage.
34 *
35 * Even if the CPU speculates past the WARN_ONCE branch, the
36 * sign bit of @index is taken into account when generating the
37 * mask.
38 *
39 * This warning is compiled out when the compiler can infer that
40 * @index and @size are less than LONG_MAX.
41 */
42#define array_index_mask_nospec_check(index, size) \
43({ \
44 if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, \
45 "array_index_nospec() limited to range of [0, LONG_MAX]\n")) \
46 _mask = 0; \
47 else \
48 _mask = array_index_mask_nospec(index, size); \
49 _mask; \
50})
51
52/*
53 * array_index_nospec - sanitize an array index after a bounds check 34 * array_index_nospec - sanitize an array index after a bounds check
54 * 35 *
55 * For a code sequence like: 36 * For a code sequence like:
@@ -67,12 +48,11 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
67({ \ 48({ \
68 typeof(index) _i = (index); \ 49 typeof(index) _i = (index); \
69 typeof(size) _s = (size); \ 50 typeof(size) _s = (size); \
70 unsigned long _mask = array_index_mask_nospec_check(_i, _s); \ 51 unsigned long _mask = array_index_mask_nospec(_i, _s); \
71 \ 52 \
72 BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ 53 BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \
73 BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ 54 BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \
74 \ 55 \
75 _i &= _mask; \ 56 (typeof(_i)) (_i & _mask); \
76 _i; \
77}) 57})
78#endif /* _LINUX_NOSPEC_H */ 58#endif /* _LINUX_NOSPEC_H */
diff --git a/init/main.c b/init/main.c
index a8100b954839..969eaf140ef0 100644
--- a/init/main.c
+++ b/init/main.c
@@ -89,6 +89,7 @@
89#include <linux/io.h> 89#include <linux/io.h>
90#include <linux/cache.h> 90#include <linux/cache.h>
91#include <linux/rodata_test.h> 91#include <linux/rodata_test.h>
92#include <linux/jump_label.h>
92 93
93#include <asm/io.h> 94#include <asm/io.h>
94#include <asm/bugs.h> 95#include <asm/bugs.h>
@@ -1000,6 +1001,7 @@ static int __ref kernel_init(void *unused)
1000 /* need to finish all async __init code before freeing the memory */ 1001 /* need to finish all async __init code before freeing the memory */
1001 async_synchronize_full(); 1002 async_synchronize_full();
1002 ftrace_free_init_mem(); 1003 ftrace_free_init_mem();
1004 jump_label_invalidate_init();
1003 free_initmem(); 1005 free_initmem();
1004 mark_readonly(); 1006 mark_readonly();
1005 system_state = SYSTEM_RUNNING; 1007 system_state = SYSTEM_RUNNING;
diff --git a/kernel/extable.c b/kernel/extable.c
index a17fdb63dc3e..6a5b61ebc66c 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -64,7 +64,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
64 return e; 64 return e;
65} 65}
66 66
67static inline int init_kernel_text(unsigned long addr) 67int init_kernel_text(unsigned long addr)
68{ 68{
69 if (addr >= (unsigned long)_sinittext && 69 if (addr >= (unsigned long)_sinittext &&
70 addr < (unsigned long)_einittext) 70 addr < (unsigned long)_einittext)
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index b4517095db6a..52a0a7af8640 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -366,12 +366,15 @@ static void __jump_label_update(struct static_key *key,
366{ 366{
367 for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) { 367 for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
368 /* 368 /*
369 * entry->code set to 0 invalidates module init text sections 369 * An entry->code of 0 indicates an entry which has been
370 * kernel_text_address() verifies we are not in core kernel 370 * disabled because it was in an init text area.
371 * init code, see jump_label_invalidate_module_init().
372 */ 371 */
373 if (entry->code && kernel_text_address(entry->code)) 372 if (entry->code) {
374 arch_jump_label_transform(entry, jump_label_type(entry)); 373 if (kernel_text_address(entry->code))
374 arch_jump_label_transform(entry, jump_label_type(entry));
375 else
376 WARN_ONCE(1, "can't patch jump_label at %pS", (void *)entry->code);
377 }
375 } 378 }
376} 379}
377 380
@@ -417,6 +420,19 @@ void __init jump_label_init(void)
417 cpus_read_unlock(); 420 cpus_read_unlock();
418} 421}
419 422
423/* Disable any jump label entries in __init code */
424void __init jump_label_invalidate_init(void)
425{
426 struct jump_entry *iter_start = __start___jump_table;
427 struct jump_entry *iter_stop = __stop___jump_table;
428 struct jump_entry *iter;
429
430 for (iter = iter_start; iter < iter_stop; iter++) {
431 if (init_kernel_text(iter->code))
432 iter->code = 0;
433 }
434}
435
420#ifdef CONFIG_MODULES 436#ifdef CONFIG_MODULES
421 437
422static enum jump_label_type jump_label_init_type(struct jump_entry *entry) 438static enum jump_label_type jump_label_init_type(struct jump_entry *entry)
@@ -633,6 +649,7 @@ static void jump_label_del_module(struct module *mod)
633 } 649 }
634} 650}
635 651
652/* Disable any jump label entries in module init code */
636static void jump_label_invalidate_module_init(struct module *mod) 653static void jump_label_invalidate_module_init(struct module *mod)
637{ 654{
638 struct jump_entry *iter_start = mod->jump_entries; 655 struct jump_entry *iter_start = mod->jump_entries;
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 47cddf32aeba..4f2b25d43ec9 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -256,6 +256,8 @@ __objtool_obj := $(objtree)/tools/objtool/objtool
256 256
257objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check) 257objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check)
258 258
259objtool_args += $(if $(part-of-module), --module,)
260
259ifndef CONFIG_FRAME_POINTER 261ifndef CONFIG_FRAME_POINTER
260objtool_args += --no-fp 262objtool_args += --no-fp
261endif 263endif
@@ -264,6 +266,12 @@ objtool_args += --no-unreachable
264else 266else
265objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable) 267objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable)
266endif 268endif
269ifdef CONFIG_RETPOLINE
270ifneq ($(RETPOLINE_CFLAGS),)
271 objtool_args += --retpoline
272endif
273endif
274
267 275
268ifdef CONFIG_MODVERSIONS 276ifdef CONFIG_MODVERSIONS
269objtool_o = $(@D)/.tmp_$(@F) 277objtool_o = $(@D)/.tmp_$(@F)
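With the hunk above, a retpoline-enabled build passes --retpoline (and, for module objects, --module) to objtool. A hand-run equivalent, assuming an illustrative object path (with CONFIG_UNWINDER_ORC the subcommand would be "orc generate" instead of "check") would be:

	./tools/objtool/objtool check --retpoline --module drivers/foo/bar.o

The two options themselves are registered in builtin-check.c below.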
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 57254f5b2779..694abc628e9b 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -29,7 +29,7 @@
29#include "builtin.h" 29#include "builtin.h"
30#include "check.h" 30#include "check.h"
31 31
32bool no_fp, no_unreachable; 32bool no_fp, no_unreachable, retpoline, module;
33 33
34static const char * const check_usage[] = { 34static const char * const check_usage[] = {
35 "objtool check [<options>] file.o", 35 "objtool check [<options>] file.o",
@@ -39,6 +39,8 @@ static const char * const check_usage[] = {
39const struct option check_options[] = { 39const struct option check_options[] = {
40 OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), 40 OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
41 OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), 41 OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
42 OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"),
43 OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
42 OPT_END(), 44 OPT_END(),
43}; 45};
44 46
@@ -53,5 +55,5 @@ int cmd_check(int argc, const char **argv)
53 55
54 objname = argv[0]; 56 objname = argv[0];
55 57
56 return check(objname, no_fp, no_unreachable, false); 58 return check(objname, false);
57} 59}
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
index 91e8e19ff5e0..77ea2b97117d 100644
--- a/tools/objtool/builtin-orc.c
+++ b/tools/objtool/builtin-orc.c
@@ -25,7 +25,6 @@
25 */ 25 */
26 26
27#include <string.h> 27#include <string.h>
28#include <subcmd/parse-options.h>
29#include "builtin.h" 28#include "builtin.h"
30#include "check.h" 29#include "check.h"
31 30
@@ -36,9 +35,6 @@ static const char *orc_usage[] = {
36 NULL, 35 NULL,
37}; 36};
38 37
39extern const struct option check_options[];
40extern bool no_fp, no_unreachable;
41
42int cmd_orc(int argc, const char **argv) 38int cmd_orc(int argc, const char **argv)
43{ 39{
44 const char *objname; 40 const char *objname;
@@ -54,7 +50,7 @@ int cmd_orc(int argc, const char **argv)
54 50
55 objname = argv[0]; 51 objname = argv[0];
56 52
57 return check(objname, no_fp, no_unreachable, true); 53 return check(objname, true);
58 } 54 }
59 55
60 if (!strcmp(argv[0], "dump")) { 56 if (!strcmp(argv[0], "dump")) {
diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h
index dd526067fed5..28ff40e19a14 100644
--- a/tools/objtool/builtin.h
+++ b/tools/objtool/builtin.h
@@ -17,6 +17,11 @@
17#ifndef _BUILTIN_H 17#ifndef _BUILTIN_H
18#define _BUILTIN_H 18#define _BUILTIN_H
19 19
20#include <subcmd/parse-options.h>
21
22extern const struct option check_options[];
23extern bool no_fp, no_unreachable, retpoline, module;
24
20extern int cmd_check(int argc, const char **argv); 25extern int cmd_check(int argc, const char **argv);
21extern int cmd_orc(int argc, const char **argv); 26extern int cmd_orc(int argc, const char **argv);
22 27
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index a8cb69a26576..92b6a2c21631 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -18,6 +18,7 @@
18#include <string.h> 18#include <string.h>
19#include <stdlib.h> 19#include <stdlib.h>
20 20
21#include "builtin.h"
21#include "check.h" 22#include "check.h"
22#include "elf.h" 23#include "elf.h"
23#include "special.h" 24#include "special.h"
@@ -33,7 +34,6 @@ struct alternative {
33}; 34};
34 35
35const char *objname; 36const char *objname;
36static bool no_fp;
37struct cfi_state initial_func_cfi; 37struct cfi_state initial_func_cfi;
38 38
39struct instruction *find_insn(struct objtool_file *file, 39struct instruction *find_insn(struct objtool_file *file,
@@ -497,6 +497,7 @@ static int add_jump_destinations(struct objtool_file *file)
497 * disguise, so convert them accordingly. 497 * disguise, so convert them accordingly.
498 */ 498 */
499 insn->type = INSN_JUMP_DYNAMIC; 499 insn->type = INSN_JUMP_DYNAMIC;
500 insn->retpoline_safe = true;
500 continue; 501 continue;
501 } else { 502 } else {
502 /* sibling call */ 503 /* sibling call */
@@ -548,7 +549,8 @@ static int add_call_destinations(struct objtool_file *file)
548 if (!insn->call_dest && !insn->ignore) { 549 if (!insn->call_dest && !insn->ignore) {
549 WARN_FUNC("unsupported intra-function call", 550 WARN_FUNC("unsupported intra-function call",
550 insn->sec, insn->offset); 551 insn->sec, insn->offset);
551 WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE."); 552 if (retpoline)
553 WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.");
552 return -1; 554 return -1;
553 } 555 }
554 556
@@ -923,7 +925,11 @@ static struct rela *find_switch_table(struct objtool_file *file,
923 if (find_symbol_containing(file->rodata, text_rela->addend)) 925 if (find_symbol_containing(file->rodata, text_rela->addend))
924 continue; 926 continue;
925 927
926 return find_rela_by_dest(file->rodata, text_rela->addend); 928 rodata_rela = find_rela_by_dest(file->rodata, text_rela->addend);
929 if (!rodata_rela)
930 continue;
931
932 return rodata_rela;
927 } 933 }
928 934
929 return NULL; 935 return NULL;
@@ -1108,6 +1114,41 @@ static int read_unwind_hints(struct objtool_file *file)
1108 return 0; 1114 return 0;
1109} 1115}
1110 1116
1117static int read_retpoline_hints(struct objtool_file *file)
1118{
1119 struct section *sec;
1120 struct instruction *insn;
1121 struct rela *rela;
1122
1123 sec = find_section_by_name(file->elf, ".rela.discard.retpoline_safe");
1124 if (!sec)
1125 return 0;
1126
1127 list_for_each_entry(rela, &sec->rela_list, list) {
1128 if (rela->sym->type != STT_SECTION) {
1129 WARN("unexpected relocation symbol type in %s", sec->name);
1130 return -1;
1131 }
1132
1133 insn = find_insn(file, rela->sym->sec, rela->addend);
1134 if (!insn) {
1135 WARN("bad .discard.retpoline_safe entry");
1136 return -1;
1137 }
1138
1139 if (insn->type != INSN_JUMP_DYNAMIC &&
1140 insn->type != INSN_CALL_DYNAMIC) {
1141 WARN_FUNC("retpoline_safe hint not an indirect jump/call",
1142 insn->sec, insn->offset);
1143 return -1;
1144 }
1145
1146 insn->retpoline_safe = true;
1147 }
1148
1149 return 0;
1150}
1151
1111static int decode_sections(struct objtool_file *file) 1152static int decode_sections(struct objtool_file *file)
1112{ 1153{
1113 int ret; 1154 int ret;
@@ -1146,6 +1187,10 @@ static int decode_sections(struct objtool_file *file)
1146 if (ret) 1187 if (ret)
1147 return ret; 1188 return ret;
1148 1189
1190 ret = read_retpoline_hints(file);
1191 if (ret)
1192 return ret;
1193
1149 return 0; 1194 return 0;
1150} 1195}
1151 1196
@@ -1891,6 +1936,38 @@ static int validate_unwind_hints(struct objtool_file *file)
1891 return warnings; 1936 return warnings;
1892} 1937}
1893 1938
1939static int validate_retpoline(struct objtool_file *file)
1940{
1941 struct instruction *insn;
1942 int warnings = 0;
1943
1944 for_each_insn(file, insn) {
1945 if (insn->type != INSN_JUMP_DYNAMIC &&
1946 insn->type != INSN_CALL_DYNAMIC)
1947 continue;
1948
1949 if (insn->retpoline_safe)
1950 continue;
1951
1952 /*
1953 * .init.text code is ran before userspace and thus doesn't
1954 * strictly need retpolines, except for modules which are
1955 * loaded late, they very much do need retpoline in their
1956 * .init.text
1957 */
1958 if (!strcmp(insn->sec->name, ".init.text") && !module)
1959 continue;
1960
1961 WARN_FUNC("indirect %s found in RETPOLINE build",
1962 insn->sec, insn->offset,
1963 insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
1964
1965 warnings++;
1966 }
1967
1968 return warnings;
1969}
1970
1894static bool is_kasan_insn(struct instruction *insn) 1971static bool is_kasan_insn(struct instruction *insn)
1895{ 1972{
1896 return (insn->type == INSN_CALL && 1973 return (insn->type == INSN_CALL &&
@@ -2022,13 +2099,12 @@ static void cleanup(struct objtool_file *file)
2022 elf_close(file->elf); 2099 elf_close(file->elf);
2023} 2100}
2024 2101
2025int check(const char *_objname, bool _no_fp, bool no_unreachable, bool orc) 2102int check(const char *_objname, bool orc)
2026{ 2103{
2027 struct objtool_file file; 2104 struct objtool_file file;
2028 int ret, warnings = 0; 2105 int ret, warnings = 0;
2029 2106
2030 objname = _objname; 2107 objname = _objname;
2031 no_fp = _no_fp;
2032 2108
2033 file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY); 2109 file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY);
2034 if (!file.elf) 2110 if (!file.elf)
@@ -2052,6 +2128,13 @@ int check(const char *_objname, bool _no_fp, bool no_unreachable, bool orc)
2052 if (list_empty(&file.insn_list)) 2128 if (list_empty(&file.insn_list))
2053 goto out; 2129 goto out;
2054 2130
2131 if (retpoline) {
2132 ret = validate_retpoline(&file);
2133 if (ret < 0)
2134 return ret;
2135 warnings += ret;
2136 }
2137
2055 ret = validate_functions(&file); 2138 ret = validate_functions(&file);
2056 if (ret < 0) 2139 if (ret < 0)
2057 goto out; 2140 goto out;
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index 23a1d065cae1..c6b68fcb926f 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -45,6 +45,7 @@ struct instruction {
45 unsigned char type; 45 unsigned char type;
46 unsigned long immediate; 46 unsigned long immediate;
47 bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts; 47 bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
48 bool retpoline_safe;
48 struct symbol *call_dest; 49 struct symbol *call_dest;
49 struct instruction *jump_dest; 50 struct instruction *jump_dest;
50 struct instruction *first_jump_src; 51 struct instruction *first_jump_src;
@@ -63,7 +64,7 @@ struct objtool_file {
63 bool ignore_unreachables, c_file, hints; 64 bool ignore_unreachables, c_file, hints;
64}; 65};
65 66
66int check(const char *objname, bool no_fp, bool no_unreachable, bool orc); 67int check(const char *objname, bool orc);
67 68
68struct instruction *find_insn(struct objtool_file *file, 69struct instruction *find_insn(struct objtool_file *file,
69 struct section *sec, unsigned long offset); 70 struct section *sec, unsigned long offset);
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index be81621446f0..0b4f1cc2291c 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -450,7 +450,7 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
450 num_vsyscall_traps++; 450 num_vsyscall_traps++;
451} 451}
452 452
453static int test_native_vsyscall(void) 453static int test_emulation(void)
454{ 454{
455 time_t tmp; 455 time_t tmp;
456 bool is_native; 456 bool is_native;
@@ -458,7 +458,7 @@ static int test_native_vsyscall(void)
458 if (!vtime) 458 if (!vtime)
459 return 0; 459 return 0;
460 460
461 printf("[RUN]\tchecking for native vsyscall\n"); 461 printf("[RUN]\tchecking that vsyscalls are emulated\n");
462 sethandler(SIGTRAP, sigtrap, 0); 462 sethandler(SIGTRAP, sigtrap, 0);
463 set_eflags(get_eflags() | X86_EFLAGS_TF); 463 set_eflags(get_eflags() | X86_EFLAGS_TF);
464 vtime(&tmp); 464 vtime(&tmp);
@@ -474,11 +474,12 @@ static int test_native_vsyscall(void)
474 */ 474 */
475 is_native = (num_vsyscall_traps > 1); 475 is_native = (num_vsyscall_traps > 1);
476 476
477 printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n", 477 printf("[%s]\tvsyscalls are %s (%d instructions in vsyscall page)\n",
478 (is_native ? "FAIL" : "OK"),
478 (is_native ? "native" : "emulated"), 479 (is_native ? "native" : "emulated"),
479 (int)num_vsyscall_traps); 480 (int)num_vsyscall_traps);
480 481
481 return 0; 482 return is_native;
482} 483}
483#endif 484#endif
484 485
@@ -498,7 +499,7 @@ int main(int argc, char **argv)
498 nerrs += test_vsys_r(); 499 nerrs += test_vsys_r();
499 500
500#ifdef __x86_64__ 501#ifdef __x86_64__
501 nerrs += test_native_vsyscall(); 502 nerrs += test_emulation();
502#endif 503#endif
503 504
504 return nerrs ? 1 : 0; 505 return nerrs ? 1 : 0;