author    Linus Torvalds <torvalds@linux-foundation.org>  2018-09-02 13:11:30 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2018-09-02 13:11:30 -0400
commit    899ba79553cf1699bdcd262950b48501b0285529 (patch)
tree      81f016a0ab06969857f6925310bdad2682af1d7a
parent    1395d109cddcf6c8ebf20ba3bfaa2beb48febfbc (diff)
parent    ff924c5a1ec7548825cc2d07980b03be4224ffac (diff)
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Thomas Gleixner:

 "Speculation:

   - Make the microcode check more robust

   - Make the L1TF memory limit depend on the internal cache physical
     address space and not on the CPUID advertised physical address
     space, which might be significantly smaller. This avoids disabling
     L1TF on machines which utilize the full physical address space.

   - Fix the GDT mapping for EFI calls on 32bit PTI

   - Fix the MCE nospec implementation to prevent #GP

  Fixes and robustness:

   - Use the proper operand order for LSL in the VDSO

   - Prevent NMI uaccess race against CR3 switching

   - Add a lockdep check to verify that text_mutex is held in
     text_poke() functions

   - Repair the fallout of giving native_restore_fl() a prototype

   - Prevent kernel memory dumps based on usermode RIP

   - Wipe KASAN shadow stack before rewinding the stack to prevent
     false positives

   - Move the ASM GOTO enforcement to the actual build stage to allow
     user API header extraction without a compiler

   - Fix a section mismatch introduced by the on demand VDSO mapping
     change

  Miscellaneous:

   - Trivial typo, GCC quirk removal and CC_SET/OUT() cleanups"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/pti: Fix section mismatch warning/error
  x86/vdso: Fix lsl operand order
  x86/mce: Fix set_mce_nospec() to avoid #GP fault
  x86/efi: Load fixmap GDT in efi_call_phys_epilog()
  x86/nmi: Fix NMI uaccess race against CR3 switching
  x86: Allow generating user-space headers without a compiler
  x86/dumpstack: Don't dump kernel memory based on usermode RIP
  x86/asm: Use CC_SET()/CC_OUT() in __gen_sigismember()
  x86/alternatives: Lockdep-enforce text_mutex in text_poke*()
  x86/entry/64: Wipe KASAN stack shadow before rewind_stack_do_exit()
  x86/irqflags: Mark native_restore_fl extern inline
  x86/build: Remove jump label quirk for GCC older than 4.5.2
  x86/Kconfig: Fix trivial typo
  x86/speculation/l1tf: Increase l1tf memory limit for Nehalem+
  x86/spectre: Add missing family 6 check to microcode check
-rw-r--r--  arch/x86/Kconfig                     2
-rw-r--r--  arch/x86/Makefile                   23
-rw-r--r--  arch/x86/events/core.c               2
-rw-r--r--  arch/x86/include/asm/irqflags.h      3
-rw-r--r--  arch/x86/include/asm/processor.h     4
-rw-r--r--  arch/x86/include/asm/signal.h        7
-rw-r--r--  arch/x86/include/asm/stacktrace.h    2
-rw-r--r--  arch/x86/include/asm/tlbflush.h     40
-rw-r--r--  arch/x86/include/asm/vgtod.h         2
-rw-r--r--  arch/x86/kernel/alternative.c        9
-rw-r--r--  arch/x86/kernel/cpu/bugs.c          46
-rw-r--r--  arch/x86/kernel/cpu/common.c         1
-rw-r--r--  arch/x86/kernel/cpu/intel.c          3
-rw-r--r--  arch/x86/kernel/dumpstack.c         20
-rw-r--r--  arch/x86/lib/usercopy.c              5
-rw-r--r--  arch/x86/mm/fault.c                  2
-rw-r--r--  arch/x86/mm/pageattr.c              25
-rw-r--r--  arch/x86/mm/pti.c                    2
-rw-r--r--  arch/x86/mm/tlb.c                    7
-rw-r--r--  arch/x86/platform/efi/efi_32.c       8
-rw-r--r--  scripts/Kbuild.include               4
21 files changed, 167 insertions, 50 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c5ff296bc5d1..1a0be022f91d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2843,7 +2843,7 @@ config X86_SYSFB
 	  This option, if enabled, marks VGA/VBE/EFI framebuffers as generic
 	  framebuffers so the new generic system-framebuffer drivers can be
 	  used on x86. If the framebuffer is not compatible with the generic
-	  modes, it is adverticed as fallback platform framebuffer so legacy
+	  modes, it is advertised as fallback platform framebuffer so legacy
 	  drivers like efifb, vesafb and uvesafb can pick it up.
 	  If this option is not selected, all system framebuffers are always
 	  marked as fallback platform framebuffers as usual.
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 94859241bc3e..8f6e7eb8ae9f 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -175,22 +175,6 @@ ifdef CONFIG_FUNCTION_GRAPH_TRACER
   endif
 endif
 
-ifndef CC_HAVE_ASM_GOTO
-  $(error Compiler lacks asm-goto support.)
-endif
-
-#
-# Jump labels need '-maccumulate-outgoing-args' for gcc < 4.5.2 to prevent a
-# GCC bug (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46226). There's no way
-# to test for this bug at compile-time because the test case needs to execute,
-# which is a no-go for cross compilers. So check the GCC version instead.
-#
-ifdef CONFIG_JUMP_LABEL
-  ifneq ($(ACCUMULATE_OUTGOING_ARGS), 1)
-    ACCUMULATE_OUTGOING_ARGS = $(call cc-if-fullversion, -lt, 040502, 1)
-  endif
-endif
-
 ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1)
 	# This compiler flag is not supported by Clang:
 	KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,)
@@ -312,6 +296,13 @@ PHONY += vdso_install
 vdso_install:
 	$(Q)$(MAKE) $(build)=arch/x86/entry/vdso $@
 
+archprepare: checkbin
+checkbin:
+ifndef CC_HAVE_ASM_GOTO
+	@echo Compiler lacks asm-goto support.
+	@exit 1
+endif
+
 archclean:
 	$(Q)rm -rf $(objtree)/arch/i386
 	$(Q)rm -rf $(objtree)/arch/x86_64
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 5f4829f10129..dfb2f7c0d019 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2465,7 +2465,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
 
 	perf_callchain_store(entry, regs->ip);
 
-	if (!current->mm)
+	if (!nmi_uaccess_okay())
 		return;
 
 	if (perf_callchain_user32(regs, entry))
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index c14f2a74b2be..15450a675031 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -33,7 +33,8 @@ extern inline unsigned long native_save_fl(void)
 	return flags;
 }
 
-static inline void native_restore_fl(unsigned long flags)
+extern inline void native_restore_fl(unsigned long flags);
+extern inline void native_restore_fl(unsigned long flags)
 {
 	asm volatile("push %0 ; popf"
 		     : /* no output */
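
Note on this hunk: with the gnu89/gnu_inline semantics the kernel builds under, "extern inline" means "inline where possible, but never emit an out-of-line symbol", so the prototype-plus-extern-inline-definition pair is deliberate; the real symbol has to come from somewhere else (for native_restore_fl, an out-of-line asm implementation). A minimal sketch of the pattern, with hypothetical names:

/* hypothetical header, gnu89 semantics: callers inline this,
 * no out-of-line symbol is emitted from here */
extern inline unsigned long twice(unsigned long x);
extern inline unsigned long twice(unsigned long x)
{
        return x << 1;
}

/*
 * Exactly one out-of-line definition must exist elsewhere, for callers
 * that take the function's address or build with inlining disabled:
 *
 *     unsigned long twice(unsigned long x) { return x << 1; }
 */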
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c24297268ebc..d53c54b842da 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -132,6 +132,8 @@ struct cpuinfo_x86 {
 	/* Index into per_cpu list: */
 	u16			cpu_index;
 	u32			microcode;
+	/* Address space bits used by the cache internally */
+	u8			x86_cache_bits;
 	unsigned		initialized : 1;
 } __randomize_layout;
 
@@ -183,7 +185,7 @@ extern void cpu_detect(struct cpuinfo_x86 *c);
 
 static inline unsigned long long l1tf_pfn_limit(void)
 {
-	return BIT_ULL(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT);
+	return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT);
 }
 
 extern void early_cpu_init(void);
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 5f9012ff52ed..33d3c88a7225 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -39,6 +39,7 @@ extern void do_signal(struct pt_regs *regs);
 
 #define __ARCH_HAS_SA_RESTORER
 
+#include <asm/asm.h>
 #include <uapi/asm/sigcontext.h>
 
 #ifdef __i386__
@@ -86,9 +87,9 @@ static inline int __const_sigismember(sigset_t *set, int _sig)
 
 static inline int __gen_sigismember(sigset_t *set, int _sig)
 {
-	unsigned char ret;
-	asm("btl %2,%1\n\tsetc %0"
-	    : "=qm"(ret) : "m"(*set), "Ir"(_sig-1) : "cc");
+	bool ret;
+	asm("btl %2,%1" CC_SET(c)
+	    : CC_OUT(c) (ret) : "m"(*set), "Ir"(_sig-1));
 	return ret;
 }
 
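
For reference, CC_SET()/CC_OUT() live in <asm/asm.h> (hence the new include) and use the GCC 6+ flag-output asm constraints when available, with a SETcc fallback otherwise. Approximately, from memory of that era's header:

/* approximate shape of the <asm/asm.h> macros */
#ifdef __GCC_ASM_FLAG_OUTPUTS__
# define CC_SET(c) "\n\t/* output condition code " #c "*/\n"
# define CC_OUT(c) "=@cc" #c
#else
# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n"
# define CC_OUT(c) [_cc_ ## c] "=qm"
#endif

With flag outputs the compiler consumes CF directly instead of materializing it through SETC, and since GCC treats the flags as clobbered by every x86 inline asm anyway, the explicit "cc" clobber could be dropped.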
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index b6dc698f992a..f335aad404a4 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -111,6 +111,6 @@ static inline unsigned long caller_frame_pointer(void)
 	return (unsigned long)frame;
 }
 
-void show_opcodes(u8 *rip, const char *loglvl);
+void show_opcodes(struct pt_regs *regs, const char *loglvl);
 void show_ip(struct pt_regs *regs, const char *loglvl);
 #endif /* _ASM_X86_STACKTRACE_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 29c9da6c62fc..58ce5288878e 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -175,8 +175,16 @@ struct tlb_state {
 	 * are on. This means that it may not match current->active_mm,
 	 * which will contain the previous user mm when we're in lazy TLB
 	 * mode even if we've already switched back to swapper_pg_dir.
+	 *
+	 * During switch_mm_irqs_off(), loaded_mm will be set to
+	 * LOADED_MM_SWITCHING during the brief interrupts-off window
+	 * when CR3 and loaded_mm would otherwise be inconsistent. This
+	 * is for nmi_uaccess_okay()'s benefit.
 	 */
 	struct mm_struct *loaded_mm;
+
+#define LOADED_MM_SWITCHING ((struct mm_struct *)1)
+
 	u16 loaded_mm_asid;
 	u16 next_asid;
 	/* last user mm's ctx id */
@@ -246,6 +254,38 @@ struct tlb_state {
 };
 DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
 
+/*
+ * Blindly accessing user memory from NMI context can be dangerous
+ * if we're in the middle of switching the current user task or
+ * switching the loaded mm. It can also be dangerous if we
+ * interrupted some kernel code that was temporarily using a
+ * different mm.
+ */
+static inline bool nmi_uaccess_okay(void)
+{
+	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+	struct mm_struct *current_mm = current->mm;
+
+	VM_WARN_ON_ONCE(!loaded_mm);
+
+	/*
+	 * The condition we want to check is
+	 * current_mm->pgd == __va(read_cr3_pa()). This may be slow, though,
+	 * if we're running in a VM with shadow paging, and nmi_uaccess_okay()
+	 * is supposed to be reasonably fast.
+	 *
+	 * Instead, we check the almost equivalent but somewhat conservative
+	 * condition below, and we rely on the fact that switch_mm_irqs_off()
+	 * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3.
+	 */
+	if (loaded_mm != current_mm)
+		return false;
+
+	VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa()));
+
+	return true;
+}
+
 /* Initialize cr4 shadow for this CPU. */
 static inline void cr4_init_shadow(void)
 {
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index fb856c9f0449..53748541c487 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -93,7 +93,7 @@ static inline unsigned int __getcpu(void)
 	 *
 	 * If RDPID is available, use it.
 	 */
-	alternative_io ("lsl %[p],%[seg]",
+	alternative_io ("lsl %[seg],%[p]",
 			".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
 			X86_FEATURE_RDPID,
 			[p] "=a" (p), [seg] "r" (__PER_CPU_SEG));
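
The one-liner is easy to misread: in AT&T syntax the source operand comes first, and for LSL the source is the segment selector while the destination receives the limit. With the operands swapped, LSL wrote into the selector's register and p was left holding whatever happened to be in %eax/%rax. A standalone hypothetical sketch (real code would also check ZF, which LSL sets on success):

#include <stdint.h>

/* hypothetical helper: load the segment limit for a selector */
static inline uint32_t segment_limit(uint32_t selector)
{
        uint32_t limit;

        asm("lsl %1, %0" : "=r" (limit) : "r" (selector));
        return limit;
}

__getcpu() then masks the low bits of the per-CPU GDT entry's limit, which encodes the CPU and node numbers.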
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 014f214da581..b9d5e7c9ef43 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -684,8 +684,6 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode,
  * It means the size must be writable atomically and the address must be aligned
  * in a way that permits an atomic write. It also makes sure we fit on a single
  * page.
- *
- * Note: Must be called under text_mutex.
  */
 void *text_poke(void *addr, const void *opcode, size_t len)
 {
@@ -700,6 +698,8 @@ void *text_poke(void *addr, const void *opcode, size_t len)
 	 */
 	BUG_ON(!after_bootmem);
 
+	lockdep_assert_held(&text_mutex);
+
 	if (!core_kernel_text((unsigned long)addr)) {
 		pages[0] = vmalloc_to_page(addr);
 		pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
@@ -782,8 +782,6 @@ int poke_int3_handler(struct pt_regs *regs)
  * - replace the first byte (int3) by the first byte of
  *   replacing opcode
  * - sync cores
- *
- * Note: must be called under text_mutex.
  */
 void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
 {
@@ -792,6 +790,9 @@ void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
 	bp_int3_handler = handler;
 	bp_int3_addr = (u8 *)addr + sizeof(int3);
 	bp_patching_in_progress = true;
+
+	lockdep_assert_held(&text_mutex);
+
 	/*
 	 * Corresponding read barrier in int3 notifier for making sure the
 	 * in_progress and handler are correctly ordered wrt. patching.
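
The two assertions turn the deleted "must be called under text_mutex" comments into something lockdep actually checks: with CONFIG_LOCKDEP=y, a call path that forgot the lock now warns at runtime. The expected caller pattern, as a sketch:

/* hypothetical caller, illustrating the rule the assertions enforce */
static void patch_one_site(void *addr, const void *insn, size_t len)
{
        mutex_lock(&text_mutex);
        text_poke(addr, insn, len);     /* lockdep_assert_held() passes */
        mutex_unlock(&text_mutex);
}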
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 4c2313d0b9ca..40bdaea97fe7 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -668,6 +668,45 @@ EXPORT_SYMBOL_GPL(l1tf_mitigation);
 enum vmx_l1d_flush_state l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
 EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation);
 
+/*
+ * These CPUs all support 44bits physical address space internally in the
+ * cache but CPUID can report a smaller number of physical address bits.
+ *
+ * The L1TF mitigation uses the top most address bit for the inversion of
+ * non present PTEs. When the installed memory reaches into the top most
+ * address bit due to memory holes, which has been observed on machines
+ * which report 36bits physical address bits and have 32G RAM installed,
+ * then the mitigation range check in l1tf_select_mitigation() triggers.
+ * This is a false positive because the mitigation is still possible due to
+ * the fact that the cache uses 44bit internally. Use the cache bits
+ * instead of the reported physical bits and adjust them on the affected
+ * machines to 44bit if the reported bits are less than 44.
+ */
+static void override_cache_bits(struct cpuinfo_x86 *c)
+{
+	if (c->x86 != 6)
+		return;
+
+	switch (c->x86_model) {
+	case INTEL_FAM6_NEHALEM:
+	case INTEL_FAM6_WESTMERE:
+	case INTEL_FAM6_SANDYBRIDGE:
+	case INTEL_FAM6_IVYBRIDGE:
+	case INTEL_FAM6_HASWELL_CORE:
+	case INTEL_FAM6_HASWELL_ULT:
+	case INTEL_FAM6_HASWELL_GT3E:
+	case INTEL_FAM6_BROADWELL_CORE:
+	case INTEL_FAM6_BROADWELL_GT3E:
+	case INTEL_FAM6_SKYLAKE_MOBILE:
+	case INTEL_FAM6_SKYLAKE_DESKTOP:
+	case INTEL_FAM6_KABYLAKE_MOBILE:
+	case INTEL_FAM6_KABYLAKE_DESKTOP:
+		if (c->x86_cache_bits < 44)
+			c->x86_cache_bits = 44;
+		break;
+	}
+}
+
 static void __init l1tf_select_mitigation(void)
 {
 	u64 half_pa;
@@ -675,6 +714,8 @@ static void __init l1tf_select_mitigation(void)
 	if (!boot_cpu_has_bug(X86_BUG_L1TF))
 		return;
 
+	override_cache_bits(&boot_cpu_data);
+
 	switch (l1tf_mitigation) {
 	case L1TF_MITIGATION_OFF:
 	case L1TF_MITIGATION_FLUSH_NOWARN:
@@ -694,11 +735,6 @@ static void __init l1tf_select_mitigation(void)
 		return;
 #endif
 
-	/*
-	 * This is extremely unlikely to happen because almost all
-	 * systems have far more MAX_PA/2 than RAM can be fit into
-	 * DIMM slots.
-	 */
 	half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
 	if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) {
 		pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
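
The deleted "extremely unlikely" comment is the point of the change: with CPUID-reported bits the limit was in fact reachable. Working through the numbers from the scenario in the comment above (PAGE_SHIFT = 12), as a plain C sketch:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12

/* half_pa as computed from l1tf_pfn_limit() for a given bit width */
static uint64_t half_pa(unsigned int bits)
{
        return (1ULL << (bits - 1 - PAGE_SHIFT)) << PAGE_SHIFT;
}

int main(void)
{
        /* 36 reported bits: 32 GB, reachable on a 32G box with memory
         * holes; 44 cache-internal bits: 8192 GB, out of reach */
        printf("36 bits -> %llu GB\n", (unsigned long long)(half_pa(36) >> 30));
        printf("44 bits -> %llu GB\n", (unsigned long long)(half_pa(44) >> 30));
        return 0;
}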
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 84dee5ab745a..44c4ef3d989b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -919,6 +919,7 @@ void get_cpu_address_sizes(struct cpuinfo_x86 *c)
 	else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
 		c->x86_phys_bits = 36;
 #endif
+	c->x86_cache_bits = c->x86_phys_bits;
 }
 
 static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 401e8c133108..fc3c07fe7df5 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -150,6 +150,9 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
 	if (cpu_has(c, X86_FEATURE_HYPERVISOR))
 		return false;
 
+	if (c->x86 != 6)
+		return false;
+
 	for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
 		if (c->x86_model == spectre_bad_microcodes[i].model &&
 		    c->x86_stepping == spectre_bad_microcodes[i].stepping)
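
spectre_bad_microcodes[] only describes family 6 parts, and model/stepping pairs are not unique across families, so the lookup has to be gated on the family first or an unrelated CPU could match an entry. A standalone hypothetical illustration of the gated lookup (names and the revision comparison are assumptions, not the kernel's table):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct sku { uint8_t model, stepping; uint32_t bad_ucode; };

static bool ucode_blacklisted(const struct sku *tbl, size_t n,
                              uint8_t family, uint8_t model,
                              uint8_t stepping, uint32_t ucode)
{
        size_t i;

        if (family != 6)        /* table entries are family 6 only */
                return false;

        for (i = 0; i < n; i++)
                if (tbl[i].model == model && tbl[i].stepping == stepping)
                        return ucode <= tbl[i].bad_ucode;
        return false;
}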
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 9c8652974f8e..f56895106ccf 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -17,6 +17,7 @@
 #include <linux/bug.h>
 #include <linux/nmi.h>
 #include <linux/sysfs.h>
+#include <linux/kasan.h>
 
 #include <asm/cpu_entry_area.h>
 #include <asm/stacktrace.h>
@@ -89,14 +90,24 @@ static void printk_stack_address(unsigned long address, int reliable,
  * Thus, the 2/3rds prologue and 64 byte OPCODE_BUFSIZE is just a random
  * guesstimate in attempt to achieve all of the above.
  */
-void show_opcodes(u8 *rip, const char *loglvl)
+void show_opcodes(struct pt_regs *regs, const char *loglvl)
 {
 #define PROLOGUE_SIZE 42
 #define EPILOGUE_SIZE 21
 #define OPCODE_BUFSIZE (PROLOGUE_SIZE + 1 + EPILOGUE_SIZE)
 	u8 opcodes[OPCODE_BUFSIZE];
+	unsigned long prologue = regs->ip - PROLOGUE_SIZE;
+	bool bad_ip;
 
-	if (probe_kernel_read(opcodes, rip - PROLOGUE_SIZE, OPCODE_BUFSIZE)) {
+	/*
+	 * Make sure userspace isn't trying to trick us into dumping kernel
+	 * memory by pointing the userspace instruction pointer at it.
+	 */
+	bad_ip = user_mode(regs) &&
+		__chk_range_not_ok(prologue, OPCODE_BUFSIZE, TASK_SIZE_MAX);
+
+	if (bad_ip || probe_kernel_read(opcodes, (u8 *)prologue,
+					OPCODE_BUFSIZE)) {
 		printk("%sCode: Bad RIP value.\n", loglvl);
 	} else {
 		printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %"
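
The guard closes a real hole: a user task could point its RIP at a kernel address and fault deliberately, and the old code would dump the surrounding kernel bytes into the (user-visible) oops output. A hypothetical standalone version of the range condition, including the case where regs->ip - PROLOGUE_SIZE underflows and wraps:

#include <stdbool.h>
#include <stdint.h>

/* every byte of the dump window must stay below the user/kernel split */
static bool dump_window_ok(uint64_t start, uint64_t len, uint64_t task_size_max)
{
        return start + len >= start          /* no wraparound */
            && start + len <= task_size_max; /* entirely in userspace */
}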
@@ -112,7 +123,7 @@ void show_ip(struct pt_regs *regs, const char *loglvl)
 #else
 	printk("%sRIP: %04x:%pS\n", loglvl, (int)regs->cs, (void *)regs->ip);
 #endif
-	show_opcodes((u8 *)regs->ip, loglvl);
+	show_opcodes(regs, loglvl);
 }
 
 void show_iret_regs(struct pt_regs *regs)
@@ -346,7 +357,10 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 	 * We're not going to return, but we might be on an IST stack or
 	 * have very little stack space left. Rewind the stack and kill
 	 * the task.
+	 * Before we rewind the stack, we have to tell KASAN that we're going to
+	 * reuse the task stack and that existing poisons are invalid.
 	 */
+	kasan_unpoison_task_stack(current);
 	rewind_stack_do_exit(signr);
 }
 NOKPROBE_SYMBOL(oops_end);
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index c8c6ad0d58b8..3f435d7fca5e 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -7,6 +7,8 @@
 #include <linux/uaccess.h>
 #include <linux/export.h>
 
+#include <asm/tlbflush.h>
+
 /*
  * We rely on the nested NMI work to allow atomic faults from the NMI path; the
  * nested NMI paths are careful to preserve CR2.
@@ -19,6 +21,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 	if (__range_not_ok(from, n, TASK_SIZE))
 		return n;
 
+	if (!nmi_uaccess_okay())
+		return n;
+
 	/*
 	 * Even though this function is typically called from NMI/IRQ context
 	 * disable pagefaults so that its behaviour is consistent even when
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b9123c497e0a..47bebfe6efa7 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -837,7 +837,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
 
 	printk(KERN_CONT "\n");
 
-	show_opcodes((u8 *)regs->ip, loglvl);
+	show_opcodes(regs, loglvl);
 }
 
 static void
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 8d6c34fe49be..51a5a69ecac9 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1420,6 +1420,29 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
 	return 0;
 }
 
+/*
+ * Machine check recovery code needs to change cache mode of poisoned
+ * pages to UC to avoid speculative access logging another error. But
+ * passing the address of the 1:1 mapping to set_memory_uc() is a fine
+ * way to encourage a speculative access. So we cheat and flip the top
+ * bit of the address. This works fine for the code that updates the
+ * page tables. But at the end of the process we need to flush the cache
+ * and the non-canonical address causes a #GP fault when used by the
+ * CLFLUSH instruction.
+ *
+ * But in the common case we already have a canonical address. This code
+ * will fix the top bit if needed and is a no-op otherwise.
+ */
+static inline unsigned long make_addr_canonical_again(unsigned long addr)
+{
+#ifdef CONFIG_X86_64
+	return (long)(addr << 1) >> 1;
+#else
+	return addr;
+#endif
+}
+
+
 static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 				    pgprot_t mask_set, pgprot_t mask_clr,
 				    int force_split, int in_flag,
@@ -1465,7 +1488,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 	 * Save address for cache flush. *addr is modified in the call
 	 * to __change_page_attr_set_clr() below.
 	 */
-	baddr = *addr;
+	baddr = make_addr_canonical_again(*addr);
 	}
 
 	/* Must avoid aliasing mappings in the highmem code */
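
The trick in make_addr_canonical_again() is compact enough to deserve a worked example: shifting left by one discards bit 63, and the arithmetic right shift copies bit 62 back into bit 63, so both the flipped-top-bit alias and an already-canonical 1:1-map address come out canonical. A plain C sketch with hypothetical addresses:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t aliased   = 0x7fff888000100000ULL; /* top bit flipped */
        uint64_t canonical = 0xffff888000100000ULL; /* already canonical */

        printf("%#llx\n",
               (unsigned long long)((int64_t)(aliased << 1) >> 1));
        printf("%#llx\n",
               (unsigned long long)((int64_t)(canonical << 1) >> 1));
        /* both print 0xffff888000100000 */
        return 0;
}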
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 31341ae7309f..c1fc1ae6b429 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -248,7 +248,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
  *
  * Returns a pointer to a PTE on success, or NULL on failure.
  */
-static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
+static pte_t *pti_user_pagetable_walk_pte(unsigned long address)
 {
 	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
 	pmd_t *pmd;
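
Dropping __init is the entire fix here: per the pull message, the on-demand VDSO mapping change made this function reachable after boot, and a reference from regular text into .init.text (which is freed once boot finishes) is exactly what modpost's section-mismatch warning flags. A minimal hypothetical reproducer of that warning class:

#include <linux/init.h>

static __init int boot_only(void)	/* placed in .init.text, freed after boot */
{
	return 0;
}

int runtime_caller(void)		/* modpost: section mismatch reference */
{
	return boot_only();
}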
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 9517d1b2a281..e96b99eb800c 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -305,6 +305,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 
 	choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
 
+	/* Let nmi_uaccess_okay() know that we're changing CR3. */
+	this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
+	barrier();
+
 	if (need_flush) {
 		this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
 		this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
@@ -335,6 +339,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	if (next != &init_mm)
 		this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
 
+	/* Make sure we write CR3 before loaded_mm. */
+	barrier();
+
 	this_cpu_write(cpu_tlbstate.loaded_mm, next);
 	this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
 }
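
Read together with the tlbflush.h hunk, the two compiler barriers bracket the CR3 write so an NMI on this CPU can never see a loaded_mm that promises more than CR3 delivers. Condensed (a sketch, not the kernel's code):

/*
 * writer: switch_mm_irqs_off()         reader: NMI -> nmi_uaccess_okay()
 * ------------------------------       ---------------------------------
 * loaded_mm = LOADED_MM_SWITCHING;     mm = this_cpu_read(loaded_mm);
 * barrier();                           if (mm != current->mm)
 * write_cr3(new_pgd);                          return false;  // refuse
 * barrier();
 * loaded_mm = next;
 *
 * barrier() is a compiler barrier only: the window runs with interrupts
 * off on one CPU, and an NMI on that CPU observes its own stores in
 * program order, so no hardware fences are needed.
 */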
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 324b93328b37..05ca14222463 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -85,14 +85,10 @@ pgd_t * __init efi_call_phys_prolog(void)
 
 void __init efi_call_phys_epilog(pgd_t *save_pgd)
 {
-	struct desc_ptr gdt_descr;
-
-	gdt_descr.address = (unsigned long)get_cpu_gdt_rw(0);
-	gdt_descr.size = GDT_SIZE - 1;
-	load_gdt(&gdt_descr);
-
 	load_cr3(save_pgd);
 	__flush_tlb_all();
+
+	load_fixmap_gdt(0);
 }
 
 void __init efi_runtime_update_mappings(void)
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index c75413d05a63..ce53639a864a 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -153,10 +153,6 @@ cc-fullversion = $(shell $(CONFIG_SHELL) \
 # Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1)
 cc-ifversion = $(shell [ $(cc-version) $(1) $(2) ] && echo $(3) || echo $(4))
 
-# cc-if-fullversion
-# Usage: EXTRA_CFLAGS += $(call cc-if-fullversion, -lt, 040502, -O1)
-cc-if-fullversion = $(shell [ $(cc-fullversion) $(1) $(2) ] && echo $(3) || echo $(4))
-
 # cc-ldoption
 # Usage: ldflags += $(call cc-ldoption, -Wl$(comma)--hash-style=both)
 cc-ldoption = $(call try-run,\