author	Andy Lutomirski <luto@kernel.org>	2015-04-26 19:47:59 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-04-26 20:57:38 -0400
commit	61f01dd941ba9e06d2bf05994450ecc3d61b6b8b (patch)
tree	3a4a4ef2d5e4d44bb4cb1708f9fc4749e9c9824a
parent	1190944f4b12203330ac5ed8784f6c181bf26f2d (diff)
x86_64, asm: Work around AMD SYSRET SS descriptor attribute issue
AMD CPUs don't reinitialize the SS descriptor on SYSRET, so SYSRET with
SS == 0 results in an invalid usermode state in which SS is apparently
equal to __USER_DS but causes #SS if used.

Work around the issue by setting SS to __KERNEL_DS in __switch_to, thus
ensuring that SYSRET never happens with SS set to NULL.

This was exposed by a recent vDSO cleanup.

Fixes: e7d6eefaaa44 ("x86/vdso32/syscall.S: Do not load __USER32_DS to %ss")
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Peter Anvin <hpa@zytor.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Denys Vlasenko <vda.linux@googlemail.com>
Cc: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
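In code terms the workaround boils down to one check at context-switch time. The sketch below is a simplified restatement of the process_64.c hunk further down, not a new mechanism; savesegment()/loadsegment() are the existing x86 segment accessor macros:

	/* Only on CPUs flagged with X86_BUG_SYSRET_SS_ATTRS (set for all AMD CPUs below). */
	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
		unsigned short ss_sel;

		savesegment(ss, ss_sel);		/* segment reads are cheap */
		if (ss_sel != __KERNEL_DS)		/* SS is NULL after an interrupt entry */
			loadsegment(ss, __KERNEL_DS);	/* refresh the cached SS descriptor */
	}

Because SS is reloaded before any task that scheduled can reach SYSRET again, the stale-attribute state can no longer escape to user mode.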
-rw-r--r--	arch/x86/ia32/ia32entry.S	7
-rw-r--r--	arch/x86/include/asm/cpufeature.h	1
-rw-r--r--	arch/x86/kernel/cpu/amd.c	3
-rw-r--r--	arch/x86/kernel/entry_64.S	9
-rw-r--r--	arch/x86/kernel/process_64.c	28
5 files changed, 48 insertions(+), 0 deletions(-)
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a821b1cd4fa7..72bf2680f819 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -427,6 +427,13 @@ sysretl_from_sys_call:
 	 * cs and ss are loaded from MSRs.
 	 * (Note: 32bit->32bit SYSRET is different: since r11
 	 * does not exist, it merely sets eflags.IF=1).
+	 *
+	 * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
+	 * descriptor is not reinitialized. This means that we must
+	 * avoid SYSRET with SS == NULL, which could happen if we schedule,
+	 * exit the kernel, and re-enter using an interrupt vector. (All
+	 * interrupt entries on x86_64 set SS to NULL.) We prevent that
+	 * from happening by reloading SS in __switch_to.
 	 */
 	USERGS_SYSRET32
 
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 7ee9b94d9921..3d6606fb97d0 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -265,6 +265,7 @@
 #define X86_BUG_11AP		X86_BUG(5) /* Bad local APIC aka 11AP */
 #define X86_BUG_FXSAVE_LEAK	X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
 #define X86_BUG_CLFLUSH_MONITOR	X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
+#define X86_BUG_SYSRET_SS_ATTRS	X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 
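For context, the X86_BUG_* constants share the cpufeature bit space rather than living in a separate bitmap; in kernels of this vintage the helpers are defined roughly as follows (paraphrased from the same header, not part of this diff):

	#define X86_BUG(x)		(NCAPINTS*32 + (x))	/* bug bits follow the feature words */
	#define set_cpu_bug(c, bit)	set_cpu_cap(c, (bit))
	#define static_cpu_has_bug(bit)	static_cpu_has((bit))

So a "bug" is just a capability bit with inverted meaning, set and tested with the ordinary cpufeature machinery.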
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index fd470ebf924e..e4cf63301ff4 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -720,6 +720,9 @@ static void init_amd(struct cpuinfo_x86 *c)
 	if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH))
 		if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
 			set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
+
+	/* AMD CPUs don't reset SS attributes on SYSRET */
+	set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
 }
 
 #ifdef CONFIG_X86_32
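The bug bit set here unconditionally for AMD CPUs is consumed at context-switch time; the consuming pattern, as in the process_64.c hunk below, is simply a static_cpu_has_bug() test:

	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
		/* reload SS so SYSRET never runs with a NULL SS descriptor */
	}

static_cpu_has_bug() is built on static_cpu_has(), which is patched via alternatives at boot, so CPUs without the bug pay essentially nothing for the check.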
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c7b238494b31..02c2eff7478d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -295,6 +295,15 @@ system_call_fastpath:
 	 * rflags from r11 (but RF and VM bits are forced to 0),
 	 * cs and ss are loaded from MSRs.
 	 * Restoration of rflags re-enables interrupts.
+	 *
+	 * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
+	 * descriptor is not reinitialized. This means that we should
+	 * avoid SYSRET with SS == NULL, which could happen if we schedule,
+	 * exit the kernel, and re-enter using an interrupt vector. (All
+	 * interrupt entries on x86_64 set SS to NULL.) We prevent that
+	 * from happening by reloading SS in __switch_to. (Actually
+	 * detecting the failure in 64-bit userspace is tricky but can be
+	 * done.)
 	 */
 	USERGS_SYSRET64
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 4baaa972f52a..ddfdbf74f174 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -419,6 +419,34 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 		__switch_to_xtra(prev_p, next_p, tss);
 
+	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
+		/*
+		 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
+		 * does not update the cached descriptor. As a result, if we
+		 * do SYSRET while SS is NULL, we'll end up in user mode with
+		 * SS apparently equal to __USER_DS but actually unusable.
+		 *
+		 * The straightforward workaround would be to fix it up just
+		 * before SYSRET, but that would slow down the system call
+		 * fast paths. Instead, we ensure that SS is never NULL in
+		 * system call context. We do this by replacing NULL SS
+		 * selectors at every context switch. SYSCALL sets up a valid
+		 * SS, so the only way to get NULL is to re-enter the kernel
+		 * from CPL 3 through an interrupt. Since that can't happen
+		 * in the same task as a running syscall, we are guaranteed to
+		 * context switch between every interrupt vector entry and a
+		 * subsequent SYSRET.
+		 *
+		 * We read SS first because SS reads are much faster than
+		 * writes. Out of caution, we force SS to __KERNEL_DS even if
+		 * it previously had a different non-NULL value.
+		 */
+		unsigned short ss_sel;
+		savesegment(ss, ss_sel);
+		if (ss_sel != __KERNEL_DS)
+			loadsegment(ss, __KERNEL_DS);
+	}
+
 	return prev_p;
 }
 