author     Andy Lutomirski <luto@amacapital.net>            2014-11-22 21:00:33 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2014-11-23 16:56:19 -0500
commit     b645af2d5905c4e32399005b867987919cbfc3ae (patch)
tree       63484dfff842bb500e4aaf47f7f50df830fbfe03
parent     6f442be2fb22be02cafa606f1769fa1e6f894441 (diff)
x86_64, traps: Rework bad_iret
It's possible for iretq to userspace to fail. This can happen because of a bad CS, SS, or RIP.

Historically, we've handled it by fixing up an exception from iretq to land at bad_iret, which pretends that the failed iret frame was really the hardware part of #GP(0) from userspace. To make this work, there's an extra fixup to fudge the gs base into a usable state.

This is suboptimal because it loses the original exception. It's also buggy because there's no guarantee that we were on the kernel stack to begin with. For example, if the failing iret happened on return from an NMI, then we'll end up executing general_protection on the NMI stack. This is bad for several reasons, the most immediate of which is that general_protection, as a non-paranoid idtentry, will try to deliver signals and/or schedule from the wrong stack.

This patch throws out bad_iret entirely. As a replacement, it augments the existing swapgs fudge into a full-blown iret fixup, mostly written in C. It should be clearer and more correct.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
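For context, the "hardware part" of the frame mentioned above is the five 64-bit words that iretq pops; when iretq faults, those words are still sitting at the top of the stack it was using. The sketch below is illustrative only and not part of the patch; the layout mirrors the tail of the kernel's struct pt_regs, which is what lets the new fixup copy exactly 5*8 bytes (see fixup_bad_iret in the diff below).

/*
 * Illustrative only -- not part of the patch.  The frame iretq consumes,
 * lowest address first.  A failed iretq leaves these five words untouched.
 */
struct iret_frame {
        unsigned long ip;       /* RIP: user return address                 */
        unsigned long cs;       /* code segment selector (a bad CS -> #GP)  */
        unsigned long flags;    /* RFLAGS                                   */
        unsigned long sp;       /* user stack pointer                       */
        unsigned long ss;       /* stack segment selector (a bad SS -> #SS) */
};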
-rw-r--r--  arch/x86/kernel/entry_64.S | 45
-rw-r--r--  arch/x86/kernel/traps.c    | 29
2 files changed, 48 insertions, 26 deletions
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 49a0c1781253..c0226ab54106 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -830,8 +830,13 @@ ENTRY(native_iret)
 
 .global native_irq_return_iret
 native_irq_return_iret:
+        /*
+         * This may fault.  Non-paranoid faults on return to userspace are
+         * handled by fixup_bad_iret.  These include #SS, #GP, and #NP.
+         * Double-faults due to espfix64 are handled in do_double_fault.
+         * Other faults here are fatal.
+         */
         iretq
-        _ASM_EXTABLE(native_irq_return_iret, bad_iret)
 
 #ifdef CONFIG_X86_ESPFIX64
 native_irq_return_ldt:
@@ -859,25 +864,6 @@ native_irq_return_ldt:
         jmp native_irq_return_iret
 #endif
 
-        .section .fixup,"ax"
-bad_iret:
-        /*
-         * The iret traps when the %cs or %ss being restored is bogus.
-         * We've lost the original trap vector and error code.
-         * #GPF is the most likely one to get for an invalid selector.
-         * So pretend we completed the iret and took the #GPF in user mode.
-         *
-         * We are now running with the kernel GS after exception recovery.
-         * But error_entry expects us to have user GS to match the user %cs,
-         * so swap back.
-         */
-        pushq $0
-
-        SWAPGS
-        jmp general_protection
-
-        .previous
-
         /* edi: workmask, edx: work */
 retint_careful:
         CFI_RESTORE_STATE
@@ -1369,17 +1355,16 @@ error_sti:
 
 /*
  * There are two places in the kernel that can potentially fault with
- * usergs. Handle them here. The exception handlers after iret run with
- * kernel gs again, so don't set the user space flag. B stepping K8s
- * sometimes report an truncated RIP for IRET exceptions returning to
- * compat mode. Check for these here too.
+ * usergs. Handle them here. B stepping K8s sometimes report a
+ * truncated RIP for IRET exceptions returning to compat mode. Check
+ * for these here too.
  */
 error_kernelspace:
         CFI_REL_OFFSET rcx, RCX+8
         incl %ebx
         leaq native_irq_return_iret(%rip),%rcx
         cmpq %rcx,RIP+8(%rsp)
-        je error_swapgs
+        je error_bad_iret
         movl %ecx,%eax  /* zero extend */
         cmpq %rax,RIP+8(%rsp)
         je bstep_iret
@@ -1390,7 +1375,15 @@ error_kernelspace:
 bstep_iret:
         /* Fix truncated RIP */
         movq %rcx,RIP+8(%rsp)
-        jmp error_swapgs
+        /* fall through */
+
+error_bad_iret:
+        SWAPGS
+        mov %rsp,%rdi
+        call fixup_bad_iret
+        mov %rax,%rsp
+        decl %ebx       /* Return to usergs */
+        jmp error_sti
         CFI_ENDPROC
 END(error_entry)
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 48035e9cdde9..de801f22128a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -407,6 +407,35 @@ asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs)
         return regs;
 }
 NOKPROBE_SYMBOL(sync_regs);
+
+struct bad_iret_stack {
+        void *error_entry_ret;
+        struct pt_regs regs;
+};
+
+asmlinkage __visible
+struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
+{
+        /*
+         * This is called from entry_64.S early in handling a fault
+         * caused by a bad iret to user mode.  To handle the fault
+         * correctly, we want move our stack frame to task_pt_regs
+         * and we want to pretend that the exception came from the
+         * iret target.
+         */
+        struct bad_iret_stack *new_stack =
+                container_of(task_pt_regs(current),
+                             struct bad_iret_stack, regs);
+
+        /* Copy the IRET target to the new stack. */
+        memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
+
+        /* Copy the remainder of the stack from the current stack. */
+        memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
+
+        BUG_ON(!user_mode_vm(&new_stack->regs));
+        return new_stack;
+}
 #endif
 
 /*
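To make the stack surgery in fixup_bad_iret() concrete, below is a small stand-alone user-space model of its two memmove() calls. It is an illustrative sketch only, not kernel code: pt_regs is trimmed to a few fields, and the names failed_iret_frame and old, along with the sample values, are invented for the example. The kernel uses memmove() rather than memcpy() because the source and destination frames may overlap; in this toy model they do not, but the copy ordering is kept the same.

/* Illustrative user-space model of fixup_bad_iret()'s copying -- not kernel code. */
#include <stdio.h>
#include <stddef.h>
#include <string.h>

struct pt_regs {                        /* heavily trimmed for the model */
        unsigned long di;               /* stands in for the registers error_entry saves */
        unsigned long orig_ax;
        /* hardware/iret part -- must stay last and in this order */
        unsigned long ip;
        unsigned long cs;
        unsigned long flags;
        unsigned long sp;
        unsigned long ss;
};

struct bad_iret_stack {
        void *error_entry_ret;
        struct pt_regs regs;
};

int main(void)
{
        /* The five 8-byte words the failed iretq left behind (LP64 assumed). */
        unsigned long failed_iret_frame[5] = {
                0x00007f0000001000,     /* user RIP */
                0x33,                   /* user CS  */
                0x202,                  /* RFLAGS   */
                0x00007ffffffff000,     /* user RSP */
                0x2b,                   /* user SS  */
        };

        /* The frame error_entry built on whatever stack the fault hit. */
        struct bad_iret_stack old = { 0 };
        old.regs.di = 0xdead;                           /* some saved register state */
        old.regs.sp = (unsigned long)failed_iret_frame; /* points at the failed frame */

        /* Destination: where task_pt_regs(current) would put the frame. */
        struct bad_iret_stack new_stack;

        /* 1) Copy the IRET target: now it looks like the fault came from user mode. */
        memmove(&new_stack.regs.ip, (void *)old.regs.sp, 5 * 8);

        /* 2) Copy everything below regs.ip from the original frame. */
        memmove(&new_stack, &old, offsetof(struct bad_iret_stack, regs.ip));

        printf("new ip=%#lx cs=%#lx di=%#lx\n",
               new_stack.regs.ip, new_stack.regs.cs, new_stack.regs.di);
        return 0;
}

Running this should print the user RIP and CS taken from failed_iret_frame alongside the di value preserved from the original frame, which is the kind of state the exception's C handler then sees on the real task stack.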