about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorDenys Vlasenko <dvlasenk@redhat.com>2015-04-02 12:46:59 -0400
committerIngo Molnar <mingo@kernel.org>2015-04-08 03:02:12 -0400
commitfffbb5dcfd29f8831e41b4dd2ab938bd36d35283 (patch)
tree2125f2510b43c95b1d7c86e41b20bec790c1cec2
parent4bcc7827b02feea2c762fa6d46a1bffb300d7403 (diff)
x86/asm/entry/64: Move opportunistic sysret code to syscall code path
This change does two things:

 - Copy-pastes the "retint_swapgs:" code into the syscall handling code;
   the copy is under the "syscall_return:" label. The code is unchanged
   apart from some label renames.

 - Removes the "opportunistic sysret" code from the "retint_swapgs:"
   code block, since it will no longer be reached by syscall return.
   This in fact removes most of the code in question.

       text    data  bss  dec    hex   filename
       12530   0     0    12530  30f2  entry_64.o.before
       12562   0     0    12562  3112  entry_64.o

Run-tested.

Acked-and-Tested-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Will Drewry <wad@chromium.org>
Link: http://lkml.kernel.org/r/1427993219-7291-1-git-send-email-dvlasenk@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- arch/x86/kernel/entry_64.S | 158
1 file changed, 86 insertions(+), 72 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 65485b3baa59..e4c810395bae 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -354,8 +354,8 @@ GLOBAL(int_with_check)
354 movl TI_flags(%rcx),%edx 354 movl TI_flags(%rcx),%edx
355 andl %edi,%edx 355 andl %edi,%edx
356 jnz int_careful 356 jnz int_careful
357 andl $~TS_COMPAT,TI_status(%rcx) 357 andl $~TS_COMPAT,TI_status(%rcx)
358 jmp retint_swapgs 358 jmp syscall_return
359 359
360 /* Either reschedule or signal or syscall exit tracking needed. */ 360 /* Either reschedule or signal or syscall exit tracking needed. */
361 /* First do a reschedule test. */ 361 /* First do a reschedule test. */
@@ -399,9 +399,86 @@ int_restore_rest:
399 DISABLE_INTERRUPTS(CLBR_NONE) 399 DISABLE_INTERRUPTS(CLBR_NONE)
400 TRACE_IRQS_OFF 400 TRACE_IRQS_OFF
401 jmp int_with_check 401 jmp int_with_check
402
403syscall_return:
404 /* The IRETQ could re-enable interrupts: */
405 DISABLE_INTERRUPTS(CLBR_ANY)
406 TRACE_IRQS_IRETQ
407
408 /*
409 * Try to use SYSRET instead of IRET if we're returning to
410 * a completely clean 64-bit userspace context.
411 */
412 movq RCX(%rsp),%rcx
413 cmpq %rcx,RIP(%rsp) /* RCX == RIP */
414 jne opportunistic_sysret_failed
415
416 /*
417 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
418 * in kernel space. This essentially lets the user take over
419 * the kernel, since userspace controls RSP. It's not worth
420 * testing for canonicalness exactly -- this check detects any
421 * of the 17 high bits set, which is true for non-canonical
422 * or kernel addresses. (This will pessimize vsyscall=native.
423 * Big deal.)
424 *
425 * If virtual addresses ever become wider, this will need
426 * to be updated to remain correct on both old and new CPUs.
427 */
428 .ifne __VIRTUAL_MASK_SHIFT - 47
429 .error "virtual address width changed -- SYSRET checks need update"
430 .endif
431 shr $__VIRTUAL_MASK_SHIFT, %rcx
432 jnz opportunistic_sysret_failed
433
434 cmpq $__USER_CS,CS(%rsp) /* CS must match SYSRET */
435 jne opportunistic_sysret_failed
436
437 movq R11(%rsp),%r11
438 cmpq %r11,EFLAGS(%rsp) /* R11 == RFLAGS */
439 jne opportunistic_sysret_failed
440
441 /*
442 * SYSRET can't restore RF. SYSRET can restore TF, but unlike IRET,
443 * restoring TF results in a trap from userspace immediately after
444 * SYSRET. This would cause an infinite loop whenever #DB happens
445 * with register state that satisfies the opportunistic SYSRET
446 * conditions. For example, single-stepping this user code:
447 *
448 * movq $stuck_here,%rcx
449 * pushfq
450 * popq %r11
451 * stuck_here:
452 *
453 * would never get past 'stuck_here'.
454 */
455 testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
456 jnz opportunistic_sysret_failed
457
458 /* nothing to check for RSP */
459
460 cmpq $__USER_DS,SS(%rsp) /* SS must match SYSRET */
461 jne opportunistic_sysret_failed
462
463 /*
464 * We win! This label is here just for ease of understanding
465 * perf profiles. Nothing jumps here.
466 */
467syscall_return_via_sysret:
468 CFI_REMEMBER_STATE
469 /* r11 is already restored (see code above) */
470 RESTORE_C_REGS_EXCEPT_R11
471 movq RSP(%rsp),%rsp
472 USERGS_SYSRET64
473 CFI_RESTORE_STATE
474
475opportunistic_sysret_failed:
476 SWAPGS
477 jmp restore_c_regs_and_iret
402 CFI_ENDPROC 478 CFI_ENDPROC
403END(system_call) 479END(system_call)
404 480
481
405 .macro FORK_LIKE func 482 .macro FORK_LIKE func
406ENTRY(stub_\func) 483ENTRY(stub_\func)
407 CFI_STARTPROC 484 CFI_STARTPROC
@@ -673,76 +750,8 @@ retint_swapgs: /* return to user-space */
673 DISABLE_INTERRUPTS(CLBR_ANY) 750 DISABLE_INTERRUPTS(CLBR_ANY)
674 TRACE_IRQS_IRETQ 751 TRACE_IRQS_IRETQ
675 752
676 /*
677 * Try to use SYSRET instead of IRET if we're returning to
678 * a completely clean 64-bit userspace context.
679 */
680 movq RCX(%rsp),%rcx
681 cmpq %rcx,RIP(%rsp) /* RCX == RIP */
682 jne opportunistic_sysret_failed
683
684 /*
685 * On Intel CPUs, sysret with non-canonical RCX/RIP will #GP
686 * in kernel space. This essentially lets the user take over
687 * the kernel, since userspace controls RSP. It's not worth
688 * testing for canonicalness exactly -- this check detects any
689 * of the 17 high bits set, which is true for non-canonical
690 * or kernel addresses. (This will pessimize vsyscall=native.
691 * Big deal.)
692 *
693 * If virtual addresses ever become wider, this will need
694 * to be updated to remain correct on both old and new CPUs.
695 */
696 .ifne __VIRTUAL_MASK_SHIFT - 47
697 .error "virtual address width changed -- sysret checks need update"
698 .endif
699 shr $__VIRTUAL_MASK_SHIFT, %rcx
700 jnz opportunistic_sysret_failed
701
702 cmpq $__USER_CS,CS(%rsp) /* CS must match SYSRET */
703 jne opportunistic_sysret_failed
704
705 movq R11(%rsp),%r11
706 cmpq %r11,EFLAGS(%rsp) /* R11 == RFLAGS */
707 jne opportunistic_sysret_failed
708
709 /*
710 * SYSRET can't restore RF. SYSRET can restore TF, but unlike IRET,
711 * restoring TF results in a trap from userspace immediately after
712 * SYSRET. This would cause an infinite loop whenever #DB happens
713 * with register state that satisfies the opportunistic SYSRET
714 * conditions. For example, single-stepping this user code:
715 *
716 * movq $stuck_here,%rcx
717 * pushfq
718 * popq %r11
719 * stuck_here:
720 *
721 * would never get past 'stuck_here'.
722 */
723 testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
724 jnz opportunistic_sysret_failed
725
726 /* nothing to check for RSP */
727
728 cmpq $__USER_DS,SS(%rsp) /* SS must match SYSRET */
729 jne opportunistic_sysret_failed
730
731 /*
732 * We win! This label is here just for ease of understanding
733 * perf profiles. Nothing jumps here.
734 */
735irq_return_via_sysret:
736 CFI_REMEMBER_STATE
737 /* r11 is already restored (see code above) */
738 RESTORE_C_REGS_EXCEPT_R11
739 movq RSP(%rsp),%rsp
740 USERGS_SYSRET64
741 CFI_RESTORE_STATE
742
743opportunistic_sysret_failed:
744 SWAPGS 753 SWAPGS
745 jmp restore_args 754 jmp restore_c_regs_and_iret
746 755
747/* Returning to kernel space */ 756/* Returning to kernel space */
748retint_kernel: 757retint_kernel:
@@ -761,7 +770,12 @@ retint_kernel:
761 * The iretq could re-enable interrupts: 770 * The iretq could re-enable interrupts:
762 */ 771 */
763 TRACE_IRQS_IRETQ 772 TRACE_IRQS_IRETQ
764restore_args: 773
774/*
775 * At this label, code paths which return to kernel and to user,
776 * which come from interrupts/exception and from syscalls, merge.
777 */
778restore_c_regs_and_iret:
765 RESTORE_C_REGS 779 RESTORE_C_REGS
766 REMOVE_PT_GPREGS_FROM_STACK 8 780 REMOVE_PT_GPREGS_FROM_STACK 8
767 781