diff options
Diffstat (limited to 'arch/x86/kernel/entry_64.S')
-rw-r--r-- | arch/x86/kernel/entry_64.S | 34 |
1 files changed, 29 insertions, 5 deletions
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1d74d161687c..f0095a76c182 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -364,12 +364,21 @@ system_call_fastpath: | |||
364 | * Has incomplete stack frame and undefined top of stack. | 364 | * Has incomplete stack frame and undefined top of stack. |
365 | */ | 365 | */ |
366 | ret_from_sys_call: | 366 | ret_from_sys_call: |
367 | testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | ||
368 | jnz int_ret_from_sys_call_fixup /* Go to the slow path */ | ||
369 | |||
370 | LOCKDEP_SYS_EXIT | 367 | LOCKDEP_SYS_EXIT |
371 | DISABLE_INTERRUPTS(CLBR_NONE) | 368 | DISABLE_INTERRUPTS(CLBR_NONE) |
372 | TRACE_IRQS_OFF | 369 | TRACE_IRQS_OFF |
370 | |||
371 | /* | ||
372 | * We must check ti flags with interrupts (or at least preemption) | ||
373 | * off because we must *never* return to userspace without | ||
374 | * processing exit work that is enqueued if we're preempted here. | ||
375 | * In particular, returning to userspace with any of the one-shot | ||
376 | * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is | ||
377 | * very bad. | ||
378 | */ | ||
379 | testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | ||
380 | jnz int_ret_from_sys_call_fixup /* Go to the slow path */ | ||
381 | |||
373 | CFI_REMEMBER_STATE | 382 | CFI_REMEMBER_STATE |
374 | /* | 383 | /* |
375 | * sysretq will re-enable interrupts: | 384 | * sysretq will re-enable interrupts: |
@@ -386,7 +395,7 @@ ret_from_sys_call: | |||
386 | 395 | ||
387 | int_ret_from_sys_call_fixup: | 396 | int_ret_from_sys_call_fixup: |
388 | FIXUP_TOP_OF_STACK %r11, -ARGOFFSET | 397 | FIXUP_TOP_OF_STACK %r11, -ARGOFFSET |
389 | jmp int_ret_from_sys_call | 398 | jmp int_ret_from_sys_call_irqs_off |
390 | 399 | ||
391 | /* Do syscall tracing */ | 400 | /* Do syscall tracing */ |
392 | tracesys: | 401 | tracesys: |
@@ -432,6 +441,7 @@ tracesys_phase2: | |||
432 | GLOBAL(int_ret_from_sys_call) | 441 | GLOBAL(int_ret_from_sys_call) |
433 | DISABLE_INTERRUPTS(CLBR_NONE) | 442 | DISABLE_INTERRUPTS(CLBR_NONE) |
434 | TRACE_IRQS_OFF | 443 | TRACE_IRQS_OFF |
444 | int_ret_from_sys_call_irqs_off: | ||
435 | movl $_TIF_ALLWORK_MASK,%edi | 445 | movl $_TIF_ALLWORK_MASK,%edi |
436 | /* edi: mask to check */ | 446 | /* edi: mask to check */ |
437 | GLOBAL(int_with_check) | 447 | GLOBAL(int_with_check) |
@@ -789,7 +799,21 @@ retint_swapgs: /* return to user-space */ | |||
789 | cmpq %r11,(EFLAGS-ARGOFFSET)(%rsp) /* R11 == RFLAGS */ | 799 | cmpq %r11,(EFLAGS-ARGOFFSET)(%rsp) /* R11 == RFLAGS */ |
790 | jne opportunistic_sysret_failed | 800 | jne opportunistic_sysret_failed |
791 | 801 | ||
792 | testq $X86_EFLAGS_RF,%r11 /* sysret can't restore RF */ | 802 | /* |
803 | * SYSRET can't restore RF. SYSRET can restore TF, but unlike IRET, | ||
804 | * restoring TF results in a trap from userspace immediately after | ||
805 | * SYSRET. This would cause an infinite loop whenever #DB happens | ||
806 | * with register state that satisfies the opportunistic SYSRET | ||
807 | * conditions. For example, single-stepping this user code: | ||
808 | * | ||
809 | * movq $stuck_here,%rcx | ||
810 | * pushfq | ||
811 | * popq %r11 | ||
812 | * stuck_here: | ||
813 | * | ||
814 | * would never get past 'stuck_here'. | ||
815 | */ | ||
816 | testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11 | ||
793 | jnz opportunistic_sysret_failed | 817 | jnz opportunistic_sysret_failed |
794 | 818 | ||
795 | /* nothing to check for RSP */ | 819 | /* nothing to check for RSP */ |