author    Linus Torvalds <torvalds@linux-foundation.org>  2011-11-29 15:44:55 -0500
committer Steven Rostedt <rostedt@goodmis.org>  2011-12-21 15:38:52 -0500
commit    549c89b98c4530b278dde1a3f68ce5ebbb1e6304 (patch)
tree      428c41033516d89bd3918209950a280721eaa284 /arch/x86
parent    dc47ce90c3a822cd7c9e9339fe4d5f61dcb26b50 (diff)
x86: Do not schedule while still in NMI context
The NMI handler uses the paranoid_exit routine that checks the NEED_RESCHED flag, and if it is set and the return is for userspace, then interrupts are enabled, the stack is swapped to the thread's stack, and schedule is called. The problem with this is that we are still in an NMI context until an iret is executed. This means that any new NMIs are now starved until an interrupt or exception occurs and does the iret.

As NMIs cannot be masked and can interrupt any location, they are treated as a special case. NEED_RESCHED should not be set in an NMI handler. The interruption by the NMI should not disturb the work flow for scheduling. Any IPI sent to a processor after setting NEED_RESCHED would have to wait for the NMI anyway, and after the IPI finishes the schedule would be called as required.

There is no reason to do anything special when leaving an NMI. Remove the call to paranoid_exit and do a simple return. This not only fixes the bug of starved NMIs, but it also cleans up the code.

Link: http://lkml.kernel.org/r/CA+55aFzgM55hXTs4griX5e9=v_O+=ue+7Rj0PTD=M7hFYpyULQ@mail.gmail.com

Acked-by: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "H. Peter Anvin" <hpa@linux.intel.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Paul Turner <pjt@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
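To make the starvation mechanism concrete, here is a minimal userspace C sketch; it is illustrative only, not kernel code, and every name in it is made up. It models the hardware rule that further NMIs stay blocked from NMI entry until the next iret: if the handler "schedules" instead of returning through iret, the latch is never cleared and later NMIs are lost.

        /*
         * Toy model of the bug (not kernel code).  The CPU blocks new
         * NMIs from NMI entry until the next iret; here nmi_masked
         * stands in for that internal latch.
         */
        #include <stdbool.h>
        #include <stdio.h>

        static bool nmi_masked;     /* models the CPU's NMI-in-progress latch */
        static int  nmis_starved;

        static void iret(void)      /* models the iret instruction */
        {
                nmi_masked = false; /* iret re-enables NMI delivery */
        }

        static void deliver_nmi(void)
        {
                if (nmi_masked) {   /* NMI arrives while one is "in progress" */
                        nmis_starved++;
                        return;
                }
                nmi_masked = true;  /* entering the handler blocks further NMIs */
        }

        int main(void)
        {
                deliver_nmi();      /* first NMI: handler entered */

                /*
                 * Old behaviour: paranoid_exit sees NEED_RESCHED and calls
                 * schedule() without ever executing iret, so nmi_masked
                 * stays set and later NMIs are starved until some other
                 * interrupt or exception finally does an iret.
                 */
                deliver_nmi();
                deliver_nmi();
                printf("starved with schedule-before-iret: %d\n", nmis_starved);

                /*
                 * Fixed behaviour: the handler returns with a plain iret,
                 * so the next NMI is delivered normally.
                 */
                iret();
                nmis_starved = 0;
                deliver_nmi();
                printf("starved after a simple iret: %d\n", nmis_starved);
                return 0;
        }

In the fixed exit path in the diff below, the NMI handler always falls through to irq_return (the iret), so the window in which NMIs are blocked is just the handler itself.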
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/kernel/entry_64.S | 32
1 file changed, 0 insertions(+), 32 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index faf8d5e74b0b..3819ea907339 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1489,46 +1489,14 @@ ENTRY(nmi)
 	movq %rsp,%rdi
 	movq $-1,%rsi
 	call do_nmi
-#ifdef CONFIG_TRACE_IRQFLAGS
-	/* paranoidexit; without TRACE_IRQS_OFF */
-	/* ebx: no swapgs flag */
-	DISABLE_INTERRUPTS(CLBR_NONE)
 	testl %ebx,%ebx			/* swapgs needed? */
 	jnz nmi_restore
-	testl $3,CS(%rsp)
-	jnz nmi_userspace
 nmi_swapgs:
 	SWAPGS_UNSAFE_STACK
 nmi_restore:
 	RESTORE_ALL 8
 	jmp irq_return
-nmi_userspace:
-	GET_THREAD_INFO(%rcx)
-	movl TI_flags(%rcx),%ebx
-	andl $_TIF_WORK_MASK,%ebx
-	jz nmi_swapgs
-	movq %rsp,%rdi			/* &pt_regs */
-	call sync_regs
-	movq %rax,%rsp			/* switch stack for scheduling */
-	testl $_TIF_NEED_RESCHED,%ebx
-	jnz nmi_schedule
-	movl %ebx,%edx			/* arg3: thread flags */
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	xorl %esi,%esi			/* arg2: oldset */
-	movq %rsp,%rdi			/* arg1: &pt_regs */
-	call do_notify_resume
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	jmp nmi_userspace
-nmi_schedule:
-	ENABLE_INTERRUPTS(CLBR_ANY)
-	call schedule
-	DISABLE_INTERRUPTS(CLBR_ANY)
-	jmp nmi_userspace
 	CFI_ENDPROC
-#else
-	jmp paranoid_exit
-	CFI_ENDPROC
-#endif
 END(nmi)
 
 ENTRY(ignore_sysret)