path: root/arch/x86
author     Steven Rostedt <srostedt@redhat.com>    2011-12-13 16:44:16 -0500
committer  Steven Rostedt <rostedt@goodmis.org>    2011-12-21 15:38:55 -0500
commit     ccd49c2391773ffbf52bb80d75c4a92b16972517 (patch)
tree       42137704944da98524e33eff1d1ec1cd9af48d3c /arch/x86
parent     228bdaa95fb830e08b6acd1afd4d2c55093cabfa (diff)
x86: Allow NMIs to hit breakpoints in i386
With i386, NMIs and breakpoints use the current stack, and they do not reset the stack pointer to a fixed point that might corrupt a previous NMI or breakpoint (as happens in x86_64). But NMIs are still not re-entrant, so we need to prevent the case where an NMI hitting a breakpoint (which does an iret) allows another NMI to run on top of it.

The fix is to let the NMI be in 3 different states:

1) not running
2) executing
3) latched

When no NMI is executing on a given CPU, the state is "not running". When the first NMI comes in, the state is switched to "executing". On exit of that NMI, a cmpxchg is performed to switch the state back to "not running"; if that fails, the NMI is restarted.

If a breakpoint is hit and does an iret, which re-enables NMIs, and another NMI comes in before the first NMI has finished, it will detect that the state is not "not running" and that the current NMI is nested. In this case, the state is switched to "latched" to tell the interrupted NMI to restart the NMI handler, and the nested NMI exits without doing anything.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: H. Peter Anvin <hpa@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Paul Turner <pjt@google.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
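For readers who want the state machine in isolation, below is a minimal user-space C sketch of the latch-and-restart idea described above. It is an illustration only, not the patch's code: the kernel's per-CPU nmi_state and cmpxchg() are modeled with a plain int and GCC's __atomic_compare_exchange_n(), and handle_nmi_work() / do_nmi_sketch() are hypothetical stand-ins for default_do_nmi() and do_nmi().

#include <stdbool.h>
#include <stdio.h>

/* The three states from the changelog; names mirror the patch. */
enum { NMI_NOT_RUNNING, NMI_EXECUTING, NMI_LATCHED };

/* In the kernel this is per-CPU state; a single int is enough for the sketch. */
static int nmi_state = NMI_NOT_RUNNING;

/* Stand-in for the real NMI work (default_do_nmi() in the patch). */
static void handle_nmi_work(void)
{
	printf("handling one NMI\n");
}

static void do_nmi_sketch(void)
{
	int expected;

	/*
	 * A nested NMI (one that arrives after a breakpoint's iret
	 * re-enabled NMIs) only marks the latch and returns.
	 */
	if (nmi_state != NMI_NOT_RUNNING) {
		nmi_state = NMI_LATCHED;
		return;
	}

restart:
	nmi_state = NMI_EXECUTING;

	handle_nmi_work();

	/*
	 * Leave the "executing" state atomically. If a nested NMI moved
	 * the state to "latched" in the meantime, the exchange fails and
	 * the handler body runs once more.
	 */
	expected = NMI_EXECUTING;
	if (!__atomic_compare_exchange_n(&nmi_state, &expected, NMI_NOT_RUNNING,
					 false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
		goto restart;
}

int main(void)
{
	do_nmi_sketch();	/* first NMI: runs normally */
	return 0;
}

In the actual patch the same logic is expressed as the nmi_nesting_preprocess()/nmi_nesting_postprocess() macros visible in the diff below.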
Diffstat (limited to 'arch/x86')
-rw-r--r--	arch/x86/kernel/nmi.c	| 101
1 file changed, 94 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index de8d4b333f40..47acaf319165 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -405,11 +405,84 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 	unknown_nmi_error(reason, regs);
 }
 
-dotraplinkage notrace __kprobes void
-do_nmi(struct pt_regs *regs, long error_code)
-{
-	int update_debug_stack = 0;
+/*
+ * NMIs can hit breakpoints which will cause it to lose its
+ * NMI context with the CPU when the breakpoint does an iret.
+ */
+#ifdef CONFIG_X86_32
+/*
+ * For i386, NMIs use the same stack as the kernel, and we can
+ * add a workaround to the iret problem in C. Simply have 3 states
+ * the NMI can be in.
+ *
+ *  1) not running
+ *  2) executing
+ *  3) latched
+ *
+ * When no NMI is in progress, it is in the "not running" state.
+ * When an NMI comes in, it goes into the "executing" state.
+ * Normally, if another NMI is triggered, it does not interrupt
+ * the running NMI and the HW will simply latch it so that when
+ * the first NMI finishes, it will restart the second NMI.
+ * (Note, the latch is binary, thus multiple NMIs triggering,
+ *  when one is running, are ignored. Only one NMI is restarted.)
+ *
+ * If an NMI hits a breakpoint that executes an iret, another
+ * NMI can preempt it. We do not want to allow this new NMI
+ * to run, but we want to execute it when the first one finishes.
+ * We set the state to "latched", and the first NMI will perform
+ * an cmpxchg on the state, and if it doesn't successfully
+ * reset the state to "not running" it will restart the next
+ * NMI.
+ */
+enum nmi_states {
+	NMI_NOT_RUNNING,
+	NMI_EXECUTING,
+	NMI_LATCHED,
+};
+static DEFINE_PER_CPU(enum nmi_states, nmi_state);
+
+#define nmi_nesting_preprocess(regs)					\
+	do {								\
+		if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) {	\
+			__get_cpu_var(nmi_state) = NMI_LATCHED;		\
+			return;						\
+		}							\
+	nmi_restart:							\
+		__get_cpu_var(nmi_state) = NMI_EXECUTING;		\
+	} while (0)
+
+#define nmi_nesting_postprocess()					\
+	do {								\
+		if (cmpxchg(&__get_cpu_var(nmi_state),			\
+		    NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING)	\
+			goto nmi_restart;				\
+	} while (0)
+#else /* x86_64 */
+/*
+ * In x86_64 things are a bit more difficult. This has the same problem
+ * where an NMI hitting a breakpoint that calls iret will remove the
+ * NMI context, allowing a nested NMI to enter. What makes this more
+ * difficult is that both NMIs and breakpoints have their own stack.
+ * When a new NMI or breakpoint is executed, the stack is set to a fixed
+ * point. If an NMI is nested, it will have its stack set at that same
+ * fixed address that the first NMI had, and will start corrupting the
+ * stack. This is handled in entry_64.S, but the same problem exists with
+ * the breakpoint stack.
+ *
+ * If a breakpoint is being processed, and the debug stack is being used,
+ * if an NMI comes in and also hits a breakpoint, the stack pointer
+ * will be set to the same fixed address as the breakpoint that was
+ * interrupted, causing that stack to be corrupted. To handle this case,
+ * check if the stack that was interrupted is the debug stack, and if
+ * so, change the IDT so that new breakpoints will use the current stack
+ * and not switch to the fixed address. On return of the NMI, switch back
+ * to the original IDT.
+ */
+static DEFINE_PER_CPU(int, update_debug_stack);
 
+static inline void nmi_nesting_preprocess(struct pt_regs *regs)
+{
 	/*
 	 * If we interrupted a breakpoint, it is possible that
 	 * the nmi handler will have breakpoints too. We need to
@@ -418,8 +491,22 @@ do_nmi(struct pt_regs *regs, long error_code)
 	 */
 	if (unlikely(is_debug_stack(regs->sp))) {
 		debug_stack_set_zero();
-		update_debug_stack = 1;
+		__get_cpu_var(update_debug_stack) = 1;
 	}
+}
+
+static inline void nmi_nesting_postprocess(void)
+{
+	if (unlikely(__get_cpu_var(update_debug_stack)))
+		debug_stack_reset();
+}
+#endif
+
+dotraplinkage notrace __kprobes void
+do_nmi(struct pt_regs *regs, long error_code)
+{
+	nmi_nesting_preprocess(regs);
+
 	nmi_enter();
 
 	inc_irq_stat(__nmi_count);
@@ -429,8 +516,8 @@ do_nmi(struct pt_regs *regs, long error_code)
 
 	nmi_exit();
 
-	if (unlikely(update_debug_stack))
-		debug_stack_reset();
+	/* On i386, may loop back to preprocess */
+	nmi_nesting_postprocess();
 }
 
 void stop_nmi(void)