path: root/arch/x86/kernel/nmi.c
author    Don Zickus <dzickus@redhat.com>    2011-09-30 15:06:22 -0400
committer Ingo Molnar <mingo@elte.hu>        2011-10-10 00:57:01 -0400
commit    b227e23399dc59977aa42c49bd668bdab7a61812
tree      8d786adf23225588d9175412002b04e4dea37b6d /arch/x86/kernel/nmi.c
parent    9c48f1c629ecfa114850c03f875c6691003214de
x86, nmi: Add in logic to handle multiple events and unknown NMIs
Previous patches allow the NMI subsystem to process multiple NMI events in one NMI.  As previously discussed, this can cause issues when an event triggered another NMI but is processed in the current NMI.  This causes the next NMI to go unprocessed and become an 'unknown' NMI.

To handle this, we first have to flag whether or not the NMI handler handled more than one event.  If it did, then there exists a chance that the next NMI might already have been processed.  Once the NMI is flagged as a candidate to be swallowed, we next look for a back-to-back NMI condition.

This is determined by looking at the %rip from pt_regs.  If it is the same as the previous NMI, it is assumed the CPU did not have a chance to jump back into a non-NMI context and execute code, and instead handled another NMI.

If both of those conditions are true, then we will swallow any unknown NMI.

There still exists a chance that we accidentally swallow a real unknown NMI, but for now things seem better.

An optimization has also been added to the NMI notifier routine.  Because x86 can latch at most one NMI while currently processing an NMI, we don't have to worry about executing _all_ the handlers in a standalone NMI.  The idea is that if multiple NMIs come in, the second NMI will represent them.  For those back-to-back NMI cases, we have the potential to drop NMIs.  Therefore, only execute all the handlers in the second half of a detected back-to-back NMI.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1317409584-23662-5-git-send-email-dzickus@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
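
For illustration only (not part of the patch): a minimal user-space C sketch of the swallow decision described above.  The per-CPU swallow_nmi/last_nmi_rip state is reduced to plain globals, the hardware reason-port path is omitted, and the function name model_nmi is hypothetical.

/* Sketch of the back-to-back/swallow logic for a single CPU, user space only. */
#include <stdbool.h>
#include <stdio.h>

static bool swallow_nmi;           /* stands in for per-CPU swallow_nmi  */
static unsigned long last_nmi_rip; /* stands in for per-CPU last_nmi_rip */

/* 'handled' is the number of events the registered handlers claimed. */
static void model_nmi(unsigned long rip, int handled)
{
        bool b2b = false;

        /* Same %rip as the previous NMI => assume back-to-back NMIs. */
        if (rip == last_nmi_rip)
                b2b = true;
        else
                swallow_nmi = false;

        last_nmi_rip = rip;

        if (handled) {
                /* More than one event => the next NMI may already be stale. */
                if (handled > 1)
                        swallow_nmi = true;
                return;
        }

        /* Unknown NMI: swallow it only on the second half of a b2b NMI. */
        if (b2b && swallow_nmi)
                printf("rip=%#lx: unknown NMI swallowed\n", rip);
        else
                printf("rip=%#lx: unknown NMI reported\n", rip);
}

int main(void)
{
        model_nmi(0x1000, 2); /* one NMI handles two events            */
        model_nmi(0x1000, 0); /* latched NMI, same %rip: swallowed     */
        model_nmi(0x2000, 0); /* different %rip: reported as unknown   */
        return 0;
}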
Diffstat (limited to 'arch/x86/kernel/nmi.c')
-rw-r--r--    arch/x86/kernel/nmi.c    97
1 file changed, 88 insertions(+), 9 deletions(-)
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index e20f5e790599..35b39592732b 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -71,7 +71,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
 
 #define nmi_to_desc(type) (&nmi_desc[type])
 
-static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs)
+static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
 {
        struct nmi_desc *desc = nmi_to_desc(type);
        struct nmiaction *a;
@@ -85,12 +85,9 @@ static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs)
         * can be latched at any given time.  Walk the whole list
         * to handle those situations.
         */
-       list_for_each_entry_rcu(a, &desc->head, list) {
-
+       list_for_each_entry_rcu(a, &desc->head, list)
                handled += a->handler(type, regs);
 
-       }
-
        rcu_read_unlock();
 
        /* return total number of NMI events handled */
@@ -105,6 +102,13 @@ static int __setup_nmi(unsigned int type, struct nmiaction *action)
        spin_lock_irqsave(&desc->lock, flags);
 
        /*
+        * most handlers of type NMI_UNKNOWN never return because
+        * they just assume the NMI is theirs.  Just a sanity check
+        * to manage expectations
+        */
+       WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));
+
+       /*
         * some handlers need to be executed first otherwise a fake
         * event confuses some handlers (kdump uses this flag)
         */
@@ -251,7 +255,13 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 {
        int handled;
 
-       handled = nmi_handle(NMI_UNKNOWN, regs);
+       /*
+        * Use 'false' as back-to-back NMIs are dealt with one level up.
+        * Of course this makes having multiple 'unknown' handlers useless
+        * as only the first one is ever run (unless it can actually determine
+        * if it caused the NMI)
+        */
+       handled = nmi_handle(NMI_UNKNOWN, regs, false);
        if (handled)
                return;
 #ifdef CONFIG_MCA
@@ -274,19 +284,49 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
        pr_emerg("Dazed and confused, but trying to continue\n");
 }
 
+static DEFINE_PER_CPU(bool, swallow_nmi);
+static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
+
 static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 {
        unsigned char reason = 0;
        int handled;
+       bool b2b = false;
 
        /*
         * CPU-specific NMI must be processed before non-CPU-specific
         * NMI, otherwise we may lose it, because the CPU-specific
         * NMI can not be detected/processed on other CPUs.
         */
-       handled = nmi_handle(NMI_LOCAL, regs);
-       if (handled)
+
+       /*
+        * Back-to-back NMIs are interesting because they can either
+        * be two NMIs or more than two NMIs (anything over two is dropped
+        * due to NMIs being edge-triggered).  If this is the second half
+        * of a back-to-back NMI, assume we dropped things and process
+        * more handlers.  Otherwise, reset the 'swallow' NMI behaviour.
+        */
+       if (regs->ip == __this_cpu_read(last_nmi_rip))
+               b2b = true;
+       else
+               __this_cpu_write(swallow_nmi, false);
+
+       __this_cpu_write(last_nmi_rip, regs->ip);
+
+       handled = nmi_handle(NMI_LOCAL, regs, b2b);
+       if (handled) {
+               /*
+                * There are cases when an NMI handler handles multiple
+                * events in the current NMI.  One of these events may
+                * be queued for the next NMI.  Because the event is
+                * already handled, the next NMI will result in an unknown
+                * NMI.  Instead, let's flag this for a potential NMI to
+                * swallow.
+                */
+               if (handled > 1)
+                       __this_cpu_write(swallow_nmi, true);
                return;
+       }
 
        /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
        raw_spin_lock(&nmi_reason_lock);
@@ -309,7 +349,40 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
        }
        raw_spin_unlock(&nmi_reason_lock);
 
-       unknown_nmi_error(reason, regs);
+       /*
+        * Only one NMI can be latched at a time.  To handle
+        * this we may process multiple NMI handlers at once to
+        * cover the case where an NMI is dropped.  The downside
+        * to this approach is we may process an NMI prematurely,
+        * while its real NMI is sitting latched.  This will cause
+        * an unknown NMI on the next run of the NMI processing.
+        *
+        * We tried to flag that condition above, by setting the
+        * swallow_nmi flag when we process more than one event.
+        * This condition is also only present on the second half
+        * of a back-to-back NMI, so we flag that condition too.
+        *
+        * If both are true, we assume we already processed this
+        * NMI previously and we swallow it.  Otherwise we reset
+        * the logic.
+        *
+        * There are scenarios where we may accidentally swallow
+        * a 'real' unknown NMI.  For example, while processing
+        * a perf NMI another perf NMI comes in along with a
+        * 'real' unknown NMI.  These two NMIs get combined into
+        * one (as described above).  When the next NMI gets
+        * processed, it will be flagged by perf as handled, but
+        * no one will know that there was a 'real' unknown NMI sent
+        * also.  As a result it gets swallowed.  Or if the first
+        * perf NMI returns two events handled then the second
+        * NMI will get eaten by the logic below, again losing a
+        * 'real' unknown NMI.  But this is the best we can do
+        * for now.
+        */
+       if (b2b && __this_cpu_read(swallow_nmi))
+               ;
+       else
+               unknown_nmi_error(reason, regs);
 }
 
 dotraplinkage notrace __kprobes void
@@ -334,3 +407,9 @@ void restart_nmi(void)
 {
        ignore_nmis--;
 }
+
+/* reset the back-to-back NMI logic */
+void local_touch_nmi(void)
+{
+       __this_cpu_write(last_nmi_rip, 0);
+}
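
For illustration only (not part of the patch): the swallow heuristic above is driven by the summed return values of the handler walk in nmi_handle(), where every registered handler runs and its result is added to the total, so 'handled' can exceed one within a single NMI.  A minimal user-space sketch of that accounting follows; the handler names and the plain array standing in for the RCU list are hypothetical.

#include <stdio.h>

/* Each handler returns how many events it handled (0 if the NMI is not its). */
typedef int (*nmi_model_handler_t)(void);

static int fake_perf_handler(void) { return 2; } /* two PMU events pending   */
static int fake_ghes_handler(void) { return 0; } /* nothing for this source  */

/* Stand-in for nmi_handle(): walk every handler and sum what they report. */
static int model_nmi_handle(nmi_model_handler_t *handlers, int n)
{
        int handled = 0;
        int i;

        for (i = 0; i < n; i++)
                handled += handlers[i]();

        return handled;
}

int main(void)
{
        nmi_model_handler_t handlers[] = { fake_perf_handler, fake_ghes_handler };

        /* Prints "handled = 2", which is what arms the swallow_nmi flag. */
        printf("handled = %d\n", model_nmi_handle(handlers, 2));
        return 0;
}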