aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDon Zickus <dzickus@redhat.com>2011-10-13 15:14:26 -0400
committerIngo Molnar <mingo@elte.hu>2011-12-05 06:00:16 -0500
commit99e8b9ca90d688c3ac7d3a141b701c9694a93925 (patch)
tree1893fd5fce8c3d8ae52440d9db7d23be7fdecbc9
parent3603a2512f9e69dc87914ba922eb4a0812b21cd6 (diff)
x86, NMI: Add NMI IPI selftest
The previous patch modified the stop cpus path to use NMI instead of IRQ as the way to communicate to the other cpus to shutdown. There were some concerns that various machines may have problems with using an NMI IPI. This patch creates a selftest to check if NMI is working at boot. The idea is to help catch any issues before the machine panics and we learn the hard way. Loosely based on the locking-selftest.c file, this separate file runs a couple of simple tests and reports the results. The output looks like: ... Brought up 4 CPUs ---------------- | NMI testsuite: -------------------- remote IPI: ok | local IPI: ok | -------------------- Good, all 2 testcases passed! | --------------------------------- Total of 4 processors activated (21330.61 BogoMIPS). ... Signed-off-by: Don Zickus <dzickus@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Robert Richter <robert.richter@amd.com> Cc: seiji.aguchi@hds.com Cc: vgoyal@redhat.com Cc: mjg@redhat.com Cc: tony.luck@intel.com Cc: gong.chen@intel.com Cc: satoru.moriya@hds.com Cc: avi@redhat.com Cc: Andi Kleen <andi@firstfloor.org> Link: http://lkml.kernel.org/r/1318533267-18880-3-git-send-email-dzickus@redhat.com Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--arch/x86/Kconfig.debug12
-rw-r--r--arch/x86/include/asm/smp.h6
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/nmi_selftest.c179
-rw-r--r--arch/x86/kernel/smpboot.c1
5 files changed, 199 insertions, 0 deletions
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 4caec1261f12..97da3c17b424 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -287,4 +287,16 @@ config DEBUG_STRICT_USER_COPY_CHECKS
287 287
288 If unsure, or if you run an older (pre 4.4) gcc, say N. 288 If unsure, or if you run an older (pre 4.4) gcc, say N.
289 289
# The selftest sends NMI IPIs through the local APIC driver
# (apic->send_IPI_mask()), so it can only be built when local APIC
# support is configured.
config DEBUG_NMI_SELFTEST
	bool "NMI Selftest"
	depends on DEBUG_KERNEL && X86_LOCAL_APIC
	---help---
	  Enabling this option turns on a quick NMI selftest to verify
	  that the NMI behaves correctly.

	  This might help diagnose strange hangs that rely on NMI to
	  function properly.

	  If unsure, say N.
301
290endmenu 302endmenu
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 73b11bc0ae6f..0434c400287c 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -225,5 +225,11 @@ extern int hard_smp_processor_id(void);
225 225
226#endif /* CONFIG_X86_LOCAL_APIC */ 226#endif /* CONFIG_X86_LOCAL_APIC */
227 227
#ifdef CONFIG_DEBUG_NMI_SELFTEST
/* Boot-time NMI IPI selftest; built only when the option is enabled. */
extern void nmi_selftest(void);
#else
/*
 * Selftest not configured: provide an empty inline stub so callers need no
 * #ifdef of their own and still get a real, type-checked function.
 */
static inline void nmi_selftest(void) { }
#endif
233
228#endif /* __ASSEMBLY__ */ 234#endif /* __ASSEMBLY__ */
229#endif /* _ASM_X86_SMP_H */ 235#endif /* _ASM_X86_SMP_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8baca3c4871c..02b2f05b371e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -80,6 +80,7 @@ obj-$(CONFIG_APB_TIMER) += apb_timer.o
80obj-$(CONFIG_AMD_NB) += amd_nb.o 80obj-$(CONFIG_AMD_NB) += amd_nb.o
81obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o 81obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
82obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o 82obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
83obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
83 84
84obj-$(CONFIG_KVM_GUEST) += kvm.o 85obj-$(CONFIG_KVM_GUEST) += kvm.o
85obj-$(CONFIG_KVM_CLOCK) += kvmclock.o 86obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
new file mode 100644
index 000000000000..572adb622251
--- /dev/null
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -0,0 +1,179 @@
1/*
2 * arch/x86/kernel/nmi_selftest.c
3 *
4 * Testsuite for NMI: IPIs
5 *
6 * Started by Don Zickus:
7 * (using lib/locking-selftest.c as a guide)
8 *
9 * Copyright (C) 2011 Red Hat, Inc., Don Zickus <dzickus@redhat.com>
10 */
11
12#include <linux/smp.h>
13#include <linux/cpumask.h>
14#include <linux/delay.h>
15
16#include <asm/apic.h>
17#include <asm/nmi.h>
18
/*
 * Outcome codes stored in nmi_fail by the testcases and compared against the
 * expected outcome in dotest().
 */
enum {
	SUCCESS = 0,	/* testcase completed, every targeted CPU answered */
	FAILURE = 1,	/* the test NMI handler could not be registered */
	TIMEOUT = 2,	/* some targeted CPU did not answer in time */
};
22
23static int nmi_fail;
24
25/* check to see if NMI IPIs work on this machine */
26static DECLARE_BITMAP(nmi_ipi_mask, NR_CPUS) __read_mostly;
27
28static int testcase_total;
29static int testcase_successes;
30static int expected_testcase_failures;
31static int unexpected_testcase_failures;
32static int unexpected_testcase_unknowns;
33
34static int nmi_unk_cb(unsigned int val, struct pt_regs *regs)
35{
36 unexpected_testcase_unknowns++;
37 return NMI_HANDLED;
38}
39
40static void init_nmi_testsuite(void)
41{
42 /* trap all the unknown NMIs we may generate */
43 register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk");
44}
45
46static void cleanup_nmi_testsuite(void)
47{
48 unregister_nmi_handler(NMI_UNKNOWN, "nmi_selftest_unk");
49}
50
51static int test_nmi_ipi_callback(unsigned int val, struct pt_regs *regs)
52{
53 int cpu = raw_smp_processor_id();
54
55 if (cpumask_test_and_clear_cpu(cpu, to_cpumask(nmi_ipi_mask)))
56 return NMI_HANDLED;
57
58 return NMI_DONE;
59}
60
61static void test_nmi_ipi(struct cpumask *mask)
62{
63 unsigned long timeout;
64
65 if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback,
66 NMI_FLAG_FIRST, "nmi_selftest")) {
67 nmi_fail = FAILURE;
68 return;
69 }
70
71 /* sync above data before sending NMI */
72 wmb();
73
74 apic->send_IPI_mask(mask, NMI_VECTOR);
75
76 /* Don't wait longer than a second */
77 timeout = USEC_PER_SEC;
78 while (!cpumask_empty(mask) && timeout--)
79 udelay(1);
80
81 /* What happens if we timeout, do we still unregister?? */
82 unregister_nmi_handler(NMI_LOCAL, "nmi_selftest");
83
84 if (!timeout)
85 nmi_fail = TIMEOUT;
86 return;
87}
88
89static void remote_ipi(void)
90{
91 cpumask_copy(to_cpumask(nmi_ipi_mask), cpu_online_mask);
92 cpumask_clear_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask));
93 test_nmi_ipi(to_cpumask(nmi_ipi_mask));
94}
95
96static void local_ipi(void)
97{
98 cpumask_clear(to_cpumask(nmi_ipi_mask));
99 cpumask_set_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask));
100 test_nmi_ipi(to_cpumask(nmi_ipi_mask));
101}
102
103static void reset_nmi(void)
104{
105 nmi_fail = 0;
106}
107
108static void dotest(void (*testcase_fn)(void), int expected)
109{
110 testcase_fn();
111 /*
112 * Filter out expected failures:
113 */
114 if (nmi_fail != expected) {
115 unexpected_testcase_failures++;
116
117 if (nmi_fail == FAILURE)
118 printk("FAILED |");
119 else if (nmi_fail == TIMEOUT)
120 printk("TIMEOUT|");
121 else
122 printk("ERROR |");
123 dump_stack();
124 } else {
125 testcase_successes++;
126 printk(" ok |");
127 }
128 testcase_total++;
129
130 reset_nmi();
131}
132
/*
 * Print the label of a result row: @testname right-aligned in a
 * 12-character column, followed by a colon and no newline.
 */
static inline void print_testname(const char *testname)
{
	printk("%12s:", testname);
}
137
138void nmi_selftest(void)
139{
140 init_nmi_testsuite();
141
142 /*
143 * Run the testsuite:
144 */
145 printk("----------------\n");
146 printk("| NMI testsuite:\n");
147 printk("--------------------\n");
148
149 print_testname("remote IPI");
150 dotest(remote_ipi, SUCCESS);
151 printk("\n");
152 print_testname("local IPI");
153 dotest(local_ipi, SUCCESS);
154 printk("\n");
155
156 cleanup_nmi_testsuite();
157
158 if (unexpected_testcase_failures) {
159 printk("--------------------\n");
160 printk("BUG: %3d unexpected failures (out of %3d) - debugging disabled! |\n",
161 unexpected_testcase_failures, testcase_total);
162 printk("-----------------------------------------------------------------\n");
163 } else if (expected_testcase_failures && testcase_successes) {
164 printk("--------------------\n");
165 printk("%3d out of %3d testcases failed, as expected. |\n",
166 expected_testcase_failures, testcase_total);
167 printk("----------------------------------------------------\n");
168 } else if (expected_testcase_failures && !testcase_successes) {
169 printk("--------------------\n");
170 printk("All %3d testcases failed, as expected. |\n",
171 expected_testcase_failures);
172 printk("----------------------------------------\n");
173 } else {
174 printk("--------------------\n");
175 printk("Good, all %3d testcases passed! |\n",
176 testcase_successes);
177 printk("---------------------------------\n");
178 }
179}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9f548cb4a958..19277817effa 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1142,6 +1142,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1142{ 1142{
1143 pr_debug("Boot done.\n"); 1143 pr_debug("Boot done.\n");
1144 1144
1145 nmi_selftest();
1145 impress_friends(); 1146 impress_friends();
1146#ifdef CONFIG_X86_IO_APIC 1147#ifdef CONFIG_X86_IO_APIC
1147 setup_ioapic_dest(); 1148 setup_ioapic_dest();