 arch/x86/include/asm/nmi.h   |  1 +
 arch/x86/kernel/nmi.c        | 97 ++++++++++++++++++++++++++++++++++++-------
 arch/x86/kernel/process_32.c |  2 ++
 arch/x86/kernel/process_64.c |  2 ++
 4 files changed, 93 insertions(+), 9 deletions(-)
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 53610957feaf..fd3f9f18cf3f 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -42,5 +42,6 @@ void unregister_nmi_handler(unsigned int, const char *);
 
 void stop_nmi(void);
 void restart_nmi(void);
+void local_touch_nmi(void);
 
 #endif /* _ASM_X86_NMI_H */
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index e20f5e790599..35b39592732b 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -71,7 +71,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
 
 #define nmi_to_desc(type) (&nmi_desc[type])
 
-static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs)
+static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
 {
         struct nmi_desc *desc = nmi_to_desc(type);
         struct nmiaction *a;
@@ -85,12 +85,9 @@ static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs)
          * can be latched at any given time. Walk the whole list
          * to handle those situations.
          */
-        list_for_each_entry_rcu(a, &desc->head, list) {
-
+        list_for_each_entry_rcu(a, &desc->head, list)
                 handled += a->handler(type, regs);
 
-        }
-
         rcu_read_unlock();
 
         /* return total number of NMI events handled */
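
For readers following along, a stand-alone model of the walk above may help. This is only a sketch in plain user-space C (hypothetical handler names, no RCU and no real NMI context); it shows why nmi_handle() sums the handlers' return values instead of stopping at the first hit: a single latched NMI may stand for several pending events.

    #include <stdio.h>

    struct nmiaction_model {
            int (*handler)(void);           /* returns events it handled */
    };

    static int perf_like(void) { return 2; }   /* two PMU overflows pending */
    static int ghes_like(void) { return 0; }   /* nothing pending */

    int main(void)
    {
            struct nmiaction_model acts[] = { { perf_like }, { ghes_like } };
            int handled = 0;
            unsigned i;

            /* Walk the whole list: only one NMI can be latched at a time,
             * so several pending events may hide behind one delivery. */
            for (i = 0; i < sizeof(acts) / sizeof(acts[0]); i++)
                    handled += acts[i].handler();

            printf("handled %d events\n", handled); /* prints: handled 2 events */
            return 0;
    }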
@@ -105,6 +102,13 @@ static int __setup_nmi(unsigned int type, struct nmiaction *action)
         spin_lock_irqsave(&desc->lock, flags);
 
         /*
+         * most handlers of type NMI_UNKNOWN never return because
+         * they just assume the NMI is theirs.  Just a sanity check
+         * to manage expectations
+         */
+        WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));
+
+        /*
          * some handlers need to be executed first otherwise a fake
          * event confuses some handlers (kdump uses this flag)
          */
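
The WARN_ON_ONCE makes more sense with an example. A typical NMI_UNKNOWN handler cannot verify that the NMI is really its own, so it claims it unconditionally and often never returns (a kdump-style handler kexecs or panics); a second handler on that list can therefore never do useful work. A hedged user-space sketch of that starvation, with all names hypothetical:

    #include <stdio.h>
    #include <stdlib.h>

    /* Typical 'unknown NMI' handler: assumes the NMI is its own and
     * never returns (think kdump: it kexecs into a crash kernel). */
    static int catchall_a(void)
    {
            puts("catchall_a: assuming the NMI is mine, not returning");
            exit(1);
    }

    /* Registered second; in practice it is dead weight, which is
     * what the registration-time warning flags. */
    static int catchall_b(void)
    {
            puts("catchall_b: never reached");
            return 1;
    }

    int main(void)
    {
            int (*handlers[])(void) = { catchall_a, catchall_b };
            int handled = 0;
            unsigned i;

            for (i = 0; i < 2; i++)
                    handled += handlers[i]();       /* exits inside catchall_a */

            printf("handled=%d\n", handled);        /* never printed */
            return 0;
    }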
@@ -251,7 +255,13 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 {
         int handled;
 
-        handled = nmi_handle(NMI_UNKNOWN, regs);
+        /*
+         * Use 'false' as back-to-back NMIs are dealt with one level up.
+         * Of course this makes having multiple 'unknown' handlers useless
+         * as only the first one is ever run (unless it can actually determine
+         * if it caused the NMI)
+         */
+        handled = nmi_handle(NMI_UNKNOWN, regs, false);
         if (handled)
                 return;
 #ifdef CONFIG_MCA
@@ -274,19 +284,49 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
         pr_emerg("Dazed and confused, but trying to continue\n");
 }
 
+static DEFINE_PER_CPU(bool, swallow_nmi);
+static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
+
 static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 {
         unsigned char reason = 0;
         int handled;
+        bool b2b = false;
 
         /*
          * CPU-specific NMI must be processed before non-CPU-specific
          * NMI, otherwise we may lose it, because the CPU-specific
          * NMI can not be detected/processed on other CPUs.
          */
-        handled = nmi_handle(NMI_LOCAL, regs);
-        if (handled)
+
+        /*
+         * Back-to-back NMIs are interesting because they can either
+         * be two NMIs or more than two NMIs (anything over two is dropped
+         * due to NMI being edge-triggered).  If this is the second half
+         * of the back-to-back NMI, assume we dropped things and process
+         * more handlers.  Otherwise, reset the 'swallow' NMI behaviour.
+         */
+        if (regs->ip == __this_cpu_read(last_nmi_rip))
+                b2b = true;
+        else
+                __this_cpu_write(swallow_nmi, false);
+
+        __this_cpu_write(last_nmi_rip, regs->ip);
+
+        handled = nmi_handle(NMI_LOCAL, regs, b2b);
+        if (handled) {
+                /*
+                 * There are cases when a NMI handler handles multiple
+                 * events in the current NMI.  One of these events may
+                 * be queued for the next NMI.  Because the event is
+                 * already handled, the next NMI will result in an unknown
+                 * NMI.  Instead let's flag this for a potential NMI to
+                 * swallow.
+                 */
+                if (handled > 1)
+                        __this_cpu_write(swallow_nmi, true);
                 return;
+        }
 
         /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
         raw_spin_lock(&nmi_reason_lock);
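
The detection hinges on one hardware fact: a second NMI arriving while the first is being handled stays latched and is delivered at the iret boundary, before the interrupted instruction re-executes, so both NMIs save the same regs->ip. Below is a minimal single-CPU model of the state machine above, with plain globals standing in for the per-CPU variables; treat it as a sketch, not the kernel code.

    #include <stdbool.h>
    #include <stdio.h>

    static unsigned long last_nmi_rip;      /* models the per-CPU variable */
    static bool swallow_nmi;

    /* Returns true when this NMI looks like the back half of a
     * back-to-back pair: two NMIs only land on the same saved IP when
     * the second was latched while the first was still running. */
    static bool nmi_is_b2b(unsigned long ip)
    {
            bool b2b = false;

            if (ip == last_nmi_rip)
                    b2b = true;
            else
                    swallow_nmi = false;    /* new location: reset the logic */

            last_nmi_rip = ip;
            return b2b;
    }

    int main(void)
    {
            printf("%d\n", nmi_is_b2b(0x1000));     /* 0: first NMI */
            printf("%d\n", nmi_is_b2b(0x1000));     /* 1: back-to-back */
            printf("%d\n", nmi_is_b2b(0x2000));     /* 0: fresh location */
            return 0;
    }

Note the heuristic can false-positive when unrelated NMIs keep hitting the same instruction (a CPU halted in idle, a tight loop), which is exactly what local_touch_nmi() further down compensates for in the idle path.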
@@ -309,7 +349,40 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
         }
         raw_spin_unlock(&nmi_reason_lock);
 
-        unknown_nmi_error(reason, regs);
+        /*
+         * Only one NMI can be latched at a time.  To handle
+         * this we may process multiple nmi handlers at once to
+         * cover the case where an NMI is dropped.  The downside
+         * to this approach is we may process an NMI prematurely,
+         * while its real NMI is sitting latched.  This will cause
+         * an unknown NMI on the next run of the NMI processing.
+         *
+         * We tried to flag that condition above, by setting the
+         * swallow_nmi flag when we process more than one event.
+         * This condition is also only present on the second half
+         * of a back-to-back NMI, so we flag that condition too.
+         *
+         * If both are true, we assume we already processed this
+         * NMI previously and we swallow it.  Otherwise we reset
+         * the logic.
+         *
+         * There are scenarios where we may accidentally swallow
+         * a 'real' unknown NMI.  For example, while processing
+         * a perf NMI, another perf NMI comes in along with a
+         * 'real' unknown NMI.  These two NMIs get combined into
+         * one (as described above).  When the next NMI gets
+         * processed, it will be flagged by perf as handled, but
+         * no one will know that there was a 'real' unknown NMI sent
+         * also.  As a result it gets swallowed.  Or if the first
+         * perf NMI returns two events handled then the second
+         * NMI will get eaten by the logic below, again losing a
+         * 'real' unknown NMI.  But this is the best we can do
+         * for now.
+         */
+        if (b2b && __this_cpu_read(swallow_nmi))
+                ;
+        else
+                unknown_nmi_error(reason, regs);
 }
 
 dotraplinkage notrace __kprobes void
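
Stripped of the long comment, the swallow decision is a two-flag conjunction. A hedged reduction of the tail of default_do_nmi() to plain C follows; the helper name is hypothetical and the kernel-specific context is dropped.

    #include <stdbool.h>
    #include <stdio.h>

    static void report_unknown_nmi(void)
    {
            puts("Dazed and confused, but trying to continue");
    }

    /* Swallow only when both pieces of evidence agree: a handler
     * earlier claimed a surplus event (swallow) AND this NMI is the
     * back half of a back-to-back pair (b2b). */
    static void finish_nmi(int handled, bool b2b, bool swallow)
    {
            if (handled)
                    return;         /* a registered handler claimed it */
            if (b2b && swallow)
                    return;         /* assume an already-processed leftover */
            report_unknown_nmi();
    }

    int main(void)
    {
            finish_nmi(0, true, true);      /* silently swallowed */
            finish_nmi(0, true, false);     /* reported as unknown */
            finish_nmi(0, false, true);     /* reported as unknown */
            return 0;
    }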
@@ -334,3 +407,9 @@ void restart_nmi(void)
 {
         ignore_nmis--;
 }
+
+/* reset the back-to-back NMI logic */
+void local_touch_nmi(void)
+{
+        __this_cpu_write(last_nmi_rip, 0);
+}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 7a3b65107a27..46ff054ebaaa 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -57,6 +57,7 @@
 #include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
+#include <asm/nmi.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
@@ -107,6 +108,7 @@ void cpu_idle(void)
                         if (cpu_is_offline(cpu))
                                 play_dead();
 
+                        local_touch_nmi();
                         local_irq_disable();
                         /* Don't trace irqs off for idle */
                         stop_critical_timings();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index f693e44e1bf6..3bd7e6eebf31 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -51,6 +51,7 @@
 #include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
+#include <asm/nmi.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -133,6 +134,7 @@ void cpu_idle(void)
                          * from here on, until they go to idle.
                          * Otherwise, idle callbacks can misfire.
                          */
+                        local_touch_nmi();
                         local_irq_disable();
                         enter_idle();
                         /* Don't trace irqs off for idle */
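
Why both idle loops need the reset: a CPU parked on hlt takes every NMI at the same instruction pointer, so without local_touch_nmi() any two NMIs in idle would look back-to-back and a genuine unknown NMI could be swallowed. Clearing last_nmi_rip makes the next NMI in idle always look fresh. A one-line model, assuming (as the patch does) that 0 never matches a real interrupted IP:

    /* Model of local_touch_nmi(): forget where the last NMI hit, so an
     * NMI taken while halting at one instruction is never mistaken for
     * the back half of a back-to-back pair. */
    static unsigned long last_nmi_rip;

    static void local_touch_nmi_model(void)
    {
            last_nmi_rip = 0;
    }

    int main(void)
    {
            last_nmi_rip = 0x1000;          /* pretend an NMI hit in idle */
            local_touch_nmi_model();        /* the idle loop calls this */
            return last_nmi_rip != 0;       /* exits 0: state was reset */
    }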