author     Andy Lutomirski <luto@kernel.org>    2015-07-15 13:29:33 -0400
committer  Ingo Molnar <mingo@kernel.org>       2015-07-17 06:50:10 -0400
commit     9d05041679904b12c12421cbcf9cb5f4860a8d7b
tree       20f096fc9c1ae2af31643a25474273b5392caeeb
parent     21bdb584af8cca7c6df3c44cba268be050a234eb
x86/nmi: Enable nested do_nmi() handling for 64-bit kernels
32-bit kernels handle nested NMIs in C. Enable the exact same
handling on 64-bit kernels as well. This isn't currently
necessary, but it will become necessary once the asm code starts
allowing limited nesting.
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
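
For reference, here is a condensed C sketch of the nesting logic that this patch moves into do_nmi() itself, so that it runs on both 32-bit and 64-bit kernels (previously it lived in the 32-bit-only nmi_nesting_preprocess()/nmi_nesting_postprocess() macros). The state names and per-CPU accessors are taken from the patch below; the handler body is elided, so this is an illustrative sketch rather than the literal function.

/*
 * Condensed sketch of the nested-NMI handling do_nmi() performs after
 * this patch.  The actual NMI work and the x86_64 debug-stack fixup
 * are elided.
 */
enum nmi_states {
	NMI_NOT_RUNNING = 0,
	NMI_EXECUTING,		/* must be 1: dec_return lands on NOT_RUNNING */
	NMI_LATCHED,
};
static DEFINE_PER_CPU(enum nmi_states, nmi_state);
static DEFINE_PER_CPU(unsigned long, nmi_cr2);

dotraplinkage notrace void do_nmi(struct pt_regs *regs, long error_code)
{
	/* A nested NMI only latches; the outer invocation will rerun it. */
	if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {
		this_cpu_write(nmi_state, NMI_LATCHED);
		return;
	}
	this_cpu_write(nmi_state, NMI_EXECUTING);
	this_cpu_write(nmi_cr2, read_cr2());	/* a fault inside the NMI may clobber CR2 */
nmi_restart:

	/* ... nmi_enter(); default_do_nmi(regs); nmi_exit(); ... */

	/* Restore CR2 if a page fault taken inside the handler changed it. */
	if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
		write_cr2(this_cpu_read(nmi_cr2));

	/* EXECUTING -> NOT_RUNNING and return, or LATCHED -> EXECUTING and rerun. */
	if (this_cpu_dec_return(nmi_state))
		goto nmi_restart;
}

The CR2 save/restore matters because the NMI may have interrupted a page fault whose handler has not yet read CR2; without it, a fault taken inside the NMI path would silently overwrite that value.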
-rw-r--r--   arch/x86/kernel/nmi.c | 123
1 file changed, 52 insertions, 71 deletions
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index c3e985d1751c..d8766b1c9974 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -408,15 +408,15 @@ static void default_do_nmi(struct pt_regs *regs)
 NOKPROBE_SYMBOL(default_do_nmi);
 
 /*
- * NMIs can hit breakpoints which will cause it to lose its
- * NMI context with the CPU when the breakpoint does an iret.
- */
-#ifdef CONFIG_X86_32
-/*
- * For i386, NMIs use the same stack as the kernel, and we can
- * add a workaround to the iret problem in C (preventing nested
- * NMIs if an NMI takes a trap). Simply have 3 states the NMI
- * can be in:
+ * NMIs can hit breakpoints which will cause it to lose its NMI context
+ * with the CPU when the breakpoint or page fault does an IRET.
+ *
+ * As a result, NMIs can nest if NMIs get unmasked due an IRET during
+ * NMI processing. On x86_64, the asm glue protects us from nested NMIs
+ * if the outer NMI came from kernel mode, but we can still nest if the
+ * outer NMI came from user mode.
+ *
+ * To handle these nested NMIs, we have three states:
  *
  * 1) not running
  * 2) executing
@@ -430,15 +430,14 @@ NOKPROBE_SYMBOL(default_do_nmi);
  * (Note, the latch is binary, thus multiple NMIs triggering,
  *  when one is running, are ignored. Only one NMI is restarted.)
  *
- * If an NMI hits a breakpoint that executes an iret, another
- * NMI can preempt it. We do not want to allow this new NMI
- * to run, but we want to execute it when the first one finishes.
- * We set the state to "latched", and the exit of the first NMI will
- * perform a dec_return, if the result is zero (NOT_RUNNING), then
- * it will simply exit the NMI handler. If not, the dec_return
- * would have set the state to NMI_EXECUTING (what we want it to
- * be when we are running). In this case, we simply jump back
- * to rerun the NMI handler again, and restart the 'latched' NMI.
+ * If an NMI executes an iret, another NMI can preempt it. We do not
+ * want to allow this new NMI to run, but we want to execute it when the
+ * first one finishes. We set the state to "latched", and the exit of
+ * the first NMI will perform a dec_return, if the result is zero
+ * (NOT_RUNNING), then it will simply exit the NMI handler. If not, the
+ * dec_return would have set the state to NMI_EXECUTING (what we want it
+ * to be when we are running). In this case, we simply jump back to
+ * rerun the NMI handler again, and restart the 'latched' NMI.
  *
  * No trap (breakpoint or page fault) should be hit before nmi_restart,
  * thus there is no race between the first check of state for NOT_RUNNING
@@ -461,49 +460,36 @@ enum nmi_states {
 static DEFINE_PER_CPU(enum nmi_states, nmi_state);
 static DEFINE_PER_CPU(unsigned long, nmi_cr2);
 
-#define nmi_nesting_preprocess(regs)					\
-	do {								\
-		if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {	\
-			this_cpu_write(nmi_state, NMI_LATCHED);		\
-			return;						\
-		}							\
-		this_cpu_write(nmi_state, NMI_EXECUTING);		\
-		this_cpu_write(nmi_cr2, read_cr2());			\
-	} while (0);							\
-	nmi_restart:
-
-#define nmi_nesting_postprocess()					\
-	do {								\
-		if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))	\
-			write_cr2(this_cpu_read(nmi_cr2));		\
-		if (this_cpu_dec_return(nmi_state))			\
-			goto nmi_restart;				\
-	} while (0)
-#else /* x86_64 */
+#ifdef CONFIG_X86_64
 /*
- * In x86_64 things are a bit more difficult. This has the same problem
- * where an NMI hitting a breakpoint that calls iret will remove the
- * NMI context, allowing a nested NMI to enter. What makes this more
- * difficult is that both NMIs and breakpoints have their own stack.
- * When a new NMI or breakpoint is executed, the stack is set to a fixed
- * point. If an NMI is nested, it will have its stack set at that same
- * fixed address that the first NMI had, and will start corrupting the
- * stack. This is handled in entry_64.S, but the same problem exists with
- * the breakpoint stack.
+ * In x86_64, we need to handle breakpoint -> NMI -> breakpoint. Without
+ * some care, the inner breakpoint will clobber the outer breakpoint's
+ * stack.
  *
- * If a breakpoint is being processed, and the debug stack is being used,
- * if an NMI comes in and also hits a breakpoint, the stack pointer
- * will be set to the same fixed address as the breakpoint that was
- * interrupted, causing that stack to be corrupted. To handle this case,
- * check if the stack that was interrupted is the debug stack, and if
- * so, change the IDT so that new breakpoints will use the current stack
- * and not switch to the fixed address. On return of the NMI, switch back
- * to the original IDT.
+ * If a breakpoint is being processed, and the debug stack is being
+ * used, if an NMI comes in and also hits a breakpoint, the stack
+ * pointer will be set to the same fixed address as the breakpoint that
+ * was interrupted, causing that stack to be corrupted. To handle this
+ * case, check if the stack that was interrupted is the debug stack, and
+ * if so, change the IDT so that new breakpoints will use the current
+ * stack and not switch to the fixed address. On return of the NMI,
+ * switch back to the original IDT.
  */
 static DEFINE_PER_CPU(int, update_debug_stack);
+#endif
 
-static inline void nmi_nesting_preprocess(struct pt_regs *regs)
+dotraplinkage notrace void
+do_nmi(struct pt_regs *regs, long error_code)
 {
+	if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {
+		this_cpu_write(nmi_state, NMI_LATCHED);
+		return;
+	}
+	this_cpu_write(nmi_state, NMI_EXECUTING);
+	this_cpu_write(nmi_cr2, read_cr2());
+nmi_restart:
+
+#ifdef CONFIG_X86_64
 	/*
 	 * If we interrupted a breakpoint, it is possible that
 	 * the nmi handler will have breakpoints too. We need to
@@ -514,22 +500,8 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs)
 		debug_stack_set_zero();
 		this_cpu_write(update_debug_stack, 1);
 	}
-}
-
-static inline void nmi_nesting_postprocess(void)
-{
-	if (unlikely(this_cpu_read(update_debug_stack))) {
-		debug_stack_reset();
-		this_cpu_write(update_debug_stack, 0);
-	}
-}
 #endif
 
-dotraplinkage notrace void
-do_nmi(struct pt_regs *regs, long error_code)
-{
-	nmi_nesting_preprocess(regs);
-
 	nmi_enter();
 
 	inc_irq_stat(__nmi_count);
@@ -539,8 +511,17 @@ do_nmi(struct pt_regs *regs, long error_code)
 
 	nmi_exit();
 
-	/* On i386, may loop back to preprocess */
-	nmi_nesting_postprocess();
+#ifdef CONFIG_X86_64
+	if (unlikely(this_cpu_read(update_debug_stack))) {
+		debug_stack_reset();
+		this_cpu_write(update_debug_stack, 0);
+	}
+#endif
+
+	if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
+		write_cr2(this_cpu_read(nmi_cr2));
+	if (this_cpu_dec_return(nmi_state))
+		goto nmi_restart;
 }
 NOKPROBE_SYMBOL(do_nmi);
 
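
One piece that stays 64-bit-only inside the now-shared do_nmi() is the debug-stack fixup described in the comment above: it is set up before nmi_enter() and undone after nmi_exit(). A minimal sketch of that pairing follows. Note that the entry-side guard sits just outside the hunks shown above, so the is_debug_stack(regs->sp) check here is an assumption about the surrounding file, not part of this patch.

#ifdef CONFIG_X86_64
	/*
	 * Entry side: if the NMI interrupted code running on the IST debug
	 * stack, retarget the IDT so breakpoints taken inside the NMI
	 * handler stay on the current stack instead of reusing (and
	 * clobbering) the fixed debug-stack address.
	 */
	if (unlikely(is_debug_stack(regs->sp))) {	/* guard assumed from context */
		debug_stack_set_zero();
		this_cpu_write(update_debug_stack, 1);
	}
#endif

	/* ... nmi_enter(); default_do_nmi(regs); nmi_exit(); ... */

#ifdef CONFIG_X86_64
	/* Exit side: switch back to the original IDT. */
	if (unlikely(this_cpu_read(update_debug_stack))) {
		debug_stack_reset();
		this_cpu_write(update_debug_stack, 0);
	}
#endif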