Diffstat (limited to 'arch/x86/kernel/nmi.c')
-rw-r--r--  arch/x86/kernel/nmi.c | 47
1 files changed, 33 insertions, 14 deletions
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index a0b2f84457be..f84f5c57de35 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -365,8 +365,9 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)
 #ifdef CONFIG_X86_32
 /*
  * For i386, NMIs use the same stack as the kernel, and we can
- * add a workaround to the iret problem in C. Simply have 3 states
- * the NMI can be in.
+ * add a workaround to the iret problem in C (preventing nested
+ * NMIs if an NMI takes a trap). Simply have 3 states the NMI
+ * can be in:
  *
  * 1) not running
  * 2) executing
@@ -383,32 +384,50 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)
  * If an NMI hits a breakpoint that executes an iret, another
  * NMI can preempt it. We do not want to allow this new NMI
  * to run, but we want to execute it when the first one finishes.
- * We set the state to "latched", and the first NMI will perform
- * an cmpxchg on the state, and if it doesn't successfully
- * reset the state to "not running" it will restart the next
- * NMI.
+ * We set the state to "latched", and the exit of the first NMI will
+ * perform a dec_return, if the result is zero (NOT_RUNNING), then
+ * it will simply exit the NMI handler. If not, the dec_return
+ * would have set the state to NMI_EXECUTING (what we want it to
+ * be when we are running). In this case, we simply jump back
+ * to rerun the NMI handler again, and restart the 'latched' NMI.
+ *
+ * No trap (breakpoint or page fault) should be hit before nmi_restart,
+ * thus there is no race between the first check of state for NOT_RUNNING
+ * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs
+ * at this point.
+ *
+ * In case the NMI takes a page fault, we need to save off the CR2
+ * because the NMI could have preempted another page fault and corrupt
+ * the CR2 that is about to be read. As nested NMIs must be restarted
+ * and they can not take breakpoints or page faults, the update of the
+ * CR2 must be done before converting the nmi state back to NOT_RUNNING.
+ * Otherwise, there would be a race of another nested NMI coming in
+ * after setting state to NOT_RUNNING but before updating the nmi_cr2.
  */
 enum nmi_states {
-	NMI_NOT_RUNNING,
+	NMI_NOT_RUNNING = 0,
 	NMI_EXECUTING,
 	NMI_LATCHED,
 };
 static DEFINE_PER_CPU(enum nmi_states, nmi_state);
+static DEFINE_PER_CPU(unsigned long, nmi_cr2);
 
 #define nmi_nesting_preprocess(regs)				\
 	do {							\
-		if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) {	\
-			__get_cpu_var(nmi_state) = NMI_LATCHED;	\
+		if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {	\
+			this_cpu_write(nmi_state, NMI_LATCHED);	\
 			return;					\
 		}						\
-	nmi_restart:						\
-		__get_cpu_var(nmi_state) = NMI_EXECUTING;	\
-	} while (0)
+		this_cpu_write(nmi_state, NMI_EXECUTING);	\
+		this_cpu_write(nmi_cr2, read_cr2());		\
+	} while (0);						\
+	nmi_restart:
 
 #define nmi_nesting_postprocess()				\
 	do {							\
-		if (cmpxchg(&__get_cpu_var(nmi_state),		\
-			    NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \
+		if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))	\
+			write_cr2(this_cpu_read(nmi_cr2));	\
+		if (this_cpu_dec_return(nmi_state))		\
 			goto nmi_restart;			\
 	} while (0)
 #else /* x86_64 */
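
The new comment in this diff describes a three-state latch: a nested NMI only flips the per-CPU state to NMI_LATCHED, and the exit path's dec_return either drops NMI_EXECUTING (1) to NMI_NOT_RUNNING (0) and leaves, or drops NMI_LATCHED (2) to NMI_EXECUTING (1) and jumps back to nmi_restart to replay the latched NMI. Below is a minimal userspace C sketch of that state machine only, for illustration: the per-CPU variable is modeled as a plain int, the CR2 save/restore and the real entry point that wraps the handler with these macros are kernel specifics left out, and the function names are made up here, not taken from nmi.c.

/*
 * Userspace sketch of the i386 NMI nesting state machine described in
 * the comment above.  Not kernel code: no per-CPU data, no CR2, and the
 * "nested NMI" is simulated by a plain function call.
 */
#include <stdio.h>

enum nmi_states { NMI_NOT_RUNNING = 0, NMI_EXECUTING, NMI_LATCHED };

static int nmi_state;	/* stands in for the per-CPU nmi_state */

/* Stand-in for the real NMI work (what default_do_nmi would do). */
static void handle_one_nmi(int pass)
{
	printf("handling NMI, pass %d\n", pass);
}

/* Simulates a second NMI arriving while the first one is still running. */
static void nested_nmi(void)
{
	if (nmi_state != NMI_NOT_RUNNING) {
		/* First NMI still in progress: latch and return at once. */
		nmi_state = NMI_LATCHED;
		return;
	}
	/* Not reached in this demo; a fresh NMI would run normally here. */
}

int main(void)
{
	int pass = 0;

	/* Entry: a fresh NMI finds the state NOT_RUNNING and claims it. */
	nmi_state = NMI_EXECUTING;

	do {
		handle_one_nmi(++pass);

		if (pass == 1)
			nested_nmi();	/* a nested NMI latches itself */

		/*
		 * Exit: decrement the state.  NMI_EXECUTING (1) drops to
		 * NMI_NOT_RUNNING (0) and the loop ends; NMI_LATCHED (2)
		 * drops to NMI_EXECUTING (1) and we loop to replay the
		 * latched NMI, mirroring the goto nmi_restart above.
		 */
	} while (--nmi_state);

	printf("done after %d passes\n", pass);
	return 0;
}

Running this prints two handling passes before "done", showing how one latched NMI is replayed exactly once; in the kernel the decrement is the atomic this_cpu_dec_return() and the replay is the goto back to the nmi_restart label emitted by nmi_nesting_preprocess().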