diff options
author | Steven Rostedt <srostedt@redhat.com> | 2012-06-07 11:03:00 -0400 |
---|---|---|
committer | Steven Rostedt <rostedt@goodmis.org> | 2012-06-08 18:48:05 -0400 |
commit | c7d65a78fc18ed70353baeb7497ec71a7c775ac5 (patch) | |
tree | 1cf4651a56b972cbaa66361f41246d1e107548cd | |
parent | 7fbb98c5cb07563d3ee08714073a8e5452a96be2 (diff) |
x86: Remove cmpxchg from i386 NMI nesting code
I've been informed by someone on LWN called 'slashdot' that
some i386 machines do not support a true cmpxchg. The cmpxchg
used by the i386 NMI nesting code must be a true cmpxchg as
disabling interrupts will not work for NMIs (which is the
workaround for i386s that do not have a true cmpxchg).
This 'slashdot' character also suggested a fix to the issue.
As the state of the nesting NMIs goes as follows:
NOT_RUNNING -> EXECUTING
EXECUTING -> NOT_RUNNING
EXECUTING -> LATCHED
LATCHED -> EXECUTING
Having these states as enum values of:
NOT_RUNNING = 0
EXECUTING = 1
LATCHED = 2
Instead of a cmpxchg to make EXECUTING -> NOT_RUNNING, a
dec_and_test() would work as well. If the dec_and_test brings
the state to NOT_RUNNING, that is the same as a cmpxchg
succeeding to change EXECUTING to NOT_RUNNING. If a nested NMI
were to come in and change it to LATCHED, the dec_and_test() would
convert the state to EXECUTING (what we want it to be in such a
case anyway).
I asked 'slashdot' to post this as a patch, but it never came to
be. I decided to do the work instead.
Thanks to H. Peter Anvin for suggesting to use this_cpu_dec_return()
(the function actually used in the patch) instead of
local_dec_and_test(&__get_cpu_var()).
Link: http://lwn.net/Articles/484932/
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
-rw-r--r-- | arch/x86/kernel/nmi.c | 35 |
1 files changed, 21 insertions, 14 deletions
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index a0b2f84457be..a15a88800661 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -365,8 +365,9 @@ static __kprobes void default_do_nmi(struct pt_regs *regs) | |||
365 | #ifdef CONFIG_X86_32 | 365 | #ifdef CONFIG_X86_32 |
366 | /* | 366 | /* |
367 | * For i386, NMIs use the same stack as the kernel, and we can | 367 | * For i386, NMIs use the same stack as the kernel, and we can |
368 | * add a workaround to the iret problem in C. Simply have 3 states | 368 | * add a workaround to the iret problem in C (preventing nested |
369 | * the NMI can be in. | 369 | * NMIs if an NMI takes a trap). Simply have 3 states the NMI |
370 | * can be in: | ||
370 | * | 371 | * |
371 | * 1) not running | 372 | * 1) not running |
372 | * 2) executing | 373 | * 2) executing |
@@ -383,13 +384,20 @@ static __kprobes void default_do_nmi(struct pt_regs *regs) | |||
383 | * If an NMI hits a breakpoint that executes an iret, another | 384 | * If an NMI hits a breakpoint that executes an iret, another |
384 | * NMI can preempt it. We do not want to allow this new NMI | 385 | * NMI can preempt it. We do not want to allow this new NMI |
385 | * to run, but we want to execute it when the first one finishes. | 386 | * to run, but we want to execute it when the first one finishes. |
386 | * We set the state to "latched", and the first NMI will perform | 387 | * We set the state to "latched", and the exit of the first NMI will |
387 | * an cmpxchg on the state, and if it doesn't successfully | 388 | * perform a dec_return, if the result is zero (NOT_RUNNING), then |
388 | * reset the state to "not running" it will restart the next | 389 | * it will simply exit the NMI handler. If not, the dec_return |
389 | * NMI. | 390 | * would have set the state to NMI_EXECUTING (what we want it to |
391 | * be when we are running). In this case, we simply jump back | ||
392 | * to rerun the NMI handler again, and restart the 'latched' NMI. | ||
393 | * | ||
394 | * No trap (breakpoint or page fault) should be hit before nmi_restart, | ||
395 | * thus there is no race between the first check of state for NOT_RUNNING | ||
396 | * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs | ||
397 | * at this point. | ||
390 | */ | 398 | */ |
391 | enum nmi_states { | 399 | enum nmi_states { |
392 | NMI_NOT_RUNNING, | 400 | NMI_NOT_RUNNING = 0, |
393 | NMI_EXECUTING, | 401 | NMI_EXECUTING, |
394 | NMI_LATCHED, | 402 | NMI_LATCHED, |
395 | }; | 403 | }; |
@@ -397,18 +405,17 @@ static DEFINE_PER_CPU(enum nmi_states, nmi_state); | |||
397 | 405 | ||
398 | #define nmi_nesting_preprocess(regs) \ | 406 | #define nmi_nesting_preprocess(regs) \ |
399 | do { \ | 407 | do { \ |
400 | if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) { \ | 408 | if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { \ |
401 | __get_cpu_var(nmi_state) = NMI_LATCHED; \ | 409 | this_cpu_write(nmi_state, NMI_LATCHED); \ |
402 | return; \ | 410 | return; \ |
403 | } \ | 411 | } \ |
404 | nmi_restart: \ | 412 | this_cpu_write(nmi_state, NMI_EXECUTING); \ |
405 | __get_cpu_var(nmi_state) = NMI_EXECUTING; \ | 413 | } while (0); \ |
406 | } while (0) | 414 | nmi_restart: |
407 | 415 | ||
408 | #define nmi_nesting_postprocess() \ | 416 | #define nmi_nesting_postprocess() \ |
409 | do { \ | 417 | do { \ |
410 | if (cmpxchg(&__get_cpu_var(nmi_state), \ | 418 | if (this_cpu_dec_return(nmi_state)) \ |
411 | NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \ | ||
412 | goto nmi_restart; \ | 419 | goto nmi_restart; \ |
413 | } while (0) | 420 | } while (0) |
414 | #else /* x86_64 */ | 421 | #else /* x86_64 */ |