aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteven Rostedt <srostedt@redhat.com>2012-06-07 11:03:00 -0400
committerSteven Rostedt <rostedt@goodmis.org>2012-06-08 18:48:05 -0400
commitc7d65a78fc18ed70353baeb7497ec71a7c775ac5 (patch)
tree1cf4651a56b972cbaa66361f41246d1e107548cd
parent7fbb98c5cb07563d3ee08714073a8e5452a96be2 (diff)
x86: Remove cmpxchg from i386 NMI nesting code
I've been informed by someone on LWN called 'slashdot' that some i386 machines do not support a true cmpxchg. The cmpxchg used by the i386 NMI nesting code must be a true cmpxchg as disabling interrupts will not work for NMIs (which is the work around for i386s that do not have a true cmpxchg). This 'slashdot' character also suggested a fix to the issue. As the state of the nesting NMIs goes as follows: NOT_RUNNING -> EXECUTING EXECUTING -> NOT_RUNNING EXECUTING -> LATCHED LATCHED -> EXECUTING Having these states as enum values of: NOT_RUNNING = 0 EXECUTING = 1 LATCHED = 2 Instead of a cmpxchg to make EXECUTING -> NOT_RUNNING a dec_and_test() would work as well. If the dec_and_test brings the state to NOT_RUNNING, that is the same as a cmpxchg succeeding to change EXECUTING to NOT_RUNNING. If a nested NMI were to come in and change it to LATCHED, the dec_and_test() would convert the state to EXECUTING (what we want it to be in such a case anyway). I asked 'slashdot' to post this as a patch, but it never came to be. I decided to do the work instead. Thanks to H. Peter Anvin for suggesting to use this_cpu_dec_and_return() instead of local_dec_and_test(&__get_cpu_var()). Link: http://lwn.net/Articles/484932/ Cc: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
-rw-r--r--arch/x86/kernel/nmi.c35
1 files changed, 21 insertions, 14 deletions
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index a0b2f84457be..a15a88800661 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -365,8 +365,9 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)
365#ifdef CONFIG_X86_32 365#ifdef CONFIG_X86_32
366/* 366/*
367 * For i386, NMIs use the same stack as the kernel, and we can 367 * For i386, NMIs use the same stack as the kernel, and we can
368 * add a workaround to the iret problem in C. Simply have 3 states 368 * add a workaround to the iret problem in C (preventing nested
369 * the NMI can be in. 369 * NMIs if an NMI takes a trap). Simply have 3 states the NMI
370 * can be in:
370 * 371 *
371 * 1) not running 372 * 1) not running
372 * 2) executing 373 * 2) executing
@@ -383,13 +384,20 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)
383 * If an NMI hits a breakpoint that executes an iret, another 384 * If an NMI hits a breakpoint that executes an iret, another
384 * NMI can preempt it. We do not want to allow this new NMI 385 * NMI can preempt it. We do not want to allow this new NMI
385 * to run, but we want to execute it when the first one finishes. 386 * to run, but we want to execute it when the first one finishes.
386 * We set the state to "latched", and the first NMI will perform 387 * We set the state to "latched", and the exit of the first NMI will
387 * an cmpxchg on the state, and if it doesn't successfully 388 * perform a dec_return, if the result is zero (NOT_RUNNING), then
388 * reset the state to "not running" it will restart the next 389 * it will simply exit the NMI handler. If not, the dec_return
389 * NMI. 390 * would have set the state to NMI_EXECUTING (what we want it to
391 * be when we are running). In this case, we simply jump back
392 * to rerun the NMI handler again, and restart the 'latched' NMI.
393 *
394 * No trap (breakpoint or page fault) should be hit before nmi_restart,
395 * thus there is no race between the first check of state for NOT_RUNNING
396 * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs
397 * at this point.
390 */ 398 */
391enum nmi_states { 399enum nmi_states {
392 NMI_NOT_RUNNING, 400 NMI_NOT_RUNNING = 0,
393 NMI_EXECUTING, 401 NMI_EXECUTING,
394 NMI_LATCHED, 402 NMI_LATCHED,
395}; 403};
@@ -397,18 +405,17 @@ static DEFINE_PER_CPU(enum nmi_states, nmi_state);
397 405
398#define nmi_nesting_preprocess(regs) \ 406#define nmi_nesting_preprocess(regs) \
399 do { \ 407 do { \
400 if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) { \ 408 if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { \
401 __get_cpu_var(nmi_state) = NMI_LATCHED; \ 409 this_cpu_write(nmi_state, NMI_LATCHED); \
402 return; \ 410 return; \
403 } \ 411 } \
404 nmi_restart: \ 412 this_cpu_write(nmi_state, NMI_EXECUTING); \
405 __get_cpu_var(nmi_state) = NMI_EXECUTING; \ 413 } while (0); \
406 } while (0) 414 nmi_restart:
407 415
408#define nmi_nesting_postprocess() \ 416#define nmi_nesting_postprocess() \
409 do { \ 417 do { \
410 if (cmpxchg(&__get_cpu_var(nmi_state), \ 418 if (this_cpu_dec_return(nmi_state)) \
411 NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \
412 goto nmi_restart; \ 419 goto nmi_restart; \
413 } while (0) 420 } while (0)
414#else /* x86_64 */ 421#else /* x86_64 */