aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
author Andy Lutomirski <luto@amacapital.net> 2014-11-19 20:41:09 -0500
committer Andy Lutomirski <luto@amacapital.net> 2015-01-02 13:22:46 -0500
commit 959274753857efe9c5f1ba35fe727f51e9aa128d (patch)
tree 57532e8c2738c21a90d47ac0e23dcc3f81ffd29b /arch/x86
parent 48e08d0fb265b007ebbb29a72297ff7e40938969 (diff)
x86, traps: Track entry into and exit from IST context
We currently pretend that IST context is like standard exception context, but this is incorrect. IST entries from userspace are like standard exceptions except that they use per-cpu stacks, so they are atomic. IST entries from kernel space are like NMIs from RCU's perspective -- they are not quiescent states even if they interrupted the kernel during a quiescent state. Add and use ist_enter and ist_exit to track IST context. Even though x86_32 has no IST stacks, we track these interrupts the same way. This fixes two issues: - Scheduling from an IST interrupt handler will now warn. It would previously appear to work as long as we got lucky and nothing overwrote the stack frame. (I don't know of any bugs in this that would trigger the warning, but it's good to be on the safe side.) - RCU handling in IST context was dangerous. As far as I know, only machine checks were likely to trigger this, but it's good to be on the safe side. Note that the machine check handlers appear to have been missing any context tracking at all before this patch. Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Josh Triplett <josh@joshtriplett.org> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/traps.h4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c5
-rw-r--r--arch/x86/kernel/cpu/mcheck/p5.c6
-rw-r--r--arch/x86/kernel/cpu/mcheck/winchip.c5
-rw-r--r--arch/x86/kernel/traps.c47
5 files changed, 61 insertions, 6 deletions
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 707adc6549d8..3cf525ec762d 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -1,6 +1,7 @@
1#ifndef _ASM_X86_TRAPS_H 1#ifndef _ASM_X86_TRAPS_H
2#define _ASM_X86_TRAPS_H 2#define _ASM_X86_TRAPS_H
3 3
4#include <linux/context_tracking_state.h>
4#include <linux/kprobes.h> 5#include <linux/kprobes.h>
5 6
6#include <asm/debugreg.h> 7#include <asm/debugreg.h>
@@ -110,6 +111,9 @@ asmlinkage void smp_thermal_interrupt(void);
110asmlinkage void mce_threshold_interrupt(void); 111asmlinkage void mce_threshold_interrupt(void);
111#endif 112#endif
112 113
114extern enum ctx_state ist_enter(struct pt_regs *regs);
115extern void ist_exit(struct pt_regs *regs, enum ctx_state prev_state);
116
113/* Interrupts/Exceptions */ 117/* Interrupts/Exceptions */
114enum { 118enum {
115 X86_TRAP_DE = 0, /* 0, Divide-by-zero */ 119 X86_TRAP_DE = 0, /* 0, Divide-by-zero */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d2c611699cd9..800d423f1e92 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -43,6 +43,7 @@
43#include <linux/export.h> 43#include <linux/export.h>
44 44
45#include <asm/processor.h> 45#include <asm/processor.h>
46#include <asm/traps.h>
46#include <asm/mce.h> 47#include <asm/mce.h>
47#include <asm/msr.h> 48#include <asm/msr.h>
48 49
@@ -1063,6 +1064,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1063{ 1064{
1064 struct mca_config *cfg = &mca_cfg; 1065 struct mca_config *cfg = &mca_cfg;
1065 struct mce m, *final; 1066 struct mce m, *final;
1067 enum ctx_state prev_state;
1066 int i; 1068 int i;
1067 int worst = 0; 1069 int worst = 0;
1068 int severity; 1070 int severity;
@@ -1085,6 +1087,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1085 DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); 1087 DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
1086 char *msg = "Unknown"; 1088 char *msg = "Unknown";
1087 1089
1090 prev_state = ist_enter(regs);
1091
1088 this_cpu_inc(mce_exception_count); 1092 this_cpu_inc(mce_exception_count);
1089 1093
1090 if (!cfg->banks) 1094 if (!cfg->banks)
@@ -1216,6 +1220,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1216 mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); 1220 mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
1217out: 1221out:
1218 sync_core(); 1222 sync_core();
1223 ist_exit(regs, prev_state);
1219} 1224}
1220EXPORT_SYMBOL_GPL(do_machine_check); 1225EXPORT_SYMBOL_GPL(do_machine_check);
1221 1226
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index a3042989398c..ec2663a708e4 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -8,6 +8,7 @@
8#include <linux/smp.h> 8#include <linux/smp.h>
9 9
10#include <asm/processor.h> 10#include <asm/processor.h>
11#include <asm/traps.h>
11#include <asm/mce.h> 12#include <asm/mce.h>
12#include <asm/msr.h> 13#include <asm/msr.h>
13 14
@@ -17,8 +18,11 @@ int mce_p5_enabled __read_mostly;
17/* Machine check handler for Pentium class Intel CPUs: */ 18/* Machine check handler for Pentium class Intel CPUs: */
18static void pentium_machine_check(struct pt_regs *regs, long error_code) 19static void pentium_machine_check(struct pt_regs *regs, long error_code)
19{ 20{
21 enum ctx_state prev_state;
20 u32 loaddr, hi, lotype; 22 u32 loaddr, hi, lotype;
21 23
24 prev_state = ist_enter(regs);
25
22 rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); 26 rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
23 rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); 27 rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
24 28
@@ -33,6 +37,8 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
33 } 37 }
34 38
35 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); 39 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
40
41 ist_exit(regs, prev_state);
36} 42}
37 43
38/* Set up machine check reporting for processors with Intel style MCE: */ 44/* Set up machine check reporting for processors with Intel style MCE: */
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 7dc5564d0cdf..bd5d46a32210 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -7,14 +7,19 @@
7#include <linux/types.h> 7#include <linux/types.h>
8 8
9#include <asm/processor.h> 9#include <asm/processor.h>
10#include <asm/traps.h>
10#include <asm/mce.h> 11#include <asm/mce.h>
11#include <asm/msr.h> 12#include <asm/msr.h>
12 13
13/* Machine check handler for WinChip C6: */ 14/* Machine check handler for WinChip C6: */
14static void winchip_machine_check(struct pt_regs *regs, long error_code) 15static void winchip_machine_check(struct pt_regs *regs, long error_code)
15{ 16{
17 enum ctx_state prev_state = ist_enter(regs);
18
16 printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); 19 printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
17 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); 20 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
21
22 ist_exit(regs, prev_state);
18} 23}
19 24
20/* Set up machine check reporting on the Winchip C6 series */ 25/* Set up machine check reporting on the Winchip C6 series */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 28f3e5ffc55d..b3a9d24dba25 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -108,6 +108,39 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
108 preempt_count_dec(); 108 preempt_count_dec();
109} 109}
110 110
111enum ctx_state ist_enter(struct pt_regs *regs)
112{
113 /*
114 * We are atomic because we're on the IST stack (or we're on x86_32,
115 * in which case we still shouldn't schedule.
116 */
117 preempt_count_add(HARDIRQ_OFFSET);
118
119 if (user_mode_vm(regs)) {
120 /* Other than that, we're just an exception. */
121 return exception_enter();
122 } else {
123 /*
124 * We might have interrupted pretty much anything. In
125 * fact, if we're a machine check, we can even interrupt
126 * NMI processing. We don't want in_nmi() to return true,
127 * but we need to notify RCU.
128 */
129 rcu_nmi_enter();
130 return IN_KERNEL; /* the value is irrelevant. */
131 }
132}
133
134void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
135{
136 preempt_count_sub(HARDIRQ_OFFSET);
137
138 if (user_mode_vm(regs))
139 return exception_exit(prev_state);
140 else
141 rcu_nmi_exit();
142}
143
111static nokprobe_inline int 144static nokprobe_inline int
112do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, 145do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
113 struct pt_regs *regs, long error_code) 146 struct pt_regs *regs, long error_code)
@@ -251,6 +284,8 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
251 * end up promoting it to a doublefault. In that case, modify 284 * end up promoting it to a doublefault. In that case, modify
252 * the stack to make it look like we just entered the #GP 285 * the stack to make it look like we just entered the #GP
253 * handler from user space, similar to bad_iret. 286 * handler from user space, similar to bad_iret.
287 *
288 * No need for ist_enter here because we don't use RCU.
254 */ 289 */
255 if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && 290 if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
256 regs->cs == __KERNEL_CS && 291 regs->cs == __KERNEL_CS &&
@@ -263,12 +298,12 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
263 normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ 298 normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */
264 regs->ip = (unsigned long)general_protection; 299 regs->ip = (unsigned long)general_protection;
265 regs->sp = (unsigned long)&normal_regs->orig_ax; 300 regs->sp = (unsigned long)&normal_regs->orig_ax;
301
266 return; 302 return;
267 } 303 }
268#endif 304#endif
269 305
270 exception_enter(); 306 ist_enter(regs); /* Discard prev_state because we won't return. */
271 /* Return not checked because double check cannot be ignored */
272 notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); 307 notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
273 308
274 tsk->thread.error_code = error_code; 309 tsk->thread.error_code = error_code;
@@ -434,7 +469,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
434 if (poke_int3_handler(regs)) 469 if (poke_int3_handler(regs))
435 return; 470 return;
436 471
437 prev_state = exception_enter(); 472 prev_state = ist_enter(regs);
438#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP 473#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
439 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, 474 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
440 SIGTRAP) == NOTIFY_STOP) 475 SIGTRAP) == NOTIFY_STOP)
@@ -460,7 +495,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
460 preempt_conditional_cli(regs); 495 preempt_conditional_cli(regs);
461 debug_stack_usage_dec(); 496 debug_stack_usage_dec();
462exit: 497exit:
463 exception_exit(prev_state); 498 ist_exit(regs, prev_state);
464} 499}
465NOKPROBE_SYMBOL(do_int3); 500NOKPROBE_SYMBOL(do_int3);
466 501
@@ -541,7 +576,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
541 unsigned long dr6; 576 unsigned long dr6;
542 int si_code; 577 int si_code;
543 578
544 prev_state = exception_enter(); 579 prev_state = ist_enter(regs);
545 580
546 get_debugreg(dr6, 6); 581 get_debugreg(dr6, 6);
547 582
@@ -616,7 +651,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
616 debug_stack_usage_dec(); 651 debug_stack_usage_dec();
617 652
618exit: 653exit:
619 exception_exit(prev_state); 654 ist_exit(regs, prev_state);
620} 655}
621NOKPROBE_SYMBOL(do_debug); 656NOKPROBE_SYMBOL(do_debug);
622 657