author     Ingo Molnar <mingo@kernel.org>    2015-02-04 03:01:12 -0500
committer  Ingo Molnar <mingo@kernel.org>    2015-02-04 03:01:12 -0500
commit     0967160ad615985c7c35443156ea9aecc60c37b8
tree       658f728aff1be23540180091b718452a6848a6b0 /arch/x86
parent     2fde4f94e0a9531251e706fa57131b51b0df042e
parent     b57c0b5175ddbe9b477801f9994a5b330702c1ba

Merge branch 'x86/asm' into perf/x86, to avoid conflicts with upcoming patches

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86')

-rw-r--r--  arch/x86/ia32/ia32entry.S            |   4
-rw-r--r--  arch/x86/include/asm/calling.h       |   1
-rw-r--r--  arch/x86/include/asm/mce.h           |   1
-rw-r--r--  arch/x86/include/asm/thread_info.h   |  15
-rw-r--r--  arch/x86/include/asm/traps.h         |   6
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c     | 114
-rw-r--r--  arch/x86/kernel/cpu/mcheck/p5.c      |   6
-rw-r--r--  arch/x86/kernel/cpu/mcheck/winchip.c |   5
-rw-r--r--  arch/x86/kernel/entry_64.S           | 317
-rw-r--r--  arch/x86/kernel/irq_32.c             |  13
-rw-r--r--  arch/x86/kernel/signal.c             |   6
-rw-r--r--  arch/x86/kernel/traps.c              | 119
-rw-r--r--  arch/x86/vdso/Makefile               |   2

13 files changed, 308 insertions, 301 deletions
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 82e8a1d44658..156ebcab4ada 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -179,8 +179,8 @@ sysenter_dispatch:
 sysexit_from_sys_call:
         andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
         /* clear IF, that popfq doesn't enable interrupts early */
-        andl $~0x200,EFLAGS-R11(%rsp)
-        movl RIP-R11(%rsp),%edx /* User %eip */
+        andl $~0x200,EFLAGS-ARGOFFSET(%rsp)
+        movl RIP-ARGOFFSET(%rsp),%edx /* User %eip */
         CFI_REGISTER rip,rdx
         RESTORE_ARGS 0,24,0,0,0,0
         xorq %r8,%r8
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 76659b67fd11..1f1297b46f83 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -83,7 +83,6 @@ For 32-bit we have the following conventions - kernel is built with
 #define SS 160

 #define ARGOFFSET R11
-#define SWFRAME ORIG_RAX

         .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
         subq $9*8+\addskip, %rsp
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 51b26e895933..9b3de99dc004 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -190,7 +190,6 @@ enum mcp_flags {
 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);

 int mce_notify_irq(void);
-void mce_notify_process(void);

 DECLARE_PER_CPU(struct mce, injectm);

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 547e344a6dc6..e82e95abc92b 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -75,7 +75,6 @@ struct thread_info {
 #define TIF_SYSCALL_EMU 6 /* syscall emulation active */
 #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
 #define TIF_SECCOMP 8 /* secure computing */
-#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
 #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
 #define TIF_UPROBE 12 /* breakpointed or singlestepping */
 #define TIF_NOTSC 16 /* TSC is not accessible in userland */
@@ -100,7 +99,6 @@ struct thread_info {
 #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
 #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP (1 << TIF_SECCOMP)
-#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
 #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE (1 << TIF_UPROBE)
 #define _TIF_NOTSC (1 << TIF_NOTSC)
@@ -140,7 +138,7 @@ struct thread_info {

 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK \
-        (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \
+        (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \
          _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE)

 /* flags to check in __switch_to() */
@@ -170,6 +168,17 @@ static inline struct thread_info *current_thread_info(void)
         return ti;
 }

+static inline unsigned long current_stack_pointer(void)
+{
+        unsigned long sp;
+#ifdef CONFIG_X86_64
+        asm("mov %%rsp,%0" : "=g" (sp));
+#else
+        asm("mov %%esp,%0" : "=g" (sp));
+#endif
+        return sp;
+}
+
 #else /* !__ASSEMBLY__ */

 /* how to get the thread information struct from ASM */
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 707adc6549d8..4e49d7dff78e 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_X86_TRAPS_H
 #define _ASM_X86_TRAPS_H

+#include <linux/context_tracking_state.h>
 #include <linux/kprobes.h>

 #include <asm/debugreg.h>
@@ -110,6 +111,11 @@ asmlinkage void smp_thermal_interrupt(void);
 asmlinkage void mce_threshold_interrupt(void);
 #endif

+extern enum ctx_state ist_enter(struct pt_regs *regs);
+extern void ist_exit(struct pt_regs *regs, enum ctx_state prev_state);
+extern void ist_begin_non_atomic(struct pt_regs *regs);
+extern void ist_end_non_atomic(void);
+
 /* Interrupts/Exceptions */
 enum {
         X86_TRAP_DE = 0, /* 0, Divide-by-zero */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d2c611699cd9..d23179900755 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -43,6 +43,7 @@
 #include <linux/export.h>

 #include <asm/processor.h>
+#include <asm/traps.h>
 #include <asm/mce.h>
 #include <asm/msr.h>

@@ -1003,51 +1004,6 @@ static void mce_clear_state(unsigned long *toclear)
 }

 /*
- * Need to save faulting physical address associated with a process
- * in the machine check handler some place where we can grab it back
- * later in mce_notify_process()
- */
-#define MCE_INFO_MAX 16
-
-struct mce_info {
-        atomic_t inuse;
-        struct task_struct *t;
-        __u64 paddr;
-        int restartable;
-} mce_info[MCE_INFO_MAX];
-
-static void mce_save_info(__u64 addr, int c)
-{
-        struct mce_info *mi;
-
-        for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) {
-                if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) {
-                        mi->t = current;
-                        mi->paddr = addr;
-                        mi->restartable = c;
-                        return;
-                }
-        }
-
-        mce_panic("Too many concurrent recoverable errors", NULL, NULL);
-}
-
-static struct mce_info *mce_find_info(void)
-{
-        struct mce_info *mi;
-
-        for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++)
-                if (atomic_read(&mi->inuse) && mi->t == current)
-                        return mi;
-        return NULL;
-}
-
-static void mce_clear_info(struct mce_info *mi)
-{
-        atomic_set(&mi->inuse, 0);
-}
-
-/*
  * The actual machine check handler. This only handles real
  * exceptions when something got corrupted coming in through int 18.
  *
@@ -1063,6 +1019,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 {
         struct mca_config *cfg = &mca_cfg;
         struct mce m, *final;
+        enum ctx_state prev_state;
         int i;
         int worst = 0;
         int severity;
@@ -1084,6 +1041,10 @@ void do_machine_check(struct pt_regs *regs, long error_code)
         DECLARE_BITMAP(toclear, MAX_NR_BANKS);
         DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
         char *msg = "Unknown";
+        u64 recover_paddr = ~0ull;
+        int flags = MF_ACTION_REQUIRED;
+
+        prev_state = ist_enter(regs);

         this_cpu_inc(mce_exception_count);

@@ -1203,9 +1164,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
         if (no_way_out)
                 mce_panic("Fatal machine check on current CPU", &m, msg);
         if (worst == MCE_AR_SEVERITY) {
-                /* schedule action before return to userland */
-                mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV);
-                set_thread_flag(TIF_MCE_NOTIFY);
+                recover_paddr = m.addr;
+                if (!(m.mcgstatus & MCG_STATUS_RIPV))
+                        flags |= MF_MUST_KILL;
         } else if (kill_it) {
                 force_sig(SIGBUS, current);
         }
@@ -1216,6 +1177,27 @@ void do_machine_check(struct pt_regs *regs, long error_code)
         mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
 out:
         sync_core();
+
+        if (recover_paddr == ~0ull)
+                goto done;
+
+        pr_err("Uncorrected hardware memory error in user-access at %llx",
+                recover_paddr);
+        /*
+         * We must call memory_failure() here even if the current process is
+         * doomed. We still need to mark the page as poisoned and alert any
+         * other users of the page.
+         */
+        ist_begin_non_atomic(regs);
+        local_irq_enable();
+        if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) {
+                pr_err("Memory error not recovered");
+                force_sig(SIGBUS, current);
+        }
+        local_irq_disable();
+        ist_end_non_atomic();
+done:
+        ist_exit(regs, prev_state);
 }
 EXPORT_SYMBOL_GPL(do_machine_check);

@@ -1233,42 +1215,6 @@ int memory_failure(unsigned long pfn, int vector, int flags)
 #endif

 /*
- * Called in process context that interrupted by MCE and marked with
- * TIF_MCE_NOTIFY, just before returning to erroneous userland.
- * This code is allowed to sleep.
- * Attempt possible recovery such as calling the high level VM handler to
- * process any corrupted pages, and kill/signal current process if required.
- * Action required errors are handled here.
- */
-void mce_notify_process(void)
-{
-        unsigned long pfn;
-        struct mce_info *mi = mce_find_info();
-        int flags = MF_ACTION_REQUIRED;
-
-        if (!mi)
-                mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL);
-        pfn = mi->paddr >> PAGE_SHIFT;
-
-        clear_thread_flag(TIF_MCE_NOTIFY);
-
-        pr_err("Uncorrected hardware memory error in user-access at %llx",
-                mi->paddr);
-        /*
-         * We must call memory_failure() here even if the current process is
-         * doomed. We still need to mark the page as poisoned and alert any
-         * other users of the page.
-         */
-        if (!mi->restartable)
-                flags |= MF_MUST_KILL;
-        if (memory_failure(pfn, MCE_VECTOR, flags) < 0) {
-                pr_err("Memory error not recovered");
-                force_sig(SIGBUS, current);
-        }
-        mce_clear_info(mi);
-}
-
-/*
  * Action optional processing happens here (picking up
  * from the list of faulting pages that do_machine_check()
  * placed into the "ring").
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index a3042989398c..ec2663a708e4 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -8,6 +8,7 @@
 #include <linux/smp.h>

 #include <asm/processor.h>
+#include <asm/traps.h>
 #include <asm/mce.h>
 #include <asm/msr.h>

@@ -17,8 +18,11 @@ int mce_p5_enabled __read_mostly;
 /* Machine check handler for Pentium class Intel CPUs: */
 static void pentium_machine_check(struct pt_regs *regs, long error_code)
 {
+        enum ctx_state prev_state;
         u32 loaddr, hi, lotype;

+        prev_state = ist_enter(regs);
+
         rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
         rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);

@@ -33,6 +37,8 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
         }

         add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
+
+        ist_exit(regs, prev_state);
 }

 /* Set up machine check reporting for processors with Intel style MCE: */
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 7dc5564d0cdf..bd5d46a32210 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -7,14 +7,19 @@
 #include <linux/types.h>

 #include <asm/processor.h>
+#include <asm/traps.h>
 #include <asm/mce.h>
 #include <asm/msr.h>

 /* Machine check handler for WinChip C6: */
 static void winchip_machine_check(struct pt_regs *regs, long error_code)
 {
+        enum ctx_state prev_state = ist_enter(regs);
+
         printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
         add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
+
+        ist_exit(regs, prev_state);
 }

 /* Set up machine check reporting on the Winchip C6 series */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 9ebaf63ba182..db13655c3a2a 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -143,7 +143,8 @@ ENDPROC(native_usergs_sysret64)
         movq \tmp,RSP+\offset(%rsp)
         movq $__USER_DS,SS+\offset(%rsp)
         movq $__USER_CS,CS+\offset(%rsp)
-        movq $-1,RCX+\offset(%rsp)
+        movq RIP+\offset(%rsp),\tmp /* get rip */
+        movq \tmp,RCX+\offset(%rsp) /* copy it to rcx as sysret would do */
         movq R11+\offset(%rsp),\tmp /* get eflags */
         movq \tmp,EFLAGS+\offset(%rsp)
         .endm
@@ -155,27 +156,6 @@ ENDPROC(native_usergs_sysret64)
         movq \tmp,R11+\offset(%rsp)
         .endm

-        .macro FAKE_STACK_FRAME child_rip
-        /* push in order ss, rsp, eflags, cs, rip */
-        xorl %eax, %eax
-        pushq_cfi $__KERNEL_DS /* ss */
-        /*CFI_REL_OFFSET ss,0*/
-        pushq_cfi %rax /* rsp */
-        CFI_REL_OFFSET rsp,0
-        pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) /* eflags - interrupts on */
-        /*CFI_REL_OFFSET rflags,0*/
-        pushq_cfi $__KERNEL_CS /* cs */
-        /*CFI_REL_OFFSET cs,0*/
-        pushq_cfi \child_rip /* rip */
-        CFI_REL_OFFSET rip,0
-        pushq_cfi %rax /* orig rax */
-        .endm
-
-        .macro UNFAKE_STACK_FRAME
-        addq $8*6, %rsp
-        CFI_ADJUST_CFA_OFFSET -(6*8)
-        .endm
-
 /*
  * initial frame state for interrupts (and exceptions without error code)
  */
@@ -238,51 +218,6 @@ ENDPROC(native_usergs_sysret64)
         CFI_REL_OFFSET r15, R15+\offset
         .endm

-/* save partial stack frame */
-        .macro SAVE_ARGS_IRQ
-        cld
-        /* start from rbp in pt_regs and jump over */
-        movq_cfi rdi, (RDI-RBP)
-        movq_cfi rsi, (RSI-RBP)
-        movq_cfi rdx, (RDX-RBP)
-        movq_cfi rcx, (RCX-RBP)
-        movq_cfi rax, (RAX-RBP)
-        movq_cfi r8, (R8-RBP)
-        movq_cfi r9, (R9-RBP)
-        movq_cfi r10, (R10-RBP)
-        movq_cfi r11, (R11-RBP)
-
-        /* Save rbp so that we can unwind from get_irq_regs() */
-        movq_cfi rbp, 0
-
-        /* Save previous stack value */
-        movq %rsp, %rsi
-
-        leaq -RBP(%rsp),%rdi /* arg1 for handler */
-        testl $3, CS-RBP(%rsi)
-        je 1f
-        SWAPGS
-        /*
-         * irq_count is used to check if a CPU is already on an interrupt stack
-         * or not. While this is essentially redundant with preempt_count it is
-         * a little cheaper to use a separate counter in the PDA (short of
-         * moving irq_enter into assembly, which would be too much work)
-         */
-1:      incl PER_CPU_VAR(irq_count)
-        cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
-        CFI_DEF_CFA_REGISTER rsi
-
-        /* Store previous stack value */
-        pushq %rsi
-        CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
-                0x77 /* DW_OP_breg7 */, 0, \
-                0x06 /* DW_OP_deref */, \
-                0x08 /* DW_OP_const1u */, SS+8-RBP, \
-                0x22 /* DW_OP_plus */
-        /* We entered an interrupt context - irqs are off: */
-        TRACE_IRQS_OFF
-        .endm
-
 ENTRY(save_paranoid)
         XCPT_FRAME 1 RDI+8
         cld
@@ -426,15 +361,12 @@ system_call_fastpath:
  * Has incomplete stack frame and undefined top of stack.
  */
 ret_from_sys_call:
-        movl $_TIF_ALLWORK_MASK,%edi
-        /* edi: flagmask */
-sysret_check:
+        testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+        jnz int_ret_from_sys_call_fixup /* Go the the slow path */
+
         LOCKDEP_SYS_EXIT
         DISABLE_INTERRUPTS(CLBR_NONE)
         TRACE_IRQS_OFF
-        movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
-        andl %edi,%edx
-        jnz sysret_careful
         CFI_REMEMBER_STATE
         /*
          * sysretq will re-enable interrupts:
@@ -448,49 +380,10 @@ sysret_check:
         USERGS_SYSRET64

         CFI_RESTORE_STATE
-        /* Handle reschedules */
-        /* edx: work, edi: workmask */
-sysret_careful:
-        bt $TIF_NEED_RESCHED,%edx
-        jnc sysret_signal
-        TRACE_IRQS_ON
-        ENABLE_INTERRUPTS(CLBR_NONE)
-        pushq_cfi %rdi
-        SCHEDULE_USER
-        popq_cfi %rdi
-        jmp sysret_check

-        /* Handle a signal */
-sysret_signal:
-        TRACE_IRQS_ON
-        ENABLE_INTERRUPTS(CLBR_NONE)
-#ifdef CONFIG_AUDITSYSCALL
-        bt $TIF_SYSCALL_AUDIT,%edx
-        jc sysret_audit
-#endif
-        /*
-         * We have a signal, or exit tracing or single-step.
-         * These all wind up with the iret return path anyway,
-         * so just join that path right now.
-         */
+int_ret_from_sys_call_fixup:
         FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
-        jmp int_check_syscall_exit_work
-
-#ifdef CONFIG_AUDITSYSCALL
-        /*
-         * Return fast path for syscall audit. Call __audit_syscall_exit()
-         * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
-         * masked off.
-         */
-sysret_audit:
-        movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */
-        cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? */
-        setbe %al /* 1 if so, 0 if not */
-        movzbl %al,%edi /* zero-extend that into %edi */
-        call __audit_syscall_exit
-        movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
-        jmp sysret_check
-#endif /* CONFIG_AUDITSYSCALL */
+        jmp int_ret_from_sys_call

         /* Do syscall tracing */
 tracesys:
@@ -626,19 +519,6 @@ END(\label)
         FORK_LIKE vfork
         FIXED_FRAME stub_iopl, sys_iopl

-ENTRY(ptregscall_common)
-        DEFAULT_FRAME 1 8 /* offset 8: return address */
-        RESTORE_TOP_OF_STACK %r11, 8
-        movq_cfi_restore R15+8, r15
-        movq_cfi_restore R14+8, r14
-        movq_cfi_restore R13+8, r13
-        movq_cfi_restore R12+8, r12
-        movq_cfi_restore RBP+8, rbp
-        movq_cfi_restore RBX+8, rbx
-        ret $REST_SKIP /* pop extended registers */
-        CFI_ENDPROC
-END(ptregscall_common)
-
 ENTRY(stub_execve)
         CFI_STARTPROC
         addq $8, %rsp
@@ -779,7 +659,48 @@ END(interrupt)
         /* reserve pt_regs for scratch regs and rbp */
         subq $ORIG_RAX-RBP, %rsp
         CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
-        SAVE_ARGS_IRQ
+        cld
+        /* start from rbp in pt_regs and jump over */
+        movq_cfi rdi, (RDI-RBP)
+        movq_cfi rsi, (RSI-RBP)
+        movq_cfi rdx, (RDX-RBP)
+        movq_cfi rcx, (RCX-RBP)
+        movq_cfi rax, (RAX-RBP)
+        movq_cfi r8, (R8-RBP)
+        movq_cfi r9, (R9-RBP)
+        movq_cfi r10, (R10-RBP)
+        movq_cfi r11, (R11-RBP)
+
+        /* Save rbp so that we can unwind from get_irq_regs() */
+        movq_cfi rbp, 0
+
+        /* Save previous stack value */
+        movq %rsp, %rsi
+
+        leaq -RBP(%rsp),%rdi /* arg1 for handler */
+        testl $3, CS-RBP(%rsi)
+        je 1f
+        SWAPGS
+        /*
+         * irq_count is used to check if a CPU is already on an interrupt stack
+         * or not. While this is essentially redundant with preempt_count it is
+         * a little cheaper to use a separate counter in the PDA (short of
+         * moving irq_enter into assembly, which would be too much work)
+         */
+1:      incl PER_CPU_VAR(irq_count)
+        cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
+        CFI_DEF_CFA_REGISTER rsi
+
+        /* Store previous stack value */
+        pushq %rsi
+        CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
+                0x77 /* DW_OP_breg7 */, 0, \
+                0x06 /* DW_OP_deref */, \
+                0x08 /* DW_OP_const1u */, SS+8-RBP, \
+                0x22 /* DW_OP_plus */
+        /* We entered an interrupt context - irqs are off: */
+        TRACE_IRQS_OFF
+
         call \func
         .endm

@@ -831,6 +752,60 @@ retint_swapgs: /* return to user-space */
          */
         DISABLE_INTERRUPTS(CLBR_ANY)
         TRACE_IRQS_IRETQ
+
+        /*
+         * Try to use SYSRET instead of IRET if we're returning to
+         * a completely clean 64-bit userspace context.
+         */
+        movq (RCX-R11)(%rsp), %rcx
+        cmpq %rcx,(RIP-R11)(%rsp) /* RCX == RIP */
+        jne opportunistic_sysret_failed
+
+        /*
+         * On Intel CPUs, sysret with non-canonical RCX/RIP will #GP
+         * in kernel space. This essentially lets the user take over
+         * the kernel, since userspace controls RSP. It's not worth
+         * testing for canonicalness exactly -- this check detects any
+         * of the 17 high bits set, which is true for non-canonical
+         * or kernel addresses. (This will pessimize vsyscall=native.
+         * Big deal.)
+         *
+         * If virtual addresses ever become wider, this will need
+         * to be updated to remain correct on both old and new CPUs.
+         */
+        .ifne __VIRTUAL_MASK_SHIFT - 47
+        .error "virtual address width changed -- sysret checks need update"
+        .endif
+        shr $__VIRTUAL_MASK_SHIFT, %rcx
+        jnz opportunistic_sysret_failed
+
+        cmpq $__USER_CS,(CS-R11)(%rsp) /* CS must match SYSRET */
+        jne opportunistic_sysret_failed
+
+        movq (R11-ARGOFFSET)(%rsp), %r11
+        cmpq %r11,(EFLAGS-ARGOFFSET)(%rsp) /* R11 == RFLAGS */
+        jne opportunistic_sysret_failed
+
+        testq $X86_EFLAGS_RF,%r11 /* sysret can't restore RF */
+        jnz opportunistic_sysret_failed
+
+        /* nothing to check for RSP */
+
+        cmpq $__USER_DS,(SS-ARGOFFSET)(%rsp) /* SS must match SYSRET */
+        jne opportunistic_sysret_failed
+
+        /*
+         * We win! This label is here just for ease of understanding
+         * perf profiles. Nothing jumps here.
+         */
+irq_return_via_sysret:
+        CFI_REMEMBER_STATE
+        RESTORE_ARGS 1,8,1
+        movq (RSP-RIP)(%rsp),%rsp
+        USERGS_SYSRET64
+        CFI_RESTORE_STATE
+
+opportunistic_sysret_failed:
         SWAPGS
         jmp restore_args

@@ -1048,6 +1023,11 @@ ENTRY(\sym)
         CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15

         .if \paranoid
+        .if \paranoid == 1
+        CFI_REMEMBER_STATE
+        testl $3, CS(%rsp) /* If coming from userspace, switch */
+        jnz 1f /* stacks. */
+        .endif
         call save_paranoid
         .else
         call error_entry
@@ -1088,6 +1068,36 @@ ENTRY(\sym)
         jmp error_exit /* %ebx: no swapgs flag */
         .endif

+        .if \paranoid == 1
+        CFI_RESTORE_STATE
+        /*
+         * Paranoid entry from userspace. Switch stacks and treat it
+         * as a normal entry. This means that paranoid handlers
+         * run in real process context if user_mode(regs).
+         */
+1:
+        call error_entry
+
+        DEFAULT_FRAME 0
+
+        movq %rsp,%rdi /* pt_regs pointer */
+        call sync_regs
+        movq %rax,%rsp /* switch stack */
+
+        movq %rsp,%rdi /* pt_regs pointer */
+
+        .if \has_error_code
+        movq ORIG_RAX(%rsp),%rsi /* get error code */
+        movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
+        .else
+        xorl %esi,%esi /* no error code */
+        .endif
+
+        call \do_sym
+
+        jmp error_exit /* %ebx: no swapgs flag */
+        .endif
+
         CFI_ENDPROC
 END(\sym)
 .endm
@@ -1108,7 +1118,7 @@ idtentry overflow do_overflow has_error_code=0
 idtentry bounds do_bounds has_error_code=0
 idtentry invalid_op do_invalid_op has_error_code=0
 idtentry device_not_available do_device_not_available has_error_code=0
-idtentry double_fault do_double_fault has_error_code=1 paranoid=1
+idtentry double_fault do_double_fault has_error_code=1 paranoid=2
 idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
 idtentry invalid_TSS do_invalid_TSS has_error_code=1
 idtentry segment_not_present do_segment_not_present has_error_code=1
@@ -1289,16 +1299,14 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(
 #endif

 /*
- * "Paranoid" exit path from exception stack.
- * Paranoid because this is used by NMIs and cannot take
- * any kernel state for granted.
- * We don't do kernel preemption checks here, because only
- * NMI should be common and it does not enable IRQs and
- * cannot get reschedule ticks.
+ * "Paranoid" exit path from exception stack. This is invoked
+ * only on return from non-NMI IST interrupts that came
+ * from kernel space.
  *
- * "trace" is 0 for the NMI handler only, because irq-tracing
- * is fundamentally NMI-unsafe. (we cannot change the soft and
- * hard flags at once, atomically)
+ * We may be returning to very strange contexts (e.g. very early
+ * in syscall entry), so checking for preemption here would
+ * be complicated. Fortunately, we there's no good reason
+ * to try to handle preemption here.
  */

         /* ebx: no swapgs flag */
@@ -1308,43 +1316,14 @@ ENTRY(paranoid_exit)
         TRACE_IRQS_OFF_DEBUG
         testl %ebx,%ebx /* swapgs needed? */
         jnz paranoid_restore
-        testl $3,CS(%rsp)
-        jnz paranoid_userspace
-paranoid_swapgs:
         TRACE_IRQS_IRETQ 0
         SWAPGS_UNSAFE_STACK
         RESTORE_ALL 8
-        jmp irq_return
+        INTERRUPT_RETURN
 paranoid_restore:
         TRACE_IRQS_IRETQ_DEBUG 0
         RESTORE_ALL 8
-        jmp irq_return
-paranoid_userspace:
-        GET_THREAD_INFO(%rcx)
-        movl TI_flags(%rcx),%ebx
-        andl $_TIF_WORK_MASK,%ebx
-        jz paranoid_swapgs
-        movq %rsp,%rdi /* &pt_regs */
-        call sync_regs
-        movq %rax,%rsp /* switch stack for scheduling */
-        testl $_TIF_NEED_RESCHED,%ebx
-        jnz paranoid_schedule
-        movl %ebx,%edx /* arg3: thread flags */
-        TRACE_IRQS_ON
-        ENABLE_INTERRUPTS(CLBR_NONE)
-        xorl %esi,%esi /* arg2: oldset */
-        movq %rsp,%rdi /* arg1: &pt_regs */
-        call do_notify_resume
-        DISABLE_INTERRUPTS(CLBR_NONE)
-        TRACE_IRQS_OFF
-        jmp paranoid_userspace
-paranoid_schedule:
-        TRACE_IRQS_ON
-        ENABLE_INTERRUPTS(CLBR_ANY)
-        SCHEDULE_USER
-        DISABLE_INTERRUPTS(CLBR_ANY)
-        TRACE_IRQS_OFF
-        jmp paranoid_userspace
+        INTERRUPT_RETURN
         CFI_ENDPROC
 END(paranoid_exit)

diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 63ce838e5a54..28d28f5eb8f4 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -69,16 +69,9 @@ static void call_on_stack(void *func, void *stack)
                      : "memory", "cc", "edx", "ecx", "eax");
 }

-/* how to get the current stack pointer from C */
-#define current_stack_pointer ({ \
-        unsigned long sp; \
-        asm("mov %%esp,%0" : "=g" (sp)); \
-        sp; \
-})
-
 static inline void *current_stack(void)
 {
-        return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
+        return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
 }

 static inline int
@@ -103,7 +96,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)

         /* Save the next esp at the bottom of the stack */
         prev_esp = (u32 *)irqstk;
-        *prev_esp = current_stack_pointer;
+        *prev_esp = current_stack_pointer();

         if (unlikely(overflow))
                 call_on_stack(print_stack_overflow, isp);
@@ -156,7 +149,7 @@ void do_softirq_own_stack(void)

         /* Push the previous esp onto the stack */
         prev_esp = (u32 *)irqstk;
-        *prev_esp = current_stack_pointer;
+        *prev_esp = current_stack_pointer();

         call_on_stack(__do_softirq, isp);
 }
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index ed37a768d0fc..2a33c8f68319 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -740,12 +740,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 {
         user_exit();

-#ifdef CONFIG_X86_MCE
-        /* notify userspace of pending MCEs */
-        if (thread_info_flags & _TIF_MCE_NOTIFY)
-                mce_notify_process();
-#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
-
         if (thread_info_flags & _TIF_UPROBE)
                 uprobe_notify_resume(regs);

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 88900e288021..c74f2f5652da 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -108,6 +108,88 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
         preempt_count_dec();
 }

+enum ctx_state ist_enter(struct pt_regs *regs)
+{
+        enum ctx_state prev_state;
+
+        if (user_mode_vm(regs)) {
+                /* Other than that, we're just an exception. */
+                prev_state = exception_enter();
+        } else {
+                /*
+                 * We might have interrupted pretty much anything. In
+                 * fact, if we're a machine check, we can even interrupt
+                 * NMI processing. We don't want in_nmi() to return true,
+                 * but we need to notify RCU.
+                 */
+                rcu_nmi_enter();
+                prev_state = IN_KERNEL; /* the value is irrelevant. */
+        }
+
+        /*
+         * We are atomic because we're on the IST stack (or we're on x86_32,
+         * in which case we still shouldn't schedule).
+         *
+         * This must be after exception_enter(), because exception_enter()
+         * won't do anything if in_interrupt() returns true.
+         */
+        preempt_count_add(HARDIRQ_OFFSET);
+
+        /* This code is a bit fragile. Test it. */
+        rcu_lockdep_assert(rcu_is_watching(), "ist_enter didn't work");
+
+        return prev_state;
+}
+
+void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
+{
+        /* Must be before exception_exit. */
+        preempt_count_sub(HARDIRQ_OFFSET);
+
+        if (user_mode_vm(regs))
+                return exception_exit(prev_state);
+        else
+                rcu_nmi_exit();
+}
+
+/**
+ * ist_begin_non_atomic() - begin a non-atomic section in an IST exception
+ * @regs: regs passed to the IST exception handler
+ *
+ * IST exception handlers normally cannot schedule. As a special
+ * exception, if the exception interrupted userspace code (i.e.
+ * user_mode_vm(regs) would return true) and the exception was not
+ * a double fault, it can be safe to schedule. ist_begin_non_atomic()
+ * begins a non-atomic section within an ist_enter()/ist_exit() region.
+ * Callers are responsible for enabling interrupts themselves inside
+ * the non-atomic section, and callers must call is_end_non_atomic()
+ * before ist_exit().
+ */
+void ist_begin_non_atomic(struct pt_regs *regs)
+{
+        BUG_ON(!user_mode_vm(regs));
+
+        /*
+         * Sanity check: we need to be on the normal thread stack. This
+         * will catch asm bugs and any attempt to use ist_preempt_enable
+         * from double_fault.
+         */
+        BUG_ON(((current_stack_pointer() ^ this_cpu_read_stable(kernel_stack))
+                & ~(THREAD_SIZE - 1)) != 0);
+
+        preempt_count_sub(HARDIRQ_OFFSET);
+}
+
+/**
+ * ist_end_non_atomic() - begin a non-atomic section in an IST exception
+ *
+ * Ends a non-atomic section started with ist_begin_non_atomic().
+ */
+void ist_end_non_atomic(void)
+{
+        preempt_count_add(HARDIRQ_OFFSET);
+}
+
 static nokprobe_inline int
 do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
                   struct pt_regs *regs, long error_code)
@@ -251,6 +333,8 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
          * end up promoting it to a doublefault. In that case, modify
          * the stack to make it look like we just entered the #GP
          * handler from user space, similar to bad_iret.
+         *
+         * No need for ist_enter here because we don't use RCU.
          */
         if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
                 regs->cs == __KERNEL_CS &&
@@ -263,12 +347,12 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
                 normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */
                 regs->ip = (unsigned long)general_protection;
                 regs->sp = (unsigned long)&normal_regs->orig_ax;
+
                 return;
         }
 #endif

-        exception_enter();
-        /* Return not checked because double check cannot be ignored */
+        ist_enter(regs); /* Discard prev_state because we won't return. */
         notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);

         tsk->thread.error_code = error_code;
@@ -434,7 +518,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
         if (poke_int3_handler(regs))
                 return;

-        prev_state = exception_enter();
+        prev_state = ist_enter(regs);
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
         if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
                         SIGTRAP) == NOTIFY_STOP)
@@ -460,33 +544,20 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
         preempt_conditional_cli(regs);
         debug_stack_usage_dec();
 exit:
-        exception_exit(prev_state);
+        ist_exit(regs, prev_state);
 }
 NOKPROBE_SYMBOL(do_int3);

 #ifdef CONFIG_X86_64
 /*
- * Help handler running on IST stack to switch back to user stack
- * for scheduling or signal handling. The actual stack switch is done in
- * entry.S
+ * Help handler running on IST stack to switch off the IST stack if the
+ * interrupted code was in user mode. The actual stack switch is done in
+ * entry_64.S
  */
 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
 {
-        struct pt_regs *regs = eregs;
-        /* Did already sync */
-        if (eregs == (struct pt_regs *)eregs->sp)
-                ;
-        /* Exception from user space */
-        else if (user_mode(eregs))
-                regs = task_pt_regs(current);
-        /*
-         * Exception from kernel and interrupts are enabled. Move to
-         * kernel process stack.
-         */
-        else if (eregs->flags & X86_EFLAGS_IF)
-                regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
-        if (eregs != regs)
-                *regs = *eregs;
+        struct pt_regs *regs = task_pt_regs(current);
+        *regs = *eregs;
         return regs;
 }
 NOKPROBE_SYMBOL(sync_regs);
@@ -554,7 +625,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
         unsigned long dr6;
         int si_code;

-        prev_state = exception_enter();
+        prev_state = ist_enter(regs);

         get_debugreg(dr6, 6);

@@ -629,7 +700,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
         debug_stack_usage_dec();

 exit:
-        exception_exit(prev_state);
+        ist_exit(regs, prev_state);
 }
 NOKPROBE_SYMBOL(do_debug);

diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 5a4affe025e8..09297c8e1fcd 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -205,4 +205,4 @@ $(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE
 PHONY += vdso_install $(vdso_img_insttargets)
 vdso_install: $(vdso_img_insttargets) FORCE

-clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80*
+clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64*