author	Ingo Molnar <mingo@kernel.org>	2015-06-08 14:43:07 -0400
committer	Ingo Molnar <mingo@kernel.org>	2015-06-08 14:48:43 -0400
commit	4d7321381e5c7102a3d3faf0a0a0035a09619612 (patch)
tree	2e35b99d9ec62501cd568c594dfd9a115d86209c
parent	9dda1658a9bd450d65da5153a2427955785d17c2 (diff)
x86/asm/entry/64: Clean up entry_64.S
Make the 64-bit syscall entry code a bit more readable:

 - use consistent assembly coding style similar to the other entry_*.S files
 - remove old comments that are not true anymore
 - eliminate whitespace noise
 - use consistent vertical spacing
 - fix various comments
 - reorganize entry point generation tables to be more readable

No code changed:

  # arch/x86/entry/entry_64.o:

   text    data     bss     dec     hex filename
  12282       0       0   12282    2ffa entry_64.o.before
  12282       0       0   12282    2ffa entry_64.o.after

md5:
   cbab1f2d727a2a8a87618eeb79f391b7  entry_64.o.before.asm
   cbab1f2d727a2a8a87618eeb79f391b7  entry_64.o.after.asm

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--	arch/x86/entry/entry_64.S	820
1 file changed, 404 insertions, 416 deletions
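The "No code changed" figures in the commit message come from comparing the built object file before and after the patch: identical section sizes and an identical md5 of the disassembly listing. A minimal sketch of that kind of check is below; it assumes arch/x86/entry/entry_64.o is built once on the parent commit and once with this patch and copied to entry_64.o.before / entry_64.o.after, and the objdump invocation is an assumption about how the .asm files were produced, not something stated in the commit.

  # Assumed workflow: build arch/x86/entry/entry_64.o on the parent commit and
  # copy it to entry_64.o.before, then rebuild with this patch applied and
  # copy it to entry_64.o.after.

  # Section sizes must match (this is the text/data/bss table quoted above):
  size entry_64.o.before entry_64.o.after

  # Disassemble both objects and compare checksums of the listings
  # (the *.asm names match the md5 lines in the commit message):
  objdump -d entry_64.o.before > entry_64.o.before.asm
  objdump -d entry_64.o.after  > entry_64.o.after.asm
  md5sum entry_64.o.before.asm entry_64.o.after.asm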
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index d2a0ed211bed..bd97161f90cb 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -4,26 +4,20 @@
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs 5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> 6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
7 */ 7 *
8
9/*
10 * entry.S contains the system-call and fault low-level handling routines. 8 * entry.S contains the system-call and fault low-level handling routines.
11 * 9 *
12 * Some of this is documented in Documentation/x86/entry_64.txt 10 * Some of this is documented in Documentation/x86/entry_64.txt
13 * 11 *
14 * NOTE: This code handles signal-recognition, which happens every time
15 * after an interrupt and after each system call.
16 *
17 * A note on terminology: 12 * A note on terminology:
18 * - iret frame: Architecture defined interrupt frame from SS to RIP 13 * - iret frame: Architecture defined interrupt frame from SS to RIP
19 * at the top of the kernel process stack. 14 * at the top of the kernel process stack.
20 * 15 *
21 * Some macro usage: 16 * Some macro usage:
22 * - ENTRY/END Define functions in the symbol table. 17 * - ENTRY/END: Define functions in the symbol table.
23 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging. 18 * - TRACE_IRQ_*: Trace hardirq state for lock debugging.
24 * - idtentry - Define exception entry points. 19 * - idtentry: Define exception entry points.
25 */ 20 */
26
27#include <linux/linkage.h> 21#include <linux/linkage.h>
28#include <asm/segment.h> 22#include <asm/segment.h>
29#include <asm/cache.h> 23#include <asm/cache.h>
@@ -46,13 +40,12 @@
46 40
47/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 41/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
48#include <linux/elf-em.h> 42#include <linux/elf-em.h>
49#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) 43#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
50#define __AUDIT_ARCH_64BIT 0x80000000 44#define __AUDIT_ARCH_64BIT 0x80000000
51#define __AUDIT_ARCH_LE 0x40000000 45#define __AUDIT_ARCH_LE 0x40000000
52
53 .code64
54 .section .entry.text, "ax"
55 46
47.code64
48.section .entry.text, "ax"
56 49
57#ifdef CONFIG_PARAVIRT 50#ifdef CONFIG_PARAVIRT
58ENTRY(native_usergs_sysret64) 51ENTRY(native_usergs_sysret64)
@@ -61,11 +54,10 @@ ENTRY(native_usergs_sysret64)
61ENDPROC(native_usergs_sysret64) 54ENDPROC(native_usergs_sysret64)
62#endif /* CONFIG_PARAVIRT */ 55#endif /* CONFIG_PARAVIRT */
63 56
64
65.macro TRACE_IRQS_IRETQ 57.macro TRACE_IRQS_IRETQ
66#ifdef CONFIG_TRACE_IRQFLAGS 58#ifdef CONFIG_TRACE_IRQFLAGS
67 bt $9,EFLAGS(%rsp) /* interrupts off? */ 59 bt $9, EFLAGS(%rsp) /* interrupts off? */
68 jnc 1f 60 jnc 1f
69 TRACE_IRQS_ON 61 TRACE_IRQS_ON
701: 621:
71#endif 63#endif
@@ -85,34 +77,34 @@ ENDPROC(native_usergs_sysret64)
85#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) 77#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)
86 78
87.macro TRACE_IRQS_OFF_DEBUG 79.macro TRACE_IRQS_OFF_DEBUG
88 call debug_stack_set_zero 80 call debug_stack_set_zero
89 TRACE_IRQS_OFF 81 TRACE_IRQS_OFF
90 call debug_stack_reset 82 call debug_stack_reset
91.endm 83.endm
92 84
93.macro TRACE_IRQS_ON_DEBUG 85.macro TRACE_IRQS_ON_DEBUG
94 call debug_stack_set_zero 86 call debug_stack_set_zero
95 TRACE_IRQS_ON 87 TRACE_IRQS_ON
96 call debug_stack_reset 88 call debug_stack_reset
97.endm 89.endm
98 90
99.macro TRACE_IRQS_IRETQ_DEBUG 91.macro TRACE_IRQS_IRETQ_DEBUG
100 bt $9,EFLAGS(%rsp) /* interrupts off? */ 92 bt $9, EFLAGS(%rsp) /* interrupts off? */
101 jnc 1f 93 jnc 1f
102 TRACE_IRQS_ON_DEBUG 94 TRACE_IRQS_ON_DEBUG
1031: 951:
104.endm 96.endm
105 97
106#else 98#else
107# define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF 99# define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF
108# define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON 100# define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON
109# define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ 101# define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ
110#endif 102#endif
111 103
112/* 104/*
113 * 64bit SYSCALL instruction entry. Up to 6 arguments in registers. 105 * 64-bit SYSCALL instruction entry. Up to 6 arguments in registers.
114 * 106 *
115 * 64bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11, 107 * 64-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
116 * then loads new ss, cs, and rip from previously programmed MSRs. 108 * then loads new ss, cs, and rip from previously programmed MSRs.
117 * rflags gets masked by a value from another MSR (so CLD and CLAC 109 * rflags gets masked by a value from another MSR (so CLD and CLAC
118 * are not needed). SYSCALL does not save anything on the stack 110 * are not needed). SYSCALL does not save anything on the stack
@@ -128,7 +120,7 @@ ENDPROC(native_usergs_sysret64)
128 * r10 arg3 (needs to be moved to rcx to conform to C ABI) 120 * r10 arg3 (needs to be moved to rcx to conform to C ABI)
129 * r8 arg4 121 * r8 arg4
130 * r9 arg5 122 * r9 arg5
131 * (note: r12-r15,rbp,rbx are callee-preserved in C ABI) 123 * (note: r12-r15, rbp, rbx are callee-preserved in C ABI)
132 * 124 *
133 * Only called from user space. 125 * Only called from user space.
134 * 126 *
@@ -151,12 +143,12 @@ ENTRY(entry_SYSCALL_64)
151 */ 143 */
152GLOBAL(entry_SYSCALL_64_after_swapgs) 144GLOBAL(entry_SYSCALL_64_after_swapgs)
153 145
154 movq %rsp,PER_CPU_VAR(rsp_scratch) 146 movq %rsp, PER_CPU_VAR(rsp_scratch)
155 movq PER_CPU_VAR(cpu_current_top_of_stack),%rsp 147 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
156 148
157 /* Construct struct pt_regs on stack */ 149 /* Construct struct pt_regs on stack */
158 pushq $__USER_DS /* pt_regs->ss */ 150 pushq $__USER_DS /* pt_regs->ss */
159 pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */ 151 pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */
160 /* 152 /*
161 * Re-enable interrupts. 153 * Re-enable interrupts.
162 * We use 'rsp_scratch' as a scratch space, hence irq-off block above 154 * We use 'rsp_scratch' as a scratch space, hence irq-off block above
@@ -165,34 +157,34 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
165 * with using rsp_scratch: 157 * with using rsp_scratch:
166 */ 158 */
167 ENABLE_INTERRUPTS(CLBR_NONE) 159 ENABLE_INTERRUPTS(CLBR_NONE)
168 pushq %r11 /* pt_regs->flags */ 160 pushq %r11 /* pt_regs->flags */
169 pushq $__USER_CS /* pt_regs->cs */ 161 pushq $__USER_CS /* pt_regs->cs */
170 pushq %rcx /* pt_regs->ip */ 162 pushq %rcx /* pt_regs->ip */
171 pushq %rax /* pt_regs->orig_ax */ 163 pushq %rax /* pt_regs->orig_ax */
172 pushq %rdi /* pt_regs->di */ 164 pushq %rdi /* pt_regs->di */
173 pushq %rsi /* pt_regs->si */ 165 pushq %rsi /* pt_regs->si */
174 pushq %rdx /* pt_regs->dx */ 166 pushq %rdx /* pt_regs->dx */
175 pushq %rcx /* pt_regs->cx */ 167 pushq %rcx /* pt_regs->cx */
176 pushq $-ENOSYS /* pt_regs->ax */ 168 pushq $-ENOSYS /* pt_regs->ax */
177 pushq %r8 /* pt_regs->r8 */ 169 pushq %r8 /* pt_regs->r8 */
178 pushq %r9 /* pt_regs->r9 */ 170 pushq %r9 /* pt_regs->r9 */
179 pushq %r10 /* pt_regs->r10 */ 171 pushq %r10 /* pt_regs->r10 */
180 pushq %r11 /* pt_regs->r11 */ 172 pushq %r11 /* pt_regs->r11 */
181 sub $(6*8),%rsp /* pt_regs->bp,bx,r12-15 not saved */ 173 sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
182 174
183 testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) 175 testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
184 jnz tracesys 176 jnz tracesys
185entry_SYSCALL_64_fastpath: 177entry_SYSCALL_64_fastpath:
186#if __SYSCALL_MASK == ~0 178#if __SYSCALL_MASK == ~0
187 cmpq $__NR_syscall_max,%rax 179 cmpq $__NR_syscall_max, %rax
188#else 180#else
189 andl $__SYSCALL_MASK,%eax 181 andl $__SYSCALL_MASK, %eax
190 cmpl $__NR_syscall_max,%eax 182 cmpl $__NR_syscall_max, %eax
191#endif 183#endif
192 ja 1f /* return -ENOSYS (already in pt_regs->ax) */ 184 ja 1f /* return -ENOSYS (already in pt_regs->ax) */
193 movq %r10,%rcx 185 movq %r10, %rcx
194 call *sys_call_table(,%rax,8) 186 call *sys_call_table(, %rax, 8)
195 movq %rax,RAX(%rsp) 187 movq %rax, RAX(%rsp)
1961: 1881:
197/* 189/*
198 * Syscall return path ending with SYSRET (fast path). 190 * Syscall return path ending with SYSRET (fast path).
@@ -213,15 +205,15 @@ entry_SYSCALL_64_fastpath:
213 * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is 205 * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is
214 * very bad. 206 * very bad.
215 */ 207 */
216 testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) 208 testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
217 jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */ 209 jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */
218 210
219 RESTORE_C_REGS_EXCEPT_RCX_R11 211 RESTORE_C_REGS_EXCEPT_RCX_R11
220 movq RIP(%rsp),%rcx 212 movq RIP(%rsp), %rcx
221 movq EFLAGS(%rsp),%r11 213 movq EFLAGS(%rsp), %r11
222 movq RSP(%rsp),%rsp 214 movq RSP(%rsp), %rsp
223 /* 215 /*
224 * 64bit SYSRET restores rip from rcx, 216 * 64-bit SYSRET restores rip from rcx,
225 * rflags from r11 (but RF and VM bits are forced to 0), 217 * rflags from r11 (but RF and VM bits are forced to 0),
226 * cs and ss are loaded from MSRs. 218 * cs and ss are loaded from MSRs.
227 * Restoration of rflags re-enables interrupts. 219 * Restoration of rflags re-enables interrupts.
@@ -239,21 +231,21 @@ entry_SYSCALL_64_fastpath:
239 231
240 /* Do syscall entry tracing */ 232 /* Do syscall entry tracing */
241tracesys: 233tracesys:
242 movq %rsp, %rdi 234 movq %rsp, %rdi
243 movl $AUDIT_ARCH_X86_64, %esi 235 movl $AUDIT_ARCH_X86_64, %esi
244 call syscall_trace_enter_phase1 236 call syscall_trace_enter_phase1
245 test %rax, %rax 237 test %rax, %rax
246 jnz tracesys_phase2 /* if needed, run the slow path */ 238 jnz tracesys_phase2 /* if needed, run the slow path */
247 RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */ 239 RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */
248 movq ORIG_RAX(%rsp), %rax 240 movq ORIG_RAX(%rsp), %rax
249 jmp entry_SYSCALL_64_fastpath /* and return to the fast path */ 241 jmp entry_SYSCALL_64_fastpath /* and return to the fast path */
250 242
251tracesys_phase2: 243tracesys_phase2:
252 SAVE_EXTRA_REGS 244 SAVE_EXTRA_REGS
253 movq %rsp, %rdi 245 movq %rsp, %rdi
254 movl $AUDIT_ARCH_X86_64, %esi 246 movl $AUDIT_ARCH_X86_64, %esi
255 movq %rax,%rdx 247 movq %rax, %rdx
256 call syscall_trace_enter_phase2 248 call syscall_trace_enter_phase2
257 249
258 /* 250 /*
259 * Reload registers from stack in case ptrace changed them. 251 * Reload registers from stack in case ptrace changed them.
@@ -263,15 +255,15 @@ tracesys_phase2:
263 RESTORE_C_REGS_EXCEPT_RAX 255 RESTORE_C_REGS_EXCEPT_RAX
264 RESTORE_EXTRA_REGS 256 RESTORE_EXTRA_REGS
265#if __SYSCALL_MASK == ~0 257#if __SYSCALL_MASK == ~0
266 cmpq $__NR_syscall_max,%rax 258 cmpq $__NR_syscall_max, %rax
267#else 259#else
268 andl $__SYSCALL_MASK,%eax 260 andl $__SYSCALL_MASK, %eax
269 cmpl $__NR_syscall_max,%eax 261 cmpl $__NR_syscall_max, %eax
270#endif 262#endif
271 ja 1f /* return -ENOSYS (already in pt_regs->ax) */ 263 ja 1f /* return -ENOSYS (already in pt_regs->ax) */
272 movq %r10,%rcx /* fixup for C */ 264 movq %r10, %rcx /* fixup for C */
273 call *sys_call_table(,%rax,8) 265 call *sys_call_table(, %rax, 8)
274 movq %rax,RAX(%rsp) 266 movq %rax, RAX(%rsp)
2751: 2671:
276 /* Use IRET because user could have changed pt_regs->foo */ 268 /* Use IRET because user could have changed pt_regs->foo */
277 269
@@ -283,31 +275,33 @@ GLOBAL(int_ret_from_sys_call)
283 DISABLE_INTERRUPTS(CLBR_NONE) 275 DISABLE_INTERRUPTS(CLBR_NONE)
284int_ret_from_sys_call_irqs_off: /* jumps come here from the irqs-off SYSRET path */ 276int_ret_from_sys_call_irqs_off: /* jumps come here from the irqs-off SYSRET path */
285 TRACE_IRQS_OFF 277 TRACE_IRQS_OFF
286 movl $_TIF_ALLWORK_MASK,%edi 278 movl $_TIF_ALLWORK_MASK, %edi
287 /* edi: mask to check */ 279 /* edi: mask to check */
288GLOBAL(int_with_check) 280GLOBAL(int_with_check)
289 LOCKDEP_SYS_EXIT_IRQ 281 LOCKDEP_SYS_EXIT_IRQ
290 GET_THREAD_INFO(%rcx) 282 GET_THREAD_INFO(%rcx)
291 movl TI_flags(%rcx),%edx 283 movl TI_flags(%rcx), %edx
292 andl %edi,%edx 284 andl %edi, %edx
293 jnz int_careful 285 jnz int_careful
294 andl $~TS_COMPAT,TI_status(%rcx) 286 andl $~TS_COMPAT, TI_status(%rcx)
295 jmp syscall_return 287 jmp syscall_return
296 288
297 /* Either reschedule or signal or syscall exit tracking needed. */ 289 /*
298 /* First do a reschedule test. */ 290 * Either reschedule or signal or syscall exit tracking needed.
299 /* edx: work, edi: workmask */ 291 * First do a reschedule test.
292 * edx: work, edi: workmask
293 */
300int_careful: 294int_careful:
301 bt $TIF_NEED_RESCHED,%edx 295 bt $TIF_NEED_RESCHED, %edx
302 jnc int_very_careful 296 jnc int_very_careful
303 TRACE_IRQS_ON 297 TRACE_IRQS_ON
304 ENABLE_INTERRUPTS(CLBR_NONE) 298 ENABLE_INTERRUPTS(CLBR_NONE)
305 pushq %rdi 299 pushq %rdi
306 SCHEDULE_USER 300 SCHEDULE_USER
307 popq %rdi 301 popq %rdi
308 DISABLE_INTERRUPTS(CLBR_NONE) 302 DISABLE_INTERRUPTS(CLBR_NONE)
309 TRACE_IRQS_OFF 303 TRACE_IRQS_OFF
310 jmp int_with_check 304 jmp int_with_check
311 305
312 /* handle signals and tracing -- both require a full pt_regs */ 306 /* handle signals and tracing -- both require a full pt_regs */
313int_very_careful: 307int_very_careful:
@@ -315,27 +309,27 @@ int_very_careful:
315 ENABLE_INTERRUPTS(CLBR_NONE) 309 ENABLE_INTERRUPTS(CLBR_NONE)
316 SAVE_EXTRA_REGS 310 SAVE_EXTRA_REGS
317 /* Check for syscall exit trace */ 311 /* Check for syscall exit trace */
318 testl $_TIF_WORK_SYSCALL_EXIT,%edx 312 testl $_TIF_WORK_SYSCALL_EXIT, %edx
319 jz int_signal 313 jz int_signal
320 pushq %rdi 314 pushq %rdi
321 leaq 8(%rsp),%rdi # &ptregs -> arg1 315 leaq 8(%rsp), %rdi /* &ptregs -> arg1 */
322 call syscall_trace_leave 316 call syscall_trace_leave
323 popq %rdi 317 popq %rdi
324 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi 318 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU), %edi
325 jmp int_restore_rest 319 jmp int_restore_rest
326 320
327int_signal: 321int_signal:
328 testl $_TIF_DO_NOTIFY_MASK,%edx 322 testl $_TIF_DO_NOTIFY_MASK, %edx
329 jz 1f 323 jz 1f
330 movq %rsp,%rdi # &ptregs -> arg1 324 movq %rsp, %rdi /* &ptregs -> arg1 */
331 xorl %esi,%esi # oldset -> arg2 325 xorl %esi, %esi /* oldset -> arg2 */
332 call do_notify_resume 326 call do_notify_resume
3331: movl $_TIF_WORK_MASK,%edi 3271: movl $_TIF_WORK_MASK, %edi
334int_restore_rest: 328int_restore_rest:
335 RESTORE_EXTRA_REGS 329 RESTORE_EXTRA_REGS
336 DISABLE_INTERRUPTS(CLBR_NONE) 330 DISABLE_INTERRUPTS(CLBR_NONE)
337 TRACE_IRQS_OFF 331 TRACE_IRQS_OFF
338 jmp int_with_check 332 jmp int_with_check
339 333
340syscall_return: 334syscall_return:
341 /* The IRETQ could re-enable interrupts: */ 335 /* The IRETQ could re-enable interrupts: */
@@ -346,10 +340,10 @@ syscall_return:
346 * Try to use SYSRET instead of IRET if we're returning to 340 * Try to use SYSRET instead of IRET if we're returning to
347 * a completely clean 64-bit userspace context. 341 * a completely clean 64-bit userspace context.
348 */ 342 */
349 movq RCX(%rsp),%rcx 343 movq RCX(%rsp), %rcx
350 movq RIP(%rsp),%r11 344 movq RIP(%rsp), %r11
351 cmpq %rcx,%r11 /* RCX == RIP */ 345 cmpq %rcx, %r11 /* RCX == RIP */
352 jne opportunistic_sysret_failed 346 jne opportunistic_sysret_failed
353 347
354 /* 348 /*
355 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP 349 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
@@ -362,19 +356,21 @@ syscall_return:
362 .ifne __VIRTUAL_MASK_SHIFT - 47 356 .ifne __VIRTUAL_MASK_SHIFT - 47
363 .error "virtual address width changed -- SYSRET checks need update" 357 .error "virtual address width changed -- SYSRET checks need update"
364 .endif 358 .endif
359
365 /* Change top 16 bits to be the sign-extension of 47th bit */ 360 /* Change top 16 bits to be the sign-extension of 47th bit */
366 shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx 361 shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
367 sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx 362 sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
363
368 /* If this changed %rcx, it was not canonical */ 364 /* If this changed %rcx, it was not canonical */
369 cmpq %rcx, %r11 365 cmpq %rcx, %r11
370 jne opportunistic_sysret_failed 366 jne opportunistic_sysret_failed
371 367
372 cmpq $__USER_CS,CS(%rsp) /* CS must match SYSRET */ 368 cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
373 jne opportunistic_sysret_failed 369 jne opportunistic_sysret_failed
374 370
375 movq R11(%rsp),%r11 371 movq R11(%rsp), %r11
376 cmpq %r11,EFLAGS(%rsp) /* R11 == RFLAGS */ 372 cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
377 jne opportunistic_sysret_failed 373 jne opportunistic_sysret_failed
378 374
379 /* 375 /*
380 * SYSRET can't restore RF. SYSRET can restore TF, but unlike IRET, 376 * SYSRET can't restore RF. SYSRET can restore TF, but unlike IRET,
@@ -383,29 +379,29 @@ syscall_return:
383 * with register state that satisfies the opportunistic SYSRET 379 * with register state that satisfies the opportunistic SYSRET
384 * conditions. For example, single-stepping this user code: 380 * conditions. For example, single-stepping this user code:
385 * 381 *
386 * movq $stuck_here,%rcx 382 * movq $stuck_here, %rcx
387 * pushfq 383 * pushfq
388 * popq %r11 384 * popq %r11
389 * stuck_here: 385 * stuck_here:
390 * 386 *
391 * would never get past 'stuck_here'. 387 * would never get past 'stuck_here'.
392 */ 388 */
393 testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11 389 testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
394 jnz opportunistic_sysret_failed 390 jnz opportunistic_sysret_failed
395 391
396 /* nothing to check for RSP */ 392 /* nothing to check for RSP */
397 393
398 cmpq $__USER_DS,SS(%rsp) /* SS must match SYSRET */ 394 cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
399 jne opportunistic_sysret_failed 395 jne opportunistic_sysret_failed
400 396
401 /* 397 /*
402 * We win! This label is here just for ease of understanding 398 * We win! This label is here just for ease of understanding
403 * perf profiles. Nothing jumps here. 399 * perf profiles. Nothing jumps here.
404 */ 400 */
405syscall_return_via_sysret: 401syscall_return_via_sysret:
406 /* rcx and r11 are already restored (see code above) */ 402 /* rcx and r11 are already restored (see code above) */
407 RESTORE_C_REGS_EXCEPT_RCX_R11 403 RESTORE_C_REGS_EXCEPT_RCX_R11
408 movq RSP(%rsp),%rsp 404 movq RSP(%rsp), %rsp
409 USERGS_SYSRET64 405 USERGS_SYSRET64
410 406
411opportunistic_sysret_failed: 407opportunistic_sysret_failed:
@@ -417,7 +413,7 @@ END(entry_SYSCALL_64)
417 .macro FORK_LIKE func 413 .macro FORK_LIKE func
418ENTRY(stub_\func) 414ENTRY(stub_\func)
419 SAVE_EXTRA_REGS 8 415 SAVE_EXTRA_REGS 8
420 jmp sys_\func 416 jmp sys_\func
421END(stub_\func) 417END(stub_\func)
422 .endm 418 .endm
423 419
@@ -436,7 +432,7 @@ return_from_execve:
436 /* must use IRET code path (pt_regs->cs may have changed) */ 432 /* must use IRET code path (pt_regs->cs may have changed) */
437 addq $8, %rsp 433 addq $8, %rsp
438 ZERO_EXTRA_REGS 434 ZERO_EXTRA_REGS
439 movq %rax,RAX(%rsp) 435 movq %rax, RAX(%rsp)
440 jmp int_ret_from_sys_call 436 jmp int_ret_from_sys_call
441END(stub_execve) 437END(stub_execve)
442/* 438/*
@@ -479,19 +475,19 @@ ENTRY(stub_rt_sigreturn)
479 * we SAVE_EXTRA_REGS here. 475 * we SAVE_EXTRA_REGS here.
480 */ 476 */
481 SAVE_EXTRA_REGS 8 477 SAVE_EXTRA_REGS 8
482 call sys_rt_sigreturn 478 call sys_rt_sigreturn
483return_from_stub: 479return_from_stub:
484 addq $8, %rsp 480 addq $8, %rsp
485 RESTORE_EXTRA_REGS 481 RESTORE_EXTRA_REGS
486 movq %rax,RAX(%rsp) 482 movq %rax, RAX(%rsp)
487 jmp int_ret_from_sys_call 483 jmp int_ret_from_sys_call
488END(stub_rt_sigreturn) 484END(stub_rt_sigreturn)
489 485
490#ifdef CONFIG_X86_X32_ABI 486#ifdef CONFIG_X86_X32_ABI
491ENTRY(stub_x32_rt_sigreturn) 487ENTRY(stub_x32_rt_sigreturn)
492 SAVE_EXTRA_REGS 8 488 SAVE_EXTRA_REGS 8
493 call sys32_x32_rt_sigreturn 489 call sys32_x32_rt_sigreturn
494 jmp return_from_stub 490 jmp return_from_stub
495END(stub_x32_rt_sigreturn) 491END(stub_x32_rt_sigreturn)
496#endif 492#endif
497 493
@@ -502,16 +498,16 @@ END(stub_x32_rt_sigreturn)
502 */ 498 */
503ENTRY(ret_from_fork) 499ENTRY(ret_from_fork)
504 500
505 LOCK ; btr $TIF_FORK,TI_flags(%r8) 501 LOCK ; btr $TIF_FORK, TI_flags(%r8)
506 502
507 pushq $0x0002 503 pushq $0x0002
508 popfq # reset kernel eflags 504 popfq /* reset kernel eflags */
509 505
510 call schedule_tail # rdi: 'prev' task parameter 506 call schedule_tail /* rdi: 'prev' task parameter */
511 507
512 RESTORE_EXTRA_REGS 508 RESTORE_EXTRA_REGS
513 509
514 testb $3, CS(%rsp) # from kernel_thread? 510 testb $3, CS(%rsp) /* from kernel_thread? */
515 511
516 /* 512 /*
517 * By the time we get here, we have no idea whether our pt_regs, 513 * By the time we get here, we have no idea whether our pt_regs,
@@ -522,13 +518,15 @@ ENTRY(ret_from_fork)
522 */ 518 */
523 jnz int_ret_from_sys_call 519 jnz int_ret_from_sys_call
524 520
525 /* We came from kernel_thread */ 521 /*
526 /* nb: we depend on RESTORE_EXTRA_REGS above */ 522 * We came from kernel_thread
527 movq %rbp, %rdi 523 * nb: we depend on RESTORE_EXTRA_REGS above
528 call *%rbx 524 */
529 movl $0, RAX(%rsp) 525 movq %rbp, %rdi
526 call *%rbx
527 movl $0, RAX(%rsp)
530 RESTORE_EXTRA_REGS 528 RESTORE_EXTRA_REGS
531 jmp int_ret_from_sys_call 529 jmp int_ret_from_sys_call
532END(ret_from_fork) 530END(ret_from_fork)
533 531
534/* 532/*
@@ -539,7 +537,7 @@ END(ret_from_fork)
539ENTRY(irq_entries_start) 537ENTRY(irq_entries_start)
540 vector=FIRST_EXTERNAL_VECTOR 538 vector=FIRST_EXTERNAL_VECTOR
541 .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) 539 .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
542 pushq $(~vector+0x80) /* Note: always in signed byte range */ 540 pushq $(~vector+0x80) /* Note: always in signed byte range */
543 vector=vector+1 541 vector=vector+1
544 jmp common_interrupt 542 jmp common_interrupt
545 .align 8 543 .align 8
@@ -569,7 +567,7 @@ END(irq_entries_start)
569 /* this goes to 0(%rsp) for unwinder, not for saving the value: */ 567 /* this goes to 0(%rsp) for unwinder, not for saving the value: */
570 SAVE_EXTRA_REGS_RBP -RBP 568 SAVE_EXTRA_REGS_RBP -RBP
571 569
572 leaq -RBP(%rsp),%rdi /* arg1 for \func (pointer to pt_regs) */ 570 leaq -RBP(%rsp), %rdi /* arg1 for \func (pointer to pt_regs) */
573 571
574 testb $3, CS-RBP(%rsp) 572 testb $3, CS-RBP(%rsp)
575 jz 1f 573 jz 1f
@@ -582,14 +580,14 @@ END(irq_entries_start)
582 * a little cheaper to use a separate counter in the PDA (short of 580 * a little cheaper to use a separate counter in the PDA (short of
583 * moving irq_enter into assembly, which would be too much work) 581 * moving irq_enter into assembly, which would be too much work)
584 */ 582 */
585 movq %rsp, %rsi 583 movq %rsp, %rsi
586 incl PER_CPU_VAR(irq_count) 584 incl PER_CPU_VAR(irq_count)
587 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp 585 cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp
588 pushq %rsi 586 pushq %rsi
589 /* We entered an interrupt context - irqs are off: */ 587 /* We entered an interrupt context - irqs are off: */
590 TRACE_IRQS_OFF 588 TRACE_IRQS_OFF
591 589
592 call \func 590 call \func
593 .endm 591 .endm
594 592
595 /* 593 /*
@@ -599,36 +597,35 @@ END(irq_entries_start)
599 .p2align CONFIG_X86_L1_CACHE_SHIFT 597 .p2align CONFIG_X86_L1_CACHE_SHIFT
600common_interrupt: 598common_interrupt:
601 ASM_CLAC 599 ASM_CLAC
602 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ 600 addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
603 interrupt do_IRQ 601 interrupt do_IRQ
604 /* 0(%rsp): old RSP */ 602 /* 0(%rsp): old RSP */
605ret_from_intr: 603ret_from_intr:
606 DISABLE_INTERRUPTS(CLBR_NONE) 604 DISABLE_INTERRUPTS(CLBR_NONE)
607 TRACE_IRQS_OFF 605 TRACE_IRQS_OFF
608 decl PER_CPU_VAR(irq_count) 606 decl PER_CPU_VAR(irq_count)
609 607
610 /* Restore saved previous stack */ 608 /* Restore saved previous stack */
611 popq %rsi 609 popq %rsi
612 /* return code expects complete pt_regs - adjust rsp accordingly: */ 610 /* return code expects complete pt_regs - adjust rsp accordingly: */
613 leaq -RBP(%rsi),%rsp 611 leaq -RBP(%rsi), %rsp
614 612
615 testb $3, CS(%rsp) 613 testb $3, CS(%rsp)
616 jz retint_kernel 614 jz retint_kernel
617 /* Interrupt came from user space */ 615 /* Interrupt came from user space */
618retint_user: 616retint_user:
619 GET_THREAD_INFO(%rcx) 617 GET_THREAD_INFO(%rcx)
620 /* 618
621 * %rcx: thread info. Interrupts off. 619 /* %rcx: thread info. Interrupts are off. */
622 */
623retint_with_reschedule: 620retint_with_reschedule:
624 movl $_TIF_WORK_MASK,%edi 621 movl $_TIF_WORK_MASK, %edi
625retint_check: 622retint_check:
626 LOCKDEP_SYS_EXIT_IRQ 623 LOCKDEP_SYS_EXIT_IRQ
627 movl TI_flags(%rcx),%edx 624 movl TI_flags(%rcx), %edx
628 andl %edi,%edx 625 andl %edi, %edx
629 jnz retint_careful 626 jnz retint_careful
630 627
631retint_swapgs: /* return to user-space */ 628retint_swapgs: /* return to user-space */
632 /* 629 /*
633 * The iretq could re-enable interrupts: 630 * The iretq could re-enable interrupts:
634 */ 631 */
@@ -643,9 +640,9 @@ retint_kernel:
643#ifdef CONFIG_PREEMPT 640#ifdef CONFIG_PREEMPT
644 /* Interrupts are off */ 641 /* Interrupts are off */
645 /* Check if we need preemption */ 642 /* Check if we need preemption */
646 bt $9,EFLAGS(%rsp) /* interrupts were off? */ 643 bt $9, EFLAGS(%rsp) /* were interrupts off? */
647 jnc 1f 644 jnc 1f
6480: cmpl $0,PER_CPU_VAR(__preempt_count) 6450: cmpl $0, PER_CPU_VAR(__preempt_count)
649 jnz 1f 646 jnz 1f
650 call preempt_schedule_irq 647 call preempt_schedule_irq
651 jmp 0b 648 jmp 0b
@@ -671,8 +668,8 @@ ENTRY(native_iret)
671 * 64-bit mode SS:RSP on the exception stack is always valid. 668 * 64-bit mode SS:RSP on the exception stack is always valid.
672 */ 669 */
673#ifdef CONFIG_X86_ESPFIX64 670#ifdef CONFIG_X86_ESPFIX64
674 testb $4,(SS-RIP)(%rsp) 671 testb $4, (SS-RIP)(%rsp)
675 jnz native_irq_return_ldt 672 jnz native_irq_return_ldt
676#endif 673#endif
677 674
678.global native_irq_return_iret 675.global native_irq_return_iret
@@ -687,59 +684,59 @@ native_irq_return_iret:
687 684
688#ifdef CONFIG_X86_ESPFIX64 685#ifdef CONFIG_X86_ESPFIX64
689native_irq_return_ldt: 686native_irq_return_ldt:
690 pushq %rax 687 pushq %rax
691 pushq %rdi 688 pushq %rdi
692 SWAPGS 689 SWAPGS
693 movq PER_CPU_VAR(espfix_waddr),%rdi 690 movq PER_CPU_VAR(espfix_waddr), %rdi
694 movq %rax,(0*8)(%rdi) /* RAX */ 691 movq %rax, (0*8)(%rdi) /* RAX */
695 movq (2*8)(%rsp),%rax /* RIP */ 692 movq (2*8)(%rsp), %rax /* RIP */
696 movq %rax,(1*8)(%rdi) 693 movq %rax, (1*8)(%rdi)
697 movq (3*8)(%rsp),%rax /* CS */ 694 movq (3*8)(%rsp), %rax /* CS */
698 movq %rax,(2*8)(%rdi) 695 movq %rax, (2*8)(%rdi)
699 movq (4*8)(%rsp),%rax /* RFLAGS */ 696 movq (4*8)(%rsp), %rax /* RFLAGS */
700 movq %rax,(3*8)(%rdi) 697 movq %rax, (3*8)(%rdi)
701 movq (6*8)(%rsp),%rax /* SS */ 698 movq (6*8)(%rsp), %rax /* SS */
702 movq %rax,(5*8)(%rdi) 699 movq %rax, (5*8)(%rdi)
703 movq (5*8)(%rsp),%rax /* RSP */ 700 movq (5*8)(%rsp), %rax /* RSP */
704 movq %rax,(4*8)(%rdi) 701 movq %rax, (4*8)(%rdi)
705 andl $0xffff0000,%eax 702 andl $0xffff0000, %eax
706 popq %rdi 703 popq %rdi
707 orq PER_CPU_VAR(espfix_stack),%rax 704 orq PER_CPU_VAR(espfix_stack), %rax
708 SWAPGS 705 SWAPGS
709 movq %rax,%rsp 706 movq %rax, %rsp
710 popq %rax 707 popq %rax
711 jmp native_irq_return_iret 708 jmp native_irq_return_iret
712#endif 709#endif
713 710
714 /* edi: workmask, edx: work */ 711 /* edi: workmask, edx: work */
715retint_careful: 712retint_careful:
716 bt $TIF_NEED_RESCHED,%edx 713 bt $TIF_NEED_RESCHED, %edx
717 jnc retint_signal 714 jnc retint_signal
718 TRACE_IRQS_ON 715 TRACE_IRQS_ON
719 ENABLE_INTERRUPTS(CLBR_NONE) 716 ENABLE_INTERRUPTS(CLBR_NONE)
720 pushq %rdi 717 pushq %rdi
721 SCHEDULE_USER 718 SCHEDULE_USER
722 popq %rdi 719 popq %rdi
723 GET_THREAD_INFO(%rcx) 720 GET_THREAD_INFO(%rcx)
724 DISABLE_INTERRUPTS(CLBR_NONE) 721 DISABLE_INTERRUPTS(CLBR_NONE)
725 TRACE_IRQS_OFF 722 TRACE_IRQS_OFF
726 jmp retint_check 723 jmp retint_check
727 724
728retint_signal: 725retint_signal:
729 testl $_TIF_DO_NOTIFY_MASK,%edx 726 testl $_TIF_DO_NOTIFY_MASK, %edx
730 jz retint_swapgs 727 jz retint_swapgs
731 TRACE_IRQS_ON 728 TRACE_IRQS_ON
732 ENABLE_INTERRUPTS(CLBR_NONE) 729 ENABLE_INTERRUPTS(CLBR_NONE)
733 SAVE_EXTRA_REGS 730 SAVE_EXTRA_REGS
734 movq $-1,ORIG_RAX(%rsp) 731 movq $-1, ORIG_RAX(%rsp)
735 xorl %esi,%esi # oldset 732 xorl %esi, %esi /* oldset */
736 movq %rsp,%rdi # &pt_regs 733 movq %rsp, %rdi /* &pt_regs */
737 call do_notify_resume 734 call do_notify_resume
738 RESTORE_EXTRA_REGS 735 RESTORE_EXTRA_REGS
739 DISABLE_INTERRUPTS(CLBR_NONE) 736 DISABLE_INTERRUPTS(CLBR_NONE)
740 TRACE_IRQS_OFF 737 TRACE_IRQS_OFF
741 GET_THREAD_INFO(%rcx) 738 GET_THREAD_INFO(%rcx)
742 jmp retint_with_reschedule 739 jmp retint_with_reschedule
743 740
744END(common_interrupt) 741END(common_interrupt)
745 742
@@ -749,10 +746,10 @@ END(common_interrupt)
749.macro apicinterrupt3 num sym do_sym 746.macro apicinterrupt3 num sym do_sym
750ENTRY(\sym) 747ENTRY(\sym)
751 ASM_CLAC 748 ASM_CLAC
752 pushq $~(\num) 749 pushq $~(\num)
753.Lcommon_\sym: 750.Lcommon_\sym:
754 interrupt \do_sym 751 interrupt \do_sym
755 jmp ret_from_intr 752 jmp ret_from_intr
756END(\sym) 753END(\sym)
757.endm 754.endm
758 755
@@ -774,60 +771,45 @@ trace_apicinterrupt \num \sym
774.endm 771.endm
775 772
776#ifdef CONFIG_SMP 773#ifdef CONFIG_SMP
777apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR \ 774apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
778 irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt 775apicinterrupt3 REBOOT_VECTOR reboot_interrupt smp_reboot_interrupt
779apicinterrupt3 REBOOT_VECTOR \
780 reboot_interrupt smp_reboot_interrupt
781#endif 776#endif
782 777
783#ifdef CONFIG_X86_UV 778#ifdef CONFIG_X86_UV
784apicinterrupt3 UV_BAU_MESSAGE \ 779apicinterrupt3 UV_BAU_MESSAGE uv_bau_message_intr1 uv_bau_message_interrupt
785 uv_bau_message_intr1 uv_bau_message_interrupt
786#endif 780#endif
787apicinterrupt LOCAL_TIMER_VECTOR \ 781
788 apic_timer_interrupt smp_apic_timer_interrupt 782apicinterrupt LOCAL_TIMER_VECTOR apic_timer_interrupt smp_apic_timer_interrupt
789apicinterrupt X86_PLATFORM_IPI_VECTOR \ 783apicinterrupt X86_PLATFORM_IPI_VECTOR x86_platform_ipi smp_x86_platform_ipi
790 x86_platform_ipi smp_x86_platform_ipi
791 784
792#ifdef CONFIG_HAVE_KVM 785#ifdef CONFIG_HAVE_KVM
793apicinterrupt3 POSTED_INTR_VECTOR \ 786apicinterrupt3 POSTED_INTR_VECTOR kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
794 kvm_posted_intr_ipi smp_kvm_posted_intr_ipi 787apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi
795apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR \
796 kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi
797#endif 788#endif
798 789
799#ifdef CONFIG_X86_MCE_THRESHOLD 790#ifdef CONFIG_X86_MCE_THRESHOLD
800apicinterrupt THRESHOLD_APIC_VECTOR \ 791apicinterrupt THRESHOLD_APIC_VECTOR threshold_interrupt smp_threshold_interrupt
801 threshold_interrupt smp_threshold_interrupt
802#endif 792#endif
803 793
804#ifdef CONFIG_X86_MCE_AMD 794#ifdef CONFIG_X86_MCE_AMD
805apicinterrupt DEFERRED_ERROR_VECTOR \ 795apicinterrupt DEFERRED_ERROR_VECTOR deferred_error_interrupt smp_deferred_error_interrupt
806 deferred_error_interrupt smp_deferred_error_interrupt
807#endif 796#endif
808 797
809#ifdef CONFIG_X86_THERMAL_VECTOR 798#ifdef CONFIG_X86_THERMAL_VECTOR
810apicinterrupt THERMAL_APIC_VECTOR \ 799apicinterrupt THERMAL_APIC_VECTOR thermal_interrupt smp_thermal_interrupt
811 thermal_interrupt smp_thermal_interrupt
812#endif 800#endif
813 801
814#ifdef CONFIG_SMP 802#ifdef CONFIG_SMP
815apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ 803apicinterrupt CALL_FUNCTION_SINGLE_VECTOR call_function_single_interrupt smp_call_function_single_interrupt
816 call_function_single_interrupt smp_call_function_single_interrupt 804apicinterrupt CALL_FUNCTION_VECTOR call_function_interrupt smp_call_function_interrupt
817apicinterrupt CALL_FUNCTION_VECTOR \ 805apicinterrupt RESCHEDULE_VECTOR reschedule_interrupt smp_reschedule_interrupt
818 call_function_interrupt smp_call_function_interrupt
819apicinterrupt RESCHEDULE_VECTOR \
820 reschedule_interrupt smp_reschedule_interrupt
821#endif 806#endif
822 807
823apicinterrupt ERROR_APIC_VECTOR \ 808apicinterrupt ERROR_APIC_VECTOR error_interrupt smp_error_interrupt
824 error_interrupt smp_error_interrupt 809apicinterrupt SPURIOUS_APIC_VECTOR spurious_interrupt smp_spurious_interrupt
825apicinterrupt SPURIOUS_APIC_VECTOR \
826 spurious_interrupt smp_spurious_interrupt
827 810
828#ifdef CONFIG_IRQ_WORK 811#ifdef CONFIG_IRQ_WORK
829apicinterrupt IRQ_WORK_VECTOR \ 812apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
830 irq_work_interrupt smp_irq_work_interrupt
831#endif 813#endif
832 814
833/* 815/*
@@ -846,54 +828,54 @@ ENTRY(\sym)
846 PARAVIRT_ADJUST_EXCEPTION_FRAME 828 PARAVIRT_ADJUST_EXCEPTION_FRAME
847 829
848 .ifeq \has_error_code 830 .ifeq \has_error_code
849 pushq $-1 /* ORIG_RAX: no syscall to restart */ 831 pushq $-1 /* ORIG_RAX: no syscall to restart */
850 .endif 832 .endif
851 833
852 ALLOC_PT_GPREGS_ON_STACK 834 ALLOC_PT_GPREGS_ON_STACK
853 835
854 .if \paranoid 836 .if \paranoid
855 .if \paranoid == 1 837 .if \paranoid == 1
856 testb $3, CS(%rsp) /* If coming from userspace, switch */ 838 testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
857 jnz 1f /* stacks. */ 839 jnz 1f
858 .endif 840 .endif
859 call paranoid_entry 841 call paranoid_entry
860 .else 842 .else
861 call error_entry 843 call error_entry
862 .endif 844 .endif
863 /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */ 845 /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
864 846
865 .if \paranoid 847 .if \paranoid
866 .if \shift_ist != -1 848 .if \shift_ist != -1
867 TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */ 849 TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */
868 .else 850 .else
869 TRACE_IRQS_OFF 851 TRACE_IRQS_OFF
870 .endif 852 .endif
871 .endif 853 .endif
872 854
873 movq %rsp,%rdi /* pt_regs pointer */ 855 movq %rsp, %rdi /* pt_regs pointer */
874 856
875 .if \has_error_code 857 .if \has_error_code
876 movq ORIG_RAX(%rsp),%rsi /* get error code */ 858 movq ORIG_RAX(%rsp), %rsi /* get error code */
877 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ 859 movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
878 .else 860 .else
879 xorl %esi,%esi /* no error code */ 861 xorl %esi, %esi /* no error code */
880 .endif 862 .endif
881 863
882 .if \shift_ist != -1 864 .if \shift_ist != -1
883 subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) 865 subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
884 .endif 866 .endif
885 867
886 call \do_sym 868 call \do_sym
887 869
888 .if \shift_ist != -1 870 .if \shift_ist != -1
889 addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) 871 addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
890 .endif 872 .endif
891 873
892 /* these procedures expect "no swapgs" flag in ebx */ 874 /* these procedures expect "no swapgs" flag in ebx */
893 .if \paranoid 875 .if \paranoid
894 jmp paranoid_exit 876 jmp paranoid_exit
895 .else 877 .else
896 jmp error_exit 878 jmp error_exit
897 .endif 879 .endif
898 880
899 .if \paranoid == 1 881 .if \paranoid == 1
@@ -903,25 +885,25 @@ ENTRY(\sym)
903 * run in real process context if user_mode(regs). 885 * run in real process context if user_mode(regs).
904 */ 886 */
9051: 8871:
906 call error_entry 888 call error_entry
907 889
908 890
909 movq %rsp,%rdi /* pt_regs pointer */ 891 movq %rsp, %rdi /* pt_regs pointer */
910 call sync_regs 892 call sync_regs
911 movq %rax,%rsp /* switch stack */ 893 movq %rax, %rsp /* switch stack */
912 894
913 movq %rsp,%rdi /* pt_regs pointer */ 895 movq %rsp, %rdi /* pt_regs pointer */
914 896
915 .if \has_error_code 897 .if \has_error_code
916 movq ORIG_RAX(%rsp),%rsi /* get error code */ 898 movq ORIG_RAX(%rsp), %rsi /* get error code */
917 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ 899 movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
918 .else 900 .else
919 xorl %esi,%esi /* no error code */ 901 xorl %esi, %esi /* no error code */
920 .endif 902 .endif
921 903
922 call \do_sym 904 call \do_sym
923 905
924 jmp error_exit /* %ebx: no swapgs flag */ 906 jmp error_exit /* %ebx: no swapgs flag */
925 .endif 907 .endif
926END(\sym) 908END(\sym)
927.endm 909.endm
@@ -937,55 +919,57 @@ idtentry \sym \do_sym has_error_code=\has_error_code
937.endm 919.endm
938#endif 920#endif
939 921
940idtentry divide_error do_divide_error has_error_code=0 922idtentry divide_error do_divide_error has_error_code=0
941idtentry overflow do_overflow has_error_code=0 923idtentry overflow do_overflow has_error_code=0
942idtentry bounds do_bounds has_error_code=0 924idtentry bounds do_bounds has_error_code=0
943idtentry invalid_op do_invalid_op has_error_code=0 925idtentry invalid_op do_invalid_op has_error_code=0
944idtentry device_not_available do_device_not_available has_error_code=0 926idtentry device_not_available do_device_not_available has_error_code=0
945idtentry double_fault do_double_fault has_error_code=1 paranoid=2 927idtentry double_fault do_double_fault has_error_code=1 paranoid=2
946idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 928idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
947idtentry invalid_TSS do_invalid_TSS has_error_code=1 929idtentry invalid_TSS do_invalid_TSS has_error_code=1
948idtentry segment_not_present do_segment_not_present has_error_code=1 930idtentry segment_not_present do_segment_not_present has_error_code=1
949idtentry spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0 931idtentry spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0
950idtentry coprocessor_error do_coprocessor_error has_error_code=0 932idtentry coprocessor_error do_coprocessor_error has_error_code=0
951idtentry alignment_check do_alignment_check has_error_code=1 933idtentry alignment_check do_alignment_check has_error_code=1
952idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 934idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0
953 935
954 936
955 /* Reload gs selector with exception handling */ 937 /*
956 /* edi: new selector */ 938 * Reload gs selector with exception handling
939 * edi: new selector
940 */
957ENTRY(native_load_gs_index) 941ENTRY(native_load_gs_index)
958 pushfq 942 pushfq
959 DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) 943 DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
960 SWAPGS 944 SWAPGS
961gs_change: 945gs_change:
962 movl %edi,%gs 946 movl %edi, %gs
9632: mfence /* workaround */ 9472: mfence /* workaround */
964 SWAPGS 948 SWAPGS
965 popfq 949 popfq
966 ret 950 ret
967END(native_load_gs_index) 951END(native_load_gs_index)
968 952
969 _ASM_EXTABLE(gs_change,bad_gs) 953 _ASM_EXTABLE(gs_change, bad_gs)
970 .section .fixup,"ax" 954 .section .fixup, "ax"
971 /* running with kernelgs */ 955 /* running with kernelgs */
972bad_gs: 956bad_gs:
973 SWAPGS /* switch back to user gs */ 957 SWAPGS /* switch back to user gs */
974 xorl %eax,%eax 958 xorl %eax, %eax
975 movl %eax,%gs 959 movl %eax, %gs
976 jmp 2b 960 jmp 2b
977 .previous 961 .previous
978 962
979/* Call softirq on interrupt stack. Interrupts are off. */ 963/* Call softirq on interrupt stack. Interrupts are off. */
980ENTRY(do_softirq_own_stack) 964ENTRY(do_softirq_own_stack)
981 pushq %rbp 965 pushq %rbp
982 mov %rsp,%rbp 966 mov %rsp, %rbp
983 incl PER_CPU_VAR(irq_count) 967 incl PER_CPU_VAR(irq_count)
984 cmove PER_CPU_VAR(irq_stack_ptr),%rsp 968 cmove PER_CPU_VAR(irq_stack_ptr), %rsp
985 push %rbp # backlink for old unwinder 969 push %rbp /* frame pointer backlink */
986 call __do_softirq 970 call __do_softirq
987 leaveq 971 leaveq
988 decl PER_CPU_VAR(irq_count) 972 decl PER_CPU_VAR(irq_count)
989 ret 973 ret
990END(do_softirq_own_stack) 974END(do_softirq_own_stack)
991 975
@@ -1005,23 +989,24 @@ idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
1005 * existing activation in its critical region -- if so, we pop the current 989 * existing activation in its critical region -- if so, we pop the current
1006 * activation and restart the handler using the previous one. 990 * activation and restart the handler using the previous one.
1007 */ 991 */
1008ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) 992ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */
993
1009/* 994/*
1010 * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will 995 * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
1011 * see the correct pointer to the pt_regs 996 * see the correct pointer to the pt_regs
1012 */ 997 */
1013 movq %rdi, %rsp # we don't return, adjust the stack frame 998 movq %rdi, %rsp /* we don't return, adjust the stack frame */
101411: incl PER_CPU_VAR(irq_count) 99911: incl PER_CPU_VAR(irq_count)
1015 movq %rsp,%rbp 1000 movq %rsp, %rbp
1016 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp 1001 cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp
1017 pushq %rbp # backlink for old unwinder 1002 pushq %rbp /* frame pointer backlink */
1018 call xen_evtchn_do_upcall 1003 call xen_evtchn_do_upcall
1019 popq %rsp 1004 popq %rsp
1020 decl PER_CPU_VAR(irq_count) 1005 decl PER_CPU_VAR(irq_count)
1021#ifndef CONFIG_PREEMPT 1006#ifndef CONFIG_PREEMPT
1022 call xen_maybe_preempt_hcall 1007 call xen_maybe_preempt_hcall
1023#endif 1008#endif
1024 jmp error_exit 1009 jmp error_exit
1025END(xen_do_hypervisor_callback) 1010END(xen_do_hypervisor_callback)
1026 1011
1027/* 1012/*
@@ -1038,35 +1023,35 @@ END(xen_do_hypervisor_callback)
1038 * with its current contents: any discrepancy means we in category 1. 1023 * with its current contents: any discrepancy means we in category 1.
1039 */ 1024 */
1040ENTRY(xen_failsafe_callback) 1025ENTRY(xen_failsafe_callback)
1041 movl %ds,%ecx 1026 movl %ds, %ecx
1042 cmpw %cx,0x10(%rsp) 1027 cmpw %cx, 0x10(%rsp)
1043 jne 1f 1028 jne 1f
1044 movl %es,%ecx 1029 movl %es, %ecx
1045 cmpw %cx,0x18(%rsp) 1030 cmpw %cx, 0x18(%rsp)
1046 jne 1f 1031 jne 1f
1047 movl %fs,%ecx 1032 movl %fs, %ecx
1048 cmpw %cx,0x20(%rsp) 1033 cmpw %cx, 0x20(%rsp)
1049 jne 1f 1034 jne 1f
1050 movl %gs,%ecx 1035 movl %gs, %ecx
1051 cmpw %cx,0x28(%rsp) 1036 cmpw %cx, 0x28(%rsp)
1052 jne 1f 1037 jne 1f
1053 /* All segments match their saved values => Category 2 (Bad IRET). */ 1038 /* All segments match their saved values => Category 2 (Bad IRET). */
1054 movq (%rsp),%rcx 1039 movq (%rsp), %rcx
1055 movq 8(%rsp),%r11 1040 movq 8(%rsp), %r11
1056 addq $0x30,%rsp 1041 addq $0x30, %rsp
1057 pushq $0 /* RIP */ 1042 pushq $0 /* RIP */
1058 pushq %r11 1043 pushq %r11
1059 pushq %rcx 1044 pushq %rcx
1060 jmp general_protection 1045 jmp general_protection
10611: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ 10461: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
1062 movq (%rsp),%rcx 1047 movq (%rsp), %rcx
1063 movq 8(%rsp),%r11 1048 movq 8(%rsp), %r11
1064 addq $0x30,%rsp 1049 addq $0x30, %rsp
1065 pushq $-1 /* orig_ax = -1 => not a system call */ 1050 pushq $-1 /* orig_ax = -1 => not a system call */
1066 ALLOC_PT_GPREGS_ON_STACK 1051 ALLOC_PT_GPREGS_ON_STACK
1067 SAVE_C_REGS 1052 SAVE_C_REGS
1068 SAVE_EXTRA_REGS 1053 SAVE_EXTRA_REGS
1069 jmp error_exit 1054 jmp error_exit
1070END(xen_failsafe_callback) 1055END(xen_failsafe_callback)
1071 1056
1072apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ 1057apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
@@ -1079,21 +1064,25 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
1079 hyperv_callback_vector hyperv_vector_handler 1064 hyperv_callback_vector hyperv_vector_handler
1080#endif /* CONFIG_HYPERV */ 1065#endif /* CONFIG_HYPERV */
1081 1066
1082idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK 1067idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
1083idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK 1068idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
1084idtentry stack_segment do_stack_segment has_error_code=1 1069idtentry stack_segment do_stack_segment has_error_code=1
1070
1085#ifdef CONFIG_XEN 1071#ifdef CONFIG_XEN
1086idtentry xen_debug do_debug has_error_code=0 1072idtentry xen_debug do_debug has_error_code=0
1087idtentry xen_int3 do_int3 has_error_code=0 1073idtentry xen_int3 do_int3 has_error_code=0
1088idtentry xen_stack_segment do_stack_segment has_error_code=1 1074idtentry xen_stack_segment do_stack_segment has_error_code=1
1089#endif 1075#endif
1090idtentry general_protection do_general_protection has_error_code=1 1076
1091trace_idtentry page_fault do_page_fault has_error_code=1 1077idtentry general_protection do_general_protection has_error_code=1
1078trace_idtentry page_fault do_page_fault has_error_code=1
1079
1092#ifdef CONFIG_KVM_GUEST 1080#ifdef CONFIG_KVM_GUEST
1093idtentry async_page_fault do_async_page_fault has_error_code=1 1081idtentry async_page_fault do_async_page_fault has_error_code=1
1094#endif 1082#endif
1083
1095#ifdef CONFIG_X86_MCE 1084#ifdef CONFIG_X86_MCE
1096idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip) 1085idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
1097#endif 1086#endif
1098 1087
1099/* 1088/*
@@ -1105,13 +1094,13 @@ ENTRY(paranoid_entry)
1105 cld 1094 cld
1106 SAVE_C_REGS 8 1095 SAVE_C_REGS 8
1107 SAVE_EXTRA_REGS 8 1096 SAVE_EXTRA_REGS 8
1108 movl $1,%ebx 1097 movl $1, %ebx
1109 movl $MSR_GS_BASE,%ecx 1098 movl $MSR_GS_BASE, %ecx
1110 rdmsr 1099 rdmsr
1111 testl %edx,%edx 1100 testl %edx, %edx
1112 js 1f /* negative -> in kernel */ 1101 js 1f /* negative -> in kernel */
1113 SWAPGS 1102 SWAPGS
1114 xorl %ebx,%ebx 1103 xorl %ebx, %ebx
11151: ret 11041: ret
1116END(paranoid_entry) 1105END(paranoid_entry)
1117 1106
@@ -1124,16 +1113,17 @@ END(paranoid_entry)
1124 * in syscall entry), so checking for preemption here would 1113 * in syscall entry), so checking for preemption here would
1125 * be complicated. Fortunately, we there's no good reason 1114 * be complicated. Fortunately, we there's no good reason
1126 * to try to handle preemption here. 1115 * to try to handle preemption here.
1116 *
1117 * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
1127 */ 1118 */
1128/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
1129ENTRY(paranoid_exit) 1119ENTRY(paranoid_exit)
1130 DISABLE_INTERRUPTS(CLBR_NONE) 1120 DISABLE_INTERRUPTS(CLBR_NONE)
1131 TRACE_IRQS_OFF_DEBUG 1121 TRACE_IRQS_OFF_DEBUG
1132 testl %ebx,%ebx /* swapgs needed? */ 1122 testl %ebx, %ebx /* swapgs needed? */
1133 jnz paranoid_exit_no_swapgs 1123 jnz paranoid_exit_no_swapgs
1134 TRACE_IRQS_IRETQ 1124 TRACE_IRQS_IRETQ
1135 SWAPGS_UNSAFE_STACK 1125 SWAPGS_UNSAFE_STACK
1136 jmp paranoid_exit_restore 1126 jmp paranoid_exit_restore
1137paranoid_exit_no_swapgs: 1127paranoid_exit_no_swapgs:
1138 TRACE_IRQS_IRETQ_DEBUG 1128 TRACE_IRQS_IRETQ_DEBUG
1139paranoid_exit_restore: 1129paranoid_exit_restore:
@@ -1151,7 +1141,7 @@ ENTRY(error_entry)
1151 cld 1141 cld
1152 SAVE_C_REGS 8 1142 SAVE_C_REGS 8
1153 SAVE_EXTRA_REGS 8 1143 SAVE_EXTRA_REGS 8
1154 xorl %ebx,%ebx 1144 xorl %ebx, %ebx
1155 testb $3, CS+8(%rsp) 1145 testb $3, CS+8(%rsp)
1156 jz error_kernelspace 1146 jz error_kernelspace
1157error_swapgs: 1147error_swapgs:
@@ -1167,41 +1157,41 @@ error_sti:
1167 * for these here too. 1157 * for these here too.
1168 */ 1158 */
1169error_kernelspace: 1159error_kernelspace:
1170 incl %ebx 1160 incl %ebx
1171 leaq native_irq_return_iret(%rip),%rcx 1161 leaq native_irq_return_iret(%rip), %rcx
1172 cmpq %rcx,RIP+8(%rsp) 1162 cmpq %rcx, RIP+8(%rsp)
1173 je error_bad_iret 1163 je error_bad_iret
1174 movl %ecx,%eax /* zero extend */ 1164 movl %ecx, %eax /* zero extend */
1175 cmpq %rax,RIP+8(%rsp) 1165 cmpq %rax, RIP+8(%rsp)
1176 je bstep_iret 1166 je bstep_iret
1177 cmpq $gs_change,RIP+8(%rsp) 1167 cmpq $gs_change, RIP+8(%rsp)
1178 je error_swapgs 1168 je error_swapgs
1179 jmp error_sti 1169 jmp error_sti
1180 1170
1181bstep_iret: 1171bstep_iret:
1182 /* Fix truncated RIP */ 1172 /* Fix truncated RIP */
1183 movq %rcx,RIP+8(%rsp) 1173 movq %rcx, RIP+8(%rsp)
1184 /* fall through */ 1174 /* fall through */
1185 1175
1186error_bad_iret: 1176error_bad_iret:
1187 SWAPGS 1177 SWAPGS
1188 mov %rsp,%rdi 1178 mov %rsp, %rdi
1189 call fixup_bad_iret 1179 call fixup_bad_iret
1190 mov %rax,%rsp 1180 mov %rax, %rsp
1191 decl %ebx /* Return to usergs */ 1181 decl %ebx /* Return to usergs */
1192 jmp error_sti 1182 jmp error_sti
1193END(error_entry) 1183END(error_entry)
1194 1184
1195 1185
1196/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */ 1186/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
1197ENTRY(error_exit) 1187ENTRY(error_exit)
1198 movl %ebx,%eax 1188 movl %ebx, %eax
1199 RESTORE_EXTRA_REGS 1189 RESTORE_EXTRA_REGS
1200 DISABLE_INTERRUPTS(CLBR_NONE) 1190 DISABLE_INTERRUPTS(CLBR_NONE)
1201 TRACE_IRQS_OFF 1191 TRACE_IRQS_OFF
1202 testl %eax,%eax 1192 testl %eax, %eax
1203 jnz retint_kernel 1193 jnz retint_kernel
1204 jmp retint_user 1194 jmp retint_user
1205END(error_exit) 1195END(error_exit)
1206 1196
1207/* Runs on exception stack */ 1197/* Runs on exception stack */
@@ -1240,21 +1230,21 @@ ENTRY(nmi)
1240 */ 1230 */
1241 1231
1242 /* Use %rdx as our temp variable throughout */ 1232 /* Use %rdx as our temp variable throughout */
1243 pushq %rdx 1233 pushq %rdx
1244 1234
1245 /* 1235 /*
1246 * If %cs was not the kernel segment, then the NMI triggered in user 1236 * If %cs was not the kernel segment, then the NMI triggered in user
1247 * space, which means it is definitely not nested. 1237 * space, which means it is definitely not nested.
1248 */ 1238 */
1249 cmpl $__KERNEL_CS, 16(%rsp) 1239 cmpl $__KERNEL_CS, 16(%rsp)
1250 jne first_nmi 1240 jne first_nmi
1251 1241
1252 /* 1242 /*
1253 * Check the special variable on the stack to see if NMIs are 1243 * Check the special variable on the stack to see if NMIs are
1254 * executing. 1244 * executing.
1255 */ 1245 */
1256 cmpl $1, -8(%rsp) 1246 cmpl $1, -8(%rsp)
1257 je nested_nmi 1247 je nested_nmi
1258 1248
1259 /* 1249 /*
1260 * Now test if the previous stack was an NMI stack. 1250 * Now test if the previous stack was an NMI stack.
@@ -1268,6 +1258,7 @@ ENTRY(nmi)
1268 cmpq %rdx, 4*8(%rsp) 1258 cmpq %rdx, 4*8(%rsp)
1269 /* If the stack pointer is above the NMI stack, this is a normal NMI */ 1259 /* If the stack pointer is above the NMI stack, this is a normal NMI */
1270 ja first_nmi 1260 ja first_nmi
1261
1271 subq $EXCEPTION_STKSZ, %rdx 1262 subq $EXCEPTION_STKSZ, %rdx
1272 cmpq %rdx, 4*8(%rsp) 1263 cmpq %rdx, 4*8(%rsp)
1273 /* If it is below the NMI stack, it is a normal NMI */ 1264 /* If it is below the NMI stack, it is a normal NMI */
@@ -1280,29 +1271,29 @@ nested_nmi:
1280 * It's about to repeat the NMI handler, so we are fine 1271 * It's about to repeat the NMI handler, so we are fine
1281 * with ignoring this one. 1272 * with ignoring this one.
1282 */ 1273 */
1283 movq $repeat_nmi, %rdx 1274 movq $repeat_nmi, %rdx
1284 cmpq 8(%rsp), %rdx 1275 cmpq 8(%rsp), %rdx
1285 ja 1f 1276 ja 1f
1286 movq $end_repeat_nmi, %rdx 1277 movq $end_repeat_nmi, %rdx
1287 cmpq 8(%rsp), %rdx 1278 cmpq 8(%rsp), %rdx
1288 ja nested_nmi_out 1279 ja nested_nmi_out
1289 1280
12901: 12811:
1291 /* Set up the interrupted NMIs stack to jump to repeat_nmi */ 1282 /* Set up the interrupted NMIs stack to jump to repeat_nmi */
1292 leaq -1*8(%rsp), %rdx 1283 leaq -1*8(%rsp), %rdx
1293 movq %rdx, %rsp 1284 movq %rdx, %rsp
1294 leaq -10*8(%rsp), %rdx 1285 leaq -10*8(%rsp), %rdx
1295 pushq $__KERNEL_DS 1286 pushq $__KERNEL_DS
1296 pushq %rdx 1287 pushq %rdx
1297 pushfq 1288 pushfq
1298 pushq $__KERNEL_CS 1289 pushq $__KERNEL_CS
1299 pushq $repeat_nmi 1290 pushq $repeat_nmi
1300 1291
1301 /* Put stack back */ 1292 /* Put stack back */
1302 addq $(6*8), %rsp 1293 addq $(6*8), %rsp
1303 1294
1304nested_nmi_out: 1295nested_nmi_out:
1305 popq %rdx 1296 popq %rdx
1306 1297
1307 /* No need to check faults here */ 1298 /* No need to check faults here */
1308 INTERRUPT_RETURN 1299 INTERRUPT_RETURN
@@ -1344,19 +1335,17 @@ first_nmi:
1344 * is also used by nested NMIs and can not be trusted on exit. 1335 * is also used by nested NMIs and can not be trusted on exit.
1345 */ 1336 */
1346 /* Do not pop rdx, nested NMIs will corrupt that part of the stack */ 1337 /* Do not pop rdx, nested NMIs will corrupt that part of the stack */
1347 movq (%rsp), %rdx 1338 movq (%rsp), %rdx
1348 1339
1349 /* Set the NMI executing variable on the stack. */ 1340 /* Set the NMI executing variable on the stack. */
1350 pushq $1 1341 pushq $1
1351 1342
1352 /* 1343 /* Leave room for the "copied" frame */
1353 * Leave room for the "copied" frame 1344 subq $(5*8), %rsp
1354 */
1355 subq $(5*8), %rsp
1356 1345
1357 /* Copy the stack frame to the Saved frame */ 1346 /* Copy the stack frame to the Saved frame */
1358 .rept 5 1347 .rept 5
1359 pushq 11*8(%rsp) 1348 pushq 11*8(%rsp)
1360 .endr 1349 .endr
1361 1350
1362 /* Everything up to here is safe from nested NMIs */ 1351 /* Everything up to here is safe from nested NMIs */
@@ -1376,14 +1365,14 @@ repeat_nmi:
1376 * is benign for the non-repeat case, where 1 was pushed just above 1365 * is benign for the non-repeat case, where 1 was pushed just above
1377 * to this very stack slot). 1366 * to this very stack slot).
1378 */ 1367 */
1379 movq $1, 10*8(%rsp) 1368 movq $1, 10*8(%rsp)
1380 1369
1381 /* Make another copy, this one may be modified by nested NMIs */ 1370 /* Make another copy, this one may be modified by nested NMIs */
1382 addq $(10*8), %rsp 1371 addq $(10*8), %rsp
1383 .rept 5 1372 .rept 5
1384 pushq -6*8(%rsp) 1373 pushq -6*8(%rsp)
1385 .endr 1374 .endr
1386 subq $(5*8), %rsp 1375 subq $(5*8), %rsp
1387end_repeat_nmi: 1376end_repeat_nmi:
1388 1377
1389 /* 1378 /*
@@ -1391,7 +1380,7 @@ end_repeat_nmi:
1391 * NMI if the first NMI took an exception and reset our iret stack 1380 * NMI if the first NMI took an exception and reset our iret stack
1392 * so that we repeat another NMI. 1381 * so that we repeat another NMI.
1393 */ 1382 */
1394 pushq $-1 /* ORIG_RAX: no syscall to restart */ 1383 pushq $-1 /* ORIG_RAX: no syscall to restart */
1395 ALLOC_PT_GPREGS_ON_STACK 1384 ALLOC_PT_GPREGS_ON_STACK
1396 1385
1397 /* 1386 /*
@@ -1401,7 +1390,7 @@ end_repeat_nmi:
1401 * setting NEED_RESCHED or anything that normal interrupts and 1390 * setting NEED_RESCHED or anything that normal interrupts and
1402 * exceptions might do. 1391 * exceptions might do.
1403 */ 1392 */
1404 call paranoid_entry 1393 call paranoid_entry
1405 1394
1406 /* 1395 /*
1407 * Save off the CR2 register. If we take a page fault in the NMI then 1396 * Save off the CR2 register. If we take a page fault in the NMI then
@@ -1412,21 +1401,21 @@ end_repeat_nmi:
1412 * origin fault. Save it off and restore it if it changes. 1401 * origin fault. Save it off and restore it if it changes.
1413 * Use the r12 callee-saved register. 1402 * Use the r12 callee-saved register.
1414 */ 1403 */
1415 movq %cr2, %r12 1404 movq %cr2, %r12
1416 1405
1417 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ 1406 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
1418 movq %rsp,%rdi 1407 movq %rsp, %rdi
1419 movq $-1,%rsi 1408 movq $-1, %rsi
1420 call do_nmi 1409 call do_nmi
1421 1410
1422 /* Did the NMI take a page fault? Restore cr2 if it did */ 1411 /* Did the NMI take a page fault? Restore cr2 if it did */
1423 movq %cr2, %rcx 1412 movq %cr2, %rcx
1424 cmpq %rcx, %r12 1413 cmpq %rcx, %r12
1425 je 1f 1414 je 1f
1426 movq %r12, %cr2 1415 movq %r12, %cr2
14271: 14161:
1428 testl %ebx,%ebx /* swapgs needed? */ 1417 testl %ebx, %ebx /* swapgs needed? */
1429 jnz nmi_restore 1418 jnz nmi_restore
1430nmi_swapgs: 1419nmi_swapgs:
1431 SWAPGS_UNSAFE_STACK 1420 SWAPGS_UNSAFE_STACK
1432nmi_restore: 1421nmi_restore:
@@ -1436,12 +1425,11 @@ nmi_restore:
1436 REMOVE_PT_GPREGS_FROM_STACK 6*8 1425 REMOVE_PT_GPREGS_FROM_STACK 6*8
1437 1426
1438 /* Clear the NMI executing stack variable */ 1427 /* Clear the NMI executing stack variable */
1439 movq $0, 5*8(%rsp) 1428 movq $0, 5*8(%rsp)
1440 INTERRUPT_RETURN 1429 INTERRUPT_RETURN
1441END(nmi) 1430END(nmi)
1442 1431
1443ENTRY(ignore_sysret) 1432ENTRY(ignore_sysret)
1444 mov $-ENOSYS,%eax 1433 mov $-ENOSYS, %eax
1445 sysret 1434 sysret
1446END(ignore_sysret) 1435END(ignore_sysret)
1447