path: root/arch/x86/kernel/entry_64.S
author    Linus Torvalds <torvalds@linux-foundation.org>  2008-12-28 15:07:57 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2008-12-28 15:07:57 -0500
commit    be9c5ae4eeec2e85527e95647348b8ea4eb25128 (patch)
tree      59383b15bc0891b8a44500a0ac172a8850f1068d /arch/x86/kernel/entry_64.S
parent    bb26c6c29b7cc9f39e491b074b09f3c284738d36 (diff)
parent    79a66b96c339626a3e4b226fefc0e45244cfe6ff (diff)
Merge branch 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (246 commits)
  x86: traps.c replace #if CONFIG_X86_32 with #ifdef CONFIG_X86_32
  x86: PAT: fix address types in track_pfn_vma_new()
  x86: prioritize the FPU traps for the error code
  x86: PAT: pfnmap documentation update changes
  x86: PAT: move track untrack pfnmap stubs to asm-generic
  x86: PAT: remove follow_pfnmap_pte in favor of follow_phys
  x86: PAT: modify follow_phys to return phys_addr prot and return value
  x86: PAT: clarify is_linear_pfn_mapping() interface
  x86: ia32_signal: remove unnecessary declaration
  x86: common.c boot_cpu_stack and boot_exception_stacks should be static
  x86: fix intel x86_64 llc_shared_map/cpu_llc_id anomolies
  x86: fix warning in arch/x86/kernel/microcode_amd.c
  x86: ia32.h: remove unused struct sigfram32 and rt_sigframe32
  x86: asm-offset_64: use rt_sigframe_ia32
  x86: sigframe.h: include headers for dependency
  x86: traps.c declare functions before they get used
  x86: PAT: update documentation to cover pgprot and remap_pfn related changes - v3
  x86: PAT: add pgprot_writecombine() interface for drivers - v3
  x86: PAT: change pgprot_noncached to uc_minus instead of strong uc - v3
  x86: PAT: implement track/untrack of pfnmap regions for x86 - v3
  ...
Diffstat (limited to 'arch/x86/kernel/entry_64.S')
-rw-r--r--  arch/x86/kernel/entry_64.S  1360
1 files changed, 725 insertions(+), 635 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b86f332c96a6..3194636a4293 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -11,15 +11,15 @@
  *
  * NOTE: This code handles signal-recognition, which happens every time
  * after an interrupt and after each system call.
  *
  * Normal syscalls and interrupts don't save a full stack frame, this is
  * only done for syscall tracing, signals or fork/exec et.al.
  *
  * A note on terminology:
  * - top of stack: Architecture defined interrupt frame from SS to RIP
  * at the top of the kernel process stack.
  * - partial stack frame: partially saved registers upto R11.
  * - full stack frame: Like partial stack frame, but all register saved.
  *
  * Some macro usage:
  * - CFI macros are used to generate dwarf2 unwind information for better
@@ -60,7 +60,6 @@
 #define __AUDIT_ARCH_LE 0x40000000
 
  .code64
-
 #ifdef CONFIG_FUNCTION_TRACER
 #ifdef CONFIG_DYNAMIC_FTRACE
 ENTRY(mcount)
@@ -142,7 +141,7 @@ END(mcount)
 
 #ifndef CONFIG_PREEMPT
 #define retint_kernel retint_restore_args
 #endif
 
 #ifdef CONFIG_PARAVIRT
 ENTRY(native_usergs_sysret64)
@@ -161,29 +160,29 @@ ENTRY(native_usergs_sysret64)
 .endm
 
 /*
  * C code is not supposed to know about undefined top of stack. Every time
  * a C function with an pt_regs argument is called from the SYSCALL based
  * fast path FIXUP_TOP_OF_STACK is needed.
  * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
  * manipulation.
  */
 
  /* %rsp:at FRAMEEND */
- .macro FIXUP_TOP_OF_STACK tmp
+ .macro FIXUP_TOP_OF_STACK tmp offset=0
  movq %gs:pda_oldrsp,\tmp
- movq \tmp,RSP(%rsp)
- movq $__USER_DS,SS(%rsp)
- movq $__USER_CS,CS(%rsp)
- movq $-1,RCX(%rsp)
- movq R11(%rsp),\tmp /* get eflags */
- movq \tmp,EFLAGS(%rsp)
+ movq \tmp,RSP+\offset(%rsp)
+ movq $__USER_DS,SS+\offset(%rsp)
+ movq $__USER_CS,CS+\offset(%rsp)
+ movq $-1,RCX+\offset(%rsp)
+ movq R11+\offset(%rsp),\tmp /* get eflags */
+ movq \tmp,EFLAGS+\offset(%rsp)
  .endm
 
- .macro RESTORE_TOP_OF_STACK tmp,offset=0
- movq RSP-\offset(%rsp),\tmp
+ .macro RESTORE_TOP_OF_STACK tmp offset=0
+ movq RSP+\offset(%rsp),\tmp
  movq \tmp,%gs:pda_oldrsp
- movq EFLAGS-\offset(%rsp),\tmp
- movq \tmp,R11-\offset(%rsp)
+ movq EFLAGS+\offset(%rsp),\tmp
+ movq \tmp,R11+\offset(%rsp)
  .endm
 
  .macro FAKE_STACK_FRAME child_rip
@@ -195,7 +194,7 @@ ENTRY(native_usergs_sysret64)
  pushq %rax /* rsp */
  CFI_ADJUST_CFA_OFFSET 8
  CFI_REL_OFFSET rsp,0
- pushq $(1<<9) /* eflags - interrupts on */
+ pushq $X86_EFLAGS_IF /* eflags - interrupts on */
  CFI_ADJUST_CFA_OFFSET 8
  /*CFI_REL_OFFSET rflags,0*/
  pushq $__KERNEL_CS /* cs */
@@ -213,62 +212,184 @@ ENTRY(native_usergs_sysret64)
  CFI_ADJUST_CFA_OFFSET -(6*8)
  .endm
 
- .macro CFI_DEFAULT_STACK start=1
+/*
+ * initial frame state for interrupts (and exceptions without error code)
+ */
+ .macro EMPTY_FRAME start=1 offset=0
  .if \start
  CFI_STARTPROC simple
  CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,SS+8
+ CFI_DEF_CFA rsp,8+\offset
  .else
- CFI_DEF_CFA_OFFSET SS+8
+ CFI_DEF_CFA_OFFSET 8+\offset
  .endif
- CFI_REL_OFFSET r15,R15
- CFI_REL_OFFSET r14,R14
- CFI_REL_OFFSET r13,R13
- CFI_REL_OFFSET r12,R12
- CFI_REL_OFFSET rbp,RBP
- CFI_REL_OFFSET rbx,RBX
- CFI_REL_OFFSET r11,R11
- CFI_REL_OFFSET r10,R10
- CFI_REL_OFFSET r9,R9
- CFI_REL_OFFSET r8,R8
- CFI_REL_OFFSET rax,RAX
- CFI_REL_OFFSET rcx,RCX
- CFI_REL_OFFSET rdx,RDX
- CFI_REL_OFFSET rsi,RSI
- CFI_REL_OFFSET rdi,RDI
- CFI_REL_OFFSET rip,RIP
- /*CFI_REL_OFFSET cs,CS*/
- /*CFI_REL_OFFSET rflags,EFLAGS*/
- CFI_REL_OFFSET rsp,RSP
- /*CFI_REL_OFFSET ss,SS*/
  .endm
+
+/*
+ * initial frame state for interrupts (and exceptions without error code)
+ */
+ .macro INTR_FRAME start=1 offset=0
+ EMPTY_FRAME \start, SS+8+\offset-RIP
+ /*CFI_REL_OFFSET ss, SS+\offset-RIP*/
+ CFI_REL_OFFSET rsp, RSP+\offset-RIP
+ /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
+ /*CFI_REL_OFFSET cs, CS+\offset-RIP*/
+ CFI_REL_OFFSET rip, RIP+\offset-RIP
+ .endm
+
+/*
+ * initial frame state for exceptions with error code (and interrupts
+ * with vector already pushed)
+ */
+ .macro XCPT_FRAME start=1 offset=0
+ INTR_FRAME \start, RIP+\offset-ORIG_RAX
+ /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
+ .endm
+
+/*
+ * frame that enables calling into C.
+ */
+ .macro PARTIAL_FRAME start=1 offset=0
+ XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
+ CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
+ CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
+ CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
+ CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
+ CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
+ CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
+ CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
+ CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
+ CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
+ .endm
+
+/*
+ * frame that enables passing a complete pt_regs to a C function.
+ */
+ .macro DEFAULT_FRAME start=1 offset=0
+ PARTIAL_FRAME \start, R11+\offset-R15
+ CFI_REL_OFFSET rbx, RBX+\offset
+ CFI_REL_OFFSET rbp, RBP+\offset
+ CFI_REL_OFFSET r12, R12+\offset
+ CFI_REL_OFFSET r13, R13+\offset
+ CFI_REL_OFFSET r14, R14+\offset
+ CFI_REL_OFFSET r15, R15+\offset
+ .endm
+
+/* save partial stack frame */
+ENTRY(save_args)
+ XCPT_FRAME
+ cld
+ movq_cfi rdi, RDI+16-ARGOFFSET
+ movq_cfi rsi, RSI+16-ARGOFFSET
+ movq_cfi rdx, RDX+16-ARGOFFSET
+ movq_cfi rcx, RCX+16-ARGOFFSET
+ movq_cfi rax, RAX+16-ARGOFFSET
+ movq_cfi r8, R8+16-ARGOFFSET
+ movq_cfi r9, R9+16-ARGOFFSET
+ movq_cfi r10, R10+16-ARGOFFSET
+ movq_cfi r11, R11+16-ARGOFFSET
+
+ leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */
+ movq_cfi rbp, 8 /* push %rbp */
+ leaq 8(%rsp), %rbp /* mov %rsp, %ebp */
+ testl $3, CS(%rdi)
+ je 1f
+ SWAPGS
+ /*
+ * irqcount is used to check if a CPU is already on an interrupt stack
+ * or not. While this is essentially redundant with preempt_count it is
+ * a little cheaper to use a separate counter in the PDA (short of
+ * moving irq_enter into assembly, which would be too much work)
+ */
+1: incl %gs:pda_irqcount
+ jne 2f
+ popq_cfi %rax /* move return address... */
+ mov %gs:pda_irqstackptr,%rsp
+ EMPTY_FRAME 0
+ pushq_cfi %rax /* ... to the new stack */
+ /*
+ * We entered an interrupt context - irqs are off:
+ */
+2: TRACE_IRQS_OFF
+ ret
+ CFI_ENDPROC
+END(save_args)
+
+ENTRY(save_rest)
+ PARTIAL_FRAME 1 REST_SKIP+8
+ movq 5*8+16(%rsp), %r11 /* save return address */
+ movq_cfi rbx, RBX+16
+ movq_cfi rbp, RBP+16
+ movq_cfi r12, R12+16
+ movq_cfi r13, R13+16
+ movq_cfi r14, R14+16
+ movq_cfi r15, R15+16
+ movq %r11, 8(%rsp) /* return address */
+ FIXUP_TOP_OF_STACK %r11, 16
+ ret
+ CFI_ENDPROC
+END(save_rest)
+
+/* save complete stack frame */
+ENTRY(save_paranoid)
+ XCPT_FRAME 1 RDI+8
+ cld
+ movq_cfi rdi, RDI+8
+ movq_cfi rsi, RSI+8
+ movq_cfi rdx, RDX+8
+ movq_cfi rcx, RCX+8
+ movq_cfi rax, RAX+8
+ movq_cfi r8, R8+8
+ movq_cfi r9, R9+8
+ movq_cfi r10, R10+8
+ movq_cfi r11, R11+8
+ movq_cfi rbx, RBX+8
+ movq_cfi rbp, RBP+8
+ movq_cfi r12, R12+8
+ movq_cfi r13, R13+8
+ movq_cfi r14, R14+8
+ movq_cfi r15, R15+8
+ movl $1,%ebx
+ movl $MSR_GS_BASE,%ecx
+ rdmsr
+ testl %edx,%edx
+ js 1f /* negative -> in kernel */
+ SWAPGS
+ xorl %ebx,%ebx
+1: ret
+ CFI_ENDPROC
+END(save_paranoid)
+
 /*
- * A newly forked process directly context switches into this.
- */
-/* rdi: prev */
+ * A newly forked process directly context switches into this address.
+ *
+ * rdi: prev task we switched from
+ */
 ENTRY(ret_from_fork)
- CFI_DEFAULT_STACK
+ DEFAULT_FRAME
+
  push kernel_eflags(%rip)
  CFI_ADJUST_CFA_OFFSET 8
  popf # reset kernel eflags
  CFI_ADJUST_CFA_OFFSET -8
- call schedule_tail
+
+ call schedule_tail # rdi: 'prev' task parameter
+
  GET_THREAD_INFO(%rcx)
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
- jnz rff_trace
-rff_action:
+
+ CFI_REMEMBER_STATE
  RESTORE_REST
- testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
+
+ testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
  je int_ret_from_sys_call
- testl $_TIF_IA32,TI_flags(%rcx)
+
+ testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
  jnz int_ret_from_sys_call
- RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
- jmp ret_from_sys_call
-rff_trace:
- movq %rsp,%rdi
- call syscall_trace_leave
- GET_THREAD_INFO(%rcx)
- jmp rff_action
+
+ RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
+ jmp ret_from_sys_call # go to the SYSRET fastpath
+
+ CFI_RESTORE_STATE
  CFI_ENDPROC
 END(ret_from_fork)
 
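(A note on the movq_cfi helper used by save_args/save_rest/save_paranoid above: it is a gas macro introduced by the same patch series — in asm/dwarf2.h, if memory serves — that pairs each register store with its dwarf2 unwind annotation, roughly:

	.macro movq_cfi reg offset=0
	movq %\reg, \offset(%rsp)
	CFI_REL_OFFSET \reg, \offset
	.endm

movq_cfi_restore, used by ptregscall_common below, is the symmetric load plus CFI_RESTORE. This is an illustrative sketch, not part of the patch.)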
@@ -278,20 +399,20 @@ END(ret_from_fork)
  * SYSCALL does not save anything on the stack and does not change the
  * stack pointer.
  */
 
 /*
  * Register setup:
  * rax system call number
  * rdi arg0
  * rcx return address for syscall/sysret, C arg3
  * rsi arg1
  * rdx arg2
  * r10 arg3 (--> moved to rcx for C)
  * r8 arg4
  * r9 arg5
  * r11 eflags for syscall/sysret, temporary for C
  * r12-r15,rbp,rbx saved by C code, not touched.
  *
  * Interrupts are off on entry.
  * Only called from user space.
  *
@@ -301,7 +422,7 @@ END(ret_from_fork)
  * When user can change the frames always force IRET. That is because
  * it deals with uncanonical addresses better. SYSRET has trouble
  * with them due to bugs in both AMD and Intel CPUs.
  */
 
 ENTRY(system_call)
  CFI_STARTPROC simple
@@ -317,7 +438,7 @@ ENTRY(system_call)
  */
 ENTRY(system_call_after_swapgs)
 
  movq %rsp,%gs:pda_oldrsp
  movq %gs:pda_kernelstack,%rsp
  /*
  * No need to follow this irqs off/on section - it's straight
@@ -325,7 +446,7 @@ ENTRY(system_call_after_swapgs)
  */
  ENABLE_INTERRUPTS(CLBR_NONE)
  SAVE_ARGS 8,1
  movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
  movq %rcx,RIP-ARGOFFSET(%rsp)
  CFI_REL_OFFSET rip,RIP-ARGOFFSET
  GET_THREAD_INFO(%rcx)
@@ -339,19 +460,19 @@ system_call_fastpath:
  movq %rax,RAX-ARGOFFSET(%rsp)
 /*
  * Syscall return path ending with SYSRET (fast path)
  * Has incomplete stack frame and undefined top of stack.
  */
 ret_from_sys_call:
  movl $_TIF_ALLWORK_MASK,%edi
  /* edi: flagmask */
 sysret_check:
  LOCKDEP_SYS_EXIT
  GET_THREAD_INFO(%rcx)
  DISABLE_INTERRUPTS(CLBR_NONE)
  TRACE_IRQS_OFF
  movl TI_flags(%rcx),%edx
  andl %edi,%edx
  jnz sysret_careful
  CFI_REMEMBER_STATE
  /*
  * sysretq will re-enable interrupts:
@@ -366,7 +487,7 @@ sysret_check:
 
  CFI_RESTORE_STATE
  /* Handle reschedules */
  /* edx: work, edi: workmask */
 sysret_careful:
  bt $TIF_NEED_RESCHED,%edx
  jnc sysret_signal
@@ -379,7 +500,7 @@ sysret_careful:
  CFI_ADJUST_CFA_OFFSET -8
  jmp sysret_check
 
  /* Handle a signal */
 sysret_signal:
  TRACE_IRQS_ON
  ENABLE_INTERRUPTS(CLBR_NONE)
@@ -388,17 +509,20 @@ sysret_signal:
  jc sysret_audit
 #endif
  /* edx: work flags (arg3) */
- leaq do_notify_resume(%rip),%rax
  leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
  xorl %esi,%esi # oldset -> arg2
- call ptregscall_common
+ SAVE_REST
+ FIXUP_TOP_OF_STACK %r11
+ call do_notify_resume
+ RESTORE_TOP_OF_STACK %r11
+ RESTORE_REST
  movl $_TIF_WORK_MASK,%edi
  /* Use IRET because user could have changed frame. This
  works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
  DISABLE_INTERRUPTS(CLBR_NONE)
  TRACE_IRQS_OFF
  jmp int_with_check
 
 badsys:
  movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
  jmp ret_from_sys_call
@@ -437,7 +561,7 @@ sysret_audit:
 #endif /* CONFIG_AUDITSYSCALL */
 
  /* Do syscall tracing */
 tracesys:
 #ifdef CONFIG_AUDITSYSCALL
  testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
  jz auditsys
@@ -460,8 +584,8 @@ tracesys:
  call *sys_call_table(,%rax,8)
  movq %rax,RAX-ARGOFFSET(%rsp)
  /* Use IRET because user could have changed frame */
 
 /*
  * Syscall return path ending with IRET.
  * Has correct top of stack, but partial stack frame.
  */
@@ -505,18 +629,18 @@ int_very_careful:
  TRACE_IRQS_ON
  ENABLE_INTERRUPTS(CLBR_NONE)
  SAVE_REST
  /* Check for syscall exit trace */
  testl $_TIF_WORK_SYSCALL_EXIT,%edx
  jz int_signal
  pushq %rdi
  CFI_ADJUST_CFA_OFFSET 8
  leaq 8(%rsp),%rdi # &ptregs -> arg1
  call syscall_trace_leave
  popq %rdi
  CFI_ADJUST_CFA_OFFSET -8
  andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
  jmp int_restore_rest
 
 int_signal:
  testl $_TIF_DO_NOTIFY_MASK,%edx
  jz 1f
@@ -531,22 +655,24 @@ int_restore_rest:
  jmp int_with_check
  CFI_ENDPROC
 END(system_call)
 
 /*
  * Certain special system calls that need to save a complete full stack frame.
  */
-
  .macro PTREGSCALL label,func,arg
- .globl \label
-\label:
- leaq \func(%rip),%rax
- leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
- jmp ptregscall_common
+ENTRY(\label)
+ PARTIAL_FRAME 1 8 /* offset 8: return address */
+ subq $REST_SKIP, %rsp
+ CFI_ADJUST_CFA_OFFSET REST_SKIP
+ call save_rest
+ DEFAULT_FRAME 0 8 /* offset 8: return address */
+ leaq 8(%rsp), \arg /* pt_regs pointer */
+ call \func
+ jmp ptregscall_common
+ CFI_ENDPROC
 END(\label)
  .endm
 
- CFI_STARTPROC
-
  PTREGSCALL stub_clone, sys_clone, %r8
  PTREGSCALL stub_fork, sys_fork, %rdi
  PTREGSCALL stub_vfork, sys_vfork, %rdi
@@ -554,25 +680,18 @@ END(\label)
  PTREGSCALL stub_iopl, sys_iopl, %rsi
 
 ENTRY(ptregscall_common)
- popq %r11
- CFI_ADJUST_CFA_OFFSET -8
- CFI_REGISTER rip, r11
- SAVE_REST
- movq %r11, %r15
- CFI_REGISTER rip, r15
- FIXUP_TOP_OF_STACK %r11
- call *%rax
- RESTORE_TOP_OF_STACK %r11
- movq %r15, %r11
- CFI_REGISTER rip, r11
- RESTORE_REST
- pushq %r11
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rip, 0
- ret
+ DEFAULT_FRAME 1 8 /* offset 8: return address */
+ RESTORE_TOP_OF_STACK %r11, 8
+ movq_cfi_restore R15+8, r15
+ movq_cfi_restore R14+8, r14
+ movq_cfi_restore R13+8, r13
+ movq_cfi_restore R12+8, r12
+ movq_cfi_restore RBP+8, rbp
+ movq_cfi_restore RBX+8, rbx
+ ret $REST_SKIP /* pop extended registers */
  CFI_ENDPROC
 END(ptregscall_common)
 
 ENTRY(stub_execve)
  CFI_STARTPROC
  popq %r11
@@ -588,11 +707,11 @@ ENTRY(stub_execve)
  jmp int_ret_from_sys_call
  CFI_ENDPROC
 END(stub_execve)
 
 /*
  * sigreturn is special because it needs to restore all registers on return.
  * This cannot be done with SYSRET, so use the IRET return path instead.
  */
 ENTRY(stub_rt_sigreturn)
  CFI_STARTPROC
  addq $8, %rsp
@@ -608,70 +727,70 @@ ENTRY(stub_rt_sigreturn)
 END(stub_rt_sigreturn)
 
 /*
- * initial frame state for interrupts and exceptions
+ * Build the entry stubs and pointer table with some assembler magic.
+ * We pack 7 stubs into a single 32-byte chunk, which will fit in a
+ * single cache line on all modern x86 implementations.
  */
- .macro _frame ref
- CFI_STARTPROC simple
- CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,SS+8-\ref
- /*CFI_REL_OFFSET ss,SS-\ref*/
- CFI_REL_OFFSET rsp,RSP-\ref
- /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
- /*CFI_REL_OFFSET cs,CS-\ref*/
- CFI_REL_OFFSET rip,RIP-\ref
- .endm
+ .section .init.rodata,"a"
+ENTRY(interrupt)
+ .text
+ .p2align 5
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
+ENTRY(irq_entries_start)
+ INTR_FRAME
+vector=FIRST_EXTERNAL_VECTOR
+.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
+ .balign 32
+ .rept 7
+ .if vector < NR_VECTORS
+ .if vector <> FIRST_EXTERNAL_VECTOR
+ CFI_ADJUST_CFA_OFFSET -8
+ .endif
+1: pushq $(~vector+0x80) /* Note: always in signed byte range */
+ CFI_ADJUST_CFA_OFFSET 8
+ .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
+ jmp 2f
+ .endif
+ .previous
+ .quad 1b
+ .text
+vector=vector+1
+ .endif
+ .endr
+2: jmp common_interrupt
+.endr
+ CFI_ENDPROC
+END(irq_entries_start)
 
-/* initial frame state for interrupts (and exceptions without error code) */
-#define INTR_FRAME _frame RIP
-/* initial frame state for exceptions with error code (and interrupts with
-   vector already pushed) */
-#define XCPT_FRAME _frame ORIG_RAX
+.previous
+END(interrupt)
+.previous
 
 /*
  * Interrupt entry/exit.
  *
  * Interrupt entry points save only callee clobbered registers in fast path.
  *
  * Entry runs with interrupts off.
  */
 
-/* 0(%rsp): interrupt number */
+/* 0(%rsp): ~(interrupt number) */
  .macro interrupt func
- cld
- SAVE_ARGS
- leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
- pushq %rbp
- /*
- * Save rbp twice: One is for marking the stack frame, as usual, and the
- * other, to fill pt_regs properly. This is because bx comes right
- * before the last saved register in that structure, and not bp. If the
- * base pointer were in the place bx is today, this would not be needed.
- */
- movq %rbp, -8(%rsp)
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rbp, 0
- movq %rsp,%rbp
- CFI_DEF_CFA_REGISTER rbp
- testl $3,CS(%rdi)
- je 1f
- SWAPGS
- /* irqcount is used to check if a CPU is already on an interrupt
-    stack or not. While this is essentially redundant with preempt_count
-    it is a little cheaper to use a separate counter in the PDA
-    (short of moving irq_enter into assembly, which would be too
-    much work) */
-1: incl %gs:pda_irqcount
- cmoveq %gs:pda_irqstackptr,%rsp
- push %rbp # backlink for old unwinder
- /*
- * We entered an interrupt context - irqs are off:
- */
- TRACE_IRQS_OFF
+ subq $10*8, %rsp
+ CFI_ADJUST_CFA_OFFSET 10*8
+ call save_args
+ PARTIAL_FRAME 0
  call \func
  .endm
 
-ENTRY(common_interrupt)
+ /*
+ * The interrupt stubs push (~vector+0x80) onto the stack and
+ * then jump to common_interrupt.
+ */
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
+common_interrupt:
  XCPT_FRAME
+ addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
  interrupt do_IRQ
  /* 0(%rsp): oldrsp-ARGOFFSET */
 ret_from_intr:
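(A sketch of why the stub packing above works, not part of the patch: each generated stub is a 2-byte "pushq $imm8" plus a 2-byte short "jmp", so seven 4-byte stubs fit in one 32-byte, cache-line-sized chunk, the seventh falling through to the shared "jmp common_interrupt". The pushed immediate stays in signed-byte range because the vector is encoded as ~vector+0x80: for vector 0x20, the stub pushes $0x5f (= ~0x20 + 0x80), and the "addq $-0x80,(%rsp)" at common_interrupt turns that back into -0x21 = ~0x20, from which the handler re-complements the original vector.)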
@@ -685,12 +804,12 @@ exit_intr:
  GET_THREAD_INFO(%rcx)
  testl $3,CS-ARGOFFSET(%rsp)
  je retint_kernel
 
  /* Interrupt came from user space */
  /*
  * Has a correct top of stack, but a partial stack frame
  * %rcx: thread info. Interrupts off.
  */
 retint_with_reschedule:
  movl $_TIF_WORK_MASK,%edi
 retint_check:
@@ -763,20 +882,20 @@ retint_careful:
  pushq %rdi
  CFI_ADJUST_CFA_OFFSET 8
  call schedule
  popq %rdi
  CFI_ADJUST_CFA_OFFSET -8
  GET_THREAD_INFO(%rcx)
  DISABLE_INTERRUPTS(CLBR_NONE)
  TRACE_IRQS_OFF
  jmp retint_check
 
 retint_signal:
  testl $_TIF_DO_NOTIFY_MASK,%edx
  jz retint_swapgs
  TRACE_IRQS_ON
  ENABLE_INTERRUPTS(CLBR_NONE)
  SAVE_REST
  movq $-1,ORIG_RAX(%rsp)
  xorl %esi,%esi # oldset
  movq %rsp,%rdi # &pt_regs
  call do_notify_resume
@@ -798,324 +917,211 @@ ENTRY(retint_kernel)
  jnc retint_restore_args
  call preempt_schedule_irq
  jmp exit_intr
 #endif
 
  CFI_ENDPROC
 END(common_interrupt)
 
 /*
  * APIC interrupts.
  */
- .macro apicinterrupt num,func
+.macro apicinterrupt num sym do_sym
+ENTRY(\sym)
  INTR_FRAME
  pushq $~(\num)
  CFI_ADJUST_CFA_OFFSET 8
- interrupt \func
+ interrupt \do_sym
  jmp ret_from_intr
  CFI_ENDPROC
- .endm
-
-ENTRY(thermal_interrupt)
- apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
-END(thermal_interrupt)
-
-ENTRY(threshold_interrupt)
- apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
-END(threshold_interrupt)
-
-#ifdef CONFIG_SMP
-ENTRY(reschedule_interrupt)
- apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
-END(reschedule_interrupt)
-
- .macro INVALIDATE_ENTRY num
-ENTRY(invalidate_interrupt\num)
- apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
-END(invalidate_interrupt\num)
- .endm
+END(\sym)
+.endm
 
- INVALIDATE_ENTRY 0
- INVALIDATE_ENTRY 1
- INVALIDATE_ENTRY 2
- INVALIDATE_ENTRY 3
- INVALIDATE_ENTRY 4
- INVALIDATE_ENTRY 5
- INVALIDATE_ENTRY 6
- INVALIDATE_ENTRY 7
-
-ENTRY(call_function_interrupt)
- apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
-END(call_function_interrupt)
-ENTRY(call_function_single_interrupt)
- apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
-END(call_function_single_interrupt)
-ENTRY(irq_move_cleanup_interrupt)
- apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
-END(irq_move_cleanup_interrupt)
+#ifdef CONFIG_SMP
+apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
+ irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
 #endif
 
-ENTRY(apic_timer_interrupt)
- apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
-END(apic_timer_interrupt)
+apicinterrupt UV_BAU_MESSAGE \
+ uv_bau_message_intr1 uv_bau_message_interrupt
+apicinterrupt LOCAL_TIMER_VECTOR \
+ apic_timer_interrupt smp_apic_timer_interrupt
+
+#ifdef CONFIG_SMP
+apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
+ invalidate_interrupt0 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \
+ invalidate_interrupt1 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \
+ invalidate_interrupt2 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \
+ invalidate_interrupt3 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \
+ invalidate_interrupt4 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \
+ invalidate_interrupt5 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \
+ invalidate_interrupt6 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
+ invalidate_interrupt7 smp_invalidate_interrupt
+#endif
 
-ENTRY(uv_bau_message_intr1)
- apicinterrupt 220,uv_bau_message_interrupt
-END(uv_bau_message_intr1)
+apicinterrupt THRESHOLD_APIC_VECTOR \
+ threshold_interrupt mce_threshold_interrupt
+apicinterrupt THERMAL_APIC_VECTOR \
+ thermal_interrupt smp_thermal_interrupt
+
+#ifdef CONFIG_SMP
+apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
+ call_function_single_interrupt smp_call_function_single_interrupt
+apicinterrupt CALL_FUNCTION_VECTOR \
+ call_function_interrupt smp_call_function_interrupt
+apicinterrupt RESCHEDULE_VECTOR \
+ reschedule_interrupt smp_reschedule_interrupt
+#endif
 
-ENTRY(error_interrupt)
- apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
-END(error_interrupt)
+apicinterrupt ERROR_APIC_VECTOR \
+ error_interrupt smp_error_interrupt
+apicinterrupt SPURIOUS_APIC_VECTOR \
+ spurious_interrupt smp_spurious_interrupt
 
-ENTRY(spurious_interrupt)
- apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
-END(spurious_interrupt)
-
 /*
  * Exception entry points.
  */
- .macro zeroentry sym
+.macro zeroentry sym do_sym
+ENTRY(\sym)
  INTR_FRAME
  PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq $0 /* push error code/oldrax */
- CFI_ADJUST_CFA_OFFSET 8
- pushq %rax /* push real oldrax to the rdi slot */
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rax,0
- leaq \sym(%rip),%rax
- jmp error_entry
+ pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
+ subq $15*8,%rsp
+ CFI_ADJUST_CFA_OFFSET 15*8
+ call error_entry
+ DEFAULT_FRAME 0
+ movq %rsp,%rdi /* pt_regs pointer */
+ xorl %esi,%esi /* no error code */
+ call \do_sym
+ jmp error_exit /* %ebx: no swapgs flag */
  CFI_ENDPROC
- .endm
+END(\sym)
+.endm
 
- .macro errorentry sym
- XCPT_FRAME
+.macro paranoidzeroentry sym do_sym
+ENTRY(\sym)
+ INTR_FRAME
  PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq %rax
+ pushq $-1 /* ORIG_RAX: no syscall to restart */
  CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rax,0
- leaq \sym(%rip),%rax
- jmp error_entry
+ subq $15*8, %rsp
+ call save_paranoid
+ TRACE_IRQS_OFF
+ movq %rsp,%rdi /* pt_regs pointer */
+ xorl %esi,%esi /* no error code */
+ call \do_sym
+ jmp paranoid_exit /* %ebx: no swapgs flag */
  CFI_ENDPROC
- .endm
+END(\sym)
+.endm
 
- /* error code is on the stack already */
- /* handle NMI like exceptions that can happen everywhere */
- .macro paranoidentry sym, ist=0, irqtrace=1
- SAVE_ALL
- cld
- movl $1,%ebx
- movl $MSR_GS_BASE,%ecx
- rdmsr
- testl %edx,%edx
- js 1f
- SWAPGS
- xorl %ebx,%ebx
-1:
- .if \ist
- movq %gs:pda_data_offset, %rbp
- .endif
- .if \irqtrace
- TRACE_IRQS_OFF
- .endif
- movq %rsp,%rdi
- movq ORIG_RAX(%rsp),%rsi
- movq $-1,ORIG_RAX(%rsp)
- .if \ist
- subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
- .endif
- call \sym
- .if \ist
- addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
- .endif
- DISABLE_INTERRUPTS(CLBR_NONE)
- .if \irqtrace
+.macro paranoidzeroentry_ist sym do_sym ist
+ENTRY(\sym)
+ INTR_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
+ pushq $-1 /* ORIG_RAX: no syscall to restart */
+ CFI_ADJUST_CFA_OFFSET 8
+ subq $15*8, %rsp
+ call save_paranoid
  TRACE_IRQS_OFF
- .endif
- .endm
+ movq %rsp,%rdi /* pt_regs pointer */
+ xorl %esi,%esi /* no error code */
+ movq %gs:pda_data_offset, %rbp
+ subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+ call \do_sym
+ addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+ jmp paranoid_exit /* %ebx: no swapgs flag */
+ CFI_ENDPROC
+END(\sym)
+.endm
 
- /*
- * "Paranoid" exit path from exception stack.
- * Paranoid because this is used by NMIs and cannot take
- * any kernel state for granted.
- * We don't do kernel preemption checks here, because only
- * NMI should be common and it does not enable IRQs and
- * cannot get reschedule ticks.
- *
- * "trace" is 0 for the NMI handler only, because irq-tracing
- * is fundamentally NMI-unsafe. (we cannot change the soft and
- * hard flags at once, atomically)
- */
- .macro paranoidexit trace=1
- /* ebx: no swapgs flag */
-paranoid_exit\trace:
- testl %ebx,%ebx /* swapgs needed? */
- jnz paranoid_restore\trace
- testl $3,CS(%rsp)
- jnz paranoid_userspace\trace
-paranoid_swapgs\trace:
- .if \trace
- TRACE_IRQS_IRETQ 0
- .endif
- SWAPGS_UNSAFE_STACK
-paranoid_restore\trace:
- RESTORE_ALL 8
- jmp irq_return
-paranoid_userspace\trace:
- GET_THREAD_INFO(%rcx)
- movl TI_flags(%rcx),%ebx
- andl $_TIF_WORK_MASK,%ebx
- jz paranoid_swapgs\trace
- movq %rsp,%rdi /* &pt_regs */
- call sync_regs
- movq %rax,%rsp /* switch stack for scheduling */
- testl $_TIF_NEED_RESCHED,%ebx
- jnz paranoid_schedule\trace
- movl %ebx,%edx /* arg3: thread flags */
- .if \trace
- TRACE_IRQS_ON
- .endif
- ENABLE_INTERRUPTS(CLBR_NONE)
- xorl %esi,%esi /* arg2: oldset */
- movq %rsp,%rdi /* arg1: &pt_regs */
- call do_notify_resume
- DISABLE_INTERRUPTS(CLBR_NONE)
- .if \trace
- TRACE_IRQS_OFF
- .endif
- jmp paranoid_userspace\trace
-paranoid_schedule\trace:
- .if \trace
- TRACE_IRQS_ON
- .endif
- ENABLE_INTERRUPTS(CLBR_ANY)
- call schedule
- DISABLE_INTERRUPTS(CLBR_ANY)
- .if \trace
- TRACE_IRQS_OFF
- .endif
- jmp paranoid_userspace\trace
+.macro errorentry sym do_sym
+ENTRY(\sym)
+ XCPT_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
+ subq $15*8,%rsp
+ CFI_ADJUST_CFA_OFFSET 15*8
+ call error_entry
+ DEFAULT_FRAME 0
+ movq %rsp,%rdi /* pt_regs pointer */
+ movq ORIG_RAX(%rsp),%rsi /* get error code */
+ movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
+ call \do_sym
+ jmp error_exit /* %ebx: no swapgs flag */
  CFI_ENDPROC
- .endm
+END(\sym)
+.endm
 
-/*
- * Exception entry point. This expects an error code/orig_rax on the stack
- * and the exception handler in %rax.
- */
-KPROBE_ENTRY(error_entry)
- _frame RDI
- CFI_REL_OFFSET rax,0
- /* rdi slot contains rax, oldrax contains error code */
- cld
- subq $14*8,%rsp
- CFI_ADJUST_CFA_OFFSET (14*8)
- movq %rsi,13*8(%rsp)
- CFI_REL_OFFSET rsi,RSI
- movq 14*8(%rsp),%rsi /* load rax from rdi slot */
- CFI_REGISTER rax,rsi
- movq %rdx,12*8(%rsp)
- CFI_REL_OFFSET rdx,RDX
- movq %rcx,11*8(%rsp)
- CFI_REL_OFFSET rcx,RCX
- movq %rsi,10*8(%rsp) /* store rax */
- CFI_REL_OFFSET rax,RAX
- movq %r8, 9*8(%rsp)
- CFI_REL_OFFSET r8,R8
- movq %r9, 8*8(%rsp)
- CFI_REL_OFFSET r9,R9
- movq %r10,7*8(%rsp)
- CFI_REL_OFFSET r10,R10
- movq %r11,6*8(%rsp)
- CFI_REL_OFFSET r11,R11
- movq %rbx,5*8(%rsp)
- CFI_REL_OFFSET rbx,RBX
- movq %rbp,4*8(%rsp)
- CFI_REL_OFFSET rbp,RBP
- movq %r12,3*8(%rsp)
- CFI_REL_OFFSET r12,R12
- movq %r13,2*8(%rsp)
- CFI_REL_OFFSET r13,R13
- movq %r14,1*8(%rsp)
- CFI_REL_OFFSET r14,R14
- movq %r15,(%rsp)
- CFI_REL_OFFSET r15,R15
- xorl %ebx,%ebx
- testl $3,CS(%rsp)
- je error_kernelspace
-error_swapgs:
- SWAPGS
-error_sti:
- TRACE_IRQS_OFF
- movq %rdi,RDI(%rsp)
- CFI_REL_OFFSET rdi,RDI
- movq %rsp,%rdi
- movq ORIG_RAX(%rsp),%rsi /* get error code */
- movq $-1,ORIG_RAX(%rsp)
- call *%rax
- /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
-error_exit:
- movl %ebx,%eax
- RESTORE_REST
- DISABLE_INTERRUPTS(CLBR_NONE)
+ /* error code is on the stack already */
+.macro paranoiderrorentry sym do_sym
+ENTRY(\sym)
+ XCPT_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
+ subq $15*8,%rsp
+ CFI_ADJUST_CFA_OFFSET 15*8
+ call save_paranoid
+ DEFAULT_FRAME 0
  TRACE_IRQS_OFF
- GET_THREAD_INFO(%rcx)
- testl %eax,%eax
- jne retint_kernel
- LOCKDEP_SYS_EXIT_IRQ
- movl TI_flags(%rcx),%edx
- movl $_TIF_WORK_MASK,%edi
- andl %edi,%edx
- jnz retint_careful
- jmp retint_swapgs
+ movq %rsp,%rdi /* pt_regs pointer */
+ movq ORIG_RAX(%rsp),%rsi /* get error code */
+ movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
+ call \do_sym
+ jmp paranoid_exit /* %ebx: no swapgs flag */
  CFI_ENDPROC
+END(\sym)
+.endm
 
-error_kernelspace:
- incl %ebx
- /* There are two places in the kernel that can potentially fault with
-    usergs. Handle them here. The exception handlers after
-    iret run with kernel gs again, so don't set the user space flag.
-    B stepping K8s sometimes report an truncated RIP for IRET
-    exceptions returning to compat mode. Check for these here too. */
- leaq irq_return(%rip),%rcx
- cmpq %rcx,RIP(%rsp)
- je error_swapgs
- movl %ecx,%ecx /* zero extend */
- cmpq %rcx,RIP(%rsp)
- je error_swapgs
- cmpq $gs_change,RIP(%rsp)
- je error_swapgs
- jmp error_sti
-KPROBE_END(error_entry)
-
- /* Reload gs selector with exception handling */
- /* edi: new selector */
+zeroentry divide_error do_divide_error
+zeroentry overflow do_overflow
+zeroentry bounds do_bounds
+zeroentry invalid_op do_invalid_op
+zeroentry device_not_available do_device_not_available
+paranoiderrorentry double_fault do_double_fault
+zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
+errorentry invalid_TSS do_invalid_TSS
+errorentry segment_not_present do_segment_not_present
+zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
+zeroentry coprocessor_error do_coprocessor_error
+errorentry alignment_check do_alignment_check
+zeroentry simd_coprocessor_error do_simd_coprocessor_error
+
+ /* Reload gs selector with exception handling */
+ /* edi: new selector */
 ENTRY(native_load_gs_index)
  CFI_STARTPROC
  pushf
  CFI_ADJUST_CFA_OFFSET 8
  DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
  SWAPGS
 gs_change:
  movl %edi,%gs
 2: mfence /* workaround */
  SWAPGS
  popf
  CFI_ADJUST_CFA_OFFSET -8
  ret
  CFI_ENDPROC
-ENDPROC(native_load_gs_index)
+END(native_load_gs_index)
 
  .section __ex_table,"a"
  .align 8
  .quad gs_change,bad_gs
  .previous
  .section .fixup,"ax"
  /* running with kernelgs */
 bad_gs:
  SWAPGS /* switch back to user gs */
  xorl %eax,%eax
  movl %eax,%gs
  jmp 2b
  .previous
 
 /*
  * Create a kernel thread.
  *
@@ -1138,7 +1144,7 @@ ENTRY(kernel_thread)
 
  xorl %r8d,%r8d
  xorl %r9d,%r9d
 
  # clone now
  call do_fork
  movq %rax,RAX(%rsp)
@@ -1149,15 +1155,15 @@ ENTRY(kernel_thread)
  * so internally to the x86_64 port you can rely on kernel_thread()
  * not to reschedule the child before returning, this avoids the need
  * of hacks for example to fork off the per-CPU idle tasks.
  * [Hopefully no generic code relies on the reschedule -AK]
  */
  RESTORE_ALL
  UNFAKE_STACK_FRAME
  ret
  CFI_ENDPROC
-ENDPROC(kernel_thread)
+END(kernel_thread)
 
-child_rip:
+ENTRY(child_rip)
  pushq $0 # fake return address
  CFI_STARTPROC
  /*
@@ -1170,8 +1176,9 @@ child_rip:
  # exit
  mov %eax, %edi
  call do_exit
+ ud2 # padding for call trace
  CFI_ENDPROC
-ENDPROC(child_rip)
+END(child_rip)
 
 /*
  * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
@@ -1191,10 +1198,10 @@ ENDPROC(child_rip)
 ENTRY(kernel_execve)
  CFI_STARTPROC
  FAKE_STACK_FRAME $0
  SAVE_ALL
  movq %rsp,%rcx
  call sys_execve
  movq %rax, RAX(%rsp)
  RESTORE_REST
  testq %rax,%rax
  je int_ret_from_sys_call
@@ -1202,129 +1209,7 @@ ENTRY(kernel_execve)
  UNFAKE_STACK_FRAME
  ret
  CFI_ENDPROC
-ENDPROC(kernel_execve)
+END(kernel_execve)
-
-KPROBE_ENTRY(page_fault)
- errorentry do_page_fault
-KPROBE_END(page_fault)
-
-ENTRY(coprocessor_error)
- zeroentry do_coprocessor_error
-END(coprocessor_error)
-
-ENTRY(simd_coprocessor_error)
- zeroentry do_simd_coprocessor_error
-END(simd_coprocessor_error)
-
-ENTRY(device_not_available)
- zeroentry do_device_not_available
-END(device_not_available)
-
- /* runs on exception stack */
-KPROBE_ENTRY(debug)
- INTR_FRAME
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_debug, DEBUG_STACK
- paranoidexit
-KPROBE_END(debug)
-
- /* runs on exception stack */
-KPROBE_ENTRY(nmi)
- INTR_FRAME
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq $-1
- CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_nmi, 0, 0
-#ifdef CONFIG_TRACE_IRQFLAGS
- paranoidexit 0
-#else
- jmp paranoid_exit1
- CFI_ENDPROC
-#endif
-KPROBE_END(nmi)
-
-KPROBE_ENTRY(int3)
- INTR_FRAME
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_int3, DEBUG_STACK
- jmp paranoid_exit1
- CFI_ENDPROC
-KPROBE_END(int3)
-
-ENTRY(overflow)
- zeroentry do_overflow
-END(overflow)
-
-ENTRY(bounds)
- zeroentry do_bounds
-END(bounds)
-
-ENTRY(invalid_op)
- zeroentry do_invalid_op
-END(invalid_op)
-
-ENTRY(coprocessor_segment_overrun)
- zeroentry do_coprocessor_segment_overrun
-END(coprocessor_segment_overrun)
-
- /* runs on exception stack */
-ENTRY(double_fault)
- XCPT_FRAME
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- paranoidentry do_double_fault
- jmp paranoid_exit1
- CFI_ENDPROC
-END(double_fault)
-
-ENTRY(invalid_TSS)
- errorentry do_invalid_TSS
-END(invalid_TSS)
-
-ENTRY(segment_not_present)
- errorentry do_segment_not_present
-END(segment_not_present)
-
- /* runs on exception stack */
-ENTRY(stack_segment)
- XCPT_FRAME
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- paranoidentry do_stack_segment
- jmp paranoid_exit1
- CFI_ENDPROC
-END(stack_segment)
-
-KPROBE_ENTRY(general_protection)
- errorentry do_general_protection
-KPROBE_END(general_protection)
-
-ENTRY(alignment_check)
- errorentry do_alignment_check
-END(alignment_check)
-
-ENTRY(divide_error)
- zeroentry do_divide_error
-END(divide_error)
-
-ENTRY(spurious_interrupt_bug)
- zeroentry do_spurious_interrupt_bug
-END(spurious_interrupt_bug)
-
-#ifdef CONFIG_X86_MCE
- /* runs on exception stack */
-ENTRY(machine_check)
- INTR_FRAME
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_machine_check
- jmp paranoid_exit1
- CFI_ENDPROC
-END(machine_check)
-#endif
 
 /* Call softirq on interrupt stack. Interrupts are off. */
 ENTRY(call_softirq)
@@ -1344,40 +1229,33 @@ ENTRY(call_softirq)
  decl %gs:pda_irqcount
  ret
  CFI_ENDPROC
-ENDPROC(call_softirq)
+END(call_softirq)
-
-KPROBE_ENTRY(ignore_sysret)
- CFI_STARTPROC
- mov $-ENOSYS,%eax
- sysret
- CFI_ENDPROC
-ENDPROC(ignore_sysret)
 
 #ifdef CONFIG_XEN
-ENTRY(xen_hypervisor_callback)
- zeroentry xen_do_hypervisor_callback
-END(xen_hypervisor_callback)
+zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
 
 /*
-# A note on the "critical region" in our callback handler.
-# We want to avoid stacking callback handlers due to events occurring
-# during handling of the last event. To do this, we keep events disabled
-# until we've done all processing. HOWEVER, we must enable events before
-# popping the stack frame (can't be done atomically) and so it would still
-# be possible to get enough handler activations to overflow the stack.
-# Although unlikely, bugs of that kind are hard to track down, so we'd
-# like to avoid the possibility.
-# So, on entry to the handler we detect whether we interrupted an
-# existing activation in its critical region -- if so, we pop the current
-# activation and restart the handler using the previous one.
-*/
+ * A note on the "critical region" in our callback handler.
+ * We want to avoid stacking callback handlers due to events occurring
+ * during handling of the last event. To do this, we keep events disabled
+ * until we've done all processing. HOWEVER, we must enable events before
+ * popping the stack frame (can't be done atomically) and so it would still
+ * be possible to get enough handler activations to overflow the stack.
+ * Although unlikely, bugs of that kind are hard to track down, so we'd
+ * like to avoid the possibility.
+ * So, on entry to the handler we detect whether we interrupted an
+ * existing activation in its critical region -- if so, we pop the current
+ * activation and restart the handler using the previous one.
+ */
 ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct *pt_regs)
  CFI_STARTPROC
-/* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
-   see the correct pointer to the pt_regs */
+/*
+ * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
+ * see the correct pointer to the pt_regs
+ */
  movq %rdi, %rsp # we don't return, adjust the stack frame
  CFI_ENDPROC
- CFI_DEFAULT_STACK
+ DEFAULT_FRAME
 11: incl %gs:pda_irqcount
  movq %rsp,%rbp
  CFI_DEF_CFA_REGISTER rbp
@@ -1392,23 +1270,26 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
 END(do_hypervisor_callback)
 
 /*
-# Hypervisor uses this for application faults while it executes.
-# We get here for two reasons:
-#  1. Fault while reloading DS, ES, FS or GS
-#  2. Fault while executing IRET
-# Category 1 we do not need to fix up as Xen has already reloaded all segment
-# registers that could be reloaded and zeroed the others.
-# Category 2 we fix up by killing the current process. We cannot use the
-# normal Linux return path in this case because if we use the IRET hypercall
-# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
-# We distinguish between categories by comparing each saved segment register
-# with its current contents: any discrepancy means we in category 1.
-*/
+ * Hypervisor uses this for application faults while it executes.
+ * We get here for two reasons:
+ *  1. Fault while reloading DS, ES, FS or GS
+ *  2. Fault while executing IRET
+ * Category 1 we do not need to fix up as Xen has already reloaded all segment
+ * registers that could be reloaded and zeroed the others.
+ * Category 2 we fix up by killing the current process. We cannot use the
+ * normal Linux return path in this case because if we use the IRET hypercall
+ * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
+ * We distinguish between categories by comparing each saved segment register
+ * with its current contents: any discrepancy means we in category 1.
+ */
 ENTRY(xen_failsafe_callback)
- framesz = (RIP-0x30) /* workaround buggy gas */
- _frame framesz
- CFI_REL_OFFSET rcx, 0
- CFI_REL_OFFSET r11, 8
+ INTR_FRAME 1 (6*8)
+ /*CFI_REL_OFFSET gs,GS*/
+ /*CFI_REL_OFFSET fs,FS*/
+ /*CFI_REL_OFFSET es,ES*/
+ /*CFI_REL_OFFSET ds,DS*/
+ CFI_REL_OFFSET r11,8
+ CFI_REL_OFFSET rcx,0
  movw %ds,%cx
  cmpw %cx,0x10(%rsp)
  CFI_REMEMBER_STATE
@@ -1429,12 +1310,9 @@ ENTRY(xen_failsafe_callback)
  CFI_RESTORE r11
  addq $0x30,%rsp
  CFI_ADJUST_CFA_OFFSET -0x30
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8
- pushq %r11
- CFI_ADJUST_CFA_OFFSET 8
- pushq %rcx
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi $0 /* RIP */
+ pushq_cfi %r11
+ pushq_cfi %rcx
  jmp general_protection
  CFI_RESTORE_STATE
 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
@@ -1444,11 +1322,223 @@ ENTRY(xen_failsafe_callback)
  CFI_RESTORE r11
  addq $0x30,%rsp
  CFI_ADJUST_CFA_OFFSET -0x30
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi $0
  SAVE_ALL
  jmp error_exit
  CFI_ENDPROC
 END(xen_failsafe_callback)
 
 #endif /* CONFIG_XEN */
+
+/*
+ * Some functions should be protected against kprobes
+ */
+ .pushsection .kprobes.text, "ax"
+
+paranoidzeroentry_ist debug do_debug DEBUG_STACK
+paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
+paranoiderrorentry stack_segment do_stack_segment
+errorentry general_protection do_general_protection
+errorentry page_fault do_page_fault
+#ifdef CONFIG_X86_MCE
+paranoidzeroentry machine_check do_machine_check
+#endif
+
+ /*
+ * "Paranoid" exit path from exception stack.
+ * Paranoid because this is used by NMIs and cannot take
+ * any kernel state for granted.
+ * We don't do kernel preemption checks here, because only
+ * NMI should be common and it does not enable IRQs and
+ * cannot get reschedule ticks.
+ *
+ * "trace" is 0 for the NMI handler only, because irq-tracing
+ * is fundamentally NMI-unsafe. (we cannot change the soft and
+ * hard flags at once, atomically)
+ */
+
+ /* ebx: no swapgs flag */
+ENTRY(paranoid_exit)
+ INTR_FRAME
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
+ testl %ebx,%ebx /* swapgs needed? */
+ jnz paranoid_restore
+ testl $3,CS(%rsp)
+ jnz paranoid_userspace
+paranoid_swapgs:
+ TRACE_IRQS_IRETQ 0
+ SWAPGS_UNSAFE_STACK
+paranoid_restore:
+ RESTORE_ALL 8
+ jmp irq_return
+paranoid_userspace:
+ GET_THREAD_INFO(%rcx)
+ movl TI_flags(%rcx),%ebx
+ andl $_TIF_WORK_MASK,%ebx
+ jz paranoid_swapgs
+ movq %rsp,%rdi /* &pt_regs */
+ call sync_regs
+ movq %rax,%rsp /* switch stack for scheduling */
+ testl $_TIF_NEED_RESCHED,%ebx
+ jnz paranoid_schedule
+ movl %ebx,%edx /* arg3: thread flags */
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_NONE)
+ xorl %esi,%esi /* arg2: oldset */
+ movq %rsp,%rdi /* arg1: &pt_regs */
+ call do_notify_resume
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
+ jmp paranoid_userspace
+paranoid_schedule:
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_ANY)
+ call schedule
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ TRACE_IRQS_OFF
+ jmp paranoid_userspace
+ CFI_ENDPROC
+END(paranoid_exit)
+
+/*
+ * Exception entry point. This expects an error code/orig_rax on the stack.
+ * returns in "no swapgs flag" in %ebx.
+ */
+ENTRY(error_entry)
+ XCPT_FRAME
+ CFI_ADJUST_CFA_OFFSET 15*8
+ /* oldrax contains error code */
+ cld
+ movq_cfi rdi, RDI+8
+ movq_cfi rsi, RSI+8
+ movq_cfi rdx, RDX+8
+ movq_cfi rcx, RCX+8
+ movq_cfi rax, RAX+8
+ movq_cfi r8, R8+8
+ movq_cfi r9, R9+8
+ movq_cfi r10, R10+8
+ movq_cfi r11, R11+8
+ movq_cfi rbx, RBX+8
+ movq_cfi rbp, RBP+8
+ movq_cfi r12, R12+8
+ movq_cfi r13, R13+8
+ movq_cfi r14, R14+8
+ movq_cfi r15, R15+8
+ xorl %ebx,%ebx
+ testl $3,CS+8(%rsp)
+ je error_kernelspace
+error_swapgs:
+ SWAPGS
+error_sti:
+ TRACE_IRQS_OFF
+ ret
+ CFI_ENDPROC
+
+/*
+ * There are two places in the kernel that can potentially fault with
+ * usergs. Handle them here. The exception handlers after iret run with
+ * kernel gs again, so don't set the user space flag. B stepping K8s
+ * sometimes report an truncated RIP for IRET exceptions returning to
+ * compat mode. Check for these here too.
+ */
+error_kernelspace:
+ incl %ebx
+ leaq irq_return(%rip),%rcx
+ cmpq %rcx,RIP+8(%rsp)
+ je error_swapgs
+ movl %ecx,%ecx /* zero extend */
+ cmpq %rcx,RIP+8(%rsp)
+ je error_swapgs
+ cmpq $gs_change,RIP+8(%rsp)
+ je error_swapgs
+ jmp error_sti
+END(error_entry)
+
+
+/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
+ENTRY(error_exit)
+ DEFAULT_FRAME
+ movl %ebx,%eax
+ RESTORE_REST
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
+ GET_THREAD_INFO(%rcx)
+ testl %eax,%eax
+ jne retint_kernel
+ LOCKDEP_SYS_EXIT_IRQ
+ movl TI_flags(%rcx),%edx
+ movl $_TIF_WORK_MASK,%edi
+ andl %edi,%edx
+ jnz retint_careful
+ jmp retint_swapgs
+ CFI_ENDPROC
+END(error_exit)
+
+
+ /* runs on exception stack */
+ENTRY(nmi)
+ INTR_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
+ pushq_cfi $-1
+ subq $15*8, %rsp
+ CFI_ADJUST_CFA_OFFSET 15*8
+ call save_paranoid
+ DEFAULT_FRAME 0
+ /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+ movq %rsp,%rdi
+ movq $-1,%rsi
+ call do_nmi
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /* paranoidexit; without TRACE_IRQS_OFF */
+ /* ebx: no swapgs flag */
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ testl %ebx,%ebx /* swapgs needed? */
+ jnz nmi_restore
+ testl $3,CS(%rsp)
+ jnz nmi_userspace
+nmi_swapgs:
+ SWAPGS_UNSAFE_STACK
+nmi_restore:
+ RESTORE_ALL 8
+ jmp irq_return
+nmi_userspace:
+ GET_THREAD_INFO(%rcx)
+ movl TI_flags(%rcx),%ebx
+ andl $_TIF_WORK_MASK,%ebx
+ jz nmi_swapgs
+ movq %rsp,%rdi /* &pt_regs */
+ call sync_regs
+ movq %rax,%rsp /* switch stack for scheduling */
+ testl $_TIF_NEED_RESCHED,%ebx
+ jnz nmi_schedule
+ movl %ebx,%edx /* arg3: thread flags */
+ ENABLE_INTERRUPTS(CLBR_NONE)
+ xorl %esi,%esi /* arg2: oldset */
+ movq %rsp,%rdi /* arg1: &pt_regs */
+ call do_notify_resume
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ jmp nmi_userspace
+nmi_schedule:
+ ENABLE_INTERRUPTS(CLBR_ANY)
+ call schedule
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ jmp nmi_userspace
+ CFI_ENDPROC
+#else
+ jmp paranoid_exit
+ CFI_ENDPROC
+#endif
+END(nmi)
+
+ENTRY(ignore_sysret)
+ CFI_STARTPROC
+ mov $-ENOSYS,%eax
+ sysret
+ CFI_ENDPROC
+END(ignore_sysret)
+
+/*
+ * End of kprobes section
+ */
+ .popsection
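(For reference, substituting into the new zeroentry macro shows the stub each such exception now gets; "zeroentry divide_error do_divide_error" expands to roughly:

	ENTRY(divide_error)
	 INTR_FRAME
	 PARAVIRT_ADJUST_EXCEPTION_FRAME
	 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
	 subq $15*8,%rsp
	 CFI_ADJUST_CFA_OFFSET 15*8
	 call error_entry
	 DEFAULT_FRAME 0
	 movq %rsp,%rdi /* pt_regs pointer */
	 xorl %esi,%esi /* no error code */
	 call do_divide_error
	 jmp error_exit /* %ebx: no swapgs flag */
	 CFI_ENDPROC
	END(divide_error)

i.e. the old hand-written per-exception ENTRY/END boilerplate is now generated by the macro, with register saving and the swapgs decision factored into the shared error_entry/error_exit paths. This is an illustrative expansion, not part of the patch.)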