aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/entry_64.S
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2007-10-11 05:17:24 -0400
committerThomas Gleixner <tglx@linutronix.de>2007-10-11 05:17:24 -0400
commit250c22777fe1ccd7ac588579a6c16db4c0161cc5 (patch)
tree55c317efb7d792ec6fdae1d1937c67a502c48dec /arch/x86/kernel/entry_64.S
parent2db55d344e529492545cb3b755c7e9ba8e4fa94e (diff)
x86_64: move kernel
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/entry_64.S')
-rw-r--r--arch/x86/kernel/entry_64.S1172
1 files changed, 1172 insertions, 0 deletions
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
new file mode 100644
index 000000000000..1d232e5f5658
--- /dev/null
+++ b/arch/x86/kernel/entry_64.S
@@ -0,0 +1,1172 @@
1/*
2 * linux/arch/x86_64/entry.S
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
7 */
8
9/*
10 * entry.S contains the system-call and fault low-level handling routines.
11 *
12 * NOTE: This code handles signal-recognition, which happens every time
13 * after an interrupt and after each system call.
14 *
15 * Normal syscalls and interrupts don't save a full stack frame, this is
16 * only done for syscall tracing, signals or fork/exec et.al.
17 *
18 * A note on terminology:
19 * - top of stack: Architecture defined interrupt frame from SS to RIP
20 * at the top of the kernel process stack.
21 * - partial stack frame: partially saved registers up to R11.
22 * - full stack frame: Like partial stack frame, but all registers saved.
23 *
24 * Some macro usage:
25 * - CFI macros are used to generate dwarf2 unwind information for better
26 * backtraces. They don't change any code.
27 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
28 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
29 * There are unfortunately lots of special cases where some registers
30 * are not touched. The macro is a big mess that should be cleaned up.
31 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
32 * Gives a full stack frame.
33 * - ENTRY/END Define functions in the symbol table.
34 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
35 * frame that is otherwise undefined after a SYSCALL
36 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
37 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
38 */
39
40#include <linux/linkage.h>
41#include <asm/segment.h>
42#include <asm/cache.h>
43#include <asm/errno.h>
44#include <asm/dwarf2.h>
45#include <asm/calling.h>
46#include <asm/asm-offsets.h>
47#include <asm/msr.h>
48#include <asm/unistd.h>
49#include <asm/thread_info.h>
50#include <asm/hw_irq.h>
51#include <asm/page.h>
52#include <asm/irqflags.h>
53
54 .code64
55
/*
 * The retint_kernel label is only defined in this file under CONFIG_PREEMPT.
 * Without it, alias the name to retint_restore_args so that jumps to
 * retint_kernel still resolve.
 */
56#ifndef CONFIG_PREEMPT
57#define retint_kernel retint_restore_args
58#endif
59
60
/*
 * TRACE_IRQS_IRETQ offset=ARGOFFSET
 * Under CONFIG_TRACE_IRQFLAGS, tests bit 9 of the saved EFLAGS word at
 * EFLAGS-offset(%rsp) and invokes TRACE_IRQS_ON when that bit is set.
 * Expands to nothing when CONFIG_TRACE_IRQFLAGS is not defined.
 */
61.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
62#ifdef CONFIG_TRACE_IRQFLAGS
63 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
64 jnc 1f
65 TRACE_IRQS_ON
661:
67#endif
68.endm
69
70/*
71 * C code is not supposed to know about undefined top of stack. Every time
72 * a C function with a pt_regs argument is called from the SYSCALL based
73 * fast path FIXUP_TOP_OF_STACK is needed.
74 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
75 * manipulation.
76 */
77
/*
 * FIXUP_TOP_OF_STACK tmp
 * Fills in the top-of-stack slots of the frame at %rsp: RSP from
 * %gs:pda_oldrsp, SS and CS with the user selectors, RCX with -1, and
 * EFLAGS from the saved R11 slot. tmp names a scratch register; it is
 * overwritten.
 */
78 /* %rsp:at FRAMEEND */
79 .macro FIXUP_TOP_OF_STACK tmp
80 movq %gs:pda_oldrsp,\tmp
81 movq \tmp,RSP(%rsp)
82 movq $__USER_DS,SS(%rsp)
83 movq $__USER_CS,CS(%rsp)
84 movq $-1,RCX(%rsp)
85 movq R11(%rsp),\tmp /* get eflags */
86 movq \tmp,EFLAGS(%rsp)
87 .endm
88
/*
 * RESTORE_TOP_OF_STACK tmp,offset=0
 * Copies the frame's RSP slot back into %gs:pda_oldrsp and its EFLAGS slot
 * back into the R11 slot. Each slot is addressed as SLOT-offset(%rsp).
 * tmp names a scratch register; it is overwritten.
 */
89 .macro RESTORE_TOP_OF_STACK tmp,offset=0
90 movq RSP-\offset(%rsp),\tmp
91 movq \tmp,%gs:pda_oldrsp
92 movq EFLAGS-\offset(%rsp),\tmp
93 movq \tmp,R11-\offset(%rsp)
94 .endm
95
/*
 * FAKE_STACK_FRAME child_rip
 * Pushes six quadwords: ss=0, rsp=0, eflags with bit 9 set, cs=__KERNEL_CS,
 * rip=child_rip and orig rax=0. %rax is zeroed as a side effect. Each push
 * is followed by a matching CFA adjustment of 8.
 */
96 .macro FAKE_STACK_FRAME child_rip
97 /* push in order ss, rsp, eflags, cs, rip */
98 xorl %eax, %eax
99 pushq %rax /* ss */
100 CFI_ADJUST_CFA_OFFSET 8
101 /*CFI_REL_OFFSET ss,0*/
102 pushq %rax /* rsp */
103 CFI_ADJUST_CFA_OFFSET 8
104 CFI_REL_OFFSET rsp,0
105 pushq $(1<<9) /* eflags - interrupts on */
106 CFI_ADJUST_CFA_OFFSET 8
107 /*CFI_REL_OFFSET rflags,0*/
108 pushq $__KERNEL_CS /* cs */
109 CFI_ADJUST_CFA_OFFSET 8
110 /*CFI_REL_OFFSET cs,0*/
111 pushq \child_rip /* rip */
112 CFI_ADJUST_CFA_OFFSET 8
113 CFI_REL_OFFSET rip,0
114 pushq %rax /* orig rax */
115 CFI_ADJUST_CFA_OFFSET 8
116 .endm
117
/*
 * UNFAKE_STACK_FRAME
 * Drops the six quadwords pushed by FAKE_STACK_FRAME and adjusts the CFA
 * offset to match.
 */
118 .macro UNFAKE_STACK_FRAME
119 addq $8*6, %rsp
120 CFI_ADJUST_CFA_OFFSET -(6*8)
121 .endm
122
/*
 * CFI_DEFAULT_STACK start=1
 * Emits unwind annotations for a register frame located at %rsp: the CFA
 * is placed at SS+8 and each listed register is recorded at its named
 * offset. With start=1 a new CFI procedure is opened and marked as a signal
 * frame; with start=0 only the CFA offset is reset. Generates no
 * instructions.
 */
123 .macro CFI_DEFAULT_STACK start=1
124 .if \start
125 CFI_STARTPROC simple
126 CFI_SIGNAL_FRAME
127 CFI_DEF_CFA rsp,SS+8
128 .else
129 CFI_DEF_CFA_OFFSET SS+8
130 .endif
131 CFI_REL_OFFSET r15,R15
132 CFI_REL_OFFSET r14,R14
133 CFI_REL_OFFSET r13,R13
134 CFI_REL_OFFSET r12,R12
135 CFI_REL_OFFSET rbp,RBP
136 CFI_REL_OFFSET rbx,RBX
137 CFI_REL_OFFSET r11,R11
138 CFI_REL_OFFSET r10,R10
139 CFI_REL_OFFSET r9,R9
140 CFI_REL_OFFSET r8,R8
141 CFI_REL_OFFSET rax,RAX
142 CFI_REL_OFFSET rcx,RCX
143 CFI_REL_OFFSET rdx,RDX
144 CFI_REL_OFFSET rsi,RSI
145 CFI_REL_OFFSET rdi,RDI
146 CFI_REL_OFFSET rip,RIP
147 /*CFI_REL_OFFSET cs,CS*/
148 /*CFI_REL_OFFSET rflags,EFLAGS*/
149 CFI_REL_OFFSET rsp,RSP
150 /*CFI_REL_OFFSET ss,SS*/
151 .endm
152/*
153 * A newly forked process directly context switches into this.
154 */
155/* rdi: prev */
/*
 * ret_from_fork
 * Reloads EFLAGS from kernel_eflags, calls schedule_tail, and runs
 * syscall_trace_leave first when _TIF_SYSCALL_TRACE or _TIF_SYSCALL_AUDIT
 * is set. It then leaves through int_ret_from_sys_call when the saved CS
 * has its low two bits clear or _TIF_IA32 is set, and through
 * ret_from_sys_call (after RESTORE_TOP_OF_STACK) otherwise.
 */
156ENTRY(ret_from_fork)
157 CFI_DEFAULT_STACK
/* push/popf move %rsp by 8 in 64-bit code, so the CFA moves by 8 as well */
158 push kernel_eflags(%rip)
159 CFI_ADJUST_CFA_OFFSET 8
160 popf # reset kernel eflags
161 CFI_ADJUST_CFA_OFFSET -8
162 call schedule_tail
163 GET_THREAD_INFO(%rcx)
164 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
165 jnz rff_trace
166rff_action:
167 RESTORE_REST
168 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
169 je int_ret_from_sys_call
170 testl $_TIF_IA32,threadinfo_flags(%rcx)
171 jnz int_ret_from_sys_call
172 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
173 jmp ret_from_sys_call
174rff_trace:
175 movq %rsp,%rdi
176 call syscall_trace_leave
/* reload the thread_info pointer into %rcx; rff_action reads it */
177 GET_THREAD_INFO(%rcx)
178 jmp rff_action
179 CFI_ENDPROC
180END(ret_from_fork)
181
182/*
183 * System call entry. Up to 6 arguments in registers are supported.
184 *
185 * SYSCALL does not save anything on the stack and does not change the
186 * stack pointer.
187 */
188
189/*
190 * Register setup:
191 * rax system call number
192 * rdi arg0
193 * rcx return address for syscall/sysret, C arg3
194 * rsi arg1
195 * rdx arg2
196 * r10 arg3 (--> moved to rcx for C)
197 * r8 arg4
198 * r9 arg5
199 * r11 eflags for syscall/sysret, temporary for C
200 * r12-r15,rbp,rbx saved by C code, not touched.
201 *
202 * Interrupts are off on entry.
203 * Only called from user space.
204 *
205 * XXX if we had a free scratch register we could save the RSP into the stack frame
206 * and report it properly in ps. Unfortunately we haven't.
207 *
208 * When user can change the frames always force IRET. That is because
209 * it deals with uncanonical addresses better. SYSRET has trouble
210 * with them due to bugs in both AMD and Intel CPUs.
211 */
212
/*
 * system_call: SYSCALL entry point together with its return paths.
 * Labels defined in this block:
 *   ret_from_sys_call, sysret_check - exit that ends in sysretq
 *   sysret_careful, sysret_signal   - reschedule / signal work on that path
 *   badsys                          - syscall number above __NR_syscall_max
 *   tracesys                        - entry when a trace/audit/seccomp flag
 *                                     is set
 *   int_ret_from_sys_call, int_*    - exit that leaves via retint_swapgs or
 *                                     retint_restore_args
 */
213ENTRY(system_call)
214 CFI_STARTPROC simple
215 CFI_SIGNAL_FRAME
216 CFI_DEF_CFA rsp,PDA_STACKOFFSET
217 CFI_REGISTER rip,rcx
218 /*CFI_REGISTER rflags,r11*/
219 swapgs
/* stash the incoming %rsp in the PDA and switch to pda_kernelstack */
220 movq %rsp,%gs:pda_oldrsp
221 movq %gs:pda_kernelstack,%rsp
222 /*
223 * No need to follow this irqs off/on section - it's straight
224 * and short:
225 */
226 sti
227 SAVE_ARGS 8,1
228 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
229 movq %rcx,RIP-ARGOFFSET(%rsp)
230 CFI_REL_OFFSET rip,RIP-ARGOFFSET
231 GET_THREAD_INFO(%rcx)
232 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
233 jnz tracesys
/* unsigned compare: any number above __NR_syscall_max goes to badsys */
234 cmpq $__NR_syscall_max,%rax
235 ja badsys
/* arg3 arrives in r10; move it to rcx for the C handler */
236 movq %r10,%rcx
237 call *sys_call_table(,%rax,8) # XXX: rip relative
238 movq %rax,RAX-ARGOFFSET(%rsp)
239/*
240 * Syscall return path ending with SYSRET (fast path)
241 * Has incomplete stack frame and undefined top of stack.
242 */
243ret_from_sys_call:
244 movl $_TIF_ALLWORK_MASK,%edi
245 /* edi: flagmask */
246sysret_check:
247 GET_THREAD_INFO(%rcx)
248 cli
249 TRACE_IRQS_OFF
250 movl threadinfo_flags(%rcx),%edx
251 andl %edi,%edx
252 jnz sysret_careful
253 CFI_REMEMBER_STATE
254 /*
255 * sysretq will re-enable interrupts:
256 */
257 TRACE_IRQS_ON
258 movq RIP-ARGOFFSET(%rsp),%rcx
259 CFI_REGISTER rip,rcx
260 RESTORE_ARGS 0,-ARG_SKIP,1
261 /*CFI_REGISTER rflags,r11*/
262 movq %gs:pda_oldrsp,%rsp
263 swapgs
264 sysretq
265
266 CFI_RESTORE_STATE
267 /* Handle reschedules */
268 /* edx: work, edi: workmask */
269sysret_careful:
270 bt $TIF_NEED_RESCHED,%edx
271 jnc sysret_signal
272 TRACE_IRQS_ON
273 sti
/* keep the work mask in %rdi across the call to schedule */
274 pushq %rdi
275 CFI_ADJUST_CFA_OFFSET 8
276 call schedule
277 popq %rdi
278 CFI_ADJUST_CFA_OFFSET -8
279 jmp sysret_check
280
281 /* Handle a signal */
282sysret_signal:
283 TRACE_IRQS_ON
284 sti
285 testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
286 jz 1f
287
288 /* Really a signal */
289 /* edx: work flags (arg3) */
290 leaq do_notify_resume(%rip),%rax
291 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
292 xorl %esi,%esi # oldset -> arg2
293 call ptregscall_common
2941: movl $_TIF_NEED_RESCHED,%edi
295 /* Use IRET because user could have changed frame. This
296 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
297 cli
298 TRACE_IRQS_OFF
299 jmp int_with_check
300
301badsys:
302 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
303 jmp ret_from_sys_call
304
305 /* Do syscall tracing */
306tracesys:
307 SAVE_REST
308 movq $-ENOSYS,RAX(%rsp)
309 FIXUP_TOP_OF_STACK %rdi
310 movq %rsp,%rdi
311 call syscall_trace_enter
312 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
313 RESTORE_REST
/*
 * cmova and ja both consume the flags set by cmpq; the movq in between
 * does not modify flags. An out-of-range number yields -ENOSYS in %rax
 * and skips the table call.
 */
314 cmpq $__NR_syscall_max,%rax
315 movq $-ENOSYS,%rcx
316 cmova %rcx,%rax
317 ja 1f
318 movq %r10,%rcx /* fixup for C */
319 call *sys_call_table(,%rax,8)
3201: movq %rax,RAX-ARGOFFSET(%rsp)
321 /* Use IRET because user could have changed frame */
322
323/*
324 * Syscall return path ending with IRET.
325 * Has correct top of stack, but partial stack frame.
326 */
327 .globl int_ret_from_sys_call
328int_ret_from_sys_call:
329 cli
330 TRACE_IRQS_OFF
331 testl $3,CS-ARGOFFSET(%rsp)
332 je retint_restore_args
333 movl $_TIF_ALLWORK_MASK,%edi
334 /* edi: mask to check */
335int_with_check:
336 GET_THREAD_INFO(%rcx)
337 movl threadinfo_flags(%rcx),%edx
338 andl %edi,%edx
339 jnz int_careful
340 andl $~TS_COMPAT,threadinfo_status(%rcx)
341 jmp retint_swapgs
342
343 /* Either reschedule or signal or syscall exit tracking needed. */
344 /* First do a reschedule test. */
345 /* edx: work, edi: workmask */
346int_careful:
347 bt $TIF_NEED_RESCHED,%edx
348 jnc int_very_careful
349 TRACE_IRQS_ON
350 sti
351 pushq %rdi
352 CFI_ADJUST_CFA_OFFSET 8
353 call schedule
354 popq %rdi
355 CFI_ADJUST_CFA_OFFSET -8
356 cli
357 TRACE_IRQS_OFF
358 jmp int_with_check
359
360 /* handle signals and tracing -- both require a full stack frame */
361int_very_careful:
362 TRACE_IRQS_ON
363 sti
364 SAVE_REST
365 /* Check for syscall exit trace */
366 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
367 jz int_signal
368 pushq %rdi
369 CFI_ADJUST_CFA_OFFSET 8
/* the frame sits 8 bytes above %rsp because of the pushq just above */
370 leaq 8(%rsp),%rdi # &ptregs -> arg1
371 call syscall_trace_leave
372 popq %rdi
373 CFI_ADJUST_CFA_OFFSET -8
/* drop the flags just handled from the work mask before re-checking */
374 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
375 jmp int_restore_rest
376
377int_signal:
378 testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
379 jz 1f
380 movq %rsp,%rdi # &ptregs -> arg1
381 xorl %esi,%esi # oldset -> arg2
382 call do_notify_resume
3831: movl $_TIF_NEED_RESCHED,%edi
384int_restore_rest:
385 RESTORE_REST
386 cli
387 TRACE_IRQS_OFF
388 jmp int_with_check
389 CFI_ENDPROC
390END(system_call)
391
392/*
393 * Certain special system calls that need to save a complete full stack frame.
394 */
395
/*
 * PTREGSCALL label,func,arg
 * Defines a global label whose code loads the address of func into %rax,
 * stores the address -ARGOFFSET+8(%rsp) into the register named by arg,
 * and jumps to ptregscall_common.
 */
396 .macro PTREGSCALL label,func,arg
397 .globl \label
398\label:
399 leaq \func(%rip),%rax
400 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
401 jmp ptregscall_common
402END(\label)
403 .endm
404
/*
 * Stubs generated by PTREGSCALL, followed by their shared tail
 * ptregscall_common. One CFI procedure spans the stubs and the tail: it is
 * opened here and closed just before END(ptregscall_common).
 *
 * ptregscall_common expects the target function address in %rax. It pops
 * its own return address, saves the remaining registers, fixes up the top
 * of stack, calls *%rax, undoes both steps, and returns to the popped
 * address.
 */
405 CFI_STARTPROC
406
407 PTREGSCALL stub_clone, sys_clone, %r8
408 PTREGSCALL stub_fork, sys_fork, %rdi
409 PTREGSCALL stub_vfork, sys_vfork, %rdi
410 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
411 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
412 PTREGSCALL stub_iopl, sys_iopl, %rsi
413
414ENTRY(ptregscall_common)
/* take the return address off the stack so SAVE_REST can extend the frame */
415 popq %r11
416 CFI_ADJUST_CFA_OFFSET -8
417 CFI_REGISTER rip, r11
418 SAVE_REST
/* park the return address in r15 while the call runs; r11 is reused as scratch below */
419 movq %r11, %r15
420 CFI_REGISTER rip, r15
421 FIXUP_TOP_OF_STACK %r11
422 call *%rax
423 RESTORE_TOP_OF_STACK %r11
424 movq %r15, %r11
425 CFI_REGISTER rip, r11
426 RESTORE_REST
/* put the return address back on the stack and return through it */
427 pushq %r11
428 CFI_ADJUST_CFA_OFFSET 8
429 CFI_REL_OFFSET rip, 0
430 ret
431 CFI_ENDPROC
432END(ptregscall_common)
433
/*
 * stub_execve
 * Pops its return address into %r11 (it is not pushed back), builds the
 * full frame, fixes up the top of stack and calls sys_execve. The result
 * is stored in the frame's RAX slot and control leaves through
 * int_ret_from_sys_call instead of returning to the caller.
 */
434ENTRY(stub_execve)
435 CFI_STARTPROC
436 popq %r11
437 CFI_ADJUST_CFA_OFFSET -8
438 CFI_REGISTER rip, r11
439 SAVE_REST
440 FIXUP_TOP_OF_STACK %r11
441 call sys_execve
442 RESTORE_TOP_OF_STACK %r11
443 movq %rax,RAX(%rsp)
444 RESTORE_REST
445 jmp int_ret_from_sys_call
446 CFI_ENDPROC
447END(stub_execve)
448
449/*
450 * sigreturn is special because it needs to restore all registers on return.
451 * This cannot be done with SYSRET, so use the IRET return path instead.
452 */
/*
 * stub_rt_sigreturn
 * Discards its return address, builds the full frame, passes %rsp as the
 * first argument and calls sys_rt_sigreturn. The result is stored in the
 * frame's RAX slot and control leaves through int_ret_from_sys_call.
 */
453ENTRY(stub_rt_sigreturn)
454 CFI_STARTPROC
/* drop the return address; this stub never returns to its caller */
455 addq $8, %rsp
456 CFI_ADJUST_CFA_OFFSET -8
457 SAVE_REST
458 movq %rsp,%rdi
459 FIXUP_TOP_OF_STACK %r11
460 call sys_rt_sigreturn
461 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
462 RESTORE_REST
463 jmp int_ret_from_sys_call
464 CFI_ENDPROC
465END(stub_rt_sigreturn)
466
467/*
468 * initial frame state for interrupts and exceptions
469 */
/*
 * _frame ref
 * Opens a CFI procedure marked as a signal frame and describes a hardware
 * frame whose slot named by ref is at 0(%rsp): the CFA is at SS+8-ref, and
 * rsp and rip are recorded at RSP-ref and RIP-ref. Generates no
 * instructions.
 */
470 .macro _frame ref
471 CFI_STARTPROC simple
472 CFI_SIGNAL_FRAME
473 CFI_DEF_CFA rsp,SS+8-\ref
474 /*CFI_REL_OFFSET ss,SS-\ref*/
475 CFI_REL_OFFSET rsp,RSP-\ref
476 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
477 /*CFI_REL_OFFSET cs,CS-\ref*/
478 CFI_REL_OFFSET rip,RIP-\ref
479 .endm
480
481/* initial frame state for interrupts (and exceptions without error code) */
482#define INTR_FRAME _frame RIP
483/* initial frame state for exceptions with error code (and interrupts with
484 vector already pushed) */
485#define XCPT_FRAME _frame ORIG_RAX
486
487/*
488 * Interrupt entry/exit.
489 *
490 * Interrupt entry points save only callee clobbered registers in fast path.
491 *
492 * Entry runs with interrupts off.
493 */
494
/*
 * interrupt func
 * Saves the argument registers, passes the address -ARGOFFSET(%rsp) in
 * %rdi, sets up an %rbp frame, executes swapgs when the saved CS has either
 * of its low two bits set, increments pda_irqcount, switches %rsp to
 * pda_irqstackptr when that increment produced zero, and calls func.
 */
495/* 0(%rsp): interrupt number */
496 .macro interrupt func
497 cld
498 SAVE_ARGS
499 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
500 pushq %rbp
501 CFI_ADJUST_CFA_OFFSET 8
502 CFI_REL_OFFSET rbp, 0
503 movq %rsp,%rbp
504 CFI_DEF_CFA_REGISTER rbp
505 testl $3,CS(%rdi)
506 je 1f
507 swapgs
508 /* irqcount is used to check if a CPU is already on an interrupt
509 stack or not. While this is essentially redundant with preempt_count
510 it is a little cheaper to use a separate counter in the PDA
511 (short of moving irq_enter into assembly, which would be too
512 much work) */
5131: incl %gs:pda_irqcount
/* cmoveq consumes ZF from the incl above: switch stacks only when the count became zero */
514 cmoveq %gs:pda_irqstackptr,%rsp
515 push %rbp # backlink for old unwinder
516 /*
517 * We entered an interrupt context - irqs are off:
518 */
519 TRACE_IRQS_OFF
520 call \func
521 .endm
522
/*
 * common_interrupt: calls do_IRQ through the interrupt macro and then runs
 * the shared interrupt-return code. The labels retint_kernel,
 * retint_careful, retint_swapgs, retint_restore_args and iret_label
 * defined here are also jump targets of the syscall and exception exit
 * paths elsewhere in this file.
 */
523ENTRY(common_interrupt)
524 XCPT_FRAME
525 interrupt do_IRQ
526 /* 0(%rsp): oldrsp-ARGOFFSET */
527ret_from_intr:
528 cli
529 TRACE_IRQS_OFF
530 decl %gs:pda_irqcount
/* leaveq reloads %rsp from %rbp and pops %rbp, undoing the frame set up by the interrupt macro */
531 leaveq
532 CFI_DEF_CFA_REGISTER rsp
533 CFI_ADJUST_CFA_OFFSET -8
534exit_intr:
535 GET_THREAD_INFO(%rcx)
536 testl $3,CS-ARGOFFSET(%rsp)
537 je retint_kernel
538
539 /* Interrupt came from user space */
540 /*
541 * Has a correct top of stack, but a partial stack frame
542 * %rcx: thread info. Interrupts off.
543 */
544retint_with_reschedule:
545 movl $_TIF_WORK_MASK,%edi
546retint_check:
547 movl threadinfo_flags(%rcx),%edx
548 andl %edi,%edx
549 CFI_REMEMBER_STATE
550 jnz retint_careful
551retint_swapgs:
552 /*
553 * The iretq could re-enable interrupts:
554 */
555 cli
556 TRACE_IRQS_IRETQ
557 swapgs
558 jmp restore_args
559
560retint_restore_args:
561 cli
562 /*
563 * The iretq could re-enable interrupts:
564 */
565 TRACE_IRQS_IRETQ
566restore_args:
567 RESTORE_ARGS 0,8,0
568iret_label:
569 iretq
570
/* exception table entry: a fault at iret_label is redirected to bad_iret */
571 .section __ex_table,"a"
572 .quad iret_label,bad_iret
573 .previous
574 .section .fixup,"ax"
575 /* force a signal here? this matches i386 behaviour */
576 /* running with kernel gs */
577bad_iret:
578 movq $11,%rdi /* SIGSEGV */
579 TRACE_IRQS_ON
580 sti
581 jmp do_exit
582 .previous
583
584 /* edi: workmask, edx: work */
585retint_careful:
586 CFI_RESTORE_STATE
587 bt $TIF_NEED_RESCHED,%edx
588 jnc retint_signal
589 TRACE_IRQS_ON
590 sti
591 pushq %rdi
592 CFI_ADJUST_CFA_OFFSET 8
593 call schedule
594 popq %rdi
595 CFI_ADJUST_CFA_OFFSET -8
596 GET_THREAD_INFO(%rcx)
597 cli
598 TRACE_IRQS_OFF
599 jmp retint_check
600
601retint_signal:
602 testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
603 jz retint_swapgs
604 TRACE_IRQS_ON
605 sti
606 SAVE_REST
607 movq $-1,ORIG_RAX(%rsp)
608 xorl %esi,%esi # oldset
609 movq %rsp,%rdi # &pt_regs
610 call do_notify_resume
611 RESTORE_REST
612 cli
613 TRACE_IRQS_OFF
/* after signal handling only a pending reschedule is re-checked */
614 movl $_TIF_NEED_RESCHED,%edi
615 GET_THREAD_INFO(%rcx)
616 jmp retint_check
617
618#ifdef CONFIG_PREEMPT
619 /* Returning to kernel space. Check if we need preemption */
620 /* rcx: threadinfo. interrupts off. */
621ENTRY(retint_kernel)
622 cmpl $0,threadinfo_preempt_count(%rcx)
623 jnz retint_restore_args
624 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
625 jnc retint_restore_args
626 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
627 jnc retint_restore_args
628 call preempt_schedule_irq
629 jmp exit_intr
630#endif
631
632 CFI_ENDPROC
633END(common_interrupt)
634
635/*
636 * APIC interrupts.
637 */
/*
 * apicinterrupt num,func
 * Body of an APIC interrupt entry point: pushes the bitwise complement of
 * num, calls func through the interrupt macro, and jumps to ret_from_intr.
 * Opens and closes its own CFI procedure.
 */
638 .macro apicinterrupt num,func
639 INTR_FRAME
640 pushq $~(\num)
641 CFI_ADJUST_CFA_OFFSET 8
642 interrupt \func
643 jmp ret_from_intr
644 CFI_ENDPROC
645 .endm
646
/* Entry point for THERMAL_APIC_VECTOR; handler is smp_thermal_interrupt. */
647ENTRY(thermal_interrupt)
648 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
649END(thermal_interrupt)
650
/* Entry point for THRESHOLD_APIC_VECTOR; handler is mce_threshold_interrupt. */
651ENTRY(threshold_interrupt)
652 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
653END(threshold_interrupt)
654
/*
 * Entry points built only with CONFIG_SMP: reschedule, eight TLB
 * invalidate vectors, call-function, and irq-move cleanup.
 */
655#ifdef CONFIG_SMP
656ENTRY(reschedule_interrupt)
657 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
658END(reschedule_interrupt)
659
/*
 * INVALIDATE_ENTRY num: defines invalidate_interrupt<num> for vector
 * INVALIDATE_TLB_VECTOR_START+num; all instances share
 * smp_invalidate_interrupt as handler.
 */
660 .macro INVALIDATE_ENTRY num
661ENTRY(invalidate_interrupt\num)
662 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
663END(invalidate_interrupt\num)
664 .endm
665
666 INVALIDATE_ENTRY 0
667 INVALIDATE_ENTRY 1
668 INVALIDATE_ENTRY 2
669 INVALIDATE_ENTRY 3
670 INVALIDATE_ENTRY 4
671 INVALIDATE_ENTRY 5
672 INVALIDATE_ENTRY 6
673 INVALIDATE_ENTRY 7
674
675ENTRY(call_function_interrupt)
676 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
677END(call_function_interrupt)
678ENTRY(irq_move_cleanup_interrupt)
679 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
680END(irq_move_cleanup_interrupt)
681#endif
682
/* Entry point for LOCAL_TIMER_VECTOR; handler is smp_apic_timer_interrupt. */
683ENTRY(apic_timer_interrupt)
684 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
685END(apic_timer_interrupt)
686
/* Entry point for ERROR_APIC_VECTOR; handler is smp_error_interrupt. */
687ENTRY(error_interrupt)
688 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
689END(error_interrupt)
690
/* Entry point for SPURIOUS_APIC_VECTOR; handler is smp_spurious_interrupt. */
691ENTRY(spurious_interrupt)
692 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
693END(spurious_interrupt)
694
695/*
696 * Exception entry points.
697 */
/*
 * zeroentry sym
 * Entry body for exceptions used without a pre-pushed error code: pushes 0
 * in its place, saves %rax on the stack, loads the address of sym into
 * %rax and jumps to error_entry.
 */
698 .macro zeroentry sym
699 INTR_FRAME
700 pushq $0 /* push error code/oldrax */
701 CFI_ADJUST_CFA_OFFSET 8
702 pushq %rax /* push real oldrax to the rdi slot */
703 CFI_ADJUST_CFA_OFFSET 8
704 CFI_REL_OFFSET rax,0
705 leaq \sym(%rip),%rax
706 jmp error_entry
707 CFI_ENDPROC
708 .endm
709
/*
 * errorentry sym
 * Entry body for exceptions whose error code is already on the stack:
 * saves %rax on the stack, loads the address of sym into %rax and jumps to
 * error_entry.
 */
710 .macro errorentry sym
711 XCPT_FRAME
712 pushq %rax
713 CFI_ADJUST_CFA_OFFSET 8
714 CFI_REL_OFFSET rax,0
715 leaq \sym(%rip),%rax
716 jmp error_entry
717 CFI_ENDPROC
718 .endm
719
/*
 * paranoidentry sym, ist=0, irqtrace=1
 * Saves all registers, decides from the MSR_GS_BASE value whether swapgs is
 * needed, and calls sym with %rdi = %rsp and %rsi = the error code taken
 * from the ORIG_RAX slot (which is then overwritten with -1).
 * On exit %ebx is 1 when no swapgs was executed and 0 when it was.
 * With a non-zero ist, the matching init_tss IST entry is lowered by
 * EXCEPTION_STKSZ around the call and raised again afterwards.
 * irqtrace selects whether TRACE_IRQS_OFF is emitted after the final cli.
 */
720 /* error code is on the stack already */
721 /* handle NMI like exceptions that can happen everywhere */
722 .macro paranoidentry sym, ist=0, irqtrace=1
723 SAVE_ALL
724 cld
725 movl $1,%ebx
726 movl $MSR_GS_BASE,%ecx
727 rdmsr
/* sign bit of the upper half of the GS base set: skip swapgs, leave ebx = 1 */
728 testl %edx,%edx
729 js 1f
730 swapgs
731 xorl %ebx,%ebx
7321:
733 .if \ist
734 movq %gs:pda_data_offset, %rbp
735 .endif
736 movq %rsp,%rdi
737 movq ORIG_RAX(%rsp),%rsi
738 movq $-1,ORIG_RAX(%rsp)
739 .if \ist
740 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
741 .endif
742 call \sym
743 .if \ist
744 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
745 .endif
746 cli
747 .if \irqtrace
748 TRACE_IRQS_OFF
749 .endif
750 .endm
751
752 /*
753 * "Paranoid" exit path from exception stack.
754 * Paranoid because this is used by NMIs and cannot take
755 * any kernel state for granted.
756 * We don't do kernel preemption checks here, because only
757 * NMI should be common and it does not enable IRQs and
758 * cannot get reschedule ticks.
759 *
760 * "trace" is 0 for the NMI handler only, because irq-tracing
761 * is fundamentally NMI-unsafe. (we cannot change the soft and
762 * hard flags at once, atomically)
763 */
/*
 * paranoidexit trace=1
 * Emits the exit sequence with every label suffixed by the value of trace
 * (paranoid_exit1, paranoid_restore1, ... for the default). Expects %ebx as
 * left by paranoidentry. Ends with CFI_ENDPROC, so it also closes the CFI
 * procedure opened by the entry point that expands it.
 */
764 .macro paranoidexit trace=1
765 /* ebx: no swapgs flag */
766paranoid_exit\trace:
767 testl %ebx,%ebx /* swapgs needed? */
768 jnz paranoid_restore\trace
769 testl $3,CS(%rsp)
770 jnz paranoid_userspace\trace
771paranoid_swapgs\trace:
772 .if \trace
773 TRACE_IRQS_IRETQ 0
774 .endif
775 swapgs
776paranoid_restore\trace:
777 RESTORE_ALL 8
778 iretq
779paranoid_userspace\trace:
780 GET_THREAD_INFO(%rcx)
/* ebx is reused here to hold the pending work flags */
781 movl threadinfo_flags(%rcx),%ebx
782 andl $_TIF_WORK_MASK,%ebx
783 jz paranoid_swapgs\trace
784 movq %rsp,%rdi /* &pt_regs */
785 call sync_regs
786 movq %rax,%rsp /* switch stack for scheduling */
787 testl $_TIF_NEED_RESCHED,%ebx
788 jnz paranoid_schedule\trace
789 movl %ebx,%edx /* arg3: thread flags */
790 .if \trace
791 TRACE_IRQS_ON
792 .endif
793 sti
794 xorl %esi,%esi /* arg2: oldset */
795 movq %rsp,%rdi /* arg1: &pt_regs */
796 call do_notify_resume
797 cli
798 .if \trace
799 TRACE_IRQS_OFF
800 .endif
801 jmp paranoid_userspace\trace
802paranoid_schedule\trace:
803 .if \trace
804 TRACE_IRQS_ON
805 .endif
806 sti
807 call schedule
808 cli
809 .if \trace
810 TRACE_IRQS_OFF
811 .endif
812 jmp paranoid_userspace\trace
813 CFI_ENDPROC
814 .endm
815
816/*
817 * Exception entry point. This expects an error code/orig_rax on the stack
818 * and the exception handler in %rax.
819 */
/*
 * error_entry: common body reached from the zeroentry/errorentry stubs.
 * Builds the register frame by hand, executes swapgs when required, calls
 * the handler in %rax with %rdi = frame pointer and %rsi = error code, and
 * then runs error_exit. %ebx carries the "no swapgs" flag across the
 * handler call.
 */
820KPROBE_ENTRY(error_entry)
821 _frame RDI
822 CFI_REL_OFFSET rax,0
823 /* rdi slot contains rax, oldrax contains error code */
824 cld
825 subq $14*8,%rsp
826 CFI_ADJUST_CFA_OFFSET (14*8)
827 movq %rsi,13*8(%rsp)
828 CFI_REL_OFFSET rsi,RSI
829 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
830 CFI_REGISTER rax,rsi
831 movq %rdx,12*8(%rsp)
832 CFI_REL_OFFSET rdx,RDX
833 movq %rcx,11*8(%rsp)
834 CFI_REL_OFFSET rcx,RCX
835 movq %rsi,10*8(%rsp) /* store rax */
836 CFI_REL_OFFSET rax,RAX
837 movq %r8, 9*8(%rsp)
838 CFI_REL_OFFSET r8,R8
839 movq %r9, 8*8(%rsp)
840 CFI_REL_OFFSET r9,R9
841 movq %r10,7*8(%rsp)
842 CFI_REL_OFFSET r10,R10
843 movq %r11,6*8(%rsp)
844 CFI_REL_OFFSET r11,R11
845 movq %rbx,5*8(%rsp)
846 CFI_REL_OFFSET rbx,RBX
847 movq %rbp,4*8(%rsp)
848 CFI_REL_OFFSET rbp,RBP
849 movq %r12,3*8(%rsp)
850 CFI_REL_OFFSET r12,R12
851 movq %r13,2*8(%rsp)
852 CFI_REL_OFFSET r13,R13
853 movq %r14,1*8(%rsp)
854 CFI_REL_OFFSET r14,R14
855 movq %r15,(%rsp)
856 CFI_REL_OFFSET r15,R15
/* ebx = 0 means swapgs is needed on exit; error_kernelspace raises it to 1 */
857 xorl %ebx,%ebx
858 testl $3,CS(%rsp)
859 je error_kernelspace
860error_swapgs:
861 swapgs
862error_sti:
/* the rdi slot held the saved rax until now; store the real rdi there */
863 movq %rdi,RDI(%rsp)
864 CFI_REL_OFFSET rdi,RDI
865 movq %rsp,%rdi
866 movq ORIG_RAX(%rsp),%rsi /* get error code */
867 movq $-1,ORIG_RAX(%rsp)
868 call *%rax
869 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
870error_exit:
/* copy the flag to eax before RESTORE_REST, which is followed by a test on eax */
871 movl %ebx,%eax
872 RESTORE_REST
873 cli
874 TRACE_IRQS_OFF
875 GET_THREAD_INFO(%rcx)
876 testl %eax,%eax
877 jne retint_kernel
878 movl threadinfo_flags(%rcx),%edx
879 movl $_TIF_WORK_MASK,%edi
880 andl %edi,%edx
881 jnz retint_careful
882 /*
883 * The iret might restore flags:
884 */
885 TRACE_IRQS_IRETQ
886 swapgs
887 RESTORE_ARGS 0,8,0
888 jmp iret_label
889 CFI_ENDPROC
890
891error_kernelspace:
892 incl %ebx
893 /* There are two places in the kernel that can potentially fault with
894 usergs. Handle them here. The exception handlers after
895 iret run with kernel gs again, so don't set the user space flag.
896 B stepping K8s sometimes report a truncated RIP for IRET
897 exceptions returning to compat mode. Check for these here too. */
898 leaq iret_label(%rip),%rbp
899 cmpq %rbp,RIP(%rsp)
900 je error_swapgs
/* second compare uses only the low 32 bits of the iret_label address */
901 movl %ebp,%ebp /* zero extend */
902 cmpq %rbp,RIP(%rsp)
903 je error_swapgs
904 cmpq $gs_change,RIP(%rsp)
905 je error_swapgs
906 jmp error_sti
907KPROBE_END(error_entry)
908
909 /* Reload gs selector with exception handling */
910 /* edi: new selector */
/*
 * load_gs_index: loads the selector in %edi into %gs between two swapgs
 * instructions, with interrupts disabled by cli and the caller's flags
 * restored by popf. A fault on the load at gs_change is redirected through
 * the exception table to bad_gs, which loads a zero selector instead and
 * resumes at label 2.
 */
911ENTRY(load_gs_index)
912 CFI_STARTPROC
913 pushf
914 CFI_ADJUST_CFA_OFFSET 8
915 cli
916 swapgs
917gs_change:
918 movl %edi,%gs
9192: mfence /* workaround */
920 swapgs
921 popf
922 CFI_ADJUST_CFA_OFFSET -8
923 ret
924 CFI_ENDPROC
925ENDPROC(load_gs_index)
926
927 .section __ex_table,"a"
928 .align 8
929 .quad gs_change,bad_gs
930 .previous
931 .section .fixup,"ax"
932 /* running with kernelgs */
933bad_gs:
934 swapgs /* switch back to user gs */
935 xorl %eax,%eax
936 movl %eax,%gs
937 jmp 2b
938 .previous
939
940/*
941 * Create a kernel thread.
942 *
943 * C extern interface:
944 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
945 *
946 * asm input arguments:
947 * rdi: fn, rsi: arg, rdx: flags
948 */
/*
 * kernel_thread: builds a fake frame whose rip slot is child_rip, saves
 * all registers into it, and calls do_fork with the caller's flags OR-ed
 * with kernel_thread_flags, -1 as the second argument, the frame address
 * as the third, and zero in r8 and r9. The do_fork result is stored in the
 * frame's RAX slot before the registers are restored.
 */
949ENTRY(kernel_thread)
950 CFI_STARTPROC
951 FAKE_STACK_FRAME $child_rip
952 SAVE_ALL
953
954 # rdi: flags, rsi: usp, rdx: will be &pt_regs
955 movq %rdx,%rdi
956 orq kernel_thread_flags(%rip),%rdi
957 movq $-1, %rsi
958 movq %rsp, %rdx
959
960 xorl %r8d,%r8d
961 xorl %r9d,%r9d
962
963 # clone now
964 call do_fork
965 movq %rax,RAX(%rsp)
966 xorl %edi,%edi
967
968 /*
969 * It isn't worth checking for reschedule here,
970 * so internally to the x86_64 port you can rely on kernel_thread()
971 * not to reschedule the child before returning, this avoids the need
972 * of hacks for example to fork off the per-CPU idle tasks.
973 * [Hopefully no generic code relies on the reschedule -AK]
974 */
975 RESTORE_ALL
976 UNFAKE_STACK_FRAME
977 ret
978 CFI_ENDPROC
979ENDPROC(kernel_thread)
980
/*
 * child_rip: rip value placed in the fake frame by kernel_thread. Moves
 * the function pointer from %rdi to %rax and the argument from %rsi to
 * %rdi, calls the function, then calls do_exit with a zero argument.
 */
981child_rip:
982 pushq $0 # fake return address
983 CFI_STARTPROC
984 /*
985 * Here we are in the child and the registers are set as they were
986 * at kernel_thread() invocation in the parent.
987 */
988 movq %rdi, %rax
989 movq %rsi, %rdi
990 call *%rax
991 # exit
992 xorl %edi, %edi
993 call do_exit
994 CFI_ENDPROC
995ENDPROC(child_rip)
996
997/*
998 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
999 *
1000 * C extern interface:
1001 * extern long execve(char *name, char **argv, char **envp)
1002 *
1003 * asm input arguments:
1004 * rdi: name, rsi: argv, rdx: envp
1005 *
1006 * We want to fallback into:
1007 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
1008 *
1009 * do_sys_execve asm fallback arguments:
1010 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
1011 */
/*
 * kernel_execve: builds a fake frame with a zero rip slot, saves all
 * registers and calls sys_execve. A zero result leaves through
 * int_ret_from_sys_call; any other result restores the registers, drops
 * the fake frame and returns to the caller with the result in %rax.
 */
1012ENTRY(kernel_execve)
1013 CFI_STARTPROC
1014 FAKE_STACK_FRAME $0
1015 SAVE_ALL
1016 call sys_execve
1017 movq %rax, RAX(%rsp)
1018 RESTORE_REST
1019 testq %rax,%rax
1020 je int_ret_from_sys_call
1021 RESTORE_ARGS
1022 UNFAKE_STACK_FRAME
1023 ret
1024 CFI_ENDPROC
1025ENDPROC(kernel_execve)
1026
/* Page fault entry; uses errorentry, so an error code is expected on the stack. */
1027KPROBE_ENTRY(page_fault)
1028 errorentry do_page_fault
1029KPROBE_END(page_fault)
1030
/* The next three entries use zeroentry, which pushes a zero error code. */
1031ENTRY(coprocessor_error)
1032 zeroentry do_coprocessor_error
1033END(coprocessor_error)
1034
1035ENTRY(simd_coprocessor_error)
1036 zeroentry do_simd_coprocessor_error
1037END(simd_coprocessor_error)
1038
1039ENTRY(device_not_available)
1040 zeroentry math_state_restore
1041END(device_not_available)
1042
1043 /* runs on exception stack */
/*
 * debug: pushes a zero error code, calls do_debug through paranoidentry
 * with the DEBUG_STACK IST index, and expands paranoidexit with its default
 * trace=1. That expansion defines paranoid_exit1, which other entry points
 * in this file jump to.
 */
1044KPROBE_ENTRY(debug)
1045 INTR_FRAME
1046 pushq $0
1047 CFI_ADJUST_CFA_OFFSET 8
1048 paranoidentry do_debug, DEBUG_STACK
1049 paranoidexit
1050KPROBE_END(debug)
1051
1052 /* runs on exception stack */
/*
 * nmi: pushes -1 in the error-code slot and calls do_nmi through
 * paranoidentry with ist=0 and irqtrace=0. With CONFIG_TRACE_IRQFLAGS it
 * gets its own exit sequence without irq tracing (paranoidexit 0);
 * otherwise it shares paranoid_exit1.
 */
1053KPROBE_ENTRY(nmi)
1054 INTR_FRAME
1055 pushq $-1
1056 CFI_ADJUST_CFA_OFFSET 8
1057 paranoidentry do_nmi, 0, 0
1058#ifdef CONFIG_TRACE_IRQFLAGS
1059 paranoidexit 0
1060#else
1061 jmp paranoid_exit1
1062 CFI_ENDPROC
1063#endif
1064KPROBE_END(nmi)
1065
/*
 * int3: pushes a zero error code, calls do_int3 through paranoidentry with
 * the DEBUG_STACK IST index, and leaves through paranoid_exit1.
 */
1066KPROBE_ENTRY(int3)
1067 INTR_FRAME
1068 pushq $0
1069 CFI_ADJUST_CFA_OFFSET 8
1070 paranoidentry do_int3, DEBUG_STACK
1071 jmp paranoid_exit1
1072 CFI_ENDPROC
1073KPROBE_END(int3)
1074
/* The following entries all use zeroentry, which pushes a zero error code. */
1075ENTRY(overflow)
1076 zeroentry do_overflow
1077END(overflow)
1078
1079ENTRY(bounds)
1080 zeroentry do_bounds
1081END(bounds)
1082
1083ENTRY(invalid_op)
1084 zeroentry do_invalid_op
1085END(invalid_op)
1086
1087ENTRY(coprocessor_segment_overrun)
1088 zeroentry do_coprocessor_segment_overrun
1089END(coprocessor_segment_overrun)
1090
1091ENTRY(reserved)
1092 zeroentry do_reserved
1093END(reserved)
1094
1095 /* runs on exception stack */
/*
 * double_fault: uses XCPT_FRAME and pushes nothing itself, so the error
 * code is expected on the stack already. Calls do_double_fault through
 * paranoidentry and leaves through paranoid_exit1.
 */
1096ENTRY(double_fault)
1097 XCPT_FRAME
1098 paranoidentry do_double_fault
1099 jmp paranoid_exit1
1100 CFI_ENDPROC
1101END(double_fault)
1102
/* Both entries use errorentry, so an error code is expected on the stack. */
1103ENTRY(invalid_TSS)
1104 errorentry do_invalid_TSS
1105END(invalid_TSS)
1106
1107ENTRY(segment_not_present)
1108 errorentry do_segment_not_present
1109END(segment_not_present)
1110
1111 /* runs on exception stack */
/*
 * stack_segment: uses XCPT_FRAME and pushes nothing itself, so the error
 * code is expected on the stack already. Calls do_stack_segment through
 * paranoidentry and leaves through paranoid_exit1.
 */
1112ENTRY(stack_segment)
1113 XCPT_FRAME
1114 paranoidentry do_stack_segment
1115 jmp paranoid_exit1
1116 CFI_ENDPROC
1117END(stack_segment)
1118
/* These two use errorentry: an error code is expected on the stack. */
1119KPROBE_ENTRY(general_protection)
1120 errorentry do_general_protection
1121KPROBE_END(general_protection)
1122
1123ENTRY(alignment_check)
1124 errorentry do_alignment_check
1125END(alignment_check)
1126
/* These two use zeroentry, which pushes a zero error code. */
1127ENTRY(divide_error)
1128 zeroentry do_divide_error
1129END(divide_error)
1130
1131ENTRY(spurious_interrupt_bug)
1132 zeroentry do_spurious_interrupt_bug
1133END(spurious_interrupt_bug)
1134
/*
 * machine_check (CONFIG_X86_MCE only): pushes a zero error code, calls
 * do_machine_check through paranoidentry and leaves through
 * paranoid_exit1.
 */
1135#ifdef CONFIG_X86_MCE
1136 /* runs on exception stack */
1137ENTRY(machine_check)
1138 INTR_FRAME
1139 pushq $0
1140 CFI_ADJUST_CFA_OFFSET 8
1141 paranoidentry do_machine_check
1142 jmp paranoid_exit1
1143 CFI_ENDPROC
1144END(machine_check)
1145#endif
1146
1147/* Call softirq on interrupt stack. Interrupts are off. */
/*
 * call_softirq: sets up an %rbp frame, increments pda_irqcount, switches
 * %rsp to pda_irqstackptr when that increment produced zero, calls
 * __do_softirq, then restores the original stack with leaveq and
 * decrements pda_irqcount.
 */
1148ENTRY(call_softirq)
1149 CFI_STARTPROC
1150 push %rbp
1151 CFI_ADJUST_CFA_OFFSET 8
1152 CFI_REL_OFFSET rbp,0
1153 mov %rsp,%rbp
1154 CFI_DEF_CFA_REGISTER rbp
1155 incl %gs:pda_irqcount
/* cmove consumes ZF from the incl above */
1156 cmove %gs:pda_irqstackptr,%rsp
1157 push %rbp # backlink for old unwinder
1158 call __do_softirq
1159 leaveq
1160 CFI_DEF_CFA_REGISTER rsp
1161 CFI_ADJUST_CFA_OFFSET -8
1162 decl %gs:pda_irqcount
1163 ret
1164 CFI_ENDPROC
1165ENDPROC(call_softirq)
1166
/*
 * ignore_sysret: loads -ENOSYS into %eax and executes sysret.
 * The block is opened with KPROBE_ENTRY, so it is closed with KPROBE_END,
 * matching every other KPROBE_ENTRY block in this file.
 */
1167KPROBE_ENTRY(ignore_sysret)
1168 CFI_STARTPROC
1169 mov $-ENOSYS,%eax
1170 sysret
1171 CFI_ENDPROC
1172KPROBE_END(ignore_sysret)