author     Ingo Molnar <mingo@elte.hu>  2008-12-23 10:30:31 -0500
committer  Ingo Molnar <mingo@elte.hu>  2008-12-23 10:30:31 -0500
commit     bed4f13065b520e564adffbfcd1c1a764a9c887e (patch)
tree       d847ddd3bab64cb126ee6679dc4a949386554d55 /arch/x86/kernel/entry_64.S
parent     3e5621edb3392b28efb260ac99b2d26fb8b44e73 (diff)
parent     bf8bd66d0580f296f257d371ee41a0a137b541c7 (diff)
Merge branch 'x86/irq' into x86/core
Diffstat (limited to 'arch/x86/kernel/entry_64.S')
-rw-r--r--  arch/x86/kernel/entry_64.S | 1231
1 file changed, 659 insertions(+), 572 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 42571baaca32..3194636a4293 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -60,7 +60,6 @@
 #define __AUDIT_ARCH_LE    0x40000000
 
         .code64
-
 #ifdef CONFIG_FUNCTION_TRACER
 #ifdef CONFIG_DYNAMIC_FTRACE
 ENTRY(mcount)
@@ -169,21 +168,21 @@ ENTRY(native_usergs_sysret64)
  */
 
         /* %rsp:at FRAMEEND */
-        .macro FIXUP_TOP_OF_STACK tmp
+        .macro FIXUP_TOP_OF_STACK tmp offset=0
         movq %gs:pda_oldrsp,\tmp
-        movq \tmp,RSP(%rsp)
-        movq $__USER_DS,SS(%rsp)
-        movq $__USER_CS,CS(%rsp)
-        movq $-1,RCX(%rsp)
-        movq R11(%rsp),\tmp /* get eflags */
-        movq \tmp,EFLAGS(%rsp)
+        movq \tmp,RSP+\offset(%rsp)
+        movq $__USER_DS,SS+\offset(%rsp)
+        movq $__USER_CS,CS+\offset(%rsp)
+        movq $-1,RCX+\offset(%rsp)
+        movq R11+\offset(%rsp),\tmp /* get eflags */
+        movq \tmp,EFLAGS+\offset(%rsp)
         .endm
 
-        .macro RESTORE_TOP_OF_STACK tmp,offset=0
-        movq RSP-\offset(%rsp),\tmp
+        .macro RESTORE_TOP_OF_STACK tmp offset=0
+        movq RSP+\offset(%rsp),\tmp
         movq \tmp,%gs:pda_oldrsp
-        movq EFLAGS-\offset(%rsp),\tmp
-        movq \tmp,R11-\offset(%rsp)
+        movq EFLAGS+\offset(%rsp),\tmp
+        movq \tmp,R11+\offset(%rsp)
         .endm
 
         .macro FAKE_STACK_FRAME child_rip
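
Note on the FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK change: both macros now
take an "offset" parameter defaulting to 0, so existing call sites keep
working while save_rest can invoke them at a different stack depth. A minimal
sketch of the gas feature involved (STORE_AT and demo_store are illustrative
names, not kernel symbols):

        .macro STORE_AT reg offset=0
        movq \reg, \offset(%rsp)
        .endm

demo_store:
        STORE_AT %rax           /* expands to: movq %rax, 0(%rsp) */
        STORE_AT %rax, 16       /* expands to: movq %rax, 16(%rsp) */
        ret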
@@ -195,7 +194,7 @@ ENTRY(native_usergs_sysret64)
         pushq %rax /* rsp */
         CFI_ADJUST_CFA_OFFSET 8
         CFI_REL_OFFSET rsp,0
-        pushq $(1<<9) /* eflags - interrupts on */
+        pushq $X86_EFLAGS_IF /* eflags - interrupts on */
         CFI_ADJUST_CFA_OFFSET 8
         /*CFI_REL_OFFSET rflags,0*/
         pushq $__KERNEL_CS /* cs */
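
Note: X86_EFLAGS_IF from <asm/processor-flags.h> is bit 9 of RFLAGS (the
interrupt-enable flag), so the named constant pushes exactly the value the
old magic number did. A toy restatement (MY_EFLAGS_IF is a made-up name):

        .equ MY_EFLAGS_IF, 1 << 9       /* == 0x200 == X86_EFLAGS_IF */

demo_push_eflags_if:
        pushq $MY_EFLAGS_IF             /* same immediate as pushq $(1<<9) */
        popfq                           /* loading it into RFLAGS sets IF */
        ret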
@@ -213,64 +212,184 @@ ENTRY(native_usergs_sysret64)
         CFI_ADJUST_CFA_OFFSET -(6*8)
         .endm
 
-        .macro CFI_DEFAULT_STACK start=1
+/*
+ * initial frame state for interrupts (and exceptions without error code)
+ */
+        .macro EMPTY_FRAME start=1 offset=0
         .if \start
         CFI_STARTPROC simple
         CFI_SIGNAL_FRAME
-        CFI_DEF_CFA rsp,SS+8
+        CFI_DEF_CFA rsp,8+\offset
         .else
-        CFI_DEF_CFA_OFFSET SS+8
+        CFI_DEF_CFA_OFFSET 8+\offset
         .endif
-        CFI_REL_OFFSET r15,R15
-        CFI_REL_OFFSET r14,R14
-        CFI_REL_OFFSET r13,R13
-        CFI_REL_OFFSET r12,R12
-        CFI_REL_OFFSET rbp,RBP
-        CFI_REL_OFFSET rbx,RBX
-        CFI_REL_OFFSET r11,R11
-        CFI_REL_OFFSET r10,R10
-        CFI_REL_OFFSET r9,R9
-        CFI_REL_OFFSET r8,R8
-        CFI_REL_OFFSET rax,RAX
-        CFI_REL_OFFSET rcx,RCX
-        CFI_REL_OFFSET rdx,RDX
-        CFI_REL_OFFSET rsi,RSI
-        CFI_REL_OFFSET rdi,RDI
-        CFI_REL_OFFSET rip,RIP
-        /*CFI_REL_OFFSET cs,CS*/
-        /*CFI_REL_OFFSET rflags,EFLAGS*/
-        CFI_REL_OFFSET rsp,RSP
-        /*CFI_REL_OFFSET ss,SS*/
         .endm
+
+/*
+ * initial frame state for interrupts (and exceptions without error code)
+ */
+        .macro INTR_FRAME start=1 offset=0
+        EMPTY_FRAME \start, SS+8+\offset-RIP
+        /*CFI_REL_OFFSET ss, SS+\offset-RIP*/
+        CFI_REL_OFFSET rsp, RSP+\offset-RIP
+        /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
+        /*CFI_REL_OFFSET cs, CS+\offset-RIP*/
+        CFI_REL_OFFSET rip, RIP+\offset-RIP
+        .endm
+
+/*
+ * initial frame state for exceptions with error code (and interrupts
+ * with vector already pushed)
+ */
+        .macro XCPT_FRAME start=1 offset=0
+        INTR_FRAME \start, RIP+\offset-ORIG_RAX
+        /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
+        .endm
+
+/*
+ * frame that enables calling into C.
+ */
+        .macro PARTIAL_FRAME start=1 offset=0
+        XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
+        CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
+        CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
+        CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
+        CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
+        CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
+        CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
+        CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
+        CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
+        CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
+        .endm
+
+/*
+ * frame that enables passing a complete pt_regs to a C function.
+ */
+        .macro DEFAULT_FRAME start=1 offset=0
+        PARTIAL_FRAME \start, R11+\offset-R15
+        CFI_REL_OFFSET rbx, RBX+\offset
+        CFI_REL_OFFSET rbp, RBP+\offset
+        CFI_REL_OFFSET r12, R12+\offset
+        CFI_REL_OFFSET r13, R13+\offset
+        CFI_REL_OFFSET r14, R14+\offset
+        CFI_REL_OFFSET r15, R15+\offset
+        .endm
+
+/* save partial stack frame */
+ENTRY(save_args)
+        XCPT_FRAME
+        cld
+        movq_cfi rdi, RDI+16-ARGOFFSET
+        movq_cfi rsi, RSI+16-ARGOFFSET
+        movq_cfi rdx, RDX+16-ARGOFFSET
+        movq_cfi rcx, RCX+16-ARGOFFSET
+        movq_cfi rax, RAX+16-ARGOFFSET
+        movq_cfi r8, R8+16-ARGOFFSET
+        movq_cfi r9, R9+16-ARGOFFSET
+        movq_cfi r10, R10+16-ARGOFFSET
+        movq_cfi r11, R11+16-ARGOFFSET
+
+        leaq -ARGOFFSET+16(%rsp),%rdi   /* arg1 for handler */
+        movq_cfi rbp, 8         /* push %rbp */
+        leaq 8(%rsp), %rbp      /* mov %rsp, %ebp */
+        testl $3, CS(%rdi)
+        je 1f
+        SWAPGS
+        /*
+         * irqcount is used to check if a CPU is already on an interrupt stack
+         * or not. While this is essentially redundant with preempt_count it is
+         * a little cheaper to use a separate counter in the PDA (short of
+         * moving irq_enter into assembly, which would be too much work)
+         */
+1:      incl %gs:pda_irqcount
+        jne 2f
+        popq_cfi %rax           /* move return address... */
+        mov %gs:pda_irqstackptr,%rsp
+        EMPTY_FRAME 0
+        pushq_cfi %rax          /* ... to the new stack */
+        /*
+         * We entered an interrupt context - irqs are off:
+         */
+2:      TRACE_IRQS_OFF
+        ret
+        CFI_ENDPROC
+END(save_args)
+
+ENTRY(save_rest)
+        PARTIAL_FRAME 1 REST_SKIP+8
+        movq 5*8+16(%rsp), %r11 /* save return address */
+        movq_cfi rbx, RBX+16
+        movq_cfi rbp, RBP+16
+        movq_cfi r12, R12+16
+        movq_cfi r13, R13+16
+        movq_cfi r14, R14+16
+        movq_cfi r15, R15+16
+        movq %r11, 8(%rsp)      /* return address */
+        FIXUP_TOP_OF_STACK %r11, 16
+        ret
+        CFI_ENDPROC
+END(save_rest)
+
+/* save complete stack frame */
+ENTRY(save_paranoid)
+        XCPT_FRAME 1 RDI+8
+        cld
+        movq_cfi rdi, RDI+8
+        movq_cfi rsi, RSI+8
+        movq_cfi rdx, RDX+8
+        movq_cfi rcx, RCX+8
+        movq_cfi rax, RAX+8
+        movq_cfi r8, R8+8
+        movq_cfi r9, R9+8
+        movq_cfi r10, R10+8
+        movq_cfi r11, R11+8
+        movq_cfi rbx, RBX+8
+        movq_cfi rbp, RBP+8
+        movq_cfi r12, R12+8
+        movq_cfi r13, R13+8
+        movq_cfi r14, R14+8
+        movq_cfi r15, R15+8
+        movl $1,%ebx
+        movl $MSR_GS_BASE,%ecx
+        rdmsr
+        testl %edx,%edx
+        js 1f   /* negative -> in kernel */
+        SWAPGS
+        xorl %ebx,%ebx
+1:      ret
+        CFI_ENDPROC
+END(save_paranoid)
 
 /*
- * A newly forked process directly context switches into this.
+ * A newly forked process directly context switches into this address.
+ *
+ * rdi: prev task we switched from
  */
-/* rdi: prev */
 ENTRY(ret_from_fork)
-        CFI_DEFAULT_STACK
+        DEFAULT_FRAME
+
         push kernel_eflags(%rip)
         CFI_ADJUST_CFA_OFFSET 8
         popf                            # reset kernel eflags
         CFI_ADJUST_CFA_OFFSET -8
-        call schedule_tail
+
+        call schedule_tail              # rdi: 'prev' task parameter
+
         GET_THREAD_INFO(%rcx)
-        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
+
         CFI_REMEMBER_STATE
-        jnz rff_trace
-rff_action:
         RESTORE_REST
-        testl $3,CS-ARGOFFSET(%rsp)     # from kernel_thread?
+
+        testl $3, CS-ARGOFFSET(%rsp)    # from kernel_thread?
         je int_ret_from_sys_call
-        testl $_TIF_IA32,TI_flags(%rcx)
+
+        testl $_TIF_IA32, TI_flags(%rcx)        # 32-bit compat task needs IRET
         jnz int_ret_from_sys_call
-        RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
-        jmp ret_from_sys_call
+
+        RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
+        jmp ret_from_sys_call           # go to the SYSRET fastpath
+
         CFI_RESTORE_STATE
-rff_trace:
-        movq %rsp,%rdi
-        call syscall_trace_leave
-        GET_THREAD_INFO(%rcx)
-        jmp rff_action
         CFI_ENDPROC
 END(ret_from_fork)
 
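
Note on the new frame macros: each layer rebases its "offset" before handing
it to the layer below, so one set of pt_regs offsets describes frames of
different depths. A sketch of the composition under assumed offsets (the X_*
constants are stand-ins, not the real asm-offsets.h values):

        .equ X_ORIG_RAX, 120            /* error-code slot */
        .equ X_RIP,      128            /* start of the iret frame */
        .equ X_SS,       160            /* last slot of the iret frame */

        .macro X_EMPTY_FRAME offset=0
        /* would declare CFA = %rsp + 8 + \offset */
        .endm

        .macro X_INTR_FRAME offset=0    /* entry %rsp points at RIP */
        X_EMPTY_FRAME X_SS+8+\offset-X_RIP
        .endm

        .macro X_XCPT_FRAME offset=0    /* one extra word: the error code */
        X_INTR_FRAME X_RIP+\offset-X_ORIG_RAX
        .endm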
@@ -390,10 +509,13 @@ sysret_signal:
         jc sysret_audit
 #endif
         /* edx: work flags (arg3) */
-        leaq do_notify_resume(%rip),%rax
         leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
         xorl %esi,%esi # oldset -> arg2
-        call ptregscall_common
+        SAVE_REST
+        FIXUP_TOP_OF_STACK %r11
+        call do_notify_resume
+        RESTORE_TOP_OF_STACK %r11
+        RESTORE_REST
         movl $_TIF_WORK_MASK,%edi
         /* Use IRET because user could have changed frame. This
            works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
@@ -537,18 +659,20 @@ END(system_call)
 /*
  * Certain special system calls that need to save a complete full stack frame.
  */
-
         .macro PTREGSCALL label,func,arg
-        .globl \label
-\label:
-        leaq \func(%rip),%rax
-        leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
-        jmp ptregscall_common
+ENTRY(\label)
+        PARTIAL_FRAME 1 8               /* offset 8: return address */
+        subq $REST_SKIP, %rsp
+        CFI_ADJUST_CFA_OFFSET REST_SKIP
+        call save_rest
+        DEFAULT_FRAME 0 8               /* offset 8: return address */
+        leaq 8(%rsp), \arg      /* pt_regs pointer */
+        call \func
+        jmp ptregscall_common
+        CFI_ENDPROC
 END(\label)
         .endm
 
-        CFI_STARTPROC
-
         PTREGSCALL stub_clone, sys_clone, %r8
         PTREGSCALL stub_fork, sys_fork, %rdi
         PTREGSCALL stub_vfork, sys_vfork, %rdi
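
Note: PTREGSCALL now stamps out one complete, individually annotated entry
point per invocation instead of sharing a single CFI_STARTPROC. The
underlying gas pattern, reduced to a toy (make_stub/do_thing_* are made-up
names):

        .macro make_stub label func
        .globl \label
\label:
        call \func
        ret
        .endm

        make_stub stub_thing_a, do_thing_a
        make_stub stub_thing_b, do_thing_b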
@@ -556,22 +680,15 @@ END(\label)
         PTREGSCALL stub_iopl, sys_iopl, %rsi
 
 ENTRY(ptregscall_common)
-        popq %r11
-        CFI_ADJUST_CFA_OFFSET -8
-        CFI_REGISTER rip, r11
-        SAVE_REST
-        movq %r11, %r15
-        CFI_REGISTER rip, r15
-        FIXUP_TOP_OF_STACK %r11
-        call *%rax
-        RESTORE_TOP_OF_STACK %r11
-        movq %r15, %r11
-        CFI_REGISTER rip, r11
-        RESTORE_REST
-        pushq %r11
-        CFI_ADJUST_CFA_OFFSET 8
-        CFI_REL_OFFSET rip, 0
-        ret
+        DEFAULT_FRAME 1 8       /* offset 8: return address */
+        RESTORE_TOP_OF_STACK %r11, 8
+        movq_cfi_restore R15+8, r15
+        movq_cfi_restore R14+8, r14
+        movq_cfi_restore R13+8, r13
+        movq_cfi_restore R12+8, r12
+        movq_cfi_restore RBP+8, rbp
+        movq_cfi_restore RBX+8, rbx
+        ret $REST_SKIP          /* pop extended registers */
         CFI_ENDPROC
 END(ptregscall_common)
 
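
Note: the "ret $REST_SKIP" above uses the near-return-with-immediate form,
which pops the return address and then discards imm16 further bytes, here
the six extended-register slots that save_rest allocated. A toy equivalent
(demo_* names are made up):

demo_callee:
        movq 8(%rsp), %rax      /* read a slot the caller pushed */
        ret $16                 /* return and free both caller pushes */

demo_caller:
        pushq $2
        pushq $1
        call demo_callee        /* stack is balanced on return */
        ret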
@@ -610,70 +727,70 @@ ENTRY(stub_rt_sigreturn)
 END(stub_rt_sigreturn)
 
 /*
- * initial frame state for interrupts and exceptions
+ * Build the entry stubs and pointer table with some assembler magic.
+ * We pack 7 stubs into a single 32-byte chunk, which will fit in a
+ * single cache line on all modern x86 implementations.
  */
-        .macro _frame ref
-        CFI_STARTPROC simple
-        CFI_SIGNAL_FRAME
-        CFI_DEF_CFA rsp,SS+8-\ref
-        /*CFI_REL_OFFSET ss,SS-\ref*/
-        CFI_REL_OFFSET rsp,RSP-\ref
-        /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
-        /*CFI_REL_OFFSET cs,CS-\ref*/
-        CFI_REL_OFFSET rip,RIP-\ref
-        .endm
+        .section .init.rodata,"a"
+ENTRY(interrupt)
+        .text
+        .p2align 5
+        .p2align CONFIG_X86_L1_CACHE_SHIFT
+ENTRY(irq_entries_start)
+        INTR_FRAME
+vector=FIRST_EXTERNAL_VECTOR
+.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
+        .balign 32
+  .rept 7
+    .if vector < NR_VECTORS
+      .if vector <> FIRST_EXTERNAL_VECTOR
+        CFI_ADJUST_CFA_OFFSET -8
+      .endif
+1:      pushq $(~vector+0x80)   /* Note: always in signed byte range */
+        CFI_ADJUST_CFA_OFFSET 8
+      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
+        jmp 2f
+      .endif
+      .previous
+        .quad 1b
+      .text
+vector=vector+1
+    .endif
+  .endr
+2:      jmp common_interrupt
+.endr
+        CFI_ENDPROC
+END(irq_entries_start)
 
-/* initial frame state for interrupts (and exceptions without error code) */
-#define INTR_FRAME      _frame RIP
-/* initial frame state for exceptions with error code (and interrupts with
-   vector already pushed) */
-#define XCPT_FRAME      _frame ORIG_RAX
+.previous
+END(interrupt)
+.previous
 
 /*
  * Interrupt entry/exit.
  *
  * Interrupt entry points save only callee clobbered registers in fast path.
  *
  * Entry runs with interrupts off.
  */
 
-/* 0(%rsp): interrupt number */
+/* 0(%rsp): ~(interrupt number) */
         .macro interrupt func
-        cld
-        SAVE_ARGS
-        leaq -ARGOFFSET(%rsp),%rdi      # arg1 for handler
-        pushq %rbp
-        /*
-         * Save rbp twice: One is for marking the stack frame, as usual, and the
-         * other, to fill pt_regs properly. This is because bx comes right
-         * before the last saved register in that structure, and not bp. If the
-         * base pointer were in the place bx is today, this would not be needed.
-         */
-        movq %rbp, -8(%rsp)
-        CFI_ADJUST_CFA_OFFSET 8
-        CFI_REL_OFFSET rbp, 0
-        movq %rsp,%rbp
-        CFI_DEF_CFA_REGISTER rbp
-        testl $3,CS(%rdi)
-        je 1f
-        SWAPGS
-        /* irqcount is used to check if a CPU is already on an interrupt
-           stack or not. While this is essentially redundant with preempt_count
-           it is a little cheaper to use a separate counter in the PDA
-           (short of moving irq_enter into assembly, which would be too
-           much work) */
-1:      incl %gs:pda_irqcount
-        cmoveq %gs:pda_irqstackptr,%rsp
-        push %rbp                       # backlink for old unwinder
-        /*
-         * We entered an interrupt context - irqs are off:
-         */
-        TRACE_IRQS_OFF
+        subq $10*8, %rsp
+        CFI_ADJUST_CFA_OFFSET 10*8
+        call save_args
+        PARTIAL_FRAME 0
         call \func
         .endm
 
-ENTRY(common_interrupt)
+        /*
+         * The interrupt stubs push (~vector+0x80) onto the stack and
+         * then jump to common_interrupt.
+         */
+        .p2align CONFIG_X86_L1_CACHE_SHIFT
+common_interrupt:
         XCPT_FRAME
+        addq $-0x80,(%rsp)              /* Adjust vector to [-256,-1] range */
         interrupt do_IRQ
         /* 0(%rsp): oldrsp-ARGOFFSET */
 ret_from_intr:
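
Note on the vector encoding: for any vector v in 0x20..0xff, ~v+0x80 equals
0x7f-v, which lies in -0x80..0x5f and therefore always fits a sign-extended
byte, keeping every stub small enough for the 7-per-32-byte packing.
common_interrupt subtracts the 0x80 bias again, leaving ~v on the stack for
the handler. A sketch of the round trip (DEMO_VECTOR/demo_decode are made-up
names):

        .equ DEMO_VECTOR, 0x31

demo_decode:
        pushq $(~DEMO_VECTOR+0x80)      /* what a stub pushes (fits imm8) */
        addq $-0x80,(%rsp)              /* what common_interrupt does */
        popq %rax                       /* %rax = ~DEMO_VECTOR */
        notq %rax                       /* %rax = 0x31 again */
        ret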
@@ -808,315 +925,202 @@ END(common_interrupt)
 /*
  * APIC interrupts.
  */
-        .macro apicinterrupt num,func
+.macro apicinterrupt num sym do_sym
+ENTRY(\sym)
         INTR_FRAME
         pushq $~(\num)
         CFI_ADJUST_CFA_OFFSET 8
-        interrupt \func
+        interrupt \do_sym
         jmp ret_from_intr
         CFI_ENDPROC
-        .endm
+END(\sym)
+.endm
 
-ENTRY(thermal_interrupt)
-        apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
-END(thermal_interrupt)
+#ifdef CONFIG_SMP
+apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
+        irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
+#endif
 
-ENTRY(threshold_interrupt)
-        apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
-END(threshold_interrupt)
+apicinterrupt UV_BAU_MESSAGE \
+        uv_bau_message_intr1 uv_bau_message_interrupt
+apicinterrupt LOCAL_TIMER_VECTOR \
+        apic_timer_interrupt smp_apic_timer_interrupt
 
 #ifdef CONFIG_SMP
-ENTRY(reschedule_interrupt)
-        apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
-END(reschedule_interrupt)
-
-        .macro INVALIDATE_ENTRY num
-ENTRY(invalidate_interrupt\num)
-        apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
-END(invalidate_interrupt\num)
-        .endm
-
-        INVALIDATE_ENTRY 0
-        INVALIDATE_ENTRY 1
-        INVALIDATE_ENTRY 2
-        INVALIDATE_ENTRY 3
-        INVALIDATE_ENTRY 4
-        INVALIDATE_ENTRY 5
-        INVALIDATE_ENTRY 6
-        INVALIDATE_ENTRY 7
-
-ENTRY(call_function_interrupt)
-        apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
-END(call_function_interrupt)
-ENTRY(call_function_single_interrupt)
-        apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
-END(call_function_single_interrupt)
-ENTRY(irq_move_cleanup_interrupt)
-        apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
-END(irq_move_cleanup_interrupt)
+apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
+        invalidate_interrupt0 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \
+        invalidate_interrupt1 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \
+        invalidate_interrupt2 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \
+        invalidate_interrupt3 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \
+        invalidate_interrupt4 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \
+        invalidate_interrupt5 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \
+        invalidate_interrupt6 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
+        invalidate_interrupt7 smp_invalidate_interrupt
 #endif
 
-ENTRY(apic_timer_interrupt)
-        apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
-END(apic_timer_interrupt)
+apicinterrupt THRESHOLD_APIC_VECTOR \
+        threshold_interrupt mce_threshold_interrupt
+apicinterrupt THERMAL_APIC_VECTOR \
+        thermal_interrupt smp_thermal_interrupt
 
-ENTRY(uv_bau_message_intr1)
-        apicinterrupt 220,uv_bau_message_interrupt
-END(uv_bau_message_intr1)
-
-ENTRY(error_interrupt)
-        apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
-END(error_interrupt)
+#ifdef CONFIG_SMP
+apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
+        call_function_single_interrupt smp_call_function_single_interrupt
+apicinterrupt CALL_FUNCTION_VECTOR \
+        call_function_interrupt smp_call_function_interrupt
+apicinterrupt RESCHEDULE_VECTOR \
+        reschedule_interrupt smp_reschedule_interrupt
+#endif
 
-ENTRY(spurious_interrupt)
-        apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
-END(spurious_interrupt)
+apicinterrupt ERROR_APIC_VECTOR \
+        error_interrupt smp_error_interrupt
+apicinterrupt SPURIOUS_APIC_VECTOR \
+        spurious_interrupt smp_spurious_interrupt
 
 /*
  * Exception entry points.
  */
-        .macro zeroentry sym
+.macro zeroentry sym do_sym
+ENTRY(\sym)
         INTR_FRAME
         PARAVIRT_ADJUST_EXCEPTION_FRAME
-        pushq $0        /* push error code/oldrax */
-        CFI_ADJUST_CFA_OFFSET 8
-        pushq %rax      /* push real oldrax to the rdi slot */
-        CFI_ADJUST_CFA_OFFSET 8
-        CFI_REL_OFFSET rax,0
-        leaq \sym(%rip),%rax
-        jmp error_entry
+        pushq_cfi $-1           /* ORIG_RAX: no syscall to restart */
+        subq $15*8,%rsp
+        CFI_ADJUST_CFA_OFFSET 15*8
+        call error_entry
+        DEFAULT_FRAME 0
+        movq %rsp,%rdi          /* pt_regs pointer */
+        xorl %esi,%esi          /* no error code */
+        call \do_sym
+        jmp error_exit          /* %ebx: no swapgs flag */
         CFI_ENDPROC
-        .endm
+END(\sym)
+.endm
 
-        .macro errorentry sym
-        XCPT_FRAME
+.macro paranoidzeroentry sym do_sym
+ENTRY(\sym)
+        INTR_FRAME
         PARAVIRT_ADJUST_EXCEPTION_FRAME
-        pushq %rax
+        pushq $-1               /* ORIG_RAX: no syscall to restart */
         CFI_ADJUST_CFA_OFFSET 8
-        CFI_REL_OFFSET rax,0
-        leaq \sym(%rip),%rax
-        jmp error_entry
+        subq $15*8, %rsp
+        call save_paranoid
+        TRACE_IRQS_OFF
+        movq %rsp,%rdi          /* pt_regs pointer */
+        xorl %esi,%esi          /* no error code */
+        call \do_sym
+        jmp paranoid_exit       /* %ebx: no swapgs flag */
         CFI_ENDPROC
-        .endm
+END(\sym)
+.endm
 
-        /* error code is on the stack already */
-        /* handle NMI like exceptions that can happen everywhere */
-        .macro paranoidentry sym, ist=0, irqtrace=1
-        SAVE_ALL
-        cld
-        movl $1,%ebx
-        movl $MSR_GS_BASE,%ecx
-        rdmsr
-        testl %edx,%edx
-        js 1f
-        SWAPGS
-        xorl %ebx,%ebx
-1:
-        .if \ist
-        movq %gs:pda_data_offset, %rbp
-        .endif
-        .if \irqtrace
-        TRACE_IRQS_OFF
-        .endif
-        movq %rsp,%rdi
-        movq ORIG_RAX(%rsp),%rsi
-        movq $-1,ORIG_RAX(%rsp)
-        .if \ist
-        subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
-        .endif
-        call \sym
-        .if \ist
-        addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
-        .endif
-        DISABLE_INTERRUPTS(CLBR_NONE)
-        .if \irqtrace
+.macro paranoidzeroentry_ist sym do_sym ist
+ENTRY(\sym)
+        INTR_FRAME
+        PARAVIRT_ADJUST_EXCEPTION_FRAME
+        pushq $-1               /* ORIG_RAX: no syscall to restart */
+        CFI_ADJUST_CFA_OFFSET 8
+        subq $15*8, %rsp
+        call save_paranoid
         TRACE_IRQS_OFF
-        .endif
-        .endm
+        movq %rsp,%rdi          /* pt_regs pointer */
+        xorl %esi,%esi          /* no error code */
+        movq %gs:pda_data_offset, %rbp
+        subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+        call \do_sym
+        addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+        jmp paranoid_exit       /* %ebx: no swapgs flag */
+        CFI_ENDPROC
+END(\sym)
+.endm
 
-        /*
-         * "Paranoid" exit path from exception stack.
-         * Paranoid because this is used by NMIs and cannot take
-         * any kernel state for granted.
-         * We don't do kernel preemption checks here, because only
-         * NMI should be common and it does not enable IRQs and
-         * cannot get reschedule ticks.
-         *
-         * "trace" is 0 for the NMI handler only, because irq-tracing
-         * is fundamentally NMI-unsafe. (we cannot change the soft and
-         * hard flags at once, atomically)
-         */
-        .macro paranoidexit trace=1
-        /* ebx: no swapgs flag */
-paranoid_exit\trace:
-        testl %ebx,%ebx                 /* swapgs needed? */
-        jnz paranoid_restore\trace
-        testl $3,CS(%rsp)
-        jnz paranoid_userspace\trace
-paranoid_swapgs\trace:
-        .if \trace
-        TRACE_IRQS_IRETQ 0
-        .endif
-        SWAPGS_UNSAFE_STACK
-paranoid_restore\trace:
-        RESTORE_ALL 8
-        jmp irq_return
-paranoid_userspace\trace:
-        GET_THREAD_INFO(%rcx)
-        movl TI_flags(%rcx),%ebx
-        andl $_TIF_WORK_MASK,%ebx
-        jz paranoid_swapgs\trace
-        movq %rsp,%rdi                  /* &pt_regs */
-        call sync_regs
-        movq %rax,%rsp                  /* switch stack for scheduling */
-        testl $_TIF_NEED_RESCHED,%ebx
-        jnz paranoid_schedule\trace
-        movl %ebx,%edx                  /* arg3: thread flags */
-        .if \trace
-        TRACE_IRQS_ON
-        .endif
-        ENABLE_INTERRUPTS(CLBR_NONE)
-        xorl %esi,%esi                  /* arg2: oldset */
-        movq %rsp,%rdi                  /* arg1: &pt_regs */
-        call do_notify_resume
-        DISABLE_INTERRUPTS(CLBR_NONE)
-        .if \trace
-        TRACE_IRQS_OFF
-        .endif
-        jmp paranoid_userspace\trace
-paranoid_schedule\trace:
-        .if \trace
-        TRACE_IRQS_ON
-        .endif
-        ENABLE_INTERRUPTS(CLBR_ANY)
-        call schedule
-        DISABLE_INTERRUPTS(CLBR_ANY)
-        .if \trace
-        TRACE_IRQS_OFF
-        .endif
-        jmp paranoid_userspace\trace
+.macro errorentry sym do_sym
+ENTRY(\sym)
+        XCPT_FRAME
+        PARAVIRT_ADJUST_EXCEPTION_FRAME
+        subq $15*8,%rsp
+        CFI_ADJUST_CFA_OFFSET 15*8
+        call error_entry
+        DEFAULT_FRAME 0
+        movq %rsp,%rdi                  /* pt_regs pointer */
+        movq ORIG_RAX(%rsp),%rsi        /* get error code */
+        movq $-1,ORIG_RAX(%rsp)         /* no syscall to restart */
+        call \do_sym
+        jmp error_exit                  /* %ebx: no swapgs flag */
         CFI_ENDPROC
-        .endm
+END(\sym)
+.endm
 
-/*
- * Exception entry point. This expects an error code/orig_rax on the stack
- * and the exception handler in %rax.
- */
-KPROBE_ENTRY(error_entry)
-        _frame RDI
-        CFI_REL_OFFSET rax,0
-        /* rdi slot contains rax, oldrax contains error code */
-        cld
-        subq $14*8,%rsp
-        CFI_ADJUST_CFA_OFFSET (14*8)
-        movq %rsi,13*8(%rsp)
-        CFI_REL_OFFSET rsi,RSI
-        movq 14*8(%rsp),%rsi    /* load rax from rdi slot */
-        CFI_REGISTER rax,rsi
-        movq %rdx,12*8(%rsp)
-        CFI_REL_OFFSET rdx,RDX
-        movq %rcx,11*8(%rsp)
-        CFI_REL_OFFSET rcx,RCX
-        movq %rsi,10*8(%rsp)    /* store rax */
-        CFI_REL_OFFSET rax,RAX
-        movq %r8, 9*8(%rsp)
-        CFI_REL_OFFSET r8,R8
-        movq %r9, 8*8(%rsp)
-        CFI_REL_OFFSET r9,R9
-        movq %r10,7*8(%rsp)
-        CFI_REL_OFFSET r10,R10
-        movq %r11,6*8(%rsp)
-        CFI_REL_OFFSET r11,R11
-        movq %rbx,5*8(%rsp)
-        CFI_REL_OFFSET rbx,RBX
-        movq %rbp,4*8(%rsp)
-        CFI_REL_OFFSET rbp,RBP
-        movq %r12,3*8(%rsp)
-        CFI_REL_OFFSET r12,R12
-        movq %r13,2*8(%rsp)
-        CFI_REL_OFFSET r13,R13
-        movq %r14,1*8(%rsp)
-        CFI_REL_OFFSET r14,R14
-        movq %r15,(%rsp)
-        CFI_REL_OFFSET r15,R15
-        xorl %ebx,%ebx
-        testl $3,CS(%rsp)
-        je error_kernelspace
-error_swapgs:
-        SWAPGS
-error_sti:
+        /* error code is on the stack already */
+.macro paranoiderrorentry sym do_sym
+ENTRY(\sym)
+        XCPT_FRAME
+        PARAVIRT_ADJUST_EXCEPTION_FRAME
+        subq $15*8,%rsp
+        CFI_ADJUST_CFA_OFFSET 15*8
+        call save_paranoid
+        DEFAULT_FRAME 0
         TRACE_IRQS_OFF
-        movq %rdi,RDI(%rsp)
-        CFI_REL_OFFSET rdi,RDI
-        movq %rsp,%rdi
+        movq %rsp,%rdi                  /* pt_regs pointer */
         movq ORIG_RAX(%rsp),%rsi        /* get error code */
-        movq $-1,ORIG_RAX(%rsp)
-        call *%rax
-        /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
-error_exit:
-        movl %ebx,%eax
-        RESTORE_REST
-        DISABLE_INTERRUPTS(CLBR_NONE)
-        TRACE_IRQS_OFF
-        GET_THREAD_INFO(%rcx)
-        testl %eax,%eax
-        jne retint_kernel
-        LOCKDEP_SYS_EXIT_IRQ
-        movl TI_flags(%rcx),%edx
-        movl $_TIF_WORK_MASK,%edi
-        andl %edi,%edx
-        jnz retint_careful
-        jmp retint_swapgs
+        movq $-1,ORIG_RAX(%rsp)         /* no syscall to restart */
+        call \do_sym
+        jmp paranoid_exit               /* %ebx: no swapgs flag */
         CFI_ENDPROC
+END(\sym)
+.endm
 
-error_kernelspace:
-        incl %ebx
-        /* There are two places in the kernel that can potentially fault with
-           usergs. Handle them here. The exception handlers after
-           iret run with kernel gs again, so don't set the user space flag.
-           B stepping K8s sometimes report an truncated RIP for IRET
-           exceptions returning to compat mode. Check for these here too. */
-        leaq irq_return(%rip),%rcx
-        cmpq %rcx,RIP(%rsp)
-        je error_swapgs
-        movl %ecx,%ecx  /* zero extend */
-        cmpq %rcx,RIP(%rsp)
-        je error_swapgs
-        cmpq $gs_change,RIP(%rsp)
-        je error_swapgs
-        jmp error_sti
-KPROBE_END(error_entry)
+zeroentry divide_error do_divide_error
+zeroentry overflow do_overflow
+zeroentry bounds do_bounds
+zeroentry invalid_op do_invalid_op
+zeroentry device_not_available do_device_not_available
+paranoiderrorentry double_fault do_double_fault
+zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
+errorentry invalid_TSS do_invalid_TSS
+errorentry segment_not_present do_segment_not_present
+zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
+zeroentry coprocessor_error do_coprocessor_error
+errorentry alignment_check do_alignment_check
+zeroentry simd_coprocessor_error do_simd_coprocessor_error
 
         /* Reload gs selector with exception handling */
         /* edi:  new selector */
 ENTRY(native_load_gs_index)
         CFI_STARTPROC
         pushf
         CFI_ADJUST_CFA_OFFSET 8
         DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
         SWAPGS
 gs_change:
         movl %edi,%gs
 2:      mfence          /* workaround */
         SWAPGS
         popf
         CFI_ADJUST_CFA_OFFSET -8
         ret
         CFI_ENDPROC
-ENDPROC(native_load_gs_index)
+END(native_load_gs_index)
 
         .section __ex_table,"a"
         .align 8
         .quad gs_change,bad_gs
         .previous
         .section .fixup,"ax"
         /* running with kernelgs */
 bad_gs:
         SWAPGS                  /* switch back to user gs */
         xorl %eax,%eax
         movl %eax,%gs
         jmp 2b
         .previous
 
 /*
  * Create a kernel thread.
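
Note: every entry stamped out by zeroentry/errorentry and the paranoid
variants funnels into a C handler with the same two-argument convention:
pt_regs pointer in %rdi, error code (or zero) in %rsi. Reduced to a toy stub
(demo_entry/do_demo are made-up names):

demo_entry:
        movq %rsp, %rdi         /* arg1: pt_regs pointer */
        xorl %esi, %esi         /* arg2: no error code */
        call do_demo
        ret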
@@ -1151,15 +1155,15 @@ ENTRY(kernel_thread)
  * so internally to the x86_64 port you can rely on kernel_thread()
  * not to reschedule the child before returning, this avoids the need
  * of hacks for example to fork off the per-CPU idle tasks.
  * [Hopefully no generic code relies on the reschedule -AK]
  */
         RESTORE_ALL
         UNFAKE_STACK_FRAME
         ret
         CFI_ENDPROC
-ENDPROC(kernel_thread)
+END(kernel_thread)
 
-child_rip:
+ENTRY(child_rip)
         pushq $0                # fake return address
         CFI_STARTPROC
         /*
@@ -1174,7 +1178,7 @@ child_rip:
         call do_exit
         ud2                     # padding for call trace
         CFI_ENDPROC
-ENDPROC(child_rip)
+END(child_rip)
 
 /*
  * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
@@ -1205,129 +1209,7 @@ ENTRY(kernel_execve)
         UNFAKE_STACK_FRAME
         ret
         CFI_ENDPROC
-ENDPROC(kernel_execve)
+END(kernel_execve)
-
-KPROBE_ENTRY(page_fault)
-        errorentry do_page_fault
-KPROBE_END(page_fault)
-
-ENTRY(coprocessor_error)
-        zeroentry do_coprocessor_error
-END(coprocessor_error)
-
-ENTRY(simd_coprocessor_error)
-        zeroentry do_simd_coprocessor_error
-END(simd_coprocessor_error)
-
-ENTRY(device_not_available)
-        zeroentry do_device_not_available
-END(device_not_available)
-
-        /* runs on exception stack */
-KPROBE_ENTRY(debug)
-        INTR_FRAME
-        PARAVIRT_ADJUST_EXCEPTION_FRAME
-        pushq $0
-        CFI_ADJUST_CFA_OFFSET 8
-        paranoidentry do_debug, DEBUG_STACK
-        paranoidexit
-KPROBE_END(debug)
-
-        /* runs on exception stack */
-KPROBE_ENTRY(nmi)
-        INTR_FRAME
-        PARAVIRT_ADJUST_EXCEPTION_FRAME
-        pushq $-1
-        CFI_ADJUST_CFA_OFFSET 8
-        paranoidentry do_nmi, 0, 0
-#ifdef CONFIG_TRACE_IRQFLAGS
-        paranoidexit 0
-#else
-        jmp paranoid_exit1
-        CFI_ENDPROC
-#endif
-KPROBE_END(nmi)
-
-KPROBE_ENTRY(int3)
-        INTR_FRAME
-        PARAVIRT_ADJUST_EXCEPTION_FRAME
-        pushq $0
-        CFI_ADJUST_CFA_OFFSET 8
-        paranoidentry do_int3, DEBUG_STACK
-        jmp paranoid_exit1
-        CFI_ENDPROC
-KPROBE_END(int3)
-
-ENTRY(overflow)
-        zeroentry do_overflow
-END(overflow)
-
-ENTRY(bounds)
-        zeroentry do_bounds
-END(bounds)
-
-ENTRY(invalid_op)
-        zeroentry do_invalid_op
-END(invalid_op)
-
-ENTRY(coprocessor_segment_overrun)
-        zeroentry do_coprocessor_segment_overrun
-END(coprocessor_segment_overrun)
-
-        /* runs on exception stack */
-ENTRY(double_fault)
-        XCPT_FRAME
-        PARAVIRT_ADJUST_EXCEPTION_FRAME
-        paranoidentry do_double_fault
-        jmp paranoid_exit1
-        CFI_ENDPROC
-END(double_fault)
-
-ENTRY(invalid_TSS)
-        errorentry do_invalid_TSS
-END(invalid_TSS)
-
-ENTRY(segment_not_present)
-        errorentry do_segment_not_present
-END(segment_not_present)
-
-        /* runs on exception stack */
-ENTRY(stack_segment)
-        XCPT_FRAME
-        PARAVIRT_ADJUST_EXCEPTION_FRAME
-        paranoidentry do_stack_segment
-        jmp paranoid_exit1
-        CFI_ENDPROC
-END(stack_segment)
-
-KPROBE_ENTRY(general_protection)
-        errorentry do_general_protection
-KPROBE_END(general_protection)
-
-ENTRY(alignment_check)
-        errorentry do_alignment_check
-END(alignment_check)
-
-ENTRY(divide_error)
-        zeroentry do_divide_error
-END(divide_error)
-
-ENTRY(spurious_interrupt_bug)
-        zeroentry do_spurious_interrupt_bug
-END(spurious_interrupt_bug)
-
-#ifdef CONFIG_X86_MCE
-        /* runs on exception stack */
-ENTRY(machine_check)
-        INTR_FRAME
-        PARAVIRT_ADJUST_EXCEPTION_FRAME
-        pushq $0
-        CFI_ADJUST_CFA_OFFSET 8
-        paranoidentry do_machine_check
-        jmp paranoid_exit1
-        CFI_ENDPROC
-END(machine_check)
-#endif
 
 /* Call softirq on interrupt stack. Interrupts are off. */
 ENTRY(call_softirq)
@@ -1347,40 +1229,33 @@ ENTRY(call_softirq)
         decl %gs:pda_irqcount
         ret
         CFI_ENDPROC
-ENDPROC(call_softirq)
+END(call_softirq)
-
-KPROBE_ENTRY(ignore_sysret)
-        CFI_STARTPROC
-        mov $-ENOSYS,%eax
-        sysret
-        CFI_ENDPROC
-ENDPROC(ignore_sysret)
 
 #ifdef CONFIG_XEN
-ENTRY(xen_hypervisor_callback)
-        zeroentry xen_do_hypervisor_callback
-END(xen_hypervisor_callback)
+zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
 
 /*
-# A note on the "critical region" in our callback handler.
-# We want to avoid stacking callback handlers due to events occurring
-# during handling of the last event. To do this, we keep events disabled
-# until we've done all processing. HOWEVER, we must enable events before
-# popping the stack frame (can't be done atomically) and so it would still
-# be possible to get enough handler activations to overflow the stack.
-# Although unlikely, bugs of that kind are hard to track down, so we'd
-# like to avoid the possibility.
-# So, on entry to the handler we detect whether we interrupted an
-# existing activation in its critical region -- if so, we pop the current
-# activation and restart the handler using the previous one.
-*/
+ * A note on the "critical region" in our callback handler.
+ * We want to avoid stacking callback handlers due to events occurring
+ * during handling of the last event. To do this, we keep events disabled
+ * until we've done all processing. HOWEVER, we must enable events before
+ * popping the stack frame (can't be done atomically) and so it would still
+ * be possible to get enough handler activations to overflow the stack.
+ * Although unlikely, bugs of that kind are hard to track down, so we'd
+ * like to avoid the possibility.
+ * So, on entry to the handler we detect whether we interrupted an
+ * existing activation in its critical region -- if so, we pop the current
+ * activation and restart the handler using the previous one.
+ */
 ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct *pt_regs)
         CFI_STARTPROC
-/* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
-   see the correct pointer to the pt_regs */
+/*
+ * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
+ * see the correct pointer to the pt_regs
+ */
         movq %rdi, %rsp            # we don't return, adjust the stack frame
         CFI_ENDPROC
-        CFI_DEFAULT_STACK
+        DEFAULT_FRAME
 11:     incl %gs:pda_irqcount
         movq %rsp,%rbp
         CFI_DEF_CFA_REGISTER rbp
@@ -1395,23 +1270,26 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
 END(do_hypervisor_callback)
 
 /*
-# Hypervisor uses this for application faults while it executes.
-# We get here for two reasons:
-#  1. Fault while reloading DS, ES, FS or GS
-#  2. Fault while executing IRET
-# Category 1 we do not need to fix up as Xen has already reloaded all segment
-# registers that could be reloaded and zeroed the others.
-# Category 2 we fix up by killing the current process. We cannot use the
-# normal Linux return path in this case because if we use the IRET hypercall
-# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
-# We distinguish between categories by comparing each saved segment register
-# with its current contents: any discrepancy means we in category 1.
-*/
+ * Hypervisor uses this for application faults while it executes.
+ * We get here for two reasons:
+ *  1. Fault while reloading DS, ES, FS or GS
+ *  2. Fault while executing IRET
+ * Category 1 we do not need to fix up as Xen has already reloaded all segment
+ * registers that could be reloaded and zeroed the others.
+ * Category 2 we fix up by killing the current process. We cannot use the
+ * normal Linux return path in this case because if we use the IRET hypercall
+ * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
+ * We distinguish between categories by comparing each saved segment register
+ * with its current contents: any discrepancy means we are in category 1.
+ */
 ENTRY(xen_failsafe_callback)
-        framesz = (RIP-0x30)    /* workaround buggy gas */
-        _frame framesz
-        CFI_REL_OFFSET rcx, 0
-        CFI_REL_OFFSET r11, 8
+        INTR_FRAME 1 (6*8)
+        /*CFI_REL_OFFSET gs,GS*/
+        /*CFI_REL_OFFSET fs,FS*/
+        /*CFI_REL_OFFSET es,ES*/
+        /*CFI_REL_OFFSET ds,DS*/
+        CFI_REL_OFFSET r11,8
+        CFI_REL_OFFSET rcx,0
         movw %ds,%cx
         cmpw %cx,0x10(%rsp)
         CFI_REMEMBER_STATE
@@ -1432,12 +1310,9 @@ ENTRY(xen_failsafe_callback)
         CFI_RESTORE r11
         addq $0x30,%rsp
         CFI_ADJUST_CFA_OFFSET -0x30
-        pushq $0
-        CFI_ADJUST_CFA_OFFSET 8
-        pushq %r11
-        CFI_ADJUST_CFA_OFFSET 8
-        pushq %rcx
-        CFI_ADJUST_CFA_OFFSET 8
+        pushq_cfi $0    /* RIP */
+        pushq_cfi %r11
+        pushq_cfi %rcx
         jmp general_protection
         CFI_RESTORE_STATE
 1:      /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
@@ -1447,11 +1322,223 @@ ENTRY(xen_failsafe_callback)
         CFI_RESTORE r11
         addq $0x30,%rsp
         CFI_ADJUST_CFA_OFFSET -0x30
-        pushq $0
-        CFI_ADJUST_CFA_OFFSET 8
+        pushq_cfi $0
         SAVE_ALL
         jmp error_exit
         CFI_ENDPROC
 END(xen_failsafe_callback)
 
 #endif /* CONFIG_XEN */
+
+/*
+ * Some functions should be protected against kprobes
+ */
+        .pushsection .kprobes.text, "ax"
+
+paranoidzeroentry_ist debug do_debug DEBUG_STACK
+paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
+paranoiderrorentry stack_segment do_stack_segment
+errorentry general_protection do_general_protection
+errorentry page_fault do_page_fault
+#ifdef CONFIG_X86_MCE
+paranoidzeroentry machine_check do_machine_check
+#endif
+
+        /*
+         * "Paranoid" exit path from exception stack.
+         * Paranoid because this is used by NMIs and cannot take
+         * any kernel state for granted.
+         * We don't do kernel preemption checks here, because only
+         * NMI should be common and it does not enable IRQs and
+         * cannot get reschedule ticks.
+         *
+         * "trace" is 0 for the NMI handler only, because irq-tracing
+         * is fundamentally NMI-unsafe. (we cannot change the soft and
+         * hard flags at once, atomically)
+         */
+
+        /* ebx: no swapgs flag */
+ENTRY(paranoid_exit)
+        INTR_FRAME
+        DISABLE_INTERRUPTS(CLBR_NONE)
+        TRACE_IRQS_OFF
+        testl %ebx,%ebx                 /* swapgs needed? */
+        jnz paranoid_restore
+        testl $3,CS(%rsp)
+        jnz paranoid_userspace
+paranoid_swapgs:
+        TRACE_IRQS_IRETQ 0
+        SWAPGS_UNSAFE_STACK
+paranoid_restore:
+        RESTORE_ALL 8
+        jmp irq_return
+paranoid_userspace:
+        GET_THREAD_INFO(%rcx)
+        movl TI_flags(%rcx),%ebx
+        andl $_TIF_WORK_MASK,%ebx
+        jz paranoid_swapgs
+        movq %rsp,%rdi                  /* &pt_regs */
+        call sync_regs
+        movq %rax,%rsp                  /* switch stack for scheduling */
+        testl $_TIF_NEED_RESCHED,%ebx
+        jnz paranoid_schedule
+        movl %ebx,%edx                  /* arg3: thread flags */
+        TRACE_IRQS_ON
+        ENABLE_INTERRUPTS(CLBR_NONE)
+        xorl %esi,%esi                  /* arg2: oldset */
+        movq %rsp,%rdi                  /* arg1: &pt_regs */
+        call do_notify_resume
+        DISABLE_INTERRUPTS(CLBR_NONE)
+        TRACE_IRQS_OFF
+        jmp paranoid_userspace
+paranoid_schedule:
+        TRACE_IRQS_ON
+        ENABLE_INTERRUPTS(CLBR_ANY)
+        call schedule
+        DISABLE_INTERRUPTS(CLBR_ANY)
+        TRACE_IRQS_OFF
+        jmp paranoid_userspace
+        CFI_ENDPROC
+END(paranoid_exit)
+
+/*
+ * Exception entry point. This expects an error code/orig_rax on the stack.
+ * returns in "no swapgs flag" in %ebx.
+ */
+ENTRY(error_entry)
+        XCPT_FRAME
+        CFI_ADJUST_CFA_OFFSET 15*8
+        /* oldrax contains error code */
+        cld
+        movq_cfi rdi, RDI+8
+        movq_cfi rsi, RSI+8
+        movq_cfi rdx, RDX+8
+        movq_cfi rcx, RCX+8
+        movq_cfi rax, RAX+8
+        movq_cfi r8, R8+8
+        movq_cfi r9, R9+8
+        movq_cfi r10, R10+8
+        movq_cfi r11, R11+8
+        movq_cfi rbx, RBX+8
+        movq_cfi rbp, RBP+8
+        movq_cfi r12, R12+8
+        movq_cfi r13, R13+8
+        movq_cfi r14, R14+8
+        movq_cfi r15, R15+8
+        xorl %ebx,%ebx
+        testl $3,CS+8(%rsp)
+        je error_kernelspace
+error_swapgs:
+        SWAPGS
+error_sti:
+        TRACE_IRQS_OFF
+        ret
+        CFI_ENDPROC
+
+/*
+ * There are two places in the kernel that can potentially fault with
+ * usergs. Handle them here. The exception handlers after iret run with
+ * kernel gs again, so don't set the user space flag. B stepping K8s
+ * sometimes report a truncated RIP for IRET exceptions returning to
+ * compat mode. Check for these here too.
+ */
+error_kernelspace:
+        incl %ebx
+        leaq irq_return(%rip),%rcx
+        cmpq %rcx,RIP+8(%rsp)
+        je error_swapgs
+        movl %ecx,%ecx  /* zero extend */
+        cmpq %rcx,RIP+8(%rsp)
+        je error_swapgs
+        cmpq $gs_change,RIP+8(%rsp)
+        je error_swapgs
+        jmp error_sti
+END(error_entry)
+
+
+/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
+ENTRY(error_exit)
+        DEFAULT_FRAME
+        movl %ebx,%eax
+        RESTORE_REST
+        DISABLE_INTERRUPTS(CLBR_NONE)
+        TRACE_IRQS_OFF
+        GET_THREAD_INFO(%rcx)
+        testl %eax,%eax
+        jne retint_kernel
+        LOCKDEP_SYS_EXIT_IRQ
+        movl TI_flags(%rcx),%edx
+        movl $_TIF_WORK_MASK,%edi
+        andl %edi,%edx
+        jnz retint_careful
+        jmp retint_swapgs
+        CFI_ENDPROC
+END(error_exit)
+
+
+        /* runs on exception stack */
+ENTRY(nmi)
+        INTR_FRAME
+        PARAVIRT_ADJUST_EXCEPTION_FRAME
+        pushq_cfi $-1
+        subq $15*8, %rsp
+        CFI_ADJUST_CFA_OFFSET 15*8
+        call save_paranoid
+        DEFAULT_FRAME 0
+        /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+        movq %rsp,%rdi
+        movq $-1,%rsi
+        call do_nmi
+#ifdef CONFIG_TRACE_IRQFLAGS
+        /* paranoidexit; without TRACE_IRQS_OFF */
+        /* ebx: no swapgs flag */
+        DISABLE_INTERRUPTS(CLBR_NONE)
+        testl %ebx,%ebx                 /* swapgs needed? */
+        jnz nmi_restore
+        testl $3,CS(%rsp)
+        jnz nmi_userspace
+nmi_swapgs:
+        SWAPGS_UNSAFE_STACK
+nmi_restore:
+        RESTORE_ALL 8
+        jmp irq_return
+nmi_userspace:
+        GET_THREAD_INFO(%rcx)
+        movl TI_flags(%rcx),%ebx
+        andl $_TIF_WORK_MASK,%ebx
+        jz nmi_swapgs
+        movq %rsp,%rdi                  /* &pt_regs */
+        call sync_regs
+        movq %rax,%rsp                  /* switch stack for scheduling */
+        testl $_TIF_NEED_RESCHED,%ebx
+        jnz nmi_schedule
+        movl %ebx,%edx                  /* arg3: thread flags */
+        ENABLE_INTERRUPTS(CLBR_NONE)
+        xorl %esi,%esi                  /* arg2: oldset */
+        movq %rsp,%rdi                  /* arg1: &pt_regs */
+        call do_notify_resume
+        DISABLE_INTERRUPTS(CLBR_NONE)
+        jmp nmi_userspace
+nmi_schedule:
+        ENABLE_INTERRUPTS(CLBR_ANY)
+        call schedule
+        DISABLE_INTERRUPTS(CLBR_ANY)
+        jmp nmi_userspace
+        CFI_ENDPROC
+#else
+        jmp paranoid_exit
+        CFI_ENDPROC
+#endif
+END(nmi)
+
+ENTRY(ignore_sysret)
+        CFI_STARTPROC
+        mov $-ENOSYS,%eax
+        sysret
+        CFI_ENDPROC
+END(ignore_sysret)
+
+/*
+ * End of kprobes section
+ */
+        .popsection
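
Note on the %ebx protocol used throughout this section: save_paranoid and
error_entry record in %ebx whether GS already held the kernel base (1) or
had to be swapped (0), and paranoid_exit/error_exit swap back only when
needed. A reduced sketch (demo_* names are made up; 0xc0000101 is
MSR_GS_BASE):

demo_entry_side:
        movl $1, %ebx           /* assume kernel GS */
        movl $0xc0000101, %ecx  /* MSR_GS_BASE */
        rdmsr
        testl %edx, %edx        /* kernel GS base is a negative address */
        js 1f
        swapgs                  /* came in with user GS */
        xorl %ebx, %ebx         /* remember to swap back on exit */
1:      ret

demo_exit_side:
        testl %ebx, %ebx        /* swapgs needed? */
        jnz 2f
        swapgs
2:      ret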