Diffstat:
 arch/x86/include/asm/dwarf2.h              |   97
 arch/x86/include/asm/hardirq_32.h          |    2
 arch/x86/include/asm/hardirq_64.h          |    2
 arch/x86/include/asm/hw_irq.h              |    4
 arch/x86/include/asm/linkage.h             |   60
 arch/x86/kernel/apic.c                     |   13
 arch/x86/kernel/cpu/mcheck/mce_amd_64.c    |    2
 arch/x86/kernel/cpu/mcheck/mce_intel_64.c  |    2
 arch/x86/kernel/entry_32.S                 |  476
 arch/x86/kernel/entry_64.S                 | 1231
 arch/x86/kernel/irqinit_32.c               |    2
 arch/x86/kernel/irqinit_64.c               |   66
 arch/x86/kernel/smp.c                      |   18
 arch/x86/kernel/time_32.c                  |    2
 arch/x86/kernel/time_64.c                  |    2
 arch/x86/kernel/tlb_32.c                   |    2
 arch/x86/kernel/tlb_64.c                   |    2
 arch/x86/kernel/traps.c                    |    6
 arch/x86/lguest/boot.c                     |    3
 include/linux/linkage.h                    |    8
 20 files changed, 1054 insertions(+), 946 deletions(-)
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index 804b6e6be929..3afc5e87cfdd 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -6,56 +6,91 @@
6#endif 6#endif
7 7
8/* 8/*
9 Macros for dwarf2 CFI unwind table entries. 9 * Macros for dwarf2 CFI unwind table entries.
10 See "as.info" for details on these pseudo ops. Unfortunately 10 * See "as.info" for details on these pseudo ops. Unfortunately
11 they are only supported in very new binutils, so define them 11 * they are only supported in very new binutils, so define them
 12 away for older versions. 12 * away for older versions.
13 */ 13 */
14 14
15#ifdef CONFIG_AS_CFI 15#ifdef CONFIG_AS_CFI
16 16
17#define CFI_STARTPROC .cfi_startproc 17#define CFI_STARTPROC .cfi_startproc
18#define CFI_ENDPROC .cfi_endproc 18#define CFI_ENDPROC .cfi_endproc
19#define CFI_DEF_CFA .cfi_def_cfa 19#define CFI_DEF_CFA .cfi_def_cfa
20#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register 20#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
21#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset 21#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
22#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset 22#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
23#define CFI_OFFSET .cfi_offset 23#define CFI_OFFSET .cfi_offset
24#define CFI_REL_OFFSET .cfi_rel_offset 24#define CFI_REL_OFFSET .cfi_rel_offset
25#define CFI_REGISTER .cfi_register 25#define CFI_REGISTER .cfi_register
26#define CFI_RESTORE .cfi_restore 26#define CFI_RESTORE .cfi_restore
27#define CFI_REMEMBER_STATE .cfi_remember_state 27#define CFI_REMEMBER_STATE .cfi_remember_state
28#define CFI_RESTORE_STATE .cfi_restore_state 28#define CFI_RESTORE_STATE .cfi_restore_state
29#define CFI_UNDEFINED .cfi_undefined 29#define CFI_UNDEFINED .cfi_undefined
30 30
31#ifdef CONFIG_AS_CFI_SIGNAL_FRAME 31#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
32#define CFI_SIGNAL_FRAME .cfi_signal_frame 32#define CFI_SIGNAL_FRAME .cfi_signal_frame
33#else 33#else
34#define CFI_SIGNAL_FRAME 34#define CFI_SIGNAL_FRAME
35#endif 35#endif
36 36
37#else 37#else
38 38
 39/* Due to the structure of pre-existing code, don't use assembler line 39/*
 40 comment character # to ignore the arguments. Instead, use a dummy macro. */ 40 * Due to the structure of pre-existing code, don't use assembler line
41 * comment character # to ignore the arguments. Instead, use a dummy macro.
42 */
41.macro cfi_ignore a=0, b=0, c=0, d=0 43.macro cfi_ignore a=0, b=0, c=0, d=0
42.endm 44.endm
43 45
44#define CFI_STARTPROC cfi_ignore 46#define CFI_STARTPROC cfi_ignore
45#define CFI_ENDPROC cfi_ignore 47#define CFI_ENDPROC cfi_ignore
46#define CFI_DEF_CFA cfi_ignore 48#define CFI_DEF_CFA cfi_ignore
47#define CFI_DEF_CFA_REGISTER cfi_ignore 49#define CFI_DEF_CFA_REGISTER cfi_ignore
48#define CFI_DEF_CFA_OFFSET cfi_ignore 50#define CFI_DEF_CFA_OFFSET cfi_ignore
49#define CFI_ADJUST_CFA_OFFSET cfi_ignore 51#define CFI_ADJUST_CFA_OFFSET cfi_ignore
50#define CFI_OFFSET cfi_ignore 52#define CFI_OFFSET cfi_ignore
51#define CFI_REL_OFFSET cfi_ignore 53#define CFI_REL_OFFSET cfi_ignore
52#define CFI_REGISTER cfi_ignore 54#define CFI_REGISTER cfi_ignore
53#define CFI_RESTORE cfi_ignore 55#define CFI_RESTORE cfi_ignore
54#define CFI_REMEMBER_STATE cfi_ignore 56#define CFI_REMEMBER_STATE cfi_ignore
55#define CFI_RESTORE_STATE cfi_ignore 57#define CFI_RESTORE_STATE cfi_ignore
56#define CFI_UNDEFINED cfi_ignore 58#define CFI_UNDEFINED cfi_ignore
57#define CFI_SIGNAL_FRAME cfi_ignore 59#define CFI_SIGNAL_FRAME cfi_ignore
58 60
59#endif 61#endif
60 62
63/*
64 * An attempt to make CFI annotations more or less
65 * correct and shorter. It is implied that you know
66 * what you're doing if you use them.
67 */
68#ifdef __ASSEMBLY__
69#ifdef CONFIG_X86_64
70 .macro pushq_cfi reg
71 pushq \reg
72 CFI_ADJUST_CFA_OFFSET 8
73 .endm
74
75 .macro popq_cfi reg
76 popq \reg
77 CFI_ADJUST_CFA_OFFSET -8
78 .endm
79
80 .macro movq_cfi reg offset=0
81 movq %\reg, \offset(%rsp)
82 CFI_REL_OFFSET \reg, \offset
83 .endm
84
85 .macro movq_cfi_restore offset reg
86 movq \offset(%rsp), %\reg
87 CFI_RESTORE \reg
88 .endm
89#else /*!CONFIG_X86_64*/
90
 91 /* 32-bit definitions are still missing */
92
93#endif /*!CONFIG_X86_64*/
94#endif /*__ASSEMBLY__*/
95
61#endif /* _ASM_X86_DWARF2_H */ 96#endif /* _ASM_X86_DWARF2_H */
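
The new pushq_cfi/popq_cfi/movq_cfi/movq_cfi_restore helpers exist so that a stack
operation and its matching CFI annotation cannot drift apart. A minimal hand-expansion
of two of them, taken directly from the macro bodies above (illustrative only; RBX is
assumed to be the usual pt_regs offset constant from asm-offsets):

	/* pushq_cfi %rax expands to: */
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8		/* the CFA moved down by one word */

	/* movq_cfi rbx, RBX+8 expands to: */
	movq %rbx, RBX+8(%rsp)
	CFI_REL_OFFSET rbx, RBX+8	/* record where rbx was saved */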
diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h
index 5ca135e72f2b..cf7954d1405f 100644
--- a/arch/x86/include/asm/hardirq_32.h
+++ b/arch/x86/include/asm/hardirq_32.h
@@ -22,6 +22,8 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
22#define __ARCH_IRQ_STAT 22#define __ARCH_IRQ_STAT
23#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) 23#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member)
24 24
25#define inc_irq_stat(member) (__get_cpu_var(irq_stat).member++)
26
25void ack_bad_irq(unsigned int irq); 27void ack_bad_irq(unsigned int irq);
26#include <linux/irq_cpustat.h> 28#include <linux/irq_cpustat.h>
27 29
diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h
index 1ba381fc51d3..b5a6b5d56704 100644
--- a/arch/x86/include/asm/hardirq_64.h
+++ b/arch/x86/include/asm/hardirq_64.h
@@ -11,6 +11,8 @@
11 11
12#define __ARCH_IRQ_STAT 1 12#define __ARCH_IRQ_STAT 1
13 13
14#define inc_irq_stat(member) add_pda(member, 1)
15
14#define local_softirq_pending() read_pda(__softirq_pending) 16#define local_softirq_pending() read_pda(__softirq_pending)
15 17
16#define __ARCH_SET_SOFTIRQ_PENDING 1 18#define __ARCH_SET_SOFTIRQ_PENDING 1
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b97aecb0b61d..8de644b6b959 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -109,9 +109,7 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
109#endif 109#endif
110#endif 110#endif
111 111
112#ifdef CONFIG_X86_32 112extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
113extern void (*const interrupt[NR_VECTORS])(void);
114#endif
115 113
116typedef int vector_irq_t[NR_VECTORS]; 114typedef int vector_irq_t[NR_VECTORS];
117DECLARE_PER_CPU(vector_irq_t, vector_irq); 115DECLARE_PER_CPU(vector_irq_t, vector_irq);
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index f61ee8f937e4..5d98d0b68ffc 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -57,5 +57,65 @@
57#define __ALIGN_STR ".align 16,0x90" 57#define __ALIGN_STR ".align 16,0x90"
58#endif 58#endif
59 59
 60/*
 61 * Check ENTRY_X86/END_X86 and
 62 * KPROBE_ENTRY_X86/KPROBE_END_X86
 63 * for unbalanced, missing, or mixed use.
 64 */
65#define __set_entry_x86 .set ENTRY_X86_IN, 0
66#define __unset_entry_x86 .set ENTRY_X86_IN, 1
67#define __set_kprobe_x86 .set KPROBE_X86_IN, 0
68#define __unset_kprobe_x86 .set KPROBE_X86_IN, 1
69
70#define __macro_err_x86 .error "ENTRY_X86/KPROBE_X86 unbalanced,missed,mixed"
71
72#define __check_entry_x86 \
73 .ifdef ENTRY_X86_IN; \
74 .ifeq ENTRY_X86_IN; \
75 __macro_err_x86; \
76 .abort; \
77 .endif; \
78 .endif
79
80#define __check_kprobe_x86 \
81 .ifdef KPROBE_X86_IN; \
82 .ifeq KPROBE_X86_IN; \
83 __macro_err_x86; \
84 .abort; \
85 .endif; \
86 .endif
87
88#define __check_entry_kprobe_x86 \
89 __check_entry_x86; \
90 __check_kprobe_x86
91
92#define ENTRY_KPROBE_FINAL_X86 __check_entry_kprobe_x86
93
94#define ENTRY_X86(name) \
95 __check_entry_kprobe_x86; \
96 __set_entry_x86; \
97 .globl name; \
98 __ALIGN; \
99 name:
100
101#define END_X86(name) \
102 __unset_entry_x86; \
103 __check_entry_kprobe_x86; \
104 .size name, .-name
105
106#define KPROBE_ENTRY_X86(name) \
107 __check_entry_kprobe_x86; \
108 __set_kprobe_x86; \
109 .pushsection .kprobes.text, "ax"; \
110 .globl name; \
111 __ALIGN; \
112 name:
113
114#define KPROBE_END_X86(name) \
115 __unset_kprobe_x86; \
116 __check_entry_kprobe_x86; \
117 .size name, .-name; \
118 .popsection
119
60#endif /* _ASM_X86_LINKAGE_H */ 120#endif /* _ASM_X86_LINKAGE_H */
61 121
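
The ENTRY_X86/END_X86 and KPROBE_ENTRY_X86/KPROBE_END_X86 pairs police each other
through the ENTRY_X86_IN/KPROBE_X86_IN assembler symbols. A hypothetical usage sketch
(my_func is an illustrative name, not from the patch):

	ENTRY_X86(my_func)	/* opens:  .set ENTRY_X86_IN, 0 */
		ret
	END_X86(my_func)	/* closes: .set ENTRY_X86_IN, 1 */

	/* Mixing the pairs, e.g. opening with ENTRY_X86(my_func) but
	 * closing with KPROBE_END_X86(my_func), leaves ENTRY_X86_IN
	 * at 0, so the next __check_entry_x86 hits .error and .abort. */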
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 20c6e12c0475..7397911f8478 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -777,11 +777,7 @@ static void local_apic_timer_interrupt(void)
777 /* 777 /*
778 * the NMI deadlock-detector uses this. 778 * the NMI deadlock-detector uses this.
779 */ 779 */
780#ifdef CONFIG_X86_64 780 inc_irq_stat(apic_timer_irqs);
781 add_pda(apic_timer_irqs, 1);
782#else
783 per_cpu(irq_stat, cpu).apic_timer_irqs++;
784#endif
785 781
786 evt->event_handler(evt); 782 evt->event_handler(evt);
787} 783}
@@ -1677,14 +1673,11 @@ void smp_spurious_interrupt(struct pt_regs *regs)
1677 if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) 1673 if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
1678 ack_APIC_irq(); 1674 ack_APIC_irq();
1679 1675
1680#ifdef CONFIG_X86_64 1676 inc_irq_stat(irq_spurious_count);
1681 add_pda(irq_spurious_count, 1); 1677
1682#else
1683 /* see sw-dev-man vol 3, chapter 7.4.13.5 */ 1678 /* see sw-dev-man vol 3, chapter 7.4.13.5 */
1684 pr_info("spurious APIC interrupt on CPU#%d, " 1679 pr_info("spurious APIC interrupt on CPU#%d, "
1685 "should never happen.\n", smp_processor_id()); 1680 "should never happen.\n", smp_processor_id());
1686 __get_cpu_var(irq_stat).irq_spurious_count++;
1687#endif
1688 irq_exit(); 1681 irq_exit();
1689} 1682}
1690 1683
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 5eb390a4b2e9..748c8f9e7a05 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -237,7 +237,7 @@ asmlinkage void mce_threshold_interrupt(void)
237 } 237 }
238 } 238 }
239out: 239out:
240 add_pda(irq_threshold_count, 1); 240 inc_irq_stat(irq_threshold_count);
241 irq_exit(); 241 irq_exit();
242} 242}
243 243
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index c17eaf5dd6dd..4b48f251fd39 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -26,7 +26,7 @@ asmlinkage void smp_thermal_interrupt(void)
26 if (therm_throt_process(msr_val & 1)) 26 if (therm_throt_process(msr_val & 1))
27 mce_log_therm_throt_event(smp_processor_id(), msr_val); 27 mce_log_therm_throt_event(smp_processor_id(), msr_val);
28 28
29 add_pda(irq_thermal_count, 1); 29 inc_irq_stat(irq_thermal_count);
30 irq_exit(); 30 irq_exit();
31} 31}
32 32
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f6402c4ba10d..fe7014176eb0 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -619,28 +619,37 @@ END(syscall_badsys)
61927:; 61927:;
620 620
621/* 621/*
622 * Build the entry stubs and pointer table with 622 * Build the entry stubs and pointer table with some assembler magic.
623 * some assembler magic. 623 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
624 * single cache line on all modern x86 implementations.
624 */ 625 */
625.section .rodata,"a" 626.section .init.rodata,"a"
626ENTRY(interrupt) 627ENTRY(interrupt)
627.text 628.text
628 629 .p2align 5
630 .p2align CONFIG_X86_L1_CACHE_SHIFT
629ENTRY(irq_entries_start) 631ENTRY(irq_entries_start)
630 RING0_INT_FRAME 632 RING0_INT_FRAME
631vector=0 633vector=FIRST_EXTERNAL_VECTOR
632.rept NR_VECTORS 634.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
633 ALIGN 635 .balign 32
634 .if vector 636 .rept 7
637 .if vector < NR_VECTORS
638 .if vector <> FIRST_EXTERNAL_VECTOR
635 CFI_ADJUST_CFA_OFFSET -4 639 CFI_ADJUST_CFA_OFFSET -4
636 .endif 640 .endif
6371: pushl $~(vector) 6411: pushl $(~vector+0x80) /* Note: always in signed byte range */
638 CFI_ADJUST_CFA_OFFSET 4 642 CFI_ADJUST_CFA_OFFSET 4
639 jmp common_interrupt 643 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
640 .previous 644 jmp 2f
645 .endif
646 .previous
641 .long 1b 647 .long 1b
642 .text 648 .text
643vector=vector+1 649vector=vector+1
650 .endif
651 .endr
6522: jmp common_interrupt
644.endr 653.endr
645END(irq_entries_start) 654END(irq_entries_start)
646 655
@@ -652,8 +661,9 @@ END(interrupt)
652 * the CPU automatically disables interrupts when executing an IRQ vector, 661 * the CPU automatically disables interrupts when executing an IRQ vector,
653 * so IRQ-flags tracing has to follow that: 662 * so IRQ-flags tracing has to follow that:
654 */ 663 */
655 ALIGN 664 .p2align CONFIG_X86_L1_CACHE_SHIFT
656common_interrupt: 665common_interrupt:
666 addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */
657 SAVE_ALL 667 SAVE_ALL
658 TRACE_IRQS_OFF 668 TRACE_IRQS_OFF
659 movl %esp,%eax 669 movl %esp,%eax
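
The packing works because each stub is a two-byte pushl of an immediate guaranteed to
fit in a signed byte, plus a two-byte short jump; seven such stubs and the shared jump
to common_interrupt fit in one 32-byte chunk. A worked expansion for a single vector
(0x31 is chosen purely for illustration):

	/* stub for vector 0x31: ~0x31 + 0x80 = -0x32 + 0x80 = 78 */
1:	pushl $78			/* two-byte encoding: 6a 4e */
	jmp 2f				/* two-byte short jump */
	...
common_interrupt:
	addl $-0x80,(%esp)		/* 78 - 0x80 = -50 = ~0x31 */
	/* the C handler can then recover the vector as ~(-50) = 0x31 */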
@@ -678,65 +688,6 @@ ENDPROC(name)
678/* The include is where all of the SMP etc. interrupts come from */ 688/* The include is where all of the SMP etc. interrupts come from */
679#include "entry_arch.h" 689#include "entry_arch.h"
680 690
681KPROBE_ENTRY(page_fault)
682 RING0_EC_FRAME
683 pushl $do_page_fault
684 CFI_ADJUST_CFA_OFFSET 4
685 ALIGN
686error_code:
687 /* the function address is in %fs's slot on the stack */
688 pushl %es
689 CFI_ADJUST_CFA_OFFSET 4
690 /*CFI_REL_OFFSET es, 0*/
691 pushl %ds
692 CFI_ADJUST_CFA_OFFSET 4
693 /*CFI_REL_OFFSET ds, 0*/
694 pushl %eax
695 CFI_ADJUST_CFA_OFFSET 4
696 CFI_REL_OFFSET eax, 0
697 pushl %ebp
698 CFI_ADJUST_CFA_OFFSET 4
699 CFI_REL_OFFSET ebp, 0
700 pushl %edi
701 CFI_ADJUST_CFA_OFFSET 4
702 CFI_REL_OFFSET edi, 0
703 pushl %esi
704 CFI_ADJUST_CFA_OFFSET 4
705 CFI_REL_OFFSET esi, 0
706 pushl %edx
707 CFI_ADJUST_CFA_OFFSET 4
708 CFI_REL_OFFSET edx, 0
709 pushl %ecx
710 CFI_ADJUST_CFA_OFFSET 4
711 CFI_REL_OFFSET ecx, 0
712 pushl %ebx
713 CFI_ADJUST_CFA_OFFSET 4
714 CFI_REL_OFFSET ebx, 0
715 cld
716 pushl %fs
717 CFI_ADJUST_CFA_OFFSET 4
718 /*CFI_REL_OFFSET fs, 0*/
719 movl $(__KERNEL_PERCPU), %ecx
720 movl %ecx, %fs
721 UNWIND_ESPFIX_STACK
722 popl %ecx
723 CFI_ADJUST_CFA_OFFSET -4
724 /*CFI_REGISTER es, ecx*/
725 movl PT_FS(%esp), %edi # get the function address
726 movl PT_ORIG_EAX(%esp), %edx # get the error code
727 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
728 mov %ecx, PT_FS(%esp)
729 /*CFI_REL_OFFSET fs, ES*/
730 movl $(__USER_DS), %ecx
731 movl %ecx, %ds
732 movl %ecx, %es
733 TRACE_IRQS_OFF
734 movl %esp,%eax # pt_regs pointer
735 call *%edi
736 jmp ret_from_exception
737 CFI_ENDPROC
738KPROBE_END(page_fault)
739
740ENTRY(coprocessor_error) 691ENTRY(coprocessor_error)
741 RING0_INT_FRAME 692 RING0_INT_FRAME
742 pushl $0 693 pushl $0
@@ -767,140 +718,6 @@ ENTRY(device_not_available)
767 CFI_ENDPROC 718 CFI_ENDPROC
768END(device_not_available) 719END(device_not_available)
769 720
770/*
771 * Debug traps and NMI can happen at the one SYSENTER instruction
772 * that sets up the real kernel stack. Check here, since we can't
773 * allow the wrong stack to be used.
774 *
775 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
776 * already pushed 3 words if it hits on the sysenter instruction:
777 * eflags, cs and eip.
778 *
779 * We just load the right stack, and push the three (known) values
780 * by hand onto the new stack - while updating the return eip past
781 * the instruction that would have done it for sysenter.
782 */
783#define FIX_STACK(offset, ok, label) \
784 cmpw $__KERNEL_CS,4(%esp); \
785 jne ok; \
786label: \
787 movl TSS_sysenter_sp0+offset(%esp),%esp; \
788 CFI_DEF_CFA esp, 0; \
789 CFI_UNDEFINED eip; \
790 pushfl; \
791 CFI_ADJUST_CFA_OFFSET 4; \
792 pushl $__KERNEL_CS; \
793 CFI_ADJUST_CFA_OFFSET 4; \
794 pushl $sysenter_past_esp; \
795 CFI_ADJUST_CFA_OFFSET 4; \
796 CFI_REL_OFFSET eip, 0
797
798KPROBE_ENTRY(debug)
799 RING0_INT_FRAME
800 cmpl $ia32_sysenter_target,(%esp)
801 jne debug_stack_correct
802 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
803debug_stack_correct:
804 pushl $-1 # mark this as an int
805 CFI_ADJUST_CFA_OFFSET 4
806 SAVE_ALL
807 TRACE_IRQS_OFF
808 xorl %edx,%edx # error code 0
809 movl %esp,%eax # pt_regs pointer
810 call do_debug
811 jmp ret_from_exception
812 CFI_ENDPROC
813KPROBE_END(debug)
814
815/*
816 * NMI is doubly nasty. It can happen _while_ we're handling
817 * a debug fault, and the debug fault hasn't yet been able to
818 * clear up the stack. So we first check whether we got an
819 * NMI on the sysenter entry path, but after that we need to
820 * check whether we got an NMI on the debug path where the debug
821 * fault happened on the sysenter path.
822 */
823KPROBE_ENTRY(nmi)
824 RING0_INT_FRAME
825 pushl %eax
826 CFI_ADJUST_CFA_OFFSET 4
827 movl %ss, %eax
828 cmpw $__ESPFIX_SS, %ax
829 popl %eax
830 CFI_ADJUST_CFA_OFFSET -4
831 je nmi_espfix_stack
832 cmpl $ia32_sysenter_target,(%esp)
833 je nmi_stack_fixup
834 pushl %eax
835 CFI_ADJUST_CFA_OFFSET 4
836 movl %esp,%eax
837 /* Do not access memory above the end of our stack page,
838 * it might not exist.
839 */
840 andl $(THREAD_SIZE-1),%eax
841 cmpl $(THREAD_SIZE-20),%eax
842 popl %eax
843 CFI_ADJUST_CFA_OFFSET -4
844 jae nmi_stack_correct
845 cmpl $ia32_sysenter_target,12(%esp)
846 je nmi_debug_stack_check
847nmi_stack_correct:
848 /* We have a RING0_INT_FRAME here */
849 pushl %eax
850 CFI_ADJUST_CFA_OFFSET 4
851 SAVE_ALL
852 TRACE_IRQS_OFF
853 xorl %edx,%edx # zero error code
854 movl %esp,%eax # pt_regs pointer
855 call do_nmi
856 jmp restore_nocheck_notrace
857 CFI_ENDPROC
858
859nmi_stack_fixup:
860 RING0_INT_FRAME
861 FIX_STACK(12,nmi_stack_correct, 1)
862 jmp nmi_stack_correct
863
864nmi_debug_stack_check:
865 /* We have a RING0_INT_FRAME here */
866 cmpw $__KERNEL_CS,16(%esp)
867 jne nmi_stack_correct
868 cmpl $debug,(%esp)
869 jb nmi_stack_correct
870 cmpl $debug_esp_fix_insn,(%esp)
871 ja nmi_stack_correct
872 FIX_STACK(24,nmi_stack_correct, 1)
873 jmp nmi_stack_correct
874
875nmi_espfix_stack:
876 /* We have a RING0_INT_FRAME here.
877 *
878 * create the pointer to lss back
879 */
880 pushl %ss
881 CFI_ADJUST_CFA_OFFSET 4
882 pushl %esp
883 CFI_ADJUST_CFA_OFFSET 4
884 addw $4, (%esp)
885 /* copy the iret frame of 12 bytes */
886 .rept 3
887 pushl 16(%esp)
888 CFI_ADJUST_CFA_OFFSET 4
889 .endr
890 pushl %eax
891 CFI_ADJUST_CFA_OFFSET 4
892 SAVE_ALL
893 TRACE_IRQS_OFF
894 FIXUP_ESPFIX_STACK # %eax == %esp
895 xorl %edx,%edx # zero error code
896 call do_nmi
897 RESTORE_REGS
898 lss 12+4(%esp), %esp # back to espfix stack
899 CFI_ADJUST_CFA_OFFSET -24
900 jmp irq_return
901 CFI_ENDPROC
902KPROBE_END(nmi)
903
904#ifdef CONFIG_PARAVIRT 721#ifdef CONFIG_PARAVIRT
905ENTRY(native_iret) 722ENTRY(native_iret)
906 iret 723 iret
@@ -916,19 +733,6 @@ ENTRY(native_irq_enable_sysexit)
916END(native_irq_enable_sysexit) 733END(native_irq_enable_sysexit)
917#endif 734#endif
918 735
919KPROBE_ENTRY(int3)
920 RING0_INT_FRAME
921 pushl $-1 # mark this as an int
922 CFI_ADJUST_CFA_OFFSET 4
923 SAVE_ALL
924 TRACE_IRQS_OFF
925 xorl %edx,%edx # zero error code
926 movl %esp,%eax # pt_regs pointer
927 call do_int3
928 jmp ret_from_exception
929 CFI_ENDPROC
930KPROBE_END(int3)
931
932ENTRY(overflow) 736ENTRY(overflow)
933 RING0_INT_FRAME 737 RING0_INT_FRAME
934 pushl $0 738 pushl $0
@@ -993,14 +797,6 @@ ENTRY(stack_segment)
993 CFI_ENDPROC 797 CFI_ENDPROC
994END(stack_segment) 798END(stack_segment)
995 799
996KPROBE_ENTRY(general_protection)
997 RING0_EC_FRAME
998 pushl $do_general_protection
999 CFI_ADJUST_CFA_OFFSET 4
1000 jmp error_code
1001 CFI_ENDPROC
1002KPROBE_END(general_protection)
1003
1004ENTRY(alignment_check) 800ENTRY(alignment_check)
1005 RING0_EC_FRAME 801 RING0_EC_FRAME
1006 pushl $do_alignment_check 802 pushl $do_alignment_check
@@ -1211,3 +1007,227 @@ END(mcount)
1211#include "syscall_table_32.S" 1007#include "syscall_table_32.S"
1212 1008
1213syscall_table_size=(.-sys_call_table) 1009syscall_table_size=(.-sys_call_table)
1010
1011/*
1012 * Some functions should be protected against kprobes
1013 */
1014 .pushsection .kprobes.text, "ax"
1015
1016ENTRY(page_fault)
1017 RING0_EC_FRAME
1018 pushl $do_page_fault
1019 CFI_ADJUST_CFA_OFFSET 4
1020 ALIGN
1021error_code:
1022 /* the function address is in %fs's slot on the stack */
1023 pushl %es
1024 CFI_ADJUST_CFA_OFFSET 4
1025 /*CFI_REL_OFFSET es, 0*/
1026 pushl %ds
1027 CFI_ADJUST_CFA_OFFSET 4
1028 /*CFI_REL_OFFSET ds, 0*/
1029 pushl %eax
1030 CFI_ADJUST_CFA_OFFSET 4
1031 CFI_REL_OFFSET eax, 0
1032 pushl %ebp
1033 CFI_ADJUST_CFA_OFFSET 4
1034 CFI_REL_OFFSET ebp, 0
1035 pushl %edi
1036 CFI_ADJUST_CFA_OFFSET 4
1037 CFI_REL_OFFSET edi, 0
1038 pushl %esi
1039 CFI_ADJUST_CFA_OFFSET 4
1040 CFI_REL_OFFSET esi, 0
1041 pushl %edx
1042 CFI_ADJUST_CFA_OFFSET 4
1043 CFI_REL_OFFSET edx, 0
1044 pushl %ecx
1045 CFI_ADJUST_CFA_OFFSET 4
1046 CFI_REL_OFFSET ecx, 0
1047 pushl %ebx
1048 CFI_ADJUST_CFA_OFFSET 4
1049 CFI_REL_OFFSET ebx, 0
1050 cld
1051 pushl %fs
1052 CFI_ADJUST_CFA_OFFSET 4
1053 /*CFI_REL_OFFSET fs, 0*/
1054 movl $(__KERNEL_PERCPU), %ecx
1055 movl %ecx, %fs
1056 UNWIND_ESPFIX_STACK
1057 popl %ecx
1058 CFI_ADJUST_CFA_OFFSET -4
1059 /*CFI_REGISTER es, ecx*/
1060 movl PT_FS(%esp), %edi # get the function address
1061 movl PT_ORIG_EAX(%esp), %edx # get the error code
1062 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
1063 mov %ecx, PT_FS(%esp)
1064 /*CFI_REL_OFFSET fs, ES*/
1065 movl $(__USER_DS), %ecx
1066 movl %ecx, %ds
1067 movl %ecx, %es
1068 TRACE_IRQS_OFF
1069 movl %esp,%eax # pt_regs pointer
1070 call *%edi
1071 jmp ret_from_exception
1072 CFI_ENDPROC
1073END(page_fault)
1074
1075/*
1076 * Debug traps and NMI can happen at the one SYSENTER instruction
1077 * that sets up the real kernel stack. Check here, since we can't
1078 * allow the wrong stack to be used.
1079 *
1080 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
1081 * already pushed 3 words if it hits on the sysenter instruction:
1082 * eflags, cs and eip.
1083 *
1084 * We just load the right stack, and push the three (known) values
1085 * by hand onto the new stack - while updating the return eip past
1086 * the instruction that would have done it for sysenter.
1087 */
1088#define FIX_STACK(offset, ok, label) \
1089 cmpw $__KERNEL_CS,4(%esp); \
1090 jne ok; \
1091label: \
1092 movl TSS_sysenter_sp0+offset(%esp),%esp; \
1093 CFI_DEF_CFA esp, 0; \
1094 CFI_UNDEFINED eip; \
1095 pushfl; \
1096 CFI_ADJUST_CFA_OFFSET 4; \
1097 pushl $__KERNEL_CS; \
1098 CFI_ADJUST_CFA_OFFSET 4; \
1099 pushl $sysenter_past_esp; \
1100 CFI_ADJUST_CFA_OFFSET 4; \
1101 CFI_REL_OFFSET eip, 0
1102
1103ENTRY(debug)
1104 RING0_INT_FRAME
1105 cmpl $ia32_sysenter_target,(%esp)
1106 jne debug_stack_correct
1107 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
1108debug_stack_correct:
1109 pushl $-1 # mark this as an int
1110 CFI_ADJUST_CFA_OFFSET 4
1111 SAVE_ALL
1112 TRACE_IRQS_OFF
1113 xorl %edx,%edx # error code 0
1114 movl %esp,%eax # pt_regs pointer
1115 call do_debug
1116 jmp ret_from_exception
1117 CFI_ENDPROC
1118END(debug)
1119
1120/*
1121 * NMI is doubly nasty. It can happen _while_ we're handling
1122 * a debug fault, and the debug fault hasn't yet been able to
1123 * clear up the stack. So we first check whether we got an
1124 * NMI on the sysenter entry path, but after that we need to
1125 * check whether we got an NMI on the debug path where the debug
1126 * fault happened on the sysenter path.
1127 */
1128ENTRY(nmi)
1129 RING0_INT_FRAME
1130 pushl %eax
1131 CFI_ADJUST_CFA_OFFSET 4
1132 movl %ss, %eax
1133 cmpw $__ESPFIX_SS, %ax
1134 popl %eax
1135 CFI_ADJUST_CFA_OFFSET -4
1136 je nmi_espfix_stack
1137 cmpl $ia32_sysenter_target,(%esp)
1138 je nmi_stack_fixup
1139 pushl %eax
1140 CFI_ADJUST_CFA_OFFSET 4
1141 movl %esp,%eax
1142 /* Do not access memory above the end of our stack page,
1143 * it might not exist.
1144 */
1145 andl $(THREAD_SIZE-1),%eax
1146 cmpl $(THREAD_SIZE-20),%eax
1147 popl %eax
1148 CFI_ADJUST_CFA_OFFSET -4
1149 jae nmi_stack_correct
1150 cmpl $ia32_sysenter_target,12(%esp)
1151 je nmi_debug_stack_check
1152nmi_stack_correct:
1153 /* We have a RING0_INT_FRAME here */
1154 pushl %eax
1155 CFI_ADJUST_CFA_OFFSET 4
1156 SAVE_ALL
1157 TRACE_IRQS_OFF
1158 xorl %edx,%edx # zero error code
1159 movl %esp,%eax # pt_regs pointer
1160 call do_nmi
1161 jmp restore_nocheck_notrace
1162 CFI_ENDPROC
1163
1164nmi_stack_fixup:
1165 RING0_INT_FRAME
1166 FIX_STACK(12,nmi_stack_correct, 1)
1167 jmp nmi_stack_correct
1168
1169nmi_debug_stack_check:
1170 /* We have a RING0_INT_FRAME here */
1171 cmpw $__KERNEL_CS,16(%esp)
1172 jne nmi_stack_correct
1173 cmpl $debug,(%esp)
1174 jb nmi_stack_correct
1175 cmpl $debug_esp_fix_insn,(%esp)
1176 ja nmi_stack_correct
1177 FIX_STACK(24,nmi_stack_correct, 1)
1178 jmp nmi_stack_correct
1179
1180nmi_espfix_stack:
1181 /* We have a RING0_INT_FRAME here.
1182 *
 1183 * create the ss:esp pointer for lss to switch back
1184 */
1185 pushl %ss
1186 CFI_ADJUST_CFA_OFFSET 4
1187 pushl %esp
1188 CFI_ADJUST_CFA_OFFSET 4
1189 addw $4, (%esp)
1190 /* copy the iret frame of 12 bytes */
1191 .rept 3
1192 pushl 16(%esp)
1193 CFI_ADJUST_CFA_OFFSET 4
1194 .endr
1195 pushl %eax
1196 CFI_ADJUST_CFA_OFFSET 4
1197 SAVE_ALL
1198 TRACE_IRQS_OFF
1199 FIXUP_ESPFIX_STACK # %eax == %esp
1200 xorl %edx,%edx # zero error code
1201 call do_nmi
1202 RESTORE_REGS
1203 lss 12+4(%esp), %esp # back to espfix stack
1204 CFI_ADJUST_CFA_OFFSET -24
1205 jmp irq_return
1206 CFI_ENDPROC
1207END(nmi)
1208
1209ENTRY(int3)
1210 RING0_INT_FRAME
1211 pushl $-1 # mark this as an int
1212 CFI_ADJUST_CFA_OFFSET 4
1213 SAVE_ALL
1214 TRACE_IRQS_OFF
1215 xorl %edx,%edx # zero error code
1216 movl %esp,%eax # pt_regs pointer
1217 call do_int3
1218 jmp ret_from_exception
1219 CFI_ENDPROC
1220END(int3)
1221
1222ENTRY(general_protection)
1223 RING0_EC_FRAME
1224 pushl $do_general_protection
1225 CFI_ADJUST_CFA_OFFSET 4
1226 jmp error_code
1227 CFI_ENDPROC
1228END(general_protection)
1229
1230/*
1231 * End of kprobes section
1232 */
1233 .popsection
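
With this layout the kprobe-sensitive handlers live in one shared .kprobes.text span
instead of per-symbol KPROBE_ENTRY/KPROBE_END pairs. The resulting pattern, sketched
with a hypothetical handler name:

	.pushsection .kprobes.text, "ax"	/* off-limits to kprobes */
ENTRY(example_trap)				/* hypothetical name */
	RING0_INT_FRAME
	...
	CFI_ENDPROC
END(example_trap)
	/* ... more handlers ... */
	.popsection				/* back to the previous section */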
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 42571baaca32..3194636a4293 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -60,7 +60,6 @@
60#define __AUDIT_ARCH_LE 0x40000000 60#define __AUDIT_ARCH_LE 0x40000000
61 61
62 .code64 62 .code64
63
64#ifdef CONFIG_FUNCTION_TRACER 63#ifdef CONFIG_FUNCTION_TRACER
65#ifdef CONFIG_DYNAMIC_FTRACE 64#ifdef CONFIG_DYNAMIC_FTRACE
66ENTRY(mcount) 65ENTRY(mcount)
@@ -169,21 +168,21 @@ ENTRY(native_usergs_sysret64)
169 */ 168 */
170 169
171 /* %rsp:at FRAMEEND */ 170 /* %rsp:at FRAMEEND */
172 .macro FIXUP_TOP_OF_STACK tmp 171 .macro FIXUP_TOP_OF_STACK tmp offset=0
173 movq %gs:pda_oldrsp,\tmp 172 movq %gs:pda_oldrsp,\tmp
174 movq \tmp,RSP(%rsp) 173 movq \tmp,RSP+\offset(%rsp)
175 movq $__USER_DS,SS(%rsp) 174 movq $__USER_DS,SS+\offset(%rsp)
176 movq $__USER_CS,CS(%rsp) 175 movq $__USER_CS,CS+\offset(%rsp)
177 movq $-1,RCX(%rsp) 176 movq $-1,RCX+\offset(%rsp)
178 movq R11(%rsp),\tmp /* get eflags */ 177 movq R11+\offset(%rsp),\tmp /* get eflags */
179 movq \tmp,EFLAGS(%rsp) 178 movq \tmp,EFLAGS+\offset(%rsp)
180 .endm 179 .endm
181 180
182 .macro RESTORE_TOP_OF_STACK tmp,offset=0 181 .macro RESTORE_TOP_OF_STACK tmp offset=0
183 movq RSP-\offset(%rsp),\tmp 182 movq RSP+\offset(%rsp),\tmp
184 movq \tmp,%gs:pda_oldrsp 183 movq \tmp,%gs:pda_oldrsp
185 movq EFLAGS-\offset(%rsp),\tmp 184 movq EFLAGS+\offset(%rsp),\tmp
186 movq \tmp,R11-\offset(%rsp) 185 movq \tmp,R11+\offset(%rsp)
187 .endm 186 .endm
188 187
189 .macro FAKE_STACK_FRAME child_rip 188 .macro FAKE_STACK_FRAME child_rip
@@ -195,7 +194,7 @@ ENTRY(native_usergs_sysret64)
195 pushq %rax /* rsp */ 194 pushq %rax /* rsp */
196 CFI_ADJUST_CFA_OFFSET 8 195 CFI_ADJUST_CFA_OFFSET 8
197 CFI_REL_OFFSET rsp,0 196 CFI_REL_OFFSET rsp,0
198 pushq $(1<<9) /* eflags - interrupts on */ 197 pushq $X86_EFLAGS_IF /* eflags - interrupts on */
199 CFI_ADJUST_CFA_OFFSET 8 198 CFI_ADJUST_CFA_OFFSET 8
200 /*CFI_REL_OFFSET rflags,0*/ 199 /*CFI_REL_OFFSET rflags,0*/
201 pushq $__KERNEL_CS /* cs */ 200 pushq $__KERNEL_CS /* cs */
@@ -213,64 +212,184 @@ ENTRY(native_usergs_sysret64)
213 CFI_ADJUST_CFA_OFFSET -(6*8) 212 CFI_ADJUST_CFA_OFFSET -(6*8)
214 .endm 213 .endm
215 214
216 .macro CFI_DEFAULT_STACK start=1 215/*
 216 * empty frame: defines just the CFA; no saved registers are recorded
217 */
218 .macro EMPTY_FRAME start=1 offset=0
217 .if \start 219 .if \start
218 CFI_STARTPROC simple 220 CFI_STARTPROC simple
219 CFI_SIGNAL_FRAME 221 CFI_SIGNAL_FRAME
220 CFI_DEF_CFA rsp,SS+8 222 CFI_DEF_CFA rsp,8+\offset
221 .else 223 .else
222 CFI_DEF_CFA_OFFSET SS+8 224 CFI_DEF_CFA_OFFSET 8+\offset
223 .endif 225 .endif
224 CFI_REL_OFFSET r15,R15
225 CFI_REL_OFFSET r14,R14
226 CFI_REL_OFFSET r13,R13
227 CFI_REL_OFFSET r12,R12
228 CFI_REL_OFFSET rbp,RBP
229 CFI_REL_OFFSET rbx,RBX
230 CFI_REL_OFFSET r11,R11
231 CFI_REL_OFFSET r10,R10
232 CFI_REL_OFFSET r9,R9
233 CFI_REL_OFFSET r8,R8
234 CFI_REL_OFFSET rax,RAX
235 CFI_REL_OFFSET rcx,RCX
236 CFI_REL_OFFSET rdx,RDX
237 CFI_REL_OFFSET rsi,RSI
238 CFI_REL_OFFSET rdi,RDI
239 CFI_REL_OFFSET rip,RIP
240 /*CFI_REL_OFFSET cs,CS*/
241 /*CFI_REL_OFFSET rflags,EFLAGS*/
242 CFI_REL_OFFSET rsp,RSP
243 /*CFI_REL_OFFSET ss,SS*/
244 .endm 226 .endm
227
228/*
229 * initial frame state for interrupts (and exceptions without error code)
230 */
231 .macro INTR_FRAME start=1 offset=0
232 EMPTY_FRAME \start, SS+8+\offset-RIP
233 /*CFI_REL_OFFSET ss, SS+\offset-RIP*/
234 CFI_REL_OFFSET rsp, RSP+\offset-RIP
235 /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
236 /*CFI_REL_OFFSET cs, CS+\offset-RIP*/
237 CFI_REL_OFFSET rip, RIP+\offset-RIP
238 .endm
239
240/*
241 * initial frame state for exceptions with error code (and interrupts
242 * with vector already pushed)
243 */
244 .macro XCPT_FRAME start=1 offset=0
245 INTR_FRAME \start, RIP+\offset-ORIG_RAX
246 /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
247 .endm
248
249/*
250 * frame that enables calling into C.
251 */
252 .macro PARTIAL_FRAME start=1 offset=0
253 XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
254 CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
255 CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
256 CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
257 CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
258 CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
259 CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
260 CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
261 CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
262 CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
263 .endm
264
265/*
266 * frame that enables passing a complete pt_regs to a C function.
267 */
268 .macro DEFAULT_FRAME start=1 offset=0
269 PARTIAL_FRAME \start, R11+\offset-R15
270 CFI_REL_OFFSET rbx, RBX+\offset
271 CFI_REL_OFFSET rbp, RBP+\offset
272 CFI_REL_OFFSET r12, R12+\offset
273 CFI_REL_OFFSET r13, R13+\offset
274 CFI_REL_OFFSET r14, R14+\offset
275 CFI_REL_OFFSET r15, R15+\offset
276 .endm
277
278/* save partial stack frame */
279ENTRY(save_args)
280 XCPT_FRAME
281 cld
282 movq_cfi rdi, RDI+16-ARGOFFSET
283 movq_cfi rsi, RSI+16-ARGOFFSET
284 movq_cfi rdx, RDX+16-ARGOFFSET
285 movq_cfi rcx, RCX+16-ARGOFFSET
286 movq_cfi rax, RAX+16-ARGOFFSET
287 movq_cfi r8, R8+16-ARGOFFSET
288 movq_cfi r9, R9+16-ARGOFFSET
289 movq_cfi r10, R10+16-ARGOFFSET
290 movq_cfi r11, R11+16-ARGOFFSET
291
292 leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */
293 movq_cfi rbp, 8 /* push %rbp */
294 leaq 8(%rsp), %rbp /* mov %rsp, %ebp */
295 testl $3, CS(%rdi)
296 je 1f
297 SWAPGS
298 /*
299 * irqcount is used to check if a CPU is already on an interrupt stack
300 * or not. While this is essentially redundant with preempt_count it is
301 * a little cheaper to use a separate counter in the PDA (short of
302 * moving irq_enter into assembly, which would be too much work)
303 */
3041: incl %gs:pda_irqcount
305 jne 2f
306 popq_cfi %rax /* move return address... */
307 mov %gs:pda_irqstackptr,%rsp
308 EMPTY_FRAME 0
309 pushq_cfi %rax /* ... to the new stack */
310 /*
311 * We entered an interrupt context - irqs are off:
312 */
3132: TRACE_IRQS_OFF
314 ret
315 CFI_ENDPROC
316END(save_args)
317
318ENTRY(save_rest)
319 PARTIAL_FRAME 1 REST_SKIP+8
320 movq 5*8+16(%rsp), %r11 /* save return address */
321 movq_cfi rbx, RBX+16
322 movq_cfi rbp, RBP+16
323 movq_cfi r12, R12+16
324 movq_cfi r13, R13+16
325 movq_cfi r14, R14+16
326 movq_cfi r15, R15+16
327 movq %r11, 8(%rsp) /* return address */
328 FIXUP_TOP_OF_STACK %r11, 16
329 ret
330 CFI_ENDPROC
331END(save_rest)
332
333/* save complete stack frame */
334ENTRY(save_paranoid)
335 XCPT_FRAME 1 RDI+8
336 cld
337 movq_cfi rdi, RDI+8
338 movq_cfi rsi, RSI+8
339 movq_cfi rdx, RDX+8
340 movq_cfi rcx, RCX+8
341 movq_cfi rax, RAX+8
342 movq_cfi r8, R8+8
343 movq_cfi r9, R9+8
344 movq_cfi r10, R10+8
345 movq_cfi r11, R11+8
346 movq_cfi rbx, RBX+8
347 movq_cfi rbp, RBP+8
348 movq_cfi r12, R12+8
349 movq_cfi r13, R13+8
350 movq_cfi r14, R14+8
351 movq_cfi r15, R15+8
352 movl $1,%ebx
353 movl $MSR_GS_BASE,%ecx
354 rdmsr
355 testl %edx,%edx
356 js 1f /* negative -> in kernel */
357 SWAPGS
358 xorl %ebx,%ebx
3591: ret
360 CFI_ENDPROC
361END(save_paranoid)
362
245/* 363/*
246 * A newly forked process directly context switches into this. 364 * A newly forked process directly context switches into this address.
365 *
366 * rdi: prev task we switched from
247 */ 367 */
248/* rdi: prev */
249ENTRY(ret_from_fork) 368ENTRY(ret_from_fork)
250 CFI_DEFAULT_STACK 369 DEFAULT_FRAME
370
251 push kernel_eflags(%rip) 371 push kernel_eflags(%rip)
252 CFI_ADJUST_CFA_OFFSET 8 372 CFI_ADJUST_CFA_OFFSET 8
253 popf # reset kernel eflags 373 popf # reset kernel eflags
254 CFI_ADJUST_CFA_OFFSET -8 374 CFI_ADJUST_CFA_OFFSET -8
255 call schedule_tail 375
376 call schedule_tail # rdi: 'prev' task parameter
377
256 GET_THREAD_INFO(%rcx) 378 GET_THREAD_INFO(%rcx)
257 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) 379
258 CFI_REMEMBER_STATE 380 CFI_REMEMBER_STATE
259 jnz rff_trace
260rff_action:
261 RESTORE_REST 381 RESTORE_REST
262 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? 382
383 testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
263 je int_ret_from_sys_call 384 je int_ret_from_sys_call
264 testl $_TIF_IA32,TI_flags(%rcx) 385
386 testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
265 jnz int_ret_from_sys_call 387 jnz int_ret_from_sys_call
266 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET 388
267 jmp ret_from_sys_call 389 RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
390 jmp ret_from_sys_call # go to the SYSRET fastpath
391
268 CFI_RESTORE_STATE 392 CFI_RESTORE_STATE
269rff_trace:
270 movq %rsp,%rdi
271 call syscall_trace_leave
272 GET_THREAD_INFO(%rcx)
273 jmp rff_action
274 CFI_ENDPROC 393 CFI_ENDPROC
275END(ret_from_fork) 394END(ret_from_fork)
276 395
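
The frame macros are layered: EMPTY_FRAME sets only the CFA, INTR_FRAME adds the
hardware iret frame, XCPT_FRAME adds the vector/error-code slot, PARTIAL_FRAME the
caller-saved registers that save_args stores, and DEFAULT_FRAME the callee-saved ones.
A sketch of the intended calling pattern (it mirrors the interrupt macro further down;
example_entry and some_c_handler are hypothetical names):

ENTRY(example_entry)			/* hypothetical entry point */
	XCPT_FRAME			/* iret frame + vector already pushed */
	subq $10*8, %rsp		/* room for the caller-saved registers */
	CFI_ADJUST_CFA_OFFSET 10*8
	call save_args			/* store them, maybe switch to the IRQ stack */
	PARTIAL_FRAME 0			/* describe what save_args just saved */
	call some_c_handler		/* hypothetical; gets the pt_regs pointer in %rdi */
	CFI_ENDPROC
END(example_entry)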
@@ -390,10 +509,13 @@ sysret_signal:
390 jc sysret_audit 509 jc sysret_audit
391#endif 510#endif
392 /* edx: work flags (arg3) */ 511 /* edx: work flags (arg3) */
393 leaq do_notify_resume(%rip),%rax
394 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 512 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
395 xorl %esi,%esi # oldset -> arg2 513 xorl %esi,%esi # oldset -> arg2
396 call ptregscall_common 514 SAVE_REST
515 FIXUP_TOP_OF_STACK %r11
516 call do_notify_resume
517 RESTORE_TOP_OF_STACK %r11
518 RESTORE_REST
397 movl $_TIF_WORK_MASK,%edi 519 movl $_TIF_WORK_MASK,%edi
398 /* Use IRET because user could have changed frame. This 520 /* Use IRET because user could have changed frame. This
399 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ 521 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
@@ -537,18 +659,20 @@ END(system_call)
537/* 659/*
 538 * Certain special system calls need to save a complete full stack frame. 660 * Certain special system calls need to save a complete full stack frame.
539 */ 661 */
540
541 .macro PTREGSCALL label,func,arg 662 .macro PTREGSCALL label,func,arg
542 .globl \label 663ENTRY(\label)
543\label: 664 PARTIAL_FRAME 1 8 /* offset 8: return address */
544 leaq \func(%rip),%rax 665 subq $REST_SKIP, %rsp
545 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ 666 CFI_ADJUST_CFA_OFFSET REST_SKIP
546 jmp ptregscall_common 667 call save_rest
668 DEFAULT_FRAME 0 8 /* offset 8: return address */
669 leaq 8(%rsp), \arg /* pt_regs pointer */
670 call \func
671 jmp ptregscall_common
672 CFI_ENDPROC
547END(\label) 673END(\label)
548 .endm 674 .endm
549 675
550 CFI_STARTPROC
551
552 PTREGSCALL stub_clone, sys_clone, %r8 676 PTREGSCALL stub_clone, sys_clone, %r8
553 PTREGSCALL stub_fork, sys_fork, %rdi 677 PTREGSCALL stub_fork, sys_fork, %rdi
554 PTREGSCALL stub_vfork, sys_vfork, %rdi 678 PTREGSCALL stub_vfork, sys_vfork, %rdi
@@ -556,22 +680,15 @@ END(\label)
556 PTREGSCALL stub_iopl, sys_iopl, %rsi 680 PTREGSCALL stub_iopl, sys_iopl, %rsi
557 681
558ENTRY(ptregscall_common) 682ENTRY(ptregscall_common)
559 popq %r11 683 DEFAULT_FRAME 1 8 /* offset 8: return address */
560 CFI_ADJUST_CFA_OFFSET -8 684 RESTORE_TOP_OF_STACK %r11, 8
561 CFI_REGISTER rip, r11 685 movq_cfi_restore R15+8, r15
562 SAVE_REST 686 movq_cfi_restore R14+8, r14
563 movq %r11, %r15 687 movq_cfi_restore R13+8, r13
564 CFI_REGISTER rip, r15 688 movq_cfi_restore R12+8, r12
565 FIXUP_TOP_OF_STACK %r11 689 movq_cfi_restore RBP+8, rbp
566 call *%rax 690 movq_cfi_restore RBX+8, rbx
567 RESTORE_TOP_OF_STACK %r11 691 ret $REST_SKIP /* pop extended registers */
568 movq %r15, %r11
569 CFI_REGISTER rip, r11
570 RESTORE_REST
571 pushq %r11
572 CFI_ADJUST_CFA_OFFSET 8
573 CFI_REL_OFFSET rip, 0
574 ret
575 CFI_ENDPROC 692 CFI_ENDPROC
576END(ptregscall_common) 693END(ptregscall_common)
577 694
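
After the rework every PTREGSCALL invocation emits a real ENTRY/END pair. Hand-expanding
PTREGSCALL stub_fork, sys_fork, %rdi per the macro body above gives, in sketch form:

ENTRY(stub_fork)
	PARTIAL_FRAME 1 8		/* offset 8: return address */
	subq $REST_SKIP, %rsp
	CFI_ADJUST_CFA_OFFSET REST_SKIP
	call save_rest			/* complete the pt_regs frame */
	DEFAULT_FRAME 0 8
	leaq 8(%rsp), %rdi		/* pt_regs pointer */
	call sys_fork
	jmp ptregscall_common		/* restore callee-saved regs, ret $REST_SKIP */
	CFI_ENDPROC
END(stub_fork)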
@@ -610,70 +727,70 @@ ENTRY(stub_rt_sigreturn)
610END(stub_rt_sigreturn) 727END(stub_rt_sigreturn)
611 728
612/* 729/*
613 * initial frame state for interrupts and exceptions 730 * Build the entry stubs and pointer table with some assembler magic.
731 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
732 * single cache line on all modern x86 implementations.
614 */ 733 */
615 .macro _frame ref 734 .section .init.rodata,"a"
616 CFI_STARTPROC simple 735ENTRY(interrupt)
617 CFI_SIGNAL_FRAME 736 .text
618 CFI_DEF_CFA rsp,SS+8-\ref 737 .p2align 5
619 /*CFI_REL_OFFSET ss,SS-\ref*/ 738 .p2align CONFIG_X86_L1_CACHE_SHIFT
620 CFI_REL_OFFSET rsp,RSP-\ref 739ENTRY(irq_entries_start)
621 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ 740 INTR_FRAME
622 /*CFI_REL_OFFSET cs,CS-\ref*/ 741vector=FIRST_EXTERNAL_VECTOR
623 CFI_REL_OFFSET rip,RIP-\ref 742.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
624 .endm 743 .balign 32
744 .rept 7
745 .if vector < NR_VECTORS
746 .if vector <> FIRST_EXTERNAL_VECTOR
747 CFI_ADJUST_CFA_OFFSET -8
748 .endif
7491: pushq $(~vector+0x80) /* Note: always in signed byte range */
750 CFI_ADJUST_CFA_OFFSET 8
751 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
752 jmp 2f
753 .endif
754 .previous
755 .quad 1b
756 .text
757vector=vector+1
758 .endif
759 .endr
7602: jmp common_interrupt
761.endr
762 CFI_ENDPROC
763END(irq_entries_start)
625 764
626/* initial frame state for interrupts (and exceptions without error code) */ 765.previous
627#define INTR_FRAME _frame RIP 766END(interrupt)
628/* initial frame state for exceptions with error code (and interrupts with 767.previous
629 vector already pushed) */
630#define XCPT_FRAME _frame ORIG_RAX
631 768
632/* 769/*
633 * Interrupt entry/exit. 770 * Interrupt entry/exit.
634 * 771 *
635 * Interrupt entry points save only callee clobbered registers in fast path. 772 * Interrupt entry points save only callee clobbered registers in fast path.
636 * 773 *
637 * Entry runs with interrupts off. 774 * Entry runs with interrupts off.
638 */ 775 */
639 776
640/* 0(%rsp): interrupt number */ 777/* 0(%rsp): ~(interrupt number) */
641 .macro interrupt func 778 .macro interrupt func
642 cld 779 subq $10*8, %rsp
643 SAVE_ARGS 780 CFI_ADJUST_CFA_OFFSET 10*8
644 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler 781 call save_args
645 pushq %rbp 782 PARTIAL_FRAME 0
646 /*
647 * Save rbp twice: One is for marking the stack frame, as usual, and the
648 * other, to fill pt_regs properly. This is because bx comes right
649 * before the last saved register in that structure, and not bp. If the
650 * base pointer were in the place bx is today, this would not be needed.
651 */
652 movq %rbp, -8(%rsp)
653 CFI_ADJUST_CFA_OFFSET 8
654 CFI_REL_OFFSET rbp, 0
655 movq %rsp,%rbp
656 CFI_DEF_CFA_REGISTER rbp
657 testl $3,CS(%rdi)
658 je 1f
659 SWAPGS
660 /* irqcount is used to check if a CPU is already on an interrupt
661 stack or not. While this is essentially redundant with preempt_count
662 it is a little cheaper to use a separate counter in the PDA
663 (short of moving irq_enter into assembly, which would be too
664 much work) */
6651: incl %gs:pda_irqcount
666 cmoveq %gs:pda_irqstackptr,%rsp
667 push %rbp # backlink for old unwinder
668 /*
669 * We entered an interrupt context - irqs are off:
670 */
671 TRACE_IRQS_OFF
672 call \func 783 call \func
673 .endm 784 .endm
674 785
675ENTRY(common_interrupt) 786 /*
787 * The interrupt stubs push (~vector+0x80) onto the stack and
788 * then jump to common_interrupt.
789 */
790 .p2align CONFIG_X86_L1_CACHE_SHIFT
791common_interrupt:
676 XCPT_FRAME 792 XCPT_FRAME
793 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
677 interrupt do_IRQ 794 interrupt do_IRQ
678 /* 0(%rsp): oldrsp-ARGOFFSET */ 795 /* 0(%rsp): oldrsp-ARGOFFSET */
679ret_from_intr: 796ret_from_intr:
@@ -808,315 +925,202 @@ END(common_interrupt)
808/* 925/*
809 * APIC interrupts. 926 * APIC interrupts.
810 */ 927 */
811 .macro apicinterrupt num,func 928.macro apicinterrupt num sym do_sym
929ENTRY(\sym)
812 INTR_FRAME 930 INTR_FRAME
813 pushq $~(\num) 931 pushq $~(\num)
814 CFI_ADJUST_CFA_OFFSET 8 932 CFI_ADJUST_CFA_OFFSET 8
815 interrupt \func 933 interrupt \do_sym
816 jmp ret_from_intr 934 jmp ret_from_intr
817 CFI_ENDPROC 935 CFI_ENDPROC
818 .endm 936END(\sym)
937.endm
819 938
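
Each apicinterrupt line now generates a complete handler. For example, the
LOCAL_TIMER_VECTOR instance below expands, per the macro body above, to:

ENTRY(apic_timer_interrupt)
	INTR_FRAME
	pushq $~(LOCAL_TIMER_VECTOR)
	CFI_ADJUST_CFA_OFFSET 8
	interrupt smp_apic_timer_interrupt	/* save registers, call the handler */
	jmp ret_from_intr
	CFI_ENDPROC
END(apic_timer_interrupt)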
820ENTRY(thermal_interrupt) 939#ifdef CONFIG_SMP
821 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt 940apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
822END(thermal_interrupt) 941 irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
942#endif
823 943
824ENTRY(threshold_interrupt) 944apicinterrupt UV_BAU_MESSAGE \
825 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt 945 uv_bau_message_intr1 uv_bau_message_interrupt
826END(threshold_interrupt) 946apicinterrupt LOCAL_TIMER_VECTOR \
947 apic_timer_interrupt smp_apic_timer_interrupt
827 948
828#ifdef CONFIG_SMP 949#ifdef CONFIG_SMP
829ENTRY(reschedule_interrupt) 950apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
830 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt 951 invalidate_interrupt0 smp_invalidate_interrupt
831END(reschedule_interrupt) 952apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \
832 953 invalidate_interrupt1 smp_invalidate_interrupt
833 .macro INVALIDATE_ENTRY num 954apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \
834ENTRY(invalidate_interrupt\num) 955 invalidate_interrupt2 smp_invalidate_interrupt
835 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt 956apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \
836END(invalidate_interrupt\num) 957 invalidate_interrupt3 smp_invalidate_interrupt
837 .endm 958apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \
838 959 invalidate_interrupt4 smp_invalidate_interrupt
839 INVALIDATE_ENTRY 0 960apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \
840 INVALIDATE_ENTRY 1 961 invalidate_interrupt5 smp_invalidate_interrupt
841 INVALIDATE_ENTRY 2 962apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \
842 INVALIDATE_ENTRY 3 963 invalidate_interrupt6 smp_invalidate_interrupt
843 INVALIDATE_ENTRY 4 964apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
844 INVALIDATE_ENTRY 5 965 invalidate_interrupt7 smp_invalidate_interrupt
845 INVALIDATE_ENTRY 6
846 INVALIDATE_ENTRY 7
847
848ENTRY(call_function_interrupt)
849 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
850END(call_function_interrupt)
851ENTRY(call_function_single_interrupt)
852 apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
853END(call_function_single_interrupt)
854ENTRY(irq_move_cleanup_interrupt)
855 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
856END(irq_move_cleanup_interrupt)
857#endif 966#endif
858 967
859ENTRY(apic_timer_interrupt) 968apicinterrupt THRESHOLD_APIC_VECTOR \
860 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt 969 threshold_interrupt mce_threshold_interrupt
861END(apic_timer_interrupt) 970apicinterrupt THERMAL_APIC_VECTOR \
971 thermal_interrupt smp_thermal_interrupt
862 972
863ENTRY(uv_bau_message_intr1) 973#ifdef CONFIG_SMP
864 apicinterrupt 220,uv_bau_message_interrupt 974apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
865END(uv_bau_message_intr1) 975 call_function_single_interrupt smp_call_function_single_interrupt
866 976apicinterrupt CALL_FUNCTION_VECTOR \
867ENTRY(error_interrupt) 977 call_function_interrupt smp_call_function_interrupt
868 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt 978apicinterrupt RESCHEDULE_VECTOR \
869END(error_interrupt) 979 reschedule_interrupt smp_reschedule_interrupt
980#endif
870 981
871ENTRY(spurious_interrupt) 982apicinterrupt ERROR_APIC_VECTOR \
872 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt 983 error_interrupt smp_error_interrupt
873END(spurious_interrupt) 984apicinterrupt SPURIOUS_APIC_VECTOR \
985 spurious_interrupt smp_spurious_interrupt
874 986
875/* 987/*
876 * Exception entry points. 988 * Exception entry points.
877 */ 989 */
878 .macro zeroentry sym 990.macro zeroentry sym do_sym
991ENTRY(\sym)
879 INTR_FRAME 992 INTR_FRAME
880 PARAVIRT_ADJUST_EXCEPTION_FRAME 993 PARAVIRT_ADJUST_EXCEPTION_FRAME
881 pushq $0 /* push error code/oldrax */ 994 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
882 CFI_ADJUST_CFA_OFFSET 8 995 subq $15*8,%rsp
883 pushq %rax /* push real oldrax to the rdi slot */ 996 CFI_ADJUST_CFA_OFFSET 15*8
884 CFI_ADJUST_CFA_OFFSET 8 997 call error_entry
885 CFI_REL_OFFSET rax,0 998 DEFAULT_FRAME 0
886 leaq \sym(%rip),%rax 999 movq %rsp,%rdi /* pt_regs pointer */
887 jmp error_entry 1000 xorl %esi,%esi /* no error code */
1001 call \do_sym
1002 jmp error_exit /* %ebx: no swapgs flag */
888 CFI_ENDPROC 1003 CFI_ENDPROC
889 .endm 1004END(\sym)
1005.endm
890 1006
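
zeroentry now emits the whole handler rather than a tail into error_entry. Hand-expanding
zeroentry overflow do_overflow (one of the instances listed further down) per the macro
body above:

ENTRY(overflow)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq_cfi $-1			/* ORIG_RAX: no syscall to restart */
	subq $15*8,%rsp			/* room for the full pt_regs */
	CFI_ADJUST_CFA_OFFSET 15*8
	call error_entry		/* saves all registers, handles swapgs */
	DEFAULT_FRAME 0
	movq %rsp,%rdi			/* pt_regs pointer */
	xorl %esi,%esi			/* no error code */
	call do_overflow
	jmp error_exit			/* %ebx: no swapgs flag */
	CFI_ENDPROC
END(overflow)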
891 .macro errorentry sym 1007.macro paranoidzeroentry sym do_sym
892 XCPT_FRAME 1008ENTRY(\sym)
1009 INTR_FRAME
893 PARAVIRT_ADJUST_EXCEPTION_FRAME 1010 PARAVIRT_ADJUST_EXCEPTION_FRAME
894 pushq %rax 1011 pushq $-1 /* ORIG_RAX: no syscall to restart */
895 CFI_ADJUST_CFA_OFFSET 8 1012 CFI_ADJUST_CFA_OFFSET 8
896 CFI_REL_OFFSET rax,0 1013 subq $15*8, %rsp
897 leaq \sym(%rip),%rax 1014 call save_paranoid
898 jmp error_entry 1015 TRACE_IRQS_OFF
1016 movq %rsp,%rdi /* pt_regs pointer */
1017 xorl %esi,%esi /* no error code */
1018 call \do_sym
1019 jmp paranoid_exit /* %ebx: no swapgs flag */
899 CFI_ENDPROC 1020 CFI_ENDPROC
900 .endm 1021END(\sym)
1022.endm
901 1023
902 /* error code is on the stack already */ 1024.macro paranoidzeroentry_ist sym do_sym ist
903 /* handle NMI like exceptions that can happen everywhere */ 1025ENTRY(\sym)
904 .macro paranoidentry sym, ist=0, irqtrace=1 1026 INTR_FRAME
905 SAVE_ALL 1027 PARAVIRT_ADJUST_EXCEPTION_FRAME
906 cld 1028 pushq $-1 /* ORIG_RAX: no syscall to restart */
907 movl $1,%ebx 1029 CFI_ADJUST_CFA_OFFSET 8
908 movl $MSR_GS_BASE,%ecx 1030 subq $15*8, %rsp
909 rdmsr 1031 call save_paranoid
910 testl %edx,%edx
911 js 1f
912 SWAPGS
913 xorl %ebx,%ebx
9141:
915 .if \ist
916 movq %gs:pda_data_offset, %rbp
917 .endif
918 .if \irqtrace
919 TRACE_IRQS_OFF
920 .endif
921 movq %rsp,%rdi
922 movq ORIG_RAX(%rsp),%rsi
923 movq $-1,ORIG_RAX(%rsp)
924 .if \ist
925 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
926 .endif
927 call \sym
928 .if \ist
929 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
930 .endif
931 DISABLE_INTERRUPTS(CLBR_NONE)
932 .if \irqtrace
933 TRACE_IRQS_OFF 1032 TRACE_IRQS_OFF
934 .endif 1033 movq %rsp,%rdi /* pt_regs pointer */
935 .endm 1034 xorl %esi,%esi /* no error code */
1035 movq %gs:pda_data_offset, %rbp
1036 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
1037 call \do_sym
1038 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
1039 jmp paranoid_exit /* %ebx: no swapgs flag */
1040 CFI_ENDPROC
1041END(\sym)
1042.endm
936 1043
937 /* 1044.macro errorentry sym do_sym
938 * "Paranoid" exit path from exception stack. 1045ENTRY(\sym)
939 * Paranoid because this is used by NMIs and cannot take 1046 XCPT_FRAME
940 * any kernel state for granted. 1047 PARAVIRT_ADJUST_EXCEPTION_FRAME
941 * We don't do kernel preemption checks here, because only 1048 subq $15*8,%rsp
942 * NMI should be common and it does not enable IRQs and 1049 CFI_ADJUST_CFA_OFFSET 15*8
943 * cannot get reschedule ticks. 1050 call error_entry
944 * 1051 DEFAULT_FRAME 0
945 * "trace" is 0 for the NMI handler only, because irq-tracing 1052 movq %rsp,%rdi /* pt_regs pointer */
946 * is fundamentally NMI-unsafe. (we cannot change the soft and 1053 movq ORIG_RAX(%rsp),%rsi /* get error code */
947 * hard flags at once, atomically) 1054 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
948 */ 1055 call \do_sym
949 .macro paranoidexit trace=1 1056 jmp error_exit /* %ebx: no swapgs flag */
950 /* ebx: no swapgs flag */
951paranoid_exit\trace:
952 testl %ebx,%ebx /* swapgs needed? */
953 jnz paranoid_restore\trace
954 testl $3,CS(%rsp)
955 jnz paranoid_userspace\trace
956paranoid_swapgs\trace:
957 .if \trace
958 TRACE_IRQS_IRETQ 0
959 .endif
960 SWAPGS_UNSAFE_STACK
961paranoid_restore\trace:
962 RESTORE_ALL 8
963 jmp irq_return
964paranoid_userspace\trace:
965 GET_THREAD_INFO(%rcx)
966 movl TI_flags(%rcx),%ebx
967 andl $_TIF_WORK_MASK,%ebx
968 jz paranoid_swapgs\trace
969 movq %rsp,%rdi /* &pt_regs */
970 call sync_regs
971 movq %rax,%rsp /* switch stack for scheduling */
972 testl $_TIF_NEED_RESCHED,%ebx
973 jnz paranoid_schedule\trace
974 movl %ebx,%edx /* arg3: thread flags */
975 .if \trace
976 TRACE_IRQS_ON
977 .endif
978 ENABLE_INTERRUPTS(CLBR_NONE)
979 xorl %esi,%esi /* arg2: oldset */
980 movq %rsp,%rdi /* arg1: &pt_regs */
981 call do_notify_resume
982 DISABLE_INTERRUPTS(CLBR_NONE)
983 .if \trace
984 TRACE_IRQS_OFF
985 .endif
986 jmp paranoid_userspace\trace
987paranoid_schedule\trace:
988 .if \trace
989 TRACE_IRQS_ON
990 .endif
991 ENABLE_INTERRUPTS(CLBR_ANY)
992 call schedule
993 DISABLE_INTERRUPTS(CLBR_ANY)
994 .if \trace
995 TRACE_IRQS_OFF
996 .endif
997 jmp paranoid_userspace\trace
998 CFI_ENDPROC 1057 CFI_ENDPROC
999 .endm 1058END(\sym)
1059.endm
1000 1060
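
errorentry differs from zeroentry only in that the CPU has already pushed an error code,
which is fetched into the second argument. Hand-expanding errorentry invalid_TSS
do_invalid_TSS (used further down) per the macro body above:

ENTRY(invalid_TSS)
	XCPT_FRAME			/* error code already on the stack */
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	subq $15*8,%rsp
	CFI_ADJUST_CFA_OFFSET 15*8
	call error_entry
	DEFAULT_FRAME 0
	movq %rsp,%rdi			/* pt_regs pointer */
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
	call do_invalid_TSS
	jmp error_exit			/* %ebx: no swapgs flag */
	CFI_ENDPROC
END(invalid_TSS)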
1001/* 1061 /* error code is on the stack already */
1002 * Exception entry point. This expects an error code/orig_rax on the stack 1062.macro paranoiderrorentry sym do_sym
1003 * and the exception handler in %rax. 1063ENTRY(\sym)
1004 */ 1064 XCPT_FRAME
1005KPROBE_ENTRY(error_entry) 1065 PARAVIRT_ADJUST_EXCEPTION_FRAME
1006 _frame RDI 1066 subq $15*8,%rsp
1007 CFI_REL_OFFSET rax,0 1067 CFI_ADJUST_CFA_OFFSET 15*8
1008 /* rdi slot contains rax, oldrax contains error code */ 1068 call save_paranoid
1009 cld 1069 DEFAULT_FRAME 0
1010 subq $14*8,%rsp
1011 CFI_ADJUST_CFA_OFFSET (14*8)
1012 movq %rsi,13*8(%rsp)
1013 CFI_REL_OFFSET rsi,RSI
1014 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
1015 CFI_REGISTER rax,rsi
1016 movq %rdx,12*8(%rsp)
1017 CFI_REL_OFFSET rdx,RDX
1018 movq %rcx,11*8(%rsp)
1019 CFI_REL_OFFSET rcx,RCX
1020 movq %rsi,10*8(%rsp) /* store rax */
1021 CFI_REL_OFFSET rax,RAX
1022 movq %r8, 9*8(%rsp)
1023 CFI_REL_OFFSET r8,R8
1024 movq %r9, 8*8(%rsp)
1025 CFI_REL_OFFSET r9,R9
1026 movq %r10,7*8(%rsp)
1027 CFI_REL_OFFSET r10,R10
1028 movq %r11,6*8(%rsp)
1029 CFI_REL_OFFSET r11,R11
1030 movq %rbx,5*8(%rsp)
1031 CFI_REL_OFFSET rbx,RBX
1032 movq %rbp,4*8(%rsp)
1033 CFI_REL_OFFSET rbp,RBP
1034 movq %r12,3*8(%rsp)
1035 CFI_REL_OFFSET r12,R12
1036 movq %r13,2*8(%rsp)
1037 CFI_REL_OFFSET r13,R13
1038 movq %r14,1*8(%rsp)
1039 CFI_REL_OFFSET r14,R14
1040 movq %r15,(%rsp)
1041 CFI_REL_OFFSET r15,R15
1042 xorl %ebx,%ebx
1043 testl $3,CS(%rsp)
1044 je error_kernelspace
1045error_swapgs:
1046 SWAPGS
1047error_sti:
1048 TRACE_IRQS_OFF 1070 TRACE_IRQS_OFF
1049 movq %rdi,RDI(%rsp) 1071 movq %rsp,%rdi /* pt_regs pointer */
1050 CFI_REL_OFFSET rdi,RDI
1051 movq %rsp,%rdi
1052 movq ORIG_RAX(%rsp),%rsi /* get error code */ 1072 movq ORIG_RAX(%rsp),%rsi /* get error code */
1053 movq $-1,ORIG_RAX(%rsp) 1073 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
1054 call *%rax 1074 call \do_sym
1055 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ 1075 jmp paranoid_exit /* %ebx: no swapgs flag */
1056error_exit:
1057 movl %ebx,%eax
1058 RESTORE_REST
1059 DISABLE_INTERRUPTS(CLBR_NONE)
1060 TRACE_IRQS_OFF
1061 GET_THREAD_INFO(%rcx)
1062 testl %eax,%eax
1063 jne retint_kernel
1064 LOCKDEP_SYS_EXIT_IRQ
1065 movl TI_flags(%rcx),%edx
1066 movl $_TIF_WORK_MASK,%edi
1067 andl %edi,%edx
1068 jnz retint_careful
1069 jmp retint_swapgs
1070 CFI_ENDPROC 1076 CFI_ENDPROC
1077END(\sym)
1078.endm
1071 1079
1072error_kernelspace: 1080zeroentry divide_error do_divide_error
1073 incl %ebx 1081zeroentry overflow do_overflow
1074 /* There are two places in the kernel that can potentially fault with 1082zeroentry bounds do_bounds
1075 usergs. Handle them here. The exception handlers after 1083zeroentry invalid_op do_invalid_op
1076 iret run with kernel gs again, so don't set the user space flag. 1084zeroentry device_not_available do_device_not_available
 1077 B stepping K8s sometimes report a truncated RIP for IRET 1085paranoiderrorentry double_fault do_double_fault
1078 exceptions returning to compat mode. Check for these here too. */ 1086zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
1079 leaq irq_return(%rip),%rcx 1087errorentry invalid_TSS do_invalid_TSS
1080 cmpq %rcx,RIP(%rsp) 1088errorentry segment_not_present do_segment_not_present
1081 je error_swapgs 1089zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
1082 movl %ecx,%ecx /* zero extend */ 1090zeroentry coprocessor_error do_coprocessor_error
1083 cmpq %rcx,RIP(%rsp) 1091errorentry alignment_check do_alignment_check
1084 je error_swapgs 1092zeroentry simd_coprocessor_error do_simd_coprocessor_error
1085 cmpq $gs_change,RIP(%rsp) 1093
1086 je error_swapgs 1094 /* Reload gs selector with exception handling */
1087 jmp error_sti 1095 /* edi: new selector */
1088KPROBE_END(error_entry)
1089
1090 /* Reload gs selector with exception handling */
1091 /* edi: new selector */
1092ENTRY(native_load_gs_index) 1096ENTRY(native_load_gs_index)
1093 CFI_STARTPROC 1097 CFI_STARTPROC
1094 pushf 1098 pushf
1095 CFI_ADJUST_CFA_OFFSET 8 1099 CFI_ADJUST_CFA_OFFSET 8
1096 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) 1100 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
1097 SWAPGS 1101 SWAPGS
1098gs_change: 1102gs_change:
1099 movl %edi,%gs 1103 movl %edi,%gs
11002: mfence /* workaround */ 11042: mfence /* workaround */
1101 SWAPGS 1105 SWAPGS
1102 popf 1106 popf
1103 CFI_ADJUST_CFA_OFFSET -8 1107 CFI_ADJUST_CFA_OFFSET -8
1104 ret 1108 ret
1105 CFI_ENDPROC 1109 CFI_ENDPROC
1106ENDPROC(native_load_gs_index) 1110END(native_load_gs_index)
1107 1111
1108 .section __ex_table,"a" 1112 .section __ex_table,"a"
1109 .align 8 1113 .align 8
1110 .quad gs_change,bad_gs 1114 .quad gs_change,bad_gs
1111 .previous 1115 .previous
1112 .section .fixup,"ax" 1116 .section .fixup,"ax"
1113 /* running with kernelgs */ 1117 /* running with kernelgs */
1114bad_gs: 1118bad_gs:
1115 SWAPGS /* switch back to user gs */ 1119 SWAPGS /* switch back to user gs */
1116 xorl %eax,%eax 1120 xorl %eax,%eax
1117 movl %eax,%gs 1121 movl %eax,%gs
1118 jmp 2b 1122 jmp 2b
1119 .previous 1123 .previous
1120 1124
1121/* 1125/*
1122 * Create a kernel thread. 1126 * Create a kernel thread.
@@ -1151,15 +1155,15 @@ ENTRY(kernel_thread)
1151 * so internally to the x86_64 port you can rely on kernel_thread() 1155 * so internally to the x86_64 port you can rely on kernel_thread()
 1152 * not to reschedule the child before returning; this avoids the need 1156 * not to reschedule the child before returning; this avoids the need
 1153 * for hacks, for example to fork off the per-CPU idle tasks. 1157 * for hacks, for example to fork off the per-CPU idle tasks.
1154 * [Hopefully no generic code relies on the reschedule -AK] 1158 * [Hopefully no generic code relies on the reschedule -AK]
1155 */ 1159 */
1156 RESTORE_ALL 1160 RESTORE_ALL
1157 UNFAKE_STACK_FRAME 1161 UNFAKE_STACK_FRAME
1158 ret 1162 ret
1159 CFI_ENDPROC 1163 CFI_ENDPROC
1160ENDPROC(kernel_thread) 1164END(kernel_thread)
1161 1165
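A usage sketch of the contract spelled out in the comment above: when kernel_thread() returns, the parent may assume the child has not yet run (signature as in kernels of this vintage; worker and spawn_example are hypothetical):

static int worker(void *arg)
{
	/* entered via child_rip, which calls fn(arg) and then do_exit() */
	return 0;
}

static void spawn_example(void)
{
	int pid = kernel_thread(worker, NULL, CLONE_FS | CLONE_FILES);

	if (pid < 0)
		printk(KERN_ERR "kernel_thread failed: %d\n", pid);
	/* bookkeeping for the child is safe here: it has not run yet */
}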
1162child_rip: 1166ENTRY(child_rip)
1163 pushq $0 # fake return address 1167 pushq $0 # fake return address
1164 CFI_STARTPROC 1168 CFI_STARTPROC
1165 /* 1169 /*
@@ -1174,7 +1178,7 @@ child_rip:
1174 call do_exit 1178 call do_exit
1175 ud2 # padding for call trace 1179 ud2 # padding for call trace
1176 CFI_ENDPROC 1180 CFI_ENDPROC
1177ENDPROC(child_rip) 1181END(child_rip)
1178 1182
1179/* 1183/*
1180 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. 1184 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
@@ -1205,129 +1209,7 @@ ENTRY(kernel_execve)
1205 UNFAKE_STACK_FRAME 1209 UNFAKE_STACK_FRAME
1206 ret 1210 ret
1207 CFI_ENDPROC 1211 CFI_ENDPROC
1208ENDPROC(kernel_execve) 1212END(kernel_execve)
1209
1210KPROBE_ENTRY(page_fault)
1211 errorentry do_page_fault
1212KPROBE_END(page_fault)
1213
1214ENTRY(coprocessor_error)
1215 zeroentry do_coprocessor_error
1216END(coprocessor_error)
1217
1218ENTRY(simd_coprocessor_error)
1219 zeroentry do_simd_coprocessor_error
1220END(simd_coprocessor_error)
1221
1222ENTRY(device_not_available)
1223 zeroentry do_device_not_available
1224END(device_not_available)
1225
1226 /* runs on exception stack */
1227KPROBE_ENTRY(debug)
1228 INTR_FRAME
1229 PARAVIRT_ADJUST_EXCEPTION_FRAME
1230 pushq $0
1231 CFI_ADJUST_CFA_OFFSET 8
1232 paranoidentry do_debug, DEBUG_STACK
1233 paranoidexit
1234KPROBE_END(debug)
1235
1236 /* runs on exception stack */
1237KPROBE_ENTRY(nmi)
1238 INTR_FRAME
1239 PARAVIRT_ADJUST_EXCEPTION_FRAME
1240 pushq $-1
1241 CFI_ADJUST_CFA_OFFSET 8
1242 paranoidentry do_nmi, 0, 0
1243#ifdef CONFIG_TRACE_IRQFLAGS
1244 paranoidexit 0
1245#else
1246 jmp paranoid_exit1
1247 CFI_ENDPROC
1248#endif
1249KPROBE_END(nmi)
1250
1251KPROBE_ENTRY(int3)
1252 INTR_FRAME
1253 PARAVIRT_ADJUST_EXCEPTION_FRAME
1254 pushq $0
1255 CFI_ADJUST_CFA_OFFSET 8
1256 paranoidentry do_int3, DEBUG_STACK
1257 jmp paranoid_exit1
1258 CFI_ENDPROC
1259KPROBE_END(int3)
1260
1261ENTRY(overflow)
1262 zeroentry do_overflow
1263END(overflow)
1264
1265ENTRY(bounds)
1266 zeroentry do_bounds
1267END(bounds)
1268
1269ENTRY(invalid_op)
1270 zeroentry do_invalid_op
1271END(invalid_op)
1272
1273ENTRY(coprocessor_segment_overrun)
1274 zeroentry do_coprocessor_segment_overrun
1275END(coprocessor_segment_overrun)
1276
1277 /* runs on exception stack */
1278ENTRY(double_fault)
1279 XCPT_FRAME
1280 PARAVIRT_ADJUST_EXCEPTION_FRAME
1281 paranoidentry do_double_fault
1282 jmp paranoid_exit1
1283 CFI_ENDPROC
1284END(double_fault)
1285
1286ENTRY(invalid_TSS)
1287 errorentry do_invalid_TSS
1288END(invalid_TSS)
1289
1290ENTRY(segment_not_present)
1291 errorentry do_segment_not_present
1292END(segment_not_present)
1293
1294 /* runs on exception stack */
1295ENTRY(stack_segment)
1296 XCPT_FRAME
1297 PARAVIRT_ADJUST_EXCEPTION_FRAME
1298 paranoidentry do_stack_segment
1299 jmp paranoid_exit1
1300 CFI_ENDPROC
1301END(stack_segment)
1302
1303KPROBE_ENTRY(general_protection)
1304 errorentry do_general_protection
1305KPROBE_END(general_protection)
1306
1307ENTRY(alignment_check)
1308 errorentry do_alignment_check
1309END(alignment_check)
1310
1311ENTRY(divide_error)
1312 zeroentry do_divide_error
1313END(divide_error)
1314
1315ENTRY(spurious_interrupt_bug)
1316 zeroentry do_spurious_interrupt_bug
1317END(spurious_interrupt_bug)
1318
1319#ifdef CONFIG_X86_MCE
1320 /* runs on exception stack */
1321ENTRY(machine_check)
1322 INTR_FRAME
1323 PARAVIRT_ADJUST_EXCEPTION_FRAME
1324 pushq $0
1325 CFI_ADJUST_CFA_OFFSET 8
1326 paranoidentry do_machine_check
1327 jmp paranoid_exit1
1328 CFI_ENDPROC
1329END(machine_check)
1330#endif
1331 1213
1332/* Call softirq on interrupt stack. Interrupts are off. */ 1214/* Call softirq on interrupt stack. Interrupts are off. */
1333ENTRY(call_softirq) 1215ENTRY(call_softirq)
@@ -1347,40 +1229,33 @@ ENTRY(call_softirq)
1347 decl %gs:pda_irqcount 1229 decl %gs:pda_irqcount
1348 ret 1230 ret
1349 CFI_ENDPROC 1231 CFI_ENDPROC
1350ENDPROC(call_softirq) 1232END(call_softirq)
1351
1352KPROBE_ENTRY(ignore_sysret)
1353 CFI_STARTPROC
1354 mov $-ENOSYS,%eax
1355 sysret
1356 CFI_ENDPROC
1357ENDPROC(ignore_sysret)
1358 1233
1359#ifdef CONFIG_XEN 1234#ifdef CONFIG_XEN
1360ENTRY(xen_hypervisor_callback) 1235zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
1361 zeroentry xen_do_hypervisor_callback
1362END(xen_hypervisor_callback)
1363 1236
1364/* 1237/*
1365# A note on the "critical region" in our callback handler. 1238 * A note on the "critical region" in our callback handler.
1366# We want to avoid stacking callback handlers due to events occurring 1239 * We want to avoid stacking callback handlers due to events occurring
1367# during handling of the last event. To do this, we keep events disabled 1240 * during handling of the last event. To do this, we keep events disabled
1368# until we've done all processing. HOWEVER, we must enable events before 1241 * until we've done all processing. HOWEVER, we must enable events before
1369# popping the stack frame (can't be done atomically) and so it would still 1242 * popping the stack frame (can't be done atomically) and so it would still
1370# be possible to get enough handler activations to overflow the stack. 1243 * be possible to get enough handler activations to overflow the stack.
1371# Although unlikely, bugs of that kind are hard to track down, so we'd 1244 * Although unlikely, bugs of that kind are hard to track down, so we'd
1372# like to avoid the possibility. 1245 * like to avoid the possibility.
1373# So, on entry to the handler we detect whether we interrupted an 1246 * So, on entry to the handler we detect whether we interrupted an
1374# existing activation in its critical region -- if so, we pop the current 1247 * existing activation in its critical region -- if so, we pop the current
1375# activation and restart the handler using the previous one. 1248 * activation and restart the handler using the previous one.
1376*/ 1249 */
1377ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct pt_regs *) 1250ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct pt_regs *)
1378 CFI_STARTPROC 1251 CFI_STARTPROC
1379/* Since we don't modify %rdi, evtchn_do_upall(struct pt_regs *) will 1252/*
1380 see the correct pointer to the pt_regs */ 1253 * Since we don't modify %rdi, evtchn_do_upall(struct pt_regs *) will
1254 * see the correct pointer to the pt_regs
1255 */
1381 movq %rdi, %rsp # we don't return, adjust the stack frame 1256 movq %rdi, %rsp # we don't return, adjust the stack frame
1382 CFI_ENDPROC 1257 CFI_ENDPROC
1383 CFI_DEFAULT_STACK 1258 DEFAULT_FRAME
138411: incl %gs:pda_irqcount 125911: incl %gs:pda_irqcount
1385 movq %rsp,%rbp 1260 movq %rsp,%rbp
1386 CFI_DEF_CFA_REGISTER rbp 1261 CFI_DEF_CFA_REGISTER rbp
@@ -1395,23 +1270,26 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
1395END(do_hypervisor_callback) 1270END(do_hypervisor_callback)
1396 1271
1397/* 1272/*
1398# Hypervisor uses this for application faults while it executes. 1273 * Hypervisor uses this for application faults while it executes.
1399# We get here for two reasons: 1274 * We get here for two reasons:
1400# 1. Fault while reloading DS, ES, FS or GS 1275 * 1. Fault while reloading DS, ES, FS or GS
1401# 2. Fault while executing IRET 1276 * 2. Fault while executing IRET
1402# Category 1 we do not need to fix up as Xen has already reloaded all segment 1277 * Category 1 we do not need to fix up as Xen has already reloaded all segment
1403# registers that could be reloaded and zeroed the others. 1278 * registers that could be reloaded and zeroed the others.
1404# Category 2 we fix up by killing the current process. We cannot use the 1279 * Category 2 we fix up by killing the current process. We cannot use the
1405# normal Linux return path in this case because if we use the IRET hypercall 1280 * normal Linux return path in this case because if we use the IRET hypercall
1406# to pop the stack frame we end up in an infinite loop of failsafe callbacks. 1281 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1407# We distinguish between categories by comparing each saved segment register 1282 * We distinguish between categories by comparing each saved segment register
1408# with its current contents: any discrepancy means we are in category 1. 1283 * with its current contents: any discrepancy means we are in category 1.
1409*/ 1284 */
1410ENTRY(xen_failsafe_callback) 1285ENTRY(xen_failsafe_callback)
1411 framesz = (RIP-0x30) /* workaround buggy gas */ 1286 INTR_FRAME 1 (6*8)
1412 _frame framesz 1287 /*CFI_REL_OFFSET gs,GS*/
1413 CFI_REL_OFFSET rcx, 0 1288 /*CFI_REL_OFFSET fs,FS*/
1414 CFI_REL_OFFSET r11, 8 1289 /*CFI_REL_OFFSET es,ES*/
1290 /*CFI_REL_OFFSET ds,DS*/
1291 CFI_REL_OFFSET r11,8
1292 CFI_REL_OFFSET rcx,0
1415 movw %ds,%cx 1293 movw %ds,%cx
1416 cmpw %cx,0x10(%rsp) 1294 cmpw %cx,0x10(%rsp)
1417 CFI_REMEMBER_STATE 1295 CFI_REMEMBER_STATE
@@ -1432,12 +1310,9 @@ ENTRY(xen_failsafe_callback)
1432 CFI_RESTORE r11 1310 CFI_RESTORE r11
1433 addq $0x30,%rsp 1311 addq $0x30,%rsp
1434 CFI_ADJUST_CFA_OFFSET -0x30 1312 CFI_ADJUST_CFA_OFFSET -0x30
1435 pushq $0 1313 pushq_cfi $0 /* RIP */
1436 CFI_ADJUST_CFA_OFFSET 8 1314 pushq_cfi %r11
1437 pushq %r11 1315 pushq_cfi %rcx
1438 CFI_ADJUST_CFA_OFFSET 8
1439 pushq %rcx
1440 CFI_ADJUST_CFA_OFFSET 8
1441 jmp general_protection 1316 jmp general_protection
1442 CFI_RESTORE_STATE 1317 CFI_RESTORE_STATE
14431: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ 13181: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
@@ -1447,11 +1322,223 @@ ENTRY(xen_failsafe_callback)
1447 CFI_RESTORE r11 1322 CFI_RESTORE r11
1448 addq $0x30,%rsp 1323 addq $0x30,%rsp
1449 CFI_ADJUST_CFA_OFFSET -0x30 1324 CFI_ADJUST_CFA_OFFSET -0x30
1450 pushq $0 1325 pushq_cfi $0
1451 CFI_ADJUST_CFA_OFFSET 8
1452 SAVE_ALL 1326 SAVE_ALL
1453 jmp error_exit 1327 jmp error_exit
1454 CFI_ENDPROC 1328 CFI_ENDPROC
1455END(xen_failsafe_callback) 1329END(xen_failsafe_callback)
1456 1330
1457#endif /* CONFIG_XEN */ 1331#endif /* CONFIG_XEN */
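The category test above boils down to comparing a segment selector Xen saved in the callback frame with the live register. A C rendering, under the explicit assumption that xen_failsafe_frame mirrors the 0x30-byte frame the asm unwinds (only the ds compare at 0x10(%rsp) is actually performed above; the struct layout is inferred, not quoted from the patch):

/* hypothetical mirror of the frame Xen pushes before the callback */
struct xen_failsafe_frame {
	unsigned long rcx;		/* 0x00 */
	unsigned long r11;		/* 0x08 */
	unsigned long ds, es, fs, gs;	/* 0x10..; selectors in low 16 bits */
};

static int is_category1(const struct xen_failsafe_frame *f)
{
	unsigned short cur;

	asm("movw %%ds,%0" : "=r" (cur));
	/* mismatch: the fault hit during segment reload; just retry the iret */
	return cur != (unsigned short)f->ds;
}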
1332
1333/*
1334 * Some functions should be protected against kprobes
1335 */
1336 .pushsection .kprobes.text, "ax"
1337
1338paranoidzeroentry_ist debug do_debug DEBUG_STACK
1339paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
1340paranoiderrorentry stack_segment do_stack_segment
1341errorentry general_protection do_general_protection
1342errorentry page_fault do_page_fault
1343#ifdef CONFIG_X86_MCE
1344paranoidzeroentry machine_check do_machine_check
1345#endif
1346
1347 /*
1348 * "Paranoid" exit path from exception stack.
1349 * Paranoid because this is used by NMIs and cannot take
1350 * any kernel state for granted.
1351 * We don't do kernel preemption checks here, because only
1352 * the NMI case should be common, and NMIs do not enable IRQs and
1353 * cannot get reschedule ticks.
1354 *
1355 * "trace" is 0 for the NMI handler only, because irq-tracing
1356 * is fundamentally NMI-unsafe. (we cannot change the soft and
1357 * hard flags at once, atomically)
1358 */
1359
1360 /* ebx: no swapgs flag */
1361ENTRY(paranoid_exit)
1362 INTR_FRAME
1363 DISABLE_INTERRUPTS(CLBR_NONE)
1364 TRACE_IRQS_OFF
1365 testl %ebx,%ebx /* swapgs needed? */
1366 jnz paranoid_restore
1367 testl $3,CS(%rsp)
1368 jnz paranoid_userspace
1369paranoid_swapgs:
1370 TRACE_IRQS_IRETQ 0
1371 SWAPGS_UNSAFE_STACK
1372paranoid_restore:
1373 RESTORE_ALL 8
1374 jmp irq_return
1375paranoid_userspace:
1376 GET_THREAD_INFO(%rcx)
1377 movl TI_flags(%rcx),%ebx
1378 andl $_TIF_WORK_MASK,%ebx
1379 jz paranoid_swapgs
1380 movq %rsp,%rdi /* &pt_regs */
1381 call sync_regs
1382 movq %rax,%rsp /* switch stack for scheduling */
1383 testl $_TIF_NEED_RESCHED,%ebx
1384 jnz paranoid_schedule
1385 movl %ebx,%edx /* arg3: thread flags */
1386 TRACE_IRQS_ON
1387 ENABLE_INTERRUPTS(CLBR_NONE)
1388 xorl %esi,%esi /* arg2: oldset */
1389 movq %rsp,%rdi /* arg1: &pt_regs */
1390 call do_notify_resume
1391 DISABLE_INTERRUPTS(CLBR_NONE)
1392 TRACE_IRQS_OFF
1393 jmp paranoid_userspace
1394paranoid_schedule:
1395 TRACE_IRQS_ON
1396 ENABLE_INTERRUPTS(CLBR_ANY)
1397 call schedule
1398 DISABLE_INTERRUPTS(CLBR_ANY)
1399 TRACE_IRQS_OFF
1400 jmp paranoid_userspace
1401 CFI_ENDPROC
1402END(paranoid_exit)
1403
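As a reading aid, the exit policy above maps onto C roughly as follows (illustrative only: user_mode_frame(), swapgs() and the IRQ enable/disable discipline are simplified stand-ins, and like the asm the loop re-reads the work flags on every pass):

/* no_swapgs mirrors %ebx: nonzero means gs is already the kernel's */
static void paranoid_exit_model(struct pt_regs *regs, int no_swapgs)
{
	while (!no_swapgs && user_mode_frame(regs)) {
		u32 flags = current_thread_info()->flags & _TIF_WORK_MASK;

		if (!flags)
			break;
		regs = sync_regs(regs);		/* switch to the task stack */
		if (flags & _TIF_NEED_RESCHED)
			schedule();		/* IRQs enabled around the call */
		else
			do_notify_resume(regs, NULL, flags);
	}
	if (!no_swapgs)
		swapgs();			/* back to the user gs base */
	/* RESTORE_ALL 8 + jmp irq_return from here */
}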
1404/*
1405 * Exception entry point. This expects an error code/orig_rax on the stack.
1406 * returns the "no swapgs flag" in %ebx.
1407 */
1408ENTRY(error_entry)
1409 XCPT_FRAME
1410 CFI_ADJUST_CFA_OFFSET 15*8
1411 /* oldrax contains error code */
1412 cld
1413 movq_cfi rdi, RDI+8
1414 movq_cfi rsi, RSI+8
1415 movq_cfi rdx, RDX+8
1416 movq_cfi rcx, RCX+8
1417 movq_cfi rax, RAX+8
1418 movq_cfi r8, R8+8
1419 movq_cfi r9, R9+8
1420 movq_cfi r10, R10+8
1421 movq_cfi r11, R11+8
1422 movq_cfi rbx, RBX+8
1423 movq_cfi rbp, RBP+8
1424 movq_cfi r12, R12+8
1425 movq_cfi r13, R13+8
1426 movq_cfi r14, R14+8
1427 movq_cfi r15, R15+8
1428 xorl %ebx,%ebx
1429 testl $3,CS+8(%rsp)
1430 je error_kernelspace
1431error_swapgs:
1432 SWAPGS
1433error_sti:
1434 TRACE_IRQS_OFF
1435 ret
1436 CFI_ENDPROC
1437
1438/*
1439 * There are two places in the kernel that can potentially fault with
1440 * usergs. Handle them here. The exception handlers after iret run with
1441 * kernel gs again, so don't set the user space flag. B stepping K8s
1442 * sometimes report an truncated RIP for IRET exceptions returning to
1443 * compat mode. Check for these here too.
1444 */
1445error_kernelspace:
1446 incl %ebx
1447 leaq irq_return(%rip),%rcx
1448 cmpq %rcx,RIP+8(%rsp)
1449 je error_swapgs
1450 movl %ecx,%ecx /* zero extend */
1451 cmpq %rcx,RIP+8(%rsp)
1452 je error_swapgs
1453 cmpq $gs_change,RIP+8(%rsp)
1454 je error_swapgs
1455 jmp error_sti
1456END(error_entry)
1457
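A rough C model of that contract (illustrative; save_all_registers(), user_mode_frame() and faulted_with_usergs() are stand-ins, the last one for the three RIP checks in error_kernelspace above):

/* return value models %ebx: 1 = no SWAPGS needed on exit, 0 = needed */
static int error_entry_model(struct pt_regs *regs)
{
	save_all_registers(regs);	/* the movq_cfi block */

	if (user_mode_frame(regs)) {
		swapgs();		/* switch to the kernel gs base */
		return 0;		/* error_exit must swap back */
	}
	if (faulted_with_usergs(regs))
		swapgs();		/* stray user gs inside the kernel... */
	return 1;			/* ...but the exit stays on kernel gs */
}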
1458
1459/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
1460ENTRY(error_exit)
1461 DEFAULT_FRAME
1462 movl %ebx,%eax
1463 RESTORE_REST
1464 DISABLE_INTERRUPTS(CLBR_NONE)
1465 TRACE_IRQS_OFF
1466 GET_THREAD_INFO(%rcx)
1467 testl %eax,%eax
1468 jne retint_kernel
1469 LOCKDEP_SYS_EXIT_IRQ
1470 movl TI_flags(%rcx),%edx
1471 movl $_TIF_WORK_MASK,%edi
1472 andl %edi,%edx
1473 jnz retint_careful
1474 jmp retint_swapgs
1475 CFI_ENDPROC
1476END(error_exit)
1477
1478
1479 /* runs on exception stack */
1480ENTRY(nmi)
1481 INTR_FRAME
1482 PARAVIRT_ADJUST_EXCEPTION_FRAME
1483 pushq_cfi $-1
1484 subq $15*8, %rsp
1485 CFI_ADJUST_CFA_OFFSET 15*8
1486 call save_paranoid
1487 DEFAULT_FRAME 0
1488 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
1489 movq %rsp,%rdi
1490 movq $-1,%rsi
1491 call do_nmi
1492#ifdef CONFIG_TRACE_IRQFLAGS
1493 /* paranoidexit; without TRACE_IRQS_OFF */
1494 /* ebx: no swapgs flag */
1495 DISABLE_INTERRUPTS(CLBR_NONE)
1496 testl %ebx,%ebx /* swapgs needed? */
1497 jnz nmi_restore
1498 testl $3,CS(%rsp)
1499 jnz nmi_userspace
1500nmi_swapgs:
1501 SWAPGS_UNSAFE_STACK
1502nmi_restore:
1503 RESTORE_ALL 8
1504 jmp irq_return
1505nmi_userspace:
1506 GET_THREAD_INFO(%rcx)
1507 movl TI_flags(%rcx),%ebx
1508 andl $_TIF_WORK_MASK,%ebx
1509 jz nmi_swapgs
1510 movq %rsp,%rdi /* &pt_regs */
1511 call sync_regs
1512 movq %rax,%rsp /* switch stack for scheduling */
1513 testl $_TIF_NEED_RESCHED,%ebx
1514 jnz nmi_schedule
1515 movl %ebx,%edx /* arg3: thread flags */
1516 ENABLE_INTERRUPTS(CLBR_NONE)
1517 xorl %esi,%esi /* arg2: oldset */
1518 movq %rsp,%rdi /* arg1: &pt_regs */
1519 call do_notify_resume
1520 DISABLE_INTERRUPTS(CLBR_NONE)
1521 jmp nmi_userspace
1522nmi_schedule:
1523 ENABLE_INTERRUPTS(CLBR_ANY)
1524 call schedule
1525 DISABLE_INTERRUPTS(CLBR_ANY)
1526 jmp nmi_userspace
1527 CFI_ENDPROC
1528#else
1529 jmp paranoid_exit
1530 CFI_ENDPROC
1531#endif
1532END(nmi)
1533
1534ENTRY(ignore_sysret)
1535 CFI_STARTPROC
1536 mov $-ENOSYS,%eax
1537 sysret
1538 CFI_ENDPROC
1539END(ignore_sysret)
1540
1541/*
1542 * End of kprobes section
1543 */
1544 .popsection
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 845aa9803e80..607db63044a5 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -129,7 +129,7 @@ void __init native_init_IRQ(void)
129 for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { 129 for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
130 /* SYSCALL_VECTOR was reserved in trap_init. */ 130 /* SYSCALL_VECTOR was reserved in trap_init. */
131 if (i != SYSCALL_VECTOR) 131 if (i != SYSCALL_VECTOR)
132 set_intr_gate(i, interrupt[i]); 132 set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
133 } 133 }
134 134
135 135
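The stub table no longer carries dead slots below FIRST_EXTERNAL_VECTOR, so every consumer must translate vector to index; the lguest hunk below applies the same translation. A sketch of the invariant (set_external_gate is a hypothetical wrapper, not part of the patch):

/* interrupt[] now holds NR_VECTORS - FIRST_EXTERNAL_VECTOR stubs,
 * with interrupt[0] serving vector FIRST_EXTERNAL_VECTOR */
static void set_external_gate(unsigned int vector)
{
	BUG_ON(vector < FIRST_EXTERNAL_VECTOR || vector >= NR_VECTORS);
	set_intr_gate(vector, interrupt[vector - FIRST_EXTERNAL_VECTOR]);
}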
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index ff0235391285..8670b3ce626e 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -24,41 +24,6 @@
24#include <asm/i8259.h> 24#include <asm/i8259.h>
25 25
26/* 26/*
27 * Common place to define all x86 IRQ vectors
28 *
29 * This builds up the IRQ handler stubs using some ugly macros in irq.h
30 *
31 * These macros create the low-level assembly IRQ routines that save
32 * register context and call do_IRQ(). do_IRQ() then does all the
33 * operations that are needed to keep the AT (or SMP IOAPIC)
34 * interrupt-controller happy.
35 */
36
37#define IRQ_NAME2(nr) nr##_interrupt(void)
38#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
39
40/*
41 * SMP has a few special interrupts for IPI messages
42 */
43
44#define BUILD_IRQ(nr) \
45 asmlinkage void IRQ_NAME(nr); \
46 asm("\n.text\n.p2align\n" \
47 "IRQ" #nr "_interrupt:\n\t" \
48 "push $~(" #nr ") ; " \
49 "jmp common_interrupt\n" \
50 ".previous");
51
52#define BI(x,y) \
53 BUILD_IRQ(x##y)
54
55#define BUILD_16_IRQS(x) \
56 BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
57 BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
58 BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
59 BI(x,c) BI(x,d) BI(x,e) BI(x,f)
60
61/*
62 * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: 27 * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
63 * (these are usually mapped to vectors 0x30-0x3f) 28 * (these are usually mapped to vectors 0x30-0x3f)
64 */ 29 */
@@ -73,37 +38,6 @@
73 * 38 *
74 * (these are usually mapped into the 0x30-0xff vector range) 39 * (these are usually mapped into the 0x30-0xff vector range)
75 */ 40 */
76 BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
77BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
78BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
79BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
80
81#undef BUILD_16_IRQS
82#undef BI
83
84
85#define IRQ(x,y) \
86 IRQ##x##y##_interrupt
87
88#define IRQLIST_16(x) \
89 IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
90 IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
91 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
92 IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
93
94/* for the irq vectors */
95static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
96 IRQLIST_16(0x2), IRQLIST_16(0x3),
97 IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
98 IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
99 IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf)
100};
101
102#undef IRQ
103#undef IRQLIST_16
104
105
106
107 41
108/* 42/*
109 * IRQ2 is cascade interrupt to second interrupt controller 43 * IRQ2 is cascade interrupt to second interrupt controller
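For reference, mechanically expanding the deleted generator, BUILD_IRQ(0x20) used to emit the following (derived from the macros removed above; the stubs are now built in entry_64.S instead):

asmlinkage void IRQ0x20_interrupt(void);
asm("\n.text\n.p2align\n"
    "IRQ0x20_interrupt:\n\t"
    "push $~(0x20) ; "
    "jmp common_interrupt\n"
    ".previous");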
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 3f92b134ab90..7e558db362c1 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -165,11 +165,7 @@ static void native_smp_send_stop(void)
165void smp_reschedule_interrupt(struct pt_regs *regs) 165void smp_reschedule_interrupt(struct pt_regs *regs)
166{ 166{
167 ack_APIC_irq(); 167 ack_APIC_irq();
168#ifdef CONFIG_X86_32 168 inc_irq_stat(irq_resched_count);
169 __get_cpu_var(irq_stat).irq_resched_count++;
170#else
171 add_pda(irq_resched_count, 1);
172#endif
173} 169}
174 170
175void smp_call_function_interrupt(struct pt_regs *regs) 171void smp_call_function_interrupt(struct pt_regs *regs)
@@ -177,11 +173,7 @@ void smp_call_function_interrupt(struct pt_regs *regs)
177 ack_APIC_irq(); 173 ack_APIC_irq();
178 irq_enter(); 174 irq_enter();
179 generic_smp_call_function_interrupt(); 175 generic_smp_call_function_interrupt();
180#ifdef CONFIG_X86_32 176 inc_irq_stat(irq_call_count);
181 __get_cpu_var(irq_stat).irq_call_count++;
182#else
183 add_pda(irq_call_count, 1);
184#endif
185 irq_exit(); 177 irq_exit();
186} 178}
187 179
@@ -190,11 +182,7 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
190 ack_APIC_irq(); 182 ack_APIC_irq();
191 irq_enter(); 183 irq_enter();
192 generic_smp_call_function_single_interrupt(); 184 generic_smp_call_function_single_interrupt();
193#ifdef CONFIG_X86_32 185 inc_irq_stat(irq_call_count);
194 __get_cpu_var(irq_stat).irq_call_count++;
195#else
196 add_pda(irq_call_count, 1);
197#endif
198 irq_exit(); 186 irq_exit();
199} 187}
200 188
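Per the diffstat, inc_irq_stat() arrives as a two-line change in each of hardirq_32.h and hardirq_64.h. Judging from the open-coded sequences it replaces here, a plausible sketch of the helper (assumed, not quoted from the patch):

#ifdef CONFIG_X86_32
#define inc_irq_stat(member)	(__get_cpu_var(irq_stat).member++)
#else
#define inc_irq_stat(member)	add_pda(member, 1)
#endif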
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 77b400f06ea2..65309e4cb1c0 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -75,7 +75,7 @@ EXPORT_SYMBOL(profile_pc);
75irqreturn_t timer_interrupt(int irq, void *dev_id) 75irqreturn_t timer_interrupt(int irq, void *dev_id)
76{ 76{
77 /* Keep nmi watchdog up to date */ 77 /* Keep nmi watchdog up to date */
78 per_cpu(irq_stat, smp_processor_id()).irq0_irqs++; 78 inc_irq_stat(irq0_irqs);
79 79
80#ifdef CONFIG_X86_IO_APIC 80#ifdef CONFIG_X86_IO_APIC
81 if (timer_ack) { 81 if (timer_ack) {
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index 1b7711b31037..891e7a7c4334 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -51,7 +51,7 @@ EXPORT_SYMBOL(profile_pc);
51 51
52static irqreturn_t timer_interrupt(int irq, void *dev_id) 52static irqreturn_t timer_interrupt(int irq, void *dev_id)
53{ 53{
54 add_pda(irq0_irqs, 1); 54 inc_irq_stat(irq0_irqs);
55 55
56 global_clock_event->event_handler(global_clock_event); 56 global_clock_event->event_handler(global_clock_event);
57 57
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index 4290d918b58a..8da059f949be 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -118,7 +118,7 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
118 smp_mb__after_clear_bit(); 118 smp_mb__after_clear_bit();
119out: 119out:
120 put_cpu_no_resched(); 120 put_cpu_no_resched();
121 __get_cpu_var(irq_stat).irq_tlb_count++; 121 inc_irq_stat(irq_tlb_count);
122} 122}
123 123
124void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, 124void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 8f919ca69494..29887d7081a9 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -154,7 +154,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
154out: 154out:
155 ack_APIC_irq(); 155 ack_APIC_irq();
156 cpu_clear(cpu, f->flush_cpumask); 156 cpu_clear(cpu, f->flush_cpumask);
157 add_pda(irq_tlb_count, 1); 157 inc_irq_stat(irq_tlb_count);
158} 158}
159 159
160void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, 160void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c320c29255c2..73ece2633f47 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -481,11 +481,7 @@ do_nmi(struct pt_regs *regs, long error_code)
481{ 481{
482 nmi_enter(); 482 nmi_enter();
483 483
484#ifdef CONFIG_X86_32 484 inc_irq_stat(__nmi_count);
485 { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); }
486#else
487 add_pda(__nmi_count, 1);
488#endif
489 485
490 if (!ignore_nmis) 486 if (!ignore_nmis)
491 default_do_nmi(regs); 487 default_do_nmi(regs);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index a5d8e1ace1cf..50a779264bb1 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -590,7 +590,8 @@ static void __init lguest_init_IRQ(void)
590 * a straightforward 1 to 1 mapping, so force that here. */ 590 * a straightforward 1 to 1 mapping, so force that here. */
591 __get_cpu_var(vector_irq)[vector] = i; 591 __get_cpu_var(vector_irq)[vector] = i;
592 if (vector != SYSCALL_VECTOR) { 592 if (vector != SYSCALL_VECTOR) {
593 set_intr_gate(vector, interrupt[vector]); 593 set_intr_gate(vector,
594 interrupt[vector-FIRST_EXTERNAL_VECTOR]);
594 set_irq_chip_and_handler_name(i, &lguest_irq_controller, 595 set_irq_chip_and_handler_name(i, &lguest_irq_controller,
595 handle_level_irq, 596 handle_level_irq,
596 "level"); 597 "level");
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 9fd1f859021b..fee9e59649c1 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -64,14 +64,6 @@
64 name: 64 name:
65#endif 65#endif
66 66
67#define KPROBE_ENTRY(name) \
68 .pushsection .kprobes.text, "ax"; \
69 ENTRY(name)
70
71#define KPROBE_END(name) \
72 END(name); \
73 .popsection
74
75#ifndef END 67#ifndef END
76#define END(name) \ 68#define END(name) \
77 .size name, .-name 69 .size name, .-name