Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                        12
-rw-r--r--  arch/x86/ia32/ia32_signal.c              5
-rw-r--r--  arch/x86/include/asm/bitops.h           10
-rw-r--r--  arch/x86/include/asm/byteorder.h        74
-rw-r--r--  arch/x86/include/asm/dwarf2.h           97
-rw-r--r--  arch/x86/include/asm/hw_irq.h            4
-rw-r--r--  arch/x86/include/asm/irq.h               4
-rw-r--r--  arch/x86/include/asm/irq_regs_32.h       2
-rw-r--r--  arch/x86/include/asm/linkage.h          60
-rw-r--r--  arch/x86/include/asm/tsc.h               8
-rw-r--r--  arch/x86/kernel/Makefile                 1
-rw-r--r--  arch/x86/kernel/entry_32.S             477
-rw-r--r--  arch/x86/kernel/entry_64.S            1360
-rw-r--r--  arch/x86/kernel/irq_64.c                24
-rw-r--r--  arch/x86/kernel/irqinit_32.c             2
-rw-r--r--  arch/x86/kernel/irqinit_64.c            66
-rw-r--r--  arch/x86/kernel/time_64.c                2
-rw-r--r--  arch/x86/kernel/vsyscall_64.c            9
-rw-r--r--  arch/x86/lguest/boot.c                   3
-rw-r--r--  arch/x86/mm/init_32.c                    2
20 files changed, 1171 insertions, 1051 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ac22bb7719f7..d4d4cb7629ea 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -242,21 +242,13 @@ config X86_FIND_SMP_CONFIG
242 def_bool y 242 def_bool y
243 depends on X86_MPPARSE || X86_VOYAGER 243 depends on X86_MPPARSE || X86_VOYAGER
244 244
245if ACPI
246config X86_MPPARSE 245config X86_MPPARSE
247 def_bool y 246 bool "Enable MPS table" if ACPI
248 bool "Enable MPS table" 247 default y
249 depends on X86_LOCAL_APIC 248 depends on X86_LOCAL_APIC
250 help 249 help
251 For old smp systems that do not have proper acpi support. Newer systems 250 For old smp systems that do not have proper acpi support. Newer systems
252 (esp with 64bit cpus) with acpi support, MADT and DSDT will override it 251 (esp with 64bit cpus) with acpi support, MADT and DSDT will override it
253endif
254
255if !ACPI
256config X86_MPPARSE
257 def_bool y
258 depends on X86_LOCAL_APIC
259endif
260 252
261choice 253choice
262 prompt "Subarchitecture Type" 254 prompt "Subarchitecture Type"
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 4bc02b23674b..e82ebd652263 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -572,11 +572,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
572 regs->dx = (unsigned long) &frame->info; 572 regs->dx = (unsigned long) &frame->info;
573 regs->cx = (unsigned long) &frame->uc; 573 regs->cx = (unsigned long) &frame->uc;
574 574
575 /* Make -mregparm=3 work */
576 regs->ax = sig;
577 regs->dx = (unsigned long) &frame->info;
578 regs->cx = (unsigned long) &frame->uc;
579
580 loadsegment(ds, __USER32_DS); 575 loadsegment(ds, __USER32_DS);
581 loadsegment(es, __USER32_DS); 576 loadsegment(es, __USER32_DS);
582 577
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 360010322711..9fa9dcdf344b 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -168,7 +168,15 @@ static inline void __change_bit(int nr, volatile unsigned long *addr)
168 */ 168 */
169static inline void change_bit(int nr, volatile unsigned long *addr) 169static inline void change_bit(int nr, volatile unsigned long *addr)
170{ 170{
171 asm volatile(LOCK_PREFIX "btc %1,%0" : ADDR : "Ir" (nr)); 171 if (IS_IMMEDIATE(nr)) {
172 asm volatile(LOCK_PREFIX "xorb %1,%0"
173 : CONST_MASK_ADDR(nr, addr)
174 : "iq" ((u8)CONST_MASK(nr)));
175 } else {
176 asm volatile(LOCK_PREFIX "btc %1,%0"
177 : BITOP_ADDR(addr)
178 : "Ir" (nr));
179 }
172} 180}
173 181
174/** 182/**
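
The change_bit() hunk above adds a fast path for bit numbers that are known at compile time: a constant nr is folded into a byte-wide XOR with an immediate mask, while a variable nr still goes through the btc instruction. Below is a minimal, self-contained sketch of that dispatch pattern using GCC builtins and x86 inline asm; the names are illustrative, not the kernel's IS_IMMEDIATE/CONST_MASK machinery.

#include <stdint.h>

/* Sketch only: dispatch on __builtin_constant_p so a constant bit number
 * becomes a byte-sized "lock xorb $imm,mem", while a variable one falls
 * back to "lock btcl reg,mem". */
static inline void sketch_change_bit(int nr, volatile unsigned long *addr)
{
	volatile uint8_t *byte = (volatile uint8_t *)addr + (nr >> 3);

	if (__builtin_constant_p(nr)) {
		/* constant bit: toggle one byte with an immediate mask */
		asm volatile("lock; xorb %1,%0"
			     : "+m" (*byte)
			     : "iq" ((uint8_t)(1 << (nr & 7))));
	} else {
		/* variable bit: use the bit-test-and-complement instruction */
		asm volatile("lock; btcl %1,%0"
			     : "+m" (*addr)
			     : "Ir" (nr));
	}
}

The constant path is mainly about code size: an xorb with an 8-bit immediate is shorter than loading the bit number into a register just to feed btc.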
diff --git a/arch/x86/include/asm/byteorder.h b/arch/x86/include/asm/byteorder.h
index e02ae2d89acf..f110ad417df3 100644
--- a/arch/x86/include/asm/byteorder.h
+++ b/arch/x86/include/asm/byteorder.h
@@ -4,26 +4,33 @@
4#include <asm/types.h> 4#include <asm/types.h>
5#include <linux/compiler.h> 5#include <linux/compiler.h>
6 6
7#ifdef __GNUC__ 7#define __LITTLE_ENDIAN
8 8
9#ifdef __i386__ 9static inline __attribute_const__ __u32 __arch_swab32(__u32 val)
10
11static inline __attribute_const__ __u32 ___arch__swab32(__u32 x)
12{ 10{
13#ifdef CONFIG_X86_BSWAP 11#ifdef __i386__
14 asm("bswap %0" : "=r" (x) : "0" (x)); 12# ifdef CONFIG_X86_BSWAP
15#else 13 asm("bswap %0" : "=r" (val) : "0" (val));
14# else
16 asm("xchgb %b0,%h0\n\t" /* swap lower bytes */ 15 asm("xchgb %b0,%h0\n\t" /* swap lower bytes */
17 "rorl $16,%0\n\t" /* swap words */ 16 "rorl $16,%0\n\t" /* swap words */
18 "xchgb %b0,%h0" /* swap higher bytes */ 17 "xchgb %b0,%h0" /* swap higher bytes */
19 : "=q" (x) 18 : "=q" (val)
20 : "0" (x)); 19 : "0" (val));
20# endif
21
22#else /* __i386__ */
23 asm("bswapl %0"
24 : "=r" (val)
25 : "0" (val));
21#endif 26#endif
22 return x; 27 return val;
23} 28}
29#define __arch_swab32 __arch_swab32
24 30
25static inline __attribute_const__ __u64 ___arch__swab64(__u64 val) 31static inline __attribute_const__ __u64 __arch_swab64(__u64 val)
26{ 32{
33#ifdef __i386__
27 union { 34 union {
28 struct { 35 struct {
29 __u32 a; 36 __u32 a;
@@ -32,50 +39,27 @@ static inline __attribute_const__ __u64 ___arch__swab64(__u64 val)
32 __u64 u; 39 __u64 u;
33 } v; 40 } v;
34 v.u = val; 41 v.u = val;
35#ifdef CONFIG_X86_BSWAP 42# ifdef CONFIG_X86_BSWAP
36 asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1" 43 asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1"
37 : "=r" (v.s.a), "=r" (v.s.b) 44 : "=r" (v.s.a), "=r" (v.s.b)
38 : "0" (v.s.a), "1" (v.s.b)); 45 : "0" (v.s.a), "1" (v.s.b));
39#else 46# else
40 v.s.a = ___arch__swab32(v.s.a); 47 v.s.a = __arch_swab32(v.s.a);
41 v.s.b = ___arch__swab32(v.s.b); 48 v.s.b = __arch_swab32(v.s.b);
42 asm("xchgl %0,%1" 49 asm("xchgl %0,%1"
43 : "=r" (v.s.a), "=r" (v.s.b) 50 : "=r" (v.s.a), "=r" (v.s.b)
44 : "0" (v.s.a), "1" (v.s.b)); 51 : "0" (v.s.a), "1" (v.s.b));
45#endif 52# endif
46 return v.u; 53 return v.u;
47}
48
49#else /* __i386__ */ 54#else /* __i386__ */
50
51static inline __attribute_const__ __u64 ___arch__swab64(__u64 x)
52{
53 asm("bswapq %0" 55 asm("bswapq %0"
54 : "=r" (x) 56 : "=r" (val)
55 : "0" (x)); 57 : "0" (val));
56 return x; 58 return val;
57}
58
59static inline __attribute_const__ __u32 ___arch__swab32(__u32 x)
60{
61 asm("bswapl %0"
62 : "=r" (x)
63 : "0" (x));
64 return x;
65}
66
67#endif 59#endif
60}
61#define __arch_swab64 __arch_swab64
68 62
69/* Do not define swab16. Gcc is smart enough to recognize "C" version and 63#include <linux/byteorder.h>
70 convert it into rotation or exhange. */
71
72#define __arch__swab64(x) ___arch__swab64(x)
73#define __arch__swab32(x) ___arch__swab32(x)
74
75#define __BYTEORDER_HAS_U64__
76
77#endif /* __GNUC__ */
78
79#include <linux/byteorder/little_endian.h>
80 64
81#endif /* _ASM_X86_BYTEORDER_H */ 65#endif /* _ASM_X86_BYTEORDER_H */
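
The byteorder.h rewrite above moves to the arch-override convention: the architecture defines __arch_swab32/__arch_swab64, advertises each with a same-named #define, and then includes the generic <linux/byteorder.h>, which is expected to fall back to plain C when no override is present. A minimal sketch of that pattern, assuming x86 and using illustrative names rather than the kernel's:

#include <stdint.h>

/* Arch-provided fast path: one bswap instruction does the whole swap. */
static inline uint32_t sketch_arch_swab32(uint32_t val)
{
	asm("bswap %0" : "=r" (val) : "0" (val));
	return val;
}
#define sketch_arch_swab32 sketch_arch_swab32	/* advertise the override */

/* Generic side: use the arch override when defined, else portable shifts. */
static inline uint32_t sketch_swab32(uint32_t val)
{
#ifdef sketch_arch_swab32
	return sketch_arch_swab32(val);
#else
	return (val << 24) | ((val & 0x0000ff00u) << 8) |
	       ((val >> 8) & 0x0000ff00u) | (val >> 24);
#endif
}

The xchg/ror sequence kept in the 32-bit branch of the diff is there for CPUs without bswap (pre-486); everywhere else the single-instruction form wins.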
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index 804b6e6be929..3afc5e87cfdd 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -6,56 +6,91 @@
6#endif 6#endif
7 7
8/* 8/*
9 Macros for dwarf2 CFI unwind table entries. 9 * Macros for dwarf2 CFI unwind table entries.
10 See "as.info" for details on these pseudo ops. Unfortunately 10 * See "as.info" for details on these pseudo ops. Unfortunately
11 they are only supported in very new binutils, so define them 11 * they are only supported in very new binutils, so define them
12 away for older version. 12 * away for older version.
13 */ 13 */
14 14
15#ifdef CONFIG_AS_CFI 15#ifdef CONFIG_AS_CFI
16 16
17#define CFI_STARTPROC .cfi_startproc 17#define CFI_STARTPROC .cfi_startproc
18#define CFI_ENDPROC .cfi_endproc 18#define CFI_ENDPROC .cfi_endproc
19#define CFI_DEF_CFA .cfi_def_cfa 19#define CFI_DEF_CFA .cfi_def_cfa
20#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register 20#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
21#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset 21#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
22#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset 22#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
23#define CFI_OFFSET .cfi_offset 23#define CFI_OFFSET .cfi_offset
24#define CFI_REL_OFFSET .cfi_rel_offset 24#define CFI_REL_OFFSET .cfi_rel_offset
25#define CFI_REGISTER .cfi_register 25#define CFI_REGISTER .cfi_register
26#define CFI_RESTORE .cfi_restore 26#define CFI_RESTORE .cfi_restore
27#define CFI_REMEMBER_STATE .cfi_remember_state 27#define CFI_REMEMBER_STATE .cfi_remember_state
28#define CFI_RESTORE_STATE .cfi_restore_state 28#define CFI_RESTORE_STATE .cfi_restore_state
29#define CFI_UNDEFINED .cfi_undefined 29#define CFI_UNDEFINED .cfi_undefined
30 30
31#ifdef CONFIG_AS_CFI_SIGNAL_FRAME 31#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
32#define CFI_SIGNAL_FRAME .cfi_signal_frame 32#define CFI_SIGNAL_FRAME .cfi_signal_frame
33#else 33#else
34#define CFI_SIGNAL_FRAME 34#define CFI_SIGNAL_FRAME
35#endif 35#endif
36 36
37#else 37#else
38 38
39/* Due to the structure of pre-exisiting code, don't use assembler line 39/*
40 comment character # to ignore the arguments. Instead, use a dummy macro. */ 40 * Due to the structure of pre-exisiting code, don't use assembler line
41 * comment character # to ignore the arguments. Instead, use a dummy macro.
42 */
41.macro cfi_ignore a=0, b=0, c=0, d=0 43.macro cfi_ignore a=0, b=0, c=0, d=0
42.endm 44.endm
43 45
44#define CFI_STARTPROC cfi_ignore 46#define CFI_STARTPROC cfi_ignore
45#define CFI_ENDPROC cfi_ignore 47#define CFI_ENDPROC cfi_ignore
46#define CFI_DEF_CFA cfi_ignore 48#define CFI_DEF_CFA cfi_ignore
47#define CFI_DEF_CFA_REGISTER cfi_ignore 49#define CFI_DEF_CFA_REGISTER cfi_ignore
48#define CFI_DEF_CFA_OFFSET cfi_ignore 50#define CFI_DEF_CFA_OFFSET cfi_ignore
49#define CFI_ADJUST_CFA_OFFSET cfi_ignore 51#define CFI_ADJUST_CFA_OFFSET cfi_ignore
50#define CFI_OFFSET cfi_ignore 52#define CFI_OFFSET cfi_ignore
51#define CFI_REL_OFFSET cfi_ignore 53#define CFI_REL_OFFSET cfi_ignore
52#define CFI_REGISTER cfi_ignore 54#define CFI_REGISTER cfi_ignore
53#define CFI_RESTORE cfi_ignore 55#define CFI_RESTORE cfi_ignore
54#define CFI_REMEMBER_STATE cfi_ignore 56#define CFI_REMEMBER_STATE cfi_ignore
55#define CFI_RESTORE_STATE cfi_ignore 57#define CFI_RESTORE_STATE cfi_ignore
56#define CFI_UNDEFINED cfi_ignore 58#define CFI_UNDEFINED cfi_ignore
57#define CFI_SIGNAL_FRAME cfi_ignore 59#define CFI_SIGNAL_FRAME cfi_ignore
58 60
59#endif 61#endif
60 62
63/*
64 * An attempt to make CFI annotations more or less
65 * correct and shorter. It is implied that you know
66 * what you're doing if you use them.
67 */
68#ifdef __ASSEMBLY__
69#ifdef CONFIG_X86_64
70 .macro pushq_cfi reg
71 pushq \reg
72 CFI_ADJUST_CFA_OFFSET 8
73 .endm
74
75 .macro popq_cfi reg
76 popq \reg
77 CFI_ADJUST_CFA_OFFSET -8
78 .endm
79
80 .macro movq_cfi reg offset=0
81 movq %\reg, \offset(%rsp)
82 CFI_REL_OFFSET \reg, \offset
83 .endm
84
85 .macro movq_cfi_restore offset reg
86 movq \offset(%rsp), %\reg
87 CFI_RESTORE \reg
88 .endm
89#else /*!CONFIG_X86_64*/
90
91 /* 32bit defenitions are missed yet */
92
93#endif /*!CONFIG_X86_64*/
94#endif /*__ASSEMBLY__*/
95
61#endif /* _ASM_X86_DWARF2_H */ 96#endif /* _ASM_X86_DWARF2_H */
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b97aecb0b61d..8de644b6b959 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -109,9 +109,7 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
109#endif 109#endif
110#endif 110#endif
111 111
112#ifdef CONFIG_X86_32 112extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
113extern void (*const interrupt[NR_VECTORS])(void);
114#endif
115 113
116typedef int vector_irq_t[NR_VECTORS]; 114typedef int vector_irq_t[NR_VECTORS];
117DECLARE_PER_CPU(vector_irq_t, vector_irq); 115DECLARE_PER_CPU(vector_irq_t, vector_irq);
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index bae0eda95486..28e409fc73f3 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -31,10 +31,6 @@ static inline int irq_canonicalize(int irq)
31# endif 31# endif
32#endif 32#endif
33 33
34#ifdef CONFIG_IRQBALANCE
35extern int irqbalance_disable(char *str);
36#endif
37
38#ifdef CONFIG_HOTPLUG_CPU 34#ifdef CONFIG_HOTPLUG_CPU
39#include <linux/cpumask.h> 35#include <linux/cpumask.h>
40extern void fixup_irqs(cpumask_t map); 36extern void fixup_irqs(cpumask_t map);
diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h
index af2f02d27fc7..86afd7473457 100644
--- a/arch/x86/include/asm/irq_regs_32.h
+++ b/arch/x86/include/asm/irq_regs_32.h
@@ -9,6 +9,8 @@
9 9
10#include <asm/percpu.h> 10#include <asm/percpu.h>
11 11
12#define ARCH_HAS_OWN_IRQ_REGS
13
12DECLARE_PER_CPU(struct pt_regs *, irq_regs); 14DECLARE_PER_CPU(struct pt_regs *, irq_regs);
13 15
14static inline struct pt_regs *get_irq_regs(void) 16static inline struct pt_regs *get_irq_regs(void)
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index f61ee8f937e4..5d98d0b68ffc 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -57,5 +57,65 @@
57#define __ALIGN_STR ".align 16,0x90" 57#define __ALIGN_STR ".align 16,0x90"
58#endif 58#endif
59 59
60/*
61 * to check ENTRY_X86/END_X86 and
62 * KPROBE_ENTRY_X86/KPROBE_END_X86
63 * unbalanced-missed-mixed appearance
64 */
65#define __set_entry_x86 .set ENTRY_X86_IN, 0
66#define __unset_entry_x86 .set ENTRY_X86_IN, 1
67#define __set_kprobe_x86 .set KPROBE_X86_IN, 0
68#define __unset_kprobe_x86 .set KPROBE_X86_IN, 1
69
70#define __macro_err_x86 .error "ENTRY_X86/KPROBE_X86 unbalanced,missed,mixed"
71
72#define __check_entry_x86 \
73 .ifdef ENTRY_X86_IN; \
74 .ifeq ENTRY_X86_IN; \
75 __macro_err_x86; \
76 .abort; \
77 .endif; \
78 .endif
79
80#define __check_kprobe_x86 \
81 .ifdef KPROBE_X86_IN; \
82 .ifeq KPROBE_X86_IN; \
83 __macro_err_x86; \
84 .abort; \
85 .endif; \
86 .endif
87
88#define __check_entry_kprobe_x86 \
89 __check_entry_x86; \
90 __check_kprobe_x86
91
92#define ENTRY_KPROBE_FINAL_X86 __check_entry_kprobe_x86
93
94#define ENTRY_X86(name) \
95 __check_entry_kprobe_x86; \
96 __set_entry_x86; \
97 .globl name; \
98 __ALIGN; \
99 name:
100
101#define END_X86(name) \
102 __unset_entry_x86; \
103 __check_entry_kprobe_x86; \
104 .size name, .-name
105
106#define KPROBE_ENTRY_X86(name) \
107 __check_entry_kprobe_x86; \
108 __set_kprobe_x86; \
109 .pushsection .kprobes.text, "ax"; \
110 .globl name; \
111 __ALIGN; \
112 name:
113
114#define KPROBE_END_X86(name) \
115 __unset_kprobe_x86; \
116 __check_entry_kprobe_x86; \
117 .size name, .-name; \
118 .popsection
119
60#endif /* _ASM_X86_LINKAGE_H */ 120#endif /* _ASM_X86_LINKAGE_H */
61 121
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 9cd83a8e40d5..38ae163cc91b 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -34,8 +34,6 @@ static inline cycles_t get_cycles(void)
34 34
35static __always_inline cycles_t vget_cycles(void) 35static __always_inline cycles_t vget_cycles(void)
36{ 36{
37 cycles_t cycles;
38
39 /* 37 /*
40 * We only do VDSOs on TSC capable CPUs, so this shouldnt 38 * We only do VDSOs on TSC capable CPUs, so this shouldnt
41 * access boot_cpu_data (which is not VDSO-safe): 39 * access boot_cpu_data (which is not VDSO-safe):
@@ -44,11 +42,7 @@ static __always_inline cycles_t vget_cycles(void)
44 if (!cpu_has_tsc) 42 if (!cpu_has_tsc)
45 return 0; 43 return 0;
46#endif 44#endif
47 rdtsc_barrier(); 45 return (cycles_t)__native_read_tsc();
48 cycles = (cycles_t)__native_read_tsc();
49 rdtsc_barrier();
50
51 return cycles;
52} 46}
53 47
54extern void tsc_init(void); 48extern void tsc_init(void);
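
The vget_cycles() change above drops the surrounding rdtsc_barrier() calls and returns the raw counter from __native_read_tsc(). For reference, reading the TSC reduces to the rdtsc instruction, roughly as in this sketch (the helper name is illustrative, not the kernel's):

#include <stdint.h>

/* Sketch: rdtsc returns the 64-bit time-stamp counter split across
 * edx:eax; reassemble it into a single value. */
static inline uint64_t sketch_read_tsc(void)
{
	uint32_t lo, hi;

	asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
	return ((uint64_t)hi << 32) | lo;
}

Since rdtsc itself can be reordered by the CPU, dropping the barriers here presumably leaves any ordering requirements to the callers that actually need them.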
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index b62a7667828e..3de1f2350457 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -12,6 +12,7 @@ CFLAGS_REMOVE_tsc.o = -pg
12CFLAGS_REMOVE_rtc.o = -pg 12CFLAGS_REMOVE_rtc.o = -pg
13CFLAGS_REMOVE_paravirt-spinlocks.o = -pg 13CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
14CFLAGS_REMOVE_ftrace.o = -pg 14CFLAGS_REMOVE_ftrace.o = -pg
15CFLAGS_REMOVE_early_printk.o = -pg
15endif 16endif
16 17
17# 18#
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 28b597ef9ca1..fe7014176eb0 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -619,28 +619,37 @@ END(syscall_badsys)
61927:; 61927:;
620 620
621/* 621/*
622 * Build the entry stubs and pointer table with 622 * Build the entry stubs and pointer table with some assembler magic.
623 * some assembler magic. 623 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
624 * single cache line on all modern x86 implementations.
624 */ 625 */
625.section .rodata,"a" 626.section .init.rodata,"a"
626ENTRY(interrupt) 627ENTRY(interrupt)
627.text 628.text
628 629 .p2align 5
630 .p2align CONFIG_X86_L1_CACHE_SHIFT
629ENTRY(irq_entries_start) 631ENTRY(irq_entries_start)
630 RING0_INT_FRAME 632 RING0_INT_FRAME
631vector=0 633vector=FIRST_EXTERNAL_VECTOR
632.rept NR_VECTORS 634.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
633 ALIGN 635 .balign 32
634 .if vector 636 .rept 7
637 .if vector < NR_VECTORS
638 .if vector <> FIRST_EXTERNAL_VECTOR
635 CFI_ADJUST_CFA_OFFSET -4 639 CFI_ADJUST_CFA_OFFSET -4
636 .endif 640 .endif
6371: pushl $~(vector) 6411: pushl $(~vector+0x80) /* Note: always in signed byte range */
638 CFI_ADJUST_CFA_OFFSET 4 642 CFI_ADJUST_CFA_OFFSET 4
639 jmp common_interrupt 643 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
640 .previous 644 jmp 2f
645 .endif
646 .previous
641 .long 1b 647 .long 1b
642 .text 648 .text
643vector=vector+1 649vector=vector+1
650 .endif
651 .endr
6522: jmp common_interrupt
644.endr 653.endr
645END(irq_entries_start) 654END(irq_entries_start)
646 655
@@ -652,8 +661,9 @@ END(interrupt)
652 * the CPU automatically disables interrupts when executing an IRQ vector, 661 * the CPU automatically disables interrupts when executing an IRQ vector,
653 * so IRQ-flags tracing has to follow that: 662 * so IRQ-flags tracing has to follow that:
654 */ 663 */
655 ALIGN 664 .p2align CONFIG_X86_L1_CACHE_SHIFT
656common_interrupt: 665common_interrupt:
666 addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */
657 SAVE_ALL 667 SAVE_ALL
658 TRACE_IRQS_OFF 668 TRACE_IRQS_OFF
659 movl %esp,%eax 669 movl %esp,%eax
@@ -678,65 +688,6 @@ ENDPROC(name)
678/* The include is where all of the SMP etc. interrupts come from */ 688/* The include is where all of the SMP etc. interrupts come from */
679#include "entry_arch.h" 689#include "entry_arch.h"
680 690
681KPROBE_ENTRY(page_fault)
682 RING0_EC_FRAME
683 pushl $do_page_fault
684 CFI_ADJUST_CFA_OFFSET 4
685 ALIGN
686error_code:
687 /* the function address is in %fs's slot on the stack */
688 pushl %es
689 CFI_ADJUST_CFA_OFFSET 4
690 /*CFI_REL_OFFSET es, 0*/
691 pushl %ds
692 CFI_ADJUST_CFA_OFFSET 4
693 /*CFI_REL_OFFSET ds, 0*/
694 pushl %eax
695 CFI_ADJUST_CFA_OFFSET 4
696 CFI_REL_OFFSET eax, 0
697 pushl %ebp
698 CFI_ADJUST_CFA_OFFSET 4
699 CFI_REL_OFFSET ebp, 0
700 pushl %edi
701 CFI_ADJUST_CFA_OFFSET 4
702 CFI_REL_OFFSET edi, 0
703 pushl %esi
704 CFI_ADJUST_CFA_OFFSET 4
705 CFI_REL_OFFSET esi, 0
706 pushl %edx
707 CFI_ADJUST_CFA_OFFSET 4
708 CFI_REL_OFFSET edx, 0
709 pushl %ecx
710 CFI_ADJUST_CFA_OFFSET 4
711 CFI_REL_OFFSET ecx, 0
712 pushl %ebx
713 CFI_ADJUST_CFA_OFFSET 4
714 CFI_REL_OFFSET ebx, 0
715 cld
716 pushl %fs
717 CFI_ADJUST_CFA_OFFSET 4
718 /*CFI_REL_OFFSET fs, 0*/
719 movl $(__KERNEL_PERCPU), %ecx
720 movl %ecx, %fs
721 UNWIND_ESPFIX_STACK
722 popl %ecx
723 CFI_ADJUST_CFA_OFFSET -4
724 /*CFI_REGISTER es, ecx*/
725 movl PT_FS(%esp), %edi # get the function address
726 movl PT_ORIG_EAX(%esp), %edx # get the error code
727 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
728 mov %ecx, PT_FS(%esp)
729 /*CFI_REL_OFFSET fs, ES*/
730 movl $(__USER_DS), %ecx
731 movl %ecx, %ds
732 movl %ecx, %es
733 TRACE_IRQS_OFF
734 movl %esp,%eax # pt_regs pointer
735 call *%edi
736 jmp ret_from_exception
737 CFI_ENDPROC
738KPROBE_END(page_fault)
739
740ENTRY(coprocessor_error) 691ENTRY(coprocessor_error)
741 RING0_INT_FRAME 692 RING0_INT_FRAME
742 pushl $0 693 pushl $0
@@ -767,140 +718,6 @@ ENTRY(device_not_available)
767 CFI_ENDPROC 718 CFI_ENDPROC
768END(device_not_available) 719END(device_not_available)
769 720
770/*
771 * Debug traps and NMI can happen at the one SYSENTER instruction
772 * that sets up the real kernel stack. Check here, since we can't
773 * allow the wrong stack to be used.
774 *
775 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
776 * already pushed 3 words if it hits on the sysenter instruction:
777 * eflags, cs and eip.
778 *
779 * We just load the right stack, and push the three (known) values
780 * by hand onto the new stack - while updating the return eip past
781 * the instruction that would have done it for sysenter.
782 */
783#define FIX_STACK(offset, ok, label) \
784 cmpw $__KERNEL_CS,4(%esp); \
785 jne ok; \
786label: \
787 movl TSS_sysenter_sp0+offset(%esp),%esp; \
788 CFI_DEF_CFA esp, 0; \
789 CFI_UNDEFINED eip; \
790 pushfl; \
791 CFI_ADJUST_CFA_OFFSET 4; \
792 pushl $__KERNEL_CS; \
793 CFI_ADJUST_CFA_OFFSET 4; \
794 pushl $sysenter_past_esp; \
795 CFI_ADJUST_CFA_OFFSET 4; \
796 CFI_REL_OFFSET eip, 0
797
798KPROBE_ENTRY(debug)
799 RING0_INT_FRAME
800 cmpl $ia32_sysenter_target,(%esp)
801 jne debug_stack_correct
802 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
803debug_stack_correct:
804 pushl $-1 # mark this as an int
805 CFI_ADJUST_CFA_OFFSET 4
806 SAVE_ALL
807 TRACE_IRQS_OFF
808 xorl %edx,%edx # error code 0
809 movl %esp,%eax # pt_regs pointer
810 call do_debug
811 jmp ret_from_exception
812 CFI_ENDPROC
813KPROBE_END(debug)
814
815/*
816 * NMI is doubly nasty. It can happen _while_ we're handling
817 * a debug fault, and the debug fault hasn't yet been able to
818 * clear up the stack. So we first check whether we got an
819 * NMI on the sysenter entry path, but after that we need to
820 * check whether we got an NMI on the debug path where the debug
821 * fault happened on the sysenter path.
822 */
823KPROBE_ENTRY(nmi)
824 RING0_INT_FRAME
825 pushl %eax
826 CFI_ADJUST_CFA_OFFSET 4
827 movl %ss, %eax
828 cmpw $__ESPFIX_SS, %ax
829 popl %eax
830 CFI_ADJUST_CFA_OFFSET -4
831 je nmi_espfix_stack
832 cmpl $ia32_sysenter_target,(%esp)
833 je nmi_stack_fixup
834 pushl %eax
835 CFI_ADJUST_CFA_OFFSET 4
836 movl %esp,%eax
837 /* Do not access memory above the end of our stack page,
838 * it might not exist.
839 */
840 andl $(THREAD_SIZE-1),%eax
841 cmpl $(THREAD_SIZE-20),%eax
842 popl %eax
843 CFI_ADJUST_CFA_OFFSET -4
844 jae nmi_stack_correct
845 cmpl $ia32_sysenter_target,12(%esp)
846 je nmi_debug_stack_check
847nmi_stack_correct:
848 /* We have a RING0_INT_FRAME here */
849 pushl %eax
850 CFI_ADJUST_CFA_OFFSET 4
851 SAVE_ALL
852 TRACE_IRQS_OFF
853 xorl %edx,%edx # zero error code
854 movl %esp,%eax # pt_regs pointer
855 call do_nmi
856 jmp restore_nocheck_notrace
857 CFI_ENDPROC
858
859nmi_stack_fixup:
860 RING0_INT_FRAME
861 FIX_STACK(12,nmi_stack_correct, 1)
862 jmp nmi_stack_correct
863
864nmi_debug_stack_check:
865 /* We have a RING0_INT_FRAME here */
866 cmpw $__KERNEL_CS,16(%esp)
867 jne nmi_stack_correct
868 cmpl $debug,(%esp)
869 jb nmi_stack_correct
870 cmpl $debug_esp_fix_insn,(%esp)
871 ja nmi_stack_correct
872 FIX_STACK(24,nmi_stack_correct, 1)
873 jmp nmi_stack_correct
874
875nmi_espfix_stack:
876 /* We have a RING0_INT_FRAME here.
877 *
878 * create the pointer to lss back
879 */
880 pushl %ss
881 CFI_ADJUST_CFA_OFFSET 4
882 pushl %esp
883 CFI_ADJUST_CFA_OFFSET 4
884 addw $4, (%esp)
885 /* copy the iret frame of 12 bytes */
886 .rept 3
887 pushl 16(%esp)
888 CFI_ADJUST_CFA_OFFSET 4
889 .endr
890 pushl %eax
891 CFI_ADJUST_CFA_OFFSET 4
892 SAVE_ALL
893 TRACE_IRQS_OFF
894 FIXUP_ESPFIX_STACK # %eax == %esp
895 xorl %edx,%edx # zero error code
896 call do_nmi
897 RESTORE_REGS
898 lss 12+4(%esp), %esp # back to espfix stack
899 CFI_ADJUST_CFA_OFFSET -24
900 jmp irq_return
901 CFI_ENDPROC
902KPROBE_END(nmi)
903
904#ifdef CONFIG_PARAVIRT 721#ifdef CONFIG_PARAVIRT
905ENTRY(native_iret) 722ENTRY(native_iret)
906 iret 723 iret
@@ -916,19 +733,6 @@ ENTRY(native_irq_enable_sysexit)
916END(native_irq_enable_sysexit) 733END(native_irq_enable_sysexit)
917#endif 734#endif
918 735
919KPROBE_ENTRY(int3)
920 RING0_INT_FRAME
921 pushl $-1 # mark this as an int
922 CFI_ADJUST_CFA_OFFSET 4
923 SAVE_ALL
924 TRACE_IRQS_OFF
925 xorl %edx,%edx # zero error code
926 movl %esp,%eax # pt_regs pointer
927 call do_int3
928 jmp ret_from_exception
929 CFI_ENDPROC
930KPROBE_END(int3)
931
932ENTRY(overflow) 736ENTRY(overflow)
933 RING0_INT_FRAME 737 RING0_INT_FRAME
934 pushl $0 738 pushl $0
@@ -993,14 +797,6 @@ ENTRY(stack_segment)
993 CFI_ENDPROC 797 CFI_ENDPROC
994END(stack_segment) 798END(stack_segment)
995 799
996KPROBE_ENTRY(general_protection)
997 RING0_EC_FRAME
998 pushl $do_general_protection
999 CFI_ADJUST_CFA_OFFSET 4
1000 jmp error_code
1001 CFI_ENDPROC
1002KPROBE_END(general_protection)
1003
1004ENTRY(alignment_check) 800ENTRY(alignment_check)
1005 RING0_EC_FRAME 801 RING0_EC_FRAME
1006 pushl $do_alignment_check 802 pushl $do_alignment_check
@@ -1051,6 +847,7 @@ ENTRY(kernel_thread_helper)
1051 push %eax 847 push %eax
1052 CFI_ADJUST_CFA_OFFSET 4 848 CFI_ADJUST_CFA_OFFSET 4
1053 call do_exit 849 call do_exit
850 ud2 # padding for call trace
1054 CFI_ENDPROC 851 CFI_ENDPROC
1055ENDPROC(kernel_thread_helper) 852ENDPROC(kernel_thread_helper)
1056 853
@@ -1210,3 +1007,227 @@ END(mcount)
1210#include "syscall_table_32.S" 1007#include "syscall_table_32.S"
1211 1008
1212syscall_table_size=(.-sys_call_table) 1009syscall_table_size=(.-sys_call_table)
1010
1011/*
1012 * Some functions should be protected against kprobes
1013 */
1014 .pushsection .kprobes.text, "ax"
1015
1016ENTRY(page_fault)
1017 RING0_EC_FRAME
1018 pushl $do_page_fault
1019 CFI_ADJUST_CFA_OFFSET 4
1020 ALIGN
1021error_code:
1022 /* the function address is in %fs's slot on the stack */
1023 pushl %es
1024 CFI_ADJUST_CFA_OFFSET 4
1025 /*CFI_REL_OFFSET es, 0*/
1026 pushl %ds
1027 CFI_ADJUST_CFA_OFFSET 4
1028 /*CFI_REL_OFFSET ds, 0*/
1029 pushl %eax
1030 CFI_ADJUST_CFA_OFFSET 4
1031 CFI_REL_OFFSET eax, 0
1032 pushl %ebp
1033 CFI_ADJUST_CFA_OFFSET 4
1034 CFI_REL_OFFSET ebp, 0
1035 pushl %edi
1036 CFI_ADJUST_CFA_OFFSET 4
1037 CFI_REL_OFFSET edi, 0
1038 pushl %esi
1039 CFI_ADJUST_CFA_OFFSET 4
1040 CFI_REL_OFFSET esi, 0
1041 pushl %edx
1042 CFI_ADJUST_CFA_OFFSET 4
1043 CFI_REL_OFFSET edx, 0
1044 pushl %ecx
1045 CFI_ADJUST_CFA_OFFSET 4
1046 CFI_REL_OFFSET ecx, 0
1047 pushl %ebx
1048 CFI_ADJUST_CFA_OFFSET 4
1049 CFI_REL_OFFSET ebx, 0
1050 cld
1051 pushl %fs
1052 CFI_ADJUST_CFA_OFFSET 4
1053 /*CFI_REL_OFFSET fs, 0*/
1054 movl $(__KERNEL_PERCPU), %ecx
1055 movl %ecx, %fs
1056 UNWIND_ESPFIX_STACK
1057 popl %ecx
1058 CFI_ADJUST_CFA_OFFSET -4
1059 /*CFI_REGISTER es, ecx*/
1060 movl PT_FS(%esp), %edi # get the function address
1061 movl PT_ORIG_EAX(%esp), %edx # get the error code
1062 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
1063 mov %ecx, PT_FS(%esp)
1064 /*CFI_REL_OFFSET fs, ES*/
1065 movl $(__USER_DS), %ecx
1066 movl %ecx, %ds
1067 movl %ecx, %es
1068 TRACE_IRQS_OFF
1069 movl %esp,%eax # pt_regs pointer
1070 call *%edi
1071 jmp ret_from_exception
1072 CFI_ENDPROC
1073END(page_fault)
1074
1075/*
1076 * Debug traps and NMI can happen at the one SYSENTER instruction
1077 * that sets up the real kernel stack. Check here, since we can't
1078 * allow the wrong stack to be used.
1079 *
1080 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
1081 * already pushed 3 words if it hits on the sysenter instruction:
1082 * eflags, cs and eip.
1083 *
1084 * We just load the right stack, and push the three (known) values
1085 * by hand onto the new stack - while updating the return eip past
1086 * the instruction that would have done it for sysenter.
1087 */
1088#define FIX_STACK(offset, ok, label) \
1089 cmpw $__KERNEL_CS,4(%esp); \
1090 jne ok; \
1091label: \
1092 movl TSS_sysenter_sp0+offset(%esp),%esp; \
1093 CFI_DEF_CFA esp, 0; \
1094 CFI_UNDEFINED eip; \
1095 pushfl; \
1096 CFI_ADJUST_CFA_OFFSET 4; \
1097 pushl $__KERNEL_CS; \
1098 CFI_ADJUST_CFA_OFFSET 4; \
1099 pushl $sysenter_past_esp; \
1100 CFI_ADJUST_CFA_OFFSET 4; \
1101 CFI_REL_OFFSET eip, 0
1102
1103ENTRY(debug)
1104 RING0_INT_FRAME
1105 cmpl $ia32_sysenter_target,(%esp)
1106 jne debug_stack_correct
1107 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
1108debug_stack_correct:
1109 pushl $-1 # mark this as an int
1110 CFI_ADJUST_CFA_OFFSET 4
1111 SAVE_ALL
1112 TRACE_IRQS_OFF
1113 xorl %edx,%edx # error code 0
1114 movl %esp,%eax # pt_regs pointer
1115 call do_debug
1116 jmp ret_from_exception
1117 CFI_ENDPROC
1118END(debug)
1119
1120/*
1121 * NMI is doubly nasty. It can happen _while_ we're handling
1122 * a debug fault, and the debug fault hasn't yet been able to
1123 * clear up the stack. So we first check whether we got an
1124 * NMI on the sysenter entry path, but after that we need to
1125 * check whether we got an NMI on the debug path where the debug
1126 * fault happened on the sysenter path.
1127 */
1128ENTRY(nmi)
1129 RING0_INT_FRAME
1130 pushl %eax
1131 CFI_ADJUST_CFA_OFFSET 4
1132 movl %ss, %eax
1133 cmpw $__ESPFIX_SS, %ax
1134 popl %eax
1135 CFI_ADJUST_CFA_OFFSET -4
1136 je nmi_espfix_stack
1137 cmpl $ia32_sysenter_target,(%esp)
1138 je nmi_stack_fixup
1139 pushl %eax
1140 CFI_ADJUST_CFA_OFFSET 4
1141 movl %esp,%eax
1142 /* Do not access memory above the end of our stack page,
1143 * it might not exist.
1144 */
1145 andl $(THREAD_SIZE-1),%eax
1146 cmpl $(THREAD_SIZE-20),%eax
1147 popl %eax
1148 CFI_ADJUST_CFA_OFFSET -4
1149 jae nmi_stack_correct
1150 cmpl $ia32_sysenter_target,12(%esp)
1151 je nmi_debug_stack_check
1152nmi_stack_correct:
1153 /* We have a RING0_INT_FRAME here */
1154 pushl %eax
1155 CFI_ADJUST_CFA_OFFSET 4
1156 SAVE_ALL
1157 TRACE_IRQS_OFF
1158 xorl %edx,%edx # zero error code
1159 movl %esp,%eax # pt_regs pointer
1160 call do_nmi
1161 jmp restore_nocheck_notrace
1162 CFI_ENDPROC
1163
1164nmi_stack_fixup:
1165 RING0_INT_FRAME
1166 FIX_STACK(12,nmi_stack_correct, 1)
1167 jmp nmi_stack_correct
1168
1169nmi_debug_stack_check:
1170 /* We have a RING0_INT_FRAME here */
1171 cmpw $__KERNEL_CS,16(%esp)
1172 jne nmi_stack_correct
1173 cmpl $debug,(%esp)
1174 jb nmi_stack_correct
1175 cmpl $debug_esp_fix_insn,(%esp)
1176 ja nmi_stack_correct
1177 FIX_STACK(24,nmi_stack_correct, 1)
1178 jmp nmi_stack_correct
1179
1180nmi_espfix_stack:
1181 /* We have a RING0_INT_FRAME here.
1182 *
1183 * create the pointer to lss back
1184 */
1185 pushl %ss
1186 CFI_ADJUST_CFA_OFFSET 4
1187 pushl %esp
1188 CFI_ADJUST_CFA_OFFSET 4
1189 addw $4, (%esp)
1190 /* copy the iret frame of 12 bytes */
1191 .rept 3
1192 pushl 16(%esp)
1193 CFI_ADJUST_CFA_OFFSET 4
1194 .endr
1195 pushl %eax
1196 CFI_ADJUST_CFA_OFFSET 4
1197 SAVE_ALL
1198 TRACE_IRQS_OFF
1199 FIXUP_ESPFIX_STACK # %eax == %esp
1200 xorl %edx,%edx # zero error code
1201 call do_nmi
1202 RESTORE_REGS
1203 lss 12+4(%esp), %esp # back to espfix stack
1204 CFI_ADJUST_CFA_OFFSET -24
1205 jmp irq_return
1206 CFI_ENDPROC
1207END(nmi)
1208
1209ENTRY(int3)
1210 RING0_INT_FRAME
1211 pushl $-1 # mark this as an int
1212 CFI_ADJUST_CFA_OFFSET 4
1213 SAVE_ALL
1214 TRACE_IRQS_OFF
1215 xorl %edx,%edx # zero error code
1216 movl %esp,%eax # pt_regs pointer
1217 call do_int3
1218 jmp ret_from_exception
1219 CFI_ENDPROC
1220END(int3)
1221
1222ENTRY(general_protection)
1223 RING0_EC_FRAME
1224 pushl $do_general_protection
1225 CFI_ADJUST_CFA_OFFSET 4
1226 jmp error_code
1227 CFI_ENDPROC
1228END(general_protection)
1229
1230/*
1231 * End of kprobes section
1232 */
1233 .popsection
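
The irq_entries_start rework above (and its 64-bit counterpart in the next file) leans on a small piece of arithmetic: for any vector in 0..255, ~vector+0x80 equals 0x7f-vector, which always fits a signed byte, so each push encodes with a one-byte immediate and seven stubs plus the shared jump to common_interrupt fit the 32-byte chunk the comment describes. common_interrupt then adds -0x80 to land in the [-256,-1] range, and the C handler complements that to recover the vector. A small self-contained check of the round trip, in plain C with illustrative variable names:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	for (int vector = 0; vector < 256; vector++) {
		int pushed = ~vector + 0x80;	/* value the stub pushes */

		/* 0x7f - vector: always within a signed byte */
		assert(pushed >= -128 && pushed <= 127);

		/* common_interrupt: addl $-0x80,(%esp) */
		int orig_ax = pushed - 0x80;
		assert(orig_ax >= -256 && orig_ax <= -1);

		/* the C handler recovers the vector by complementing orig_ax */
		assert(~orig_ax == vector);
	}
	printf("all 256 vectors round-trip through the stub encoding\n");
	return 0;
}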
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b86f332c96a6..3194636a4293 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -11,15 +11,15 @@
11 * 11 *
12 * NOTE: This code handles signal-recognition, which happens every time 12 * NOTE: This code handles signal-recognition, which happens every time
13 * after an interrupt and after each system call. 13 * after an interrupt and after each system call.
14 * 14 *
15 * Normal syscalls and interrupts don't save a full stack frame, this is 15 * Normal syscalls and interrupts don't save a full stack frame, this is
16 * only done for syscall tracing, signals or fork/exec et.al. 16 * only done for syscall tracing, signals or fork/exec et.al.
17 * 17 *
18 * A note on terminology: 18 * A note on terminology:
19 * - top of stack: Architecture defined interrupt frame from SS to RIP 19 * - top of stack: Architecture defined interrupt frame from SS to RIP
20 * at the top of the kernel process stack. 20 * at the top of the kernel process stack.
21 * - partial stack frame: partially saved registers upto R11. 21 * - partial stack frame: partially saved registers upto R11.
22 * - full stack frame: Like partial stack frame, but all register saved. 22 * - full stack frame: Like partial stack frame, but all register saved.
23 * 23 *
24 * Some macro usage: 24 * Some macro usage:
25 * - CFI macros are used to generate dwarf2 unwind information for better 25 * - CFI macros are used to generate dwarf2 unwind information for better
@@ -60,7 +60,6 @@
60#define __AUDIT_ARCH_LE 0x40000000 60#define __AUDIT_ARCH_LE 0x40000000
61 61
62 .code64 62 .code64
63
64#ifdef CONFIG_FUNCTION_TRACER 63#ifdef CONFIG_FUNCTION_TRACER
65#ifdef CONFIG_DYNAMIC_FTRACE 64#ifdef CONFIG_DYNAMIC_FTRACE
66ENTRY(mcount) 65ENTRY(mcount)
@@ -142,7 +141,7 @@ END(mcount)
142 141
143#ifndef CONFIG_PREEMPT 142#ifndef CONFIG_PREEMPT
144#define retint_kernel retint_restore_args 143#define retint_kernel retint_restore_args
145#endif 144#endif
146 145
147#ifdef CONFIG_PARAVIRT 146#ifdef CONFIG_PARAVIRT
148ENTRY(native_usergs_sysret64) 147ENTRY(native_usergs_sysret64)
@@ -161,29 +160,29 @@ ENTRY(native_usergs_sysret64)
161.endm 160.endm
162 161
163/* 162/*
164 * C code is not supposed to know about undefined top of stack. Every time 163 * C code is not supposed to know about undefined top of stack. Every time
165 * a C function with an pt_regs argument is called from the SYSCALL based 164 * a C function with an pt_regs argument is called from the SYSCALL based
166 * fast path FIXUP_TOP_OF_STACK is needed. 165 * fast path FIXUP_TOP_OF_STACK is needed.
167 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs 166 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
168 * manipulation. 167 * manipulation.
169 */ 168 */
170 169
171 /* %rsp:at FRAMEEND */ 170 /* %rsp:at FRAMEEND */
172 .macro FIXUP_TOP_OF_STACK tmp 171 .macro FIXUP_TOP_OF_STACK tmp offset=0
173 movq %gs:pda_oldrsp,\tmp 172 movq %gs:pda_oldrsp,\tmp
174 movq \tmp,RSP(%rsp) 173 movq \tmp,RSP+\offset(%rsp)
175 movq $__USER_DS,SS(%rsp) 174 movq $__USER_DS,SS+\offset(%rsp)
176 movq $__USER_CS,CS(%rsp) 175 movq $__USER_CS,CS+\offset(%rsp)
177 movq $-1,RCX(%rsp) 176 movq $-1,RCX+\offset(%rsp)
178 movq R11(%rsp),\tmp /* get eflags */ 177 movq R11+\offset(%rsp),\tmp /* get eflags */
179 movq \tmp,EFLAGS(%rsp) 178 movq \tmp,EFLAGS+\offset(%rsp)
180 .endm 179 .endm
181 180
182 .macro RESTORE_TOP_OF_STACK tmp,offset=0 181 .macro RESTORE_TOP_OF_STACK tmp offset=0
183 movq RSP-\offset(%rsp),\tmp 182 movq RSP+\offset(%rsp),\tmp
184 movq \tmp,%gs:pda_oldrsp 183 movq \tmp,%gs:pda_oldrsp
185 movq EFLAGS-\offset(%rsp),\tmp 184 movq EFLAGS+\offset(%rsp),\tmp
186 movq \tmp,R11-\offset(%rsp) 185 movq \tmp,R11+\offset(%rsp)
187 .endm 186 .endm
188 187
189 .macro FAKE_STACK_FRAME child_rip 188 .macro FAKE_STACK_FRAME child_rip
@@ -195,7 +194,7 @@ ENTRY(native_usergs_sysret64)
195 pushq %rax /* rsp */ 194 pushq %rax /* rsp */
196 CFI_ADJUST_CFA_OFFSET 8 195 CFI_ADJUST_CFA_OFFSET 8
197 CFI_REL_OFFSET rsp,0 196 CFI_REL_OFFSET rsp,0
198 pushq $(1<<9) /* eflags - interrupts on */ 197 pushq $X86_EFLAGS_IF /* eflags - interrupts on */
199 CFI_ADJUST_CFA_OFFSET 8 198 CFI_ADJUST_CFA_OFFSET 8
200 /*CFI_REL_OFFSET rflags,0*/ 199 /*CFI_REL_OFFSET rflags,0*/
201 pushq $__KERNEL_CS /* cs */ 200 pushq $__KERNEL_CS /* cs */
@@ -213,62 +212,184 @@ ENTRY(native_usergs_sysret64)
213 CFI_ADJUST_CFA_OFFSET -(6*8) 212 CFI_ADJUST_CFA_OFFSET -(6*8)
214 .endm 213 .endm
215 214
216 .macro CFI_DEFAULT_STACK start=1 215/*
216 * initial frame state for interrupts (and exceptions without error code)
217 */
218 .macro EMPTY_FRAME start=1 offset=0
217 .if \start 219 .if \start
218 CFI_STARTPROC simple 220 CFI_STARTPROC simple
219 CFI_SIGNAL_FRAME 221 CFI_SIGNAL_FRAME
220 CFI_DEF_CFA rsp,SS+8 222 CFI_DEF_CFA rsp,8+\offset
221 .else 223 .else
222 CFI_DEF_CFA_OFFSET SS+8 224 CFI_DEF_CFA_OFFSET 8+\offset
223 .endif 225 .endif
224 CFI_REL_OFFSET r15,R15
225 CFI_REL_OFFSET r14,R14
226 CFI_REL_OFFSET r13,R13
227 CFI_REL_OFFSET r12,R12
228 CFI_REL_OFFSET rbp,RBP
229 CFI_REL_OFFSET rbx,RBX
230 CFI_REL_OFFSET r11,R11
231 CFI_REL_OFFSET r10,R10
232 CFI_REL_OFFSET r9,R9
233 CFI_REL_OFFSET r8,R8
234 CFI_REL_OFFSET rax,RAX
235 CFI_REL_OFFSET rcx,RCX
236 CFI_REL_OFFSET rdx,RDX
237 CFI_REL_OFFSET rsi,RSI
238 CFI_REL_OFFSET rdi,RDI
239 CFI_REL_OFFSET rip,RIP
240 /*CFI_REL_OFFSET cs,CS*/
241 /*CFI_REL_OFFSET rflags,EFLAGS*/
242 CFI_REL_OFFSET rsp,RSP
243 /*CFI_REL_OFFSET ss,SS*/
244 .endm 226 .endm
227
228/*
229 * initial frame state for interrupts (and exceptions without error code)
230 */
231 .macro INTR_FRAME start=1 offset=0
232 EMPTY_FRAME \start, SS+8+\offset-RIP
233 /*CFI_REL_OFFSET ss, SS+\offset-RIP*/
234 CFI_REL_OFFSET rsp, RSP+\offset-RIP
235 /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
236 /*CFI_REL_OFFSET cs, CS+\offset-RIP*/
237 CFI_REL_OFFSET rip, RIP+\offset-RIP
238 .endm
239
240/*
241 * initial frame state for exceptions with error code (and interrupts
242 * with vector already pushed)
243 */
244 .macro XCPT_FRAME start=1 offset=0
245 INTR_FRAME \start, RIP+\offset-ORIG_RAX
246 /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
247 .endm
248
249/*
250 * frame that enables calling into C.
251 */
252 .macro PARTIAL_FRAME start=1 offset=0
253 XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
254 CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
255 CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
256 CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
257 CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
258 CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
259 CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
260 CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
261 CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
262 CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
263 .endm
264
265/*
266 * frame that enables passing a complete pt_regs to a C function.
267 */
268 .macro DEFAULT_FRAME start=1 offset=0
269 PARTIAL_FRAME \start, R11+\offset-R15
270 CFI_REL_OFFSET rbx, RBX+\offset
271 CFI_REL_OFFSET rbp, RBP+\offset
272 CFI_REL_OFFSET r12, R12+\offset
273 CFI_REL_OFFSET r13, R13+\offset
274 CFI_REL_OFFSET r14, R14+\offset
275 CFI_REL_OFFSET r15, R15+\offset
276 .endm
277
278/* save partial stack frame */
279ENTRY(save_args)
280 XCPT_FRAME
281 cld
282 movq_cfi rdi, RDI+16-ARGOFFSET
283 movq_cfi rsi, RSI+16-ARGOFFSET
284 movq_cfi rdx, RDX+16-ARGOFFSET
285 movq_cfi rcx, RCX+16-ARGOFFSET
286 movq_cfi rax, RAX+16-ARGOFFSET
287 movq_cfi r8, R8+16-ARGOFFSET
288 movq_cfi r9, R9+16-ARGOFFSET
289 movq_cfi r10, R10+16-ARGOFFSET
290 movq_cfi r11, R11+16-ARGOFFSET
291
292 leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */
293 movq_cfi rbp, 8 /* push %rbp */
294 leaq 8(%rsp), %rbp /* mov %rsp, %ebp */
295 testl $3, CS(%rdi)
296 je 1f
297 SWAPGS
298 /*
299 * irqcount is used to check if a CPU is already on an interrupt stack
300 * or not. While this is essentially redundant with preempt_count it is
301 * a little cheaper to use a separate counter in the PDA (short of
302 * moving irq_enter into assembly, which would be too much work)
303 */
3041: incl %gs:pda_irqcount
305 jne 2f
306 popq_cfi %rax /* move return address... */
307 mov %gs:pda_irqstackptr,%rsp
308 EMPTY_FRAME 0
309 pushq_cfi %rax /* ... to the new stack */
310 /*
311 * We entered an interrupt context - irqs are off:
312 */
3132: TRACE_IRQS_OFF
314 ret
315 CFI_ENDPROC
316END(save_args)
317
318ENTRY(save_rest)
319 PARTIAL_FRAME 1 REST_SKIP+8
320 movq 5*8+16(%rsp), %r11 /* save return address */
321 movq_cfi rbx, RBX+16
322 movq_cfi rbp, RBP+16
323 movq_cfi r12, R12+16
324 movq_cfi r13, R13+16
325 movq_cfi r14, R14+16
326 movq_cfi r15, R15+16
327 movq %r11, 8(%rsp) /* return address */
328 FIXUP_TOP_OF_STACK %r11, 16
329 ret
330 CFI_ENDPROC
331END(save_rest)
332
333/* save complete stack frame */
334ENTRY(save_paranoid)
335 XCPT_FRAME 1 RDI+8
336 cld
337 movq_cfi rdi, RDI+8
338 movq_cfi rsi, RSI+8
339 movq_cfi rdx, RDX+8
340 movq_cfi rcx, RCX+8
341 movq_cfi rax, RAX+8
342 movq_cfi r8, R8+8
343 movq_cfi r9, R9+8
344 movq_cfi r10, R10+8
345 movq_cfi r11, R11+8
346 movq_cfi rbx, RBX+8
347 movq_cfi rbp, RBP+8
348 movq_cfi r12, R12+8
349 movq_cfi r13, R13+8
350 movq_cfi r14, R14+8
351 movq_cfi r15, R15+8
352 movl $1,%ebx
353 movl $MSR_GS_BASE,%ecx
354 rdmsr
355 testl %edx,%edx
356 js 1f /* negative -> in kernel */
357 SWAPGS
358 xorl %ebx,%ebx
3591: ret
360 CFI_ENDPROC
361END(save_paranoid)
362
245/* 363/*
246 * A newly forked process directly context switches into this. 364 * A newly forked process directly context switches into this address.
247 */ 365 *
248/* rdi: prev */ 366 * rdi: prev task we switched from
367 */
249ENTRY(ret_from_fork) 368ENTRY(ret_from_fork)
250 CFI_DEFAULT_STACK 369 DEFAULT_FRAME
370
251 push kernel_eflags(%rip) 371 push kernel_eflags(%rip)
252 CFI_ADJUST_CFA_OFFSET 8 372 CFI_ADJUST_CFA_OFFSET 8
253 popf # reset kernel eflags 373 popf # reset kernel eflags
254 CFI_ADJUST_CFA_OFFSET -8 374 CFI_ADJUST_CFA_OFFSET -8
255 call schedule_tail 375
376 call schedule_tail # rdi: 'prev' task parameter
377
256 GET_THREAD_INFO(%rcx) 378 GET_THREAD_INFO(%rcx)
257 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) 379
258 jnz rff_trace 380 CFI_REMEMBER_STATE
259rff_action:
260 RESTORE_REST 381 RESTORE_REST
261 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? 382
383 testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
262 je int_ret_from_sys_call 384 je int_ret_from_sys_call
263 testl $_TIF_IA32,TI_flags(%rcx) 385
386 testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
264 jnz int_ret_from_sys_call 387 jnz int_ret_from_sys_call
265 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET 388
266 jmp ret_from_sys_call 389 RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
267rff_trace: 390 jmp ret_from_sys_call # go to the SYSRET fastpath
268 movq %rsp,%rdi 391
269 call syscall_trace_leave 392 CFI_RESTORE_STATE
270 GET_THREAD_INFO(%rcx)
271 jmp rff_action
272 CFI_ENDPROC 393 CFI_ENDPROC
273END(ret_from_fork) 394END(ret_from_fork)
274 395
@@ -278,20 +399,20 @@ END(ret_from_fork)
278 * SYSCALL does not save anything on the stack and does not change the 399 * SYSCALL does not save anything on the stack and does not change the
279 * stack pointer. 400 * stack pointer.
280 */ 401 */
281 402
282/* 403/*
283 * Register setup: 404 * Register setup:
284 * rax system call number 405 * rax system call number
285 * rdi arg0 406 * rdi arg0
286 * rcx return address for syscall/sysret, C arg3 407 * rcx return address for syscall/sysret, C arg3
287 * rsi arg1 408 * rsi arg1
288 * rdx arg2 409 * rdx arg2
289 * r10 arg3 (--> moved to rcx for C) 410 * r10 arg3 (--> moved to rcx for C)
290 * r8 arg4 411 * r8 arg4
291 * r9 arg5 412 * r9 arg5
292 * r11 eflags for syscall/sysret, temporary for C 413 * r11 eflags for syscall/sysret, temporary for C
293 * r12-r15,rbp,rbx saved by C code, not touched. 414 * r12-r15,rbp,rbx saved by C code, not touched.
294 * 415 *
295 * Interrupts are off on entry. 416 * Interrupts are off on entry.
296 * Only called from user space. 417 * Only called from user space.
297 * 418 *
@@ -301,7 +422,7 @@ END(ret_from_fork)
301 * When user can change the frames always force IRET. That is because 422 * When user can change the frames always force IRET. That is because
302 * it deals with uncanonical addresses better. SYSRET has trouble 423 * it deals with uncanonical addresses better. SYSRET has trouble
303 * with them due to bugs in both AMD and Intel CPUs. 424 * with them due to bugs in both AMD and Intel CPUs.
304 */ 425 */
305 426
306ENTRY(system_call) 427ENTRY(system_call)
307 CFI_STARTPROC simple 428 CFI_STARTPROC simple
@@ -317,7 +438,7 @@ ENTRY(system_call)
317 */ 438 */
318ENTRY(system_call_after_swapgs) 439ENTRY(system_call_after_swapgs)
319 440
320 movq %rsp,%gs:pda_oldrsp 441 movq %rsp,%gs:pda_oldrsp
321 movq %gs:pda_kernelstack,%rsp 442 movq %gs:pda_kernelstack,%rsp
322 /* 443 /*
323 * No need to follow this irqs off/on section - it's straight 444 * No need to follow this irqs off/on section - it's straight
@@ -325,7 +446,7 @@ ENTRY(system_call_after_swapgs)
325 */ 446 */
326 ENABLE_INTERRUPTS(CLBR_NONE) 447 ENABLE_INTERRUPTS(CLBR_NONE)
327 SAVE_ARGS 8,1 448 SAVE_ARGS 8,1
328 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) 449 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
329 movq %rcx,RIP-ARGOFFSET(%rsp) 450 movq %rcx,RIP-ARGOFFSET(%rsp)
330 CFI_REL_OFFSET rip,RIP-ARGOFFSET 451 CFI_REL_OFFSET rip,RIP-ARGOFFSET
331 GET_THREAD_INFO(%rcx) 452 GET_THREAD_INFO(%rcx)
@@ -339,19 +460,19 @@ system_call_fastpath:
339 movq %rax,RAX-ARGOFFSET(%rsp) 460 movq %rax,RAX-ARGOFFSET(%rsp)
340/* 461/*
341 * Syscall return path ending with SYSRET (fast path) 462 * Syscall return path ending with SYSRET (fast path)
342 * Has incomplete stack frame and undefined top of stack. 463 * Has incomplete stack frame and undefined top of stack.
343 */ 464 */
344ret_from_sys_call: 465ret_from_sys_call:
345 movl $_TIF_ALLWORK_MASK,%edi 466 movl $_TIF_ALLWORK_MASK,%edi
346 /* edi: flagmask */ 467 /* edi: flagmask */
347sysret_check: 468sysret_check:
348 LOCKDEP_SYS_EXIT 469 LOCKDEP_SYS_EXIT
349 GET_THREAD_INFO(%rcx) 470 GET_THREAD_INFO(%rcx)
350 DISABLE_INTERRUPTS(CLBR_NONE) 471 DISABLE_INTERRUPTS(CLBR_NONE)
351 TRACE_IRQS_OFF 472 TRACE_IRQS_OFF
352 movl TI_flags(%rcx),%edx 473 movl TI_flags(%rcx),%edx
353 andl %edi,%edx 474 andl %edi,%edx
354 jnz sysret_careful 475 jnz sysret_careful
355 CFI_REMEMBER_STATE 476 CFI_REMEMBER_STATE
356 /* 477 /*
357 * sysretq will re-enable interrupts: 478 * sysretq will re-enable interrupts:
@@ -366,7 +487,7 @@ sysret_check:
366 487
367 CFI_RESTORE_STATE 488 CFI_RESTORE_STATE
368 /* Handle reschedules */ 489 /* Handle reschedules */
369 /* edx: work, edi: workmask */ 490 /* edx: work, edi: workmask */
370sysret_careful: 491sysret_careful:
371 bt $TIF_NEED_RESCHED,%edx 492 bt $TIF_NEED_RESCHED,%edx
372 jnc sysret_signal 493 jnc sysret_signal
@@ -379,7 +500,7 @@ sysret_careful:
379 CFI_ADJUST_CFA_OFFSET -8 500 CFI_ADJUST_CFA_OFFSET -8
380 jmp sysret_check 501 jmp sysret_check
381 502
382 /* Handle a signal */ 503 /* Handle a signal */
383sysret_signal: 504sysret_signal:
384 TRACE_IRQS_ON 505 TRACE_IRQS_ON
385 ENABLE_INTERRUPTS(CLBR_NONE) 506 ENABLE_INTERRUPTS(CLBR_NONE)
@@ -388,17 +509,20 @@ sysret_signal:
388 jc sysret_audit 509 jc sysret_audit
389#endif 510#endif
390 /* edx: work flags (arg3) */ 511 /* edx: work flags (arg3) */
391 leaq do_notify_resume(%rip),%rax
392 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 512 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
393 xorl %esi,%esi # oldset -> arg2 513 xorl %esi,%esi # oldset -> arg2
394 call ptregscall_common 514 SAVE_REST
515 FIXUP_TOP_OF_STACK %r11
516 call do_notify_resume
517 RESTORE_TOP_OF_STACK %r11
518 RESTORE_REST
395 movl $_TIF_WORK_MASK,%edi 519 movl $_TIF_WORK_MASK,%edi
396 /* Use IRET because user could have changed frame. This 520 /* Use IRET because user could have changed frame. This
397 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ 521 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
398 DISABLE_INTERRUPTS(CLBR_NONE) 522 DISABLE_INTERRUPTS(CLBR_NONE)
399 TRACE_IRQS_OFF 523 TRACE_IRQS_OFF
400 jmp int_with_check 524 jmp int_with_check
401 525
402badsys: 526badsys:
403 movq $-ENOSYS,RAX-ARGOFFSET(%rsp) 527 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
404 jmp ret_from_sys_call 528 jmp ret_from_sys_call
@@ -437,7 +561,7 @@ sysret_audit:
437#endif /* CONFIG_AUDITSYSCALL */ 561#endif /* CONFIG_AUDITSYSCALL */
438 562
439 /* Do syscall tracing */ 563 /* Do syscall tracing */
440tracesys: 564tracesys:
441#ifdef CONFIG_AUDITSYSCALL 565#ifdef CONFIG_AUDITSYSCALL
442 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) 566 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
443 jz auditsys 567 jz auditsys
@@ -460,8 +584,8 @@ tracesys:
460 call *sys_call_table(,%rax,8) 584 call *sys_call_table(,%rax,8)
461 movq %rax,RAX-ARGOFFSET(%rsp) 585 movq %rax,RAX-ARGOFFSET(%rsp)
462 /* Use IRET because user could have changed frame */ 586 /* Use IRET because user could have changed frame */
463 587
464/* 588/*
465 * Syscall return path ending with IRET. 589 * Syscall return path ending with IRET.
466 * Has correct top of stack, but partial stack frame. 590 * Has correct top of stack, but partial stack frame.
467 */ 591 */
@@ -505,18 +629,18 @@ int_very_careful:
505 TRACE_IRQS_ON 629 TRACE_IRQS_ON
506 ENABLE_INTERRUPTS(CLBR_NONE) 630 ENABLE_INTERRUPTS(CLBR_NONE)
507 SAVE_REST 631 SAVE_REST
508 /* Check for syscall exit trace */ 632 /* Check for syscall exit trace */
509 testl $_TIF_WORK_SYSCALL_EXIT,%edx 633 testl $_TIF_WORK_SYSCALL_EXIT,%edx
510 jz int_signal 634 jz int_signal
511 pushq %rdi 635 pushq %rdi
512 CFI_ADJUST_CFA_OFFSET 8 636 CFI_ADJUST_CFA_OFFSET 8
513 leaq 8(%rsp),%rdi # &ptregs -> arg1 637 leaq 8(%rsp),%rdi # &ptregs -> arg1
514 call syscall_trace_leave 638 call syscall_trace_leave
515 popq %rdi 639 popq %rdi
516 CFI_ADJUST_CFA_OFFSET -8 640 CFI_ADJUST_CFA_OFFSET -8
517 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi 641 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
518 jmp int_restore_rest 642 jmp int_restore_rest
519 643
520int_signal: 644int_signal:
521 testl $_TIF_DO_NOTIFY_MASK,%edx 645 testl $_TIF_DO_NOTIFY_MASK,%edx
522 jz 1f 646 jz 1f
@@ -531,22 +655,24 @@ int_restore_rest:
531 jmp int_with_check 655 jmp int_with_check
532 CFI_ENDPROC 656 CFI_ENDPROC
533END(system_call) 657END(system_call)
534 658
535/* 659/*
536 * Certain special system calls that need to save a complete full stack frame. 660 * Certain special system calls that need to save a complete full stack frame.
537 */ 661 */
538
539 .macro PTREGSCALL label,func,arg 662 .macro PTREGSCALL label,func,arg
540 .globl \label 663ENTRY(\label)
541\label: 664 PARTIAL_FRAME 1 8 /* offset 8: return address */
542 leaq \func(%rip),%rax 665 subq $REST_SKIP, %rsp
543 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ 666 CFI_ADJUST_CFA_OFFSET REST_SKIP
544 jmp ptregscall_common 667 call save_rest
668 DEFAULT_FRAME 0 8 /* offset 8: return address */
669 leaq 8(%rsp), \arg /* pt_regs pointer */
670 call \func
671 jmp ptregscall_common
672 CFI_ENDPROC
545END(\label) 673END(\label)
546 .endm 674 .endm
547 675
548 CFI_STARTPROC
549
550 PTREGSCALL stub_clone, sys_clone, %r8 676 PTREGSCALL stub_clone, sys_clone, %r8
551 PTREGSCALL stub_fork, sys_fork, %rdi 677 PTREGSCALL stub_fork, sys_fork, %rdi
552 PTREGSCALL stub_vfork, sys_vfork, %rdi 678 PTREGSCALL stub_vfork, sys_vfork, %rdi
@@ -554,25 +680,18 @@ END(\label)
554 PTREGSCALL stub_iopl, sys_iopl, %rsi 680 PTREGSCALL stub_iopl, sys_iopl, %rsi
555 681
556ENTRY(ptregscall_common) 682ENTRY(ptregscall_common)
557 popq %r11 683 DEFAULT_FRAME 1 8 /* offset 8: return address */
558 CFI_ADJUST_CFA_OFFSET -8 684 RESTORE_TOP_OF_STACK %r11, 8
559 CFI_REGISTER rip, r11 685 movq_cfi_restore R15+8, r15
560 SAVE_REST 686 movq_cfi_restore R14+8, r14
561 movq %r11, %r15 687 movq_cfi_restore R13+8, r13
562 CFI_REGISTER rip, r15 688 movq_cfi_restore R12+8, r12
563 FIXUP_TOP_OF_STACK %r11 689 movq_cfi_restore RBP+8, rbp
564 call *%rax 690 movq_cfi_restore RBX+8, rbx
565 RESTORE_TOP_OF_STACK %r11 691 ret $REST_SKIP /* pop extended registers */
566 movq %r15, %r11
567 CFI_REGISTER rip, r11
568 RESTORE_REST
569 pushq %r11
570 CFI_ADJUST_CFA_OFFSET 8
571 CFI_REL_OFFSET rip, 0
572 ret
573 CFI_ENDPROC 692 CFI_ENDPROC
574END(ptregscall_common) 693END(ptregscall_common)
575 694
576ENTRY(stub_execve) 695ENTRY(stub_execve)
577 CFI_STARTPROC 696 CFI_STARTPROC
578 popq %r11 697 popq %r11
@@ -588,11 +707,11 @@ ENTRY(stub_execve)
588 jmp int_ret_from_sys_call 707 jmp int_ret_from_sys_call
589 CFI_ENDPROC 708 CFI_ENDPROC
590END(stub_execve) 709END(stub_execve)
591 710
592/* 711/*
593 * sigreturn is special because it needs to restore all registers on return. 712 * sigreturn is special because it needs to restore all registers on return.
594 * This cannot be done with SYSRET, so use the IRET return path instead. 713 * This cannot be done with SYSRET, so use the IRET return path instead.
595 */ 714 */
596ENTRY(stub_rt_sigreturn) 715ENTRY(stub_rt_sigreturn)
597 CFI_STARTPROC 716 CFI_STARTPROC
598 addq $8, %rsp 717 addq $8, %rsp
@@ -608,70 +727,70 @@ ENTRY(stub_rt_sigreturn)
608END(stub_rt_sigreturn) 727END(stub_rt_sigreturn)
609 728
610/* 729/*
611 * initial frame state for interrupts and exceptions 730 * Build the entry stubs and pointer table with some assembler magic.
731 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
732 * single cache line on all modern x86 implementations.
612 */ 733 */
613 .macro _frame ref 734 .section .init.rodata,"a"
614 CFI_STARTPROC simple 735ENTRY(interrupt)
615 CFI_SIGNAL_FRAME 736 .text
616 CFI_DEF_CFA rsp,SS+8-\ref 737 .p2align 5
617 /*CFI_REL_OFFSET ss,SS-\ref*/ 738 .p2align CONFIG_X86_L1_CACHE_SHIFT
618 CFI_REL_OFFSET rsp,RSP-\ref 739ENTRY(irq_entries_start)
619 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ 740 INTR_FRAME
620 /*CFI_REL_OFFSET cs,CS-\ref*/ 741vector=FIRST_EXTERNAL_VECTOR
621 CFI_REL_OFFSET rip,RIP-\ref 742.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
622 .endm 743 .balign 32
744 .rept 7
745 .if vector < NR_VECTORS
746 .if vector <> FIRST_EXTERNAL_VECTOR
747 CFI_ADJUST_CFA_OFFSET -8
748 .endif
7491: pushq $(~vector+0x80) /* Note: always in signed byte range */
750 CFI_ADJUST_CFA_OFFSET 8
751 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
752 jmp 2f
753 .endif
754 .previous
755 .quad 1b
756 .text
757vector=vector+1
758 .endif
759 .endr
7602: jmp common_interrupt
761.endr
762 CFI_ENDPROC
763END(irq_entries_start)
623 764
624/* initial frame state for interrupts (and exceptions without error code) */ 765.previous
625#define INTR_FRAME _frame RIP 766END(interrupt)
626/* initial frame state for exceptions with error code (and interrupts with 767.previous
627 vector already pushed) */
628#define XCPT_FRAME _frame ORIG_RAX
629 768
630/* 769/*
631 * Interrupt entry/exit. 770 * Interrupt entry/exit.
632 * 771 *
633 * Interrupt entry points save only callee clobbered registers in fast path. 772 * Interrupt entry points save only callee clobbered registers in fast path.
634 * 773 *
635 * Entry runs with interrupts off. 774 * Entry runs with interrupts off.
636 */ 775 */
637 776
638/* 0(%rsp): interrupt number */ 777/* 0(%rsp): ~(interrupt number) */
639 .macro interrupt func 778 .macro interrupt func
640 cld 779 subq $10*8, %rsp
641 SAVE_ARGS 780 CFI_ADJUST_CFA_OFFSET 10*8
642 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler 781 call save_args
643 pushq %rbp 782 PARTIAL_FRAME 0
644 /*
645 * Save rbp twice: One is for marking the stack frame, as usual, and the
646 * other, to fill pt_regs properly. This is because bx comes right
647 * before the last saved register in that structure, and not bp. If the
648 * base pointer were in the place bx is today, this would not be needed.
649 */
650 movq %rbp, -8(%rsp)
651 CFI_ADJUST_CFA_OFFSET 8
652 CFI_REL_OFFSET rbp, 0
653 movq %rsp,%rbp
654 CFI_DEF_CFA_REGISTER rbp
655 testl $3,CS(%rdi)
656 je 1f
657 SWAPGS
658 /* irqcount is used to check if a CPU is already on an interrupt
659 stack or not. While this is essentially redundant with preempt_count
660 it is a little cheaper to use a separate counter in the PDA
661 (short of moving irq_enter into assembly, which would be too
662 much work) */
6631: incl %gs:pda_irqcount
664 cmoveq %gs:pda_irqstackptr,%rsp
665 push %rbp # backlink for old unwinder
666 /*
667 * We entered an interrupt context - irqs are off:
668 */
669 TRACE_IRQS_OFF
670 call \func 783 call \func
671 .endm 784 .endm
672 785
673ENTRY(common_interrupt) 786 /*
787 * The interrupt stubs push (~vector+0x80) onto the stack and
788 * then jump to common_interrupt.
789 */
790 .p2align CONFIG_X86_L1_CACHE_SHIFT
791common_interrupt:
674 XCPT_FRAME 792 XCPT_FRAME
793 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
675 interrupt do_IRQ 794 interrupt do_IRQ
676 /* 0(%rsp): oldrsp-ARGOFFSET */ 795 /* 0(%rsp): oldrsp-ARGOFFSET */
677ret_from_intr: 796ret_from_intr:
@@ -685,12 +804,12 @@ exit_intr:
685 GET_THREAD_INFO(%rcx) 804 GET_THREAD_INFO(%rcx)
686 testl $3,CS-ARGOFFSET(%rsp) 805 testl $3,CS-ARGOFFSET(%rsp)
687 je retint_kernel 806 je retint_kernel
688 807
689 /* Interrupt came from user space */ 808 /* Interrupt came from user space */
690 /* 809 /*
691 * Has a correct top of stack, but a partial stack frame 810 * Has a correct top of stack, but a partial stack frame
692 * %rcx: thread info. Interrupts off. 811 * %rcx: thread info. Interrupts off.
693 */ 812 */
694retint_with_reschedule: 813retint_with_reschedule:
695 movl $_TIF_WORK_MASK,%edi 814 movl $_TIF_WORK_MASK,%edi
696retint_check: 815retint_check:
@@ -763,20 +882,20 @@ retint_careful:
763 pushq %rdi 882 pushq %rdi
764 CFI_ADJUST_CFA_OFFSET 8 883 CFI_ADJUST_CFA_OFFSET 8
765 call schedule 884 call schedule
766 popq %rdi 885 popq %rdi
767 CFI_ADJUST_CFA_OFFSET -8 886 CFI_ADJUST_CFA_OFFSET -8
768 GET_THREAD_INFO(%rcx) 887 GET_THREAD_INFO(%rcx)
769 DISABLE_INTERRUPTS(CLBR_NONE) 888 DISABLE_INTERRUPTS(CLBR_NONE)
770 TRACE_IRQS_OFF 889 TRACE_IRQS_OFF
771 jmp retint_check 890 jmp retint_check
772 891
773retint_signal: 892retint_signal:
774 testl $_TIF_DO_NOTIFY_MASK,%edx 893 testl $_TIF_DO_NOTIFY_MASK,%edx
775 jz retint_swapgs 894 jz retint_swapgs
776 TRACE_IRQS_ON 895 TRACE_IRQS_ON
777 ENABLE_INTERRUPTS(CLBR_NONE) 896 ENABLE_INTERRUPTS(CLBR_NONE)
778 SAVE_REST 897 SAVE_REST
779 movq $-1,ORIG_RAX(%rsp) 898 movq $-1,ORIG_RAX(%rsp)
780 xorl %esi,%esi # oldset 899 xorl %esi,%esi # oldset
781 movq %rsp,%rdi # &pt_regs 900 movq %rsp,%rdi # &pt_regs
782 call do_notify_resume 901 call do_notify_resume
@@ -798,324 +917,211 @@ ENTRY(retint_kernel)
798 jnc retint_restore_args 917 jnc retint_restore_args
799 call preempt_schedule_irq 918 call preempt_schedule_irq
800 jmp exit_intr 919 jmp exit_intr
801#endif 920#endif
802 921
803 CFI_ENDPROC 922 CFI_ENDPROC
804END(common_interrupt) 923END(common_interrupt)
805 924
806/* 925/*
807 * APIC interrupts. 926 * APIC interrupts.
808 */ 927 */
809 .macro apicinterrupt num,func 928.macro apicinterrupt num sym do_sym
929ENTRY(\sym)
810 INTR_FRAME 930 INTR_FRAME
811 pushq $~(\num) 931 pushq $~(\num)
812 CFI_ADJUST_CFA_OFFSET 8 932 CFI_ADJUST_CFA_OFFSET 8
813 interrupt \func 933 interrupt \do_sym
814 jmp ret_from_intr 934 jmp ret_from_intr
815 CFI_ENDPROC 935 CFI_ENDPROC
816 .endm 936END(\sym)
817 937.endm
818ENTRY(thermal_interrupt)
819 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
820END(thermal_interrupt)
821
822ENTRY(threshold_interrupt)
823 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
824END(threshold_interrupt)
825
826#ifdef CONFIG_SMP
827ENTRY(reschedule_interrupt)
828 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
829END(reschedule_interrupt)
830
831 .macro INVALIDATE_ENTRY num
832ENTRY(invalidate_interrupt\num)
833 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
834END(invalidate_interrupt\num)
835 .endm
836 938
837 INVALIDATE_ENTRY 0 939#ifdef CONFIG_SMP
838 INVALIDATE_ENTRY 1 940apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
839 INVALIDATE_ENTRY 2 941 irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
840 INVALIDATE_ENTRY 3
841 INVALIDATE_ENTRY 4
842 INVALIDATE_ENTRY 5
843 INVALIDATE_ENTRY 6
844 INVALIDATE_ENTRY 7
845
846ENTRY(call_function_interrupt)
847 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
848END(call_function_interrupt)
849ENTRY(call_function_single_interrupt)
850 apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
851END(call_function_single_interrupt)
852ENTRY(irq_move_cleanup_interrupt)
853 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
854END(irq_move_cleanup_interrupt)
855#endif 942#endif
856 943
857ENTRY(apic_timer_interrupt) 944apicinterrupt UV_BAU_MESSAGE \
858 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt 945 uv_bau_message_intr1 uv_bau_message_interrupt
859END(apic_timer_interrupt) 946apicinterrupt LOCAL_TIMER_VECTOR \
947 apic_timer_interrupt smp_apic_timer_interrupt
948
949#ifdef CONFIG_SMP
950apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
951 invalidate_interrupt0 smp_invalidate_interrupt
952apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \
953 invalidate_interrupt1 smp_invalidate_interrupt
954apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \
955 invalidate_interrupt2 smp_invalidate_interrupt
956apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \
957 invalidate_interrupt3 smp_invalidate_interrupt
958apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \
959 invalidate_interrupt4 smp_invalidate_interrupt
960apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \
961 invalidate_interrupt5 smp_invalidate_interrupt
962apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \
963 invalidate_interrupt6 smp_invalidate_interrupt
964apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
965 invalidate_interrupt7 smp_invalidate_interrupt
966#endif
860 967
861ENTRY(uv_bau_message_intr1) 968apicinterrupt THRESHOLD_APIC_VECTOR \
862 apicinterrupt 220,uv_bau_message_interrupt 969 threshold_interrupt mce_threshold_interrupt
863END(uv_bau_message_intr1) 970apicinterrupt THERMAL_APIC_VECTOR \
971 thermal_interrupt smp_thermal_interrupt
972
973#ifdef CONFIG_SMP
974apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
975 call_function_single_interrupt smp_call_function_single_interrupt
976apicinterrupt CALL_FUNCTION_VECTOR \
977 call_function_interrupt smp_call_function_interrupt
978apicinterrupt RESCHEDULE_VECTOR \
979 reschedule_interrupt smp_reschedule_interrupt
980#endif
864 981
865ENTRY(error_interrupt) 982apicinterrupt ERROR_APIC_VECTOR \
866 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt 983 error_interrupt smp_error_interrupt
867END(error_interrupt) 984apicinterrupt SPURIOUS_APIC_VECTOR \
985 spurious_interrupt smp_spurious_interrupt
868 986
869ENTRY(spurious_interrupt)
870 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
871END(spurious_interrupt)
872
873/* 987/*
874 * Exception entry points. 988 * Exception entry points.
875 */ 989 */
876 .macro zeroentry sym 990.macro zeroentry sym do_sym
991ENTRY(\sym)
877 INTR_FRAME 992 INTR_FRAME
878 PARAVIRT_ADJUST_EXCEPTION_FRAME 993 PARAVIRT_ADJUST_EXCEPTION_FRAME
879 pushq $0 /* push error code/oldrax */ 994 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
880 CFI_ADJUST_CFA_OFFSET 8 995 subq $15*8,%rsp
881 pushq %rax /* push real oldrax to the rdi slot */ 996 CFI_ADJUST_CFA_OFFSET 15*8
882 CFI_ADJUST_CFA_OFFSET 8 997 call error_entry
883 CFI_REL_OFFSET rax,0 998 DEFAULT_FRAME 0
884 leaq \sym(%rip),%rax 999 movq %rsp,%rdi /* pt_regs pointer */
885 jmp error_entry 1000 xorl %esi,%esi /* no error code */
1001 call \do_sym
1002 jmp error_exit /* %ebx: no swapgs flag */
886 CFI_ENDPROC 1003 CFI_ENDPROC
887 .endm 1004END(\sym)
1005.endm
888 1006
889 .macro errorentry sym 1007.macro paranoidzeroentry sym do_sym
890 XCPT_FRAME 1008ENTRY(\sym)
1009 INTR_FRAME
891 PARAVIRT_ADJUST_EXCEPTION_FRAME 1010 PARAVIRT_ADJUST_EXCEPTION_FRAME
892 pushq %rax 1011 pushq $-1 /* ORIG_RAX: no syscall to restart */
893 CFI_ADJUST_CFA_OFFSET 8 1012 CFI_ADJUST_CFA_OFFSET 8
894 CFI_REL_OFFSET rax,0 1013 subq $15*8, %rsp
895 leaq \sym(%rip),%rax 1014 call save_paranoid
896 jmp error_entry 1015 TRACE_IRQS_OFF
1016 movq %rsp,%rdi /* pt_regs pointer */
1017 xorl %esi,%esi /* no error code */
1018 call \do_sym
1019 jmp paranoid_exit /* %ebx: no swapgs flag */
897 CFI_ENDPROC 1020 CFI_ENDPROC
898 .endm 1021END(\sym)
1022.endm
899 1023
900 /* error code is on the stack already */ 1024.macro paranoidzeroentry_ist sym do_sym ist
901 /* handle NMI like exceptions that can happen everywhere */ 1025ENTRY(\sym)
902 .macro paranoidentry sym, ist=0, irqtrace=1 1026 INTR_FRAME
903 SAVE_ALL 1027 PARAVIRT_ADJUST_EXCEPTION_FRAME
904 cld 1028 pushq $-1 /* ORIG_RAX: no syscall to restart */
905 movl $1,%ebx 1029 CFI_ADJUST_CFA_OFFSET 8
906 movl $MSR_GS_BASE,%ecx 1030 subq $15*8, %rsp
907 rdmsr 1031 call save_paranoid
908 testl %edx,%edx
909 js 1f
910 SWAPGS
911 xorl %ebx,%ebx
9121:
913 .if \ist
914 movq %gs:pda_data_offset, %rbp
915 .endif
916 .if \irqtrace
917 TRACE_IRQS_OFF
918 .endif
919 movq %rsp,%rdi
920 movq ORIG_RAX(%rsp),%rsi
921 movq $-1,ORIG_RAX(%rsp)
922 .if \ist
923 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
924 .endif
925 call \sym
926 .if \ist
927 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
928 .endif
929 DISABLE_INTERRUPTS(CLBR_NONE)
930 .if \irqtrace
931 TRACE_IRQS_OFF 1032 TRACE_IRQS_OFF
932 .endif 1033 movq %rsp,%rdi /* pt_regs pointer */
933 .endm 1034 xorl %esi,%esi /* no error code */
1035 movq %gs:pda_data_offset, %rbp
1036 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
1037 call \do_sym
1038 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
1039 jmp paranoid_exit /* %ebx: no swapgs flag */
1040 CFI_ENDPROC
1041END(\sym)
1042.endm
934 1043
935 /* 1044.macro errorentry sym do_sym
936 * "Paranoid" exit path from exception stack. 1045ENTRY(\sym)
937 * Paranoid because this is used by NMIs and cannot take 1046 XCPT_FRAME
938 * any kernel state for granted. 1047 PARAVIRT_ADJUST_EXCEPTION_FRAME
939 * We don't do kernel preemption checks here, because only 1048 subq $15*8,%rsp
940 * NMI should be common and it does not enable IRQs and 1049 CFI_ADJUST_CFA_OFFSET 15*8
941 * cannot get reschedule ticks. 1050 call error_entry
942 * 1051 DEFAULT_FRAME 0
943 * "trace" is 0 for the NMI handler only, because irq-tracing 1052 movq %rsp,%rdi /* pt_regs pointer */
944 * is fundamentally NMI-unsafe. (we cannot change the soft and 1053 movq ORIG_RAX(%rsp),%rsi /* get error code */
945 * hard flags at once, atomically) 1054 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
946 */ 1055 call \do_sym
947 .macro paranoidexit trace=1 1056 jmp error_exit /* %ebx: no swapgs flag */
948 /* ebx: no swapgs flag */
949paranoid_exit\trace:
950 testl %ebx,%ebx /* swapgs needed? */
951 jnz paranoid_restore\trace
952 testl $3,CS(%rsp)
953 jnz paranoid_userspace\trace
954paranoid_swapgs\trace:
955 .if \trace
956 TRACE_IRQS_IRETQ 0
957 .endif
958 SWAPGS_UNSAFE_STACK
959paranoid_restore\trace:
960 RESTORE_ALL 8
961 jmp irq_return
962paranoid_userspace\trace:
963 GET_THREAD_INFO(%rcx)
964 movl TI_flags(%rcx),%ebx
965 andl $_TIF_WORK_MASK,%ebx
966 jz paranoid_swapgs\trace
967 movq %rsp,%rdi /* &pt_regs */
968 call sync_regs
969 movq %rax,%rsp /* switch stack for scheduling */
970 testl $_TIF_NEED_RESCHED,%ebx
971 jnz paranoid_schedule\trace
972 movl %ebx,%edx /* arg3: thread flags */
973 .if \trace
974 TRACE_IRQS_ON
975 .endif
976 ENABLE_INTERRUPTS(CLBR_NONE)
977 xorl %esi,%esi /* arg2: oldset */
978 movq %rsp,%rdi /* arg1: &pt_regs */
979 call do_notify_resume
980 DISABLE_INTERRUPTS(CLBR_NONE)
981 .if \trace
982 TRACE_IRQS_OFF
983 .endif
984 jmp paranoid_userspace\trace
985paranoid_schedule\trace:
986 .if \trace
987 TRACE_IRQS_ON
988 .endif
989 ENABLE_INTERRUPTS(CLBR_ANY)
990 call schedule
991 DISABLE_INTERRUPTS(CLBR_ANY)
992 .if \trace
993 TRACE_IRQS_OFF
994 .endif
995 jmp paranoid_userspace\trace
996 CFI_ENDPROC 1057 CFI_ENDPROC
997 .endm 1058END(\sym)
1059.endm
998 1060
999/* 1061 /* error code is on the stack already */
1000 * Exception entry point. This expects an error code/orig_rax on the stack 1062.macro paranoiderrorentry sym do_sym
1001 * and the exception handler in %rax. 1063ENTRY(\sym)
1002 */ 1064 XCPT_FRAME
1003KPROBE_ENTRY(error_entry) 1065 PARAVIRT_ADJUST_EXCEPTION_FRAME
1004 _frame RDI 1066 subq $15*8,%rsp
1005 CFI_REL_OFFSET rax,0 1067 CFI_ADJUST_CFA_OFFSET 15*8
1006 /* rdi slot contains rax, oldrax contains error code */ 1068 call save_paranoid
1007 cld 1069 DEFAULT_FRAME 0
1008 subq $14*8,%rsp
1009 CFI_ADJUST_CFA_OFFSET (14*8)
1010 movq %rsi,13*8(%rsp)
1011 CFI_REL_OFFSET rsi,RSI
1012 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
1013 CFI_REGISTER rax,rsi
1014 movq %rdx,12*8(%rsp)
1015 CFI_REL_OFFSET rdx,RDX
1016 movq %rcx,11*8(%rsp)
1017 CFI_REL_OFFSET rcx,RCX
1018 movq %rsi,10*8(%rsp) /* store rax */
1019 CFI_REL_OFFSET rax,RAX
1020 movq %r8, 9*8(%rsp)
1021 CFI_REL_OFFSET r8,R8
1022 movq %r9, 8*8(%rsp)
1023 CFI_REL_OFFSET r9,R9
1024 movq %r10,7*8(%rsp)
1025 CFI_REL_OFFSET r10,R10
1026 movq %r11,6*8(%rsp)
1027 CFI_REL_OFFSET r11,R11
1028 movq %rbx,5*8(%rsp)
1029 CFI_REL_OFFSET rbx,RBX
1030 movq %rbp,4*8(%rsp)
1031 CFI_REL_OFFSET rbp,RBP
1032 movq %r12,3*8(%rsp)
1033 CFI_REL_OFFSET r12,R12
1034 movq %r13,2*8(%rsp)
1035 CFI_REL_OFFSET r13,R13
1036 movq %r14,1*8(%rsp)
1037 CFI_REL_OFFSET r14,R14
1038 movq %r15,(%rsp)
1039 CFI_REL_OFFSET r15,R15
1040 xorl %ebx,%ebx
1041 testl $3,CS(%rsp)
1042 je error_kernelspace
1043error_swapgs:
1044 SWAPGS
1045error_sti:
1046 TRACE_IRQS_OFF
1047 movq %rdi,RDI(%rsp)
1048 CFI_REL_OFFSET rdi,RDI
1049 movq %rsp,%rdi
1050 movq ORIG_RAX(%rsp),%rsi /* get error code */
1051 movq $-1,ORIG_RAX(%rsp)
1052 call *%rax
1053 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
1054error_exit:
1055 movl %ebx,%eax
1056 RESTORE_REST
1057 DISABLE_INTERRUPTS(CLBR_NONE)
1058 TRACE_IRQS_OFF 1070 TRACE_IRQS_OFF
1059 GET_THREAD_INFO(%rcx) 1071 movq %rsp,%rdi /* pt_regs pointer */
1060 testl %eax,%eax 1072 movq ORIG_RAX(%rsp),%rsi /* get error code */
1061 jne retint_kernel 1073 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
1062 LOCKDEP_SYS_EXIT_IRQ 1074 call \do_sym
1063 movl TI_flags(%rcx),%edx 1075 jmp paranoid_exit /* %ebx: no swapgs flag */
1064 movl $_TIF_WORK_MASK,%edi
1065 andl %edi,%edx
1066 jnz retint_careful
1067 jmp retint_swapgs
1068 CFI_ENDPROC 1076 CFI_ENDPROC
1077END(\sym)
1078.endm
1069 1079
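[Editorial aside: each macro family above ends up calling its \do_sym handler with the pt_regs pointer in %rdi and the error code (zero for the zeroentry variants, the hardware error code for errorentry) in %rsi, rewriting ORIG_RAX to -1 so no syscall is restarted. A small user-space mock of that hand-off; the struct and names are stand-ins, not kernel definitions:]

#include <stdio.h>

struct mock_pt_regs { long orig_ax; };   /* stand-in for struct pt_regs */

/* shape of a \do_sym handler: regs arrives in %rdi, error code in %rsi */
static void do_mock_fault(struct mock_pt_regs *regs, long error_code)
{
	printf("handler sees error_code=%ld, ORIG_RAX now %ld\n",
	       error_code, regs->orig_ax);
}

int main(void)
{
	struct mock_pt_regs regs = { .orig_ax = 0x10 };  /* pushed by the CPU */

	long error_code = regs.orig_ax;  /* errorentry: movq ORIG_RAX(%rsp),%rsi */
	regs.orig_ax = -1;               /* errorentry: "no syscall to restart" */
	do_mock_fault(&regs, error_code);

	do_mock_fault(&regs, 0);         /* zeroentry: xorl %esi,%esi */
	return 0;
}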
1070error_kernelspace: 1080zeroentry divide_error do_divide_error
1071 incl %ebx 1081zeroentry overflow do_overflow
1072 /* There are two places in the kernel that can potentially fault with 1082zeroentry bounds do_bounds
1073 usergs. Handle them here. The exception handlers after 1083zeroentry invalid_op do_invalid_op
1074 iret run with kernel gs again, so don't set the user space flag. 1084zeroentry device_not_available do_device_not_available
1075 B stepping K8s sometimes report an truncated RIP for IRET 1085paranoiderrorentry double_fault do_double_fault
1076 exceptions returning to compat mode. Check for these here too. */ 1086zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
1077 leaq irq_return(%rip),%rcx 1087errorentry invalid_TSS do_invalid_TSS
1078 cmpq %rcx,RIP(%rsp) 1088errorentry segment_not_present do_segment_not_present
1079 je error_swapgs 1089zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
1080 movl %ecx,%ecx /* zero extend */ 1090zeroentry coprocessor_error do_coprocessor_error
1081 cmpq %rcx,RIP(%rsp) 1091errorentry alignment_check do_alignment_check
1082 je error_swapgs 1092zeroentry simd_coprocessor_error do_simd_coprocessor_error
1083 cmpq $gs_change,RIP(%rsp) 1093
1084 je error_swapgs 1094 /* Reload gs selector with exception handling */
1085 jmp error_sti 1095 /* edi: new selector */
1086KPROBE_END(error_entry)
1087
1088 /* Reload gs selector with exception handling */
1089 /* edi: new selector */
1090ENTRY(native_load_gs_index) 1096ENTRY(native_load_gs_index)
1091 CFI_STARTPROC 1097 CFI_STARTPROC
1092 pushf 1098 pushf
1093 CFI_ADJUST_CFA_OFFSET 8 1099 CFI_ADJUST_CFA_OFFSET 8
1094 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) 1100 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
1095 SWAPGS 1101 SWAPGS
1096gs_change: 1102gs_change:
1097 movl %edi,%gs 1103 movl %edi,%gs
10982: mfence /* workaround */ 11042: mfence /* workaround */
1099 SWAPGS 1105 SWAPGS
1100 popf 1106 popf
1101 CFI_ADJUST_CFA_OFFSET -8 1107 CFI_ADJUST_CFA_OFFSET -8
1102 ret 1108 ret
1103 CFI_ENDPROC 1109 CFI_ENDPROC
1104ENDPROC(native_load_gs_index) 1110END(native_load_gs_index)
1105 1111
1106 .section __ex_table,"a" 1112 .section __ex_table,"a"
1107 .align 8 1113 .align 8
1108 .quad gs_change,bad_gs 1114 .quad gs_change,bad_gs
1109 .previous 1115 .previous
1110 .section .fixup,"ax" 1116 .section .fixup,"ax"
1111 /* running with kernelgs */ 1117 /* running with kernelgs */
1112bad_gs: 1118bad_gs:
1113 SWAPGS /* switch back to user gs */ 1119 SWAPGS /* switch back to user gs */
1114 xorl %eax,%eax 1120 xorl %eax,%eax
1115 movl %eax,%gs 1121 movl %eax,%gs
1116 jmp 2b 1122 jmp 2b
1117 .previous 1123 .previous
1118 1124
1119/* 1125/*
1120 * Create a kernel thread. 1126 * Create a kernel thread.
1121 * 1127 *
@@ -1138,7 +1144,7 @@ ENTRY(kernel_thread)
1138 1144
1139 xorl %r8d,%r8d 1145 xorl %r8d,%r8d
1140 xorl %r9d,%r9d 1146 xorl %r9d,%r9d
1141 1147
1142 # clone now 1148 # clone now
1143 call do_fork 1149 call do_fork
1144 movq %rax,RAX(%rsp) 1150 movq %rax,RAX(%rsp)
@@ -1149,15 +1155,15 @@ ENTRY(kernel_thread)
1149 * so internally to the x86_64 port you can rely on kernel_thread() 1155 * so internally to the x86_64 port you can rely on kernel_thread()
1150 * not to reschedule the child before returning, this avoids the need 1156 * not to reschedule the child before returning, this avoids the need
1151 * of hacks for example to fork off the per-CPU idle tasks. 1157 * of hacks for example to fork off the per-CPU idle tasks.
1152 * [Hopefully no generic code relies on the reschedule -AK] 1158 * [Hopefully no generic code relies on the reschedule -AK]
1153 */ 1159 */
1154 RESTORE_ALL 1160 RESTORE_ALL
1155 UNFAKE_STACK_FRAME 1161 UNFAKE_STACK_FRAME
1156 ret 1162 ret
1157 CFI_ENDPROC 1163 CFI_ENDPROC
1158ENDPROC(kernel_thread) 1164END(kernel_thread)
1159 1165
1160child_rip: 1166ENTRY(child_rip)
1161 pushq $0 # fake return address 1167 pushq $0 # fake return address
1162 CFI_STARTPROC 1168 CFI_STARTPROC
1163 /* 1169 /*
@@ -1170,8 +1176,9 @@ child_rip:
1170 # exit 1176 # exit
1171 mov %eax, %edi 1177 mov %eax, %edi
1172 call do_exit 1178 call do_exit
1179 ud2 # padding for call trace
1173 CFI_ENDPROC 1180 CFI_ENDPROC
1174ENDPROC(child_rip) 1181END(child_rip)
1175 1182
1176/* 1183/*
1177 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. 1184 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
@@ -1191,10 +1198,10 @@ ENDPROC(child_rip)
1191ENTRY(kernel_execve) 1198ENTRY(kernel_execve)
1192 CFI_STARTPROC 1199 CFI_STARTPROC
1193 FAKE_STACK_FRAME $0 1200 FAKE_STACK_FRAME $0
1194 SAVE_ALL 1201 SAVE_ALL
1195 movq %rsp,%rcx 1202 movq %rsp,%rcx
1196 call sys_execve 1203 call sys_execve
1197 movq %rax, RAX(%rsp) 1204 movq %rax, RAX(%rsp)
1198 RESTORE_REST 1205 RESTORE_REST
1199 testq %rax,%rax 1206 testq %rax,%rax
1200 je int_ret_from_sys_call 1207 je int_ret_from_sys_call
@@ -1202,129 +1209,7 @@ ENTRY(kernel_execve)
1202 UNFAKE_STACK_FRAME 1209 UNFAKE_STACK_FRAME
1203 ret 1210 ret
1204 CFI_ENDPROC 1211 CFI_ENDPROC
1205ENDPROC(kernel_execve) 1212END(kernel_execve)
1206
1207KPROBE_ENTRY(page_fault)
1208 errorentry do_page_fault
1209KPROBE_END(page_fault)
1210
1211ENTRY(coprocessor_error)
1212 zeroentry do_coprocessor_error
1213END(coprocessor_error)
1214
1215ENTRY(simd_coprocessor_error)
1216 zeroentry do_simd_coprocessor_error
1217END(simd_coprocessor_error)
1218
1219ENTRY(device_not_available)
1220 zeroentry do_device_not_available
1221END(device_not_available)
1222
1223 /* runs on exception stack */
1224KPROBE_ENTRY(debug)
1225 INTR_FRAME
1226 PARAVIRT_ADJUST_EXCEPTION_FRAME
1227 pushq $0
1228 CFI_ADJUST_CFA_OFFSET 8
1229 paranoidentry do_debug, DEBUG_STACK
1230 paranoidexit
1231KPROBE_END(debug)
1232
1233 /* runs on exception stack */
1234KPROBE_ENTRY(nmi)
1235 INTR_FRAME
1236 PARAVIRT_ADJUST_EXCEPTION_FRAME
1237 pushq $-1
1238 CFI_ADJUST_CFA_OFFSET 8
1239 paranoidentry do_nmi, 0, 0
1240#ifdef CONFIG_TRACE_IRQFLAGS
1241 paranoidexit 0
1242#else
1243 jmp paranoid_exit1
1244 CFI_ENDPROC
1245#endif
1246KPROBE_END(nmi)
1247
1248KPROBE_ENTRY(int3)
1249 INTR_FRAME
1250 PARAVIRT_ADJUST_EXCEPTION_FRAME
1251 pushq $0
1252 CFI_ADJUST_CFA_OFFSET 8
1253 paranoidentry do_int3, DEBUG_STACK
1254 jmp paranoid_exit1
1255 CFI_ENDPROC
1256KPROBE_END(int3)
1257
1258ENTRY(overflow)
1259 zeroentry do_overflow
1260END(overflow)
1261
1262ENTRY(bounds)
1263 zeroentry do_bounds
1264END(bounds)
1265
1266ENTRY(invalid_op)
1267 zeroentry do_invalid_op
1268END(invalid_op)
1269
1270ENTRY(coprocessor_segment_overrun)
1271 zeroentry do_coprocessor_segment_overrun
1272END(coprocessor_segment_overrun)
1273
1274 /* runs on exception stack */
1275ENTRY(double_fault)
1276 XCPT_FRAME
1277 PARAVIRT_ADJUST_EXCEPTION_FRAME
1278 paranoidentry do_double_fault
1279 jmp paranoid_exit1
1280 CFI_ENDPROC
1281END(double_fault)
1282
1283ENTRY(invalid_TSS)
1284 errorentry do_invalid_TSS
1285END(invalid_TSS)
1286
1287ENTRY(segment_not_present)
1288 errorentry do_segment_not_present
1289END(segment_not_present)
1290
1291 /* runs on exception stack */
1292ENTRY(stack_segment)
1293 XCPT_FRAME
1294 PARAVIRT_ADJUST_EXCEPTION_FRAME
1295 paranoidentry do_stack_segment
1296 jmp paranoid_exit1
1297 CFI_ENDPROC
1298END(stack_segment)
1299
1300KPROBE_ENTRY(general_protection)
1301 errorentry do_general_protection
1302KPROBE_END(general_protection)
1303
1304ENTRY(alignment_check)
1305 errorentry do_alignment_check
1306END(alignment_check)
1307
1308ENTRY(divide_error)
1309 zeroentry do_divide_error
1310END(divide_error)
1311
1312ENTRY(spurious_interrupt_bug)
1313 zeroentry do_spurious_interrupt_bug
1314END(spurious_interrupt_bug)
1315
1316#ifdef CONFIG_X86_MCE
1317 /* runs on exception stack */
1318ENTRY(machine_check)
1319 INTR_FRAME
1320 PARAVIRT_ADJUST_EXCEPTION_FRAME
1321 pushq $0
1322 CFI_ADJUST_CFA_OFFSET 8
1323 paranoidentry do_machine_check
1324 jmp paranoid_exit1
1325 CFI_ENDPROC
1326END(machine_check)
1327#endif
1328 1213
1329/* Call softirq on interrupt stack. Interrupts are off. */ 1214/* Call softirq on interrupt stack. Interrupts are off. */
1330ENTRY(call_softirq) 1215ENTRY(call_softirq)
@@ -1344,40 +1229,33 @@ ENTRY(call_softirq)
1344 decl %gs:pda_irqcount 1229 decl %gs:pda_irqcount
1345 ret 1230 ret
1346 CFI_ENDPROC 1231 CFI_ENDPROC
1347ENDPROC(call_softirq) 1232END(call_softirq)
1348
1349KPROBE_ENTRY(ignore_sysret)
1350 CFI_STARTPROC
1351 mov $-ENOSYS,%eax
1352 sysret
1353 CFI_ENDPROC
1354ENDPROC(ignore_sysret)
1355 1233
1356#ifdef CONFIG_XEN 1234#ifdef CONFIG_XEN
1357ENTRY(xen_hypervisor_callback) 1235zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
1358 zeroentry xen_do_hypervisor_callback
1359END(xen_hypervisor_callback)
1360 1236
1361/* 1237/*
1362# A note on the "critical region" in our callback handler. 1238 * A note on the "critical region" in our callback handler.
1363# We want to avoid stacking callback handlers due to events occurring 1239 * We want to avoid stacking callback handlers due to events occurring
1364# during handling of the last event. To do this, we keep events disabled 1240 * during handling of the last event. To do this, we keep events disabled
1365# until we've done all processing. HOWEVER, we must enable events before 1241 * until we've done all processing. HOWEVER, we must enable events before
1366# popping the stack frame (can't be done atomically) and so it would still 1242 * popping the stack frame (can't be done atomically) and so it would still
1367# be possible to get enough handler activations to overflow the stack. 1243 * be possible to get enough handler activations to overflow the stack.
1368# Although unlikely, bugs of that kind are hard to track down, so we'd 1244 * Although unlikely, bugs of that kind are hard to track down, so we'd
1369# like to avoid the possibility. 1245 * like to avoid the possibility.
1370# So, on entry to the handler we detect whether we interrupted an 1246 * So, on entry to the handler we detect whether we interrupted an
1371# existing activation in its critical region -- if so, we pop the current 1247 * existing activation in its critical region -- if so, we pop the current
1372# activation and restart the handler using the previous one. 1248 * activation and restart the handler using the previous one.
1373*/ 1249 */
1374ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) 1250ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
1375 CFI_STARTPROC 1251 CFI_STARTPROC
1376/* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will 1252/*
1377 see the correct pointer to the pt_regs */ 1253 * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
1254 * see the correct pointer to the pt_regs
1255 */
1378 movq %rdi, %rsp # we don't return, adjust the stack frame 1256 movq %rdi, %rsp # we don't return, adjust the stack frame
1379 CFI_ENDPROC 1257 CFI_ENDPROC
1380 CFI_DEFAULT_STACK 1258 DEFAULT_FRAME
138111: incl %gs:pda_irqcount 125911: incl %gs:pda_irqcount
1382 movq %rsp,%rbp 1260 movq %rsp,%rbp
1383 CFI_DEF_CFA_REGISTER rbp 1261 CFI_DEF_CFA_REGISTER rbp
@@ -1392,23 +1270,26 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
1392END(do_hypervisor_callback) 1270END(do_hypervisor_callback)
1393 1271
1394/* 1272/*
1395# Hypervisor uses this for application faults while it executes. 1273 * Hypervisor uses this for application faults while it executes.
1396# We get here for two reasons: 1274 * We get here for two reasons:
1397# 1. Fault while reloading DS, ES, FS or GS 1275 * 1. Fault while reloading DS, ES, FS or GS
1398# 2. Fault while executing IRET 1276 * 2. Fault while executing IRET
1399# Category 1 we do not need to fix up as Xen has already reloaded all segment 1277 * Category 1 we do not need to fix up as Xen has already reloaded all segment
1400# registers that could be reloaded and zeroed the others. 1278 * registers that could be reloaded and zeroed the others.
1401# Category 2 we fix up by killing the current process. We cannot use the 1279 * Category 2 we fix up by killing the current process. We cannot use the
1402# normal Linux return path in this case because if we use the IRET hypercall 1280 * normal Linux return path in this case because if we use the IRET hypercall
1403# to pop the stack frame we end up in an infinite loop of failsafe callbacks. 1281 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1404# We distinguish between categories by comparing each saved segment register 1282 * We distinguish between categories by comparing each saved segment register
1405# with its current contents: any discrepancy means we are in category 1. 1283 * with its current contents: any discrepancy means we are in category 1.
1406*/ 1284 */
1407ENTRY(xen_failsafe_callback) 1285ENTRY(xen_failsafe_callback)
1408 framesz = (RIP-0x30) /* workaround buggy gas */ 1286 INTR_FRAME 1 (6*8)
1409 _frame framesz 1287 /*CFI_REL_OFFSET gs,GS*/
1410 CFI_REL_OFFSET rcx, 0 1288 /*CFI_REL_OFFSET fs,FS*/
1411 CFI_REL_OFFSET r11, 8 1289 /*CFI_REL_OFFSET es,ES*/
1290 /*CFI_REL_OFFSET ds,DS*/
1291 CFI_REL_OFFSET r11,8
1292 CFI_REL_OFFSET rcx,0
1412 movw %ds,%cx 1293 movw %ds,%cx
1413 cmpw %cx,0x10(%rsp) 1294 cmpw %cx,0x10(%rsp)
1414 CFI_REMEMBER_STATE 1295 CFI_REMEMBER_STATE
@@ -1429,12 +1310,9 @@ ENTRY(xen_failsafe_callback)
1429 CFI_RESTORE r11 1310 CFI_RESTORE r11
1430 addq $0x30,%rsp 1311 addq $0x30,%rsp
1431 CFI_ADJUST_CFA_OFFSET -0x30 1312 CFI_ADJUST_CFA_OFFSET -0x30
1432 pushq $0 1313 pushq_cfi $0 /* RIP */
1433 CFI_ADJUST_CFA_OFFSET 8 1314 pushq_cfi %r11
1434 pushq %r11 1315 pushq_cfi %rcx
1435 CFI_ADJUST_CFA_OFFSET 8
1436 pushq %rcx
1437 CFI_ADJUST_CFA_OFFSET 8
1438 jmp general_protection 1316 jmp general_protection
1439 CFI_RESTORE_STATE 1317 CFI_RESTORE_STATE
14401: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ 13181: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
@@ -1444,11 +1322,223 @@ ENTRY(xen_failsafe_callback)
1444 CFI_RESTORE r11 1322 CFI_RESTORE r11
1445 addq $0x30,%rsp 1323 addq $0x30,%rsp
1446 CFI_ADJUST_CFA_OFFSET -0x30 1324 CFI_ADJUST_CFA_OFFSET -0x30
1447 pushq $0 1325 pushq_cfi $0
1448 CFI_ADJUST_CFA_OFFSET 8
1449 SAVE_ALL 1326 SAVE_ALL
1450 jmp error_exit 1327 jmp error_exit
1451 CFI_ENDPROC 1328 CFI_ENDPROC
1452END(xen_failsafe_callback) 1329END(xen_failsafe_callback)
1453 1330
1454#endif /* CONFIG_XEN */ 1331#endif /* CONFIG_XEN */
1332
1333/*
1334 * Some functions should be protected against kprobes
1335 */
1336 .pushsection .kprobes.text, "ax"
1337
1338paranoidzeroentry_ist debug do_debug DEBUG_STACK
1339paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
1340paranoiderrorentry stack_segment do_stack_segment
1341errorentry general_protection do_general_protection
1342errorentry page_fault do_page_fault
1343#ifdef CONFIG_X86_MCE
1344paranoidzeroentry machine_check do_machine_check
1345#endif
1346
1347 /*
1348 * "Paranoid" exit path from exception stack.
1349 * Paranoid because this is used by NMIs and cannot take
1350 * any kernel state for granted.
1351 * We don't do kernel preemption checks here, because only
1352 * NMI should be common and it does not enable IRQs and
1353 * cannot get reschedule ticks.
1354 *
1355 * "trace" is 0 for the NMI handler only, because irq-tracing
1356 * is fundamentally NMI-unsafe. (we cannot change the soft and
1357 * hard flags at once, atomically)
1358 */
1359
1360 /* ebx: no swapgs flag */
1361ENTRY(paranoid_exit)
1362 INTR_FRAME
1363 DISABLE_INTERRUPTS(CLBR_NONE)
1364 TRACE_IRQS_OFF
1365 testl %ebx,%ebx /* swapgs needed? */
1366 jnz paranoid_restore
1367 testl $3,CS(%rsp)
1368 jnz paranoid_userspace
1369paranoid_swapgs:
1370 TRACE_IRQS_IRETQ 0
1371 SWAPGS_UNSAFE_STACK
1372paranoid_restore:
1373 RESTORE_ALL 8
1374 jmp irq_return
1375paranoid_userspace:
1376 GET_THREAD_INFO(%rcx)
1377 movl TI_flags(%rcx),%ebx
1378 andl $_TIF_WORK_MASK,%ebx
1379 jz paranoid_swapgs
1380 movq %rsp,%rdi /* &pt_regs */
1381 call sync_regs
1382 movq %rax,%rsp /* switch stack for scheduling */
1383 testl $_TIF_NEED_RESCHED,%ebx
1384 jnz paranoid_schedule
1385 movl %ebx,%edx /* arg3: thread flags */
1386 TRACE_IRQS_ON
1387 ENABLE_INTERRUPTS(CLBR_NONE)
1388 xorl %esi,%esi /* arg2: oldset */
1389 movq %rsp,%rdi /* arg1: &pt_regs */
1390 call do_notify_resume
1391 DISABLE_INTERRUPTS(CLBR_NONE)
1392 TRACE_IRQS_OFF
1393 jmp paranoid_userspace
1394paranoid_schedule:
1395 TRACE_IRQS_ON
1396 ENABLE_INTERRUPTS(CLBR_ANY)
1397 call schedule
1398 DISABLE_INTERRUPTS(CLBR_ANY)
1399 TRACE_IRQS_OFF
1400 jmp paranoid_userspace
1401 CFI_ENDPROC
1402END(paranoid_exit)
1403
1404/*
1405 * Exception entry point. This expects an error code/orig_rax on the stack.
1406 * returns the "no swapgs flag" in %ebx.
1407 */
1408ENTRY(error_entry)
1409 XCPT_FRAME
1410 CFI_ADJUST_CFA_OFFSET 15*8
1411 /* oldrax contains error code */
1412 cld
1413 movq_cfi rdi, RDI+8
1414 movq_cfi rsi, RSI+8
1415 movq_cfi rdx, RDX+8
1416 movq_cfi rcx, RCX+8
1417 movq_cfi rax, RAX+8
1418 movq_cfi r8, R8+8
1419 movq_cfi r9, R9+8
1420 movq_cfi r10, R10+8
1421 movq_cfi r11, R11+8
1422 movq_cfi rbx, RBX+8
1423 movq_cfi rbp, RBP+8
1424 movq_cfi r12, R12+8
1425 movq_cfi r13, R13+8
1426 movq_cfi r14, R14+8
1427 movq_cfi r15, R15+8
1428 xorl %ebx,%ebx
1429 testl $3,CS+8(%rsp)
1430 je error_kernelspace
1431error_swapgs:
1432 SWAPGS
1433error_sti:
1434 TRACE_IRQS_OFF
1435 ret
1436 CFI_ENDPROC
1437
1438/*
1439 * There are two places in the kernel that can potentially fault with
1440 * usergs. Handle them here. The exception handlers after iret run with
1441 * kernel gs again, so don't set the user space flag. B stepping K8s
1442 * sometimes report an truncated RIP for IRET exceptions returning to
1443 * compat mode. Check for these here too.
1444 */
1445error_kernelspace:
1446 incl %ebx
1447 leaq irq_return(%rip),%rcx
1448 cmpq %rcx,RIP+8(%rsp)
1449 je error_swapgs
1450 movl %ecx,%ecx /* zero extend */
1451 cmpq %rcx,RIP+8(%rsp)
1452 je error_swapgs
1453 cmpq $gs_change,RIP+8(%rsp)
1454 je error_swapgs
1455 jmp error_sti
1456END(error_entry)
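[Editorial aside: the "movl %ecx,%ecx / zero extend" step above exists so the comparison also matches a RIP that a B-stepping K8 reported truncated to 32 bits. A user-space illustration of the truncation being checked for; the address is made up:]

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t irq_return = 0xffffffff8020be77ULL;  /* made-up kernel address */
	uint64_t truncated  = (uint32_t)irq_return;   /* movl %ecx,%ecx: zero extend */

	/* error_kernelspace compares the saved RIP against both forms; either
	 * match means the fault hit the IRET path while user gs was loaded. */
	printf("full RIP:      %#llx\n", (unsigned long long)irq_return);
	printf("truncated RIP: %#llx\n", (unsigned long long)truncated);
	return 0;
}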
1457
1458
1459/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
1460ENTRY(error_exit)
1461 DEFAULT_FRAME
1462 movl %ebx,%eax
1463 RESTORE_REST
1464 DISABLE_INTERRUPTS(CLBR_NONE)
1465 TRACE_IRQS_OFF
1466 GET_THREAD_INFO(%rcx)
1467 testl %eax,%eax
1468 jne retint_kernel
1469 LOCKDEP_SYS_EXIT_IRQ
1470 movl TI_flags(%rcx),%edx
1471 movl $_TIF_WORK_MASK,%edi
1472 andl %edi,%edx
1473 jnz retint_careful
1474 jmp retint_swapgs
1475 CFI_ENDPROC
1476END(error_exit)
1477
1478
1479 /* runs on exception stack */
1480ENTRY(nmi)
1481 INTR_FRAME
1482 PARAVIRT_ADJUST_EXCEPTION_FRAME
1483 pushq_cfi $-1
1484 subq $15*8, %rsp
1485 CFI_ADJUST_CFA_OFFSET 15*8
1486 call save_paranoid
1487 DEFAULT_FRAME 0
1488 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
1489 movq %rsp,%rdi
1490 movq $-1,%rsi
1491 call do_nmi
1492#ifdef CONFIG_TRACE_IRQFLAGS
1493 /* paranoidexit; without TRACE_IRQS_OFF */
1494 /* ebx: no swapgs flag */
1495 DISABLE_INTERRUPTS(CLBR_NONE)
1496 testl %ebx,%ebx /* swapgs needed? */
1497 jnz nmi_restore
1498 testl $3,CS(%rsp)
1499 jnz nmi_userspace
1500nmi_swapgs:
1501 SWAPGS_UNSAFE_STACK
1502nmi_restore:
1503 RESTORE_ALL 8
1504 jmp irq_return
1505nmi_userspace:
1506 GET_THREAD_INFO(%rcx)
1507 movl TI_flags(%rcx),%ebx
1508 andl $_TIF_WORK_MASK,%ebx
1509 jz nmi_swapgs
1510 movq %rsp,%rdi /* &pt_regs */
1511 call sync_regs
1512 movq %rax,%rsp /* switch stack for scheduling */
1513 testl $_TIF_NEED_RESCHED,%ebx
1514 jnz nmi_schedule
1515 movl %ebx,%edx /* arg3: thread flags */
1516 ENABLE_INTERRUPTS(CLBR_NONE)
1517 xorl %esi,%esi /* arg2: oldset */
1518 movq %rsp,%rdi /* arg1: &pt_regs */
1519 call do_notify_resume
1520 DISABLE_INTERRUPTS(CLBR_NONE)
1521 jmp nmi_userspace
1522nmi_schedule:
1523 ENABLE_INTERRUPTS(CLBR_ANY)
1524 call schedule
1525 DISABLE_INTERRUPTS(CLBR_ANY)
1526 jmp nmi_userspace
1527 CFI_ENDPROC
1528#else
1529 jmp paranoid_exit
1530 CFI_ENDPROC
1531#endif
1532END(nmi)
1533
1534ENTRY(ignore_sysret)
1535 CFI_STARTPROC
1536 mov $-ENOSYS,%eax
1537 sysret
1538 CFI_ENDPROC
1539END(ignore_sysret)
1540
1541/*
1542 * End of kprobes section
1543 */
1544 .popsection
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 60eb84eb77a0..1d3d0e71b044 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,7 +18,6 @@
18#include <asm/idle.h> 18#include <asm/idle.h>
19#include <asm/smp.h> 19#include <asm/smp.h>
20 20
21#ifdef CONFIG_DEBUG_STACKOVERFLOW
22/* 21/*
23 * Probabilistic stack overflow check: 22 * Probabilistic stack overflow check:
24 * 23 *
@@ -28,19 +27,18 @@
28 */ 27 */
29static inline void stack_overflow_check(struct pt_regs *regs) 28static inline void stack_overflow_check(struct pt_regs *regs)
30{ 29{
30#ifdef CONFIG_DEBUG_STACKOVERFLOW
31 u64 curbase = (u64)task_stack_page(current); 31 u64 curbase = (u64)task_stack_page(current);
32 static unsigned long warned = -60*HZ; 32
33 33 WARN_ONCE(regs->sp >= curbase &&
34 if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE && 34 regs->sp <= curbase + THREAD_SIZE &&
35 regs->sp < curbase + sizeof(struct thread_info) + 128 && 35 regs->sp < curbase + sizeof(struct thread_info) +
36 time_after(jiffies, warned + 60*HZ)) { 36 sizeof(struct pt_regs) + 128,
37 printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", 37
38 current->comm, curbase, regs->sp); 38 "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
39 show_stack(NULL,NULL); 39 current->comm, curbase, regs->sp);
40 warned = jiffies;
41 }
42}
43#endif 40#endif
41}
44 42
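[Editorial aside: the rewritten check above boils down to one range test against the bottom of the thread stack, where thread_info lives. A standalone sketch of that bound with illustrative sizes; the real values come from THREAD_SIZE, struct thread_info and struct pt_regs:]

#include <stdio.h>

/* Warn when sp has dropped to within a small cushion of the thread_info
 * sitting at the lowest address of the thread stack. */
static int near_stack_overflow(unsigned long sp, unsigned long curbase,
			       unsigned long thread_size,
			       unsigned long ti_size, unsigned long regs_size)
{
	return sp >= curbase &&
	       sp <= curbase + thread_size &&
	       sp <  curbase + ti_size + regs_size + 128;
}

int main(void)
{
	unsigned long curbase = 0x100000, thread_size = 8192;
	unsigned long ti_size = 104, regs_size = 168;   /* illustrative sizes */

	printf("sp near top of stack:    %d\n",
	       near_stack_overflow(curbase + thread_size - 64, curbase,
				   thread_size, ti_size, regs_size));
	printf("sp near bottom of stack: %d\n",
	       near_stack_overflow(curbase + 300, curbase,
				   thread_size, ti_size, regs_size));
	return 0;
}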
45/* 43/*
46 * do_IRQ handles all normal device IRQ's (the special 44 * do_IRQ handles all normal device IRQ's (the special
@@ -60,9 +58,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
60 irq_enter(); 58 irq_enter();
61 irq = __get_cpu_var(vector_irq)[vector]; 59 irq = __get_cpu_var(vector_irq)[vector];
62 60
63#ifdef CONFIG_DEBUG_STACKOVERFLOW
64 stack_overflow_check(regs); 61 stack_overflow_check(regs);
65#endif
66 62
67 desc = irq_to_desc(irq); 63 desc = irq_to_desc(irq);
68 if (likely(desc)) 64 if (likely(desc))
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 845aa9803e80..607db63044a5 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -129,7 +129,7 @@ void __init native_init_IRQ(void)
129 for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { 129 for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
130 /* SYSCALL_VECTOR was reserved in trap_init. */ 130 /* SYSCALL_VECTOR was reserved in trap_init. */
131 if (i != SYSCALL_VECTOR) 131 if (i != SYSCALL_VECTOR)
132 set_intr_gate(i, interrupt[i]); 132 set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
133 } 133 }
134 134
135 135
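[Editorial aside: interrupt[] no longer carries dead slots for vectors below FIRST_EXTERNAL_VECTOR, which is why the gate setup here, in the irqinit_64.c hunk and in the lguest hunk below, rebases the index. A compact illustration with stand-in values; 0x20 and 256 mirror FIRST_EXTERNAL_VECTOR and NR_VECTORS of this era but are assumptions of the sketch:]

#include <assert.h>
#include <stdio.h>

#define FIRST_EXTERNAL_VECTOR 0x20   /* illustrative; real value is in asm/irq_vectors.h */
#define NR_VECTORS            256

int main(void)
{
	int interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR];   /* one stub per external vector */

	for (int i = 0; i < NR_VECTORS - FIRST_EXTERNAL_VECTOR; i++)
		interrupt[i] = FIRST_EXTERNAL_VECTOR + i;     /* "stub" for this vector */

	for (int vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++)
		assert(interrupt[vector - FIRST_EXTERNAL_VECTOR] == vector);

	printf("%d stubs, indexed by vector - 0x%x\n",
	       NR_VECTORS - FIRST_EXTERNAL_VECTOR, FIRST_EXTERNAL_VECTOR);
	return 0;
}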
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index ff0235391285..8670b3ce626e 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -24,41 +24,6 @@
24#include <asm/i8259.h> 24#include <asm/i8259.h>
25 25
26/* 26/*
27 * Common place to define all x86 IRQ vectors
28 *
29 * This builds up the IRQ handler stubs using some ugly macros in irq.h
30 *
31 * These macros create the low-level assembly IRQ routines that save
32 * register context and call do_IRQ(). do_IRQ() then does all the
33 * operations that are needed to keep the AT (or SMP IOAPIC)
34 * interrupt-controller happy.
35 */
36
37#define IRQ_NAME2(nr) nr##_interrupt(void)
38#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
39
40/*
41 * SMP has a few special interrupts for IPI messages
42 */
43
44#define BUILD_IRQ(nr) \
45 asmlinkage void IRQ_NAME(nr); \
46 asm("\n.text\n.p2align\n" \
47 "IRQ" #nr "_interrupt:\n\t" \
48 "push $~(" #nr ") ; " \
49 "jmp common_interrupt\n" \
50 ".previous");
51
52#define BI(x,y) \
53 BUILD_IRQ(x##y)
54
55#define BUILD_16_IRQS(x) \
56 BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
57 BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
58 BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
59 BI(x,c) BI(x,d) BI(x,e) BI(x,f)
60
61/*
62 * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: 27 * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
63 * (these are usually mapped to vectors 0x30-0x3f) 28 * (these are usually mapped to vectors 0x30-0x3f)
64 */ 29 */
@@ -73,37 +38,6 @@
73 * 38 *
74 * (these are usually mapped into the 0x30-0xff vector range) 39 * (these are usually mapped into the 0x30-0xff vector range)
75 */ 40 */
76 BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
77BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
78BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
79BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
80
81#undef BUILD_16_IRQS
82#undef BI
83
84
85#define IRQ(x,y) \
86 IRQ##x##y##_interrupt
87
88#define IRQLIST_16(x) \
89 IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
90 IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
91 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
92 IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
93
94/* for the irq vectors */
95static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
96 IRQLIST_16(0x2), IRQLIST_16(0x3),
97 IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
98 IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
99 IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf)
100};
101
102#undef IRQ
103#undef IRQLIST_16
104
105
106
107 41
108/* 42/*
109 * IRQ2 is cascade interrupt to second interrupt controller 43 * IRQ2 is cascade interrupt to second interrupt controller
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index cb19d650c216..418a095c5796 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -80,6 +80,8 @@ unsigned long __init calibrate_cpu(void)
80 break; 80 break;
81 no_ctr_free = (i == 4); 81 no_ctr_free = (i == 4);
82 if (no_ctr_free) { 82 if (no_ctr_free) {
83 WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... "
84 "cpu_khz value may be incorrect.\n");
83 i = 3; 85 i = 3;
84 rdmsrl(MSR_K7_EVNTSEL3, evntsel3); 86 rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
85 wrmsrl(MSR_K7_EVNTSEL3, 0); 87 wrmsrl(MSR_K7_EVNTSEL3, 0);
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 0b8b6690a86d..ebf2f12900f5 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -128,7 +128,16 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
128 gettimeofday(tv,NULL); 128 gettimeofday(tv,NULL);
129 return; 129 return;
130 } 130 }
131
132 /*
133 * Surround the RDTSC by barriers, to make sure it's not
134 * speculated to outside the seqlock critical section and
135 * does not cause time warps:
136 */
137 rdtsc_barrier();
131 now = vread(); 138 now = vread();
139 rdtsc_barrier();
140
132 base = __vsyscall_gtod_data.clock.cycle_last; 141 base = __vsyscall_gtod_data.clock.cycle_last;
133 mask = __vsyscall_gtod_data.clock.mask; 142 mask = __vsyscall_gtod_data.clock.mask;
134 mult = __vsyscall_gtod_data.clock.mult; 143 mult = __vsyscall_gtod_data.clock.mult;
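[Editorial aside: rdtsc_barrier() pins the RDTSC between fences so the read cannot be hoisted out of, or sunk past, the seqlock-protected section and produce a time warp. A user-space sketch of the same fencing pattern; LFENCE stands in here for whatever rdtsc_barrier() patches in at boot:]

#include <stdint.h>
#include <stdio.h>
#include <x86intrin.h>

/* Read the TSC with a fence on each side so the read cannot be
 * speculated outside the surrounding critical section. */
static inline uint64_t fenced_rdtsc(void)
{
	_mm_lfence();
	uint64_t tsc = __rdtsc();
	_mm_lfence();
	return tsc;
}

int main(void)
{
	uint64_t a = fenced_rdtsc();
	uint64_t b = fenced_rdtsc();
	printf("tsc delta between two fenced reads: %llu\n",
	       (unsigned long long)(b - a));
	return 0;
}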
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index a5d8e1ace1cf..50a779264bb1 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -590,7 +590,8 @@ static void __init lguest_init_IRQ(void)
590 * a straightforward 1 to 1 mapping, so force that here. */ 590 * a straightforward 1 to 1 mapping, so force that here. */
591 __get_cpu_var(vector_irq)[vector] = i; 591 __get_cpu_var(vector_irq)[vector] = i;
592 if (vector != SYSCALL_VECTOR) { 592 if (vector != SYSCALL_VECTOR) {
593 set_intr_gate(vector, interrupt[vector]); 593 set_intr_gate(vector,
594 interrupt[vector-FIRST_EXTERNAL_VECTOR]);
594 set_irq_chip_and_handler_name(i, &lguest_irq_controller, 595 set_irq_chip_and_handler_name(i, &lguest_irq_controller,
595 handle_level_irq, 596 handle_level_irq,
596 "level"); 597 "level");
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c483f4242079..3ffed259883e 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -102,6 +102,8 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
102 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); 102 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
103 pud = pud_offset(pgd, 0); 103 pud = pud_offset(pgd, 0);
104 BUG_ON(pmd_table != pmd_offset(pud, 0)); 104 BUG_ON(pmd_table != pmd_offset(pud, 0));
105
106 return pmd_table;
105 } 107 }
106#endif 108#endif
107 pud = pud_offset(pgd, 0); 109 pud = pud_offset(pgd, 0);