Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                     |   12
-rw-r--r--  arch/x86/ia32/ia32_signal.c          |    5
-rw-r--r--  arch/x86/include/asm/bitops.h        |   10
-rw-r--r--  arch/x86/include/asm/byteorder.h     |   74
-rw-r--r--  arch/x86/include/asm/dwarf2.h        |   97
-rw-r--r--  arch/x86/include/asm/hw_irq.h        |    4
-rw-r--r--  arch/x86/include/asm/irq.h           |    4
-rw-r--r--  arch/x86/include/asm/irq_regs_32.h   |    2
-rw-r--r--  arch/x86/include/asm/linkage.h       |   60
-rw-r--r--  arch/x86/include/asm/tsc.h           |    8
-rw-r--r--  arch/x86/kernel/entry_32.S           |  476
-rw-r--r--  arch/x86/kernel/entry_64.S           | 1332
-rw-r--r--  arch/x86/kernel/irqinit_32.c         |    2
-rw-r--r--  arch/x86/kernel/irqinit_64.c         |   66
-rw-r--r--  arch/x86/kernel/vsyscall_64.c        |    9
-rw-r--r--  arch/x86/lguest/boot.c               |    3
-rw-r--r--  arch/x86/mm/init_32.c                |    2
17 files changed, 1141 insertions(+), 1025 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ac22bb7719f7..d4d4cb7629ea 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -242,21 +242,13 @@ config X86_FIND_SMP_CONFIG
242 def_bool y 242 def_bool y
243 depends on X86_MPPARSE || X86_VOYAGER 243 depends on X86_MPPARSE || X86_VOYAGER
244 244
245if ACPI
246config X86_MPPARSE 245config X86_MPPARSE
247 def_bool y 246 bool "Enable MPS table" if ACPI
248 bool "Enable MPS table" 247 default y
249 depends on X86_LOCAL_APIC 248 depends on X86_LOCAL_APIC
250 help 249 help
251 For old smp systems that do not have proper acpi support. Newer systems 250 For old smp systems that do not have proper acpi support. Newer systems
252 (esp with 64bit cpus) with acpi support, MADT and DSDT will override it 251 (esp with 64bit cpus) with acpi support, MADT and DSDT will override it
253endif
254
255if !ACPI
256config X86_MPPARSE
257 def_bool y
258 depends on X86_LOCAL_APIC
259endif
260 252
261choice 253choice
262 prompt "Subarchitecture Type" 254 prompt "Subarchitecture Type"
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 4bc02b23674b..e82ebd652263 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -572,11 +572,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
572 regs->dx = (unsigned long) &frame->info; 572 regs->dx = (unsigned long) &frame->info;
573 regs->cx = (unsigned long) &frame->uc; 573 regs->cx = (unsigned long) &frame->uc;
574 574
575 /* Make -mregparm=3 work */
576 regs->ax = sig;
577 regs->dx = (unsigned long) &frame->info;
578 regs->cx = (unsigned long) &frame->uc;
579
580 loadsegment(ds, __USER32_DS); 575 loadsegment(ds, __USER32_DS);
581 loadsegment(es, __USER32_DS); 576 loadsegment(es, __USER32_DS);
582 577
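The hunk above only drops a verbatim duplicate of the three register assignments; the surviving copy is what satisfies the -mregparm=3 calling convention, under which a 32-bit handler takes its first three arguments in eax, edx and ecx. A minimal C sketch of that convention (the names below are hypothetical, not part of the patch):

struct example_siginfo;
struct example_ucontext;

/* With -mregparm=3 (or the attribute below) gcc passes the first three
 * integer/pointer arguments in eax, edx and ecx - exactly the registers
 * that ia32_setup_rt_frame() preloads with sig, &frame->info and
 * &frame->uc before returning to the handler. */
void example_rt_handler(int sig, struct example_siginfo *info,
                        struct example_ucontext *uc)
        __attribute__((regparm(3)));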
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 360010322711..9fa9dcdf344b 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -168,7 +168,15 @@ static inline void __change_bit(int nr, volatile unsigned long *addr)
168 */ 168 */
169static inline void change_bit(int nr, volatile unsigned long *addr) 169static inline void change_bit(int nr, volatile unsigned long *addr)
170{ 170{
171 asm volatile(LOCK_PREFIX "btc %1,%0" : ADDR : "Ir" (nr)); 171 if (IS_IMMEDIATE(nr)) {
172 asm volatile(LOCK_PREFIX "xorb %1,%0"
173 : CONST_MASK_ADDR(nr, addr)
174 : "iq" ((u8)CONST_MASK(nr)));
175 } else {
176 asm volatile(LOCK_PREFIX "btc %1,%0"
177 : BITOP_ADDR(addr)
178 : "Ir" (nr));
179 }
172} 180}
173 181
174/** 182/**
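The new fast path keys off IS_IMMEDIATE(nr): when the bit number is a compile-time constant, the containing byte and the one-byte mask are known at build time, so a byte-wide lock xorb can replace the more general lock btc. A rough C sketch of the same idea (the helper names are made up for illustration, not the kernel's):

#include <stdint.h>

#define EXAMPLE_BYTE_OF(addr, nr)  (((volatile uint8_t *)(addr)) + ((nr) >> 3))
#define EXAMPLE_MASK_OF(nr)        ((uint8_t)(1 << ((nr) & 7)))

static inline void example_change_bit_const(int nr, volatile unsigned long *addr)
{
        /* the kernel version emits "lock xorb $mask, byte" for this toggle */
        *EXAMPLE_BYTE_OF(addr, nr) ^= EXAMPLE_MASK_OF(nr);
}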
diff --git a/arch/x86/include/asm/byteorder.h b/arch/x86/include/asm/byteorder.h
index e02ae2d89acf..f110ad417df3 100644
--- a/arch/x86/include/asm/byteorder.h
+++ b/arch/x86/include/asm/byteorder.h
@@ -4,26 +4,33 @@
4#include <asm/types.h> 4#include <asm/types.h>
5#include <linux/compiler.h> 5#include <linux/compiler.h>
6 6
7#ifdef __GNUC__ 7#define __LITTLE_ENDIAN
8 8
9#ifdef __i386__ 9static inline __attribute_const__ __u32 __arch_swab32(__u32 val)
10
11static inline __attribute_const__ __u32 ___arch__swab32(__u32 x)
12{ 10{
13#ifdef CONFIG_X86_BSWAP 11#ifdef __i386__
14 asm("bswap %0" : "=r" (x) : "0" (x)); 12# ifdef CONFIG_X86_BSWAP
15#else 13 asm("bswap %0" : "=r" (val) : "0" (val));
14# else
16 asm("xchgb %b0,%h0\n\t" /* swap lower bytes */ 15 asm("xchgb %b0,%h0\n\t" /* swap lower bytes */
17 "rorl $16,%0\n\t" /* swap words */ 16 "rorl $16,%0\n\t" /* swap words */
18 "xchgb %b0,%h0" /* swap higher bytes */ 17 "xchgb %b0,%h0" /* swap higher bytes */
19 : "=q" (x) 18 : "=q" (val)
20 : "0" (x)); 19 : "0" (val));
20# endif
21
22#else /* __i386__ */
23 asm("bswapl %0"
24 : "=r" (val)
25 : "0" (val));
21#endif 26#endif
22 return x; 27 return val;
23} 28}
29#define __arch_swab32 __arch_swab32
24 30
25static inline __attribute_const__ __u64 ___arch__swab64(__u64 val) 31static inline __attribute_const__ __u64 __arch_swab64(__u64 val)
26{ 32{
33#ifdef __i386__
27 union { 34 union {
28 struct { 35 struct {
29 __u32 a; 36 __u32 a;
@@ -32,50 +39,27 @@ static inline __attribute_const__ __u64 ___arch__swab64(__u64 val)
32 __u64 u; 39 __u64 u;
33 } v; 40 } v;
34 v.u = val; 41 v.u = val;
35#ifdef CONFIG_X86_BSWAP 42# ifdef CONFIG_X86_BSWAP
36 asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1" 43 asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1"
37 : "=r" (v.s.a), "=r" (v.s.b) 44 : "=r" (v.s.a), "=r" (v.s.b)
38 : "0" (v.s.a), "1" (v.s.b)); 45 : "0" (v.s.a), "1" (v.s.b));
39#else 46# else
40 v.s.a = ___arch__swab32(v.s.a); 47 v.s.a = __arch_swab32(v.s.a);
41 v.s.b = ___arch__swab32(v.s.b); 48 v.s.b = __arch_swab32(v.s.b);
42 asm("xchgl %0,%1" 49 asm("xchgl %0,%1"
43 : "=r" (v.s.a), "=r" (v.s.b) 50 : "=r" (v.s.a), "=r" (v.s.b)
44 : "0" (v.s.a), "1" (v.s.b)); 51 : "0" (v.s.a), "1" (v.s.b));
45#endif 52# endif
46 return v.u; 53 return v.u;
47}
48
49#else /* __i386__ */ 54#else /* __i386__ */
50
51static inline __attribute_const__ __u64 ___arch__swab64(__u64 x)
52{
53 asm("bswapq %0" 55 asm("bswapq %0"
54 : "=r" (x) 56 : "=r" (val)
55 : "0" (x)); 57 : "0" (val));
56 return x; 58 return val;
57}
58
59static inline __attribute_const__ __u32 ___arch__swab32(__u32 x)
60{
61 asm("bswapl %0"
62 : "=r" (x)
63 : "0" (x));
64 return x;
65}
66
67#endif 59#endif
60}
61#define __arch_swab64 __arch_swab64
68 62
69/* Do not define swab16. Gcc is smart enough to recognize "C" version and 63#include <linux/byteorder.h>
70 convert it into rotation or exhange. */
71
72#define __arch__swab64(x) ___arch__swab64(x)
73#define __arch__swab32(x) ___arch__swab32(x)
74
75#define __BYTEORDER_HAS_U64__
76
77#endif /* __GNUC__ */
78
79#include <linux/byteorder/little_endian.h>
80 64
81#endif /* _ASM_X86_BYTEORDER_H */ 65#endif /* _ASM_X86_BYTEORDER_H */
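The rewritten header defines __LITTLE_ENDIAN, supplies __arch_swab32/__arch_swab64 overrides for both 32-bit and 64-bit builds, and then pulls in linux/byteorder.h for the generic machinery. For reference, a portable C equivalent of the swaps that the bswap/xchg asm above implements (a sketch only, not part of the header):

#include <stdint.h>

static inline uint32_t example_swab32(uint32_t x)
{
        return ((x & 0x000000ffu) << 24) |
               ((x & 0x0000ff00u) <<  8) |
               ((x & 0x00ff0000u) >>  8) |
               ((x & 0xff000000u) >> 24);
}

static inline uint64_t example_swab64(uint64_t x)
{
        /* same structure as the i386 path above: swap each half, then exchange halves */
        return ((uint64_t)example_swab32((uint32_t)x) << 32) |
                example_swab32((uint32_t)(x >> 32));
}

For instance, example_swab32(0x12345678) yields 0x78563412.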
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index 804b6e6be929..3afc5e87cfdd 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -6,56 +6,91 @@
6#endif 6#endif
7 7
8/* 8/*
9 Macros for dwarf2 CFI unwind table entries. 9 * Macros for dwarf2 CFI unwind table entries.
10 See "as.info" for details on these pseudo ops. Unfortunately 10 * See "as.info" for details on these pseudo ops. Unfortunately
11 they are only supported in very new binutils, so define them 11 * they are only supported in very new binutils, so define them
12 away for older version. 12 * away for older version.
13 */ 13 */
14 14
15#ifdef CONFIG_AS_CFI 15#ifdef CONFIG_AS_CFI
16 16
17#define CFI_STARTPROC .cfi_startproc 17#define CFI_STARTPROC .cfi_startproc
18#define CFI_ENDPROC .cfi_endproc 18#define CFI_ENDPROC .cfi_endproc
19#define CFI_DEF_CFA .cfi_def_cfa 19#define CFI_DEF_CFA .cfi_def_cfa
20#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register 20#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
21#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset 21#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
22#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset 22#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
23#define CFI_OFFSET .cfi_offset 23#define CFI_OFFSET .cfi_offset
24#define CFI_REL_OFFSET .cfi_rel_offset 24#define CFI_REL_OFFSET .cfi_rel_offset
25#define CFI_REGISTER .cfi_register 25#define CFI_REGISTER .cfi_register
26#define CFI_RESTORE .cfi_restore 26#define CFI_RESTORE .cfi_restore
27#define CFI_REMEMBER_STATE .cfi_remember_state 27#define CFI_REMEMBER_STATE .cfi_remember_state
28#define CFI_RESTORE_STATE .cfi_restore_state 28#define CFI_RESTORE_STATE .cfi_restore_state
29#define CFI_UNDEFINED .cfi_undefined 29#define CFI_UNDEFINED .cfi_undefined
30 30
31#ifdef CONFIG_AS_CFI_SIGNAL_FRAME 31#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
32#define CFI_SIGNAL_FRAME .cfi_signal_frame 32#define CFI_SIGNAL_FRAME .cfi_signal_frame
33#else 33#else
34#define CFI_SIGNAL_FRAME 34#define CFI_SIGNAL_FRAME
35#endif 35#endif
36 36
37#else 37#else
38 38
39/* Due to the structure of pre-exisiting code, don't use assembler line 39/*
40 comment character # to ignore the arguments. Instead, use a dummy macro. */ 40 * Due to the structure of pre-exisiting code, don't use assembler line
41 * comment character # to ignore the arguments. Instead, use a dummy macro.
42 */
41.macro cfi_ignore a=0, b=0, c=0, d=0 43.macro cfi_ignore a=0, b=0, c=0, d=0
42.endm 44.endm
43 45
44#define CFI_STARTPROC cfi_ignore 46#define CFI_STARTPROC cfi_ignore
45#define CFI_ENDPROC cfi_ignore 47#define CFI_ENDPROC cfi_ignore
46#define CFI_DEF_CFA cfi_ignore 48#define CFI_DEF_CFA cfi_ignore
47#define CFI_DEF_CFA_REGISTER cfi_ignore 49#define CFI_DEF_CFA_REGISTER cfi_ignore
48#define CFI_DEF_CFA_OFFSET cfi_ignore 50#define CFI_DEF_CFA_OFFSET cfi_ignore
49#define CFI_ADJUST_CFA_OFFSET cfi_ignore 51#define CFI_ADJUST_CFA_OFFSET cfi_ignore
50#define CFI_OFFSET cfi_ignore 52#define CFI_OFFSET cfi_ignore
51#define CFI_REL_OFFSET cfi_ignore 53#define CFI_REL_OFFSET cfi_ignore
52#define CFI_REGISTER cfi_ignore 54#define CFI_REGISTER cfi_ignore
53#define CFI_RESTORE cfi_ignore 55#define CFI_RESTORE cfi_ignore
54#define CFI_REMEMBER_STATE cfi_ignore 56#define CFI_REMEMBER_STATE cfi_ignore
55#define CFI_RESTORE_STATE cfi_ignore 57#define CFI_RESTORE_STATE cfi_ignore
56#define CFI_UNDEFINED cfi_ignore 58#define CFI_UNDEFINED cfi_ignore
57#define CFI_SIGNAL_FRAME cfi_ignore 59#define CFI_SIGNAL_FRAME cfi_ignore
58 60
59#endif 61#endif
60 62
63/*
64 * An attempt to make CFI annotations more or less
65 * correct and shorter. It is implied that you know
66 * what you're doing if you use them.
67 */
68#ifdef __ASSEMBLY__
69#ifdef CONFIG_X86_64
70 .macro pushq_cfi reg
71 pushq \reg
72 CFI_ADJUST_CFA_OFFSET 8
73 .endm
74
75 .macro popq_cfi reg
76 popq \reg
77 CFI_ADJUST_CFA_OFFSET -8
78 .endm
79
80 .macro movq_cfi reg offset=0
81 movq %\reg, \offset(%rsp)
82 CFI_REL_OFFSET \reg, \offset
83 .endm
84
85 .macro movq_cfi_restore offset reg
86 movq \offset(%rsp), %\reg
87 CFI_RESTORE \reg
88 .endm
89#else /*!CONFIG_X86_64*/
90
91 /* 32bit defenitions are missed yet */
92
93#endif /*!CONFIG_X86_64*/
94#endif /*__ASSEMBLY__*/
95
61#endif /* _ASM_X86_DWARF2_H */ 96#endif /* _ASM_X86_DWARF2_H */
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b97aecb0b61d..8de644b6b959 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -109,9 +109,7 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
109#endif 109#endif
110#endif 110#endif
111 111
112#ifdef CONFIG_X86_32 112extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
113extern void (*const interrupt[NR_VECTORS])(void);
114#endif
115 113
116typedef int vector_irq_t[NR_VECTORS]; 114typedef int vector_irq_t[NR_VECTORS];
117DECLARE_PER_CPU(vector_irq_t, vector_irq); 115DECLARE_PER_CPU(vector_irq_t, vector_irq);
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index bae0eda95486..28e409fc73f3 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -31,10 +31,6 @@ static inline int irq_canonicalize(int irq)
31# endif 31# endif
32#endif 32#endif
33 33
34#ifdef CONFIG_IRQBALANCE
35extern int irqbalance_disable(char *str);
36#endif
37
38#ifdef CONFIG_HOTPLUG_CPU 34#ifdef CONFIG_HOTPLUG_CPU
39#include <linux/cpumask.h> 35#include <linux/cpumask.h>
40extern void fixup_irqs(cpumask_t map); 36extern void fixup_irqs(cpumask_t map);
diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h
index af2f02d27fc7..86afd7473457 100644
--- a/arch/x86/include/asm/irq_regs_32.h
+++ b/arch/x86/include/asm/irq_regs_32.h
@@ -9,6 +9,8 @@
9 9
10#include <asm/percpu.h> 10#include <asm/percpu.h>
11 11
12#define ARCH_HAS_OWN_IRQ_REGS
13
12DECLARE_PER_CPU(struct pt_regs *, irq_regs); 14DECLARE_PER_CPU(struct pt_regs *, irq_regs);
13 15
14static inline struct pt_regs *get_irq_regs(void) 16static inline struct pt_regs *get_irq_regs(void)
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index f61ee8f937e4..5d98d0b68ffc 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -57,5 +57,65 @@
57#define __ALIGN_STR ".align 16,0x90" 57#define __ALIGN_STR ".align 16,0x90"
58#endif 58#endif
59 59
60/*
61 * to check ENTRY_X86/END_X86 and
62 * KPROBE_ENTRY_X86/KPROBE_END_X86
63 * unbalanced-missed-mixed appearance
64 */
65#define __set_entry_x86 .set ENTRY_X86_IN, 0
66#define __unset_entry_x86 .set ENTRY_X86_IN, 1
67#define __set_kprobe_x86 .set KPROBE_X86_IN, 0
68#define __unset_kprobe_x86 .set KPROBE_X86_IN, 1
69
70#define __macro_err_x86 .error "ENTRY_X86/KPROBE_X86 unbalanced,missed,mixed"
71
72#define __check_entry_x86 \
73 .ifdef ENTRY_X86_IN; \
74 .ifeq ENTRY_X86_IN; \
75 __macro_err_x86; \
76 .abort; \
77 .endif; \
78 .endif
79
80#define __check_kprobe_x86 \
81 .ifdef KPROBE_X86_IN; \
82 .ifeq KPROBE_X86_IN; \
83 __macro_err_x86; \
84 .abort; \
85 .endif; \
86 .endif
87
88#define __check_entry_kprobe_x86 \
89 __check_entry_x86; \
90 __check_kprobe_x86
91
92#define ENTRY_KPROBE_FINAL_X86 __check_entry_kprobe_x86
93
94#define ENTRY_X86(name) \
95 __check_entry_kprobe_x86; \
96 __set_entry_x86; \
97 .globl name; \
98 __ALIGN; \
99 name:
100
101#define END_X86(name) \
102 __unset_entry_x86; \
103 __check_entry_kprobe_x86; \
104 .size name, .-name
105
106#define KPROBE_ENTRY_X86(name) \
107 __check_entry_kprobe_x86; \
108 __set_kprobe_x86; \
109 .pushsection .kprobes.text, "ax"; \
110 .globl name; \
111 __ALIGN; \
112 name:
113
114#define KPROBE_END_X86(name) \
115 __unset_kprobe_x86; \
116 __check_entry_kprobe_x86; \
117 .size name, .-name; \
118 .popsection
119
60#endif /* _ASM_X86_LINKAGE_H */ 120#endif /* _ASM_X86_LINKAGE_H */
61 121
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 9cd83a8e40d5..38ae163cc91b 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -34,8 +34,6 @@ static inline cycles_t get_cycles(void)
34 34
35static __always_inline cycles_t vget_cycles(void) 35static __always_inline cycles_t vget_cycles(void)
36{ 36{
37 cycles_t cycles;
38
39 /* 37 /*
40 * We only do VDSOs on TSC capable CPUs, so this shouldnt 38 * We only do VDSOs on TSC capable CPUs, so this shouldnt
41 * access boot_cpu_data (which is not VDSO-safe): 39 * access boot_cpu_data (which is not VDSO-safe):
@@ -44,11 +42,7 @@ static __always_inline cycles_t vget_cycles(void)
44 if (!cpu_has_tsc) 42 if (!cpu_has_tsc)
45 return 0; 43 return 0;
46#endif 44#endif
47 rdtsc_barrier(); 45 return (cycles_t)__native_read_tsc();
48 cycles = (cycles_t)__native_read_tsc();
49 rdtsc_barrier();
50
51 return cycles;
52} 46}
53 47
54extern void tsc_init(void); 48extern void tsc_init(void);
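After this change vget_cycles() is a bare TSC read: the local variable and the surrounding rdtsc_barrier() calls are gone, so any ordering against nearby loads and stores is left to the caller. A minimal sketch of what such a raw read boils down to (illustrative only, not the kernel's __native_read_tsc()):

#include <stdint.h>

static inline uint64_t example_read_tsc(void)
{
        uint32_t lo, hi;

        /* RDTSC returns the counter's low half in EAX and high half in EDX */
        asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
        return ((uint64_t)hi << 32) | lo;
}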
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f6402c4ba10d..fe7014176eb0 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -619,28 +619,37 @@ END(syscall_badsys)
61927:; 61927:;
620 620
621/* 621/*
622 * Build the entry stubs and pointer table with 622 * Build the entry stubs and pointer table with some assembler magic.
623 * some assembler magic. 623 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
624 * single cache line on all modern x86 implementations.
624 */ 625 */
625.section .rodata,"a" 626.section .init.rodata,"a"
626ENTRY(interrupt) 627ENTRY(interrupt)
627.text 628.text
628 629 .p2align 5
630 .p2align CONFIG_X86_L1_CACHE_SHIFT
629ENTRY(irq_entries_start) 631ENTRY(irq_entries_start)
630 RING0_INT_FRAME 632 RING0_INT_FRAME
631vector=0 633vector=FIRST_EXTERNAL_VECTOR
632.rept NR_VECTORS 634.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
633 ALIGN 635 .balign 32
634 .if vector 636 .rept 7
637 .if vector < NR_VECTORS
638 .if vector <> FIRST_EXTERNAL_VECTOR
635 CFI_ADJUST_CFA_OFFSET -4 639 CFI_ADJUST_CFA_OFFSET -4
636 .endif 640 .endif
6371: pushl $~(vector) 6411: pushl $(~vector+0x80) /* Note: always in signed byte range */
638 CFI_ADJUST_CFA_OFFSET 4 642 CFI_ADJUST_CFA_OFFSET 4
639 jmp common_interrupt 643 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
640 .previous 644 jmp 2f
645 .endif
646 .previous
641 .long 1b 647 .long 1b
642 .text 648 .text
643vector=vector+1 649vector=vector+1
650 .endif
651 .endr
6522: jmp common_interrupt
644.endr 653.endr
645END(irq_entries_start) 654END(irq_entries_start)
646 655
@@ -652,8 +661,9 @@ END(interrupt)
652 * the CPU automatically disables interrupts when executing an IRQ vector, 661 * the CPU automatically disables interrupts when executing an IRQ vector,
653 * so IRQ-flags tracing has to follow that: 662 * so IRQ-flags tracing has to follow that:
654 */ 663 */
655 ALIGN 664 .p2align CONFIG_X86_L1_CACHE_SHIFT
656common_interrupt: 665common_interrupt:
666 addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */
657 SAVE_ALL 667 SAVE_ALL
658 TRACE_IRQS_OFF 668 TRACE_IRQS_OFF
659 movl %esp,%eax 669 movl %esp,%eax
@@ -678,65 +688,6 @@ ENDPROC(name)
678/* The include is where all of the SMP etc. interrupts come from */ 688/* The include is where all of the SMP etc. interrupts come from */
679#include "entry_arch.h" 689#include "entry_arch.h"
680 690
681KPROBE_ENTRY(page_fault)
682 RING0_EC_FRAME
683 pushl $do_page_fault
684 CFI_ADJUST_CFA_OFFSET 4
685 ALIGN
686error_code:
687 /* the function address is in %fs's slot on the stack */
688 pushl %es
689 CFI_ADJUST_CFA_OFFSET 4
690 /*CFI_REL_OFFSET es, 0*/
691 pushl %ds
692 CFI_ADJUST_CFA_OFFSET 4
693 /*CFI_REL_OFFSET ds, 0*/
694 pushl %eax
695 CFI_ADJUST_CFA_OFFSET 4
696 CFI_REL_OFFSET eax, 0
697 pushl %ebp
698 CFI_ADJUST_CFA_OFFSET 4
699 CFI_REL_OFFSET ebp, 0
700 pushl %edi
701 CFI_ADJUST_CFA_OFFSET 4
702 CFI_REL_OFFSET edi, 0
703 pushl %esi
704 CFI_ADJUST_CFA_OFFSET 4
705 CFI_REL_OFFSET esi, 0
706 pushl %edx
707 CFI_ADJUST_CFA_OFFSET 4
708 CFI_REL_OFFSET edx, 0
709 pushl %ecx
710 CFI_ADJUST_CFA_OFFSET 4
711 CFI_REL_OFFSET ecx, 0
712 pushl %ebx
713 CFI_ADJUST_CFA_OFFSET 4
714 CFI_REL_OFFSET ebx, 0
715 cld
716 pushl %fs
717 CFI_ADJUST_CFA_OFFSET 4
718 /*CFI_REL_OFFSET fs, 0*/
719 movl $(__KERNEL_PERCPU), %ecx
720 movl %ecx, %fs
721 UNWIND_ESPFIX_STACK
722 popl %ecx
723 CFI_ADJUST_CFA_OFFSET -4
724 /*CFI_REGISTER es, ecx*/
725 movl PT_FS(%esp), %edi # get the function address
726 movl PT_ORIG_EAX(%esp), %edx # get the error code
727 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
728 mov %ecx, PT_FS(%esp)
729 /*CFI_REL_OFFSET fs, ES*/
730 movl $(__USER_DS), %ecx
731 movl %ecx, %ds
732 movl %ecx, %es
733 TRACE_IRQS_OFF
734 movl %esp,%eax # pt_regs pointer
735 call *%edi
736 jmp ret_from_exception
737 CFI_ENDPROC
738KPROBE_END(page_fault)
739
740ENTRY(coprocessor_error) 691ENTRY(coprocessor_error)
741 RING0_INT_FRAME 692 RING0_INT_FRAME
742 pushl $0 693 pushl $0
@@ -767,140 +718,6 @@ ENTRY(device_not_available)
767 CFI_ENDPROC 718 CFI_ENDPROC
768END(device_not_available) 719END(device_not_available)
769 720
770/*
771 * Debug traps and NMI can happen at the one SYSENTER instruction
772 * that sets up the real kernel stack. Check here, since we can't
773 * allow the wrong stack to be used.
774 *
775 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
776 * already pushed 3 words if it hits on the sysenter instruction:
777 * eflags, cs and eip.
778 *
779 * We just load the right stack, and push the three (known) values
780 * by hand onto the new stack - while updating the return eip past
781 * the instruction that would have done it for sysenter.
782 */
783#define FIX_STACK(offset, ok, label) \
784 cmpw $__KERNEL_CS,4(%esp); \
785 jne ok; \
786label: \
787 movl TSS_sysenter_sp0+offset(%esp),%esp; \
788 CFI_DEF_CFA esp, 0; \
789 CFI_UNDEFINED eip; \
790 pushfl; \
791 CFI_ADJUST_CFA_OFFSET 4; \
792 pushl $__KERNEL_CS; \
793 CFI_ADJUST_CFA_OFFSET 4; \
794 pushl $sysenter_past_esp; \
795 CFI_ADJUST_CFA_OFFSET 4; \
796 CFI_REL_OFFSET eip, 0
797
798KPROBE_ENTRY(debug)
799 RING0_INT_FRAME
800 cmpl $ia32_sysenter_target,(%esp)
801 jne debug_stack_correct
802 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
803debug_stack_correct:
804 pushl $-1 # mark this as an int
805 CFI_ADJUST_CFA_OFFSET 4
806 SAVE_ALL
807 TRACE_IRQS_OFF
808 xorl %edx,%edx # error code 0
809 movl %esp,%eax # pt_regs pointer
810 call do_debug
811 jmp ret_from_exception
812 CFI_ENDPROC
813KPROBE_END(debug)
814
815/*
816 * NMI is doubly nasty. It can happen _while_ we're handling
817 * a debug fault, and the debug fault hasn't yet been able to
818 * clear up the stack. So we first check whether we got an
819 * NMI on the sysenter entry path, but after that we need to
820 * check whether we got an NMI on the debug path where the debug
821 * fault happened on the sysenter path.
822 */
823KPROBE_ENTRY(nmi)
824 RING0_INT_FRAME
825 pushl %eax
826 CFI_ADJUST_CFA_OFFSET 4
827 movl %ss, %eax
828 cmpw $__ESPFIX_SS, %ax
829 popl %eax
830 CFI_ADJUST_CFA_OFFSET -4
831 je nmi_espfix_stack
832 cmpl $ia32_sysenter_target,(%esp)
833 je nmi_stack_fixup
834 pushl %eax
835 CFI_ADJUST_CFA_OFFSET 4
836 movl %esp,%eax
837 /* Do not access memory above the end of our stack page,
838 * it might not exist.
839 */
840 andl $(THREAD_SIZE-1),%eax
841 cmpl $(THREAD_SIZE-20),%eax
842 popl %eax
843 CFI_ADJUST_CFA_OFFSET -4
844 jae nmi_stack_correct
845 cmpl $ia32_sysenter_target,12(%esp)
846 je nmi_debug_stack_check
847nmi_stack_correct:
848 /* We have a RING0_INT_FRAME here */
849 pushl %eax
850 CFI_ADJUST_CFA_OFFSET 4
851 SAVE_ALL
852 TRACE_IRQS_OFF
853 xorl %edx,%edx # zero error code
854 movl %esp,%eax # pt_regs pointer
855 call do_nmi
856 jmp restore_nocheck_notrace
857 CFI_ENDPROC
858
859nmi_stack_fixup:
860 RING0_INT_FRAME
861 FIX_STACK(12,nmi_stack_correct, 1)
862 jmp nmi_stack_correct
863
864nmi_debug_stack_check:
865 /* We have a RING0_INT_FRAME here */
866 cmpw $__KERNEL_CS,16(%esp)
867 jne nmi_stack_correct
868 cmpl $debug,(%esp)
869 jb nmi_stack_correct
870 cmpl $debug_esp_fix_insn,(%esp)
871 ja nmi_stack_correct
872 FIX_STACK(24,nmi_stack_correct, 1)
873 jmp nmi_stack_correct
874
875nmi_espfix_stack:
876 /* We have a RING0_INT_FRAME here.
877 *
878 * create the pointer to lss back
879 */
880 pushl %ss
881 CFI_ADJUST_CFA_OFFSET 4
882 pushl %esp
883 CFI_ADJUST_CFA_OFFSET 4
884 addw $4, (%esp)
885 /* copy the iret frame of 12 bytes */
886 .rept 3
887 pushl 16(%esp)
888 CFI_ADJUST_CFA_OFFSET 4
889 .endr
890 pushl %eax
891 CFI_ADJUST_CFA_OFFSET 4
892 SAVE_ALL
893 TRACE_IRQS_OFF
894 FIXUP_ESPFIX_STACK # %eax == %esp
895 xorl %edx,%edx # zero error code
896 call do_nmi
897 RESTORE_REGS
898 lss 12+4(%esp), %esp # back to espfix stack
899 CFI_ADJUST_CFA_OFFSET -24
900 jmp irq_return
901 CFI_ENDPROC
902KPROBE_END(nmi)
903
904#ifdef CONFIG_PARAVIRT 721#ifdef CONFIG_PARAVIRT
905ENTRY(native_iret) 722ENTRY(native_iret)
906 iret 723 iret
@@ -916,19 +733,6 @@ ENTRY(native_irq_enable_sysexit)
916END(native_irq_enable_sysexit) 733END(native_irq_enable_sysexit)
917#endif 734#endif
918 735
919KPROBE_ENTRY(int3)
920 RING0_INT_FRAME
921 pushl $-1 # mark this as an int
922 CFI_ADJUST_CFA_OFFSET 4
923 SAVE_ALL
924 TRACE_IRQS_OFF
925 xorl %edx,%edx # zero error code
926 movl %esp,%eax # pt_regs pointer
927 call do_int3
928 jmp ret_from_exception
929 CFI_ENDPROC
930KPROBE_END(int3)
931
932ENTRY(overflow) 736ENTRY(overflow)
933 RING0_INT_FRAME 737 RING0_INT_FRAME
934 pushl $0 738 pushl $0
@@ -993,14 +797,6 @@ ENTRY(stack_segment)
993 CFI_ENDPROC 797 CFI_ENDPROC
994END(stack_segment) 798END(stack_segment)
995 799
996KPROBE_ENTRY(general_protection)
997 RING0_EC_FRAME
998 pushl $do_general_protection
999 CFI_ADJUST_CFA_OFFSET 4
1000 jmp error_code
1001 CFI_ENDPROC
1002KPROBE_END(general_protection)
1003
1004ENTRY(alignment_check) 800ENTRY(alignment_check)
1005 RING0_EC_FRAME 801 RING0_EC_FRAME
1006 pushl $do_alignment_check 802 pushl $do_alignment_check
@@ -1211,3 +1007,227 @@ END(mcount)
1211#include "syscall_table_32.S" 1007#include "syscall_table_32.S"
1212 1008
1213syscall_table_size=(.-sys_call_table) 1009syscall_table_size=(.-sys_call_table)
1010
1011/*
1012 * Some functions should be protected against kprobes
1013 */
1014 .pushsection .kprobes.text, "ax"
1015
1016ENTRY(page_fault)
1017 RING0_EC_FRAME
1018 pushl $do_page_fault
1019 CFI_ADJUST_CFA_OFFSET 4
1020 ALIGN
1021error_code:
1022 /* the function address is in %fs's slot on the stack */
1023 pushl %es
1024 CFI_ADJUST_CFA_OFFSET 4
1025 /*CFI_REL_OFFSET es, 0*/
1026 pushl %ds
1027 CFI_ADJUST_CFA_OFFSET 4
1028 /*CFI_REL_OFFSET ds, 0*/
1029 pushl %eax
1030 CFI_ADJUST_CFA_OFFSET 4
1031 CFI_REL_OFFSET eax, 0
1032 pushl %ebp
1033 CFI_ADJUST_CFA_OFFSET 4
1034 CFI_REL_OFFSET ebp, 0
1035 pushl %edi
1036 CFI_ADJUST_CFA_OFFSET 4
1037 CFI_REL_OFFSET edi, 0
1038 pushl %esi
1039 CFI_ADJUST_CFA_OFFSET 4
1040 CFI_REL_OFFSET esi, 0
1041 pushl %edx
1042 CFI_ADJUST_CFA_OFFSET 4
1043 CFI_REL_OFFSET edx, 0
1044 pushl %ecx
1045 CFI_ADJUST_CFA_OFFSET 4
1046 CFI_REL_OFFSET ecx, 0
1047 pushl %ebx
1048 CFI_ADJUST_CFA_OFFSET 4
1049 CFI_REL_OFFSET ebx, 0
1050 cld
1051 pushl %fs
1052 CFI_ADJUST_CFA_OFFSET 4
1053 /*CFI_REL_OFFSET fs, 0*/
1054 movl $(__KERNEL_PERCPU), %ecx
1055 movl %ecx, %fs
1056 UNWIND_ESPFIX_STACK
1057 popl %ecx
1058 CFI_ADJUST_CFA_OFFSET -4
1059 /*CFI_REGISTER es, ecx*/
1060 movl PT_FS(%esp), %edi # get the function address
1061 movl PT_ORIG_EAX(%esp), %edx # get the error code
1062 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
1063 mov %ecx, PT_FS(%esp)
1064 /*CFI_REL_OFFSET fs, ES*/
1065 movl $(__USER_DS), %ecx
1066 movl %ecx, %ds
1067 movl %ecx, %es
1068 TRACE_IRQS_OFF
1069 movl %esp,%eax # pt_regs pointer
1070 call *%edi
1071 jmp ret_from_exception
1072 CFI_ENDPROC
1073END(page_fault)
1074
1075/*
1076 * Debug traps and NMI can happen at the one SYSENTER instruction
1077 * that sets up the real kernel stack. Check here, since we can't
1078 * allow the wrong stack to be used.
1079 *
1080 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
1081 * already pushed 3 words if it hits on the sysenter instruction:
1082 * eflags, cs and eip.
1083 *
1084 * We just load the right stack, and push the three (known) values
1085 * by hand onto the new stack - while updating the return eip past
1086 * the instruction that would have done it for sysenter.
1087 */
1088#define FIX_STACK(offset, ok, label) \
1089 cmpw $__KERNEL_CS,4(%esp); \
1090 jne ok; \
1091label: \
1092 movl TSS_sysenter_sp0+offset(%esp),%esp; \
1093 CFI_DEF_CFA esp, 0; \
1094 CFI_UNDEFINED eip; \
1095 pushfl; \
1096 CFI_ADJUST_CFA_OFFSET 4; \
1097 pushl $__KERNEL_CS; \
1098 CFI_ADJUST_CFA_OFFSET 4; \
1099 pushl $sysenter_past_esp; \
1100 CFI_ADJUST_CFA_OFFSET 4; \
1101 CFI_REL_OFFSET eip, 0
1102
1103ENTRY(debug)
1104 RING0_INT_FRAME
1105 cmpl $ia32_sysenter_target,(%esp)
1106 jne debug_stack_correct
1107 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
1108debug_stack_correct:
1109 pushl $-1 # mark this as an int
1110 CFI_ADJUST_CFA_OFFSET 4
1111 SAVE_ALL
1112 TRACE_IRQS_OFF
1113 xorl %edx,%edx # error code 0
1114 movl %esp,%eax # pt_regs pointer
1115 call do_debug
1116 jmp ret_from_exception
1117 CFI_ENDPROC
1118END(debug)
1119
1120/*
1121 * NMI is doubly nasty. It can happen _while_ we're handling
1122 * a debug fault, and the debug fault hasn't yet been able to
1123 * clear up the stack. So we first check whether we got an
1124 * NMI on the sysenter entry path, but after that we need to
1125 * check whether we got an NMI on the debug path where the debug
1126 * fault happened on the sysenter path.
1127 */
1128ENTRY(nmi)
1129 RING0_INT_FRAME
1130 pushl %eax
1131 CFI_ADJUST_CFA_OFFSET 4
1132 movl %ss, %eax
1133 cmpw $__ESPFIX_SS, %ax
1134 popl %eax
1135 CFI_ADJUST_CFA_OFFSET -4
1136 je nmi_espfix_stack
1137 cmpl $ia32_sysenter_target,(%esp)
1138 je nmi_stack_fixup
1139 pushl %eax
1140 CFI_ADJUST_CFA_OFFSET 4
1141 movl %esp,%eax
1142 /* Do not access memory above the end of our stack page,
1143 * it might not exist.
1144 */
1145 andl $(THREAD_SIZE-1),%eax
1146 cmpl $(THREAD_SIZE-20),%eax
1147 popl %eax
1148 CFI_ADJUST_CFA_OFFSET -4
1149 jae nmi_stack_correct
1150 cmpl $ia32_sysenter_target,12(%esp)
1151 je nmi_debug_stack_check
1152nmi_stack_correct:
1153 /* We have a RING0_INT_FRAME here */
1154 pushl %eax
1155 CFI_ADJUST_CFA_OFFSET 4
1156 SAVE_ALL
1157 TRACE_IRQS_OFF
1158 xorl %edx,%edx # zero error code
1159 movl %esp,%eax # pt_regs pointer
1160 call do_nmi
1161 jmp restore_nocheck_notrace
1162 CFI_ENDPROC
1163
1164nmi_stack_fixup:
1165 RING0_INT_FRAME
1166 FIX_STACK(12,nmi_stack_correct, 1)
1167 jmp nmi_stack_correct
1168
1169nmi_debug_stack_check:
1170 /* We have a RING0_INT_FRAME here */
1171 cmpw $__KERNEL_CS,16(%esp)
1172 jne nmi_stack_correct
1173 cmpl $debug,(%esp)
1174 jb nmi_stack_correct
1175 cmpl $debug_esp_fix_insn,(%esp)
1176 ja nmi_stack_correct
1177 FIX_STACK(24,nmi_stack_correct, 1)
1178 jmp nmi_stack_correct
1179
1180nmi_espfix_stack:
1181 /* We have a RING0_INT_FRAME here.
1182 *
1183 * create the pointer to lss back
1184 */
1185 pushl %ss
1186 CFI_ADJUST_CFA_OFFSET 4
1187 pushl %esp
1188 CFI_ADJUST_CFA_OFFSET 4
1189 addw $4, (%esp)
1190 /* copy the iret frame of 12 bytes */
1191 .rept 3
1192 pushl 16(%esp)
1193 CFI_ADJUST_CFA_OFFSET 4
1194 .endr
1195 pushl %eax
1196 CFI_ADJUST_CFA_OFFSET 4
1197 SAVE_ALL
1198 TRACE_IRQS_OFF
1199 FIXUP_ESPFIX_STACK # %eax == %esp
1200 xorl %edx,%edx # zero error code
1201 call do_nmi
1202 RESTORE_REGS
1203 lss 12+4(%esp), %esp # back to espfix stack
1204 CFI_ADJUST_CFA_OFFSET -24
1205 jmp irq_return
1206 CFI_ENDPROC
1207END(nmi)
1208
1209ENTRY(int3)
1210 RING0_INT_FRAME
1211 pushl $-1 # mark this as an int
1212 CFI_ADJUST_CFA_OFFSET 4
1213 SAVE_ALL
1214 TRACE_IRQS_OFF
1215 xorl %edx,%edx # zero error code
1216 movl %esp,%eax # pt_regs pointer
1217 call do_int3
1218 jmp ret_from_exception
1219 CFI_ENDPROC
1220END(int3)
1221
1222ENTRY(general_protection)
1223 RING0_EC_FRAME
1224 pushl $do_general_protection
1225 CFI_ADJUST_CFA_OFFSET 4
1226 jmp error_code
1227 CFI_ENDPROC
1228END(general_protection)
1229
1230/*
1231 * End of kprobes section
1232 */
1233 .popsection
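The rebuilt irq_entries_start above packs seven push stubs and their short jumps into each 32-byte, cache-line-aligned chunk, and the pushed value ~vector + 0x80 is chosen so it always fits in a signed byte (keeping each push at its shortest imm8 encoding); common_interrupt then adds -0x80 to recover the usual ~vector. A small worked check of that arithmetic, assuming this kernel's customary vector range (FIRST_EXTERNAL_VECTOR = 0x20, NR_VECTORS = 256):

#include <stdio.h>

#define EXAMPLE_FIRST_EXTERNAL_VECTOR  0x20
#define EXAMPLE_NR_VECTORS             256

int main(void)
{
        int nstubs  = EXAMPLE_NR_VECTORS - EXAMPLE_FIRST_EXTERNAL_VECTOR;
        int nchunks = (nstubs + 6) / 7;        /* 7 stubs per 32-byte chunk */
        int vector;

        printf("%d stubs packed into %d chunks\n", nstubs, nchunks);  /* 224 stubs, 32 chunks */

        for (vector = EXAMPLE_FIRST_EXTERNAL_VECTOR; vector < EXAMPLE_NR_VECTORS; vector++) {
                int pushed  = ~vector + 0x80;  /* what the stub pushes */
                int decoded = pushed - 0x80;   /* what common_interrupt's addl $-0x80 yields */

                /* always a signed byte, and decoding always gives back ~vector */
                if (pushed < -128 || pushed > 127 || decoded != ~vector)
                        return 1;
        }
        return 0;
}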
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 4a16bf31c783..e41734a537bd 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -11,15 +11,15 @@
11 * 11 *
12 * NOTE: This code handles signal-recognition, which happens every time 12 * NOTE: This code handles signal-recognition, which happens every time
13 * after an interrupt and after each system call. 13 * after an interrupt and after each system call.
14 * 14 *
15 * Normal syscalls and interrupts don't save a full stack frame, this is 15 * Normal syscalls and interrupts don't save a full stack frame, this is
16 * only done for syscall tracing, signals or fork/exec et.al. 16 * only done for syscall tracing, signals or fork/exec et.al.
17 * 17 *
18 * A note on terminology: 18 * A note on terminology:
19 * - top of stack: Architecture defined interrupt frame from SS to RIP 19 * - top of stack: Architecture defined interrupt frame from SS to RIP
20 * at the top of the kernel process stack. 20 * at the top of the kernel process stack.
21 * - partial stack frame: partially saved registers upto R11. 21 * - partial stack frame: partially saved registers upto R11.
22 * - full stack frame: Like partial stack frame, but all register saved. 22 * - full stack frame: Like partial stack frame, but all register saved.
23 * 23 *
24 * Some macro usage: 24 * Some macro usage:
25 * - CFI macros are used to generate dwarf2 unwind information for better 25 * - CFI macros are used to generate dwarf2 unwind information for better
@@ -60,7 +60,6 @@
60#define __AUDIT_ARCH_LE 0x40000000 60#define __AUDIT_ARCH_LE 0x40000000
61 61
62 .code64 62 .code64
63
64#ifdef CONFIG_FUNCTION_TRACER 63#ifdef CONFIG_FUNCTION_TRACER
65#ifdef CONFIG_DYNAMIC_FTRACE 64#ifdef CONFIG_DYNAMIC_FTRACE
66ENTRY(mcount) 65ENTRY(mcount)
@@ -142,7 +141,7 @@ END(mcount)
142 141
143#ifndef CONFIG_PREEMPT 142#ifndef CONFIG_PREEMPT
144#define retint_kernel retint_restore_args 143#define retint_kernel retint_restore_args
145#endif 144#endif
146 145
147#ifdef CONFIG_PARAVIRT 146#ifdef CONFIG_PARAVIRT
148ENTRY(native_usergs_sysret64) 147ENTRY(native_usergs_sysret64)
@@ -161,29 +160,29 @@ ENTRY(native_usergs_sysret64)
161.endm 160.endm
162 161
163/* 162/*
164 * C code is not supposed to know about undefined top of stack. Every time 163 * C code is not supposed to know about undefined top of stack. Every time
165 * a C function with an pt_regs argument is called from the SYSCALL based 164 * a C function with an pt_regs argument is called from the SYSCALL based
166 * fast path FIXUP_TOP_OF_STACK is needed. 165 * fast path FIXUP_TOP_OF_STACK is needed.
167 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs 166 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
168 * manipulation. 167 * manipulation.
169 */ 168 */
170 169
171 /* %rsp:at FRAMEEND */ 170 /* %rsp:at FRAMEEND */
172 .macro FIXUP_TOP_OF_STACK tmp 171 .macro FIXUP_TOP_OF_STACK tmp offset=0
173 movq %gs:pda_oldrsp,\tmp 172 movq %gs:pda_oldrsp,\tmp
174 movq \tmp,RSP(%rsp) 173 movq \tmp,RSP+\offset(%rsp)
175 movq $__USER_DS,SS(%rsp) 174 movq $__USER_DS,SS+\offset(%rsp)
176 movq $__USER_CS,CS(%rsp) 175 movq $__USER_CS,CS+\offset(%rsp)
177 movq $-1,RCX(%rsp) 176 movq $-1,RCX+\offset(%rsp)
178 movq R11(%rsp),\tmp /* get eflags */ 177 movq R11+\offset(%rsp),\tmp /* get eflags */
179 movq \tmp,EFLAGS(%rsp) 178 movq \tmp,EFLAGS+\offset(%rsp)
180 .endm 179 .endm
181 180
182 .macro RESTORE_TOP_OF_STACK tmp,offset=0 181 .macro RESTORE_TOP_OF_STACK tmp offset=0
183 movq RSP-\offset(%rsp),\tmp 182 movq RSP+\offset(%rsp),\tmp
184 movq \tmp,%gs:pda_oldrsp 183 movq \tmp,%gs:pda_oldrsp
185 movq EFLAGS-\offset(%rsp),\tmp 184 movq EFLAGS+\offset(%rsp),\tmp
186 movq \tmp,R11-\offset(%rsp) 185 movq \tmp,R11+\offset(%rsp)
187 .endm 186 .endm
188 187
189 .macro FAKE_STACK_FRAME child_rip 188 .macro FAKE_STACK_FRAME child_rip
@@ -195,7 +194,7 @@ ENTRY(native_usergs_sysret64)
195 pushq %rax /* rsp */ 194 pushq %rax /* rsp */
196 CFI_ADJUST_CFA_OFFSET 8 195 CFI_ADJUST_CFA_OFFSET 8
197 CFI_REL_OFFSET rsp,0 196 CFI_REL_OFFSET rsp,0
198 pushq $(1<<9) /* eflags - interrupts on */ 197 pushq $X86_EFLAGS_IF /* eflags - interrupts on */
199 CFI_ADJUST_CFA_OFFSET 8 198 CFI_ADJUST_CFA_OFFSET 8
200 /*CFI_REL_OFFSET rflags,0*/ 199 /*CFI_REL_OFFSET rflags,0*/
201 pushq $__KERNEL_CS /* cs */ 200 pushq $__KERNEL_CS /* cs */
@@ -213,41 +212,160 @@ ENTRY(native_usergs_sysret64)
213 CFI_ADJUST_CFA_OFFSET -(6*8) 212 CFI_ADJUST_CFA_OFFSET -(6*8)
214 .endm 213 .endm
215 214
216 .macro CFI_DEFAULT_STACK start=1 215/*
216 * initial frame state for interrupts (and exceptions without error code)
217 */
218 .macro EMPTY_FRAME start=1 offset=0
217 .if \start 219 .if \start
218 CFI_STARTPROC simple 220 CFI_STARTPROC simple
219 CFI_SIGNAL_FRAME 221 CFI_SIGNAL_FRAME
220 CFI_DEF_CFA rsp,SS+8 222 CFI_DEF_CFA rsp,8+\offset
221 .else 223 .else
222 CFI_DEF_CFA_OFFSET SS+8 224 CFI_DEF_CFA_OFFSET 8+\offset
223 .endif 225 .endif
224 CFI_REL_OFFSET r15,R15
225 CFI_REL_OFFSET r14,R14
226 CFI_REL_OFFSET r13,R13
227 CFI_REL_OFFSET r12,R12
228 CFI_REL_OFFSET rbp,RBP
229 CFI_REL_OFFSET rbx,RBX
230 CFI_REL_OFFSET r11,R11
231 CFI_REL_OFFSET r10,R10
232 CFI_REL_OFFSET r9,R9
233 CFI_REL_OFFSET r8,R8
234 CFI_REL_OFFSET rax,RAX
235 CFI_REL_OFFSET rcx,RCX
236 CFI_REL_OFFSET rdx,RDX
237 CFI_REL_OFFSET rsi,RSI
238 CFI_REL_OFFSET rdi,RDI
239 CFI_REL_OFFSET rip,RIP
240 /*CFI_REL_OFFSET cs,CS*/
241 /*CFI_REL_OFFSET rflags,EFLAGS*/
242 CFI_REL_OFFSET rsp,RSP
243 /*CFI_REL_OFFSET ss,SS*/
244 .endm 226 .endm
227
228/*
229 * initial frame state for interrupts (and exceptions without error code)
230 */
231 .macro INTR_FRAME start=1 offset=0
232 EMPTY_FRAME \start, SS+8+\offset-RIP
233 /*CFI_REL_OFFSET ss, SS+\offset-RIP*/
234 CFI_REL_OFFSET rsp, RSP+\offset-RIP
235 /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
236 /*CFI_REL_OFFSET cs, CS+\offset-RIP*/
237 CFI_REL_OFFSET rip, RIP+\offset-RIP
238 .endm
239
240/*
241 * initial frame state for exceptions with error code (and interrupts
242 * with vector already pushed)
243 */
244 .macro XCPT_FRAME start=1 offset=0
245 INTR_FRAME \start, RIP+\offset-ORIG_RAX
246 /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
247 .endm
248
249/*
250 * frame that enables calling into C.
251 */
252 .macro PARTIAL_FRAME start=1 offset=0
253 XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
254 CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
255 CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
256 CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
257 CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
258 CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
259 CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
260 CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
261 CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
262 CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
263 .endm
264
265/*
266 * frame that enables passing a complete pt_regs to a C function.
267 */
268 .macro DEFAULT_FRAME start=1 offset=0
269 PARTIAL_FRAME \start, R11+\offset-R15
270 CFI_REL_OFFSET rbx, RBX+\offset
271 CFI_REL_OFFSET rbp, RBP+\offset
272 CFI_REL_OFFSET r12, R12+\offset
273 CFI_REL_OFFSET r13, R13+\offset
274 CFI_REL_OFFSET r14, R14+\offset
275 CFI_REL_OFFSET r15, R15+\offset
276 .endm
277
278/* save partial stack frame */
279ENTRY(save_args)
280 XCPT_FRAME
281 cld
282 movq_cfi rdi, RDI+16-ARGOFFSET
283 movq_cfi rsi, RSI+16-ARGOFFSET
284 movq_cfi rdx, RDX+16-ARGOFFSET
285 movq_cfi rcx, RCX+16-ARGOFFSET
286 movq_cfi rax, RAX+16-ARGOFFSET
287 movq_cfi r8, R8+16-ARGOFFSET
288 movq_cfi r9, R9+16-ARGOFFSET
289 movq_cfi r10, R10+16-ARGOFFSET
290 movq_cfi r11, R11+16-ARGOFFSET
291
292 leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */
293 movq_cfi rbp, 8 /* push %rbp */
294 leaq 8(%rsp), %rbp /* mov %rsp, %ebp */
295 testl $3, CS(%rdi)
296 je 1f
297 SWAPGS
298 /*
299 * irqcount is used to check if a CPU is already on an interrupt stack
300 * or not. While this is essentially redundant with preempt_count it is
301 * a little cheaper to use a separate counter in the PDA (short of
302 * moving irq_enter into assembly, which would be too much work)
303 */
3041: incl %gs:pda_irqcount
305 jne 2f
306 popq_cfi %rax /* move return address... */
307 mov %gs:pda_irqstackptr,%rsp
308 EMPTY_FRAME 0
309 pushq_cfi %rax /* ... to the new stack */
310 /*
311 * We entered an interrupt context - irqs are off:
312 */
3132: TRACE_IRQS_OFF
314 ret
315 CFI_ENDPROC
316END(save_args)
317
318ENTRY(save_rest)
319 PARTIAL_FRAME 1 REST_SKIP+8
320 movq 5*8+16(%rsp), %r11 /* save return address */
321 movq_cfi rbx, RBX+16
322 movq_cfi rbp, RBP+16
323 movq_cfi r12, R12+16
324 movq_cfi r13, R13+16
325 movq_cfi r14, R14+16
326 movq_cfi r15, R15+16
327 movq %r11, 8(%rsp) /* return address */
328 FIXUP_TOP_OF_STACK %r11, 16
329 ret
330 CFI_ENDPROC
331END(save_rest)
332
333/* save complete stack frame */
334ENTRY(save_paranoid)
335 XCPT_FRAME 1 RDI+8
336 cld
337 movq_cfi rdi, RDI+8
338 movq_cfi rsi, RSI+8
339 movq_cfi rdx, RDX+8
340 movq_cfi rcx, RCX+8
341 movq_cfi rax, RAX+8
342 movq_cfi r8, R8+8
343 movq_cfi r9, R9+8
344 movq_cfi r10, R10+8
345 movq_cfi r11, R11+8
346 movq_cfi rbx, RBX+8
347 movq_cfi rbp, RBP+8
348 movq_cfi r12, R12+8
349 movq_cfi r13, R13+8
350 movq_cfi r14, R14+8
351 movq_cfi r15, R15+8
352 movl $1,%ebx
353 movl $MSR_GS_BASE,%ecx
354 rdmsr
355 testl %edx,%edx
356 js 1f /* negative -> in kernel */
357 SWAPGS
358 xorl %ebx,%ebx
3591: ret
360 CFI_ENDPROC
361END(save_paranoid)
362
245/* 363/*
246 * A newly forked process directly context switches into this. 364 * A newly forked process directly context switches into this.
247 */ 365 */
248/* rdi: prev */ 366/* rdi: prev */
249ENTRY(ret_from_fork) 367ENTRY(ret_from_fork)
250 CFI_DEFAULT_STACK 368 DEFAULT_FRAME
251 push kernel_eflags(%rip) 369 push kernel_eflags(%rip)
252 CFI_ADJUST_CFA_OFFSET 8 370 CFI_ADJUST_CFA_OFFSET 8
253 popf # reset kernel eflags 371 popf # reset kernel eflags
@@ -257,19 +375,19 @@ ENTRY(ret_from_fork)
257 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) 375 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
258 CFI_REMEMBER_STATE 376 CFI_REMEMBER_STATE
259 jnz rff_trace 377 jnz rff_trace
260rff_action: 378rff_action:
261 RESTORE_REST 379 RESTORE_REST
262 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? 380 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
263 je int_ret_from_sys_call 381 je int_ret_from_sys_call
264 testl $_TIF_IA32,TI_flags(%rcx) 382 testl $_TIF_IA32,TI_flags(%rcx)
265 jnz int_ret_from_sys_call 383 jnz int_ret_from_sys_call
266 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET 384 RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
267 jmp ret_from_sys_call 385 jmp ret_from_sys_call
268 CFI_RESTORE_STATE 386 CFI_RESTORE_STATE
269rff_trace: 387rff_trace:
270 movq %rsp,%rdi 388 movq %rsp,%rdi
271 call syscall_trace_leave 389 call syscall_trace_leave
272 GET_THREAD_INFO(%rcx) 390 GET_THREAD_INFO(%rcx)
273 jmp rff_action 391 jmp rff_action
274 CFI_ENDPROC 392 CFI_ENDPROC
275END(ret_from_fork) 393END(ret_from_fork)
@@ -280,20 +398,20 @@ END(ret_from_fork)
280 * SYSCALL does not save anything on the stack and does not change the 398 * SYSCALL does not save anything on the stack and does not change the
281 * stack pointer. 399 * stack pointer.
282 */ 400 */
283 401
284/* 402/*
285 * Register setup: 403 * Register setup:
286 * rax system call number 404 * rax system call number
287 * rdi arg0 405 * rdi arg0
288 * rcx return address for syscall/sysret, C arg3 406 * rcx return address for syscall/sysret, C arg3
289 * rsi arg1 407 * rsi arg1
290 * rdx arg2 408 * rdx arg2
291 * r10 arg3 (--> moved to rcx for C) 409 * r10 arg3 (--> moved to rcx for C)
292 * r8 arg4 410 * r8 arg4
293 * r9 arg5 411 * r9 arg5
294 * r11 eflags for syscall/sysret, temporary for C 412 * r11 eflags for syscall/sysret, temporary for C
295 * r12-r15,rbp,rbx saved by C code, not touched. 413 * r12-r15,rbp,rbx saved by C code, not touched.
296 * 414 *
297 * Interrupts are off on entry. 415 * Interrupts are off on entry.
298 * Only called from user space. 416 * Only called from user space.
299 * 417 *
@@ -303,7 +421,7 @@ END(ret_from_fork)
303 * When user can change the frames always force IRET. That is because 421 * When user can change the frames always force IRET. That is because
304 * it deals with uncanonical addresses better. SYSRET has trouble 422 * it deals with uncanonical addresses better. SYSRET has trouble
305 * with them due to bugs in both AMD and Intel CPUs. 423 * with them due to bugs in both AMD and Intel CPUs.
306 */ 424 */
307 425
308ENTRY(system_call) 426ENTRY(system_call)
309 CFI_STARTPROC simple 427 CFI_STARTPROC simple
@@ -319,7 +437,7 @@ ENTRY(system_call)
319 */ 437 */
320ENTRY(system_call_after_swapgs) 438ENTRY(system_call_after_swapgs)
321 439
322 movq %rsp,%gs:pda_oldrsp 440 movq %rsp,%gs:pda_oldrsp
323 movq %gs:pda_kernelstack,%rsp 441 movq %gs:pda_kernelstack,%rsp
324 /* 442 /*
325 * No need to follow this irqs off/on section - it's straight 443 * No need to follow this irqs off/on section - it's straight
@@ -327,7 +445,7 @@ ENTRY(system_call_after_swapgs)
327 */ 445 */
328 ENABLE_INTERRUPTS(CLBR_NONE) 446 ENABLE_INTERRUPTS(CLBR_NONE)
329 SAVE_ARGS 8,1 447 SAVE_ARGS 8,1
330 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) 448 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
331 movq %rcx,RIP-ARGOFFSET(%rsp) 449 movq %rcx,RIP-ARGOFFSET(%rsp)
332 CFI_REL_OFFSET rip,RIP-ARGOFFSET 450 CFI_REL_OFFSET rip,RIP-ARGOFFSET
333 GET_THREAD_INFO(%rcx) 451 GET_THREAD_INFO(%rcx)
@@ -341,19 +459,19 @@ system_call_fastpath:
341 movq %rax,RAX-ARGOFFSET(%rsp) 459 movq %rax,RAX-ARGOFFSET(%rsp)
342/* 460/*
343 * Syscall return path ending with SYSRET (fast path) 461 * Syscall return path ending with SYSRET (fast path)
344 * Has incomplete stack frame and undefined top of stack. 462 * Has incomplete stack frame and undefined top of stack.
345 */ 463 */
346ret_from_sys_call: 464ret_from_sys_call:
347 movl $_TIF_ALLWORK_MASK,%edi 465 movl $_TIF_ALLWORK_MASK,%edi
348 /* edi: flagmask */ 466 /* edi: flagmask */
349sysret_check: 467sysret_check:
350 LOCKDEP_SYS_EXIT 468 LOCKDEP_SYS_EXIT
351 GET_THREAD_INFO(%rcx) 469 GET_THREAD_INFO(%rcx)
352 DISABLE_INTERRUPTS(CLBR_NONE) 470 DISABLE_INTERRUPTS(CLBR_NONE)
353 TRACE_IRQS_OFF 471 TRACE_IRQS_OFF
354 movl TI_flags(%rcx),%edx 472 movl TI_flags(%rcx),%edx
355 andl %edi,%edx 473 andl %edi,%edx
356 jnz sysret_careful 474 jnz sysret_careful
357 CFI_REMEMBER_STATE 475 CFI_REMEMBER_STATE
358 /* 476 /*
359 * sysretq will re-enable interrupts: 477 * sysretq will re-enable interrupts:
@@ -368,7 +486,7 @@ sysret_check:
368 486
369 CFI_RESTORE_STATE 487 CFI_RESTORE_STATE
370 /* Handle reschedules */ 488 /* Handle reschedules */
371 /* edx: work, edi: workmask */ 489 /* edx: work, edi: workmask */
372sysret_careful: 490sysret_careful:
373 bt $TIF_NEED_RESCHED,%edx 491 bt $TIF_NEED_RESCHED,%edx
374 jnc sysret_signal 492 jnc sysret_signal
@@ -381,7 +499,7 @@ sysret_careful:
381 CFI_ADJUST_CFA_OFFSET -8 499 CFI_ADJUST_CFA_OFFSET -8
382 jmp sysret_check 500 jmp sysret_check
383 501
384 /* Handle a signal */ 502 /* Handle a signal */
385sysret_signal: 503sysret_signal:
386 TRACE_IRQS_ON 504 TRACE_IRQS_ON
387 ENABLE_INTERRUPTS(CLBR_NONE) 505 ENABLE_INTERRUPTS(CLBR_NONE)
@@ -390,17 +508,20 @@ sysret_signal:
390 jc sysret_audit 508 jc sysret_audit
391#endif 509#endif
392 /* edx: work flags (arg3) */ 510 /* edx: work flags (arg3) */
393 leaq do_notify_resume(%rip),%rax
394 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 511 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
395 xorl %esi,%esi # oldset -> arg2 512 xorl %esi,%esi # oldset -> arg2
396 call ptregscall_common 513 SAVE_REST
514 FIXUP_TOP_OF_STACK %r11
515 call do_notify_resume
516 RESTORE_TOP_OF_STACK %r11
517 RESTORE_REST
397 movl $_TIF_WORK_MASK,%edi 518 movl $_TIF_WORK_MASK,%edi
398 /* Use IRET because user could have changed frame. This 519 /* Use IRET because user could have changed frame. This
399 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ 520 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
400 DISABLE_INTERRUPTS(CLBR_NONE) 521 DISABLE_INTERRUPTS(CLBR_NONE)
401 TRACE_IRQS_OFF 522 TRACE_IRQS_OFF
402 jmp int_with_check 523 jmp int_with_check
403 524
404badsys: 525badsys:
405 movq $-ENOSYS,RAX-ARGOFFSET(%rsp) 526 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
406 jmp ret_from_sys_call 527 jmp ret_from_sys_call
@@ -439,7 +560,7 @@ sysret_audit:
439#endif /* CONFIG_AUDITSYSCALL */ 560#endif /* CONFIG_AUDITSYSCALL */
440 561
441 /* Do syscall tracing */ 562 /* Do syscall tracing */
442tracesys: 563tracesys:
443#ifdef CONFIG_AUDITSYSCALL 564#ifdef CONFIG_AUDITSYSCALL
444 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) 565 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
445 jz auditsys 566 jz auditsys
@@ -462,8 +583,8 @@ tracesys:
462 call *sys_call_table(,%rax,8) 583 call *sys_call_table(,%rax,8)
463 movq %rax,RAX-ARGOFFSET(%rsp) 584 movq %rax,RAX-ARGOFFSET(%rsp)
464 /* Use IRET because user could have changed frame */ 585 /* Use IRET because user could have changed frame */
465 586
466/* 587/*
467 * Syscall return path ending with IRET. 588 * Syscall return path ending with IRET.
468 * Has correct top of stack, but partial stack frame. 589 * Has correct top of stack, but partial stack frame.
469 */ 590 */
@@ -507,18 +628,18 @@ int_very_careful:
507 TRACE_IRQS_ON 628 TRACE_IRQS_ON
508 ENABLE_INTERRUPTS(CLBR_NONE) 629 ENABLE_INTERRUPTS(CLBR_NONE)
509 SAVE_REST 630 SAVE_REST
510 /* Check for syscall exit trace */ 631 /* Check for syscall exit trace */
511 testl $_TIF_WORK_SYSCALL_EXIT,%edx 632 testl $_TIF_WORK_SYSCALL_EXIT,%edx
512 jz int_signal 633 jz int_signal
513 pushq %rdi 634 pushq %rdi
514 CFI_ADJUST_CFA_OFFSET 8 635 CFI_ADJUST_CFA_OFFSET 8
515 leaq 8(%rsp),%rdi # &ptregs -> arg1 636 leaq 8(%rsp),%rdi # &ptregs -> arg1
516 call syscall_trace_leave 637 call syscall_trace_leave
517 popq %rdi 638 popq %rdi
518 CFI_ADJUST_CFA_OFFSET -8 639 CFI_ADJUST_CFA_OFFSET -8
519 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi 640 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
520 jmp int_restore_rest 641 jmp int_restore_rest
521 642
522int_signal: 643int_signal:
523 testl $_TIF_DO_NOTIFY_MASK,%edx 644 testl $_TIF_DO_NOTIFY_MASK,%edx
524 jz 1f 645 jz 1f
@@ -533,22 +654,24 @@ int_restore_rest:
533 jmp int_with_check 654 jmp int_with_check
534 CFI_ENDPROC 655 CFI_ENDPROC
535END(system_call) 656END(system_call)
536 657
537/* 658/*
538 * Certain special system calls that need to save a complete full stack frame. 659 * Certain special system calls that need to save a complete full stack frame.
539 */ 660 */
540
541 .macro PTREGSCALL label,func,arg 661 .macro PTREGSCALL label,func,arg
542 .globl \label 662ENTRY(\label)
543\label: 663 PARTIAL_FRAME 1 8 /* offset 8: return address */
544 leaq \func(%rip),%rax 664 subq $REST_SKIP, %rsp
545 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ 665 CFI_ADJUST_CFA_OFFSET REST_SKIP
546 jmp ptregscall_common 666 call save_rest
667 DEFAULT_FRAME 0 8 /* offset 8: return address */
668 leaq 8(%rsp), \arg /* pt_regs pointer */
669 call \func
670 jmp ptregscall_common
671 CFI_ENDPROC
547END(\label) 672END(\label)
548 .endm 673 .endm
549 674
550 CFI_STARTPROC
551
552 PTREGSCALL stub_clone, sys_clone, %r8 675 PTREGSCALL stub_clone, sys_clone, %r8
553 PTREGSCALL stub_fork, sys_fork, %rdi 676 PTREGSCALL stub_fork, sys_fork, %rdi
554 PTREGSCALL stub_vfork, sys_vfork, %rdi 677 PTREGSCALL stub_vfork, sys_vfork, %rdi
@@ -556,25 +679,18 @@ END(\label)
556 PTREGSCALL stub_iopl, sys_iopl, %rsi 679 PTREGSCALL stub_iopl, sys_iopl, %rsi
557 680
558ENTRY(ptregscall_common) 681ENTRY(ptregscall_common)
559 popq %r11 682 DEFAULT_FRAME 1 8 /* offset 8: return address */
560 CFI_ADJUST_CFA_OFFSET -8 683 RESTORE_TOP_OF_STACK %r11, 8
561 CFI_REGISTER rip, r11 684 movq_cfi_restore R15+8, r15
562 SAVE_REST 685 movq_cfi_restore R14+8, r14
563 movq %r11, %r15 686 movq_cfi_restore R13+8, r13
564 CFI_REGISTER rip, r15 687 movq_cfi_restore R12+8, r12
565 FIXUP_TOP_OF_STACK %r11 688 movq_cfi_restore RBP+8, rbp
566 call *%rax 689 movq_cfi_restore RBX+8, rbx
567 RESTORE_TOP_OF_STACK %r11 690 ret $REST_SKIP /* pop extended registers */
568 movq %r15, %r11
569 CFI_REGISTER rip, r11
570 RESTORE_REST
571 pushq %r11
572 CFI_ADJUST_CFA_OFFSET 8
573 CFI_REL_OFFSET rip, 0
574 ret
575 CFI_ENDPROC 691 CFI_ENDPROC
576END(ptregscall_common) 692END(ptregscall_common)
577 693
578ENTRY(stub_execve) 694ENTRY(stub_execve)
579 CFI_STARTPROC 695 CFI_STARTPROC
580 popq %r11 696 popq %r11
@@ -590,11 +706,11 @@ ENTRY(stub_execve)
590 jmp int_ret_from_sys_call 706 jmp int_ret_from_sys_call
591 CFI_ENDPROC 707 CFI_ENDPROC
592END(stub_execve) 708END(stub_execve)
593 709
594/* 710/*
595 * sigreturn is special because it needs to restore all registers on return. 711 * sigreturn is special because it needs to restore all registers on return.
596 * This cannot be done with SYSRET, so use the IRET return path instead. 712 * This cannot be done with SYSRET, so use the IRET return path instead.
597 */ 713 */
598ENTRY(stub_rt_sigreturn) 714ENTRY(stub_rt_sigreturn)
599 CFI_STARTPROC 715 CFI_STARTPROC
600 addq $8, %rsp 716 addq $8, %rsp
@@ -610,70 +726,70 @@ ENTRY(stub_rt_sigreturn)
610END(stub_rt_sigreturn) 726END(stub_rt_sigreturn)
611 727
612/* 728/*
613 * initial frame state for interrupts and exceptions 729 * Build the entry stubs and pointer table with some assembler magic.
730 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
731 * single cache line on all modern x86 implementations.
614 */ 732 */
615 .macro _frame ref 733 .section .init.rodata,"a"
616 CFI_STARTPROC simple 734ENTRY(interrupt)
617 CFI_SIGNAL_FRAME 735 .text
618 CFI_DEF_CFA rsp,SS+8-\ref 736 .p2align 5
619 /*CFI_REL_OFFSET ss,SS-\ref*/ 737 .p2align CONFIG_X86_L1_CACHE_SHIFT
620 CFI_REL_OFFSET rsp,RSP-\ref 738ENTRY(irq_entries_start)
621 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ 739 INTR_FRAME
622 /*CFI_REL_OFFSET cs,CS-\ref*/ 740vector=FIRST_EXTERNAL_VECTOR
623 CFI_REL_OFFSET rip,RIP-\ref 741.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
624 .endm 742 .balign 32
743 .rept 7
744 .if vector < NR_VECTORS
745 .if vector <> FIRST_EXTERNAL_VECTOR
746 CFI_ADJUST_CFA_OFFSET -8
747 .endif
7481: pushq $(~vector+0x80) /* Note: always in signed byte range */
749 CFI_ADJUST_CFA_OFFSET 8
750 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
751 jmp 2f
752 .endif
753 .previous
754 .quad 1b
755 .text
756vector=vector+1
757 .endif
758 .endr
7592: jmp common_interrupt
760.endr
761 CFI_ENDPROC
762END(irq_entries_start)
625 763
626/* initial frame state for interrupts (and exceptions without error code) */ 764.previous
627#define INTR_FRAME _frame RIP 765END(interrupt)
628/* initial frame state for exceptions with error code (and interrupts with 766.previous
629 vector already pushed) */
630#define XCPT_FRAME _frame ORIG_RAX
631 767
632/* 768/*
633 * Interrupt entry/exit. 769 * Interrupt entry/exit.
634 * 770 *
635 * Interrupt entry points save only callee clobbered registers in fast path. 771 * Interrupt entry points save only callee clobbered registers in fast path.
636 * 772 *
637 * Entry runs with interrupts off. 773 * Entry runs with interrupts off.
638 */ 774 */
639 775
640/* 0(%rsp): interrupt number */ 776/* 0(%rsp): ~(interrupt number) */
641 .macro interrupt func 777 .macro interrupt func
642 cld 778 subq $10*8, %rsp
643 SAVE_ARGS 779 CFI_ADJUST_CFA_OFFSET 10*8
644 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler 780 call save_args
645 pushq %rbp 781 PARTIAL_FRAME 0
646 /*
647 * Save rbp twice: One is for marking the stack frame, as usual, and the
648 * other, to fill pt_regs properly. This is because bx comes right
649 * before the last saved register in that structure, and not bp. If the
650 * base pointer were in the place bx is today, this would not be needed.
651 */
652 movq %rbp, -8(%rsp)
653 CFI_ADJUST_CFA_OFFSET 8
654 CFI_REL_OFFSET rbp, 0
655 movq %rsp,%rbp
656 CFI_DEF_CFA_REGISTER rbp
657 testl $3,CS(%rdi)
658 je 1f
659 SWAPGS
660 /* irqcount is used to check if a CPU is already on an interrupt
661 stack or not. While this is essentially redundant with preempt_count
662 it is a little cheaper to use a separate counter in the PDA
663 (short of moving irq_enter into assembly, which would be too
664 much work) */
6651: incl %gs:pda_irqcount
666 cmoveq %gs:pda_irqstackptr,%rsp
667 push %rbp # backlink for old unwinder
668 /*
669 * We entered an interrupt context - irqs are off:
670 */
671 TRACE_IRQS_OFF
672 call \func 782 call \func
673 .endm 783 .endm
674 784
675ENTRY(common_interrupt) 785 /*
786 * The interrupt stubs push (~vector+0x80) onto the stack and
787 * then jump to common_interrupt.
788 */
789 .p2align CONFIG_X86_L1_CACHE_SHIFT
790common_interrupt:
676 XCPT_FRAME 791 XCPT_FRAME
792 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
677 interrupt do_IRQ 793 interrupt do_IRQ
678 /* 0(%rsp): oldrsp-ARGOFFSET */ 794 /* 0(%rsp): oldrsp-ARGOFFSET */
679ret_from_intr: 795ret_from_intr:
@@ -687,12 +803,12 @@ exit_intr:
687 GET_THREAD_INFO(%rcx) 803 GET_THREAD_INFO(%rcx)
688 testl $3,CS-ARGOFFSET(%rsp) 804 testl $3,CS-ARGOFFSET(%rsp)
689 je retint_kernel 805 je retint_kernel
690 806
691 /* Interrupt came from user space */ 807 /* Interrupt came from user space */
692 /* 808 /*
693 * Has a correct top of stack, but a partial stack frame 809 * Has a correct top of stack, but a partial stack frame
694 * %rcx: thread info. Interrupts off. 810 * %rcx: thread info. Interrupts off.
695 */ 811 */
696retint_with_reschedule: 812retint_with_reschedule:
697 movl $_TIF_WORK_MASK,%edi 813 movl $_TIF_WORK_MASK,%edi
698retint_check: 814retint_check:
@@ -765,20 +881,20 @@ retint_careful:
765 pushq %rdi 881 pushq %rdi
766 CFI_ADJUST_CFA_OFFSET 8 882 CFI_ADJUST_CFA_OFFSET 8
767 call schedule 883 call schedule
768 popq %rdi 884 popq %rdi
769 CFI_ADJUST_CFA_OFFSET -8 885 CFI_ADJUST_CFA_OFFSET -8
770 GET_THREAD_INFO(%rcx) 886 GET_THREAD_INFO(%rcx)
771 DISABLE_INTERRUPTS(CLBR_NONE) 887 DISABLE_INTERRUPTS(CLBR_NONE)
772 TRACE_IRQS_OFF 888 TRACE_IRQS_OFF
773 jmp retint_check 889 jmp retint_check
774 890
775retint_signal: 891retint_signal:
776 testl $_TIF_DO_NOTIFY_MASK,%edx 892 testl $_TIF_DO_NOTIFY_MASK,%edx
777 jz retint_swapgs 893 jz retint_swapgs
778 TRACE_IRQS_ON 894 TRACE_IRQS_ON
779 ENABLE_INTERRUPTS(CLBR_NONE) 895 ENABLE_INTERRUPTS(CLBR_NONE)
780 SAVE_REST 896 SAVE_REST
781 movq $-1,ORIG_RAX(%rsp) 897 movq $-1,ORIG_RAX(%rsp)
782 xorl %esi,%esi # oldset 898 xorl %esi,%esi # oldset
783 movq %rsp,%rdi # &pt_regs 899 movq %rsp,%rdi # &pt_regs
784 call do_notify_resume 900 call do_notify_resume
@@ -800,324 +916,211 @@ ENTRY(retint_kernel)
800 jnc retint_restore_args 916 jnc retint_restore_args
801 call preempt_schedule_irq 917 call preempt_schedule_irq
802 jmp exit_intr 918 jmp exit_intr
803#endif 919#endif
804 920
805 CFI_ENDPROC 921 CFI_ENDPROC
806END(common_interrupt) 922END(common_interrupt)
807 923
808/* 924/*
809 * APIC interrupts. 925 * APIC interrupts.
810 */ 926 */
811 .macro apicinterrupt num,func 927.macro apicinterrupt num sym do_sym
928ENTRY(\sym)
812 INTR_FRAME 929 INTR_FRAME
813 pushq $~(\num) 930 pushq $~(\num)
814 CFI_ADJUST_CFA_OFFSET 8 931 CFI_ADJUST_CFA_OFFSET 8
815 interrupt \func 932 interrupt \do_sym
816 jmp ret_from_intr 933 jmp ret_from_intr
817 CFI_ENDPROC 934 CFI_ENDPROC
818 .endm 935END(\sym)
819 936.endm
820ENTRY(thermal_interrupt)
821 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
822END(thermal_interrupt)
823
824ENTRY(threshold_interrupt)
825 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
826END(threshold_interrupt)
827
828#ifdef CONFIG_SMP
829ENTRY(reschedule_interrupt)
830 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
831END(reschedule_interrupt)
832
833 .macro INVALIDATE_ENTRY num
834ENTRY(invalidate_interrupt\num)
835 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
836END(invalidate_interrupt\num)
837 .endm
838 937
839 INVALIDATE_ENTRY 0 938#ifdef CONFIG_SMP
840 INVALIDATE_ENTRY 1 939apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
841 INVALIDATE_ENTRY 2 940 irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
842 INVALIDATE_ENTRY 3
843 INVALIDATE_ENTRY 4
844 INVALIDATE_ENTRY 5
845 INVALIDATE_ENTRY 6
846 INVALIDATE_ENTRY 7
847
848ENTRY(call_function_interrupt)
849 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
850END(call_function_interrupt)
851ENTRY(call_function_single_interrupt)
852 apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
853END(call_function_single_interrupt)
854ENTRY(irq_move_cleanup_interrupt)
855 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
856END(irq_move_cleanup_interrupt)
857#endif 941#endif
858 942
859ENTRY(apic_timer_interrupt) 943apicinterrupt UV_BAU_MESSAGE \
860 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt 944 uv_bau_message_intr1 uv_bau_message_interrupt
861END(apic_timer_interrupt) 945apicinterrupt LOCAL_TIMER_VECTOR \
946 apic_timer_interrupt smp_apic_timer_interrupt
947
948#ifdef CONFIG_SMP
949apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
950 invalidate_interrupt0 smp_invalidate_interrupt
951apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \
952 invalidate_interrupt1 smp_invalidate_interrupt
953apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \
954 invalidate_interrupt2 smp_invalidate_interrupt
955apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \
956 invalidate_interrupt3 smp_invalidate_interrupt
957apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \
958 invalidate_interrupt4 smp_invalidate_interrupt
959apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \
960 invalidate_interrupt5 smp_invalidate_interrupt
961apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \
962 invalidate_interrupt6 smp_invalidate_interrupt
963apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
964 invalidate_interrupt7 smp_invalidate_interrupt
965#endif
862 966
863ENTRY(uv_bau_message_intr1) 967apicinterrupt THRESHOLD_APIC_VECTOR \
864 apicinterrupt 220,uv_bau_message_interrupt 968 threshold_interrupt mce_threshold_interrupt
865END(uv_bau_message_intr1) 969apicinterrupt THERMAL_APIC_VECTOR \
970 thermal_interrupt smp_thermal_interrupt
971
972#ifdef CONFIG_SMP
973apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
974 call_function_single_interrupt smp_call_function_single_interrupt
975apicinterrupt CALL_FUNCTION_VECTOR \
976 call_function_interrupt smp_call_function_interrupt
977apicinterrupt RESCHEDULE_VECTOR \
978 reschedule_interrupt smp_reschedule_interrupt
979#endif
866 980
867ENTRY(error_interrupt) 981apicinterrupt ERROR_APIC_VECTOR \
868 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt 982 error_interrupt smp_error_interrupt
869END(error_interrupt) 983apicinterrupt SPURIOUS_APIC_VECTOR \
984 spurious_interrupt smp_spurious_interrupt
870 985
871ENTRY(spurious_interrupt)
872 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
873END(spurious_interrupt)
874
875/* 986/*
876 * Exception entry points. 987 * Exception entry points.
877 */ 988 */
878 .macro zeroentry sym 989.macro zeroentry sym do_sym
990ENTRY(\sym)
879 INTR_FRAME 991 INTR_FRAME
880 PARAVIRT_ADJUST_EXCEPTION_FRAME 992 PARAVIRT_ADJUST_EXCEPTION_FRAME
881 pushq $0 /* push error code/oldrax */ 993 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
882 CFI_ADJUST_CFA_OFFSET 8 994 subq $15*8,%rsp
883 pushq %rax /* push real oldrax to the rdi slot */ 995 CFI_ADJUST_CFA_OFFSET 15*8
884 CFI_ADJUST_CFA_OFFSET 8 996 call error_entry
885 CFI_REL_OFFSET rax,0 997 DEFAULT_FRAME 0
886 leaq \sym(%rip),%rax 998 movq %rsp,%rdi /* pt_regs pointer */
887 jmp error_entry 999 xorl %esi,%esi /* no error code */
1000 call \do_sym
1001 jmp error_exit /* %ebx: no swapgs flag */
888 CFI_ENDPROC 1002 CFI_ENDPROC
889 .endm 1003END(\sym)
1004.endm
890 1005
891 .macro errorentry sym 1006.macro paranoidzeroentry sym do_sym
892 XCPT_FRAME 1007ENTRY(\sym)
1008 INTR_FRAME
893 PARAVIRT_ADJUST_EXCEPTION_FRAME 1009 PARAVIRT_ADJUST_EXCEPTION_FRAME
894 pushq %rax 1010 pushq $-1 /* ORIG_RAX: no syscall to restart */
895 CFI_ADJUST_CFA_OFFSET 8 1011 CFI_ADJUST_CFA_OFFSET 8
896 CFI_REL_OFFSET rax,0 1012 subq $15*8, %rsp
897 leaq \sym(%rip),%rax 1013 call save_paranoid
898 jmp error_entry 1014 TRACE_IRQS_OFF
1015 movq %rsp,%rdi /* pt_regs pointer */
1016 xorl %esi,%esi /* no error code */
1017 call \do_sym
1018 jmp paranoid_exit /* %ebx: no swapgs flag */
899 CFI_ENDPROC 1019 CFI_ENDPROC
900 .endm 1020END(\sym)
1021.endm
901 1022
902 /* error code is on the stack already */ 1023.macro paranoidzeroentry_ist sym do_sym ist
903 /* handle NMI like exceptions that can happen everywhere */ 1024ENTRY(\sym)
904 .macro paranoidentry sym, ist=0, irqtrace=1 1025 INTR_FRAME
905 SAVE_ALL 1026 PARAVIRT_ADJUST_EXCEPTION_FRAME
906 cld 1027 pushq $-1 /* ORIG_RAX: no syscall to restart */
907 movl $1,%ebx 1028 CFI_ADJUST_CFA_OFFSET 8
908 movl $MSR_GS_BASE,%ecx 1029 subq $15*8, %rsp
909 rdmsr 1030 call save_paranoid
910 testl %edx,%edx
911 js 1f
912 SWAPGS
913 xorl %ebx,%ebx
9141:
915 .if \ist
916 movq %gs:pda_data_offset, %rbp
917 .endif
918 .if \irqtrace
919 TRACE_IRQS_OFF
920 .endif
921 movq %rsp,%rdi
922 movq ORIG_RAX(%rsp),%rsi
923 movq $-1,ORIG_RAX(%rsp)
924 .if \ist
925 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
926 .endif
927 call \sym
928 .if \ist
929 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
930 .endif
931 DISABLE_INTERRUPTS(CLBR_NONE)
932 .if \irqtrace
933 TRACE_IRQS_OFF 1031 TRACE_IRQS_OFF
934 .endif 1032 movq %rsp,%rdi /* pt_regs pointer */
935 .endm 1033 xorl %esi,%esi /* no error code */
1034 movq %gs:pda_data_offset, %rbp
1035 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
1036 call \do_sym
1037 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
1038 jmp paranoid_exit /* %ebx: no swapgs flag */
1039 CFI_ENDPROC
1040END(\sym)
1041.endm
936 1042
937 /* 1043.macro errorentry sym do_sym
938 * "Paranoid" exit path from exception stack. 1044ENTRY(\sym)
939 * Paranoid because this is used by NMIs and cannot take 1045 XCPT_FRAME
940 * any kernel state for granted. 1046 PARAVIRT_ADJUST_EXCEPTION_FRAME
941 * We don't do kernel preemption checks here, because only 1047 subq $15*8,%rsp
942 * NMI should be common and it does not enable IRQs and 1048 CFI_ADJUST_CFA_OFFSET 15*8
943 * cannot get reschedule ticks. 1049 call error_entry
944 * 1050 DEFAULT_FRAME 0
945 * "trace" is 0 for the NMI handler only, because irq-tracing 1051 movq %rsp,%rdi /* pt_regs pointer */
946 * is fundamentally NMI-unsafe. (we cannot change the soft and 1052 movq ORIG_RAX(%rsp),%rsi /* get error code */
947 * hard flags at once, atomically) 1053 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
948 */ 1054 call \do_sym
949 .macro paranoidexit trace=1 1055 jmp error_exit /* %ebx: no swapgs flag */
950 /* ebx: no swapgs flag */
951paranoid_exit\trace:
952 testl %ebx,%ebx /* swapgs needed? */
953 jnz paranoid_restore\trace
954 testl $3,CS(%rsp)
955 jnz paranoid_userspace\trace
956paranoid_swapgs\trace:
957 .if \trace
958 TRACE_IRQS_IRETQ 0
959 .endif
960 SWAPGS_UNSAFE_STACK
961paranoid_restore\trace:
962 RESTORE_ALL 8
963 jmp irq_return
964paranoid_userspace\trace:
965 GET_THREAD_INFO(%rcx)
966 movl TI_flags(%rcx),%ebx
967 andl $_TIF_WORK_MASK,%ebx
968 jz paranoid_swapgs\trace
969 movq %rsp,%rdi /* &pt_regs */
970 call sync_regs
971 movq %rax,%rsp /* switch stack for scheduling */
972 testl $_TIF_NEED_RESCHED,%ebx
973 jnz paranoid_schedule\trace
974 movl %ebx,%edx /* arg3: thread flags */
975 .if \trace
976 TRACE_IRQS_ON
977 .endif
978 ENABLE_INTERRUPTS(CLBR_NONE)
979 xorl %esi,%esi /* arg2: oldset */
980 movq %rsp,%rdi /* arg1: &pt_regs */
981 call do_notify_resume
982 DISABLE_INTERRUPTS(CLBR_NONE)
983 .if \trace
984 TRACE_IRQS_OFF
985 .endif
986 jmp paranoid_userspace\trace
987paranoid_schedule\trace:
988 .if \trace
989 TRACE_IRQS_ON
990 .endif
991 ENABLE_INTERRUPTS(CLBR_ANY)
992 call schedule
993 DISABLE_INTERRUPTS(CLBR_ANY)
994 .if \trace
995 TRACE_IRQS_OFF
996 .endif
997 jmp paranoid_userspace\trace
998 CFI_ENDPROC 1056 CFI_ENDPROC
999 .endm 1057END(\sym)
1058.endm
1000 1059
1001/* 1060 /* error code is on the stack already */
1002 * Exception entry point. This expects an error code/orig_rax on the stack 1061.macro paranoiderrorentry sym do_sym
1003 * and the exception handler in %rax. 1062ENTRY(\sym)
1004 */ 1063 XCPT_FRAME
1005KPROBE_ENTRY(error_entry) 1064 PARAVIRT_ADJUST_EXCEPTION_FRAME
1006 _frame RDI 1065 subq $15*8,%rsp
1007 CFI_REL_OFFSET rax,0 1066 CFI_ADJUST_CFA_OFFSET 15*8
1008 /* rdi slot contains rax, oldrax contains error code */ 1067 call save_paranoid
1009 cld 1068 DEFAULT_FRAME 0
1010 subq $14*8,%rsp
1011 CFI_ADJUST_CFA_OFFSET (14*8)
1012 movq %rsi,13*8(%rsp)
1013 CFI_REL_OFFSET rsi,RSI
1014 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
1015 CFI_REGISTER rax,rsi
1016 movq %rdx,12*8(%rsp)
1017 CFI_REL_OFFSET rdx,RDX
1018 movq %rcx,11*8(%rsp)
1019 CFI_REL_OFFSET rcx,RCX
1020 movq %rsi,10*8(%rsp) /* store rax */
1021 CFI_REL_OFFSET rax,RAX
1022 movq %r8, 9*8(%rsp)
1023 CFI_REL_OFFSET r8,R8
1024 movq %r9, 8*8(%rsp)
1025 CFI_REL_OFFSET r9,R9
1026 movq %r10,7*8(%rsp)
1027 CFI_REL_OFFSET r10,R10
1028 movq %r11,6*8(%rsp)
1029 CFI_REL_OFFSET r11,R11
1030 movq %rbx,5*8(%rsp)
1031 CFI_REL_OFFSET rbx,RBX
1032 movq %rbp,4*8(%rsp)
1033 CFI_REL_OFFSET rbp,RBP
1034 movq %r12,3*8(%rsp)
1035 CFI_REL_OFFSET r12,R12
1036 movq %r13,2*8(%rsp)
1037 CFI_REL_OFFSET r13,R13
1038 movq %r14,1*8(%rsp)
1039 CFI_REL_OFFSET r14,R14
1040 movq %r15,(%rsp)
1041 CFI_REL_OFFSET r15,R15
1042 xorl %ebx,%ebx
1043 testl $3,CS(%rsp)
1044 je error_kernelspace
1045error_swapgs:
1046 SWAPGS
1047error_sti:
1048 TRACE_IRQS_OFF
1049 movq %rdi,RDI(%rsp)
1050 CFI_REL_OFFSET rdi,RDI
1051 movq %rsp,%rdi
1052 movq ORIG_RAX(%rsp),%rsi /* get error code */
1053 movq $-1,ORIG_RAX(%rsp)
1054 call *%rax
1055 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
1056error_exit:
1057 movl %ebx,%eax
1058 RESTORE_REST
1059 DISABLE_INTERRUPTS(CLBR_NONE)
1060 TRACE_IRQS_OFF 1069 TRACE_IRQS_OFF
1061 GET_THREAD_INFO(%rcx) 1070 movq %rsp,%rdi /* pt_regs pointer */
1062 testl %eax,%eax 1071 movq ORIG_RAX(%rsp),%rsi /* get error code */
1063 jne retint_kernel 1072 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
1064 LOCKDEP_SYS_EXIT_IRQ 1073 call \do_sym
1065 movl TI_flags(%rcx),%edx 1074 jmp paranoid_exit /* %ebx: no swapgs flag */
1066 movl $_TIF_WORK_MASK,%edi
1067 andl %edi,%edx
1068 jnz retint_careful
1069 jmp retint_swapgs
1070 CFI_ENDPROC 1075 CFI_ENDPROC
1076END(\sym)
1077.endm
1071 1078
1072error_kernelspace: 1079zeroentry divide_error do_divide_error
1073 incl %ebx 1080zeroentry overflow do_overflow
1074 /* There are two places in the kernel that can potentially fault with 1081zeroentry bounds do_bounds
1075 usergs. Handle them here. The exception handlers after 1082zeroentry invalid_op do_invalid_op
1076 iret run with kernel gs again, so don't set the user space flag. 1083zeroentry device_not_available do_device_not_available
1077 B stepping K8s sometimes report a truncated RIP for IRET 1084paranoiderrorentry double_fault do_double_fault
1078 exceptions returning to compat mode. Check for these here too. */ 1085zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
1079 leaq irq_return(%rip),%rcx 1086errorentry invalid_TSS do_invalid_TSS
1080 cmpq %rcx,RIP(%rsp) 1087errorentry segment_not_present do_segment_not_present
1081 je error_swapgs 1088zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
1082 movl %ecx,%ecx /* zero extend */ 1089zeroentry coprocessor_error do_coprocessor_error
1083 cmpq %rcx,RIP(%rsp) 1090errorentry alignment_check do_alignment_check
1084 je error_swapgs 1091zeroentry simd_coprocessor_error do_simd_coprocessor_error
1085 cmpq $gs_change,RIP(%rsp) 1092
1086 je error_swapgs 1093 /* Reload gs selector with exception handling */
1087 jmp error_sti 1094 /* edi: new selector */
1088KPROBE_END(error_entry)
1089
1090 /* Reload gs selector with exception handling */
1091 /* edi: new selector */
1092ENTRY(native_load_gs_index) 1095ENTRY(native_load_gs_index)
1093 CFI_STARTPROC 1096 CFI_STARTPROC
1094 pushf 1097 pushf
1095 CFI_ADJUST_CFA_OFFSET 8 1098 CFI_ADJUST_CFA_OFFSET 8
1096 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) 1099 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
1097 SWAPGS 1100 SWAPGS
1098gs_change: 1101gs_change:
1099 movl %edi,%gs 1102 movl %edi,%gs
11002: mfence /* workaround */ 11032: mfence /* workaround */
1101 SWAPGS 1104 SWAPGS
1102 popf 1105 popf
1103 CFI_ADJUST_CFA_OFFSET -8 1106 CFI_ADJUST_CFA_OFFSET -8
1104 ret 1107 ret
1105 CFI_ENDPROC 1108 CFI_ENDPROC
1106ENDPROC(native_load_gs_index) 1109END(native_load_gs_index)
1107 1110
1108 .section __ex_table,"a" 1111 .section __ex_table,"a"
1109 .align 8 1112 .align 8
1110 .quad gs_change,bad_gs 1113 .quad gs_change,bad_gs
1111 .previous 1114 .previous
1112 .section .fixup,"ax" 1115 .section .fixup,"ax"
1113 /* running with kernelgs */ 1116 /* running with kernelgs */
1114bad_gs: 1117bad_gs:
1115 SWAPGS /* switch back to user gs */ 1118 SWAPGS /* switch back to user gs */
1116 xorl %eax,%eax 1119 xorl %eax,%eax
1117 movl %eax,%gs 1120 movl %eax,%gs
1118 jmp 2b 1121 jmp 2b
1119 .previous 1122 .previous
1120 1123
1121/* 1124/*
1122 * Create a kernel thread. 1125 * Create a kernel thread.
1123 * 1126 *
@@ -1140,7 +1143,7 @@ ENTRY(kernel_thread)
1140 1143
1141 xorl %r8d,%r8d 1144 xorl %r8d,%r8d
1142 xorl %r9d,%r9d 1145 xorl %r9d,%r9d
1143 1146
1144 # clone now 1147 # clone now
1145 call do_fork 1148 call do_fork
1146 movq %rax,RAX(%rsp) 1149 movq %rax,RAX(%rsp)
@@ -1151,15 +1154,15 @@ ENTRY(kernel_thread)
1151 * so internally to the x86_64 port you can rely on kernel_thread() 1154 * so internally to the x86_64 port you can rely on kernel_thread()
1152 * not to reschedule the child before returning; this avoids the need 1155 * not to reschedule the child before returning; this avoids the need
1153 * for hacks, for example to fork off the per-CPU idle tasks. 1156 * for hacks, for example to fork off the per-CPU idle tasks.
1154 * [Hopefully no generic code relies on the reschedule -AK] 1157 * [Hopefully no generic code relies on the reschedule -AK]
1155 */ 1158 */
1156 RESTORE_ALL 1159 RESTORE_ALL
1157 UNFAKE_STACK_FRAME 1160 UNFAKE_STACK_FRAME
1158 ret 1161 ret
1159 CFI_ENDPROC 1162 CFI_ENDPROC
1160ENDPROC(kernel_thread) 1163END(kernel_thread)
1161 1164
1162child_rip: 1165ENTRY(child_rip)
1163 pushq $0 # fake return address 1166 pushq $0 # fake return address
1164 CFI_STARTPROC 1167 CFI_STARTPROC
1165 /* 1168 /*
@@ -1174,7 +1177,7 @@ child_rip:
1174 call do_exit 1177 call do_exit
1175 ud2 # padding for call trace 1178 ud2 # padding for call trace
1176 CFI_ENDPROC 1179 CFI_ENDPROC
1177ENDPROC(child_rip) 1180END(child_rip)
1178 1181
1179/* 1182/*
1180 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. 1183 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
@@ -1194,10 +1197,10 @@ ENDPROC(child_rip)
1194ENTRY(kernel_execve) 1197ENTRY(kernel_execve)
1195 CFI_STARTPROC 1198 CFI_STARTPROC
1196 FAKE_STACK_FRAME $0 1199 FAKE_STACK_FRAME $0
1197 SAVE_ALL 1200 SAVE_ALL
1198 movq %rsp,%rcx 1201 movq %rsp,%rcx
1199 call sys_execve 1202 call sys_execve
1200 movq %rax, RAX(%rsp) 1203 movq %rax, RAX(%rsp)
1201 RESTORE_REST 1204 RESTORE_REST
1202 testq %rax,%rax 1205 testq %rax,%rax
1203 je int_ret_from_sys_call 1206 je int_ret_from_sys_call
@@ -1205,129 +1208,7 @@ ENTRY(kernel_execve)
1205 UNFAKE_STACK_FRAME 1208 UNFAKE_STACK_FRAME
1206 ret 1209 ret
1207 CFI_ENDPROC 1210 CFI_ENDPROC
1208ENDPROC(kernel_execve) 1211END(kernel_execve)
1209
1210KPROBE_ENTRY(page_fault)
1211 errorentry do_page_fault
1212KPROBE_END(page_fault)
1213
1214ENTRY(coprocessor_error)
1215 zeroentry do_coprocessor_error
1216END(coprocessor_error)
1217
1218ENTRY(simd_coprocessor_error)
1219 zeroentry do_simd_coprocessor_error
1220END(simd_coprocessor_error)
1221
1222ENTRY(device_not_available)
1223 zeroentry do_device_not_available
1224END(device_not_available)
1225
1226 /* runs on exception stack */
1227KPROBE_ENTRY(debug)
1228 INTR_FRAME
1229 PARAVIRT_ADJUST_EXCEPTION_FRAME
1230 pushq $0
1231 CFI_ADJUST_CFA_OFFSET 8
1232 paranoidentry do_debug, DEBUG_STACK
1233 paranoidexit
1234KPROBE_END(debug)
1235
1236 /* runs on exception stack */
1237KPROBE_ENTRY(nmi)
1238 INTR_FRAME
1239 PARAVIRT_ADJUST_EXCEPTION_FRAME
1240 pushq $-1
1241 CFI_ADJUST_CFA_OFFSET 8
1242 paranoidentry do_nmi, 0, 0
1243#ifdef CONFIG_TRACE_IRQFLAGS
1244 paranoidexit 0
1245#else
1246 jmp paranoid_exit1
1247 CFI_ENDPROC
1248#endif
1249KPROBE_END(nmi)
1250
1251KPROBE_ENTRY(int3)
1252 INTR_FRAME
1253 PARAVIRT_ADJUST_EXCEPTION_FRAME
1254 pushq $0
1255 CFI_ADJUST_CFA_OFFSET 8
1256 paranoidentry do_int3, DEBUG_STACK
1257 jmp paranoid_exit1
1258 CFI_ENDPROC
1259KPROBE_END(int3)
1260
1261ENTRY(overflow)
1262 zeroentry do_overflow
1263END(overflow)
1264
1265ENTRY(bounds)
1266 zeroentry do_bounds
1267END(bounds)
1268
1269ENTRY(invalid_op)
1270 zeroentry do_invalid_op
1271END(invalid_op)
1272
1273ENTRY(coprocessor_segment_overrun)
1274 zeroentry do_coprocessor_segment_overrun
1275END(coprocessor_segment_overrun)
1276
1277 /* runs on exception stack */
1278ENTRY(double_fault)
1279 XCPT_FRAME
1280 PARAVIRT_ADJUST_EXCEPTION_FRAME
1281 paranoidentry do_double_fault
1282 jmp paranoid_exit1
1283 CFI_ENDPROC
1284END(double_fault)
1285
1286ENTRY(invalid_TSS)
1287 errorentry do_invalid_TSS
1288END(invalid_TSS)
1289
1290ENTRY(segment_not_present)
1291 errorentry do_segment_not_present
1292END(segment_not_present)
1293
1294 /* runs on exception stack */
1295ENTRY(stack_segment)
1296 XCPT_FRAME
1297 PARAVIRT_ADJUST_EXCEPTION_FRAME
1298 paranoidentry do_stack_segment
1299 jmp paranoid_exit1
1300 CFI_ENDPROC
1301END(stack_segment)
1302
1303KPROBE_ENTRY(general_protection)
1304 errorentry do_general_protection
1305KPROBE_END(general_protection)
1306
1307ENTRY(alignment_check)
1308 errorentry do_alignment_check
1309END(alignment_check)
1310
1311ENTRY(divide_error)
1312 zeroentry do_divide_error
1313END(divide_error)
1314
1315ENTRY(spurious_interrupt_bug)
1316 zeroentry do_spurious_interrupt_bug
1317END(spurious_interrupt_bug)
1318
1319#ifdef CONFIG_X86_MCE
1320 /* runs on exception stack */
1321ENTRY(machine_check)
1322 INTR_FRAME
1323 PARAVIRT_ADJUST_EXCEPTION_FRAME
1324 pushq $0
1325 CFI_ADJUST_CFA_OFFSET 8
1326 paranoidentry do_machine_check
1327 jmp paranoid_exit1
1328 CFI_ENDPROC
1329END(machine_check)
1330#endif
1331 1212
1332/* Call softirq on interrupt stack. Interrupts are off. */ 1213/* Call softirq on interrupt stack. Interrupts are off. */
1333ENTRY(call_softirq) 1214ENTRY(call_softirq)
@@ -1347,40 +1228,33 @@ ENTRY(call_softirq)
1347 decl %gs:pda_irqcount 1228 decl %gs:pda_irqcount
1348 ret 1229 ret
1349 CFI_ENDPROC 1230 CFI_ENDPROC
1350ENDPROC(call_softirq) 1231END(call_softirq)
1351
1352KPROBE_ENTRY(ignore_sysret)
1353 CFI_STARTPROC
1354 mov $-ENOSYS,%eax
1355 sysret
1356 CFI_ENDPROC
1357ENDPROC(ignore_sysret)
1358 1232
1359#ifdef CONFIG_XEN 1233#ifdef CONFIG_XEN
1360ENTRY(xen_hypervisor_callback) 1234zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
1361 zeroentry xen_do_hypervisor_callback
1362END(xen_hypervisor_callback)
1363 1235
1364/* 1236/*
1365# A note on the "critical region" in our callback handler. 1237 * A note on the "critical region" in our callback handler.
1366# We want to avoid stacking callback handlers due to events occurring 1238 * We want to avoid stacking callback handlers due to events occurring
1367# during handling of the last event. To do this, we keep events disabled 1239 * during handling of the last event. To do this, we keep events disabled
1368# until we've done all processing. HOWEVER, we must enable events before 1240 * until we've done all processing. HOWEVER, we must enable events before
1369# popping the stack frame (can't be done atomically) and so it would still 1241 * popping the stack frame (can't be done atomically) and so it would still
1370# be possible to get enough handler activations to overflow the stack. 1242 * be possible to get enough handler activations to overflow the stack.
1371# Although unlikely, bugs of that kind are hard to track down, so we'd 1243 * Although unlikely, bugs of that kind are hard to track down, so we'd
1372# like to avoid the possibility. 1244 * like to avoid the possibility.
1373# So, on entry to the handler we detect whether we interrupted an 1245 * So, on entry to the handler we detect whether we interrupted an
1374# existing activation in its critical region -- if so, we pop the current 1246 * existing activation in its critical region -- if so, we pop the current
1375# activation and restart the handler using the previous one. 1247 * activation and restart the handler using the previous one.
1376*/ 1248 */
1377ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) 1249ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
1378 CFI_STARTPROC 1250 CFI_STARTPROC
1379/* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will 1251/*
1380 see the correct pointer to the pt_regs */ 1252 * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
1253 * see the correct pointer to the pt_regs
1254 */
1381 movq %rdi, %rsp # we don't return, adjust the stack frame 1255 movq %rdi, %rsp # we don't return, adjust the stack frame
1382 CFI_ENDPROC 1256 CFI_ENDPROC
1383 CFI_DEFAULT_STACK 1257 DEFAULT_FRAME
138411: incl %gs:pda_irqcount 125811: incl %gs:pda_irqcount
1385 movq %rsp,%rbp 1259 movq %rsp,%rbp
1386 CFI_DEF_CFA_REGISTER rbp 1260 CFI_DEF_CFA_REGISTER rbp
@@ -1395,23 +1269,26 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
1395END(do_hypervisor_callback) 1269END(do_hypervisor_callback)
1396 1270
1397/* 1271/*
1398# Hypervisor uses this for application faults while it executes. 1272 * Hypervisor uses this for application faults while it executes.
1399# We get here for two reasons: 1273 * We get here for two reasons:
1400# 1. Fault while reloading DS, ES, FS or GS 1274 * 1. Fault while reloading DS, ES, FS or GS
1401# 2. Fault while executing IRET 1275 * 2. Fault while executing IRET
1402# Category 1 we do not need to fix up as Xen has already reloaded all segment 1276 * Category 1 we do not need to fix up as Xen has already reloaded all segment
1403# registers that could be reloaded and zeroed the others. 1277 * registers that could be reloaded and zeroed the others.
1404# Category 2 we fix up by killing the current process. We cannot use the 1278 * Category 2 we fix up by killing the current process. We cannot use the
1405# normal Linux return path in this case because if we use the IRET hypercall 1279 * normal Linux return path in this case because if we use the IRET hypercall
1406# to pop the stack frame we end up in an infinite loop of failsafe callbacks. 1280 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1407# We distinguish between categories by comparing each saved segment register 1281 * We distinguish between categories by comparing each saved segment register
1408# with its current contents: any discrepancy means we are in category 1. 1282 * with its current contents: any discrepancy means we are in category 1.
1409*/ 1283 */
1410ENTRY(xen_failsafe_callback) 1284ENTRY(xen_failsafe_callback)
1411 framesz = (RIP-0x30) /* workaround buggy gas */ 1285 INTR_FRAME 1 (6*8)
1412 _frame framesz 1286 /*CFI_REL_OFFSET gs,GS*/
1413 CFI_REL_OFFSET rcx, 0 1287 /*CFI_REL_OFFSET fs,FS*/
1414 CFI_REL_OFFSET r11, 8 1288 /*CFI_REL_OFFSET es,ES*/
1289 /*CFI_REL_OFFSET ds,DS*/
1290 CFI_REL_OFFSET r11,8
1291 CFI_REL_OFFSET rcx,0
1415 movw %ds,%cx 1292 movw %ds,%cx
1416 cmpw %cx,0x10(%rsp) 1293 cmpw %cx,0x10(%rsp)
1417 CFI_REMEMBER_STATE 1294 CFI_REMEMBER_STATE
@@ -1432,12 +1309,9 @@ ENTRY(xen_failsafe_callback)
1432 CFI_RESTORE r11 1309 CFI_RESTORE r11
1433 addq $0x30,%rsp 1310 addq $0x30,%rsp
1434 CFI_ADJUST_CFA_OFFSET -0x30 1311 CFI_ADJUST_CFA_OFFSET -0x30
1435 pushq $0 1312 pushq_cfi $0 /* RIP */
1436 CFI_ADJUST_CFA_OFFSET 8 1313 pushq_cfi %r11
1437 pushq %r11 1314 pushq_cfi %rcx
1438 CFI_ADJUST_CFA_OFFSET 8
1439 pushq %rcx
1440 CFI_ADJUST_CFA_OFFSET 8
1441 jmp general_protection 1315 jmp general_protection
1442 CFI_RESTORE_STATE 1316 CFI_RESTORE_STATE
14431: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ 13171: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
@@ -1447,11 +1321,223 @@ ENTRY(xen_failsafe_callback)
1447 CFI_RESTORE r11 1321 CFI_RESTORE r11
1448 addq $0x30,%rsp 1322 addq $0x30,%rsp
1449 CFI_ADJUST_CFA_OFFSET -0x30 1323 CFI_ADJUST_CFA_OFFSET -0x30
1450 pushq $0 1324 pushq_cfi $0
1451 CFI_ADJUST_CFA_OFFSET 8
1452 SAVE_ALL 1325 SAVE_ALL
1453 jmp error_exit 1326 jmp error_exit
1454 CFI_ENDPROC 1327 CFI_ENDPROC
1455END(xen_failsafe_callback) 1328END(xen_failsafe_callback)
1456 1329
1457#endif /* CONFIG_XEN */ 1330#endif /* CONFIG_XEN */
1331
1332/*
1333 * Some functions should be protected against kprobes
1334 */
1335 .pushsection .kprobes.text, "ax"
1336
1337paranoidzeroentry_ist debug do_debug DEBUG_STACK
1338paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
1339paranoiderrorentry stack_segment do_stack_segment
1340errorentry general_protection do_general_protection
1341errorentry page_fault do_page_fault
1342#ifdef CONFIG_X86_MCE
1343paranoidzeroentry machine_check do_machine_check
1344#endif
1345
1346 /*
1347 * "Paranoid" exit path from exception stack.
1348 * Paranoid because this is used by NMIs and cannot take
1349 * any kernel state for granted.
1350 * We don't do kernel preemption checks here, because only
1351 * NMI should be common and it does not enable IRQs and
1352 * cannot get reschedule ticks.
1353 *
1354 * "trace" is 0 for the NMI handler only, because irq-tracing
1355 * is fundamentally NMI-unsafe. (we cannot change the soft and
1356 * hard flags at once, atomically)
1357 */
1358
1359 /* ebx: no swapgs flag */
1360ENTRY(paranoid_exit)
1361 INTR_FRAME
1362 DISABLE_INTERRUPTS(CLBR_NONE)
1363 TRACE_IRQS_OFF
1364 testl %ebx,%ebx /* swapgs needed? */
1365 jnz paranoid_restore
1366 testl $3,CS(%rsp)
1367 jnz paranoid_userspace
1368paranoid_swapgs:
1369 TRACE_IRQS_IRETQ 0
1370 SWAPGS_UNSAFE_STACK
1371paranoid_restore:
1372 RESTORE_ALL 8
1373 jmp irq_return
1374paranoid_userspace:
1375 GET_THREAD_INFO(%rcx)
1376 movl TI_flags(%rcx),%ebx
1377 andl $_TIF_WORK_MASK,%ebx
1378 jz paranoid_swapgs
1379 movq %rsp,%rdi /* &pt_regs */
1380 call sync_regs
1381 movq %rax,%rsp /* switch stack for scheduling */
1382 testl $_TIF_NEED_RESCHED,%ebx
1383 jnz paranoid_schedule
1384 movl %ebx,%edx /* arg3: thread flags */
1385 TRACE_IRQS_ON
1386 ENABLE_INTERRUPTS(CLBR_NONE)
1387 xorl %esi,%esi /* arg2: oldset */
1388 movq %rsp,%rdi /* arg1: &pt_regs */
1389 call do_notify_resume
1390 DISABLE_INTERRUPTS(CLBR_NONE)
1391 TRACE_IRQS_OFF
1392 jmp paranoid_userspace
1393paranoid_schedule:
1394 TRACE_IRQS_ON
1395 ENABLE_INTERRUPTS(CLBR_ANY)
1396 call schedule
1397 DISABLE_INTERRUPTS(CLBR_ANY)
1398 TRACE_IRQS_OFF
1399 jmp paranoid_userspace
1400 CFI_ENDPROC
1401END(paranoid_exit)
1402
1403/*
1404 * Exception entry point. This expects an error code/orig_rax on the stack.
1405 * returns in "no swapgs flag" in %ebx.
1406 */
1407ENTRY(error_entry)
1408 XCPT_FRAME
1409 CFI_ADJUST_CFA_OFFSET 15*8
1410 /* oldrax contains error code */
1411 cld
1412 movq_cfi rdi, RDI+8
1413 movq_cfi rsi, RSI+8
1414 movq_cfi rdx, RDX+8
1415 movq_cfi rcx, RCX+8
1416 movq_cfi rax, RAX+8
1417 movq_cfi r8, R8+8
1418 movq_cfi r9, R9+8
1419 movq_cfi r10, R10+8
1420 movq_cfi r11, R11+8
1421 movq_cfi rbx, RBX+8
1422 movq_cfi rbp, RBP+8
1423 movq_cfi r12, R12+8
1424 movq_cfi r13, R13+8
1425 movq_cfi r14, R14+8
1426 movq_cfi r15, R15+8
1427 xorl %ebx,%ebx
1428 testl $3,CS+8(%rsp)
1429 je error_kernelspace
1430error_swapgs:
1431 SWAPGS
1432error_sti:
1433 TRACE_IRQS_OFF
1434 ret
1435 CFI_ENDPROC
1436
1437/*
1438 * There are two places in the kernel that can potentially fault with
1439 * usergs. Handle them here. The exception handlers after iret run with
1440 * kernel gs again, so don't set the user space flag. B stepping K8s
1441 * sometimes report a truncated RIP for IRET exceptions returning to
1442 * compat mode. Check for these here too.
1443 */
1444error_kernelspace:
1445 incl %ebx
1446 leaq irq_return(%rip),%rcx
1447 cmpq %rcx,RIP+8(%rsp)
1448 je error_swapgs
1449 movl %ecx,%ecx /* zero extend */
1450 cmpq %rcx,RIP+8(%rsp)
1451 je error_swapgs
1452 cmpq $gs_change,RIP+8(%rsp)
1453 je error_swapgs
1454 jmp error_sti
1455END(error_entry)
1456
1457
1458/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
1459ENTRY(error_exit)
1460 DEFAULT_FRAME
1461 movl %ebx,%eax
1462 RESTORE_REST
1463 DISABLE_INTERRUPTS(CLBR_NONE)
1464 TRACE_IRQS_OFF
1465 GET_THREAD_INFO(%rcx)
1466 testl %eax,%eax
1467 jne retint_kernel
1468 LOCKDEP_SYS_EXIT_IRQ
1469 movl TI_flags(%rcx),%edx
1470 movl $_TIF_WORK_MASK,%edi
1471 andl %edi,%edx
1472 jnz retint_careful
1473 jmp retint_swapgs
1474 CFI_ENDPROC
1475END(error_exit)
1476
1477
1478 /* runs on exception stack */
1479ENTRY(nmi)
1480 INTR_FRAME
1481 PARAVIRT_ADJUST_EXCEPTION_FRAME
1482 pushq_cfi $-1
1483 subq $15*8, %rsp
1484 CFI_ADJUST_CFA_OFFSET 15*8
1485 call save_paranoid
1486 DEFAULT_FRAME 0
1487 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
1488 movq %rsp,%rdi
1489 movq $-1,%rsi
1490 call do_nmi
1491#ifdef CONFIG_TRACE_IRQFLAGS
1492 /* paranoidexit; without TRACE_IRQS_OFF */
1493 /* ebx: no swapgs flag */
1494 DISABLE_INTERRUPTS(CLBR_NONE)
1495 testl %ebx,%ebx /* swapgs needed? */
1496 jnz nmi_restore
1497 testl $3,CS(%rsp)
1498 jnz nmi_userspace
1499nmi_swapgs:
1500 SWAPGS_UNSAFE_STACK
1501nmi_restore:
1502 RESTORE_ALL 8
1503 jmp irq_return
1504nmi_userspace:
1505 GET_THREAD_INFO(%rcx)
1506 movl TI_flags(%rcx),%ebx
1507 andl $_TIF_WORK_MASK,%ebx
1508 jz nmi_swapgs
1509 movq %rsp,%rdi /* &pt_regs */
1510 call sync_regs
1511 movq %rax,%rsp /* switch stack for scheduling */
1512 testl $_TIF_NEED_RESCHED,%ebx
1513 jnz nmi_schedule
1514 movl %ebx,%edx /* arg3: thread flags */
1515 ENABLE_INTERRUPTS(CLBR_NONE)
1516 xorl %esi,%esi /* arg2: oldset */
1517 movq %rsp,%rdi /* arg1: &pt_regs */
1518 call do_notify_resume
1519 DISABLE_INTERRUPTS(CLBR_NONE)
1520 jmp nmi_userspace
1521nmi_schedule:
1522 ENABLE_INTERRUPTS(CLBR_ANY)
1523 call schedule
1524 DISABLE_INTERRUPTS(CLBR_ANY)
1525 jmp nmi_userspace
1526 CFI_ENDPROC
1527#else
1528 jmp paranoid_exit
1529 CFI_ENDPROC
1530#endif
1531END(nmi)
1532
1533ENTRY(ignore_sysret)
1534 CFI_STARTPROC
1535 mov $-ENOSYS,%eax
1536 sysret
1537 CFI_ENDPROC
1538END(ignore_sysret)
1539
1540/*
1541 * End of kprobes section
1542 */
1543 .popsection
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 845aa9803e80..607db63044a5 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -129,7 +129,7 @@ void __init native_init_IRQ(void)
129 for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { 129 for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
130 /* SYSCALL_VECTOR was reserved in trap_init. */ 130 /* SYSCALL_VECTOR was reserved in trap_init. */
131 if (i != SYSCALL_VECTOR) 131 if (i != SYSCALL_VECTOR)
132 set_intr_gate(i, interrupt[i]); 132 set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
133 } 133 }
134 134
135 135
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index ff0235391285..8670b3ce626e 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -24,41 +24,6 @@
24#include <asm/i8259.h> 24#include <asm/i8259.h>
25 25
26/* 26/*
27 * Common place to define all x86 IRQ vectors
28 *
29 * This builds up the IRQ handler stubs using some ugly macros in irq.h
30 *
31 * These macros create the low-level assembly IRQ routines that save
32 * register context and call do_IRQ(). do_IRQ() then does all the
33 * operations that are needed to keep the AT (or SMP IOAPIC)
34 * interrupt-controller happy.
35 */
36
37#define IRQ_NAME2(nr) nr##_interrupt(void)
38#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
39
40/*
41 * SMP has a few special interrupts for IPI messages
42 */
43
44#define BUILD_IRQ(nr) \
45 asmlinkage void IRQ_NAME(nr); \
46 asm("\n.text\n.p2align\n" \
47 "IRQ" #nr "_interrupt:\n\t" \
48 "push $~(" #nr ") ; " \
49 "jmp common_interrupt\n" \
50 ".previous");
51
52#define BI(x,y) \
53 BUILD_IRQ(x##y)
54
55#define BUILD_16_IRQS(x) \
56 BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
57 BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
58 BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
59 BI(x,c) BI(x,d) BI(x,e) BI(x,f)
60
61/*
62 * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: 27 * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
63 * (these are usually mapped to vectors 0x30-0x3f) 28 * (these are usually mapped to vectors 0x30-0x3f)
64 */ 29 */
@@ -73,37 +38,6 @@
73 * 38 *
74 * (these are usually mapped into the 0x30-0xff vector range) 39 * (these are usually mapped into the 0x30-0xff vector range)
75 */ 40 */
76 BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
77BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
78BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
79BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
80
81#undef BUILD_16_IRQS
82#undef BI
83
84
85#define IRQ(x,y) \
86 IRQ##x##y##_interrupt
87
88#define IRQLIST_16(x) \
89 IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
90 IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
91 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
92 IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
93
94/* for the irq vectors */
95static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
96 IRQLIST_16(0x2), IRQLIST_16(0x3),
97 IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
98 IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
99 IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf)
100};
101
102#undef IRQ
103#undef IRQLIST_16
104
105
106
107 41
108/* 42/*
109 * IRQ2 is cascade interrupt to second interrupt controller 43 * IRQ2 is cascade interrupt to second interrupt controller
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 0b8b6690a86d..ebf2f12900f5 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -128,7 +128,16 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
128 gettimeofday(tv,NULL); 128 gettimeofday(tv,NULL);
129 return; 129 return;
130 } 130 }
131
132 /*
133 * Surround the RDTSC by barriers, to make sure it's not
134	 * speculated outside the seqlock critical section and
135 * does not cause time warps:
136 */
137 rdtsc_barrier();
131 now = vread(); 138 now = vread();
139 rdtsc_barrier();
140
132 base = __vsyscall_gtod_data.clock.cycle_last; 141 base = __vsyscall_gtod_data.clock.cycle_last;
133 mask = __vsyscall_gtod_data.clock.mask; 142 mask = __vsyscall_gtod_data.clock.mask;
134 mult = __vsyscall_gtod_data.clock.mult; 143 mult = __vsyscall_gtod_data.clock.mult;
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index a5d8e1ace1cf..50a779264bb1 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -590,7 +590,8 @@ static void __init lguest_init_IRQ(void)
590 * a straightforward 1 to 1 mapping, so force that here. */ 590 * a straightforward 1 to 1 mapping, so force that here. */
591 __get_cpu_var(vector_irq)[vector] = i; 591 __get_cpu_var(vector_irq)[vector] = i;
592 if (vector != SYSCALL_VECTOR) { 592 if (vector != SYSCALL_VECTOR) {
593 set_intr_gate(vector, interrupt[vector]); 593 set_intr_gate(vector,
594 interrupt[vector-FIRST_EXTERNAL_VECTOR]);
594 set_irq_chip_and_handler_name(i, &lguest_irq_controller, 595 set_irq_chip_and_handler_name(i, &lguest_irq_controller,
595 handle_level_irq, 596 handle_level_irq,
596 "level"); 597 "level");
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c483f4242079..3ffed259883e 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -102,6 +102,8 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
102 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); 102 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
103 pud = pud_offset(pgd, 0); 103 pud = pud_offset(pgd, 0);
104 BUG_ON(pmd_table != pmd_offset(pud, 0)); 104 BUG_ON(pmd_table != pmd_offset(pud, 0));
105
106 return pmd_table;
105 } 107 }
106#endif 108#endif
107 pud = pud_offset(pgd, 0); 109 pud = pud_offset(pgd, 0);