diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-08-12 23:46:24 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-08-12 23:46:24 -0400 |
commit | 06e727d2a5d9d889fabad35223ad77205a9bebb9 (patch) | |
tree | 2a2d4ec9ed95c95f044c8d69e87ab47195a1d2ed /arch/x86 | |
parent | e68ff9cd15552e46e0f993eace25af0947b1222d (diff) | |
parent | 3ae36655b97a03fa1decf72f04078ef945647c1a (diff) |
Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-tip
* 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-tip:
x86-64: Rework vsyscall emulation and add vsyscall= parameter
x86-64: Wire up getcpu syscall
x86: Remove unnecessary compile flag tweaks for vsyscall code
x86-64: Add vsyscall:emulate_vsyscall trace event
x86-64: Add user_64bit_mode paravirt op
x86-64, xen: Enable the vvar mapping
x86-64: Work around gold bug 13023
x86-64: Move the "user" vsyscall segment out of the data segment.
x86-64: Pad vDSO to a page boundary
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/desc.h | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/irq_vectors.h | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/paravirt_types.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/ptrace.h | 19 | ||||
-rw-r--r-- | arch/x86/include/asm/traps.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/unistd_64.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/vsyscall.h | 6 | ||||
-rw-r--r-- | arch/x86/kernel/Makefile | 13 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 1 | ||||
-rw-r--r-- | arch/x86/kernel/paravirt.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/step.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux.lds.S | 41 | ||||
-rw-r--r-- | arch/x86/kernel/vsyscall_64.c | 90 | ||||
-rw-r--r-- | arch/x86/kernel/vsyscall_emu_64.S | 36 | ||||
-rw-r--r-- | arch/x86/kernel/vsyscall_trace.h | 29 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 14 | ||||
-rw-r--r-- | arch/x86/vdso/vdso.S | 1 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 4 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 4 |
20 files changed, 173 insertions, 115 deletions
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 7b439d9aea2a..41935fadfdfc 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h | |||
@@ -27,8 +27,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in | |||
27 | 27 | ||
28 | desc->base2 = (info->base_addr & 0xff000000) >> 24; | 28 | desc->base2 = (info->base_addr & 0xff000000) >> 24; |
29 | /* | 29 | /* |
30 | * Don't allow setting of the lm bit. It is useless anyway | 30 | * Don't allow setting of the lm bit. It would confuse |
31 | * because 64bit system calls require __USER_CS: | 31 | * user_64bit_mode and would get overridden by sysret anyway. |
32 | */ | 32 | */ |
33 | desc->l = 0; | 33 | desc->l = 0; |
34 | } | 34 | } |
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index f9a320984a10..7e50f06393aa 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -17,7 +17,6 @@ | |||
17 | * Vectors 0 ... 31 : system traps and exceptions - hardcoded events | 17 | * Vectors 0 ... 31 : system traps and exceptions - hardcoded events |
18 | * Vectors 32 ... 127 : device interrupts | 18 | * Vectors 32 ... 127 : device interrupts |
19 | * Vector 128 : legacy int80 syscall interface | 19 | * Vector 128 : legacy int80 syscall interface |
20 | * Vector 204 : legacy x86_64 vsyscall emulation | ||
21 | * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts | 20 | * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts |
22 | * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts | 21 | * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts |
23 | * | 22 | * |
@@ -51,9 +50,6 @@ | |||
51 | #ifdef CONFIG_X86_32 | 50 | #ifdef CONFIG_X86_32 |
52 | # define SYSCALL_VECTOR 0x80 | 51 | # define SYSCALL_VECTOR 0x80 |
53 | #endif | 52 | #endif |
54 | #ifdef CONFIG_X86_64 | ||
55 | # define VSYSCALL_EMU_VECTOR 0xcc | ||
56 | #endif | ||
57 | 53 | ||
58 | /* | 54 | /* |
59 | * Vectors 0x30-0x3f are used for ISA interrupts. | 55 | * Vectors 0x30-0x3f are used for ISA interrupts. |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 2c7652163111..8e8b9a4987ee 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -41,6 +41,7 @@ | |||
41 | 41 | ||
42 | #include <asm/desc_defs.h> | 42 | #include <asm/desc_defs.h> |
43 | #include <asm/kmap_types.h> | 43 | #include <asm/kmap_types.h> |
44 | #include <asm/pgtable_types.h> | ||
44 | 45 | ||
45 | struct page; | 46 | struct page; |
46 | struct thread_struct; | 47 | struct thread_struct; |
@@ -63,6 +64,11 @@ struct paravirt_callee_save { | |||
63 | struct pv_info { | 64 | struct pv_info { |
64 | unsigned int kernel_rpl; | 65 | unsigned int kernel_rpl; |
65 | int shared_kernel_pmd; | 66 | int shared_kernel_pmd; |
67 | |||
68 | #ifdef CONFIG_X86_64 | ||
69 | u16 extra_user_64bit_cs; /* __USER_CS if none */ | ||
70 | #endif | ||
71 | |||
66 | int paravirt_enabled; | 72 | int paravirt_enabled; |
67 | const char *name; | 73 | const char *name; |
68 | }; | 74 | }; |
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 94e7618fcac8..35664547125b 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
@@ -131,6 +131,9 @@ struct pt_regs { | |||
131 | #ifdef __KERNEL__ | 131 | #ifdef __KERNEL__ |
132 | 132 | ||
133 | #include <linux/init.h> | 133 | #include <linux/init.h> |
134 | #ifdef CONFIG_PARAVIRT | ||
135 | #include <asm/paravirt_types.h> | ||
136 | #endif | ||
134 | 137 | ||
135 | struct cpuinfo_x86; | 138 | struct cpuinfo_x86; |
136 | struct task_struct; | 139 | struct task_struct; |
@@ -187,6 +190,22 @@ static inline int v8086_mode(struct pt_regs *regs) | |||
187 | #endif | 190 | #endif |
188 | } | 191 | } |
189 | 192 | ||
193 | #ifdef CONFIG_X86_64 | ||
194 | static inline bool user_64bit_mode(struct pt_regs *regs) | ||
195 | { | ||
196 | #ifndef CONFIG_PARAVIRT | ||
197 | /* | ||
198 | * On non-paravirt systems, this is the only long mode CPL 3 | ||
199 | * selector. We do not allow long mode selectors in the LDT. | ||
200 | */ | ||
201 | return regs->cs == __USER_CS; | ||
202 | #else | ||
203 | /* Headers are too twisted for this to go in paravirt.h. */ | ||
204 | return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs; | ||
205 | #endif | ||
206 | } | ||
207 | #endif | ||
208 | |||
190 | /* | 209 | /* |
191 | * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode | 210 | * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode |
192 | * when it traps. The previous stack will be directly underneath the saved | 211 | * when it traps. The previous stack will be directly underneath the saved |
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 2bae0a513b40..0012d0902c5f 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h | |||
@@ -40,7 +40,6 @@ asmlinkage void alignment_check(void); | |||
40 | asmlinkage void machine_check(void); | 40 | asmlinkage void machine_check(void); |
41 | #endif /* CONFIG_X86_MCE */ | 41 | #endif /* CONFIG_X86_MCE */ |
42 | asmlinkage void simd_coprocessor_error(void); | 42 | asmlinkage void simd_coprocessor_error(void); |
43 | asmlinkage void emulate_vsyscall(void); | ||
44 | 43 | ||
45 | dotraplinkage void do_divide_error(struct pt_regs *, long); | 44 | dotraplinkage void do_divide_error(struct pt_regs *, long); |
46 | dotraplinkage void do_debug(struct pt_regs *, long); | 45 | dotraplinkage void do_debug(struct pt_regs *, long); |
@@ -67,7 +66,6 @@ dotraplinkage void do_alignment_check(struct pt_regs *, long); | |||
67 | dotraplinkage void do_machine_check(struct pt_regs *, long); | 66 | dotraplinkage void do_machine_check(struct pt_regs *, long); |
68 | #endif | 67 | #endif |
69 | dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long); | 68 | dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long); |
70 | dotraplinkage void do_emulate_vsyscall(struct pt_regs *, long); | ||
71 | #ifdef CONFIG_X86_32 | 69 | #ifdef CONFIG_X86_32 |
72 | dotraplinkage void do_iret_error(struct pt_regs *, long); | 70 | dotraplinkage void do_iret_error(struct pt_regs *, long); |
73 | #endif | 71 | #endif |
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 705bf139288c..d92641cc7acc 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h | |||
@@ -681,6 +681,8 @@ __SYSCALL(__NR_syncfs, sys_syncfs) | |||
681 | __SYSCALL(__NR_sendmmsg, sys_sendmmsg) | 681 | __SYSCALL(__NR_sendmmsg, sys_sendmmsg) |
682 | #define __NR_setns 308 | 682 | #define __NR_setns 308 |
683 | __SYSCALL(__NR_setns, sys_setns) | 683 | __SYSCALL(__NR_setns, sys_setns) |
684 | #define __NR_getcpu 309 | ||
685 | __SYSCALL(__NR_getcpu, sys_getcpu) | ||
684 | 686 | ||
685 | #ifndef __NO_STUBS | 687 | #ifndef __NO_STUBS |
686 | #define __ARCH_WANT_OLD_READDIR | 688 | #define __ARCH_WANT_OLD_READDIR |
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index 60107072c28b..eaea1d31f753 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h | |||
@@ -27,6 +27,12 @@ extern struct timezone sys_tz; | |||
27 | 27 | ||
28 | extern void map_vsyscall(void); | 28 | extern void map_vsyscall(void); |
29 | 29 | ||
30 | /* | ||
31 | * Called on instruction fetch fault in vsyscall page. | ||
32 | * Returns true if handled. | ||
33 | */ | ||
34 | extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); | ||
35 | |||
30 | #endif /* __KERNEL__ */ | 36 | #endif /* __KERNEL__ */ |
31 | 37 | ||
32 | #endif /* _ASM_X86_VSYSCALL_H */ | 38 | #endif /* _ASM_X86_VSYSCALL_H */ |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 04105574c8e9..82f2912155a5 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -17,19 +17,6 @@ CFLAGS_REMOVE_ftrace.o = -pg | |||
17 | CFLAGS_REMOVE_early_printk.o = -pg | 17 | CFLAGS_REMOVE_early_printk.o = -pg |
18 | endif | 18 | endif |
19 | 19 | ||
20 | # | ||
21 | # vsyscalls (which work on the user stack) should have | ||
22 | # no stack-protector checks: | ||
23 | # | ||
24 | nostackp := $(call cc-option, -fno-stack-protector) | ||
25 | CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) | ||
26 | CFLAGS_hpet.o := $(nostackp) | ||
27 | CFLAGS_paravirt.o := $(nostackp) | ||
28 | GCOV_PROFILE_vsyscall_64.o := n | ||
29 | GCOV_PROFILE_hpet.o := n | ||
30 | GCOV_PROFILE_tsc.o := n | ||
31 | GCOV_PROFILE_paravirt.o := n | ||
32 | |||
33 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o | 20 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o |
34 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o | 21 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o |
35 | obj-y += time.o ioport.o ldt.o dumpstack.o | 22 | obj-y += time.o ioport.o ldt.o dumpstack.o |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e13329d800c8..6419bb05ecd5 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -1111,7 +1111,6 @@ zeroentry spurious_interrupt_bug do_spurious_interrupt_bug | |||
1111 | zeroentry coprocessor_error do_coprocessor_error | 1111 | zeroentry coprocessor_error do_coprocessor_error |
1112 | errorentry alignment_check do_alignment_check | 1112 | errorentry alignment_check do_alignment_check |
1113 | zeroentry simd_coprocessor_error do_simd_coprocessor_error | 1113 | zeroentry simd_coprocessor_error do_simd_coprocessor_error |
1114 | zeroentry emulate_vsyscall do_emulate_vsyscall | ||
1115 | 1114 | ||
1116 | 1115 | ||
1117 | /* Reload gs selector with exception handling */ | 1116 | /* Reload gs selector with exception handling */ |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 613a7931ecc1..d90272e6bc40 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -307,6 +307,10 @@ struct pv_info pv_info = { | |||
307 | .paravirt_enabled = 0, | 307 | .paravirt_enabled = 0, |
308 | .kernel_rpl = 0, | 308 | .kernel_rpl = 0, |
309 | .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ | 309 | .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ |
310 | |||
311 | #ifdef CONFIG_X86_64 | ||
312 | .extra_user_64bit_cs = __USER_CS, | ||
313 | #endif | ||
310 | }; | 314 | }; |
311 | 315 | ||
312 | struct pv_init_ops pv_init_ops = { | 316 | struct pv_init_ops pv_init_ops = { |
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 7977f0cfe339..c346d1161488 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
@@ -74,7 +74,7 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) | |||
74 | 74 | ||
75 | #ifdef CONFIG_X86_64 | 75 | #ifdef CONFIG_X86_64 |
76 | case 0x40 ... 0x4f: | 76 | case 0x40 ... 0x4f: |
77 | if (regs->cs != __USER_CS) | 77 | if (!user_64bit_mode(regs)) |
78 | /* 32-bit mode: register increment */ | 78 | /* 32-bit mode: register increment */ |
79 | return 0; | 79 | return 0; |
80 | /* 64-bit mode: REX prefix */ | 80 | /* 64-bit mode: REX prefix */ |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 9682ec50180c..6913369c234c 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -872,12 +872,6 @@ void __init trap_init(void) | |||
872 | set_bit(SYSCALL_VECTOR, used_vectors); | 872 | set_bit(SYSCALL_VECTOR, used_vectors); |
873 | #endif | 873 | #endif |
874 | 874 | ||
875 | #ifdef CONFIG_X86_64 | ||
876 | BUG_ON(test_bit(VSYSCALL_EMU_VECTOR, used_vectors)); | ||
877 | set_system_intr_gate(VSYSCALL_EMU_VECTOR, &emulate_vsyscall); | ||
878 | set_bit(VSYSCALL_EMU_VECTOR, used_vectors); | ||
879 | #endif | ||
880 | |||
881 | /* | 875 | /* |
882 | * Should be a barrier for any external CPU state: | 876 | * Should be a barrier for any external CPU state: |
883 | */ | 877 | */ |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 4aa9c54a9b76..0f703f10901a 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -71,7 +71,6 @@ PHDRS { | |||
71 | text PT_LOAD FLAGS(5); /* R_E */ | 71 | text PT_LOAD FLAGS(5); /* R_E */ |
72 | data PT_LOAD FLAGS(6); /* RW_ */ | 72 | data PT_LOAD FLAGS(6); /* RW_ */ |
73 | #ifdef CONFIG_X86_64 | 73 | #ifdef CONFIG_X86_64 |
74 | user PT_LOAD FLAGS(5); /* R_E */ | ||
75 | #ifdef CONFIG_SMP | 74 | #ifdef CONFIG_SMP |
76 | percpu PT_LOAD FLAGS(6); /* RW_ */ | 75 | percpu PT_LOAD FLAGS(6); /* RW_ */ |
77 | #endif | 76 | #endif |
@@ -154,44 +153,16 @@ SECTIONS | |||
154 | 153 | ||
155 | #ifdef CONFIG_X86_64 | 154 | #ifdef CONFIG_X86_64 |
156 | 155 | ||
157 | #define VSYSCALL_ADDR (-10*1024*1024) | 156 | . = ALIGN(PAGE_SIZE); |
158 | |||
159 | #define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET) | ||
160 | #define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) | ||
161 | |||
162 | #define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0) | ||
163 | #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) | ||
164 | |||
165 | . = ALIGN(4096); | ||
166 | __vsyscall_0 = .; | ||
167 | |||
168 | . = VSYSCALL_ADDR; | ||
169 | .vsyscall : AT(VLOAD(.vsyscall)) { | ||
170 | *(.vsyscall_0) | ||
171 | |||
172 | . = 1024; | ||
173 | *(.vsyscall_1) | ||
174 | |||
175 | . = 2048; | ||
176 | *(.vsyscall_2) | ||
177 | |||
178 | . = 4096; /* Pad the whole page. */ | ||
179 | } :user =0xcc | ||
180 | . = ALIGN(__vsyscall_0 + PAGE_SIZE, PAGE_SIZE); | ||
181 | |||
182 | #undef VSYSCALL_ADDR | ||
183 | #undef VLOAD_OFFSET | ||
184 | #undef VLOAD | ||
185 | #undef VVIRT_OFFSET | ||
186 | #undef VVIRT | ||
187 | |||
188 | __vvar_page = .; | 157 | __vvar_page = .; |
189 | 158 | ||
190 | .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) { | 159 | .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) { |
160 | /* work around gold bug 13023 */ | ||
161 | __vvar_beginning_hack = .; | ||
191 | 162 | ||
192 | /* Place all vvars at the offsets in asm/vvar.h. */ | 163 | /* Place all vvars at the offsets in asm/vvar.h. */ |
193 | #define EMIT_VVAR(name, offset) \ | 164 | #define EMIT_VVAR(name, offset) \ |
194 | . = offset; \ | 165 | . = __vvar_beginning_hack + offset; \ |
195 | *(.vvar_ ## name) | 166 | *(.vvar_ ## name) |
196 | #define __VVAR_KERNEL_LDS | 167 | #define __VVAR_KERNEL_LDS |
197 | #include <asm/vvar.h> | 168 | #include <asm/vvar.h> |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index dda7dff9cef7..18ae83dd1cd7 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -18,9 +18,6 @@ | |||
18 | * use the vDSO. | 18 | * use the vDSO. |
19 | */ | 19 | */ |
20 | 20 | ||
21 | /* Disable profiling for userspace code: */ | ||
22 | #define DISABLE_BRANCH_PROFILING | ||
23 | |||
24 | #include <linux/time.h> | 21 | #include <linux/time.h> |
25 | #include <linux/init.h> | 22 | #include <linux/init.h> |
26 | #include <linux/kernel.h> | 23 | #include <linux/kernel.h> |
@@ -50,12 +47,36 @@ | |||
50 | #include <asm/vgtod.h> | 47 | #include <asm/vgtod.h> |
51 | #include <asm/traps.h> | 48 | #include <asm/traps.h> |
52 | 49 | ||
50 | #define CREATE_TRACE_POINTS | ||
51 | #include "vsyscall_trace.h" | ||
52 | |||
53 | DEFINE_VVAR(int, vgetcpu_mode); | 53 | DEFINE_VVAR(int, vgetcpu_mode); |
54 | DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = | 54 | DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = |
55 | { | 55 | { |
56 | .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), | 56 | .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), |
57 | }; | 57 | }; |
58 | 58 | ||
59 | static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; | ||
60 | |||
61 | static int __init vsyscall_setup(char *str) | ||
62 | { | ||
63 | if (str) { | ||
64 | if (!strcmp("emulate", str)) | ||
65 | vsyscall_mode = EMULATE; | ||
66 | else if (!strcmp("native", str)) | ||
67 | vsyscall_mode = NATIVE; | ||
68 | else if (!strcmp("none", str)) | ||
69 | vsyscall_mode = NONE; | ||
70 | else | ||
71 | return -EINVAL; | ||
72 | |||
73 | return 0; | ||
74 | } | ||
75 | |||
76 | return -EINVAL; | ||
77 | } | ||
78 | early_param("vsyscall", vsyscall_setup); | ||
79 | |||
59 | void update_vsyscall_tz(void) | 80 | void update_vsyscall_tz(void) |
60 | { | 81 | { |
61 | unsigned long flags; | 82 | unsigned long flags; |
@@ -100,7 +121,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, | |||
100 | 121 | ||
101 | printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", | 122 | printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", |
102 | level, tsk->comm, task_pid_nr(tsk), | 123 | level, tsk->comm, task_pid_nr(tsk), |
103 | message, regs->ip - 2, regs->cs, | 124 | message, regs->ip, regs->cs, |
104 | regs->sp, regs->ax, regs->si, regs->di); | 125 | regs->sp, regs->ax, regs->si, regs->di); |
105 | } | 126 | } |
106 | 127 | ||
@@ -118,46 +139,39 @@ static int addr_to_vsyscall_nr(unsigned long addr) | |||
118 | return nr; | 139 | return nr; |
119 | } | 140 | } |
120 | 141 | ||
121 | void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code) | 142 | bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) |
122 | { | 143 | { |
123 | struct task_struct *tsk; | 144 | struct task_struct *tsk; |
124 | unsigned long caller; | 145 | unsigned long caller; |
125 | int vsyscall_nr; | 146 | int vsyscall_nr; |
126 | long ret; | 147 | long ret; |
127 | 148 | ||
128 | local_irq_enable(); | ||
129 | |||
130 | /* | 149 | /* |
131 | * Real 64-bit user mode code has cs == __USER_CS. Anything else | 150 | * No point in checking CS -- the only way to get here is a user mode |
132 | * is bogus. | 151 | * trap to a high address, which means that we're in 64-bit user code. |
133 | */ | 152 | */ |
134 | if (regs->cs != __USER_CS) { | ||
135 | /* | ||
136 | * If we trapped from kernel mode, we might as well OOPS now | ||
137 | * instead of returning to some random address and OOPSing | ||
138 | * then. | ||
139 | */ | ||
140 | BUG_ON(!user_mode(regs)); | ||
141 | 153 | ||
142 | /* Compat mode and non-compat 32-bit CS should both segfault. */ | 154 | WARN_ON_ONCE(address != regs->ip); |
143 | warn_bad_vsyscall(KERN_WARNING, regs, | 155 | |
144 | "illegal int 0xcc from 32-bit mode"); | 156 | if (vsyscall_mode == NONE) { |
145 | goto sigsegv; | 157 | warn_bad_vsyscall(KERN_INFO, regs, |
158 | "vsyscall attempted with vsyscall=none"); | ||
159 | return false; | ||
146 | } | 160 | } |
147 | 161 | ||
148 | /* | 162 | vsyscall_nr = addr_to_vsyscall_nr(address); |
149 | * x86-ism here: regs->ip points to the instruction after the int 0xcc, | 163 | |
150 | * and int 0xcc is two bytes long. | 164 | trace_emulate_vsyscall(vsyscall_nr); |
151 | */ | 165 | |
152 | vsyscall_nr = addr_to_vsyscall_nr(regs->ip - 2); | ||
153 | if (vsyscall_nr < 0) { | 166 | if (vsyscall_nr < 0) { |
154 | warn_bad_vsyscall(KERN_WARNING, regs, | 167 | warn_bad_vsyscall(KERN_WARNING, regs, |
155 | "illegal int 0xcc (exploit attempt?)"); | 168 | "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround"); |
156 | goto sigsegv; | 169 | goto sigsegv; |
157 | } | 170 | } |
158 | 171 | ||
159 | if (get_user(caller, (unsigned long __user *)regs->sp) != 0) { | 172 | if (get_user(caller, (unsigned long __user *)regs->sp) != 0) { |
160 | warn_bad_vsyscall(KERN_WARNING, regs, "int 0xcc with bad stack (exploit attempt?)"); | 173 | warn_bad_vsyscall(KERN_WARNING, regs, |
174 | "vsyscall with bad stack (exploit attempt?)"); | ||
161 | goto sigsegv; | 175 | goto sigsegv; |
162 | } | 176 | } |
163 | 177 | ||
@@ -202,13 +216,11 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code) | |||
202 | regs->ip = caller; | 216 | regs->ip = caller; |
203 | regs->sp += 8; | 217 | regs->sp += 8; |
204 | 218 | ||
205 | local_irq_disable(); | 219 | return true; |
206 | return; | ||
207 | 220 | ||
208 | sigsegv: | 221 | sigsegv: |
209 | regs->ip -= 2; /* The faulting instruction should be the int 0xcc. */ | ||
210 | force_sig(SIGSEGV, current); | 222 | force_sig(SIGSEGV, current); |
211 | local_irq_disable(); | 223 | return true; |
212 | } | 224 | } |
213 | 225 | ||
214 | /* | 226 | /* |
@@ -256,15 +268,21 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) | |||
256 | 268 | ||
257 | void __init map_vsyscall(void) | 269 | void __init map_vsyscall(void) |
258 | { | 270 | { |
259 | extern char __vsyscall_0; | 271 | extern char __vsyscall_page; |
260 | unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); | 272 | unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); |
261 | extern char __vvar_page; | 273 | extern char __vvar_page; |
262 | unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page); | 274 | unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page); |
263 | 275 | ||
264 | /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ | 276 | __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall, |
265 | __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); | 277 | vsyscall_mode == NATIVE |
278 | ? PAGE_KERNEL_VSYSCALL | ||
279 | : PAGE_KERNEL_VVAR); | ||
280 | BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_FIRST_PAGE) != | ||
281 | (unsigned long)VSYSCALL_START); | ||
282 | |||
266 | __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR); | 283 | __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR); |
267 | BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != (unsigned long)VVAR_ADDRESS); | 284 | BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != |
285 | (unsigned long)VVAR_ADDRESS); | ||
268 | } | 286 | } |
269 | 287 | ||
270 | static int __init vsyscall_init(void) | 288 | static int __init vsyscall_init(void) |
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S index ffa845eae5ca..c9596a9af159 100644 --- a/arch/x86/kernel/vsyscall_emu_64.S +++ b/arch/x86/kernel/vsyscall_emu_64.S | |||
@@ -7,21 +7,31 @@ | |||
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/linkage.h> | 9 | #include <linux/linkage.h> |
10 | |||
10 | #include <asm/irq_vectors.h> | 11 | #include <asm/irq_vectors.h> |
12 | #include <asm/page_types.h> | ||
13 | #include <asm/unistd_64.h> | ||
14 | |||
15 | __PAGE_ALIGNED_DATA | ||
16 | .globl __vsyscall_page | ||
17 | .balign PAGE_SIZE, 0xcc | ||
18 | .type __vsyscall_page, @object | ||
19 | __vsyscall_page: | ||
20 | |||
21 | mov $__NR_gettimeofday, %rax | ||
22 | syscall | ||
23 | ret | ||
11 | 24 | ||
12 | /* The unused parts of the page are filled with 0xcc by the linker script. */ | 25 | .balign 1024, 0xcc |
26 | mov $__NR_time, %rax | ||
27 | syscall | ||
28 | ret | ||
13 | 29 | ||
14 | .section .vsyscall_0, "a" | 30 | .balign 1024, 0xcc |
15 | ENTRY(vsyscall_0) | 31 | mov $__NR_getcpu, %rax |
16 | int $VSYSCALL_EMU_VECTOR | 32 | syscall |
17 | END(vsyscall_0) | 33 | ret |
18 | 34 | ||
19 | .section .vsyscall_1, "a" | 35 | .balign 4096, 0xcc |
20 | ENTRY(vsyscall_1) | ||
21 | int $VSYSCALL_EMU_VECTOR | ||
22 | END(vsyscall_1) | ||
23 | 36 | ||
24 | .section .vsyscall_2, "a" | 37 | .size __vsyscall_page, 4096 |
25 | ENTRY(vsyscall_2) | ||
26 | int $VSYSCALL_EMU_VECTOR | ||
27 | END(vsyscall_2) | ||
diff --git a/arch/x86/kernel/vsyscall_trace.h b/arch/x86/kernel/vsyscall_trace.h new file mode 100644 index 000000000000..a8b2edec54fe --- /dev/null +++ b/arch/x86/kernel/vsyscall_trace.h | |||
@@ -0,0 +1,29 @@ | |||
1 | #undef TRACE_SYSTEM | ||
2 | #define TRACE_SYSTEM vsyscall | ||
3 | |||
4 | #if !defined(__VSYSCALL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) | ||
5 | #define __VSYSCALL_TRACE_H | ||
6 | |||
7 | #include <linux/tracepoint.h> | ||
8 | |||
9 | TRACE_EVENT(emulate_vsyscall, | ||
10 | |||
11 | TP_PROTO(int nr), | ||
12 | |||
13 | TP_ARGS(nr), | ||
14 | |||
15 | TP_STRUCT__entry(__field(int, nr)), | ||
16 | |||
17 | TP_fast_assign( | ||
18 | __entry->nr = nr; | ||
19 | ), | ||
20 | |||
21 | TP_printk("nr = %d", __entry->nr) | ||
22 | ); | ||
23 | |||
24 | #endif | ||
25 | |||
26 | #undef TRACE_INCLUDE_PATH | ||
27 | #define TRACE_INCLUDE_PATH ../../arch/x86/kernel | ||
28 | #define TRACE_INCLUDE_FILE vsyscall_trace | ||
29 | #include <trace/define_trace.h> | ||
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 4d09df054e39..247aae3dc008 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -105,7 +105,7 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, | |||
105 | * but for now it's good enough to assume that long | 105 | * but for now it's good enough to assume that long |
106 | * mode only uses well known segments or kernel. | 106 | * mode only uses well known segments or kernel. |
107 | */ | 107 | */ |
108 | return (!user_mode(regs)) || (regs->cs == __USER_CS); | 108 | return (!user_mode(regs) || user_64bit_mode(regs)); |
109 | #endif | 109 | #endif |
110 | case 0x60: | 110 | case 0x60: |
111 | /* 0x64 thru 0x67 are valid prefixes in all modes. */ | 111 | /* 0x64 thru 0x67 are valid prefixes in all modes. */ |
@@ -720,6 +720,18 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, | |||
720 | if (is_errata100(regs, address)) | 720 | if (is_errata100(regs, address)) |
721 | return; | 721 | return; |
722 | 722 | ||
723 | #ifdef CONFIG_X86_64 | ||
724 | /* | ||
725 | * Instruction fetch faults in the vsyscall page might need | ||
726 | * emulation. | ||
727 | */ | ||
728 | if (unlikely((error_code & PF_INSTR) && | ||
729 | ((address & ~0xfff) == VSYSCALL_START))) { | ||
730 | if (emulate_vsyscall(regs, address)) | ||
731 | return; | ||
732 | } | ||
733 | #endif | ||
734 | |||
723 | if (unlikely(show_unhandled_signals)) | 735 | if (unlikely(show_unhandled_signals)) |
724 | show_signal_msg(regs, error_code, address, tsk); | 736 | show_signal_msg(regs, error_code, address, tsk); |
725 | 737 | ||
diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S index 1b979c12ba85..01f5e3b4613c 100644 --- a/arch/x86/vdso/vdso.S +++ b/arch/x86/vdso/vdso.S | |||
@@ -9,6 +9,7 @@ __PAGE_ALIGNED_DATA | |||
9 | vdso_start: | 9 | vdso_start: |
10 | .incbin "arch/x86/vdso/vdso.so" | 10 | .incbin "arch/x86/vdso/vdso.so" |
11 | vdso_end: | 11 | vdso_end: |
12 | .align PAGE_SIZE /* extra data here leaks to userspace. */ | ||
12 | 13 | ||
13 | .previous | 14 | .previous |
14 | 15 | ||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 974a528458a0..e2345af01af0 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -951,6 +951,10 @@ static const struct pv_info xen_info __initconst = { | |||
951 | .paravirt_enabled = 1, | 951 | .paravirt_enabled = 1, |
952 | .shared_kernel_pmd = 0, | 952 | .shared_kernel_pmd = 0, |
953 | 953 | ||
954 | #ifdef CONFIG_X86_64 | ||
955 | .extra_user_64bit_cs = FLAT_USER_CS64, | ||
956 | #endif | ||
957 | |||
954 | .name = "Xen", | 958 | .name = "Xen", |
955 | }; | 959 | }; |
956 | 960 | ||
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f987bde77c49..8cce339db5e7 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -1916,6 +1916,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) | |||
1916 | # endif | 1916 | # endif |
1917 | #else | 1917 | #else |
1918 | case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: | 1918 | case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: |
1919 | case VVAR_PAGE: | ||
1919 | #endif | 1920 | #endif |
1920 | case FIX_TEXT_POKE0: | 1921 | case FIX_TEXT_POKE0: |
1921 | case FIX_TEXT_POKE1: | 1922 | case FIX_TEXT_POKE1: |
@@ -1956,7 +1957,8 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) | |||
1956 | #ifdef CONFIG_X86_64 | 1957 | #ifdef CONFIG_X86_64 |
1957 | /* Replicate changes to map the vsyscall page into the user | 1958 | /* Replicate changes to map the vsyscall page into the user |
1958 | pagetable vsyscall mapping. */ | 1959 | pagetable vsyscall mapping. */ |
1959 | if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { | 1960 | if ((idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) || |
1961 | idx == VVAR_PAGE) { | ||
1960 | unsigned long vaddr = __fix_to_virt(idx); | 1962 | unsigned long vaddr = __fix_to_virt(idx); |
1961 | set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); | 1963 | set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); |
1962 | } | 1964 | } |